kreuzberg 4.0.0.pre.rc.29 → 4.0.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +0 -6
- data/.rubocop.yaml +534 -1
- data/Gemfile +2 -1
- data/Gemfile.lock +28 -116
- data/README.md +269 -629
- data/Rakefile +0 -9
- data/Steepfile +4 -8
- data/examples/async_patterns.rb +58 -1
- data/ext/kreuzberg_rb/extconf.rb +5 -35
- data/ext/kreuzberg_rb/native/Cargo.toml +16 -55
- data/ext/kreuzberg_rb/native/build.rs +14 -12
- data/ext/kreuzberg_rb/native/include/ieeefp.h +1 -1
- data/ext/kreuzberg_rb/native/include/msvc_compat/strings.h +1 -1
- data/ext/kreuzberg_rb/native/include/strings.h +2 -2
- data/ext/kreuzberg_rb/native/include/unistd.h +1 -1
- data/ext/kreuzberg_rb/native/src/lib.rs +34 -897
- data/extconf.rb +6 -38
- data/kreuzberg.gemspec +20 -114
- data/lib/kreuzberg/api_proxy.rb +18 -2
- data/lib/kreuzberg/cache_api.rb +0 -22
- data/lib/kreuzberg/cli.rb +10 -2
- data/lib/kreuzberg/cli_proxy.rb +10 -0
- data/lib/kreuzberg/config.rb +22 -274
- data/lib/kreuzberg/errors.rb +7 -73
- data/lib/kreuzberg/extraction_api.rb +8 -237
- data/lib/kreuzberg/mcp_proxy.rb +11 -2
- data/lib/kreuzberg/ocr_backend_protocol.rb +73 -0
- data/lib/kreuzberg/post_processor_protocol.rb +71 -0
- data/lib/kreuzberg/result.rb +33 -151
- data/lib/kreuzberg/setup_lib_path.rb +2 -22
- data/lib/kreuzberg/validator_protocol.rb +73 -0
- data/lib/kreuzberg/version.rb +1 -1
- data/lib/kreuzberg.rb +13 -27
- data/pkg/kreuzberg-4.0.0.rc1.gem +0 -0
- data/sig/kreuzberg.rbs +12 -105
- data/spec/binding/cache_spec.rb +22 -22
- data/spec/binding/cli_proxy_spec.rb +4 -2
- data/spec/binding/cli_spec.rb +11 -12
- data/spec/binding/config_spec.rb +0 -74
- data/spec/binding/config_validation_spec.rb +6 -100
- data/spec/binding/error_handling_spec.rb +97 -283
- data/spec/binding/plugins/ocr_backend_spec.rb +8 -8
- data/spec/binding/plugins/postprocessor_spec.rb +11 -11
- data/spec/binding/plugins/validator_spec.rb +13 -12
- data/spec/examples.txt +104 -0
- data/spec/fixtures/config.toml +1 -0
- data/spec/fixtures/config.yaml +1 -0
- data/spec/fixtures/invalid_config.toml +1 -0
- data/spec/smoke/package_spec.rb +3 -2
- data/spec/spec_helper.rb +3 -1
- data/vendor/kreuzberg/Cargo.toml +67 -192
- data/vendor/kreuzberg/README.md +9 -97
- data/vendor/kreuzberg/build.rs +194 -516
- data/vendor/kreuzberg/src/api/handlers.rs +9 -130
- data/vendor/kreuzberg/src/api/mod.rs +3 -18
- data/vendor/kreuzberg/src/api/server.rs +71 -236
- data/vendor/kreuzberg/src/api/types.rs +7 -43
- data/vendor/kreuzberg/src/bin/profile_extract.rs +455 -0
- data/vendor/kreuzberg/src/cache/mod.rs +3 -27
- data/vendor/kreuzberg/src/chunking/mod.rs +79 -1705
- data/vendor/kreuzberg/src/core/batch_mode.rs +0 -60
- data/vendor/kreuzberg/src/core/config.rs +23 -905
- data/vendor/kreuzberg/src/core/extractor.rs +106 -403
- data/vendor/kreuzberg/src/core/io.rs +2 -4
- data/vendor/kreuzberg/src/core/mime.rs +12 -2
- data/vendor/kreuzberg/src/core/mod.rs +3 -22
- data/vendor/kreuzberg/src/core/pipeline.rs +78 -395
- data/vendor/kreuzberg/src/embeddings.rs +21 -169
- data/vendor/kreuzberg/src/error.rs +2 -2
- data/vendor/kreuzberg/src/extraction/archive.rs +31 -36
- data/vendor/kreuzberg/src/extraction/docx.rs +1 -365
- data/vendor/kreuzberg/src/extraction/email.rs +11 -12
- data/vendor/kreuzberg/src/extraction/excel.rs +129 -138
- data/vendor/kreuzberg/src/extraction/html.rs +170 -1447
- data/vendor/kreuzberg/src/extraction/image.rs +14 -138
- data/vendor/kreuzberg/src/extraction/libreoffice.rs +3 -13
- data/vendor/kreuzberg/src/extraction/mod.rs +5 -21
- data/vendor/kreuzberg/src/extraction/office_metadata/mod.rs +0 -2
- data/vendor/kreuzberg/src/extraction/pandoc/batch.rs +275 -0
- data/vendor/kreuzberg/src/extraction/pandoc/mime_types.rs +178 -0
- data/vendor/kreuzberg/src/extraction/pandoc/mod.rs +491 -0
- data/vendor/kreuzberg/src/extraction/pandoc/server.rs +496 -0
- data/vendor/kreuzberg/src/extraction/pandoc/subprocess.rs +1188 -0
- data/vendor/kreuzberg/src/extraction/pandoc/version.rs +162 -0
- data/vendor/kreuzberg/src/extraction/pptx.rs +94 -196
- data/vendor/kreuzberg/src/extraction/structured.rs +4 -5
- data/vendor/kreuzberg/src/extraction/table.rs +1 -2
- data/vendor/kreuzberg/src/extraction/text.rs +10 -18
- data/vendor/kreuzberg/src/extractors/archive.rs +0 -22
- data/vendor/kreuzberg/src/extractors/docx.rs +148 -69
- data/vendor/kreuzberg/src/extractors/email.rs +9 -37
- data/vendor/kreuzberg/src/extractors/excel.rs +40 -81
- data/vendor/kreuzberg/src/extractors/html.rs +173 -182
- data/vendor/kreuzberg/src/extractors/image.rs +8 -32
- data/vendor/kreuzberg/src/extractors/mod.rs +10 -171
- data/vendor/kreuzberg/src/extractors/pandoc.rs +201 -0
- data/vendor/kreuzberg/src/extractors/pdf.rs +64 -329
- data/vendor/kreuzberg/src/extractors/pptx.rs +34 -79
- data/vendor/kreuzberg/src/extractors/structured.rs +0 -16
- data/vendor/kreuzberg/src/extractors/text.rs +7 -30
- data/vendor/kreuzberg/src/extractors/xml.rs +8 -27
- data/vendor/kreuzberg/src/keywords/processor.rs +1 -9
- data/vendor/kreuzberg/src/keywords/rake.rs +1 -0
- data/vendor/kreuzberg/src/language_detection/mod.rs +51 -94
- data/vendor/kreuzberg/src/lib.rs +5 -17
- data/vendor/kreuzberg/src/mcp/mod.rs +1 -4
- data/vendor/kreuzberg/src/mcp/server.rs +21 -145
- data/vendor/kreuzberg/src/ocr/mod.rs +0 -2
- data/vendor/kreuzberg/src/ocr/processor.rs +8 -19
- data/vendor/kreuzberg/src/ocr/tesseract_backend.rs +0 -2
- data/vendor/kreuzberg/src/pdf/error.rs +1 -93
- data/vendor/kreuzberg/src/pdf/metadata.rs +100 -263
- data/vendor/kreuzberg/src/pdf/mod.rs +2 -33
- data/vendor/kreuzberg/src/pdf/rendering.rs +12 -12
- data/vendor/kreuzberg/src/pdf/table.rs +64 -61
- data/vendor/kreuzberg/src/pdf/text.rs +24 -416
- data/vendor/kreuzberg/src/plugins/extractor.rs +8 -40
- data/vendor/kreuzberg/src/plugins/mod.rs +0 -3
- data/vendor/kreuzberg/src/plugins/ocr.rs +14 -22
- data/vendor/kreuzberg/src/plugins/processor.rs +1 -10
- data/vendor/kreuzberg/src/plugins/registry.rs +0 -15
- data/vendor/kreuzberg/src/plugins/validator.rs +8 -20
- data/vendor/kreuzberg/src/stopwords/mod.rs +2 -2
- data/vendor/kreuzberg/src/text/mod.rs +0 -8
- data/vendor/kreuzberg/src/text/quality.rs +15 -28
- data/vendor/kreuzberg/src/text/string_utils.rs +10 -22
- data/vendor/kreuzberg/src/text/token_reduction/core.rs +50 -86
- data/vendor/kreuzberg/src/text/token_reduction/filters.rs +16 -37
- data/vendor/kreuzberg/src/text/token_reduction/simd_text.rs +1 -2
- data/vendor/kreuzberg/src/types.rs +67 -907
- data/vendor/kreuzberg/src/utils/mod.rs +0 -14
- data/vendor/kreuzberg/src/utils/quality.rs +3 -12
- data/vendor/kreuzberg/tests/api_tests.rs +0 -506
- data/vendor/kreuzberg/tests/archive_integration.rs +0 -2
- data/vendor/kreuzberg/tests/batch_orchestration.rs +12 -57
- data/vendor/kreuzberg/tests/batch_processing.rs +8 -32
- data/vendor/kreuzberg/tests/chunking_offset_demo.rs +92 -0
- data/vendor/kreuzberg/tests/concurrency_stress.rs +8 -40
- data/vendor/kreuzberg/tests/config_features.rs +1 -33
- data/vendor/kreuzberg/tests/config_loading_tests.rs +39 -16
- data/vendor/kreuzberg/tests/core_integration.rs +9 -35
- data/vendor/kreuzberg/tests/csv_integration.rs +81 -71
- data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +25 -23
- data/vendor/kreuzberg/tests/email_integration.rs +1 -3
- data/vendor/kreuzberg/tests/error_handling.rs +34 -43
- data/vendor/kreuzberg/tests/format_integration.rs +1 -7
- data/vendor/kreuzberg/tests/helpers/mod.rs +0 -60
- data/vendor/kreuzberg/tests/image_integration.rs +0 -2
- data/vendor/kreuzberg/tests/mime_detection.rs +16 -17
- data/vendor/kreuzberg/tests/ocr_configuration.rs +0 -4
- data/vendor/kreuzberg/tests/ocr_errors.rs +0 -22
- data/vendor/kreuzberg/tests/ocr_quality.rs +0 -2
- data/vendor/kreuzberg/tests/pandoc_integration.rs +503 -0
- data/vendor/kreuzberg/tests/pdf_integration.rs +0 -2
- data/vendor/kreuzberg/tests/pipeline_integration.rs +2 -36
- data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +0 -5
- data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +1 -17
- data/vendor/kreuzberg/tests/plugin_system.rs +0 -6
- data/vendor/kreuzberg/tests/registry_integration_tests.rs +22 -2
- data/vendor/kreuzberg/tests/security_validation.rs +1 -13
- data/vendor/kreuzberg/tests/test_fastembed.rs +23 -45
- metadata +25 -171
- data/.rubocop.yml +0 -543
- data/ext/kreuzberg_rb/native/.cargo/config.toml +0 -23
- data/ext/kreuzberg_rb/native/Cargo.lock +0 -7619
- data/lib/kreuzberg/error_context.rb +0 -136
- data/lib/kreuzberg/types.rb +0 -170
- data/lib/libpdfium.so +0 -0
- data/spec/binding/async_operations_spec.rb +0 -473
- data/spec/binding/batch_operations_spec.rb +0 -595
- data/spec/binding/batch_spec.rb +0 -359
- data/spec/binding/config_result_spec.rb +0 -377
- data/spec/binding/embeddings_spec.rb +0 -816
- data/spec/binding/error_recovery_spec.rb +0 -488
- data/spec/binding/font_config_spec.rb +0 -220
- data/spec/binding/images_spec.rb +0 -738
- data/spec/binding/keywords_extraction_spec.rb +0 -600
- data/spec/binding/metadata_types_spec.rb +0 -1228
- data/spec/binding/pages_extraction_spec.rb +0 -471
- data/spec/binding/tables_spec.rb +0 -641
- data/spec/unit/config/chunking_config_spec.rb +0 -213
- data/spec/unit/config/embedding_config_spec.rb +0 -343
- data/spec/unit/config/extraction_config_spec.rb +0 -438
- data/spec/unit/config/font_config_spec.rb +0 -285
- data/spec/unit/config/hierarchy_config_spec.rb +0 -314
- data/spec/unit/config/image_extraction_config_spec.rb +0 -209
- data/spec/unit/config/image_preprocessing_config_spec.rb +0 -249
- data/spec/unit/config/keyword_config_spec.rb +0 -229
- data/spec/unit/config/language_detection_config_spec.rb +0 -258
- data/spec/unit/config/ocr_config_spec.rb +0 -171
- data/spec/unit/config/page_config_spec.rb +0 -221
- data/spec/unit/config/pdf_config_spec.rb +0 -267
- data/spec/unit/config/postprocessor_config_spec.rb +0 -290
- data/spec/unit/config/tesseract_config_spec.rb +0 -181
- data/spec/unit/config/token_reduction_config_spec.rb +0 -251
- data/test/metadata_types_test.rb +0 -959
- data/vendor/Cargo.toml +0 -61
- data/vendor/kreuzberg/examples/bench_fixes.rs +0 -71
- data/vendor/kreuzberg/examples/test_pdfium_fork.rs +0 -62
- data/vendor/kreuzberg/src/chunking/processor.rs +0 -219
- data/vendor/kreuzberg/src/core/batch_optimizations.rs +0 -385
- data/vendor/kreuzberg/src/core/config_validation.rs +0 -949
- data/vendor/kreuzberg/src/core/formats.rs +0 -235
- data/vendor/kreuzberg/src/core/server_config.rs +0 -1220
- data/vendor/kreuzberg/src/extraction/capacity.rs +0 -263
- data/vendor/kreuzberg/src/extraction/markdown.rs +0 -216
- data/vendor/kreuzberg/src/extraction/office_metadata/odt_properties.rs +0 -284
- data/vendor/kreuzberg/src/extractors/bibtex.rs +0 -470
- data/vendor/kreuzberg/src/extractors/docbook.rs +0 -504
- data/vendor/kreuzberg/src/extractors/epub.rs +0 -696
- data/vendor/kreuzberg/src/extractors/fictionbook.rs +0 -492
- data/vendor/kreuzberg/src/extractors/jats.rs +0 -1054
- data/vendor/kreuzberg/src/extractors/jupyter.rs +0 -368
- data/vendor/kreuzberg/src/extractors/latex.rs +0 -653
- data/vendor/kreuzberg/src/extractors/markdown.rs +0 -701
- data/vendor/kreuzberg/src/extractors/odt.rs +0 -628
- data/vendor/kreuzberg/src/extractors/opml.rs +0 -635
- data/vendor/kreuzberg/src/extractors/orgmode.rs +0 -529
- data/vendor/kreuzberg/src/extractors/rst.rs +0 -577
- data/vendor/kreuzberg/src/extractors/rtf.rs +0 -809
- data/vendor/kreuzberg/src/extractors/security.rs +0 -484
- data/vendor/kreuzberg/src/extractors/security_tests.rs +0 -367
- data/vendor/kreuzberg/src/extractors/typst.rs +0 -651
- data/vendor/kreuzberg/src/language_detection/processor.rs +0 -218
- data/vendor/kreuzberg/src/ocr/language_registry.rs +0 -520
- data/vendor/kreuzberg/src/panic_context.rs +0 -154
- data/vendor/kreuzberg/src/pdf/bindings.rs +0 -306
- data/vendor/kreuzberg/src/pdf/bundled.rs +0 -408
- data/vendor/kreuzberg/src/pdf/fonts.rs +0 -358
- data/vendor/kreuzberg/src/pdf/hierarchy.rs +0 -903
- data/vendor/kreuzberg/src/text/quality_processor.rs +0 -231
- data/vendor/kreuzberg/src/text/utf8_validation.rs +0 -193
- data/vendor/kreuzberg/src/utils/pool.rs +0 -503
- data/vendor/kreuzberg/src/utils/pool_sizing.rs +0 -364
- data/vendor/kreuzberg/src/utils/string_pool.rs +0 -761
- data/vendor/kreuzberg/tests/api_embed.rs +0 -360
- data/vendor/kreuzberg/tests/api_extract_multipart.rs +0 -52
- data/vendor/kreuzberg/tests/api_large_pdf_extraction.rs +0 -471
- data/vendor/kreuzberg/tests/api_large_pdf_extraction_diagnostics.rs +0 -289
- data/vendor/kreuzberg/tests/batch_pooling_benchmark.rs +0 -154
- data/vendor/kreuzberg/tests/bibtex_parity_test.rs +0 -421
- data/vendor/kreuzberg/tests/config_integration_test.rs +0 -753
- data/vendor/kreuzberg/tests/data/hierarchy_ground_truth.json +0 -294
- data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +0 -500
- data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +0 -370
- data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +0 -275
- data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +0 -228
- data/vendor/kreuzberg/tests/html_table_test.rs +0 -551
- data/vendor/kreuzberg/tests/instrumentation_test.rs +0 -139
- data/vendor/kreuzberg/tests/jats_extractor_tests.rs +0 -639
- data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +0 -704
- data/vendor/kreuzberg/tests/latex_extractor_tests.rs +0 -496
- data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +0 -490
- data/vendor/kreuzberg/tests/ocr_language_registry.rs +0 -191
- data/vendor/kreuzberg/tests/odt_extractor_tests.rs +0 -674
- data/vendor/kreuzberg/tests/opml_extractor_tests.rs +0 -616
- data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +0 -822
- data/vendor/kreuzberg/tests/page_markers.rs +0 -297
- data/vendor/kreuzberg/tests/pdf_hierarchy_detection.rs +0 -301
- data/vendor/kreuzberg/tests/pdf_hierarchy_quality.rs +0 -589
- data/vendor/kreuzberg/tests/pdf_ocr_triggering.rs +0 -301
- data/vendor/kreuzberg/tests/pdf_text_merging.rs +0 -475
- data/vendor/kreuzberg/tests/pdfium_linking.rs +0 -340
- data/vendor/kreuzberg/tests/rst_extractor_tests.rs +0 -694
- data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +0 -775
- data/vendor/kreuzberg/tests/typst_behavioral_tests.rs +0 -1260
- data/vendor/kreuzberg/tests/typst_extractor_tests.rs +0 -648
- data/vendor/kreuzberg-ffi/Cargo.toml +0 -67
- data/vendor/kreuzberg-ffi/README.md +0 -851
- data/vendor/kreuzberg-ffi/benches/result_view_benchmark.rs +0 -227
- data/vendor/kreuzberg-ffi/build.rs +0 -168
- data/vendor/kreuzberg-ffi/cbindgen.toml +0 -37
- data/vendor/kreuzberg-ffi/kreuzberg-ffi.pc.in +0 -12
- data/vendor/kreuzberg-ffi/kreuzberg.h +0 -3012
- data/vendor/kreuzberg-ffi/src/batch_streaming.rs +0 -588
- data/vendor/kreuzberg-ffi/src/config.rs +0 -1341
- data/vendor/kreuzberg-ffi/src/error.rs +0 -901
- data/vendor/kreuzberg-ffi/src/extraction.rs +0 -555
- data/vendor/kreuzberg-ffi/src/helpers.rs +0 -879
- data/vendor/kreuzberg-ffi/src/lib.rs +0 -977
- data/vendor/kreuzberg-ffi/src/memory.rs +0 -493
- data/vendor/kreuzberg-ffi/src/mime.rs +0 -329
- data/vendor/kreuzberg-ffi/src/panic_shield.rs +0 -265
- data/vendor/kreuzberg-ffi/src/plugins/document_extractor.rs +0 -442
- data/vendor/kreuzberg-ffi/src/plugins/mod.rs +0 -14
- data/vendor/kreuzberg-ffi/src/plugins/ocr_backend.rs +0 -628
- data/vendor/kreuzberg-ffi/src/plugins/post_processor.rs +0 -438
- data/vendor/kreuzberg-ffi/src/plugins/validator.rs +0 -329
- data/vendor/kreuzberg-ffi/src/result.rs +0 -510
- data/vendor/kreuzberg-ffi/src/result_pool.rs +0 -639
- data/vendor/kreuzberg-ffi/src/result_view.rs +0 -773
- data/vendor/kreuzberg-ffi/src/string_intern.rs +0 -568
- data/vendor/kreuzberg-ffi/src/types.rs +0 -363
- data/vendor/kreuzberg-ffi/src/util.rs +0 -210
- data/vendor/kreuzberg-ffi/src/validation.rs +0 -848
- data/vendor/kreuzberg-ffi/tests.disabled/README.md +0 -48
- data/vendor/kreuzberg-ffi/tests.disabled/config_loading_tests.rs +0 -299
- data/vendor/kreuzberg-ffi/tests.disabled/config_tests.rs +0 -346
- data/vendor/kreuzberg-ffi/tests.disabled/extractor_tests.rs +0 -232
- data/vendor/kreuzberg-ffi/tests.disabled/plugin_registration_tests.rs +0 -470
- data/vendor/kreuzberg-tesseract/.commitlintrc.json +0 -13
- data/vendor/kreuzberg-tesseract/.crate-ignore +0 -2
- data/vendor/kreuzberg-tesseract/Cargo.lock +0 -2933
- data/vendor/kreuzberg-tesseract/Cargo.toml +0 -57
- data/vendor/kreuzberg-tesseract/LICENSE +0 -22
- data/vendor/kreuzberg-tesseract/README.md +0 -399
- data/vendor/kreuzberg-tesseract/build.rs +0 -1127
- data/vendor/kreuzberg-tesseract/patches/README.md +0 -71
- data/vendor/kreuzberg-tesseract/patches/tesseract.diff +0 -199
- data/vendor/kreuzberg-tesseract/src/api.rs +0 -1371
- data/vendor/kreuzberg-tesseract/src/choice_iterator.rs +0 -77
- data/vendor/kreuzberg-tesseract/src/enums.rs +0 -297
- data/vendor/kreuzberg-tesseract/src/error.rs +0 -81
- data/vendor/kreuzberg-tesseract/src/lib.rs +0 -145
- data/vendor/kreuzberg-tesseract/src/monitor.rs +0 -57
- data/vendor/kreuzberg-tesseract/src/mutable_iterator.rs +0 -197
- data/vendor/kreuzberg-tesseract/src/page_iterator.rs +0 -253
- data/vendor/kreuzberg-tesseract/src/result_iterator.rs +0 -286
- data/vendor/kreuzberg-tesseract/src/result_renderer.rs +0 -183
- data/vendor/kreuzberg-tesseract/tests/integration_test.rs +0 -211
|
@@ -1,493 +0,0 @@
|
|
|
1
|
-
//! Memory management functions for FFI.
|
|
2
|
-
//!
|
|
3
|
-
//! This module contains all memory allocation and deallocation functions for the FFI layer.
|
|
4
|
-
//! These functions are CRITICAL for proper memory management across language boundaries.
|
|
5
|
-
//!
|
|
6
|
-
//! # Safety
|
|
7
|
-
//!
|
|
8
|
-
//! All functions in this module are unsafe and must be called correctly:
|
|
9
|
-
//! - Pointers must be valid and allocated by Rust
|
|
10
|
-
//! - Pointers must not be used after being freed
|
|
11
|
-
//! - NULL pointers are always safe: the free functions treat them as no-ops, and `kreuzberg_clone_string` returns NULL after recording an error
|
|
12
|
-
//!
|
|
13
|
-
//! # Memory Leak Bugs Fixed
|
|
14
|
-
//!
|
|
15
|
-
//! - PR #3: Fixed Box/Vec mismatch in `kreuzberg_free_batch_result` causing segfaults
|
|
16
|
-
//! - PR #3: Fixed missing `page_structure_json` and `pages_json` deallocation in `kreuzberg_free_result`
|
|
17
|
-
|
|
18
|
-
use std::ffi::{CStr, CString};
|
|
19
|
-
use std::os::raw::c_char;
|
|
20
|
-
use std::ptr;
|
|
21
|
-
|
|
22
|
-
use crate::ffi_panic_guard;
|
|
23
|
-
use crate::helpers::{clear_last_error, set_last_error};
|
|
24
|
-
use crate::types::{CBatchResult, CExtractionResult};
|
|
25
|
-
|
|
26
|
-
/// Free a batch result returned by batch extraction functions.
|
|
27
|
-
///
|
|
28
|
-
/// # Safety
|
|
29
|
-
///
|
|
30
|
-
/// - `batch_result` must be a pointer previously returned by a batch extraction function
|
|
31
|
-
/// - `batch_result` can be NULL (no-op)
|
|
32
|
-
/// - `batch_result` must not be used after this call
|
|
33
|
-
/// - All individual results in the batch will be freed automatically
|
|
34
|
-
///
|
|
35
|
-
/// # Memory Layout
|
|
36
|
-
///
|
|
37
|
-
/// CRITICAL: The results array is allocated as `Box<[*mut CExtractionResult]>` (boxed slice),
|
|
38
|
-
/// NOT as `Vec<*mut CExtractionResult>`. We must use `Box::from_raw` with a slice pointer,
|
|
39
|
-
/// not `Vec::from_raw_parts`, to avoid Box/Vec mismatch that causes segfaults.
|
|
40
|
-
///
|
|
41
|
-
/// # Example (C)
|
|
42
|
-
///
|
|
43
|
-
/// ```c
|
|
44
|
-
/// CBatchResult* batch = kreuzberg_extract_batch_sync(paths, count);
|
|
45
|
-
/// // Use batch...
|
|
46
|
-
/// kreuzberg_free_batch_result(batch);
|
|
47
|
-
/// // batch is now invalid
|
|
48
|
-
/// ```
|
|
49
|
-
#[unsafe(no_mangle)]
|
|
50
|
-
pub unsafe extern "C" fn kreuzberg_free_batch_result(batch_result: *mut CBatchResult) {
|
|
51
|
-
if batch_result.is_null() {
|
|
52
|
-
return;
|
|
53
|
-
}
|
|
54
|
-
|
|
55
|
-
let batch = unsafe { Box::from_raw(batch_result) };
|
|
56
|
-
|
|
57
|
-
// Free individual results first, then the array
|
|
58
|
-
if !batch.results.is_null() {
|
|
59
|
-
if batch.count > 0 {
|
|
60
|
-
unsafe {
|
|
61
|
-
// Free each individual result
|
|
62
|
-
for i in 0..batch.count {
|
|
63
|
-
let result_ptr = *batch.results.add(i);
|
|
64
|
-
if !result_ptr.is_null() {
|
|
65
|
-
kreuzberg_free_result(result_ptr);
|
|
66
|
-
}
|
|
67
|
-
}
|
|
68
|
-
}
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
// Free the results array itself (was created with into_boxed_slice())
|
|
72
|
-
// IMPORTANT: Must use Box::from_raw with slice pointer, not Vec::from_raw_parts
|
|
73
|
-
// because the array was allocated as Box<[T]>, not Vec<T>
|
|
74
|
-
unsafe {
|
|
75
|
-
let _boxed_slice = Box::from_raw(std::ptr::slice_from_raw_parts_mut(batch.results, batch.count));
|
|
76
|
-
// Box will be dropped here, freeing the array allocation
|
|
77
|
-
};
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
/// Release a C string that was handed out by a Kreuzberg function.
///
/// # Safety
///
/// - `s` must be NULL or a string previously returned by a Kreuzberg function
/// - NULL is accepted and does nothing
/// - `s` is invalid once this function returns
///
/// # Example (C)
///
/// ```c
/// char* str = result->content;
/// kreuzberg_free_string(str);
/// // str is now invalid
/// ```
#[unsafe(no_mangle)]
pub unsafe extern "C" fn kreuzberg_free_string(s: *mut c_char) {
    if s.is_null() {
        return;
    }

    // SAFETY: the caller guarantees a non-NULL `s` came from `CString::into_raw`.
    let reclaimed = unsafe { CString::from_raw(s) };
    drop(reclaimed);
}
|
|
102
|
-
|
|
103
|
-
/// Clone a null-terminated string using Rust's allocator.
|
|
104
|
-
///
|
|
105
|
-
/// # Safety
|
|
106
|
-
///
|
|
107
|
-
/// - `s` must be a valid null-terminated UTF-8 string
|
|
108
|
-
/// - Returned pointer must be freed with `kreuzberg_free_string`
|
|
109
|
-
/// - Returns NULL on error (check `kreuzberg_last_error`)
|
|
110
|
-
#[unsafe(no_mangle)]
|
|
111
|
-
pub unsafe extern "C" fn kreuzberg_clone_string(s: *const c_char) -> *mut c_char {
|
|
112
|
-
ffi_panic_guard!("kreuzberg_clone_string", {
|
|
113
|
-
clear_last_error();
|
|
114
|
-
|
|
115
|
-
if s.is_null() {
|
|
116
|
-
set_last_error("Input string cannot be NULL".to_string());
|
|
117
|
-
return ptr::null_mut();
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
let raw = match unsafe { CStr::from_ptr(s) }.to_str() {
|
|
121
|
-
Ok(val) => val,
|
|
122
|
-
Err(e) => {
|
|
123
|
-
set_last_error(format!("Invalid UTF-8 in string: {}", e));
|
|
124
|
-
return ptr::null_mut();
|
|
125
|
-
}
|
|
126
|
-
};
|
|
127
|
-
|
|
128
|
-
match CString::new(raw) {
|
|
129
|
-
Ok(cstr) => cstr.into_raw(),
|
|
130
|
-
Err(e) => {
|
|
131
|
-
set_last_error(format!("Failed to clone string: {}", e));
|
|
132
|
-
ptr::null_mut()
|
|
133
|
-
}
|
|
134
|
-
}
|
|
135
|
-
})
|
|
136
|
-
}
|
|
137
|
-
|
|
138
|
-
/// Free an extraction result returned by `kreuzberg_extract_file_sync`.
|
|
139
|
-
///
|
|
140
|
-
/// # Safety
|
|
141
|
-
///
|
|
142
|
-
/// - `result` must be a pointer previously returned by `kreuzberg_extract_file_sync`
|
|
143
|
-
/// - `result` can be NULL (no-op)
|
|
144
|
-
/// - `result` must not be used after this call
|
|
145
|
-
/// - All string fields within the result will be freed automatically
|
|
146
|
-
///
|
|
147
|
-
/// # Memory Layout
|
|
148
|
-
///
|
|
149
|
-
/// This function frees all 12 string fields in CExtractionResult:
|
|
150
|
-
/// 1. content
|
|
151
|
-
/// 2. mime_type
|
|
152
|
-
/// 3. language
|
|
153
|
-
/// 4. date
|
|
154
|
-
/// 5. subject
|
|
155
|
-
/// 6. tables_json
|
|
156
|
-
/// 7. detected_languages_json
|
|
157
|
-
/// 8. metadata_json
|
|
158
|
-
/// 9. chunks_json
|
|
159
|
-
/// 10. images_json
|
|
160
|
-
/// 11. page_structure_json (FIXED: was missing before PR #3)
|
|
161
|
-
/// 12. pages_json (FIXED: was missing before PR #3)
|
|
162
|
-
///
|
|
163
|
-
/// # Example (C)
|
|
164
|
-
///
|
|
165
|
-
/// ```c
|
|
166
|
-
/// CExtractionResult* result = kreuzberg_extract_file_sync(path);
|
|
167
|
-
/// // Use result...
|
|
168
|
-
/// kreuzberg_free_result(result);
|
|
169
|
-
/// // result is now invalid
|
|
170
|
-
/// ```
|
|
171
|
-
#[unsafe(no_mangle)]
|
|
172
|
-
pub unsafe extern "C" fn kreuzberg_free_result(result: *mut CExtractionResult) {
|
|
173
|
-
if !result.is_null() {
|
|
174
|
-
let result_box = unsafe { Box::from_raw(result) };
|
|
175
|
-
|
|
176
|
-
if !result_box.content.is_null() {
|
|
177
|
-
unsafe { drop(CString::from_raw(result_box.content)) };
|
|
178
|
-
}
|
|
179
|
-
if !result_box.mime_type.is_null() {
|
|
180
|
-
unsafe { drop(CString::from_raw(result_box.mime_type)) };
|
|
181
|
-
}
|
|
182
|
-
if !result_box.language.is_null() {
|
|
183
|
-
unsafe { drop(CString::from_raw(result_box.language)) };
|
|
184
|
-
}
|
|
185
|
-
if !result_box.date.is_null() {
|
|
186
|
-
unsafe { drop(CString::from_raw(result_box.date)) };
|
|
187
|
-
}
|
|
188
|
-
if !result_box.subject.is_null() {
|
|
189
|
-
unsafe { drop(CString::from_raw(result_box.subject)) };
|
|
190
|
-
}
|
|
191
|
-
if !result_box.tables_json.is_null() {
|
|
192
|
-
unsafe { drop(CString::from_raw(result_box.tables_json)) };
|
|
193
|
-
}
|
|
194
|
-
if !result_box.detected_languages_json.is_null() {
|
|
195
|
-
unsafe { drop(CString::from_raw(result_box.detected_languages_json)) };
|
|
196
|
-
}
|
|
197
|
-
if !result_box.metadata_json.is_null() {
|
|
198
|
-
unsafe { drop(CString::from_raw(result_box.metadata_json)) };
|
|
199
|
-
}
|
|
200
|
-
if !result_box.chunks_json.is_null() {
|
|
201
|
-
unsafe { drop(CString::from_raw(result_box.chunks_json)) };
|
|
202
|
-
}
|
|
203
|
-
if !result_box.images_json.is_null() {
|
|
204
|
-
unsafe { drop(CString::from_raw(result_box.images_json)) };
|
|
205
|
-
}
|
|
206
|
-
if !result_box.page_structure_json.is_null() {
|
|
207
|
-
unsafe { drop(CString::from_raw(result_box.page_structure_json)) };
|
|
208
|
-
}
|
|
209
|
-
if !result_box.pages_json.is_null() {
|
|
210
|
-
unsafe { drop(CString::from_raw(result_box.pages_json)) };
|
|
211
|
-
}
|
|
212
|
-
}
|
|
213
|
-
}
|
|
214
|
-
|
|
215
|
-
#[cfg(test)]
|
|
216
|
-
mod tests {
|
|
217
|
-
use super::*;
|
|
218
|
-
use std::ffi::CString;
|
|
219
|
-
|
|
220
|
-
/// Helper to create a test CExtractionResult with all fields populated
|
|
221
|
-
fn create_test_result() -> *mut CExtractionResult {
|
|
222
|
-
Box::into_raw(Box::new(CExtractionResult {
|
|
223
|
-
content: CString::new("test content").unwrap().into_raw(),
|
|
224
|
-
mime_type: CString::new("text/plain").unwrap().into_raw(),
|
|
225
|
-
language: CString::new("en").unwrap().into_raw(),
|
|
226
|
-
date: CString::new("2024-01-01").unwrap().into_raw(),
|
|
227
|
-
subject: CString::new("test subject").unwrap().into_raw(),
|
|
228
|
-
tables_json: CString::new("[]").unwrap().into_raw(),
|
|
229
|
-
detected_languages_json: CString::new("[\"en\"]").unwrap().into_raw(),
|
|
230
|
-
metadata_json: CString::new("{}").unwrap().into_raw(),
|
|
231
|
-
chunks_json: CString::new("[]").unwrap().into_raw(),
|
|
232
|
-
images_json: CString::new("[]").unwrap().into_raw(),
|
|
233
|
-
page_structure_json: CString::new("{}").unwrap().into_raw(),
|
|
234
|
-
pages_json: CString::new("[]").unwrap().into_raw(),
|
|
235
|
-
success: true,
|
|
236
|
-
_padding1: [0u8; 7],
|
|
237
|
-
}))
|
|
238
|
-
}
|
|
239
|
-
|
|
240
|
-
/// Helper to create a test CExtractionResult with some NULL fields
|
|
241
|
-
fn create_partial_result() -> *mut CExtractionResult {
|
|
242
|
-
Box::into_raw(Box::new(CExtractionResult {
|
|
243
|
-
content: CString::new("test content").unwrap().into_raw(),
|
|
244
|
-
mime_type: CString::new("text/plain").unwrap().into_raw(),
|
|
245
|
-
language: ptr::null_mut(),
|
|
246
|
-
date: ptr::null_mut(),
|
|
247
|
-
subject: ptr::null_mut(),
|
|
248
|
-
tables_json: ptr::null_mut(),
|
|
249
|
-
detected_languages_json: ptr::null_mut(),
|
|
250
|
-
metadata_json: CString::new("{}").unwrap().into_raw(),
|
|
251
|
-
chunks_json: ptr::null_mut(),
|
|
252
|
-
images_json: ptr::null_mut(),
|
|
253
|
-
page_structure_json: ptr::null_mut(),
|
|
254
|
-
pages_json: ptr::null_mut(),
|
|
255
|
-
success: true,
|
|
256
|
-
_padding1: [0u8; 7],
|
|
257
|
-
}))
|
|
258
|
-
}
|
|
259
|
-
|
|
260
|
-
#[test]
fn test_free_string_null() {
    // Passing NULL must be tolerated without crashing.
    let null_string = ptr::null_mut();
    unsafe { kreuzberg_free_string(null_string) };
}
|
|
265
|
-
|
|
266
|
-
#[test]
fn test_free_string_valid() {
    // Reaching the end of the test without a crash is the success criterion.
    let owned = CString::new("test string").unwrap();
    let raw = owned.into_raw();
    unsafe { kreuzberg_free_string(raw) };
}
|
|
272
|
-
|
|
273
|
-
#[test]
fn test_clone_string_null() {
    // Cloning NULL must yield NULL.
    let cloned = unsafe { kreuzberg_clone_string(ptr::null()) };
    assert!(cloned.is_null());
}
|
|
278
|
-
|
|
279
|
-
#[test]
fn test_clone_string_valid() {
    let source = CString::new("test string").unwrap();
    let copy = unsafe { kreuzberg_clone_string(source.as_ptr()) };

    assert!(!copy.is_null());

    // The copy must carry the same text as the source.
    let round_tripped = unsafe { CStr::from_ptr(copy) }.to_str().unwrap();
    assert_eq!(round_tripped, "test string");

    // Release the copy only after the borrowed view is no longer used.
    unsafe { kreuzberg_free_string(copy) };
}
|
|
295
|
-
|
|
296
|
-
#[test]
fn test_clone_and_free_cycle() {
    // Repeat the clone/free round trip 100 times.
    (0..100).for_each(|_| {
        let source = CString::new("test").unwrap();
        let copy = unsafe { kreuzberg_clone_string(source.as_ptr()) };
        assert!(!copy.is_null());
        unsafe { kreuzberg_free_string(copy) };
    });
}
|
|
306
|
-
|
|
307
|
-
#[test]
fn test_free_result_null() {
    // Passing NULL must be tolerated without crashing.
    let null_result = ptr::null_mut();
    unsafe { kreuzberg_free_result(null_result) };
}
|
|
312
|
-
|
|
313
|
-
#[test]
fn test_free_result_all_fields() {
    // Free a result in which all 12 string fields are populated;
    // reaching the end without a crash is the success criterion.
    let fully_populated = create_test_result();
    unsafe { kreuzberg_free_result(fully_populated) };
}
|
|
320
|
-
|
|
321
|
-
#[test]
fn test_free_result_partial_fields() {
    // Free a result in which several string fields are NULL;
    // reaching the end without a crash is the success criterion.
    let sparse = create_partial_result();
    unsafe { kreuzberg_free_result(sparse) };
}
|
|
328
|
-
|
|
329
|
-
#[test]
fn test_free_result_page_structure_and_pages_json() {
    // Regression test: `page_structure_json` and `pages_json` must be released.
    // Both were left out of the original implementation before PR #3.
    let owned_c = |text: &str| CString::new(text).unwrap().into_raw();

    let page_fields_only = CExtractionResult {
        content: owned_c("test"),
        mime_type: owned_c("text/plain"),
        language: ptr::null_mut(),
        date: ptr::null_mut(),
        subject: ptr::null_mut(),
        tables_json: ptr::null_mut(),
        detected_languages_json: ptr::null_mut(),
        metadata_json: ptr::null_mut(),
        chunks_json: ptr::null_mut(),
        images_json: ptr::null_mut(),
        page_structure_json: owned_c("{\"pages\": []}"),
        pages_json: owned_c("[{\"content\": \"page 1\"}]"),
        success: true,
        _padding1: [0u8; 7],
    };
    let raw = Box::into_raw(Box::new(page_fields_only));

    // Reaching the end without crashing or leaking is the success criterion.
    unsafe { kreuzberg_free_result(raw) };
}
|
|
353
|
-
|
|
354
|
-
#[test]
|
|
355
|
-
fn test_free_batch_result_null() {
|
|
356
|
-
// Should not crash on NULL
|
|
357
|
-
unsafe { kreuzberg_free_batch_result(ptr::null_mut()) };
|
|
358
|
-
}
|
|
359
|
-
|
|
360
|
-
#[test]
|
|
361
|
-
fn test_free_batch_result_empty() {
|
|
362
|
-
// Test freeing a batch with 0 results
|
|
363
|
-
let batch = Box::into_raw(Box::new(CBatchResult {
|
|
364
|
-
results: ptr::null_mut(),
|
|
365
|
-
count: 0,
|
|
366
|
-
success: true,
|
|
367
|
-
_padding2: [0u8; 7],
|
|
368
|
-
}));
|
|
369
|
-
|
|
370
|
-
unsafe { kreuzberg_free_batch_result(batch) };
|
|
371
|
-
}
|
|
372
|
-
|
|
373
|
-
#[test]
|
|
374
|
-
fn test_free_batch_result_single() {
|
|
375
|
-
// Test freeing a batch with 1 result
|
|
376
|
-
let result = create_test_result();
|
|
377
|
-
let results_array = vec![result].into_boxed_slice();
|
|
378
|
-
let results_ptr = Box::into_raw(results_array) as *mut *mut CExtractionResult;
|
|
379
|
-
|
|
380
|
-
let batch = Box::into_raw(Box::new(CBatchResult {
|
|
381
|
-
results: results_ptr,
|
|
382
|
-
count: 1,
|
|
383
|
-
success: true,
|
|
384
|
-
_padding2: [0u8; 7],
|
|
385
|
-
}));
|
|
386
|
-
|
|
387
|
-
unsafe { kreuzberg_free_batch_result(batch) };
|
|
388
|
-
}
|
|
389
|
-
|
|
390
|
-
#[test]
|
|
391
|
-
fn test_free_batch_result_multiple() {
|
|
392
|
-
// Test freeing a batch with 100 results
|
|
393
|
-
let mut results = Vec::new();
|
|
394
|
-
for _ in 0..100 {
|
|
395
|
-
results.push(create_test_result());
|
|
396
|
-
}
|
|
397
|
-
|
|
398
|
-
let results_array = results.into_boxed_slice();
|
|
399
|
-
let results_ptr = Box::into_raw(results_array) as *mut *mut CExtractionResult;
|
|
400
|
-
|
|
401
|
-
let batch = Box::into_raw(Box::new(CBatchResult {
|
|
402
|
-
results: results_ptr,
|
|
403
|
-
count: 100,
|
|
404
|
-
success: true,
|
|
405
|
-
_padding2: [0u8; 7],
|
|
406
|
-
}));
|
|
407
|
-
|
|
408
|
-
unsafe { kreuzberg_free_batch_result(batch) };
|
|
409
|
-
}
|
|
410
|
-
|
|
411
|
-
#[test]
|
|
412
|
-
fn test_free_batch_result_box_vec_symmetry() {
|
|
413
|
-
// Regression test for Box/Vec mismatch bug fixed in PR #3
|
|
414
|
-
// This test ensures we use Box::from_raw with slice pointer,
|
|
415
|
-
// not Vec::from_raw_parts, which would cause a segfault
|
|
416
|
-
|
|
417
|
-
// Create results using Box<[T]> allocation (same as production code)
|
|
418
|
-
let mut results = Vec::new();
|
|
419
|
-
for _ in 0..10 {
|
|
420
|
-
results.push(create_test_result());
|
|
421
|
-
}
|
|
422
|
-
|
|
423
|
-
// Convert to boxed slice (this is what production code does)
|
|
424
|
-
let results_array = results.into_boxed_slice();
|
|
425
|
-
let count = results_array.len();
|
|
426
|
-
let results_ptr = Box::into_raw(results_array) as *mut *mut CExtractionResult;
|
|
427
|
-
|
|
428
|
-
let batch = Box::into_raw(Box::new(CBatchResult {
|
|
429
|
-
results: results_ptr,
|
|
430
|
-
count,
|
|
431
|
-
success: true,
|
|
432
|
-
_padding2: [0u8; 7],
|
|
433
|
-
}));
|
|
434
|
-
|
|
435
|
-
// This should NOT segfault
|
|
436
|
-
unsafe { kreuzberg_free_batch_result(batch) };
|
|
437
|
-
}
|
|
438
|
-
|
|
439
|
-
#[test]
|
|
440
|
-
fn test_free_batch_result_with_null_results() {
|
|
441
|
-
// Test freeing a batch where some results are NULL
|
|
442
|
-
let results = vec![
|
|
443
|
-
create_test_result(),
|
|
444
|
-
ptr::null_mut(),
|
|
445
|
-
create_test_result(),
|
|
446
|
-
ptr::null_mut(),
|
|
447
|
-
];
|
|
448
|
-
|
|
449
|
-
let results_array = results.into_boxed_slice();
|
|
450
|
-
let results_ptr = Box::into_raw(results_array) as *mut *mut CExtractionResult;
|
|
451
|
-
|
|
452
|
-
let batch = Box::into_raw(Box::new(CBatchResult {
|
|
453
|
-
results: results_ptr,
|
|
454
|
-
count: 4,
|
|
455
|
-
success: true,
|
|
456
|
-
_padding2: [0u8; 7],
|
|
457
|
-
}));
|
|
458
|
-
|
|
459
|
-
unsafe { kreuzberg_free_batch_result(batch) };
|
|
460
|
-
}
|
|
461
|
-
|
|
462
|
-
#[test]
|
|
463
|
-
fn test_memory_stress_test() {
|
|
464
|
-
// Stress test: allocate and free 1000 results
|
|
465
|
-
for _ in 0..1000 {
|
|
466
|
-
let result = create_test_result();
|
|
467
|
-
unsafe { kreuzberg_free_result(result) };
|
|
468
|
-
}
|
|
469
|
-
}
|
|
470
|
-
|
|
471
|
-
#[test]
|
|
472
|
-
fn test_memory_stress_test_batch() {
|
|
473
|
-
// Stress test: allocate and free 100 batches of 10 results each
|
|
474
|
-
for _ in 0..100 {
|
|
475
|
-
let mut results = Vec::new();
|
|
476
|
-
for _ in 0..10 {
|
|
477
|
-
results.push(create_test_result());
|
|
478
|
-
}
|
|
479
|
-
|
|
480
|
-
let results_array = results.into_boxed_slice();
|
|
481
|
-
let results_ptr = Box::into_raw(results_array) as *mut *mut CExtractionResult;
|
|
482
|
-
|
|
483
|
-
let batch = Box::into_raw(Box::new(CBatchResult {
|
|
484
|
-
results: results_ptr,
|
|
485
|
-
count: 10,
|
|
486
|
-
success: true,
|
|
487
|
-
_padding2: [0u8; 7],
|
|
488
|
-
}));
|
|
489
|
-
|
|
490
|
-
unsafe { kreuzberg_free_batch_result(batch) };
|
|
491
|
-
}
|
|
492
|
-
}
|
|
493
|
-
}
|