kreuzberg 2.1.2__tar.gz → 4.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kreuzberg might be problematic. Click here for more details.
- kreuzberg-4.0.4/Cargo.lock +8888 -0
- kreuzberg-4.0.4/Cargo.toml +81 -0
- kreuzberg-4.0.4/PKG-INFO +470 -0
- kreuzberg-4.0.4/README.md +421 -0
- kreuzberg-4.0.4/crates/kreuzberg/Cargo.toml +259 -0
- kreuzberg-4.0.4/crates/kreuzberg/README.md +263 -0
- kreuzberg-4.0.4/crates/kreuzberg/build.rs +782 -0
- kreuzberg-4.0.4/crates/kreuzberg/examples/bench_fixes.rs +71 -0
- kreuzberg-4.0.4/crates/kreuzberg/examples/test_pdfium_fork.rs +62 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/api/error.rs +81 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/api/handlers.rs +320 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/api/mod.rs +94 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/api/server.rs +518 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/api/types.rs +206 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/cache/mod.rs +1167 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/chunking/mod.rs +2303 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/chunking/processor.rs +219 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/core/batch_mode.rs +95 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/core/batch_optimizations.rs +385 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/core/config.rs +1914 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/core/config_validation.rs +949 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/core/extractor.rs +1200 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/core/formats.rs +235 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/core/io.rs +329 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/core/mime.rs +605 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/core/mod.rs +61 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/core/pipeline.rs +1223 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/core/server_config.rs +1220 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/embeddings.rs +471 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/error.rs +431 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extraction/archive.rs +959 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extraction/capacity.rs +263 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extraction/docx.rs +404 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extraction/email.rs +855 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extraction/excel.rs +697 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extraction/html.rs +1830 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extraction/image.rs +492 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extraction/libreoffice.rs +574 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extraction/markdown.rs +216 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extraction/mod.rs +93 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extraction/office_metadata/app_properties.rs +398 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extraction/office_metadata/core_properties.rs +247 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extraction/office_metadata/custom_properties.rs +240 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extraction/office_metadata/mod.rs +130 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extraction/office_metadata/odt_properties.rs +284 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extraction/pptx.rs +3102 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extraction/structured.rs +491 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extraction/table.rs +329 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extraction/text.rs +277 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extraction/xml.rs +333 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/archive.rs +447 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/bibtex.rs +470 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/docbook.rs +504 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/docx.rs +400 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/email.rs +157 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/epub.rs +696 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/excel.rs +385 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/fictionbook.rs +492 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/html.rs +419 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/image.rs +219 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/jats.rs +1054 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/jupyter.rs +368 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/latex.rs +653 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/markdown.rs +701 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/mod.rs +429 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/odt.rs +628 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/opml.rs +635 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/orgmode.rs +529 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/pdf.rs +761 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/pptx.rs +279 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/rst.rs +577 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/rtf.rs +809 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/security.rs +484 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/security_tests.rs +367 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/structured.rs +142 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/text.rs +265 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/typst.rs +651 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/extractors/xml.rs +147 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/image/dpi.rs +164 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/image/mod.rs +6 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/image/preprocessing.rs +417 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/image/resize.rs +89 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/keywords/config.rs +154 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/keywords/mod.rs +237 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/keywords/processor.rs +275 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/keywords/rake.rs +293 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/keywords/types.rs +68 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/keywords/yake.rs +163 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/language_detection/mod.rs +985 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/language_detection/processor.rs +218 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/lib.rs +114 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/mcp/mod.rs +35 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/mcp/server.rs +2090 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/ocr/cache.rs +469 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/ocr/error.rs +37 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/ocr/hocr.rs +216 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/ocr/language_registry.rs +520 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/ocr/mod.rs +60 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/ocr/processor.rs +858 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/ocr/table/mod.rs +4 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/ocr/table/tsv_parser.rs +144 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/ocr/tesseract_backend.rs +456 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/ocr/types.rs +393 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/ocr/utils.rs +47 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/ocr/validation.rs +206 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/panic_context.rs +154 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/pdf/bindings.rs +306 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/pdf/bundled.rs +408 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/pdf/error.rs +214 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/pdf/fonts.rs +358 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/pdf/hierarchy.rs +903 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/pdf/images.rs +139 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/pdf/metadata.rs +509 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/pdf/mod.rs +81 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/pdf/rendering.rs +369 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/pdf/table.rs +417 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/pdf/text.rs +553 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/plugins/extractor.rs +1042 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/plugins/mod.rs +212 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/plugins/ocr.rs +637 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/plugins/processor.rs +650 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/plugins/registry.rs +1339 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/plugins/traits.rs +258 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/plugins/validator.rs +967 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/stopwords/mod.rs +1470 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/text/mod.rs +27 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/text/quality.rs +710 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/text/quality_processor.rs +231 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/text/string_utils.rs +229 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/text/token_reduction/cjk_utils.rs +164 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/text/token_reduction/config.rs +100 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/text/token_reduction/core.rs +832 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/text/token_reduction/filters.rs +923 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/text/token_reduction/mod.rs +160 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/text/token_reduction/semantic.rs +619 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/text/token_reduction/simd_text.rs +148 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/text/utf8_validation.rs +193 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/types.rs +1713 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/utils/mod.rs +31 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/utils/pool.rs +503 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/utils/pool_sizing.rs +364 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/utils/quality.rs +968 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/utils/string_pool.rs +762 -0
- kreuzberg-4.0.4/crates/kreuzberg/src/utils/string_utils.rs +381 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/af_stopwords.json +53 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/ar_stopwords.json +482 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/bg_stopwords.json +261 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/bn_stopwords.json +400 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/br_stopwords.json +1205 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/ca_stopwords.json +280 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/cs_stopwords.json +425 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/da_stopwords.json +172 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/de_stopwords.json +622 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/el_stopwords.json +849 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/en_stopwords.json +1300 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/eo_stopwords.json +175 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/es_stopwords.json +734 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/et_stopwords.json +37 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/eu_stopwords.json +100 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/fa_stopwords.json +801 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/fi_stopwords.json +849 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/fr_stopwords.json +693 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/ga_stopwords.json +111 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/gl_stopwords.json +162 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/gu_stopwords.json +226 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/ha_stopwords.json +41 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/he_stopwords.json +196 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/hi_stopwords.json +227 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/hr_stopwords.json +181 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/hu_stopwords.json +791 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/hy_stopwords.json +47 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/id_stopwords.json +760 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/it_stopwords.json +634 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/ja_stopwords.json +136 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/kn_stopwords.json +84 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/ko_stopwords.json +681 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/ku_stopwords.json +64 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/la_stopwords.json +51 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/lt_stopwords.json +476 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/lv_stopwords.json +163 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/ml_stopwords.json +1 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/mr_stopwords.json +101 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/ms_stopwords.json +477 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/ne_stopwords.json +490 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/nl_stopwords.json +415 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/no_stopwords.json +223 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/pl_stopwords.json +331 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/pt_stopwords.json +562 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/ro_stopwords.json +436 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/ru_stopwords.json +561 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/si_stopwords.json +193 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/sk_stopwords.json +420 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/sl_stopwords.json +448 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/so_stopwords.json +32 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/st_stopwords.json +33 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/sv_stopwords.json +420 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/sw_stopwords.json +76 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/ta_stopwords.json +129 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/te_stopwords.json +54 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/th_stopwords.json +118 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/tl_stopwords.json +149 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/tr_stopwords.json +506 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/uk_stopwords.json +75 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/ur_stopwords.json +519 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/vi_stopwords.json +647 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/yo_stopwords.json +62 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/zh_stopwords.json +796 -0
- kreuzberg-4.0.4/crates/kreuzberg/stopwords/zu_stopwords.json +31 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/api_embed.rs +360 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/api_extract_multipart.rs +52 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/api_large_pdf_extraction.rs +471 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/api_large_pdf_extraction_diagnostics.rs +289 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/api_tests.rs +1472 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/archive_integration.rs +545 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/batch_orchestration.rs +587 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/batch_pooling_benchmark.rs +154 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/batch_processing.rs +328 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/bibtex_parity_test.rs +421 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/concurrency_stress.rs +541 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/config_features.rs +612 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/config_integration_test.rs +753 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/config_loading_tests.rs +416 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/core_integration.rs +519 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/csv_integration.rs +414 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/data/hierarchy_ground_truth.json +294 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/docbook_extractor_tests.rs +500 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/docx_metadata_extraction_test.rs +122 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/docx_vs_pandoc_comparison.rs +370 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/email_integration.rs +327 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/epub_native_extractor_tests.rs +275 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/error_handling.rs +402 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/fictionbook_extractor_tests.rs +228 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/format_integration.rs +165 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/helpers/mod.rs +202 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/html_table_test.rs +551 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/image_integration.rs +255 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/instrumentation_test.rs +139 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/jats_extractor_tests.rs +639 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/jupyter_extractor_tests.rs +704 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/keywords_integration.rs +479 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/keywords_quality.rs +509 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/latex_extractor_tests.rs +496 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/markdown_extractor_tests.rs +490 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/mime_detection.rs +429 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/ocr_configuration.rs +514 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/ocr_errors.rs +698 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/ocr_language_registry.rs +191 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/ocr_quality.rs +629 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/ocr_stress.rs +469 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/odt_extractor_tests.rs +674 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/opml_extractor_tests.rs +616 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/orgmode_extractor_tests.rs +822 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/page_markers.rs +297 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/pdf_hierarchy_detection.rs +301 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/pdf_hierarchy_quality.rs +589 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/pdf_integration.rs +45 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/pdf_ocr_triggering.rs +301 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/pdf_text_merging.rs +475 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/pdfium_linking.rs +340 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/pipeline_integration.rs +1446 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/plugin_ocr_backend_test.rs +776 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/plugin_postprocessor_test.rs +577 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/plugin_system.rs +927 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/plugin_validator_test.rs +783 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/registry_integration_tests.rs +587 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/rst_extractor_tests.rs +694 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/rtf_extractor_tests.rs +775 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/security_validation.rs +416 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/stopwords_integration_test.rs +888 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/test_fastembed.rs +631 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/typst_behavioral_tests.rs +1260 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/typst_extractor_tests.rs +648 -0
- kreuzberg-4.0.4/crates/kreuzberg/tests/xlsx_metadata_extraction_test.rs +87 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/Cargo.toml +67 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/README.md +851 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/benches/result_view_benchmark.rs +227 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/build.rs +168 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/cbindgen.toml +37 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/kreuzberg-ffi.pc.in +12 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/kreuzberg.h +3012 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/src/batch_streaming.rs +588 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/src/config.rs +1341 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/src/error.rs +901 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/src/extraction.rs +555 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/src/helpers.rs +879 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/src/lib.rs +977 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/src/memory.rs +493 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/src/mime.rs +329 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/src/panic_shield.rs +265 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/src/plugins/document_extractor.rs +442 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/src/plugins/mod.rs +14 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/src/plugins/ocr_backend.rs +628 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/src/plugins/post_processor.rs +438 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/src/plugins/validator.rs +329 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/src/result.rs +510 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/src/result_pool.rs +639 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/src/result_view.rs +773 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/src/string_intern.rs +568 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/src/types.rs +363 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/src/util.rs +210 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/src/validation.rs +848 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/tests.disabled/README.md +48 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/tests.disabled/config_loading_tests.rs +299 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/tests.disabled/config_tests.rs +346 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/tests.disabled/extractor_tests.rs +232 -0
- kreuzberg-4.0.4/crates/kreuzberg-ffi/tests.disabled/plugin_registration_tests.rs +470 -0
- kreuzberg-4.0.4/crates/kreuzberg-py/Cargo.toml +34 -0
- kreuzberg-4.0.4/crates/kreuzberg-py/README.md +360 -0
- kreuzberg-4.0.4/crates/kreuzberg-py/benches/async_patterns_benchmark.py +98 -0
- kreuzberg-4.0.4/crates/kreuzberg-py/build.rs +72 -0
- kreuzberg-4.0.4/crates/kreuzberg-py/src/config.rs +2513 -0
- kreuzberg-4.0.4/crates/kreuzberg-py/src/core.rs +536 -0
- kreuzberg-4.0.4/crates/kreuzberg-py/src/error.rs +375 -0
- kreuzberg-4.0.4/crates/kreuzberg-py/src/ffi.rs +188 -0
- kreuzberg-4.0.4/crates/kreuzberg-py/src/lib.rs +387 -0
- kreuzberg-4.0.4/crates/kreuzberg-py/src/plugins.rs +1923 -0
- kreuzberg-4.0.4/crates/kreuzberg-py/src/types.rs +590 -0
- kreuzberg-4.0.4/crates/kreuzberg-py/src/validation.rs +189 -0
- kreuzberg-4.0.4/crates/kreuzberg-tesseract/.commitlintrc.json +13 -0
- kreuzberg-4.0.4/crates/kreuzberg-tesseract/.crate-ignore +2 -0
- kreuzberg-4.0.4/crates/kreuzberg-tesseract/Cargo.lock +2933 -0
- kreuzberg-4.0.4/crates/kreuzberg-tesseract/Cargo.toml +57 -0
- kreuzberg-4.0.4/crates/kreuzberg-tesseract/LICENSE +22 -0
- kreuzberg-4.0.4/crates/kreuzberg-tesseract/README.md +399 -0
- kreuzberg-4.0.4/crates/kreuzberg-tesseract/build.rs +1127 -0
- kreuzberg-4.0.4/crates/kreuzberg-tesseract/patches/README.md +71 -0
- kreuzberg-4.0.4/crates/kreuzberg-tesseract/patches/tesseract.diff +199 -0
- kreuzberg-4.0.4/crates/kreuzberg-tesseract/src/api.rs +1371 -0
- kreuzberg-4.0.4/crates/kreuzberg-tesseract/src/choice_iterator.rs +77 -0
- kreuzberg-4.0.4/crates/kreuzberg-tesseract/src/enums.rs +297 -0
- kreuzberg-4.0.4/crates/kreuzberg-tesseract/src/error.rs +81 -0
- kreuzberg-4.0.4/crates/kreuzberg-tesseract/src/lib.rs +145 -0
- kreuzberg-4.0.4/crates/kreuzberg-tesseract/src/monitor.rs +57 -0
- kreuzberg-4.0.4/crates/kreuzberg-tesseract/src/mutable_iterator.rs +197 -0
- kreuzberg-4.0.4/crates/kreuzberg-tesseract/src/page_iterator.rs +253 -0
- kreuzberg-4.0.4/crates/kreuzberg-tesseract/src/result_iterator.rs +286 -0
- kreuzberg-4.0.4/crates/kreuzberg-tesseract/src/result_renderer.rs +183 -0
- kreuzberg-4.0.4/crates/kreuzberg-tesseract/tests/integration_test.rs +211 -0
- kreuzberg-4.0.4/kreuzberg/__init__.py +931 -0
- kreuzberg-4.0.4/kreuzberg/__main__.py +160 -0
- kreuzberg-4.0.4/kreuzberg/_internal_bindings.pyi +1383 -0
- kreuzberg-4.0.4/kreuzberg/_setup_lib_path.py +143 -0
- kreuzberg-4.0.4/kreuzberg/exceptions.py +254 -0
- kreuzberg-4.0.4/kreuzberg/ocr/__init__.py +25 -0
- kreuzberg-4.0.4/kreuzberg/ocr/easyocr.py +371 -0
- kreuzberg-4.0.4/kreuzberg/ocr/paddleocr.py +284 -0
- kreuzberg-4.0.4/kreuzberg/ocr/protocol.py +150 -0
- kreuzberg-4.0.4/kreuzberg/postprocessors/__init__.py +61 -0
- kreuzberg-4.0.4/kreuzberg/postprocessors/protocol.py +83 -0
- kreuzberg-4.0.4/kreuzberg/types.py +509 -0
- kreuzberg-4.0.4/pyproject.toml +109 -0
- kreuzberg-2.1.2/LICENSE +0 -7
- kreuzberg-2.1.2/PKG-INFO +0 -446
- kreuzberg-2.1.2/README.md +0 -411
- kreuzberg-2.1.2/kreuzberg/__init__.py +0 -28
- kreuzberg-2.1.2/kreuzberg/_constants.py +0 -8
- kreuzberg-2.1.2/kreuzberg/_html.py +0 -31
- kreuzberg-2.1.2/kreuzberg/_mime_types.py +0 -202
- kreuzberg-2.1.2/kreuzberg/_pandoc.py +0 -366
- kreuzberg-2.1.2/kreuzberg/_pdf.py +0 -190
- kreuzberg-2.1.2/kreuzberg/_pptx.py +0 -88
- kreuzberg-2.1.2/kreuzberg/_string.py +0 -41
- kreuzberg-2.1.2/kreuzberg/_sync.py +0 -74
- kreuzberg-2.1.2/kreuzberg/_tesseract.py +0 -231
- kreuzberg-2.1.2/kreuzberg/_tmp.py +0 -37
- kreuzberg-2.1.2/kreuzberg/_types.py +0 -71
- kreuzberg-2.1.2/kreuzberg/_xlsx.py +0 -88
- kreuzberg-2.1.2/kreuzberg/exceptions.py +0 -56
- kreuzberg-2.1.2/kreuzberg/extraction.py +0 -364
- kreuzberg-2.1.2/kreuzberg.egg-info/PKG-INFO +0 -446
- kreuzberg-2.1.2/kreuzberg.egg-info/SOURCES.txt +0 -24
- kreuzberg-2.1.2/kreuzberg.egg-info/dependency_links.txt +0 -1
- kreuzberg-2.1.2/kreuzberg.egg-info/requires.txt +0 -12
- kreuzberg-2.1.2/kreuzberg.egg-info/top_level.txt +0 -1
- kreuzberg-2.1.2/pyproject.toml +0 -137
- kreuzberg-2.1.2/setup.cfg +0 -4
- {kreuzberg-2.1.2 → kreuzberg-4.0.4}/kreuzberg/py.typed +0 -0
There are too many changes on this page to be displayed.
The number of changes on this page would crash your browser.
You can still verify the content by downloading the package file manually.