kreuzberg 4.0.0.pre.rc.7 → 4.0.0.pre.rc.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +13 -12
- data/README.md +22 -0
- data/ext/kreuzberg_rb/native/.cargo/config.toml +1 -1
- data/ext/kreuzberg_rb/native/Cargo.lock +397 -183
- data/ext/kreuzberg_rb/native/Cargo.toml +3 -3
- data/ext/kreuzberg_rb/native/src/lib.rs +36 -13
- data/kreuzberg.gemspec +34 -2
- data/lib/kreuzberg/cache_api.rb +35 -0
- data/lib/kreuzberg/error_context.rb +49 -1
- data/lib/kreuzberg/extraction_api.rb +255 -0
- data/lib/kreuzberg/version.rb +1 -1
- data/lib/kreuzberg.rb +6 -0
- data/lib/libpdfium.dylib +0 -0
- data/sig/kreuzberg.rbs +9 -0
- data/vendor/Cargo.toml +44 -0
- data/vendor/kreuzberg/Cargo.toml +65 -35
- data/vendor/kreuzberg/README.md +50 -0
- data/vendor/kreuzberg/build.rs +548 -190
- data/vendor/kreuzberg/src/api/mod.rs +0 -2
- data/vendor/kreuzberg/src/core/pipeline.rs +13 -0
- data/vendor/kreuzberg/src/embeddings.rs +71 -3
- data/vendor/kreuzberg/src/error.rs +1 -1
- data/vendor/kreuzberg/src/extraction/docx.rs +1 -1
- data/vendor/kreuzberg/src/extraction/html.rs +37 -5
- data/vendor/kreuzberg/src/extractors/pdf.rs +99 -47
- data/vendor/kreuzberg/src/mcp/mod.rs +3 -2
- data/vendor/kreuzberg/src/mcp/server.rs +106 -0
- data/vendor/kreuzberg/src/pdf/bindings.rs +44 -0
- data/vendor/kreuzberg/src/pdf/bundled.rs +346 -0
- data/vendor/kreuzberg/src/pdf/metadata.rs +2 -2
- data/vendor/kreuzberg/src/pdf/mod.rs +6 -0
- data/vendor/kreuzberg/src/pdf/rendering.rs +2 -2
- data/vendor/kreuzberg/src/pdf/table.rs +3 -0
- data/vendor/kreuzberg/src/pdf/text.rs +2 -2
- data/vendor/kreuzberg/src/text/quality_processor.rs +1 -1
- data/vendor/kreuzberg/tests/concurrency_stress.rs +1 -1
- data/vendor/kreuzberg/tests/format_integration.rs +4 -1
- data/vendor/kreuzberg/tests/pdfium_linking.rs +374 -0
- data/vendor/kreuzberg-ffi/Cargo.toml +63 -0
- data/vendor/kreuzberg-ffi/README.md +851 -0
- data/vendor/kreuzberg-ffi/build.rs +176 -0
- data/vendor/kreuzberg-ffi/cbindgen.toml +27 -0
- data/vendor/kreuzberg-ffi/kreuzberg-ffi-install.pc +12 -0
- data/vendor/kreuzberg-ffi/kreuzberg-ffi.pc.in +12 -0
- data/vendor/kreuzberg-ffi/kreuzberg.h +1087 -0
- data/vendor/kreuzberg-ffi/src/lib.rs +3616 -0
- data/vendor/kreuzberg-ffi/src/panic_shield.rs +247 -0
- data/vendor/kreuzberg-ffi/tests.disabled/README.md +48 -0
- data/vendor/kreuzberg-ffi/tests.disabled/config_loading_tests.rs +299 -0
- data/vendor/kreuzberg-ffi/tests.disabled/config_tests.rs +346 -0
- data/vendor/kreuzberg-ffi/tests.disabled/extractor_tests.rs +232 -0
- data/vendor/kreuzberg-ffi/tests.disabled/plugin_registration_tests.rs +470 -0
- data/vendor/kreuzberg-tesseract/.commitlintrc.json +13 -0
- data/vendor/kreuzberg-tesseract/.crate-ignore +2 -0
- data/vendor/kreuzberg-tesseract/Cargo.lock +2933 -0
- data/vendor/kreuzberg-tesseract/Cargo.toml +48 -0
- data/vendor/kreuzberg-tesseract/LICENSE +22 -0
- data/vendor/kreuzberg-tesseract/README.md +399 -0
- data/vendor/kreuzberg-tesseract/build.rs +1354 -0
- data/vendor/kreuzberg-tesseract/patches/README.md +71 -0
- data/vendor/kreuzberg-tesseract/patches/tesseract.diff +199 -0
- data/vendor/kreuzberg-tesseract/src/api.rs +1371 -0
- data/vendor/kreuzberg-tesseract/src/choice_iterator.rs +77 -0
- data/vendor/kreuzberg-tesseract/src/enums.rs +297 -0
- data/vendor/kreuzberg-tesseract/src/error.rs +81 -0
- data/vendor/kreuzberg-tesseract/src/lib.rs +145 -0
- data/vendor/kreuzberg-tesseract/src/monitor.rs +57 -0
- data/vendor/kreuzberg-tesseract/src/mutable_iterator.rs +197 -0
- data/vendor/kreuzberg-tesseract/src/page_iterator.rs +253 -0
- data/vendor/kreuzberg-tesseract/src/result_iterator.rs +286 -0
- data/vendor/kreuzberg-tesseract/src/result_renderer.rs +183 -0
- data/vendor/kreuzberg-tesseract/tests/integration_test.rs +211 -0
- data/vendor/rb-sys/src/lib.rs +1 -0
- metadata +41 -3
- data/vendor/rb-sys/bin/release.sh +0 -22
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: kreuzberg
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 4.0.0.pre.rc.7
|
|
4
|
+
version: 4.0.0.pre.rc.11
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Na'aman Hirschfeld
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2025-12-
|
|
11
|
+
date: 2025-12-19 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -214,6 +214,7 @@ files:
|
|
|
214
214
|
- lib/kreuzberg/setup_lib_path.rb
|
|
215
215
|
- lib/kreuzberg/validator_protocol.rb
|
|
216
216
|
- lib/kreuzberg/version.rb
|
|
217
|
+
- lib/libpdfium.dylib
|
|
217
218
|
- sig/kreuzberg.rbs
|
|
218
219
|
- sig/kreuzberg/internal.rbs
|
|
219
220
|
- spec/binding/cache_spec.rb
|
|
@@ -231,6 +232,41 @@ files:
|
|
|
231
232
|
- spec/fixtures/invalid_config.toml
|
|
232
233
|
- spec/smoke/package_spec.rb
|
|
233
234
|
- spec/spec_helper.rb
|
|
235
|
+
- vendor/Cargo.toml
|
|
236
|
+
- vendor/kreuzberg-ffi/Cargo.toml
|
|
237
|
+
- vendor/kreuzberg-ffi/README.md
|
|
238
|
+
- vendor/kreuzberg-ffi/build.rs
|
|
239
|
+
- vendor/kreuzberg-ffi/cbindgen.toml
|
|
240
|
+
- vendor/kreuzberg-ffi/kreuzberg-ffi-install.pc
|
|
241
|
+
- vendor/kreuzberg-ffi/kreuzberg-ffi.pc.in
|
|
242
|
+
- vendor/kreuzberg-ffi/kreuzberg.h
|
|
243
|
+
- vendor/kreuzberg-ffi/src/lib.rs
|
|
244
|
+
- vendor/kreuzberg-ffi/src/panic_shield.rs
|
|
245
|
+
- vendor/kreuzberg-ffi/tests.disabled/README.md
|
|
246
|
+
- vendor/kreuzberg-ffi/tests.disabled/config_loading_tests.rs
|
|
247
|
+
- vendor/kreuzberg-ffi/tests.disabled/config_tests.rs
|
|
248
|
+
- vendor/kreuzberg-ffi/tests.disabled/extractor_tests.rs
|
|
249
|
+
- vendor/kreuzberg-ffi/tests.disabled/plugin_registration_tests.rs
|
|
250
|
+
- vendor/kreuzberg-tesseract/.commitlintrc.json
|
|
251
|
+
- vendor/kreuzberg-tesseract/.crate-ignore
|
|
252
|
+
- vendor/kreuzberg-tesseract/Cargo.lock
|
|
253
|
+
- vendor/kreuzberg-tesseract/Cargo.toml
|
|
254
|
+
- vendor/kreuzberg-tesseract/LICENSE
|
|
255
|
+
- vendor/kreuzberg-tesseract/README.md
|
|
256
|
+
- vendor/kreuzberg-tesseract/build.rs
|
|
257
|
+
- vendor/kreuzberg-tesseract/patches/README.md
|
|
258
|
+
- vendor/kreuzberg-tesseract/patches/tesseract.diff
|
|
259
|
+
- vendor/kreuzberg-tesseract/src/api.rs
|
|
260
|
+
- vendor/kreuzberg-tesseract/src/choice_iterator.rs
|
|
261
|
+
- vendor/kreuzberg-tesseract/src/enums.rs
|
|
262
|
+
- vendor/kreuzberg-tesseract/src/error.rs
|
|
263
|
+
- vendor/kreuzberg-tesseract/src/lib.rs
|
|
264
|
+
- vendor/kreuzberg-tesseract/src/monitor.rs
|
|
265
|
+
- vendor/kreuzberg-tesseract/src/mutable_iterator.rs
|
|
266
|
+
- vendor/kreuzberg-tesseract/src/page_iterator.rs
|
|
267
|
+
- vendor/kreuzberg-tesseract/src/result_iterator.rs
|
|
268
|
+
- vendor/kreuzberg-tesseract/src/result_renderer.rs
|
|
269
|
+
- vendor/kreuzberg-tesseract/tests/integration_test.rs
|
|
234
270
|
- vendor/kreuzberg/Cargo.toml
|
|
235
271
|
- vendor/kreuzberg/README.md
|
|
236
272
|
- vendor/kreuzberg/benches/otel_overhead.rs
|
|
@@ -326,6 +362,8 @@ files:
|
|
|
326
362
|
- vendor/kreuzberg/src/ocr/utils.rs
|
|
327
363
|
- vendor/kreuzberg/src/ocr/validation.rs
|
|
328
364
|
- vendor/kreuzberg/src/panic_context.rs
|
|
365
|
+
- vendor/kreuzberg/src/pdf/bindings.rs
|
|
366
|
+
- vendor/kreuzberg/src/pdf/bundled.rs
|
|
329
367
|
- vendor/kreuzberg/src/pdf/error.rs
|
|
330
368
|
- vendor/kreuzberg/src/pdf/images.rs
|
|
331
369
|
- vendor/kreuzberg/src/pdf/metadata.rs
|
|
@@ -458,6 +496,7 @@ files:
|
|
|
458
496
|
- vendor/kreuzberg/tests/opml_extractor_tests.rs
|
|
459
497
|
- vendor/kreuzberg/tests/orgmode_extractor_tests.rs
|
|
460
498
|
- vendor/kreuzberg/tests/pdf_integration.rs
|
|
499
|
+
- vendor/kreuzberg/tests/pdfium_linking.rs
|
|
461
500
|
- vendor/kreuzberg/tests/pipeline_integration.rs
|
|
462
501
|
- vendor/kreuzberg/tests/plugin_ocr_backend_test.rs
|
|
463
502
|
- vendor/kreuzberg/tests/plugin_postprocessor_test.rs
|
|
@@ -478,7 +517,6 @@ files:
|
|
|
478
517
|
- vendor/rb-sys/Cargo.toml.orig
|
|
479
518
|
- vendor/rb-sys/LICENSE-APACHE
|
|
480
519
|
- vendor/rb-sys/LICENSE-MIT
|
|
481
|
-
- vendor/rb-sys/bin/release.sh
|
|
482
520
|
- vendor/rb-sys/build/features.rs
|
|
483
521
|
- vendor/rb-sys/build/main.rs
|
|
484
522
|
- vendor/rb-sys/build/stable_api_config.rs
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
|
|
2
|
-
|
|
3
|
-
set -euo pipefail
|
|
4
|
-
IFS=$'\n\t'
|
|
5
|
-
|
|
6
|
-
if ! git diff-index --quiet HEAD --; then
|
|
7
|
-
echo "There are git changes, cannot release"
|
|
8
|
-
exit 1
|
|
9
|
-
fi
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
read -rp "What version would you like to release? (current $(grep version Cargo.toml)): " version
|
|
13
|
-
read -rp "Are you sure you want to bump to v$version? <y/N> " prompt
|
|
14
|
-
|
|
15
|
-
if [[ $prompt =~ [yY](es)* ]]; then
|
|
16
|
-
sed -i '' "s/^version = .*/version = \"$version\"/g" Cargo.toml
|
|
17
|
-
cargo build
|
|
18
|
-
git add Cargo.lock Cargo.toml ../../Cargo.lock
|
|
19
|
-
git commit -am "Bump to v$version"
|
|
20
|
-
git tag "v$version"
|
|
21
|
-
git push --atomic origin main "v$version"
|
|
22
|
-
fi
|