kreuzberg 4.0.0.pre.rc.7 → 4.0.0.pre.rc.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +13 -12
  3. data/README.md +22 -0
  4. data/ext/kreuzberg_rb/native/.cargo/config.toml +1 -1
  5. data/ext/kreuzberg_rb/native/Cargo.lock +397 -183
  6. data/ext/kreuzberg_rb/native/Cargo.toml +3 -3
  7. data/ext/kreuzberg_rb/native/src/lib.rs +36 -13
  8. data/kreuzberg.gemspec +34 -2
  9. data/lib/kreuzberg/cache_api.rb +35 -0
  10. data/lib/kreuzberg/error_context.rb +49 -1
  11. data/lib/kreuzberg/extraction_api.rb +255 -0
  12. data/lib/kreuzberg/version.rb +1 -1
  13. data/lib/kreuzberg.rb +6 -0
  14. data/lib/libpdfium.dylib +0 -0
  15. data/sig/kreuzberg.rbs +9 -0
  16. data/vendor/Cargo.toml +44 -0
  17. data/vendor/kreuzberg/Cargo.toml +65 -35
  18. data/vendor/kreuzberg/README.md +50 -0
  19. data/vendor/kreuzberg/build.rs +548 -190
  20. data/vendor/kreuzberg/src/api/mod.rs +0 -2
  21. data/vendor/kreuzberg/src/core/pipeline.rs +13 -0
  22. data/vendor/kreuzberg/src/embeddings.rs +71 -3
  23. data/vendor/kreuzberg/src/error.rs +1 -1
  24. data/vendor/kreuzberg/src/extraction/docx.rs +1 -1
  25. data/vendor/kreuzberg/src/extraction/html.rs +37 -5
  26. data/vendor/kreuzberg/src/extractors/pdf.rs +99 -47
  27. data/vendor/kreuzberg/src/mcp/mod.rs +3 -2
  28. data/vendor/kreuzberg/src/mcp/server.rs +106 -0
  29. data/vendor/kreuzberg/src/pdf/bindings.rs +44 -0
  30. data/vendor/kreuzberg/src/pdf/bundled.rs +346 -0
  31. data/vendor/kreuzberg/src/pdf/metadata.rs +2 -2
  32. data/vendor/kreuzberg/src/pdf/mod.rs +6 -0
  33. data/vendor/kreuzberg/src/pdf/rendering.rs +2 -2
  34. data/vendor/kreuzberg/src/pdf/table.rs +3 -0
  35. data/vendor/kreuzberg/src/pdf/text.rs +2 -2
  36. data/vendor/kreuzberg/src/text/quality_processor.rs +1 -1
  37. data/vendor/kreuzberg/tests/concurrency_stress.rs +1 -1
  38. data/vendor/kreuzberg/tests/format_integration.rs +4 -1
  39. data/vendor/kreuzberg/tests/pdfium_linking.rs +374 -0
  40. data/vendor/kreuzberg-ffi/Cargo.toml +63 -0
  41. data/vendor/kreuzberg-ffi/README.md +851 -0
  42. data/vendor/kreuzberg-ffi/build.rs +176 -0
  43. data/vendor/kreuzberg-ffi/cbindgen.toml +27 -0
  44. data/vendor/kreuzberg-ffi/kreuzberg-ffi-install.pc +12 -0
  45. data/vendor/kreuzberg-ffi/kreuzberg-ffi.pc.in +12 -0
  46. data/vendor/kreuzberg-ffi/kreuzberg.h +1087 -0
  47. data/vendor/kreuzberg-ffi/src/lib.rs +3616 -0
  48. data/vendor/kreuzberg-ffi/src/panic_shield.rs +247 -0
  49. data/vendor/kreuzberg-ffi/tests.disabled/README.md +48 -0
  50. data/vendor/kreuzberg-ffi/tests.disabled/config_loading_tests.rs +299 -0
  51. data/vendor/kreuzberg-ffi/tests.disabled/config_tests.rs +346 -0
  52. data/vendor/kreuzberg-ffi/tests.disabled/extractor_tests.rs +232 -0
  53. data/vendor/kreuzberg-ffi/tests.disabled/plugin_registration_tests.rs +470 -0
  54. data/vendor/kreuzberg-tesseract/.commitlintrc.json +13 -0
  55. data/vendor/kreuzberg-tesseract/.crate-ignore +2 -0
  56. data/vendor/kreuzberg-tesseract/Cargo.lock +2933 -0
  57. data/vendor/kreuzberg-tesseract/Cargo.toml +48 -0
  58. data/vendor/kreuzberg-tesseract/LICENSE +22 -0
  59. data/vendor/kreuzberg-tesseract/README.md +399 -0
  60. data/vendor/kreuzberg-tesseract/build.rs +1354 -0
  61. data/vendor/kreuzberg-tesseract/patches/README.md +71 -0
  62. data/vendor/kreuzberg-tesseract/patches/tesseract.diff +199 -0
  63. data/vendor/kreuzberg-tesseract/src/api.rs +1371 -0
  64. data/vendor/kreuzberg-tesseract/src/choice_iterator.rs +77 -0
  65. data/vendor/kreuzberg-tesseract/src/enums.rs +297 -0
  66. data/vendor/kreuzberg-tesseract/src/error.rs +81 -0
  67. data/vendor/kreuzberg-tesseract/src/lib.rs +145 -0
  68. data/vendor/kreuzberg-tesseract/src/monitor.rs +57 -0
  69. data/vendor/kreuzberg-tesseract/src/mutable_iterator.rs +197 -0
  70. data/vendor/kreuzberg-tesseract/src/page_iterator.rs +253 -0
  71. data/vendor/kreuzberg-tesseract/src/result_iterator.rs +286 -0
  72. data/vendor/kreuzberg-tesseract/src/result_renderer.rs +183 -0
  73. data/vendor/kreuzberg-tesseract/tests/integration_test.rs +211 -0
  74. data/vendor/rb-sys/src/lib.rs +1 -0
  75. metadata +41 -3
  76. data/vendor/rb-sys/bin/release.sh +0 -22
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kreuzberg
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.0.pre.rc.7
4
+ version: 4.0.0.pre.rc.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Na'aman Hirschfeld
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-12-13 00:00:00.000000000 Z
11
+ date: 2025-12-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -214,6 +214,7 @@ files:
214
214
  - lib/kreuzberg/setup_lib_path.rb
215
215
  - lib/kreuzberg/validator_protocol.rb
216
216
  - lib/kreuzberg/version.rb
217
+ - lib/libpdfium.dylib
217
218
  - sig/kreuzberg.rbs
218
219
  - sig/kreuzberg/internal.rbs
219
220
  - spec/binding/cache_spec.rb
@@ -231,6 +232,41 @@ files:
231
232
  - spec/fixtures/invalid_config.toml
232
233
  - spec/smoke/package_spec.rb
233
234
  - spec/spec_helper.rb
235
+ - vendor/Cargo.toml
236
+ - vendor/kreuzberg-ffi/Cargo.toml
237
+ - vendor/kreuzberg-ffi/README.md
238
+ - vendor/kreuzberg-ffi/build.rs
239
+ - vendor/kreuzberg-ffi/cbindgen.toml
240
+ - vendor/kreuzberg-ffi/kreuzberg-ffi-install.pc
241
+ - vendor/kreuzberg-ffi/kreuzberg-ffi.pc.in
242
+ - vendor/kreuzberg-ffi/kreuzberg.h
243
+ - vendor/kreuzberg-ffi/src/lib.rs
244
+ - vendor/kreuzberg-ffi/src/panic_shield.rs
245
+ - vendor/kreuzberg-ffi/tests.disabled/README.md
246
+ - vendor/kreuzberg-ffi/tests.disabled/config_loading_tests.rs
247
+ - vendor/kreuzberg-ffi/tests.disabled/config_tests.rs
248
+ - vendor/kreuzberg-ffi/tests.disabled/extractor_tests.rs
249
+ - vendor/kreuzberg-ffi/tests.disabled/plugin_registration_tests.rs
250
+ - vendor/kreuzberg-tesseract/.commitlintrc.json
251
+ - vendor/kreuzberg-tesseract/.crate-ignore
252
+ - vendor/kreuzberg-tesseract/Cargo.lock
253
+ - vendor/kreuzberg-tesseract/Cargo.toml
254
+ - vendor/kreuzberg-tesseract/LICENSE
255
+ - vendor/kreuzberg-tesseract/README.md
256
+ - vendor/kreuzberg-tesseract/build.rs
257
+ - vendor/kreuzberg-tesseract/patches/README.md
258
+ - vendor/kreuzberg-tesseract/patches/tesseract.diff
259
+ - vendor/kreuzberg-tesseract/src/api.rs
260
+ - vendor/kreuzberg-tesseract/src/choice_iterator.rs
261
+ - vendor/kreuzberg-tesseract/src/enums.rs
262
+ - vendor/kreuzberg-tesseract/src/error.rs
263
+ - vendor/kreuzberg-tesseract/src/lib.rs
264
+ - vendor/kreuzberg-tesseract/src/monitor.rs
265
+ - vendor/kreuzberg-tesseract/src/mutable_iterator.rs
266
+ - vendor/kreuzberg-tesseract/src/page_iterator.rs
267
+ - vendor/kreuzberg-tesseract/src/result_iterator.rs
268
+ - vendor/kreuzberg-tesseract/src/result_renderer.rs
269
+ - vendor/kreuzberg-tesseract/tests/integration_test.rs
234
270
  - vendor/kreuzberg/Cargo.toml
235
271
  - vendor/kreuzberg/README.md
236
272
  - vendor/kreuzberg/benches/otel_overhead.rs
@@ -326,6 +362,8 @@ files:
326
362
  - vendor/kreuzberg/src/ocr/utils.rs
327
363
  - vendor/kreuzberg/src/ocr/validation.rs
328
364
  - vendor/kreuzberg/src/panic_context.rs
365
+ - vendor/kreuzberg/src/pdf/bindings.rs
366
+ - vendor/kreuzberg/src/pdf/bundled.rs
329
367
  - vendor/kreuzberg/src/pdf/error.rs
330
368
  - vendor/kreuzberg/src/pdf/images.rs
331
369
  - vendor/kreuzberg/src/pdf/metadata.rs
@@ -458,6 +496,7 @@ files:
458
496
  - vendor/kreuzberg/tests/opml_extractor_tests.rs
459
497
  - vendor/kreuzberg/tests/orgmode_extractor_tests.rs
460
498
  - vendor/kreuzberg/tests/pdf_integration.rs
499
+ - vendor/kreuzberg/tests/pdfium_linking.rs
461
500
  - vendor/kreuzberg/tests/pipeline_integration.rs
462
501
  - vendor/kreuzberg/tests/plugin_ocr_backend_test.rs
463
502
  - vendor/kreuzberg/tests/plugin_postprocessor_test.rs
@@ -478,7 +517,6 @@ files:
478
517
  - vendor/rb-sys/Cargo.toml.orig
479
518
  - vendor/rb-sys/LICENSE-APACHE
480
519
  - vendor/rb-sys/LICENSE-MIT
481
- - vendor/rb-sys/bin/release.sh
482
520
  - vendor/rb-sys/build/features.rs
483
521
  - vendor/rb-sys/build/main.rs
484
522
  - vendor/rb-sys/build/stable_api_config.rs
@@ -1,22 +0,0 @@
1
- #!/bin/bash
2
-
3
- set -euo pipefail
4
- IFS=$'\n\t'
5
-
6
- if ! git diff-index --quiet HEAD --; then
7
- echo "There are git changes, cannot release"
8
- exit 1
9
- fi
10
-
11
-
12
- read -rp "What version would you like to release? (current $(grep version Cargo.toml)): " version
13
- read -rp "Are you sure you want to bump to v$version? <y/N> " prompt
14
-
15
- if [[ $prompt =~ [yY](es)* ]]; then
16
- sed -i '' "s/^version = .*/version = \"$version\"/g" Cargo.toml
17
- cargo build
18
- git add Cargo.lock Cargo.toml ../../Cargo.lock
19
- git commit -am "Bump to v$version"
20
- git tag "v$version"
21
- git push --atomic origin main "v$version"
22
- fi