kreuzberg 4.0.0.pre.rc.27 → 4.0.0.pre.rc.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +10 -10
- data/README.md +10 -0
- data/ext/kreuzberg_rb/native/Cargo.lock +541 -35
- data/ext/kreuzberg_rb/native/Cargo.toml +7 -1
- data/lib/kreuzberg/version.rb +1 -1
- data/spec/binding/batch_spec.rb +0 -2
- data/vendor/Cargo.toml +11 -7
- data/vendor/kreuzberg/Cargo.toml +35 -15
- data/vendor/kreuzberg/build.rs +7 -8
- data/vendor/kreuzberg/src/api/handlers.rs +126 -1
- data/vendor/kreuzberg/src/api/mod.rs +10 -3
- data/vendor/kreuzberg/src/api/server.rs +207 -199
- data/vendor/kreuzberg/src/api/types.rs +23 -0
- data/vendor/kreuzberg/src/core/config.rs +561 -0
- data/vendor/kreuzberg/src/core/config_validation.rs +295 -0
- data/vendor/kreuzberg/src/core/mod.rs +2 -0
- data/vendor/kreuzberg/src/core/server_config.rs +1220 -0
- data/vendor/kreuzberg/src/embeddings.rs +21 -49
- data/vendor/kreuzberg/src/extractors/docx.rs +1 -1
- data/vendor/kreuzberg/src/extractors/pdf.rs +29 -30
- data/vendor/kreuzberg/src/extractors/pptx.rs +30 -18
- data/vendor/kreuzberg/src/lib.rs +3 -0
- data/vendor/kreuzberg/src/mcp/server.rs +39 -40
- data/vendor/kreuzberg/src/ocr/processor.rs +1 -6
- data/vendor/kreuzberg/src/pdf/bindings.rs +20 -4
- data/vendor/kreuzberg/src/pdf/metadata.rs +7 -0
- data/vendor/kreuzberg/tests/api_embed.rs +360 -0
- data/vendor/kreuzberg/tests/api_large_pdf_extraction.rs +1 -2
- data/vendor/kreuzberg/tests/config_integration_test.rs +753 -0
- data/vendor/kreuzberg-ffi/Cargo.toml +1 -2
- data/vendor/kreuzberg-tesseract/Cargo.toml +15 -4
- data/vendor/kreuzberg-tesseract/build.rs +1 -0
- metadata +5 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f243ab522441e38c6e3a68083ce4d75a271e123a16444bdb78f98112f6d62f6a
|
|
4
|
+
data.tar.gz: 890da0d377ca08b10fd2c511ac553fafed46bbb7ba08ac87408f5653a56fcb24
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 364bf3ad47e380b9cbf3a4e625b07552f0b82a6b8b23bc0778ec9a970b0c8d5d9920d5edd87175c7e6a4aae65a82f4253bef9991601152e2e015e12fc6a59d7c
|
|
7
|
+
data.tar.gz: 58bb34b94650ae86d9ca99a3365cf23b25e6a99311028f22520df994794e705e22601b5e2c94b7f636dc94ce15d3af9e46ddb20408de67f916575229e76ca7e5
|
data/Gemfile.lock
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
kreuzberg (4.0.0.pre.rc.27)
|
|
4
|
+
kreuzberg (4.0.0.pre.rc.29)
|
|
5
5
|
|
|
6
6
|
GEM
|
|
7
7
|
remote: https://rubygems.org/
|
|
8
8
|
specs:
|
|
9
|
-
activesupport (8.1.
|
|
9
|
+
activesupport (8.1.2)
|
|
10
10
|
base64
|
|
11
11
|
bigdecimal
|
|
12
12
|
concurrent-ruby (~> 1.0, >= 1.3.1)
|
|
@@ -76,7 +76,7 @@ GEM
|
|
|
76
76
|
ffi (~> 1.0)
|
|
77
77
|
rb_sys (0.9.119)
|
|
78
78
|
rake-compiler-dock (= 1.10.0)
|
|
79
|
-
rbs (3.10.
|
|
79
|
+
rbs (3.10.2)
|
|
80
80
|
logger
|
|
81
81
|
regexp_parser (2.11.3)
|
|
82
82
|
rspec (3.13.2)
|
|
@@ -110,7 +110,7 @@ GEM
|
|
|
110
110
|
lint_roller (~> 1.1)
|
|
111
111
|
rubocop (>= 1.75.0, < 2.0)
|
|
112
112
|
rubocop-ast (>= 1.47.1, < 2.0)
|
|
113
|
-
rubocop-rspec (3.
|
|
113
|
+
rubocop-rspec (3.9.0)
|
|
114
114
|
lint_roller (~> 1.1)
|
|
115
115
|
rubocop (~> 1.81)
|
|
116
116
|
ruby-progressbar (1.13.0)
|
|
@@ -132,7 +132,7 @@ GEM
|
|
|
132
132
|
strscan (>= 1.0.0)
|
|
133
133
|
terminal-table (>= 2, < 5)
|
|
134
134
|
uri (>= 0.12.0)
|
|
135
|
-
strscan (3.1.
|
|
135
|
+
strscan (3.1.7)
|
|
136
136
|
terminal-table (4.0.0)
|
|
137
137
|
unicode-display_width (>= 1.1.1, < 4)
|
|
138
138
|
tzinfo (2.0.6)
|
|
@@ -173,7 +173,7 @@ DEPENDENCIES
|
|
|
173
173
|
yard (~> 0.9)
|
|
174
174
|
|
|
175
175
|
CHECKSUMS
|
|
176
|
-
activesupport (8.1.
|
|
176
|
+
activesupport (8.1.2) sha256=88842578ccd0d40f658289b0e8c842acfe9af751afee2e0744a7873f50b6fdae
|
|
177
177
|
ast (2.4.3) sha256=954615157c1d6a382bc27d690d973195e79db7f55e9765ac7c481c60bdb4d383
|
|
178
178
|
base64 (0.3.0) sha256=27337aeabad6ffae05c265c450490628ef3ebd4b67be58257393227588f5a97b
|
|
179
179
|
bigdecimal (4.0.1) sha256=8b07d3d065a9f921c80ceaea7c9d4ae596697295b584c296fe599dd0ad01c4a7
|
|
@@ -198,7 +198,7 @@ CHECKSUMS
|
|
|
198
198
|
fileutils (1.8.0) sha256=8c6b1df54e2540bdb2f39258f08af78853aa70bad52b4d394bbc6424593c6e02
|
|
199
199
|
i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
|
|
200
200
|
json (2.18.0) sha256=b10506aee4183f5cf49e0efc48073d7b75843ce3782c68dbeb763351c08fd505
|
|
201
|
-
kreuzberg (4.0.0.pre.rc.27)
|
|
201
|
+
kreuzberg (4.0.0.pre.rc.29)
|
|
202
202
|
language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
|
|
203
203
|
lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
|
|
204
204
|
listen (3.9.0) sha256=db9e4424e0e5834480385197c139cb6b0ae0ef28cc13310cfd1ca78377d59c67
|
|
@@ -219,7 +219,7 @@ CHECKSUMS
|
|
|
219
219
|
rb-fsevent (0.11.2) sha256=43900b972e7301d6570f64b850a5aa67833ee7d87b458ee92805d56b7318aefe
|
|
220
220
|
rb-inotify (0.11.1) sha256=a0a700441239b0ff18eb65e3866236cd78613d6b9f78fea1f9ac47a85e47be6e
|
|
221
221
|
rb_sys (0.9.119) sha256=64393fa148e402e1b79b64496d2aabfc7df79da6b822b8bb48dc1141eaf40b4b
|
|
222
|
-
rbs (3.10.
|
|
222
|
+
rbs (3.10.2) sha256=bd8a5dc4c62f229f020146b61844a31f9c79e649449d212904a474eb79c846fc
|
|
223
223
|
regexp_parser (2.11.3) sha256=ca13f381a173b7a93450e53459075c9b76a10433caadcb2f1180f2c741fc55a4
|
|
224
224
|
rspec (3.13.2) sha256=206284a08ad798e61f86d7ca3e376718d52c0bc944626b2349266f239f820587
|
|
225
225
|
rspec-core (3.13.6) sha256=a8823c6411667b60a8bca135364351dda34cd55e44ff94c4be4633b37d828b2d
|
|
@@ -229,11 +229,11 @@ CHECKSUMS
|
|
|
229
229
|
rubocop (1.82.1) sha256=09f1a6a654a960eda767aebea33e47603080f8e9c9a3f019bf9b94c9cab5e273
|
|
230
230
|
rubocop-ast (1.49.0) sha256=49c3676d3123a0923d333e20c6c2dbaaae2d2287b475273fddee0c61da9f71fd
|
|
231
231
|
rubocop-performance (1.26.1) sha256=cd19b936ff196df85829d264b522fd4f98b6c89ad271fa52744a8c11b8f71834
|
|
232
|
-
rubocop-rspec (3.
|
|
232
|
+
rubocop-rspec (3.9.0) sha256=8fa70a3619408237d789aeecfb9beef40576acc855173e60939d63332fdb55e2
|
|
233
233
|
ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
|
|
234
234
|
securerandom (0.4.1) sha256=cc5193d414a4341b6e225f0cb4446aceca8e50d5e1888743fac16987638ea0b1
|
|
235
235
|
steep (1.10.0) sha256=1b295b55f9aaff1b8d3ee42453ee55bc2a1078fda0268f288edb2dc014f4d7d1
|
|
236
|
-
strscan (3.1.
|
|
236
|
+
strscan (3.1.7) sha256=5f76462b94a3ea50b44973225b7d75b2cb96d4e1bee9ef1319b99ca117b72c8c
|
|
237
237
|
terminal-table (4.0.0) sha256=f504793203f8251b2ea7c7068333053f0beeea26093ec9962e62ea79f94301d2
|
|
238
238
|
tzinfo (2.0.6) sha256=8daf828cc77bcf7d63b0e3bdb6caa47e2272dcfaf4fbfe46f8c3a9df087a829b
|
|
239
239
|
unicode-display_width (3.2.0) sha256=0cdd96b5681a5949cdbc2c55e7b420facae74c4aaf9a9815eee1087cb1853c42
|
data/README.md
CHANGED
|
@@ -82,6 +82,16 @@ gem 'kreuzberg'
|
|
|
82
82
|
- Optional: [ONNX Runtime](https://github.com/microsoft/onnxruntime/releases) version 1.22.x for embeddings support
|
|
83
83
|
- Optional: [Tesseract OCR](https://github.com/tesseract-ocr/tesseract) for OCR functionality
|
|
84
84
|
|
|
85
|
+
### Platform Support
|
|
86
|
+
|
|
87
|
+
Precompiled native extensions are available for the following platforms, providing instant installation without compilation:
|
|
88
|
+
|
|
89
|
+
- ✅ Linux x86_64
|
|
90
|
+
- ✅ Linux aarch64 (ARM64)
|
|
91
|
+
- ✅ macOS aarch64 (Apple Silicon)
|
|
92
|
+
|
|
93
|
+
On these platforms, no C compiler or Rust toolchain is required for installation.
|
|
94
|
+
|
|
85
95
|
## Quick Start
|
|
86
96
|
|
|
87
97
|
### Basic Extraction
|