kreuzberg 4.0.0.pre.rc.7 → 4.0.0.pre.rc.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -5
- data/ext/kreuzberg_rb/native/.cargo/config.toml +1 -1
- data/ext/kreuzberg_rb/native/Cargo.lock +0 -6
- data/ext/kreuzberg_rb/native/Cargo.toml +1 -1
- data/lib/kreuzberg/version.rb +1 -1
- data/vendor/kreuzberg/Cargo.toml +9 -2
- data/vendor/kreuzberg/README.md +41 -0
- data/vendor/kreuzberg/build.rs +539 -133
- data/vendor/kreuzberg/src/api/mod.rs +0 -2
- data/vendor/kreuzberg/src/extraction/docx.rs +1 -1
- data/vendor/kreuzberg/src/extractors/pdf.rs +6 -3
- data/vendor/kreuzberg/src/mcp/mod.rs +3 -2
- data/vendor/kreuzberg/src/mcp/server.rs +106 -0
- data/vendor/kreuzberg/src/pdf/bundled.rs +328 -0
- data/vendor/kreuzberg/src/pdf/mod.rs +4 -0
- data/vendor/kreuzberg/tests/pdfium_linking.rs +374 -0
- data/vendor/rb-sys/bin/release.sh +8 -9
- data/vendor/rb-sys/src/lib.rs +1 -0
- metadata +4 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 916533f5e53e159638c5efcf50ca58ca16705ad2df86095b013fbfe7ed8f4cfb
|
|
4
|
+
data.tar.gz: dd0d04f68f00849b83e996b806af808400383400a29fdabd618108c63c6c9ffa
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b8b2db6d787185dd3764ed749bb5079528d8306bd6ce3a8b1458ab65561c8002ce2741e2d21a3cc7cff391fe44b9634a9aa0afc6112b4844ed5490bd109d52dc
|
|
7
|
+
data.tar.gz: 45a0ea17841640ed4e6bcaf0b96476cbde875dd6436305395ba7f32483ae8cf54aef0aed2cc7351ad4c7bed82cce74764a27659e5f2f215cda39b0027fef9b48
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
kreuzberg (4.0.0.pre.rc.7)
|
|
4
|
+
kreuzberg (4.0.0.pre.rc.8)
|
|
5
5
|
|
|
6
6
|
GEM
|
|
7
7
|
remote: https://rubygems.org/
|
|
@@ -24,7 +24,7 @@ GEM
|
|
|
24
24
|
bigdecimal (3.3.1)
|
|
25
25
|
byebug (12.0.0)
|
|
26
26
|
coderay (1.1.3)
|
|
27
|
-
concurrent-ruby (1.3.
|
|
27
|
+
concurrent-ruby (1.3.6)
|
|
28
28
|
connection_pool (3.0.2)
|
|
29
29
|
csv (3.3.5)
|
|
30
30
|
diff-lcs (1.6.2)
|
|
@@ -58,7 +58,7 @@ GEM
|
|
|
58
58
|
racc (1.8.1)
|
|
59
59
|
rainbow (3.1.1)
|
|
60
60
|
rake (13.3.1)
|
|
61
|
-
rake-compiler (1.3.
|
|
61
|
+
rake-compiler (1.3.1)
|
|
62
62
|
rake
|
|
63
63
|
rake-compiler-dock (1.10.0)
|
|
64
64
|
rb-fsevent (0.11.2)
|
|
@@ -136,8 +136,6 @@ GEM
|
|
|
136
136
|
PLATFORMS
|
|
137
137
|
arm64-darwin-23
|
|
138
138
|
arm64-darwin-24
|
|
139
|
-
x64-mingw-ucrt
|
|
140
|
-
x86_64-linux
|
|
141
139
|
|
|
142
140
|
DEPENDENCIES
|
|
143
141
|
bundler (~> 4.0)
|
|
data/ext/kreuzberg_rb/native/.cargo/config.toml
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
[build]
|
|
2
|
-
rustflags = ["-A", "unpredictable-function-pointer-comparisons"]
|
|
2
|
+
rustflags = ["-A", "unpredictable-function-pointer-comparisons", "-A", "fn_ptr_eq"]
|
|
data/ext/kreuzberg_rb/native/Cargo.lock
CHANGED
|
@@ -4388,8 +4388,6 @@ dependencies = [
|
|
|
4388
4388
|
[[package]]
|
|
4389
4389
|
name = "rb-sys"
|
|
4390
4390
|
version = "0.9.119"
|
|
4391
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4392
|
-
checksum = "4519fc8de033923105f512c504a8f27714ef38648ccc30969362194c50b2ed08"
|
|
4393
4391
|
dependencies = [
|
|
4394
4392
|
"rb-sys-build",
|
|
4395
4393
|
]
|
|
@@ -6721,7 +6719,3 @@ checksum = "dc6fb7703e32e9a07fb3f757360338b3a567a5054f21b5f52a666752e333d58e"
|
|
|
6721
6719
|
dependencies = [
|
|
6722
6720
|
"zune-core 0.5.0",
|
|
6723
6721
|
]
|
|
6724
|
-
|
|
6725
|
-
[[patch.unused]]
|
|
6726
|
-
name = "rb-sys"
|
|
6727
|
-
version = "4.0.0-rc.6"
|
data/lib/kreuzberg/version.rb
CHANGED
data/vendor/kreuzberg/Cargo.toml
CHANGED
|
@@ -26,6 +26,9 @@ tokio-runtime = ["dep:tokio"]
|
|
|
26
26
|
|
|
27
27
|
# Format extractors
|
|
28
28
|
pdf = ["dep:pdfium-render", "dep:lopdf", "dep:image"]
|
|
29
|
+
pdf-static = ["pdf"] # Download + static link (no runtime dependency)
|
|
30
|
+
pdf-bundled = ["pdf"] # Embed library in binary (self-contained)
|
|
31
|
+
pdf-system = ["pdf"] # Use system-installed pdfium via pkg-config
|
|
29
32
|
excel = ["dep:calamine", "dep:polars", "tokio-runtime"]
|
|
30
33
|
office = [
|
|
31
34
|
"dep:roxmltree",
|
|
@@ -71,6 +74,7 @@ keywords = ["keywords-yake", "keywords-rake"]
|
|
|
71
74
|
# Server features
|
|
72
75
|
api = ["dep:axum", "dep:tower", "dep:tower-http", "tokio-runtime"]
|
|
73
76
|
mcp = ["dep:rmcp", "tokio-runtime"]
|
|
77
|
+
mcp-http = ["mcp", "api"] # NEW - enables HTTP transport
|
|
74
78
|
|
|
75
79
|
# Observability features
|
|
76
80
|
otel = ["dep:opentelemetry", "dep:opentelemetry_sdk", "dep:tracing-opentelemetry"]
|
|
@@ -98,6 +102,7 @@ cli = ["pdf", "excel", "office", "html", "ocr", "language-detection", "chunking"
|
|
|
98
102
|
|
|
99
103
|
[build-dependencies]
|
|
100
104
|
tracing = { workspace = true }
|
|
105
|
+
pkg-config = "0.3" # For system pdfium detection
|
|
101
106
|
|
|
102
107
|
[dependencies]
|
|
103
108
|
# Core dependencies (always included)
|
|
@@ -138,7 +143,7 @@ roxmltree = { version = "0.21.1", optional = true }
|
|
|
138
143
|
zip = { version = "6.0.0", optional = true }
|
|
139
144
|
mail-parser = { version = "0.11.1", optional = true }
|
|
140
145
|
msg_parser = { version = "0.1.1", optional = true }
|
|
141
|
-
html-to-markdown-rs = { version = "2.14.
|
|
146
|
+
html-to-markdown-rs = { version = "2.14.2", features = ["inline-images"], optional = true }
|
|
142
147
|
quick-xml = { version = "0.38.4", features = ["serialize"], optional = true }
|
|
143
148
|
tar = { version = "0.4.44", optional = true }
|
|
144
149
|
sevenz-rust = { version = "0.6.1", optional = true }
|
|
@@ -187,6 +192,8 @@ rmcp = { version = "0.11.0", features = [
|
|
|
187
192
|
"macros",
|
|
188
193
|
"base64",
|
|
189
194
|
"transport-io",
|
|
195
|
+
"transport-streamable-http-server",
|
|
196
|
+
"axum",
|
|
190
197
|
], optional = true }
|
|
191
198
|
# Observability features (optional)
|
|
192
199
|
opentelemetry = { version = "0.31", features = ["trace"], optional = true }
|
|
@@ -215,4 +222,4 @@ harness = false
|
|
|
215
222
|
pprof = { version = "0.15.0", features = ["flamegraph"], optional = true }
|
|
216
223
|
|
|
217
224
|
[target.'cfg(target_arch = "wasm32")'.dependencies]
|
|
218
|
-
wasm-bindgen-rayon = "1.
|
|
225
|
+
wasm-bindgen-rayon = "1.3"
|
data/vendor/kreuzberg/README.md
CHANGED
|
@@ -169,6 +169,47 @@ kreuzberg = { version = "4.0", features = ["server"] }
|
|
|
169
169
|
kreuzberg = { version = "4.0", features = ["cli"] }
|
|
170
170
|
```
|
|
171
171
|
|
|
172
|
+
## PDFium Linking Options
|
|
173
|
+
|
|
174
|
+
When using the `pdf` feature, you can choose how PDFium is linked to your binary. Four strategies are supported:
|
|
175
|
+
|
|
176
|
+
| Strategy | Feature | Use Case |
|
|
177
|
+
|----------|---------|----------|
|
|
178
|
+
| **Dynamic (default)** | `pdf` | Fast builds, runtime library dependency |
|
|
179
|
+
| **Static** | `pdf`, `pdf-static` | Embed PDFium in binary, larger binary size |
|
|
180
|
+
| **Bundled** | `pdf`, `pdf-bundled` | Self-contained per-binary copies |
|
|
181
|
+
| **System** | `pdf`, `pdf-system` | Use system-installed PDFium |
|
|
182
|
+
|
|
183
|
+
### Examples
|
|
184
|
+
|
|
185
|
+
**Default (dynamic linking)** - Fastest compilation, requires libpdfium at runtime:
|
|
186
|
+
```toml
|
|
187
|
+
[dependencies]
|
|
188
|
+
kreuzberg = { version = "4.0", features = ["pdf"] }
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
**Static linking** - Larger binary, no runtime dependency:
|
|
192
|
+
```toml
|
|
193
|
+
[dependencies]
|
|
194
|
+
kreuzberg = { version = "4.0", features = ["pdf", "pdf-static"] }
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
**Bundled** - Each binary extracts its own copy:
|
|
198
|
+
```toml
|
|
199
|
+
[dependencies]
|
|
200
|
+
kreuzberg = { version = "4.0", features = ["pdf", "pdf-bundled"] }
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
**System-installed** - Use pkg-config or manual paths:
|
|
204
|
+
```toml
|
|
205
|
+
[dependencies]
|
|
206
|
+
kreuzberg = { version = "4.0", features = ["pdf", "pdf-system"] }
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
For comprehensive guidance on linking strategies, environment variables, and troubleshooting, see the [PDFium Linking Guide](../../docs/guides/pdfium-linking.md).
|
|
210
|
+
|
|
211
|
+
**Note:** Language bindings (Python, TypeScript, Ruby, Java, Go) bundle PDFium automatically and do not expose linking options.
|
|
212
|
+
|
|
172
213
|
## Documentation
|
|
173
214
|
|
|
174
215
|
**[API Documentation](https://docs.rs/kreuzberg)** – Complete API reference with examples
|