kreuzberg 4.0.0.pre.rc.7 → 4.0.0.pre.rc.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 86ef74ea08ce1971136cf72aac3e1082a25bd60f10675f9d9e8cab7659076189
4
- data.tar.gz: 7635098e6aa3d1aeaa238e88f993aece6b3adf46497f6423fcc92fa0c75e478f
3
+ metadata.gz: 916533f5e53e159638c5efcf50ca58ca16705ad2df86095b013fbfe7ed8f4cfb
4
+ data.tar.gz: dd0d04f68f00849b83e996b806af808400383400a29fdabd618108c63c6c9ffa
5
5
  SHA512:
6
- metadata.gz: e29f2972b1384283dee4a11de61e3de8330a4a523ae754dea6995729a0cc03485efc22dc966846328f998d3fa0e91b973fa8e072169c177dcae432f3b1655118
7
- data.tar.gz: 3c6146bbca9b009ddb2b1dd107ef767ffd921747555a973390eebd36c4074e4b520380b0a4d4925ed8e2e3f6c4aef351505a1d4b4d0aa0ceabafef28e8c517b7
6
+ metadata.gz: b8b2db6d787185dd3764ed749bb5079528d8306bd6ce3a8b1458ab65561c8002ce2741e2d21a3cc7cff391fe44b9634a9aa0afc6112b4844ed5490bd109d52dc
7
+ data.tar.gz: 45a0ea17841640ed4e6bcaf0b96476cbde875dd6436305395ba7f32483ae8cf54aef0aed2cc7351ad4c7bed82cce74764a27659e5f2f215cda39b0027fef9b48
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- kreuzberg (4.0.0.pre.rc.7)
4
+ kreuzberg (4.0.0.pre.rc.8)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -24,7 +24,7 @@ GEM
24
24
  bigdecimal (3.3.1)
25
25
  byebug (12.0.0)
26
26
  coderay (1.1.3)
27
- concurrent-ruby (1.3.5)
27
+ concurrent-ruby (1.3.6)
28
28
  connection_pool (3.0.2)
29
29
  csv (3.3.5)
30
30
  diff-lcs (1.6.2)
@@ -58,7 +58,7 @@ GEM
58
58
  racc (1.8.1)
59
59
  rainbow (3.1.1)
60
60
  rake (13.3.1)
61
- rake-compiler (1.3.0)
61
+ rake-compiler (1.3.1)
62
62
  rake
63
63
  rake-compiler-dock (1.10.0)
64
64
  rb-fsevent (0.11.2)
@@ -136,8 +136,6 @@ GEM
136
136
  PLATFORMS
137
137
  arm64-darwin-23
138
138
  arm64-darwin-24
139
- x64-mingw-ucrt
140
- x86_64-linux
141
139
 
142
140
  DEPENDENCIES
143
141
  bundler (~> 4.0)
@@ -1,2 +1,2 @@
1
1
  [build]
2
- rustflags = ["-A", "unpredictable-function-pointer-comparisons"]
2
+ rustflags = ["-A", "unpredictable-function-pointer-comparisons", "-A", "fn_ptr_eq"]
@@ -4388,8 +4388,6 @@ dependencies = [
4388
4388
  [[package]]
4389
4389
  name = "rb-sys"
4390
4390
  version = "0.9.119"
4391
- source = "registry+https://github.com/rust-lang/crates.io-index"
4392
- checksum = "4519fc8de033923105f512c504a8f27714ef38648ccc30969362194c50b2ed08"
4393
4391
  dependencies = [
4394
4392
  "rb-sys-build",
4395
4393
  ]
@@ -6721,7 +6719,3 @@ checksum = "dc6fb7703e32e9a07fb3f757360338b3a567a5054f21b5f52a666752e333d58e"
6721
6719
  dependencies = [
6722
6720
  "zune-core 0.5.0",
6723
6721
  ]
6724
-
6725
- [[patch.unused]]
6726
- name = "rb-sys"
6727
- version = "4.0.0-rc.6"
@@ -7,7 +7,7 @@ rb-sys = { path = "../../../vendor/rb-sys" }
7
7
 
8
8
  [package]
9
9
  name = "kreuzberg-rb"
10
- version = "4.0.0-rc.7"
10
+ version = "4.0.0-rc.8"
11
11
  edition = "2024"
12
12
  rust-version = "1.91"
13
13
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Kreuzberg
4
- VERSION = '4.0.0-rc.7'
4
+ VERSION = '4.0.0-rc.8'
5
5
  end
@@ -26,6 +26,9 @@ tokio-runtime = ["dep:tokio"]
26
26
 
27
27
  # Format extractors
28
28
  pdf = ["dep:pdfium-render", "dep:lopdf", "dep:image"]
29
+ pdf-static = ["pdf"] # Download + static link (no runtime dependency)
30
+ pdf-bundled = ["pdf"] # Embed library in binary (self-contained)
31
+ pdf-system = ["pdf"] # Use system-installed pdfium via pkg-config
29
32
  excel = ["dep:calamine", "dep:polars", "tokio-runtime"]
30
33
  office = [
31
34
  "dep:roxmltree",
@@ -71,6 +74,7 @@ keywords = ["keywords-yake", "keywords-rake"]
71
74
  # Server features
72
75
  api = ["dep:axum", "dep:tower", "dep:tower-http", "tokio-runtime"]
73
76
  mcp = ["dep:rmcp", "tokio-runtime"]
77
+ mcp-http = ["mcp", "api"] # Enables HTTP transport for MCP
74
78
 
75
79
  # Observability features
76
80
  otel = ["dep:opentelemetry", "dep:opentelemetry_sdk", "dep:tracing-opentelemetry"]
@@ -98,6 +102,7 @@ cli = ["pdf", "excel", "office", "html", "ocr", "language-detection", "chunking"
98
102
 
99
103
  [build-dependencies]
100
104
  tracing = { workspace = true }
105
+ pkg-config = "0.3" # For system pdfium detection
101
106
 
102
107
  [dependencies]
103
108
  # Core dependencies (always included)
@@ -138,7 +143,7 @@ roxmltree = { version = "0.21.1", optional = true }
138
143
  zip = { version = "6.0.0", optional = true }
139
144
  mail-parser = { version = "0.11.1", optional = true }
140
145
  msg_parser = { version = "0.1.1", optional = true }
141
- html-to-markdown-rs = { version = "2.14.1", features = ["inline-images"], optional = true }
146
+ html-to-markdown-rs = { version = "2.14.2", features = ["inline-images"], optional = true }
142
147
  quick-xml = { version = "0.38.4", features = ["serialize"], optional = true }
143
148
  tar = { version = "0.4.44", optional = true }
144
149
  sevenz-rust = { version = "0.6.1", optional = true }
@@ -187,6 +192,8 @@ rmcp = { version = "0.11.0", features = [
187
192
  "macros",
188
193
  "base64",
189
194
  "transport-io",
195
+ "transport-streamable-http-server",
196
+ "axum",
190
197
  ], optional = true }
191
198
  # Observability features (optional)
192
199
  opentelemetry = { version = "0.31", features = ["trace"], optional = true }
@@ -215,4 +222,4 @@ harness = false
215
222
  pprof = { version = "0.15.0", features = ["flamegraph"], optional = true }
216
223
 
217
224
  [target.'cfg(target_arch = "wasm32")'.dependencies]
218
- wasm-bindgen-rayon = "1.2"
225
+ wasm-bindgen-rayon = "1.3"
@@ -169,6 +169,47 @@ kreuzberg = { version = "4.0", features = ["server"] }
169
169
  kreuzberg = { version = "4.0", features = ["cli"] }
170
170
  ```
171
171
 
172
+ ## PDFium Linking Options
173
+
174
+ When using the `pdf` feature, you can choose how PDFium is linked to your binary. Four strategies are supported:
175
+
176
+ | Strategy | Feature | Use Case |
177
+ |----------|---------|----------|
178
+ | **Dynamic (default)** | `pdf` | Fast builds; requires libpdfium at runtime |
179
+ | **Static** | `pdf`, `pdf-static` | Statically link PDFium; larger binary, no runtime dependency |
180
+ | **Bundled** | `pdf`, `pdf-bundled` | PDFium embedded in the binary; each binary is self-contained with its own copy |
181
+ | **System** | `pdf`, `pdf-system` | Use system-installed PDFium |
182
+
183
+ ### Examples
184
+
185
+ **Default (dynamic linking)** - Fastest compilation, requires libpdfium at runtime:
186
+ ```toml
187
+ [dependencies]
188
+ kreuzberg = { version = "4.0", features = ["pdf"] }
189
+ ```
190
+
191
+ **Static linking** - Larger binary, no runtime dependency:
192
+ ```toml
193
+ [dependencies]
194
+ kreuzberg = { version = "4.0", features = ["pdf", "pdf-static"] }
195
+ ```
196
+
197
+ **Bundled** - PDFium is embedded in the binary, and each binary extracts its own copy:
198
+ ```toml
199
+ [dependencies]
200
+ kreuzberg = { version = "4.0", features = ["pdf", "pdf-bundled"] }
201
+ ```
202
+
203
+ **System-installed** - Uses the PDFium already installed on the system, located via pkg-config or manually specified paths:
204
+ ```toml
205
+ [dependencies]
206
+ kreuzberg = { version = "4.0", features = ["pdf", "pdf-system"] }
207
+ ```
208
+
209
+ For comprehensive guidance on linking strategies, environment variables, and troubleshooting, see the [PDFium Linking Guide](../../docs/guides/pdfium-linking.md).
210
+
211
+ **Note:** Language bindings (Python, TypeScript, Ruby, Java, Go) bundle PDFium automatically and do not expose linking options.
212
+
172
213
  ## Documentation
173
214
 
174
215
  **[API Documentation](https://docs.rs/kreuzberg)** – Complete API reference with examples