liter_llm 1.0.0.pre.rc.9 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +11 -0
- data/ext/liter_llm_rb/native/Cargo.toml +1 -1
- data/vendor/Cargo.toml +10 -4
- data/vendor/liter-llm/Cargo.toml +3 -3
- data/vendor/liter-llm/README.md +11 -0
- data/vendor/liter-llm/src/auth/azure_ad.rs +3 -1
- data/vendor/liter-llm/src/auth/bedrock_sts.rs +3 -1
- data/vendor/liter-llm/src/auth/vertex_oauth.rs +3 -1
- data/vendor/liter-llm/tests/cache_integration.rs +202 -0
- data/vendor/liter-llm/tests/concurrency.rs +379 -0
- data/vendor/liter-llm/tests/middleware_integration.rs +981 -0
- data/vendor/liter-llm/tests/operations_integration.rs +641 -0
- data/vendor/liter-llm/tests/routing_integration.rs +463 -0
- data/vendor/liter-llm-ffi/Cargo.toml +3 -3
- data/vendor/liter-llm-ffi/liter_llm.h +2 -2
- metadata +7 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: bd0fb204ec361182a87bff72c133eee44d081ecbf37ce936361c631433a28817
|
|
4
|
+
data.tar.gz: a42719e7acbdd1d097a392eeb3a55c21be528e27abcfdd5d4333e541c6e8521b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d0e2aa5b9b03cf11c55535f95f9cfe78e87e496ac4b7f62a15a3408de508547cd3aa4503bee5f27ce4ce71ac7fa2ce1fdc2cb4bc8b828a5b073b5cdbdd769f96
|
|
7
|
+
data.tar.gz: 541a179bb37ecf1a3332f364d6f7fd5e7d2b8cba369e210b2e2807d64564135b58a8cb388e4ced465f57eda308782bf869f950149f07633d1c64103f307ce7ab
|
data/README.md
CHANGED
|
@@ -220,6 +220,17 @@ See the [provider registry](https://github.com/kreuzberg-dev/liter-llm/blob/main
|
|
|
220
220
|
|
|
221
221
|
|
|
222
222
|
|
|
223
|
+
## Proxy Server
|
|
224
|
+
|
|
225
|
+
liter-llm also ships as an OpenAI-compatible proxy server with Docker support:
|
|
226
|
+
|
|
227
|
+
```bash
|
|
228
|
+
docker run -p 4000:4000 -e LITER_LLM_MASTER_KEY=sk-your-key ghcr.io/kreuzberg-dev/liter-llm
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
See the [proxy server documentation](https://docs.liter-llm.kreuzberg.dev/server/proxy/) for configuration, CLI usage, and MCP integration.
|
|
232
|
+
|
|
233
|
+
|
|
223
234
|
## Documentation
|
|
224
235
|
|
|
225
236
|
- **[Documentation](https://docs.liter-llm.kreuzberg.dev)** -- Full docs and API reference
|
data/vendor/Cargo.toml
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
members = ["liter-llm", "liter-llm-ffi"]
|
|
3
3
|
|
|
4
4
|
[workspace.package]
|
|
5
|
-
version = "1.
|
|
5
|
+
version = "1.1.0"
|
|
6
6
|
edition = "2024"
|
|
7
7
|
authors = ["Na'aman Hirschfeld <naaman@kreuzberg.dev>"]
|
|
8
8
|
license = "MIT"
|
|
@@ -11,9 +11,11 @@ homepage = "https://kreuzberg.dev"
|
|
|
11
11
|
|
|
12
12
|
[workspace.dependencies]
|
|
13
13
|
anyhow = "1"
|
|
14
|
+
axum = { version = "0.8", features = ["macros"] }
|
|
15
|
+
axum-extra = { version = "0.12", features = ["typed-header"] }
|
|
14
16
|
base64 = "0.22"
|
|
15
17
|
bytes = "1"
|
|
16
|
-
clap = { version = "4", features = ["derive"] }
|
|
18
|
+
clap = { version = "4", features = ["derive", "env"] }
|
|
17
19
|
dashmap = "6"
|
|
18
20
|
futures-core = "0.3"
|
|
19
21
|
futures-util = "0.3"
|
|
@@ -24,7 +26,7 @@ memchr = "2"
|
|
|
24
26
|
napi = { version = "3", features = ["napi4", "serde-json", "async"] }
|
|
25
27
|
napi-build = "2"
|
|
26
28
|
napi-derive = "3"
|
|
27
|
-
opendal = { version = "0.
|
|
29
|
+
opendal = { version = "0.55", features = ["services-memory", "services-redis", "services-fs", "services-s3"], default-features = false }
|
|
28
30
|
opentelemetry = "0.31"
|
|
29
31
|
pin-project-lite = "0.2"
|
|
30
32
|
pyo3 = { version = "0.28", features = ["abi3-py310"] }
|
|
@@ -32,7 +34,9 @@ pyo3-async-runtimes = { version = "0.28", features = ["tokio-runtime"] }
|
|
|
32
34
|
rayon = "1"
|
|
33
35
|
rb-sys = "0.9"
|
|
34
36
|
reqwest = { version = "0.13", features = ["json", "stream", "rustls", "multipart", "form"], default-features = false }
|
|
37
|
+
rmcp = { version = "1.3", features = ["server", "macros", "transport-io", "transport-streamable-http-server", "server-side-http"] }
|
|
35
38
|
rustler = "0.37"
|
|
39
|
+
schemars = "1"
|
|
36
40
|
secrecy = { version = "0.10", features = ["serde"] }
|
|
37
41
|
serde = { version = "1", features = ["derive"] }
|
|
38
42
|
serde_json = "1"
|
|
@@ -43,11 +47,13 @@ tokenizers = { version = "0.22", features = ["http", "fancy-regex"], default-fea
|
|
|
43
47
|
tokio = { version = "1", features = ["full"] }
|
|
44
48
|
toml = "1.1"
|
|
45
49
|
tower = { version = "0.5", features = ["retry", "limit", "timeout", "buffer", "load-shed", "steer", "util"] }
|
|
46
|
-
tower-http = { version = "0.6", features = ["follow-redirect", "set-header", "sensitive-headers", "trace", "request-id"] }
|
|
50
|
+
tower-http = { version = "0.6", features = ["follow-redirect", "set-header", "sensitive-headers", "trace", "request-id", "cors", "compression-gzip", "catch-panic", "limit"] }
|
|
47
51
|
tower-layer = "0.3"
|
|
48
52
|
tower-service = "0.3"
|
|
49
53
|
tracing = "0.1"
|
|
50
54
|
tracing-opentelemetry = "0.32"
|
|
55
|
+
tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
|
|
56
|
+
utoipa = { version = "5.4", features = ["axum_extras"] }
|
|
51
57
|
walkdir = "2.5"
|
|
52
58
|
wasm-bindgen = "0.2"
|
|
53
59
|
wasm-bindgen-test = "0.3"
|
data/vendor/liter-llm/Cargo.toml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "liter-llm"
|
|
3
|
-
version = "1.
|
|
3
|
+
version = "1.1.0"
|
|
4
4
|
edition = "2024"
|
|
5
5
|
license = "MIT"
|
|
6
6
|
repository.workspace = true
|
|
@@ -71,7 +71,7 @@ futures-core = "0.3"
|
|
|
71
71
|
futures-util = { version = "0.3", optional = true }
|
|
72
72
|
jsonwebtoken = { version = "10", features = ["use_pem"], default-features = false, optional = true }
|
|
73
73
|
memchr = { version = "2", optional = true }
|
|
74
|
-
opendal = { version = "0.
|
|
74
|
+
opendal = { version = "0.55", features = ["services-memory", "services-redis", "services-fs", "services-s3"], default-features = false, optional = true }
|
|
75
75
|
opentelemetry = { version = "0.31", optional = true }
|
|
76
76
|
pin-project-lite = "0.2"
|
|
77
77
|
reqwest = { version = "0.13", features = ["json", "stream", "rustls", "multipart", "form"], default-features = false, optional = true }
|
|
@@ -83,7 +83,7 @@ tokenizers = { version = "0.22", features = ["http", "fancy-regex"], default-fea
|
|
|
83
83
|
tokio = { version = "1", features = ["time", "rt", "macros"], optional = true }
|
|
84
84
|
toml = "1.1"
|
|
85
85
|
tower = { version = "0.5", features = ["retry", "limit", "timeout", "buffer", "load-shed", "steer", "util"], optional = true }
|
|
86
|
-
tower-http = { version = "0.6", features = ["follow-redirect", "set-header", "sensitive-headers", "trace", "request-id"], optional = true }
|
|
86
|
+
tower-http = { version = "0.6", features = ["follow-redirect", "set-header", "sensitive-headers", "trace", "request-id", "cors", "compression-gzip", "catch-panic", "limit"], optional = true }
|
|
87
87
|
tracing = { version = "0.1", optional = true }
|
|
88
88
|
tracing-opentelemetry = { version = "0.32", optional = true }
|
|
89
89
|
|
data/vendor/liter-llm/README.md
CHANGED
|
@@ -233,6 +233,17 @@ See the [provider registry](https://github.com/kreuzberg-dev/liter-llm/blob/main
|
|
|
233
233
|
|
|
234
234
|
|
|
235
235
|
|
|
236
|
+
## Proxy Server
|
|
237
|
+
|
|
238
|
+
liter-llm also ships as an OpenAI-compatible proxy server with Docker support:
|
|
239
|
+
|
|
240
|
+
```bash
|
|
241
|
+
docker run -p 4000:4000 -e LITER_LLM_MASTER_KEY=sk-your-key ghcr.io/kreuzberg-dev/liter-llm
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
See the [proxy server documentation](https://docs.liter-llm.kreuzberg.dev/server/proxy/) for configuration, CLI usage, and MCP integration.
|
|
245
|
+
|
|
246
|
+
|
|
236
247
|
## Documentation
|
|
237
248
|
|
|
238
249
|
- **[Documentation](https://docs.liter-llm.kreuzberg.dev)** -- Full docs and API reference
|
|
data/vendor/liter-llm/src/auth/azure_ad.rs
CHANGED
|
@@ -257,7 +257,9 @@ mod tests {
|
|
|
257
257
|
#[tokio::test]
|
|
258
258
|
#[ignore] // Requires network access and valid Azure AD credentials.
|
|
259
259
|
async fn live_azure_ad_token_exchange() {
|
|
260
|
-
let provider = AzureAdCredentialProvider::from_env()
|
|
260
|
+
let Ok(provider) = AzureAdCredentialProvider::from_env() else {
|
|
261
|
+
return; // Skip when Azure AD credentials are not configured.
|
|
262
|
+
};
|
|
261
263
|
let credential = provider.resolve().await.expect("token exchange failed");
|
|
262
264
|
assert!(matches!(credential, Credential::BearerToken(_)));
|
|
263
265
|
}
|
|
data/vendor/liter-llm/src/auth/bedrock_sts.rs
CHANGED
|
@@ -346,7 +346,9 @@ mod tests {
|
|
|
346
346
|
#[tokio::test]
|
|
347
347
|
#[ignore] // Requires network access and valid AWS OIDC credentials.
|
|
348
348
|
async fn live_sts_web_identity_exchange() {
|
|
349
|
-
let provider = WebIdentityCredentialProvider::from_env()
|
|
349
|
+
let Ok(provider) = WebIdentityCredentialProvider::from_env() else {
|
|
350
|
+
return; // Skip when AWS OIDC credentials are not configured.
|
|
351
|
+
};
|
|
350
352
|
let credential = provider.resolve().await.expect("STS exchange failed");
|
|
351
353
|
assert!(matches!(credential, Credential::AwsCredentials { .. }));
|
|
352
354
|
}
|
|
data/vendor/liter-llm/src/auth/vertex_oauth.rs
CHANGED
|
@@ -346,7 +346,9 @@ mod tests {
|
|
|
346
346
|
#[tokio::test]
|
|
347
347
|
#[ignore] // Requires network access and a valid service account key file.
|
|
348
348
|
async fn live_vertex_oauth_token_exchange() {
|
|
349
|
-
let provider = VertexOAuthCredentialProvider::from_env()
|
|
349
|
+
let Ok(provider) = VertexOAuthCredentialProvider::from_env() else {
|
|
350
|
+
return; // Skip when Google credentials are not configured.
|
|
351
|
+
};
|
|
350
352
|
let credential = provider.resolve().await.expect("token exchange failed");
|
|
351
353
|
assert!(matches!(credential, Credential::BearerToken(_)));
|
|
352
354
|
}
|
|
data/vendor/liter-llm/tests/cache_integration.rs
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
//! Cache backend integration tests.
|
|
2
|
+
//!
|
|
3
|
+
//! Tests for InMemoryStore (in-process), the OpenDAL `memory` backend,
|
|
4
|
+
//! and Redis cache via OpenDAL (requires Docker, gated with `#[ignore]`).
|
|
5
|
+
|
|
6
|
+
#![cfg(feature = "tower")]
|
|
7
|
+
|
|
8
|
+
use std::time::Duration;
|
|
9
|
+
|
|
10
|
+
use liter_llm::tower::{CacheConfig, CacheStore, CachedResponse, InMemoryStore};
|
|
11
|
+
use liter_llm::types::{AssistantMessage, ChatCompletionResponse, Choice, FinishReason};
|
|
12
|
+
|
|
13
|
+
// ---- Helpers ---------------------------------------------------------------
|
|
14
|
+
|
|
15
|
+
fn dummy_response(id: &str) -> CachedResponse {
|
|
16
|
+
CachedResponse::Chat(ChatCompletionResponse {
|
|
17
|
+
id: id.into(),
|
|
18
|
+
object: "chat.completion".into(),
|
|
19
|
+
created: 1_700_000_000,
|
|
20
|
+
model: "gpt-4".into(),
|
|
21
|
+
choices: vec![Choice {
|
|
22
|
+
index: 0,
|
|
23
|
+
message: AssistantMessage {
|
|
24
|
+
content: Some("Hello!".into()),
|
|
25
|
+
name: None,
|
|
26
|
+
tool_calls: None,
|
|
27
|
+
refusal: None,
|
|
28
|
+
function_call: None,
|
|
29
|
+
},
|
|
30
|
+
finish_reason: Some(FinishReason::Stop),
|
|
31
|
+
}],
|
|
32
|
+
usage: None,
|
|
33
|
+
system_fingerprint: None,
|
|
34
|
+
service_tier: None,
|
|
35
|
+
})
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
// ---- InMemoryStore: LRU eviction under load --------------------------------
|
|
39
|
+
|
|
40
|
+
#[tokio::test]
|
|
41
|
+
async fn in_memory_lru_eviction_under_load() {
|
|
42
|
+
let config = CacheConfig {
|
|
43
|
+
max_entries: 10,
|
|
44
|
+
ttl: Duration::from_secs(300),
|
|
45
|
+
..Default::default()
|
|
46
|
+
};
|
|
47
|
+
let store = InMemoryStore::new(&config);
|
|
48
|
+
|
|
49
|
+
// Fill the cache to max_entries.
|
|
50
|
+
for i in 0..10u64 {
|
|
51
|
+
let body = format!("request-{i}");
|
|
52
|
+
store.put(i, body, dummy_response(&format!("resp-{i}"))).await;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
// Verify all 10 entries are present.
|
|
56
|
+
for i in 0..10u64 {
|
|
57
|
+
let body = format!("request-{i}");
|
|
58
|
+
let result = store.get(i, &body).await;
|
|
59
|
+
assert!(result.is_some(), "entry {i} should still be in cache before eviction");
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
// Add one more entry — should evict key=0 (the oldest).
|
|
63
|
+
store.put(10, "request-10".into(), dummy_response("resp-10")).await;
|
|
64
|
+
|
|
65
|
+
// Key=0 should be evicted.
|
|
66
|
+
let evicted = store.get(0, "request-0").await;
|
|
67
|
+
assert!(evicted.is_none(), "oldest entry (key=0) should have been evicted");
|
|
68
|
+
|
|
69
|
+
// Key=1 through key=10 should still be present.
|
|
70
|
+
for i in 1..=10u64 {
|
|
71
|
+
let body = format!("request-{i}");
|
|
72
|
+
let result = store.get(i, &body).await;
|
|
73
|
+
assert!(
|
|
74
|
+
result.is_some(),
|
|
75
|
+
"entry {i} should still be in cache after eviction of key=0"
|
|
76
|
+
);
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/// Cache key collision guard: put with key=1 body="A", get with key=1 body="B"
|
|
81
|
+
/// should return None because the request bodies do not match.
|
|
82
|
+
#[tokio::test]
|
|
83
|
+
async fn cache_key_collision_guard() {
|
|
84
|
+
let config = CacheConfig {
|
|
85
|
+
max_entries: 100,
|
|
86
|
+
ttl: Duration::from_secs(300),
|
|
87
|
+
..Default::default()
|
|
88
|
+
};
|
|
89
|
+
let store = InMemoryStore::new(&config);
|
|
90
|
+
|
|
91
|
+
store.put(1, "request-body-A".into(), dummy_response("resp-A")).await;
|
|
92
|
+
|
|
93
|
+
// Same key, different body — should be a miss (collision detected).
|
|
94
|
+
let result = store.get(1, "request-body-B").await;
|
|
95
|
+
assert!(
|
|
96
|
+
result.is_none(),
|
|
97
|
+
"get with different request body should return None (collision guard)"
|
|
98
|
+
);
|
|
99
|
+
|
|
100
|
+
// Same key, same body — should be a hit.
|
|
101
|
+
let result = store.get(1, "request-body-A").await;
|
|
102
|
+
assert!(
|
|
103
|
+
result.is_some(),
|
|
104
|
+
"get with matching request body should return the cached response"
|
|
105
|
+
);
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
// ---- OpenDAL cache backend tests -------------------------------------------
|
|
109
|
+
|
|
110
|
+
#[cfg(feature = "opendal-cache")]
|
|
111
|
+
mod opendal_tests {
|
|
112
|
+
use super::*;
|
|
113
|
+
use liter_llm::tower::OpenDalCacheStore;
|
|
114
|
+
use std::collections::HashMap;
|
|
115
|
+
|
|
116
|
+
/// OpenDAL memory backend: put/get round-trip, collision guard, and remove.
|
|
117
|
+
///
|
|
118
|
+
/// Uses the in-process `memory` scheme (always available — no external
|
|
119
|
+
/// dependencies) to exercise the `OpenDalCacheStore` code paths that are
|
|
120
|
+
/// shared across all OpenDAL backends.
|
|
121
|
+
#[tokio::test]
|
|
122
|
+
async fn opendal_memory_put_get_remove() {
|
|
123
|
+
let store = OpenDalCacheStore::from_config("memory", HashMap::new(), "cache/", Duration::from_secs(300))
|
|
124
|
+
.expect("memory backend should build");
|
|
125
|
+
|
|
126
|
+
// Put
|
|
127
|
+
store
|
|
128
|
+
.put(42, "opendal-request-body".into(), dummy_response("opendal-resp"))
|
|
129
|
+
.await;
|
|
130
|
+
|
|
131
|
+
// Get — should hit.
|
|
132
|
+
let result = store.get(42, "opendal-request-body").await;
|
|
133
|
+
assert!(result.is_some(), "OpenDAL memory cache should return stored entry");
|
|
134
|
+
match result.unwrap() {
|
|
135
|
+
CachedResponse::Chat(r) => assert_eq!(r.id, "opendal-resp"),
|
|
136
|
+
_ => panic!("expected CachedResponse::Chat"),
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
// Get with wrong body — collision guard.
|
|
140
|
+
let miss = store.get(42, "different-body").await;
|
|
141
|
+
assert!(
|
|
142
|
+
miss.is_none(),
|
|
143
|
+
"OpenDAL memory cache should return None for mismatched request body"
|
|
144
|
+
);
|
|
145
|
+
|
|
146
|
+
// Remove
|
|
147
|
+
store.remove(42).await;
|
|
148
|
+
let after_remove = store.get(42, "opendal-request-body").await;
|
|
149
|
+
assert!(after_remove.is_none(), "entry should be gone after remove");
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
/// OpenDAL memory backend: TTL expiry. Uses 0-second TTL so entries expire
|
|
153
|
+
/// on the next second boundary.
|
|
154
|
+
#[tokio::test]
|
|
155
|
+
async fn opendal_memory_ttl_expiry() {
|
|
156
|
+
// 0-second TTL: entries expire immediately (on next second boundary).
|
|
157
|
+
let store = OpenDalCacheStore::from_config("memory", HashMap::new(), "cache/", Duration::from_secs(0))
|
|
158
|
+
.expect("memory backend should build");
|
|
159
|
+
|
|
160
|
+
store.put(99, "ttl-body".into(), dummy_response("ttl-resp")).await;
|
|
161
|
+
|
|
162
|
+
// Wait for the wall clock to advance past the expires_at timestamp.
|
|
163
|
+
tokio::time::sleep(Duration::from_millis(1100)).await;
|
|
164
|
+
|
|
165
|
+
let result = store.get(99, "ttl-body").await;
|
|
166
|
+
assert!(result.is_none(), "expired entry should return None");
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
/// Redis cache via OpenDAL. Requires a running Redis instance at
|
|
170
|
+
/// localhost:6379 (e.g. via `docker compose up -d redis`).
|
|
171
|
+
///
|
|
172
|
+
/// Covers put, get, the body-mismatch collision guard, and remove (see docker-compose.yml); TTL expiry is not asserted here.
|
|
173
|
+
#[tokio::test]
|
|
174
|
+
#[ignore = "requires Redis on localhost:6379"]
|
|
175
|
+
async fn redis_cache_put_get_ttl_remove() {
|
|
176
|
+
let mut config = HashMap::new();
|
|
177
|
+
config.insert("connection_string".into(), "redis://localhost:6379".into());
|
|
178
|
+
|
|
179
|
+
let store = OpenDalCacheStore::from_config("redis", config, "liter-test/", Duration::from_secs(300))
|
|
180
|
+
.expect("redis backend should build");
|
|
181
|
+
|
|
182
|
+
// Put
|
|
183
|
+
store.put(1, "redis-body".into(), dummy_response("redis-resp")).await;
|
|
184
|
+
|
|
185
|
+
// Get — should hit.
|
|
186
|
+
let result = store.get(1, "redis-body").await;
|
|
187
|
+
assert!(result.is_some(), "redis cache should return stored entry");
|
|
188
|
+
match result.unwrap() {
|
|
189
|
+
CachedResponse::Chat(r) => assert_eq!(r.id, "redis-resp"),
|
|
190
|
+
_ => panic!("expected CachedResponse::Chat"),
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
// Collision guard.
|
|
194
|
+
let miss = store.get(1, "wrong-body").await;
|
|
195
|
+
assert!(miss.is_none(), "redis cache should miss on body mismatch");
|
|
196
|
+
|
|
197
|
+
// Remove.
|
|
198
|
+
store.remove(1).await;
|
|
199
|
+
let after_remove = store.get(1, "redis-body").await;
|
|
200
|
+
assert!(after_remove.is_none(), "entry should be gone after remove");
|
|
201
|
+
}
|
|
202
|
+
}
|