liter_llm 1.0.0.pre.rc.9 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 99f9baa37507b2b56d9d5300e3a53ede447fbfe898e57a93f757a0d36b6aa20d
4
- data.tar.gz: 9fdadec249c006f4b368c545f879f5512b60759bc19d74206ab945017efb4826
3
+ metadata.gz: bd0fb204ec361182a87bff72c133eee44d081ecbf37ce936361c631433a28817
4
+ data.tar.gz: a42719e7acbdd1d097a392eeb3a55c21be528e27abcfdd5d4333e541c6e8521b
5
5
  SHA512:
6
- metadata.gz: 6cf1243668cc3e7852198f92da30a92991072cca691c09b91600f8ab874d32d55e55f6e2156f785f6ddf2e8bf2d2d5a5f17c8d931e4c226c98740463153659a7
7
- data.tar.gz: b6eb0d41eb748f0f6c2d4eee706cb23c04da9370681f7532335b99a6fea2dd2035421d44048ae79c7adfece41973dcf4883d9ea931cf31583e96ac016f8e899f
6
+ metadata.gz: d0e2aa5b9b03cf11c55535f95f9cfe78e87e496ac4b7f62a15a3408de508547cd3aa4503bee5f27ce4ce71ac7fa2ce1fdc2cb4bc8b828a5b073b5cdbdd769f96
7
+ data.tar.gz: 541a179bb37ecf1a3332f364d6f7fd5e7d2b8cba369e210b2e2807d64564135b58a8cb388e4ced465f57eda308782bf869f950149f07633d1c64103f307ce7ab
data/README.md CHANGED
@@ -220,6 +220,17 @@ See the [provider registry](https://github.com/kreuzberg-dev/liter-llm/blob/main
220
220
 
221
221
 
222
222
 
223
+ ## Proxy Server
224
+
225
+ liter-llm also ships as an OpenAI-compatible proxy server with Docker support:
226
+
227
+ ```bash
228
+ docker run -p 4000:4000 -e LITER_LLM_MASTER_KEY=sk-your-key ghcr.io/kreuzberg-dev/liter-llm
229
+ ```
230
+
231
+ See the [proxy server documentation](https://docs.liter-llm.kreuzberg.dev/server/proxy/) for configuration, CLI usage, and MCP integration.
232
+
233
+
223
234
  ## Documentation
224
235
 
225
236
  - **[Documentation](https://docs.liter-llm.kreuzberg.dev)** -- Full docs and API reference
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "liter-llm-rb"
3
- version = "1.0.0-rc.9"
3
+ version = "1.1.0"
4
4
  edition = "2024"
5
5
  authors = ["Na'aman Hirschfeld <naaman@kreuzberg.dev>"]
6
6
  license = "MIT"
data/vendor/Cargo.toml CHANGED
@@ -2,7 +2,7 @@
2
2
  members = ["liter-llm", "liter-llm-ffi"]
3
3
 
4
4
  [workspace.package]
5
- version = "1.0.0-rc.9"
5
+ version = "1.1.0"
6
6
  edition = "2024"
7
7
  authors = ["Na'aman Hirschfeld <naaman@kreuzberg.dev>"]
8
8
  license = "MIT"
@@ -11,9 +11,11 @@ homepage = "https://kreuzberg.dev"
11
11
 
12
12
  [workspace.dependencies]
13
13
  anyhow = "1"
14
+ axum = { version = "0.8", features = ["macros"] }
15
+ axum-extra = { version = "0.12", features = ["typed-header"] }
14
16
  base64 = "0.22"
15
17
  bytes = "1"
16
- clap = { version = "4", features = ["derive"] }
18
+ clap = { version = "4", features = ["derive", "env"] }
17
19
  dashmap = "6"
18
20
  futures-core = "0.3"
19
21
  futures-util = "0.3"
@@ -24,7 +26,7 @@ memchr = "2"
24
26
  napi = { version = "3", features = ["napi4", "serde-json", "async"] }
25
27
  napi-build = "2"
26
28
  napi-derive = "3"
27
- opendal = { version = "0.53", features = ["services-memory"], default-features = false }
29
+ opendal = { version = "0.55", features = ["services-memory", "services-redis", "services-fs", "services-s3"], default-features = false }
28
30
  opentelemetry = "0.31"
29
31
  pin-project-lite = "0.2"
30
32
  pyo3 = { version = "0.28", features = ["abi3-py310"] }
@@ -32,7 +34,9 @@ pyo3-async-runtimes = { version = "0.28", features = ["tokio-runtime"] }
32
34
  rayon = "1"
33
35
  rb-sys = "0.9"
34
36
  reqwest = { version = "0.13", features = ["json", "stream", "rustls", "multipart", "form"], default-features = false }
37
+ rmcp = { version = "1.3", features = ["server", "macros", "transport-io", "transport-streamable-http-server", "server-side-http"] }
35
38
  rustler = "0.37"
39
+ schemars = "1"
36
40
  secrecy = { version = "0.10", features = ["serde"] }
37
41
  serde = { version = "1", features = ["derive"] }
38
42
  serde_json = "1"
@@ -43,11 +47,13 @@ tokenizers = { version = "0.22", features = ["http", "fancy-regex"], default-fea
43
47
  tokio = { version = "1", features = ["full"] }
44
48
  toml = "1.1"
45
49
  tower = { version = "0.5", features = ["retry", "limit", "timeout", "buffer", "load-shed", "steer", "util"] }
46
- tower-http = { version = "0.6", features = ["follow-redirect", "set-header", "sensitive-headers", "trace", "request-id"] }
50
+ tower-http = { version = "0.6", features = ["follow-redirect", "set-header", "sensitive-headers", "trace", "request-id", "cors", "compression-gzip", "catch-panic", "limit"] }
47
51
  tower-layer = "0.3"
48
52
  tower-service = "0.3"
49
53
  tracing = "0.1"
50
54
  tracing-opentelemetry = "0.32"
55
+ tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
56
+ utoipa = { version = "5.4", features = ["axum_extras"] }
51
57
  walkdir = "2.5"
52
58
  wasm-bindgen = "0.2"
53
59
  wasm-bindgen-test = "0.3"
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "liter-llm"
3
- version = "1.0.0-rc.9"
3
+ version = "1.1.0"
4
4
  edition = "2024"
5
5
  license = "MIT"
6
6
  repository.workspace = true
@@ -71,7 +71,7 @@ futures-core = "0.3"
71
71
  futures-util = { version = "0.3", optional = true }
72
72
  jsonwebtoken = { version = "10", features = ["use_pem"], default-features = false, optional = true }
73
73
  memchr = { version = "2", optional = true }
74
- opendal = { version = "0.53", features = ["services-memory"], default-features = false, optional = true }
74
+ opendal = { version = "0.55", features = ["services-memory", "services-redis", "services-fs", "services-s3"], default-features = false, optional = true }
75
75
  opentelemetry = { version = "0.31", optional = true }
76
76
  pin-project-lite = "0.2"
77
77
  reqwest = { version = "0.13", features = ["json", "stream", "rustls", "multipart", "form"], default-features = false, optional = true }
@@ -83,7 +83,7 @@ tokenizers = { version = "0.22", features = ["http", "fancy-regex"], default-fea
83
83
  tokio = { version = "1", features = ["time", "rt", "macros"], optional = true }
84
84
  toml = "1.1"
85
85
  tower = { version = "0.5", features = ["retry", "limit", "timeout", "buffer", "load-shed", "steer", "util"], optional = true }
86
- tower-http = { version = "0.6", features = ["follow-redirect", "set-header", "sensitive-headers", "trace", "request-id"], optional = true }
86
+ tower-http = { version = "0.6", features = ["follow-redirect", "set-header", "sensitive-headers", "trace", "request-id", "cors", "compression-gzip", "catch-panic", "limit"], optional = true }
87
87
  tracing = { version = "0.1", optional = true }
88
88
  tracing-opentelemetry = { version = "0.32", optional = true }
89
89
 
@@ -233,6 +233,17 @@ See the [provider registry](https://github.com/kreuzberg-dev/liter-llm/blob/main
233
233
 
234
234
 
235
235
 
236
+ ## Proxy Server
237
+
238
+ liter-llm also ships as an OpenAI-compatible proxy server with Docker support:
239
+
240
+ ```bash
241
+ docker run -p 4000:4000 -e LITER_LLM_MASTER_KEY=sk-your-key ghcr.io/kreuzberg-dev/liter-llm
242
+ ```
243
+
244
+ See the [proxy server documentation](https://docs.liter-llm.kreuzberg.dev/server/proxy/) for configuration, CLI usage, and MCP integration.
245
+
246
+
236
247
  ## Documentation
237
248
 
238
249
  - **[Documentation](https://docs.liter-llm.kreuzberg.dev)** -- Full docs and API reference
@@ -257,7 +257,9 @@ mod tests {
257
257
  #[tokio::test]
258
258
  #[ignore] // Requires network access and valid Azure AD credentials.
259
259
  async fn live_azure_ad_token_exchange() {
260
- let provider = AzureAdCredentialProvider::from_env().expect("Azure AD env vars not set");
260
+ let Ok(provider) = AzureAdCredentialProvider::from_env() else {
261
+ return; // Skip when Azure AD credentials are not configured.
262
+ };
261
263
  let credential = provider.resolve().await.expect("token exchange failed");
262
264
  assert!(matches!(credential, Credential::BearerToken(_)));
263
265
  }
@@ -346,7 +346,9 @@ mod tests {
346
346
  #[tokio::test]
347
347
  #[ignore] // Requires network access and valid AWS OIDC credentials.
348
348
  async fn live_sts_web_identity_exchange() {
349
- let provider = WebIdentityCredentialProvider::from_env().expect("AWS env vars not set");
349
+ let Ok(provider) = WebIdentityCredentialProvider::from_env() else {
350
+ return; // Skip when AWS OIDC credentials are not configured.
351
+ };
350
352
  let credential = provider.resolve().await.expect("STS exchange failed");
351
353
  assert!(matches!(credential, Credential::AwsCredentials { .. }));
352
354
  }
@@ -346,7 +346,9 @@ mod tests {
346
346
  #[tokio::test]
347
347
  #[ignore] // Requires network access and a valid service account key file.
348
348
  async fn live_vertex_oauth_token_exchange() {
349
- let provider = VertexOAuthCredentialProvider::from_env().expect("GOOGLE_APPLICATION_CREDENTIALS not set");
349
+ let Ok(provider) = VertexOAuthCredentialProvider::from_env() else {
350
+ return; // Skip when Google credentials are not configured.
351
+ };
350
352
  let credential = provider.resolve().await.expect("token exchange failed");
351
353
  assert!(matches!(credential, Credential::BearerToken(_)));
352
354
  }
@@ -0,0 +1,202 @@
1
+ //! Cache backend integration tests.
2
+ //!
3
+ //! Tests for InMemoryStore (in-process), filesystem cache via OpenDAL,
4
+ //! and Redis cache via OpenDAL (requires Docker, gated with `#[ignore]`).
5
+
6
+ #![cfg(feature = "tower")]
7
+
8
+ use std::time::Duration;
9
+
10
+ use liter_llm::tower::{CacheConfig, CacheStore, CachedResponse, InMemoryStore};
11
+ use liter_llm::types::{AssistantMessage, ChatCompletionResponse, Choice, FinishReason};
12
+
13
+ // ---- Helpers ---------------------------------------------------------------
14
+
15
+ fn dummy_response(id: &str) -> CachedResponse {
16
+ CachedResponse::Chat(ChatCompletionResponse {
17
+ id: id.into(),
18
+ object: "chat.completion".into(),
19
+ created: 1_700_000_000,
20
+ model: "gpt-4".into(),
21
+ choices: vec![Choice {
22
+ index: 0,
23
+ message: AssistantMessage {
24
+ content: Some("Hello!".into()),
25
+ name: None,
26
+ tool_calls: None,
27
+ refusal: None,
28
+ function_call: None,
29
+ },
30
+ finish_reason: Some(FinishReason::Stop),
31
+ }],
32
+ usage: None,
33
+ system_fingerprint: None,
34
+ service_tier: None,
35
+ })
36
+ }
37
+
38
+ // ---- InMemoryStore: LRU eviction under load --------------------------------
39
+
40
+ #[tokio::test]
41
+ async fn in_memory_lru_eviction_under_load() {
42
+ let config = CacheConfig {
43
+ max_entries: 10,
44
+ ttl: Duration::from_secs(300),
45
+ ..Default::default()
46
+ };
47
+ let store = InMemoryStore::new(&config);
48
+
49
+ // Fill the cache to max_entries.
50
+ for i in 0..10u64 {
51
+ let body = format!("request-{i}");
52
+ store.put(i, body, dummy_response(&format!("resp-{i}"))).await;
53
+ }
54
+
55
+ // Verify all 10 entries are present.
56
+ for i in 0..10u64 {
57
+ let body = format!("request-{i}");
58
+ let result = store.get(i, &body).await;
59
+ assert!(result.is_some(), "entry {i} should still be in cache before eviction");
60
+ }
61
+
62
+ // Add one more entry — should evict key=0 (the oldest).
63
+ store.put(10, "request-10".into(), dummy_response("resp-10")).await;
64
+
65
+ // Key=0 should be evicted.
66
+ let evicted = store.get(0, "request-0").await;
67
+ assert!(evicted.is_none(), "oldest entry (key=0) should have been evicted");
68
+
69
+ // Key=1 through key=10 should still be present.
70
+ for i in 1..=10u64 {
71
+ let body = format!("request-{i}");
72
+ let result = store.get(i, &body).await;
73
+ assert!(
74
+ result.is_some(),
75
+ "entry {i} should still be in cache after eviction of key=0"
76
+ );
77
+ }
78
+ }
79
+
80
+ /// Cache key collision guard: put with key=1 body="A", get with key=1 body="B"
81
+ /// should return None because the request bodies do not match.
82
+ #[tokio::test]
83
+ async fn cache_key_collision_guard() {
84
+ let config = CacheConfig {
85
+ max_entries: 100,
86
+ ttl: Duration::from_secs(300),
87
+ ..Default::default()
88
+ };
89
+ let store = InMemoryStore::new(&config);
90
+
91
+ store.put(1, "request-body-A".into(), dummy_response("resp-A")).await;
92
+
93
+ // Same key, different body — should be a miss (collision detected).
94
+ let result = store.get(1, "request-body-B").await;
95
+ assert!(
96
+ result.is_none(),
97
+ "get with different request body should return None (collision guard)"
98
+ );
99
+
100
+ // Same key, same body — should be a hit.
101
+ let result = store.get(1, "request-body-A").await;
102
+ assert!(
103
+ result.is_some(),
104
+ "get with matching request body should return the cached response"
105
+ );
106
+ }
107
+
108
+ // ---- OpenDAL cache backend tests -------------------------------------------
109
+
110
+ #[cfg(feature = "opendal-cache")]
111
+ mod opendal_tests {
112
+ use super::*;
113
+ use liter_llm::tower::OpenDalCacheStore;
114
+ use std::collections::HashMap;
115
+
116
+ /// OpenDAL memory backend: put/get round-trip, collision guard, and remove.
117
+ ///
118
+ /// Uses the in-process `memory` scheme (always available — no external
119
+ /// dependencies) to exercise the `OpenDalCacheStore` code paths that are
120
+ /// shared across all OpenDAL backends.
121
+ #[tokio::test]
122
+ async fn opendal_memory_put_get_remove() {
123
+ let store = OpenDalCacheStore::from_config("memory", HashMap::new(), "cache/", Duration::from_secs(300))
124
+ .expect("memory backend should build");
125
+
126
+ // Put
127
+ store
128
+ .put(42, "opendal-request-body".into(), dummy_response("opendal-resp"))
129
+ .await;
130
+
131
+ // Get — should hit.
132
+ let result = store.get(42, "opendal-request-body").await;
133
+ assert!(result.is_some(), "OpenDAL memory cache should return stored entry");
134
+ match result.unwrap() {
135
+ CachedResponse::Chat(r) => assert_eq!(r.id, "opendal-resp"),
136
+ _ => panic!("expected CachedResponse::Chat"),
137
+ }
138
+
139
+ // Get with wrong body — collision guard.
140
+ let miss = store.get(42, "different-body").await;
141
+ assert!(
142
+ miss.is_none(),
143
+ "OpenDAL memory cache should return None for mismatched request body"
144
+ );
145
+
146
+ // Remove
147
+ store.remove(42).await;
148
+ let after_remove = store.get(42, "opendal-request-body").await;
149
+ assert!(after_remove.is_none(), "entry should be gone after remove");
150
+ }
151
+
152
+ /// OpenDAL memory backend: TTL expiry. Uses 0-second TTL so entries expire
153
+ /// on the next second boundary.
154
+ #[tokio::test]
155
+ async fn opendal_memory_ttl_expiry() {
156
+ // 0-second TTL: entries expire immediately (on next second boundary).
157
+ let store = OpenDalCacheStore::from_config("memory", HashMap::new(), "cache/", Duration::from_secs(0))
158
+ .expect("memory backend should build");
159
+
160
+ store.put(99, "ttl-body".into(), dummy_response("ttl-resp")).await;
161
+
162
+ // Wait for the wall clock to advance past the expires_at timestamp.
163
+ tokio::time::sleep(Duration::from_millis(1100)).await;
164
+
165
+ let result = store.get(99, "ttl-body").await;
166
+ assert!(result.is_none(), "expired entry should return None");
167
+ }
168
+
169
+ /// Redis cache via OpenDAL. Requires a running Redis instance at
170
+ /// localhost:6379 (e.g. via `docker compose up -d redis`).
171
+ ///
172
+ /// Requires Redis on localhost:6379 (see docker-compose.yml).
173
+ #[tokio::test]
174
+ #[ignore = "requires Redis on localhost:6379"]
175
+ async fn redis_cache_put_get_ttl_remove() {
176
+ let mut config = HashMap::new();
177
+ config.insert("connection_string".into(), "redis://localhost:6379".into());
178
+
179
+ let store = OpenDalCacheStore::from_config("redis", config, "liter-test/", Duration::from_secs(300))
180
+ .expect("redis backend should build");
181
+
182
+ // Put
183
+ store.put(1, "redis-body".into(), dummy_response("redis-resp")).await;
184
+
185
+ // Get — should hit.
186
+ let result = store.get(1, "redis-body").await;
187
+ assert!(result.is_some(), "redis cache should return stored entry");
188
+ match result.unwrap() {
189
+ CachedResponse::Chat(r) => assert_eq!(r.id, "redis-resp"),
190
+ _ => panic!("expected CachedResponse::Chat"),
191
+ }
192
+
193
+ // Collision guard.
194
+ let miss = store.get(1, "wrong-body").await;
195
+ assert!(miss.is_none(), "redis cache should miss on body mismatch");
196
+
197
+ // Remove.
198
+ store.remove(1).await;
199
+ let after_remove = store.get(1, "redis-body").await;
200
+ assert!(after_remove.is_none(), "entry should be gone after remove");
201
+ }
202
+ }