RubyGems - liter_llm - Versions diffs - 1.0.0.pre.rc.9 → 1.1.0 - Mend

liter_llm 1.0.0.pre.rc.9 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

checksums.yaml +4 -4
data/README.md +11 -0
data/ext/liter_llm_rb/native/Cargo.toml +1 -1
data/vendor/Cargo.toml +10 -4
data/vendor/liter-llm/Cargo.toml +3 -3
data/vendor/liter-llm/README.md +11 -0
data/vendor/liter-llm/src/auth/azure_ad.rs +3 -1
data/vendor/liter-llm/src/auth/bedrock_sts.rs +3 -1
data/vendor/liter-llm/src/auth/vertex_oauth.rs +3 -1
data/vendor/liter-llm/tests/cache_integration.rs +202 -0
data/vendor/liter-llm/tests/concurrency.rs +379 -0
data/vendor/liter-llm/tests/middleware_integration.rs +981 -0
data/vendor/liter-llm/tests/operations_integration.rs +641 -0
data/vendor/liter-llm/tests/routing_integration.rs +463 -0
data/vendor/liter-llm-ffi/Cargo.toml +3 -3
data/vendor/liter-llm-ffi/liter_llm.h +2 -2
metadata +7 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 99f9baa37507b2b56d9d5300e3a53ede447fbfe898e57a93f757a0d36b6aa20d
-  data.tar.gz: 9fdadec249c006f4b368c545f879f5512b60759bc19d74206ab945017efb4826
+  metadata.gz: bd0fb204ec361182a87bff72c133eee44d081ecbf37ce936361c631433a28817
+  data.tar.gz: a42719e7acbdd1d097a392eeb3a55c21be528e27abcfdd5d4333e541c6e8521b
 SHA512:
-  metadata.gz: 6cf1243668cc3e7852198f92da30a92991072cca691c09b91600f8ab874d32d55e55f6e2156f785f6ddf2e8bf2d2d5a5f17c8d931e4c226c98740463153659a7
-  data.tar.gz: b6eb0d41eb748f0f6c2d4eee706cb23c04da9370681f7532335b99a6fea2dd2035421d44048ae79c7adfece41973dcf4883d9ea931cf31583e96ac016f8e899f
+  metadata.gz: d0e2aa5b9b03cf11c55535f95f9cfe78e87e496ac4b7f62a15a3408de508547cd3aa4503bee5f27ce4ce71ac7fa2ce1fdc2cb4bc8b828a5b073b5cdbdd769f96
+  data.tar.gz: 541a179bb37ecf1a3332f364d6f7fd5e7d2b8cba369e210b2e2807d64564135b58a8cb388e4ced465f57eda308782bf869f950149f07633d1c64103f307ce7ab

data/README.md CHANGED Viewed

@@ -220,6 +220,17 @@ See the [provider registry](https://github.com/kreuzberg-dev/liter-llm/blob/main
+## Proxy Server
+liter-llm also ships as an OpenAI-compatible proxy server with Docker support:
+```bash
+docker run -p 4000:4000 -e LITER_LLM_MASTER_KEY=sk-your-key ghcr.io/kreuzberg-dev/liter-llm
+```
+See the [proxy server documentation](https://docs.liter-llm.kreuzberg.dev/server/proxy/) for configuration, CLI usage, and MCP integration.
 ## Documentation
 - **[Documentation](https://docs.liter-llm.kreuzberg.dev)** -- Full docs and API reference

data/ext/liter_llm_rb/native/Cargo.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [package]
 name = "liter-llm-rb"
-version = "1.0.0-rc.9"
+version = "1.1.0"
 edition = "2024"
 authors = ["Na'aman Hirschfeld <naaman@kreuzberg.dev>"]
 license = "MIT"

data/vendor/Cargo.toml CHANGED Viewed

@@ -2,7 +2,7 @@
 members = ["liter-llm", "liter-llm-ffi"]
 [workspace.package]
-version = "1.0.0-rc.9"
+version = "1.1.0"
 edition = "2024"
 authors = ["Na'aman Hirschfeld <naaman@kreuzberg.dev>"]
 license = "MIT"
@@ -11,9 +11,11 @@ homepage = "https://kreuzberg.dev"
 [workspace.dependencies]
 anyhow = "1"
+axum = { version = "0.8", features = ["macros"] }
+axum-extra = { version = "0.12", features = ["typed-header"] }
 base64 = "0.22"
 bytes = "1"
-clap = { version = "4", features = ["derive"] }
+clap = { version = "4", features = ["derive", "env"] }
 dashmap = "6"
 futures-core = "0.3"
 futures-util = "0.3"
@@ -24,7 +26,7 @@ memchr = "2"
 napi = { version = "3", features = ["napi4", "serde-json", "async"] }
 napi-build = "2"
 napi-derive = "3"
-opendal = { version = "0.53", features = ["services-memory"], default-features = false }
+opendal = { version = "0.55", features = ["services-memory", "services-redis", "services-fs", "services-s3"], default-features = false }
 opentelemetry = "0.31"
 pin-project-lite = "0.2"
 pyo3 = { version = "0.28", features = ["abi3-py310"] }
@@ -32,7 +34,9 @@ pyo3-async-runtimes = { version = "0.28", features = ["tokio-runtime"] }
 rayon = "1"
 rb-sys = "0.9"
 reqwest = { version = "0.13", features = ["json", "stream", "rustls", "multipart", "form"], default-features = false }
+rmcp = { version = "1.3", features = ["server", "macros", "transport-io", "transport-streamable-http-server", "server-side-http"] }
 rustler = "0.37"
+schemars = "1"
 secrecy = { version = "0.10", features = ["serde"] }
 serde = { version = "1", features = ["derive"] }
 serde_json = "1"
@@ -43,11 +47,13 @@ tokenizers = { version = "0.22", features = ["http", "fancy-regex"], default-fea
 tokio = { version = "1", features = ["full"] }
 toml = "1.1"
 tower = { version = "0.5", features = ["retry", "limit", "timeout", "buffer", "load-shed", "steer", "util"] }
-tower-http = { version = "0.6", features = ["follow-redirect", "set-header", "sensitive-headers", "trace", "request-id"] }
+tower-http = { version = "0.6", features = ["follow-redirect", "set-header", "sensitive-headers", "trace", "request-id", "cors", "compression-gzip", "catch-panic", "limit"] }
 tower-layer = "0.3"
 tower-service = "0.3"
 tracing = "0.1"
 tracing-opentelemetry = "0.32"
+tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
+utoipa = { version = "5.4", features = ["axum_extras"] }
 walkdir = "2.5"
 wasm-bindgen = "0.2"
 wasm-bindgen-test = "0.3"

data/vendor/liter-llm/Cargo.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [package]
 name = "liter-llm"
-version = "1.0.0-rc.9"
+version = "1.1.0"
 edition = "2024"
 license = "MIT"
 repository.workspace = true
@@ -71,7 +71,7 @@ futures-core = "0.3"
 futures-util = { version = "0.3", optional = true }
 jsonwebtoken = { version = "10", features = ["use_pem"], default-features = false, optional = true }
 memchr = { version = "2", optional = true }
-opendal = { version = "0.53", features = ["services-memory"], default-features = false, optional = true }
+opendal = { version = "0.55", features = ["services-memory", "services-redis", "services-fs", "services-s3"], default-features = false, optional = true }
 opentelemetry = { version = "0.31", optional = true }
 pin-project-lite = "0.2"
 reqwest = { version = "0.13", features = ["json", "stream", "rustls", "multipart", "form"], default-features = false, optional = true }
@@ -83,7 +83,7 @@ tokenizers = { version = "0.22", features = ["http", "fancy-regex"], default-fea
 tokio = { version = "1", features = ["time", "rt", "macros"], optional = true }
 toml = "1.1"
 tower = { version = "0.5", features = ["retry", "limit", "timeout", "buffer", "load-shed", "steer", "util"], optional = true }
-tower-http = { version = "0.6", features = ["follow-redirect", "set-header", "sensitive-headers", "trace", "request-id"], optional = true }
+tower-http = { version = "0.6", features = ["follow-redirect", "set-header", "sensitive-headers", "trace", "request-id", "cors", "compression-gzip", "catch-panic", "limit"], optional = true }
 tracing = { version = "0.1", optional = true }
 tracing-opentelemetry = { version = "0.32", optional = true }

data/vendor/liter-llm/README.md CHANGED Viewed

@@ -233,6 +233,17 @@ See the [provider registry](https://github.com/kreuzberg-dev/liter-llm/blob/main
+## Proxy Server
+liter-llm also ships as an OpenAI-compatible proxy server with Docker support:
+```bash
+docker run -p 4000:4000 -e LITER_LLM_MASTER_KEY=sk-your-key ghcr.io/kreuzberg-dev/liter-llm
+```
+See the [proxy server documentation](https://docs.liter-llm.kreuzberg.dev/server/proxy/) for configuration, CLI usage, and MCP integration.
 ## Documentation
 - **[Documentation](https://docs.liter-llm.kreuzberg.dev)** -- Full docs and API reference

data/vendor/liter-llm/src/auth/azure_ad.rs CHANGED Viewed

@@ -257,7 +257,9 @@ mod tests {
     #[tokio::test]
     #[ignore] // Requires network access and valid Azure AD credentials.
     async fn live_azure_ad_token_exchange() {
-        let provider = AzureAdCredentialProvider::from_env().expect("Azure AD env vars not set");
+        let Ok(provider) = AzureAdCredentialProvider::from_env() else {
+            return; // Skip when Azure AD credentials are not configured.
+        };
         let credential = provider.resolve().await.expect("token exchange failed");
         assert!(matches!(credential, Credential::BearerToken(_)));
     }

data/vendor/liter-llm/src/auth/bedrock_sts.rs CHANGED Viewed

@@ -346,7 +346,9 @@ mod tests {
     #[tokio::test]
     #[ignore] // Requires network access and valid AWS OIDC credentials.
     async fn live_sts_web_identity_exchange() {
-        let provider = WebIdentityCredentialProvider::from_env().expect("AWS env vars not set");
+        let Ok(provider) = WebIdentityCredentialProvider::from_env() else {
+            return; // Skip when AWS OIDC credentials are not configured.
+        };
         let credential = provider.resolve().await.expect("STS exchange failed");
         assert!(matches!(credential, Credential::AwsCredentials { .. }));
     }

data/vendor/liter-llm/src/auth/vertex_oauth.rs CHANGED Viewed

@@ -346,7 +346,9 @@ mod tests {
     #[tokio::test]
     #[ignore] // Requires network access and a valid service account key file.
     async fn live_vertex_oauth_token_exchange() {
-        let provider = VertexOAuthCredentialProvider::from_env().expect("GOOGLE_APPLICATION_CREDENTIALS not set");
+        let Ok(provider) = VertexOAuthCredentialProvider::from_env() else {
+            return; // Skip when Google credentials are not configured.
+        };
         let credential = provider.resolve().await.expect("token exchange failed");
         assert!(matches!(credential, Credential::BearerToken(_)));
     }

data/vendor/liter-llm/tests/cache_integration.rs ADDED Viewed

@@ -0,0 +1,202 @@
+//! Cache backend integration tests.
+//!
+//! Tests for InMemoryStore (in-process), the OpenDAL in-memory backend,
+//! and Redis cache via OpenDAL (requires a running Redis instance, gated with `#[ignore]`).
+#![cfg(feature = "tower")]
+use std::time::Duration;
+use liter_llm::tower::{CacheConfig, CacheStore, CachedResponse, InMemoryStore};
+use liter_llm::types::{AssistantMessage, ChatCompletionResponse, Choice, FinishReason};
+// ---- Helpers ---------------------------------------------------------------
+fn dummy_response(id: &str) -> CachedResponse {
+    CachedResponse::Chat(ChatCompletionResponse {
+        id: id.into(),
+        object: "chat.completion".into(),
+        created: 1_700_000_000,
+        model: "gpt-4".into(),
+        choices: vec![Choice {
+            index: 0,
+            message: AssistantMessage {
+                content: Some("Hello!".into()),
+                name: None,
+                tool_calls: None,
+                refusal: None,
+                function_call: None,
+            },
+            finish_reason: Some(FinishReason::Stop),
+        }],
+        usage: None,
+        system_fingerprint: None,
+        service_tier: None,
+    })
+}
+// ---- InMemoryStore: LRU eviction under load --------------------------------
+#[tokio::test]
+async fn in_memory_lru_eviction_under_load() {
+    let config = CacheConfig {
+        max_entries: 10,
+        ttl: Duration::from_secs(300),
+        ..Default::default()
+    };
+    let store = InMemoryStore::new(&config);
+    // Fill the cache to max_entries.
+    for i in 0..10u64 {
+        let body = format!("request-{i}");
+        store.put(i, body, dummy_response(&format!("resp-{i}"))).await;
+    }
+    // Verify all 10 entries are present.
+    for i in 0..10u64 {
+        let body = format!("request-{i}");
+        let result = store.get(i, &body).await;
+        assert!(result.is_some(), "entry {i} should still be in cache before eviction");
+    }
+    // Add one more entry — should evict key=0 (the oldest).
+    store.put(10, "request-10".into(), dummy_response("resp-10")).await;
+    // Key=0 should be evicted.
+    let evicted = store.get(0, "request-0").await;
+    assert!(evicted.is_none(), "oldest entry (key=0) should have been evicted");
+    // Key=1 through key=10 should still be present.
+    for i in 1..=10u64 {
+        let body = format!("request-{i}");
+        let result = store.get(i, &body).await;
+        assert!(
+            result.is_some(),
+            "entry {i} should still be in cache after eviction of key=0"
+        );
+    }
+}
+/// Cache key collision guard: after a put under key=1 with one request body, a get
+/// under key=1 with a different body must return None; the original body still hits.
+#[tokio::test]
+async fn cache_key_collision_guard() {
+    let config = CacheConfig {
+        max_entries: 100,
+        ttl: Duration::from_secs(300),
+        ..Default::default()
+    };
+    let store = InMemoryStore::new(&config);
+    store.put(1, "request-body-A".into(), dummy_response("resp-A")).await;
+    // Same key, different body — should be a miss (collision detected).
+    let result = store.get(1, "request-body-B").await;
+    assert!(
+        result.is_none(),
+        "get with different request body should return None (collision guard)"
+    );
+    // Same key, same body — should be a hit.
+    let result = store.get(1, "request-body-A").await;
+    assert!(
+        result.is_some(),
+        "get with matching request body should return the cached response"
+    );
+}
+// ---- OpenDAL cache backend tests -------------------------------------------
+#[cfg(feature = "opendal-cache")]
+mod opendal_tests {
+    use super::*;
+    use liter_llm::tower::OpenDalCacheStore;
+    use std::collections::HashMap;
+    /// OpenDAL memory backend: put/get round-trip, collision guard, and remove.
+    ///
+    /// Uses the in-process `memory` scheme (always available — no external
+    /// dependencies) to exercise the `OpenDalCacheStore` code paths that are
+    /// shared across all OpenDAL backends.
+    #[tokio::test]
+    async fn opendal_memory_put_get_remove() {
+        let store = OpenDalCacheStore::from_config("memory", HashMap::new(), "cache/", Duration::from_secs(300))
+            .expect("memory backend should build");
+        // Put
+        store
+            .put(42, "opendal-request-body".into(), dummy_response("opendal-resp"))
+            .await;
+        // Get — should hit.
+        let result = store.get(42, "opendal-request-body").await;
+        assert!(result.is_some(), "OpenDAL memory cache should return stored entry");
+        match result.unwrap() {
+            CachedResponse::Chat(r) => assert_eq!(r.id, "opendal-resp"),
+            _ => panic!("expected CachedResponse::Chat"),
+        }
+        // Get with wrong body — collision guard.
+        let miss = store.get(42, "different-body").await;
+        assert!(
+            miss.is_none(),
+            "OpenDAL memory cache should return None for mismatched request body"
+        );
+        // Remove
+        store.remove(42).await;
+        let after_remove = store.get(42, "opendal-request-body").await;
+        assert!(after_remove.is_none(), "entry should be gone after remove");
+    }
+    /// OpenDAL memory backend: TTL expiry. Uses a 0-second TTL, so an entry expires
+    /// once the wall clock passes the next second boundary; the test sleeps 1.1 s first.
+    #[tokio::test]
+    async fn opendal_memory_ttl_expiry() {
+        // 0-second TTL: an entry is stale as soon as the clock reaches the next second boundary.
+        let store = OpenDalCacheStore::from_config("memory", HashMap::new(), "cache/", Duration::from_secs(0))
+            .expect("memory backend should build");
+        store.put(99, "ttl-body".into(), dummy_response("ttl-resp")).await;
+        // Wait for the wall clock to advance past the expires_at timestamp.
+        tokio::time::sleep(Duration::from_millis(1100)).await;
+        let result = store.get(99, "ttl-body").await;
+        assert!(result.is_none(), "expired entry should return None");
+    }
+    /// Redis cache via OpenDAL: put/get round-trip, collision guard, and remove.
+    ///
+    /// Requires a running Redis instance at localhost:6379
+    /// (e.g. via `docker compose up -d redis`; see docker-compose.yml).
+    #[tokio::test]
+    #[ignore = "requires Redis on localhost:6379"]
+    async fn redis_cache_put_get_ttl_remove() {
+        let mut config = HashMap::new();
+        config.insert("connection_string".into(), "redis://localhost:6379".into());
+        let store = OpenDalCacheStore::from_config("redis", config, "liter-test/", Duration::from_secs(300))
+            .expect("redis backend should build");
+        // Put
+        store.put(1, "redis-body".into(), dummy_response("redis-resp")).await;
+        // Get — should hit.
+        let result = store.get(1, "redis-body").await;
+        assert!(result.is_some(), "redis cache should return stored entry");
+        match result.unwrap() {
+            CachedResponse::Chat(r) => assert_eq!(r.id, "redis-resp"),
+            _ => panic!("expected CachedResponse::Chat"),
+        }
+        // Collision guard.
+        let miss = store.get(1, "wrong-body").await;
+        assert!(miss.is_none(), "redis cache should miss on body mismatch");
+        // Remove.
+        store.remove(1).await;
+        let after_remove = store.get(1, "redis-body").await;
+        assert!(after_remove.is_none(), "entry should be gone after remove");
+    }
+}