RubyGems - liter_llm - Versions diffs - 1.0.0.pre.rc.6 - Mend

liter_llm 1.0.0.pre.rc.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78) hide show

checksums.yaml +7 -0
data/README.md +239 -0
data/ext/liter_llm_rb/extconf.rb +65 -0
data/ext/liter_llm_rb/native/.cargo/config.toml +23 -0
data/ext/liter_llm_rb/native/Cargo.lock +3713 -0
data/ext/liter_llm_rb/native/Cargo.toml +32 -0
data/ext/liter_llm_rb/native/build.rs +15 -0
data/ext/liter_llm_rb/native/src/lib.rs +1079 -0
data/lib/liter_llm.rb +8 -0
data/sig/liter_llm.rbs +416 -0
data/vendor/Cargo.toml +54 -0
data/vendor/liter-llm/Cargo.toml +92 -0
data/vendor/liter-llm/README.md +252 -0
data/vendor/liter-llm/schemas/pricing.json +40 -0
data/vendor/liter-llm/schemas/providers.json +1662 -0
data/vendor/liter-llm/src/auth/azure_ad.rs +264 -0
data/vendor/liter-llm/src/auth/bedrock_sts.rs +353 -0
data/vendor/liter-llm/src/auth/mod.rs +68 -0
data/vendor/liter-llm/src/auth/vertex_oauth.rs +353 -0
data/vendor/liter-llm/src/client/config.rs +351 -0
data/vendor/liter-llm/src/client/managed.rs +622 -0
data/vendor/liter-llm/src/client/mod.rs +864 -0
data/vendor/liter-llm/src/cost.rs +212 -0
data/vendor/liter-llm/src/error.rs +190 -0
data/vendor/liter-llm/src/http/eventstream.rs +860 -0
data/vendor/liter-llm/src/http/mod.rs +12 -0
data/vendor/liter-llm/src/http/request.rs +438 -0
data/vendor/liter-llm/src/http/retry.rs +72 -0
data/vendor/liter-llm/src/http/streaming.rs +289 -0
data/vendor/liter-llm/src/lib.rs +37 -0
data/vendor/liter-llm/src/provider/anthropic.rs +2250 -0
data/vendor/liter-llm/src/provider/azure.rs +579 -0
data/vendor/liter-llm/src/provider/bedrock.rs +1543 -0
data/vendor/liter-llm/src/provider/cohere.rs +654 -0
data/vendor/liter-llm/src/provider/custom.rs +404 -0
data/vendor/liter-llm/src/provider/google_ai.rs +281 -0
data/vendor/liter-llm/src/provider/mistral.rs +188 -0
data/vendor/liter-llm/src/provider/mod.rs +616 -0
data/vendor/liter-llm/src/provider/vertex.rs +1504 -0
data/vendor/liter-llm/src/tests.rs +1425 -0
data/vendor/liter-llm/src/tokenizer.rs +281 -0
data/vendor/liter-llm/src/tower/budget.rs +599 -0
data/vendor/liter-llm/src/tower/cache.rs +502 -0
data/vendor/liter-llm/src/tower/cache_opendal.rs +270 -0
data/vendor/liter-llm/src/tower/cooldown.rs +231 -0
data/vendor/liter-llm/src/tower/cost.rs +404 -0
data/vendor/liter-llm/src/tower/fallback.rs +121 -0
data/vendor/liter-llm/src/tower/health.rs +219 -0
data/vendor/liter-llm/src/tower/hooks.rs +369 -0
data/vendor/liter-llm/src/tower/mod.rs +77 -0
data/vendor/liter-llm/src/tower/rate_limit.rs +300 -0
data/vendor/liter-llm/src/tower/router.rs +436 -0
data/vendor/liter-llm/src/tower/service.rs +181 -0
data/vendor/liter-llm/src/tower/tests.rs +539 -0
data/vendor/liter-llm/src/tower/tests_common.rs +252 -0
data/vendor/liter-llm/src/tower/tracing.rs +209 -0
data/vendor/liter-llm/src/tower/types.rs +170 -0
data/vendor/liter-llm/src/types/audio.rs +52 -0
data/vendor/liter-llm/src/types/batch.rs +77 -0
data/vendor/liter-llm/src/types/chat.rs +214 -0
data/vendor/liter-llm/src/types/common.rs +244 -0
data/vendor/liter-llm/src/types/embedding.rs +84 -0
data/vendor/liter-llm/src/types/files.rs +58 -0
data/vendor/liter-llm/src/types/image.rs +40 -0
data/vendor/liter-llm/src/types/mod.rs +27 -0
data/vendor/liter-llm/src/types/models.rs +21 -0
data/vendor/liter-llm/src/types/moderation.rs +80 -0
data/vendor/liter-llm/src/types/ocr.rs +87 -0
data/vendor/liter-llm/src/types/rerank.rs +46 -0
data/vendor/liter-llm/src/types/responses.rs +55 -0
data/vendor/liter-llm/src/types/search.rs +45 -0
data/vendor/liter-llm/tests/contract.rs +332 -0
data/vendor/liter-llm-ffi/Cargo.toml +30 -0
data/vendor/liter-llm-ffi/build.rs +66 -0
data/vendor/liter-llm-ffi/cbindgen.toml +60 -0
data/vendor/liter-llm-ffi/liter_llm.h +850 -0
data/vendor/liter-llm-ffi/src/lib.rs +2488 -0
metadata +286 -0

data/vendor/liter-llm/src/tower/health.rs ADDED Viewed

@@ -0,0 +1,219 @@
+//! Health check middleware.
+//!
+//! [`HealthCheckLayer`] wraps a service and spawns a background task that
+//! periodically probes the service by sending a [`LlmRequest::ListModels`]
+//! request.  If the probe fails, the service is marked unhealthy and incoming
+//! requests are immediately rejected with [`LiterLlmError::ServiceUnavailable`].
+//!
+//! The health flag is an [`AtomicBool`] shared between the background probe
+//! task and the request path, so checking health adds minimal overhead (a
+//! single atomic load).
+use std::sync::Arc;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::task::{Context, Poll};
+use std::time::Duration;
+use tower::{Layer, Service};
+use super::types::{LlmRequest, LlmResponse};
+use crate::client::BoxFuture;
+use crate::error::{LiterLlmError, Result};
+// ---- Layer -----------------------------------------------------------------
+/// Tower [`Layer`] that monitors service health via periodic probes.
+///
+/// The background health-check task is spawned when the layer wraps a service
+/// (i.e. when [`Layer::layer`] is called).  The task runs until the
+/// [`HealthCheckService`] (and all its clones) are dropped.
+///
+/// The layer only stores the probe interval, so it is cheap to clone and can
+/// be reused to wrap several services.
+#[derive(Clone, Debug)]
+pub struct HealthCheckLayer {
+    /// Delay between two consecutive health probes.
+    interval: Duration,
+}
+impl HealthCheckLayer {
+    /// Create a new health-check layer that probes every `interval`.
+    #[must_use]
+    pub fn new(interval: Duration) -> Self {
+        Self { interval }
+    }
+}
+impl<S> Layer<S> for HealthCheckLayer
+where
+    S: Service<LlmRequest, Response = LlmResponse, Error = LiterLlmError> + Clone + Send + 'static,
+    S::Future: Send + 'static,
+{
+    type Service = HealthCheckService<S>;
+    /// Wraps `inner` and starts its background health probe.
+    ///
+    /// The returned service starts out healthy.  The probe task receives its
+    /// own clone of `inner` and a second handle to the shared health flag.
+    fn layer(&self, inner: S) -> Self::Service {
+        let service = HealthCheckService {
+            inner,
+            healthy: Arc::new(AtomicBool::new(true)),
+        };
+        // The task is detached: its handle is dropped and the probe loop
+        // decides on its own when to stop.
+        // NOTE(review): `tokio::spawn` presumably needs an active Tokio
+        // runtime at this point — confirm that every caller builds the
+        // service stack from inside one.
+        tokio::spawn(run_health_probe(
+            service.inner.clone(),
+            Arc::clone(&service.healthy),
+            self.interval,
+        ));
+        service
+    }
+}
+// ---- Background probe ------------------------------------------------------
+/// Background loop that keeps the shared health flag up to date.
+///
+/// Every `interval` the loop probes `svc` with [`LlmRequest::ListModels`] and
+/// stores the outcome in `healthy` (`true` on `Ok`, `false` on `Err`).  It
+/// exits once this task holds the last reference to `healthy`, i.e. after
+/// every service handle sharing the flag has been dropped.
+async fn run_health_probe<S>(mut svc: S, healthy: Arc<AtomicBool>, interval: Duration)
+where
+    S: Service<LlmRequest, Response = LlmResponse, Error = LiterLlmError> + Send + 'static,
+    S::Future: Send + 'static,
+{
+    loop {
+        tokio::time::sleep(interval).await;
+        // If the Arc is held only by us, all service clones have been dropped
+        // and we should stop probing.
+        if Arc::strong_count(&healthy) <= 1 {
+            break;
+        }
+        // The tower `Service` contract requires `poll_ready` to report
+        // readiness before every `call`; some middleware panics otherwise.
+        // A readiness error is treated like a failed probe.
+        let readiness = std::future::poll_fn(|cx| svc.poll_ready(cx)).await;
+        let result = match readiness {
+            Ok(()) => svc.call(LlmRequest::ListModels).await,
+            Err(err) => Err(err),
+        };
+        let is_healthy = result.is_ok();
+        healthy.store(is_healthy, Ordering::Release);
+        if !is_healthy {
+            tracing::warn!("health check failed; marking service as unhealthy");
+        }
+    }
+}
+// ---- Service ---------------------------------------------------------------
+/// Service wrapper created by [`HealthCheckLayer`].
+///
+/// It pairs the wrapped service with the health flag that the background
+/// probe task writes to.
+pub struct HealthCheckService<S> {
+    /// The wrapped service that receives requests.
+    inner: S,
+    /// Outcome of the most recent probe, shared with the probe task.
+    healthy: Arc<AtomicBool>,
+}
+impl<S: Clone> Clone for HealthCheckService<S> {
+    /// Clones the wrapped service; the clone shares the same health flag.
+    fn clone(&self) -> Self {
+        let Self { inner, healthy } = self;
+        Self {
+            inner: inner.clone(),
+            healthy: Arc::clone(healthy),
+        }
+    }
+}
+impl<S> HealthCheckService<S> {
+    /// Reports the current value of the health flag: `true` while the most
+    /// recent probe succeeded (or before any probe has run).
+    #[must_use]
+    pub fn is_healthy(&self) -> bool {
+        let flag: &AtomicBool = &self.healthy;
+        flag.load(Ordering::Acquire)
+    }
+}
+impl<S> Service<LlmRequest> for HealthCheckService<S>
+where
+    S: Service<LlmRequest, Response = LlmResponse, Error = LiterLlmError> + Send + 'static,
+    S::Future: Send + 'static,
+{
+    type Response = LlmResponse;
+    type Error = LiterLlmError;
+    type Future = BoxFuture<'static, LlmResponse>;
+    /// Reports the readiness of the wrapped service.
+    ///
+    /// The health flag is deliberately not consulted here.  Under the tower
+    /// `Service` contract an error from `poll_ready` means the service can no
+    /// longer be used and should be discarded, whereas an unhealthy state is
+    /// temporary and clears as soon as a later probe succeeds.  Unhealthy
+    /// requests are rejected in [`Service::call`] instead.
+    fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<()>> {
+        // Always drive the inner service to readiness so that `call` may
+        // forward to it even if the flag flips back to healthy in between.
+        self.inner.poll_ready(cx)
+    }
+    /// Forwards `req` to the wrapped service while the health flag is set.
+    ///
+    /// While the flag is cleared the request is not forwarded; the returned
+    /// future resolves to [`LiterLlmError::ServiceUnavailable`].
+    fn call(&mut self, req: LlmRequest) -> Self::Future {
+        if !self.healthy.load(Ordering::Acquire) {
+            return Box::pin(async {
+                Err(LiterLlmError::ServiceUnavailable {
+                    message: "service is unhealthy (health check failed)".into(),
+                })
+            });
+        }
+        Box::pin(self.inner.call(req))
+    }
+}
+// ---- Tests -----------------------------------------------------------------
+// Unit tests for `HealthCheckService`.  Each test builds the service by hand
+// with an explicit health flag, so no background probe task is spawned.
+#[cfg(test)]
+mod tests {
+    use std::sync::atomic::Ordering;
+    use tower::Service as _;
+    use super::*;
+    use crate::tower::service::LlmService;
+    use crate::tower::tests_common::{MockClient, chat_req};
+    use crate::tower::types::LlmRequest;
+    /// With the flag set, the request reaches the inner service and succeeds.
+    #[tokio::test]
+    async fn healthy_service_passes_through() {
+        let inner = LlmService::new(MockClient::ok());
+        let healthy = Arc::new(AtomicBool::new(true));
+        let mut svc = HealthCheckService {
+            inner,
+            healthy: Arc::clone(&healthy),
+        };
+        let resp = svc.call(LlmRequest::Chat(chat_req("gpt-4"))).await;
+        assert!(resp.is_ok());
+    }
+    /// With the flag cleared, `call` yields `ServiceUnavailable`.
+    #[tokio::test]
+    async fn unhealthy_service_rejects_requests() {
+        let inner = LlmService::new(MockClient::ok());
+        let healthy = Arc::new(AtomicBool::new(false));
+        let mut svc = HealthCheckService {
+            inner,
+            healthy: Arc::clone(&healthy),
+        };
+        let err = svc
+            .call(LlmRequest::Chat(chat_req("gpt-4")))
+            .await
+            .expect_err("unhealthy service should reject");
+        assert!(matches!(err, LiterLlmError::ServiceUnavailable { .. }));
+    }
+    /// `is_healthy` tracks writes made through another handle to the flag.
+    #[tokio::test]
+    async fn is_healthy_reflects_flag() {
+        let inner = LlmService::new(MockClient::ok());
+        let healthy = Arc::new(AtomicBool::new(true));
+        let svc = HealthCheckService {
+            inner,
+            healthy: Arc::clone(&healthy),
+        };
+        assert!(svc.is_healthy());
+        healthy.store(false, Ordering::Release);
+        assert!(!svc.is_healthy());
+    }
+    /// Requests are accepted again once the flag is set back to `true`.
+    #[tokio::test]
+    async fn recovery_after_becoming_healthy_again() {
+        let inner = LlmService::new(MockClient::ok());
+        let healthy = Arc::new(AtomicBool::new(false));
+        let mut svc = HealthCheckService {
+            inner,
+            healthy: Arc::clone(&healthy),
+        };
+        // Unhealthy — should reject.
+        assert!(svc.call(LlmRequest::Chat(chat_req("gpt-4"))).await.is_err());
+        // Mark as healthy again.
+        healthy.store(true, Ordering::Release);
+        assert!(svc.call(LlmRequest::Chat(chat_req("gpt-4"))).await.is_ok());
+    }
+}

data/vendor/liter-llm/src/tower/hooks.rs ADDED Viewed

@@ -0,0 +1,369 @@
+//! Tower middleware that invokes user-defined hooks before and after requests.
+//!
+//! [`HooksLayer`] wraps any [`Service<LlmRequest>`] and calls registered
+//! [`LlmHook`] implementations at three lifecycle points:
+//!
+//! - **`on_request`** — before the request is forwarded to the inner service.
+//!   Returning `Err` from any hook short-circuits the chain (guardrail
+//!   rejection).
+//! - **`on_response`** — after a successful response from the inner service.
+//! - **`on_error`** — when the inner service returns an error.
+//!
+//! Hooks are invoked sequentially in registration order.
+//!
+//! # Example
+//!
+//! ```rust,ignore
+//! use std::sync::Arc;
+//! use liter_llm::tower::{HooksLayer, LlmHook, LlmService};
+//! use tower::ServiceBuilder;
+//!
+//! let hook: Arc<dyn LlmHook> = Arc::new(MyAuditHook);
+//! let service = ServiceBuilder::new()
+//!     .layer(HooksLayer::single(hook))
+//!     .service(LlmService::new(client));
+//! ```
+use std::future::Future;
+use std::panic::AssertUnwindSafe;
+use std::pin::Pin;
+use std::sync::Arc;
+use std::task::{Context, Poll};
+use futures_util::FutureExt as _;
+use tower::Layer;
+use tower::Service;
+use super::types::{LlmRequest, LlmResponse};
+use crate::client::BoxFuture;
+use crate::error::{LiterLlmError, Result};
+// ─── Hook Trait ──────────────────────────────────────────────────────────────
+/// Lifecycle callbacks for observing and guarding LLM requests.
+///
+/// Every method ships with a do-nothing default, so an implementor overrides
+/// only the lifecycle points it is interested in.
+pub trait LlmHook: Send + Sync + 'static {
+    /// Runs before the request is handed to the inner service.
+    ///
+    /// An `Err` aborts the whole service chain, which is what makes guardrails
+    /// such as content filtering or budget enforcement possible.  The default
+    /// accepts every request.
+    fn on_request(&self, _req: &LlmRequest) -> Pin<Box<dyn Future<Output = Result<()>> + Send + '_>> {
+        let accepted: Result<()> = Ok(());
+        Box::pin(std::future::ready(accepted))
+    }
+    /// Runs after the inner service produced a successful response.
+    ///
+    /// The default does nothing.
+    fn on_response(&self, _req: &LlmRequest, _resp: &LlmResponse) -> Pin<Box<dyn Future<Output = ()> + Send + '_>> {
+        Box::pin(std::future::ready(()))
+    }
+    /// Runs after the inner service returned an error.
+    ///
+    /// The default does nothing.
+    fn on_error(&self, _req: &LlmRequest, _err: &LiterLlmError) -> Pin<Box<dyn Future<Output = ()> + Send + '_>> {
+        Box::pin(std::future::ready(()))
+    }
+}
+// ─── Layer ───────────────────────────────────────────────────────────────────
+/// Tower [`Layer`] that equips a service with [`LlmHook`] callbacks.
+///
+/// The hook list lives behind an `Arc`, so the layer and every service built
+/// from it refer to the same hook instances instead of copying them.
+#[derive(Clone)]
+pub struct HooksLayer {
+    /// Registered hooks, in invocation order.
+    hooks: Arc<Vec<Arc<dyn LlmHook>>>,
+}
+impl HooksLayer {
+    /// Build a layer from a list of hooks.
+    ///
+    /// The hooks run one after another, in the order of the vector.
+    #[must_use]
+    pub fn new(hooks: Vec<Arc<dyn LlmHook>>) -> Self {
+        let hooks = Arc::new(hooks);
+        Self { hooks }
+    }
+    /// Shorthand for a layer that carries exactly one hook.
+    #[must_use]
+    pub fn single(hook: Arc<dyn LlmHook>) -> Self {
+        let only = vec![hook];
+        Self::new(only)
+    }
+}
+impl<S> Layer<S> for HooksLayer {
+    type Service = HooksService<S>;
+    /// Wraps `inner` in a [`HooksService`] that shares this layer's hook list.
+    fn layer(&self, inner: S) -> Self::Service {
+        // Only the `Arc` handle is duplicated; the hooks themselves are shared.
+        let hooks = Arc::clone(&self.hooks);
+        HooksService { inner, hooks }
+    }
+}
+// ─── Service ─────────────────────────────────────────────────────────────────
+/// Service wrapper created by [`HooksLayer`].
+pub struct HooksService<S> {
+    /// The wrapped service that receives requests.
+    inner: S,
+    /// Hooks to invoke around each request, in registration order.
+    hooks: Arc<Vec<Arc<dyn LlmHook>>>,
+}
+impl<S: Clone> Clone for HooksService<S> {
+    /// Clones the wrapped service; the clone shares the same hook list.
+    fn clone(&self) -> Self {
+        let Self { inner, hooks } = self;
+        Self {
+            inner: inner.clone(),
+            hooks: Arc::clone(hooks),
+        }
+    }
+}
+impl<S> Service<LlmRequest> for HooksService<S>
+where
+    S: Service<LlmRequest, Response = LlmResponse, Error = LiterLlmError> + Send + 'static,
+    S::Future: Send + 'static,
+{
+    type Response = LlmResponse;
+    type Error = LiterLlmError;
+    type Future = BoxFuture<'static, LlmResponse>;
+    /// Readiness is delegated to the inner service; hooks play no part in it.
+    fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<()>> {
+        self.inner.poll_ready(cx)
+    }
+    /// Runs the `on_request` hooks, awaits the inner future, then runs the
+    /// `on_response` or `on_error` hooks depending on the outcome.
+    ///
+    /// A pre-hook that returns `Err` ends the call with that error.  A
+    /// pre-hook that panics ends the call with
+    /// [`LiterLlmError::HookRejected`].  Panics in post-hooks are logged and
+    /// the inner service's result is returned unchanged.
+    fn call(&mut self, req: LlmRequest) -> Self::Future {
+        let hooks = Arc::clone(&self.hooks);
+        // Clone the request so we can pass it to post-hooks after the inner
+        // service consumes the original.
+        let req_clone = req.clone();
+        // `inner.call` is invoked here, before any `on_request` hook has run;
+        // only the future it returns is held back until the pre-hooks pass.
+        // NOTE(review): short-circuiting therefore relies on the inner service
+        // doing no work until its future is polled — confirm for inner
+        // services that act eagerly inside `call`.
+        let fut = self.inner.call(req);
+        Box::pin(async move {
+            // Pre-hooks: run sequentially; short-circuit on first Err or panic.
+            for hook in hooks.iter() {
+                let result = AssertUnwindSafe(hook.on_request(&req_clone)).catch_unwind().await;
+                match result {
+                    // Hook accepted the request; continue with the next one.
+                    Ok(Ok(())) => {}
+                    // Hook rejected the request; its error goes to the caller.
+                    Ok(Err(e)) => return Err(e),
+                    Err(_panic) => {
+                        tracing::error!("hook panicked during on_request");
+                        return Err(LiterLlmError::HookRejected {
+                            message: "hook panicked".into(),
+                        });
+                    }
+                }
+            }
+            match fut.await {
+                Ok(resp) => {
+                    // Post-hooks (success path) — panics are logged but do not
+                    // propagate so the caller still receives the response.
+                    for hook in hooks.iter() {
+                        if AssertUnwindSafe(hook.on_response(&req_clone, &resp))
+                            .catch_unwind()
+                            .await
+                            .is_err()
+                        {
+                            tracing::error!("hook panicked during on_response");
+                        }
+                    }
+                    Ok(resp)
+                }
+                Err(err) => {
+                    // Post-hooks (error path) — panics are logged but do not
+                    // replace the original error.
+                    for hook in hooks.iter() {
+                        if AssertUnwindSafe(hook.on_error(&req_clone, &err))
+                            .catch_unwind()
+                            .await
+                            .is_err()
+                        {
+                            tracing::error!("hook panicked during on_error");
+                        }
+                    }
+                    Err(err)
+                }
+            }
+        })
+    }
+}
+// ─── Tests ───────────────────────────────────────────────────────────────────
+// Unit tests for `HooksLayer` / `HooksService`, driven by small hook
+// implementations that count, reject, or record the order of their calls.
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+    use std::sync::atomic::{AtomicUsize, Ordering};
+    use tower::Layer as _;
+    use tower::Service as _;
+    use super::*;
+    use crate::tower::service::LlmService;
+    use crate::tower::tests_common::{MockClient, chat_req};
+    use crate::tower::types::{LlmRequest, LlmResponse};
+    // ── Test hook implementations ────────────────────────────────────────────
+    /// A hook that records how many times each callback was invoked.
+    struct CountingHook {
+        on_request_count: AtomicUsize,
+        on_response_count: AtomicUsize,
+        on_error_count: AtomicUsize,
+    }
+    impl CountingHook {
+        /// Creates a hook with all three counters at zero.
+        fn new() -> Self {
+            Self {
+                on_request_count: AtomicUsize::new(0),
+                on_response_count: AtomicUsize::new(0),
+                on_error_count: AtomicUsize::new(0),
+            }
+        }
+    }
+    // Each callback bumps its counter when it is called, i.e. before the
+    // returned future is awaited.
+    impl LlmHook for CountingHook {
+        fn on_request(&self, _req: &LlmRequest) -> Pin<Box<dyn Future<Output = Result<()>> + Send + '_>> {
+            self.on_request_count.fetch_add(1, Ordering::SeqCst);
+            Box::pin(async { Ok(()) })
+        }
+        fn on_response(&self, _req: &LlmRequest, _resp: &LlmResponse) -> Pin<Box<dyn Future<Output = ()> + Send + '_>> {
+            self.on_response_count.fetch_add(1, Ordering::SeqCst);
+            Box::pin(async {})
+        }
+        fn on_error(&self, _req: &LlmRequest, _err: &LiterLlmError) -> Pin<Box<dyn Future<Output = ()> + Send + '_>> {
+            self.on_error_count.fetch_add(1, Ordering::SeqCst);
+            Box::pin(async {})
+        }
+    }
+    /// A hook that rejects all requests (guardrail).
+    struct RejectAllHook;
+    impl LlmHook for RejectAllHook {
+        fn on_request(&self, _req: &LlmRequest) -> Pin<Box<dyn Future<Output = Result<()>> + Send + '_>> {
+            Box::pin(async {
+                Err(LiterLlmError::HookRejected {
+                    message: "rejected by guardrail".into(),
+                })
+            })
+        }
+    }
+    /// A hook that records its invocation order into a shared vector.
+    struct OrderTrackingHook {
+        id: usize,
+        order: Arc<std::sync::Mutex<Vec<usize>>>,
+    }
+    // `on_request` records `id`; `on_response` records `id + 100` so the two
+    // phases can be told apart in the shared vector.
+    impl LlmHook for OrderTrackingHook {
+        fn on_request(&self, _req: &LlmRequest) -> Pin<Box<dyn Future<Output = Result<()>> + Send + '_>> {
+            self.order.lock().expect("lock poisoned").push(self.id);
+            Box::pin(async { Ok(()) })
+        }
+        fn on_response(&self, _req: &LlmRequest, _resp: &LlmResponse) -> Pin<Box<dyn Future<Output = ()> + Send + '_>> {
+            self.order.lock().expect("lock poisoned").push(self.id + 100);
+            Box::pin(async {})
+        }
+    }
+    // ── Tests ────────────────────────────────────────────────────────────────
+    /// A successful call invokes `on_request` exactly once.
+    #[tokio::test]
+    async fn on_request_hook_is_called() {
+        let hook = Arc::new(CountingHook::new());
+        let inner = LlmService::new(MockClient::ok());
+        let mut svc = HooksLayer::single(Arc::clone(&hook) as Arc<dyn LlmHook>).layer(inner);
+        let _resp = svc
+            .call(LlmRequest::Chat(chat_req("gpt-4")))
+            .await
+            .expect("should succeed");
+        assert_eq!(hook.on_request_count.load(Ordering::SeqCst), 1);
+    }
+    /// A successful call invokes `on_response` once and never `on_error`.
+    #[tokio::test]
+    async fn on_response_hook_is_called_on_success() {
+        let hook = Arc::new(CountingHook::new());
+        let inner = LlmService::new(MockClient::ok());
+        let mut svc = HooksLayer::single(Arc::clone(&hook) as Arc<dyn LlmHook>).layer(inner);
+        let _resp = svc
+            .call(LlmRequest::Chat(chat_req("gpt-4")))
+            .await
+            .expect("should succeed");
+        assert_eq!(hook.on_response_count.load(Ordering::SeqCst), 1);
+        assert_eq!(hook.on_error_count.load(Ordering::SeqCst), 0);
+    }
+    /// A failing call invokes `on_error` once, never `on_response`, and the
+    /// original error reaches the caller.
+    #[tokio::test]
+    async fn on_error_hook_is_called_on_failure() {
+        let hook = Arc::new(CountingHook::new());
+        let inner = LlmService::new(MockClient::failing_timeout());
+        let mut svc = HooksLayer::single(Arc::clone(&hook) as Arc<dyn LlmHook>).layer(inner);
+        let err = svc
+            .call(LlmRequest::Chat(chat_req("gpt-4")))
+            .await
+            .expect_err("should fail");
+        assert!(matches!(err, LiterLlmError::Timeout));
+        assert_eq!(hook.on_error_count.load(Ordering::SeqCst), 1);
+        assert_eq!(hook.on_response_count.load(Ordering::SeqCst), 0);
+    }
+    /// A rejecting pre-hook yields `HookRejected` and leaves the mock client's
+    /// call counter at zero.
+    #[tokio::test]
+    async fn guardrail_rejection_short_circuits_inner_service() {
+        let mock = MockClient::ok();
+        let call_count = Arc::clone(&mock.call_count);
+        let inner = LlmService::new(mock);
+        let mut svc = HooksLayer::single(Arc::new(RejectAllHook) as Arc<dyn LlmHook>).layer(inner);
+        let err = svc
+            .call(LlmRequest::Chat(chat_req("gpt-4")))
+            .await
+            .expect_err("should be rejected by guardrail");
+        assert!(matches!(err, LiterLlmError::HookRejected { .. }));
+        // The inner service must NOT have been called.
+        assert_eq!(call_count.load(Ordering::SeqCst), 0);
+    }
+    /// Three hooks run in registration order, first for the request phase and
+    /// then for the response phase.
+    #[tokio::test]
+    async fn multiple_hooks_called_in_registration_order() {
+        let order = Arc::new(std::sync::Mutex::new(Vec::new()));
+        let hooks: Vec<Arc<dyn LlmHook>> = vec![
+            Arc::new(OrderTrackingHook {
+                id: 1,
+                order: Arc::clone(&order),
+            }),
+            Arc::new(OrderTrackingHook {
+                id: 2,
+                order: Arc::clone(&order),
+            }),
+            Arc::new(OrderTrackingHook {
+                id: 3,
+                order: Arc::clone(&order),
+            }),
+        ];
+        let inner = LlmService::new(MockClient::ok());
+        let mut svc = HooksLayer::new(hooks).layer(inner);
+        let _resp = svc
+            .call(LlmRequest::Chat(chat_req("gpt-4")))
+            .await
+            .expect("should succeed");
+        let recorded = order.lock().expect("lock poisoned").clone();
+        // Pre-hooks: 1, 2, 3 then post-hooks: 101, 102, 103
+        assert_eq!(recorded, vec![1, 2, 3, 101, 102, 103]);
+    }
+}

data/vendor/liter-llm/src/tower/mod.rs ADDED Viewed

@@ -0,0 +1,77 @@
+//! Tower middleware integration for [`crate::client::LlmClient`].
+//!
+//! This module is only compiled when the `tower` feature is enabled.  It
+//! provides:
+//!
+//! - [`types::LlmRequest`] / [`types::LlmResponse`] — the request/response
+//!   enums that cross the tower `Service` boundary.
+//! - [`service::LlmService`] — a thin `tower::Service` wrapper around any
+//!   [`crate::client::LlmClient`].
+//! - [`tracing::TracingLayer`] / [`tracing::TracingService`] — OTEL-compatible
+//!   tracing middleware.
+//! - [`fallback::FallbackLayer`] / [`fallback::FallbackService`] — route to a
+//!   backup service on transient errors.
+//! - [`cost::CostTrackingLayer`] / [`cost::CostTrackingService`] — emit
+//!   `gen_ai.usage.cost` tracing span attribute from embedded pricing data.
+//! - [`rate_limit::ModelRateLimitLayer`] / [`rate_limit::ModelRateLimitService`]
+//!   — per-model RPM / TPM rate limiting.
+//! - [`cache::CacheLayer`] / [`cache::CacheService`] — in-memory response
+//!   caching for non-streaming requests.
+//! - [`cooldown::CooldownLayer`] / [`cooldown::CooldownService`] — deployment
+//!   cooldowns after transient errors.
+//! - [`health::HealthCheckLayer`] / [`health::HealthCheckService`] — periodic
+//!   health probes with automatic request rejection on failure.
+//! - [`budget::BudgetLayer`] / [`budget::BudgetService`] — global and per-model
+//!   spending budget enforcement (hard reject or soft warn).
+//! - [`hooks::HooksLayer`] / [`hooks::HooksService`] — user-defined pre/post
+//!   request hooks for guardrails, logging, and auditing.
+//!
+//! # Example
+//!
+//! ```rust,ignore
+//! use liter_llm::tower::{CostTrackingLayer, LlmService, TracingLayer};
+//! use tower::ServiceBuilder;
+//!
+//! let client = liter_llm::DefaultClient::new(config, None)?;
+//! let service = ServiceBuilder::new()
+//!     .layer(TracingLayer)
+//!     .layer(CostTrackingLayer)
+//!     .service(LlmService::new(client));
+//! ```
+pub mod budget;
+pub mod cache;
+#[cfg(feature = "opendal-cache")]
+pub mod cache_opendal;
+pub mod cooldown;
+pub mod cost;
+pub mod fallback;
+pub mod health;
+pub mod hooks;
+pub mod rate_limit;
+pub mod router;
+pub mod service;
+#[cfg(test)]
+mod tests;
+#[cfg(test)]
+pub(crate) mod tests_common;
+pub mod tracing;
+pub mod types;
+// Re-export tower core types for convenient access
+pub use tower::ServiceExt;
+pub use budget::{BudgetConfig, BudgetLayer, BudgetService, BudgetState, Enforcement};
+pub use cache::{CacheBackend, CacheConfig, CacheLayer, CacheService, CacheStore, CachedResponse, InMemoryStore};
+#[cfg(feature = "opendal-cache")]
+pub use cache_opendal::OpenDalCacheStore;
+pub use cooldown::{CooldownLayer, CooldownService};
+pub use cost::{CostTrackingLayer, CostTrackingService};
+pub use fallback::{FallbackLayer, FallbackService};
+pub use health::{HealthCheckLayer, HealthCheckService};
+pub use hooks::{HooksLayer, HooksService, LlmHook};
+pub use rate_limit::{ModelRateLimitLayer, ModelRateLimitService, RateLimitConfig};
+pub use router::{Router, RoutingStrategy};
+pub use service::LlmService;
+pub use tracing::{TracingLayer, TracingService};
+pub use types::{LlmRequest, LlmResponse};