liter_llm 1.0.0.pre.rc.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. checksums.yaml +7 -0
  2. data/README.md +239 -0
  3. data/ext/liter_llm_rb/extconf.rb +65 -0
  4. data/ext/liter_llm_rb/native/.cargo/config.toml +23 -0
  5. data/ext/liter_llm_rb/native/Cargo.lock +3713 -0
  6. data/ext/liter_llm_rb/native/Cargo.toml +32 -0
  7. data/ext/liter_llm_rb/native/build.rs +15 -0
  8. data/ext/liter_llm_rb/native/src/lib.rs +1079 -0
  9. data/lib/liter_llm.rb +8 -0
  10. data/sig/liter_llm.rbs +416 -0
  11. data/vendor/Cargo.toml +54 -0
  12. data/vendor/liter-llm/Cargo.toml +92 -0
  13. data/vendor/liter-llm/README.md +252 -0
  14. data/vendor/liter-llm/schemas/pricing.json +40 -0
  15. data/vendor/liter-llm/schemas/providers.json +1662 -0
  16. data/vendor/liter-llm/src/auth/azure_ad.rs +264 -0
  17. data/vendor/liter-llm/src/auth/bedrock_sts.rs +353 -0
  18. data/vendor/liter-llm/src/auth/mod.rs +68 -0
  19. data/vendor/liter-llm/src/auth/vertex_oauth.rs +353 -0
  20. data/vendor/liter-llm/src/client/config.rs +351 -0
  21. data/vendor/liter-llm/src/client/managed.rs +622 -0
  22. data/vendor/liter-llm/src/client/mod.rs +864 -0
  23. data/vendor/liter-llm/src/cost.rs +212 -0
  24. data/vendor/liter-llm/src/error.rs +190 -0
  25. data/vendor/liter-llm/src/http/eventstream.rs +860 -0
  26. data/vendor/liter-llm/src/http/mod.rs +12 -0
  27. data/vendor/liter-llm/src/http/request.rs +438 -0
  28. data/vendor/liter-llm/src/http/retry.rs +72 -0
  29. data/vendor/liter-llm/src/http/streaming.rs +289 -0
  30. data/vendor/liter-llm/src/lib.rs +37 -0
  31. data/vendor/liter-llm/src/provider/anthropic.rs +2250 -0
  32. data/vendor/liter-llm/src/provider/azure.rs +579 -0
  33. data/vendor/liter-llm/src/provider/bedrock.rs +1543 -0
  34. data/vendor/liter-llm/src/provider/cohere.rs +654 -0
  35. data/vendor/liter-llm/src/provider/custom.rs +404 -0
  36. data/vendor/liter-llm/src/provider/google_ai.rs +281 -0
  37. data/vendor/liter-llm/src/provider/mistral.rs +188 -0
  38. data/vendor/liter-llm/src/provider/mod.rs +616 -0
  39. data/vendor/liter-llm/src/provider/vertex.rs +1504 -0
  40. data/vendor/liter-llm/src/tests.rs +1425 -0
  41. data/vendor/liter-llm/src/tokenizer.rs +281 -0
  42. data/vendor/liter-llm/src/tower/budget.rs +599 -0
  43. data/vendor/liter-llm/src/tower/cache.rs +502 -0
  44. data/vendor/liter-llm/src/tower/cache_opendal.rs +270 -0
  45. data/vendor/liter-llm/src/tower/cooldown.rs +231 -0
  46. data/vendor/liter-llm/src/tower/cost.rs +404 -0
  47. data/vendor/liter-llm/src/tower/fallback.rs +121 -0
  48. data/vendor/liter-llm/src/tower/health.rs +219 -0
  49. data/vendor/liter-llm/src/tower/hooks.rs +369 -0
  50. data/vendor/liter-llm/src/tower/mod.rs +77 -0
  51. data/vendor/liter-llm/src/tower/rate_limit.rs +300 -0
  52. data/vendor/liter-llm/src/tower/router.rs +436 -0
  53. data/vendor/liter-llm/src/tower/service.rs +181 -0
  54. data/vendor/liter-llm/src/tower/tests.rs +539 -0
  55. data/vendor/liter-llm/src/tower/tests_common.rs +252 -0
  56. data/vendor/liter-llm/src/tower/tracing.rs +209 -0
  57. data/vendor/liter-llm/src/tower/types.rs +170 -0
  58. data/vendor/liter-llm/src/types/audio.rs +52 -0
  59. data/vendor/liter-llm/src/types/batch.rs +77 -0
  60. data/vendor/liter-llm/src/types/chat.rs +214 -0
  61. data/vendor/liter-llm/src/types/common.rs +244 -0
  62. data/vendor/liter-llm/src/types/embedding.rs +84 -0
  63. data/vendor/liter-llm/src/types/files.rs +58 -0
  64. data/vendor/liter-llm/src/types/image.rs +40 -0
  65. data/vendor/liter-llm/src/types/mod.rs +27 -0
  66. data/vendor/liter-llm/src/types/models.rs +21 -0
  67. data/vendor/liter-llm/src/types/moderation.rs +80 -0
  68. data/vendor/liter-llm/src/types/ocr.rs +87 -0
  69. data/vendor/liter-llm/src/types/rerank.rs +46 -0
  70. data/vendor/liter-llm/src/types/responses.rs +55 -0
  71. data/vendor/liter-llm/src/types/search.rs +45 -0
  72. data/vendor/liter-llm/tests/contract.rs +332 -0
  73. data/vendor/liter-llm-ffi/Cargo.toml +30 -0
  74. data/vendor/liter-llm-ffi/build.rs +66 -0
  75. data/vendor/liter-llm-ffi/cbindgen.toml +60 -0
  76. data/vendor/liter-llm-ffi/liter_llm.h +850 -0
  77. data/vendor/liter-llm-ffi/src/lib.rs +2488 -0
  78. metadata +286 -0
@@ -0,0 +1,622 @@
1
+ //! A managed LLM client that optionally routes requests through a Tower
2
+ //! middleware stack (cache, budget, hooks, cooldown, rate limiting, health
3
+ //! checks, cost tracking, tracing) when the corresponding [`ClientConfig`]
4
+ //! fields are set.
5
+ //!
6
+ //! When no middleware is configured the client delegates directly to the
7
+ //! underlying [`DefaultClient`], adding zero overhead. When middleware *is*
8
+ //! configured, each [`LlmClient`] method converts its typed request into an
9
+ //! [`LlmRequest`], sends it through a cloned Tower service stack, and extracts
10
+ //! the typed response from the resulting [`LlmResponse`].
11
+ //!
12
+ //! # Tower `Service::call` takes `&mut self`
13
+ //!
14
+ //! The [`LlmClient`] trait requires `&self` receivers but Tower's
15
+ //! `Service::call` takes `&mut self`. All our middleware services are `Clone`
16
+ //! (state is behind `Arc`) so we clone the service per call — this is a cheap
17
+ //! series of `Arc` reference-count bumps.
18
+ //!
19
+ //! Tower's [`BoxCloneService`](tower::util::BoxCloneService) is `Send` but not
20
+ //! `Sync` (its inner trait object is `dyn ... + Send`). Since [`LlmClient`]
21
+ //! requires `Sync`, we wrap the service in a [`std::sync::Mutex`] that is held
22
+ //! only for the brief duration of `Clone::clone` (a few `Arc` ref-count bumps).
23
+ //! This makes `ManagedClient` `Sync` with negligible contention.
24
+
25
+ use std::sync::{Arc, Mutex};
26
+
27
+ use tower::{Layer, Service};
28
+
29
+ use super::config::ClientConfig;
30
+ use super::{BatchClient, BoxFuture, BoxStream, DefaultClient, FileClient, LlmClient, ResponseClient};
31
+ use crate::error::{LiterLlmError, Result};
32
+ #[cfg(feature = "opendal-cache")]
33
+ use crate::tower::OpenDalCacheStore;
34
+ use crate::tower::types::{LlmRequest, LlmResponse};
35
+ use crate::tower::{
36
+ BudgetLayer, BudgetState, CacheBackend, CacheLayer, CooldownLayer, CostTrackingLayer, HealthCheckLayer, HooksLayer,
37
+ LlmService, ModelRateLimitLayer, TracingLayer,
38
+ };
39
+ use crate::types::audio::{CreateSpeechRequest, CreateTranscriptionRequest, TranscriptionResponse};
40
+ use crate::types::batch::{BatchListQuery, BatchListResponse, BatchObject, CreateBatchRequest};
41
+ use crate::types::files::{CreateFileRequest, DeleteResponse, FileListQuery, FileListResponse, FileObject};
42
+ use crate::types::image::{CreateImageRequest, ImagesResponse};
43
+ use crate::types::moderation::{ModerationRequest, ModerationResponse};
44
+ use crate::types::ocr::{OcrRequest, OcrResponse};
45
+ use crate::types::rerank::{RerankRequest, RerankResponse};
46
+ use crate::types::responses::{CreateResponseRequest, ResponseObject};
47
+ use crate::types::search::{SearchRequest, SearchResponse};
48
+ use crate::types::{
49
+ ChatCompletionChunk, ChatCompletionRequest, ChatCompletionResponse, EmbeddingRequest, EmbeddingResponse,
50
+ ModelsListResponse,
51
+ };
52
+
53
+ // ---------------------------------------------------------------------------
54
+ // Type-erased Tower service wrapper
55
+ // ---------------------------------------------------------------------------
56
+
57
/// A `Send + Sync` wrapper around [`tower::util::BoxCloneService`].
///
/// `BoxCloneService` is `Send` but not `Sync` because its inner trait object
/// is only bounded by `Send`. A [`Mutex<T>`] is `Sync` whenever `T: Send`, so
/// placing the boxed service behind a mutex is sufficient to make this wrapper
/// `Sync` — no `unsafe` code and no assumption about the concrete middleware
/// types is required.
///
/// The lock is taken in `SyncService::clone_service`, which holds it only for
/// the duration of `Clone::clone` on the boxed service.
struct SyncService {
    /// The type-erased middleware stack. Locked only in order to clone it.
    inner: Mutex<tower::util::BoxCloneService<LlmRequest, LlmResponse, LiterLlmError>>,
}
67
+
68
+ impl SyncService {
69
+ /// Clone the inner service out of the mutex, returning an owned mutable
70
+ /// service that can be `.call()`-ed.
71
+ fn clone_service(&self) -> tower::util::BoxCloneService<LlmRequest, LlmResponse, LiterLlmError> {
72
+ self.inner.lock().expect("ManagedClient service mutex poisoned").clone()
73
+ }
74
+ }
75
+
76
+ // ---------------------------------------------------------------------------
77
+ // ManagedClient
78
+ // ---------------------------------------------------------------------------
79
+
80
/// A managed LLM client that wraps [`DefaultClient`] with optional Tower
/// middleware (cache, cooldown, rate limiting, health checks, cost tracking,
/// budget, hooks, tracing).
///
/// Construct via [`ManagedClient::new`]. If the provided [`ClientConfig`]
/// contains any middleware configuration, the corresponding Tower layers are
/// composed into a service stack. Otherwise requests pass straight through
/// to the inner [`DefaultClient`].
///
/// `ManagedClient` implements [`LlmClient`], [`FileClient`], [`BatchClient`]
/// and [`ResponseClient`]. Only the [`LlmClient`] methods are routed through
/// the middleware stack; the other traits delegate directly to the inner
/// client.
pub struct ManagedClient {
    /// The raw client — used directly when no middleware is configured, and
    /// also wrapped by the Tower service when middleware *is* configured.
    inner: Arc<DefaultClient>,

    /// When `Some`, requests are routed through this Tower service stack
    /// instead of going directly to `inner`.
    service: Option<SyncService>,

    /// Budget state handle, exposed so callers can query accumulated spend.
    /// `None` when no budget middleware is configured.
    budget_state: Option<Arc<BudgetState>>,
}
104
+
105
+ // NOTE: no `unsafe impl` is needed. `SyncService` wraps a `Mutex<BoxCloneService>`, which is `Send + Sync`.
106
+ // `Arc<DefaultClient>` and `Arc<BudgetState>` are both `Send + Sync`.
107
+ // The compiler can verify Send + Sync on `ManagedClient` automatically now
108
+ // that `SyncService` is `Send + Sync` (Mutex<T: Send> is Sync).
109
+
110
+ impl ManagedClient {
111
+ /// Build a managed client.
112
+ ///
113
+ /// `model_hint` guides provider auto-detection — see
114
+ /// [`DefaultClient::new`] for details.
115
+ ///
116
+ /// If the config contains any middleware settings (cache, budget, hooks,
117
+ /// cooldown, rate limit, health check, cost tracking, tracing) the
118
+ /// corresponding Tower layers are composed into a service stack.
119
+ /// Otherwise requests pass straight through to the inner client.
120
+ ///
121
+ /// # Errors
122
+ ///
123
+ /// Returns an error if the underlying [`DefaultClient`] cannot be
124
+ /// constructed (e.g. invalid headers or HTTP client build failure).
125
+ pub fn new(config: ClientConfig, model_hint: Option<&str>) -> Result<Self> {
126
+ let client = DefaultClient::new(config.clone(), model_hint)?;
127
+ let inner = Arc::new(client);
128
+
129
+ let (service, budget_state) = build_service_stack(&config, Arc::clone(&inner));
130
+
131
+ Ok(Self {
132
+ inner,
133
+ service,
134
+ budget_state,
135
+ })
136
+ }
137
+
138
+ /// Return a reference to the underlying [`DefaultClient`].
139
+ #[must_use]
140
+ pub fn inner(&self) -> &DefaultClient {
141
+ &self.inner
142
+ }
143
+
144
+ /// Return the budget state handle, if budget middleware is configured.
145
+ ///
146
+ /// Use this to query accumulated spend at runtime.
147
+ #[must_use]
148
+ pub fn budget_state(&self) -> Option<&Arc<BudgetState>> {
149
+ self.budget_state.as_ref()
150
+ }
151
+
152
+ /// Return `true` when middleware is active (requests go through the Tower
153
+ /// service stack).
154
+ #[must_use]
155
+ pub fn has_middleware(&self) -> bool {
156
+ self.service.is_some()
157
+ }
158
+
159
+ // -- helpers ----------------------------------------------------------
160
+
161
+ /// Clone the Tower service and call it with `req`, returning the raw
162
+ /// [`LlmResponse`].
163
+ fn call_service(&self, req: LlmRequest) -> BoxFuture<'static, LlmResponse> {
164
+ let mut svc = match self.service.as_ref() {
165
+ Some(s) => s.clone_service(),
166
+ None => {
167
+ return Box::pin(async {
168
+ Err(LiterLlmError::InternalError {
169
+ message: "call_service called without middleware stack".into(),
170
+ })
171
+ });
172
+ }
173
+ };
174
+ Box::pin(async move { svc.call(req).await })
175
+ }
176
+ }
177
+
178
+ /// Inspect the config and, when at least one middleware option is set,
179
+ /// compose a Tower service stack wrapping the given client.
180
+ ///
181
+ /// Returns `(Some(service), budget_state)` when middleware is configured,
182
+ /// or `(None, None)` when the config has no middleware.
183
+ fn build_service_stack(
184
+ config: &ClientConfig,
185
+ client: Arc<DefaultClient>,
186
+ ) -> (Option<SyncService>, Option<Arc<BudgetState>>) {
187
+ let has_cache = config.cache_config.is_some();
188
+ let has_budget = config.budget_config.is_some();
189
+ let has_hooks = !config.hooks.is_empty();
190
+ let has_cooldown = config.cooldown_duration.is_some();
191
+ let has_rate_limit = config.rate_limit_config.is_some();
192
+ let has_health_check = config.health_check_interval.is_some();
193
+ let has_cost = config.enable_cost_tracking;
194
+ let has_tracing = config.enable_tracing;
195
+
196
+ if !has_cache
197
+ && !has_budget
198
+ && !has_hooks
199
+ && !has_cooldown
200
+ && !has_rate_limit
201
+ && !has_health_check
202
+ && !has_cost
203
+ && !has_tracing
204
+ {
205
+ return (None, None);
206
+ }
207
+
208
+ // Start with the base LlmService wrapping the DefaultClient.
209
+ let base = LlmService::new_from_arc(client);
210
+
211
+ let mut budget_state: Option<Arc<BudgetState>> = None;
212
+
213
+ // We cannot use ServiceBuilder generics easily when layers are optional,
214
+ // so we type-erase into BoxCloneService at each step.
215
+ type Bcs = tower::util::BoxCloneService<LlmRequest, LlmResponse, LiterLlmError>;
216
+
217
+ // Start by boxing the base service.
218
+ let svc: Bcs = tower::util::BoxCloneService::new(base);
219
+
220
+ // 1. Cache (innermost — avoids hitting downstream for cached responses).
221
+ let svc = if let Some(ref cache_cfg) = config.cache_config {
222
+ let layer = if let Some(ref store) = config.cache_store {
223
+ CacheLayer::with_store(Arc::clone(store))
224
+ } else {
225
+ match &cache_cfg.backend {
226
+ CacheBackend::Memory => CacheLayer::new(cache_cfg.clone()),
227
+ #[cfg(feature = "opendal-cache")]
228
+ CacheBackend::OpenDal {
229
+ scheme,
230
+ config: backend_config,
231
+ } => {
232
+ match OpenDalCacheStore::from_config(scheme, backend_config.clone(), "llm-cache/", cache_cfg.ttl) {
233
+ Ok(store) => CacheLayer::with_store(Arc::new(store)),
234
+ Err(e) => {
235
+ tracing::warn!("Failed to create OpenDAL cache store, falling back to in-memory: {e}");
236
+ CacheLayer::new(cache_cfg.clone())
237
+ }
238
+ }
239
+ }
240
+ }
241
+ };
242
+ tower::util::BoxCloneService::new(layer.layer(svc))
243
+ } else {
244
+ svc
245
+ };
246
+
247
+ // 2. Health check — rejects requests when provider is unhealthy.
248
+ let svc = if let Some(interval) = config.health_check_interval {
249
+ let layer = HealthCheckLayer::new(interval);
250
+ tower::util::BoxCloneService::new(layer.layer(svc))
251
+ } else {
252
+ svc
253
+ };
254
+
255
+ // 3. Cooldown — rejects requests during cooldown after transient errors.
256
+ let svc = if let Some(duration) = config.cooldown_duration {
257
+ let layer = CooldownLayer::new(duration);
258
+ tower::util::BoxCloneService::new(layer.layer(svc))
259
+ } else {
260
+ svc
261
+ };
262
+
263
+ // 4. Rate limit — enforces per-model RPM/TPM limits.
264
+ let svc = if let Some(ref rl_cfg) = config.rate_limit_config {
265
+ let layer = ModelRateLimitLayer::new(rl_cfg.clone());
266
+ tower::util::BoxCloneService::new(layer.layer(svc))
267
+ } else {
268
+ svc
269
+ };
270
+
271
+ // 5. Cost tracking — records estimated USD cost on tracing spans.
272
+ let svc = if has_cost {
273
+ tower::util::BoxCloneService::new(CostTrackingLayer.layer(svc))
274
+ } else {
275
+ svc
276
+ };
277
+
278
+ // 6. Budget — enforces spending limits.
279
+ let svc = if let Some(ref budget_cfg) = config.budget_config {
280
+ let state = Arc::new(BudgetState::new());
281
+ budget_state = Some(Arc::clone(&state));
282
+ let layer = BudgetLayer::new(budget_cfg.clone(), state);
283
+ tower::util::BoxCloneService::new(layer.layer(svc))
284
+ } else {
285
+ svc
286
+ };
287
+
288
+ // 7. Hooks — user-defined pre/post request callbacks.
289
+ let svc = if has_hooks {
290
+ let layer = HooksLayer::new(config.hooks.clone());
291
+ tower::util::BoxCloneService::new(layer.layer(svc))
292
+ } else {
293
+ svc
294
+ };
295
+
296
+ // 8. Tracing (outermost — wraps everything in an OpenTelemetry span).
297
+ let svc = if has_tracing {
298
+ tower::util::BoxCloneService::new(TracingLayer.layer(svc))
299
+ } else {
300
+ svc
301
+ };
302
+
303
+ // Wrap in SyncService so ManagedClient is Sync.
304
+ (Some(SyncService { inner: Mutex::new(svc) }), budget_state)
305
+ }
306
+
307
+ // ---------------------------------------------------------------------------
308
+ // LlmClient implementation
309
+ // ---------------------------------------------------------------------------
310
+
311
+ impl LlmClient for ManagedClient {
312
+ fn chat(&self, req: ChatCompletionRequest) -> BoxFuture<'_, ChatCompletionResponse> {
313
+ if self.service.is_none() {
314
+ return self.inner.chat(req);
315
+ }
316
+ let fut = self.call_service(LlmRequest::Chat(req));
317
+ Box::pin(async move {
318
+ match fut.await? {
319
+ LlmResponse::Chat(r) => Ok(r),
320
+ other => Err(LiterLlmError::InternalError {
321
+ message: format!("expected Chat response, got {other:?}"),
322
+ }),
323
+ }
324
+ })
325
+ }
326
+
327
+ fn chat_stream(&self, req: ChatCompletionRequest) -> BoxFuture<'_, BoxStream<'_, ChatCompletionChunk>> {
328
+ if self.service.is_none() {
329
+ return self.inner.chat_stream(req);
330
+ }
331
+ let fut = self.call_service(LlmRequest::ChatStream(req));
332
+ Box::pin(async move {
333
+ match fut.await? {
334
+ LlmResponse::ChatStream(s) => Ok(s),
335
+ other => Err(LiterLlmError::InternalError {
336
+ message: format!("expected ChatStream response, got {other:?}"),
337
+ }),
338
+ }
339
+ })
340
+ }
341
+
342
+ fn embed(&self, req: EmbeddingRequest) -> BoxFuture<'_, EmbeddingResponse> {
343
+ if self.service.is_none() {
344
+ return self.inner.embed(req);
345
+ }
346
+ let fut = self.call_service(LlmRequest::Embed(req));
347
+ Box::pin(async move {
348
+ match fut.await? {
349
+ LlmResponse::Embed(r) => Ok(r),
350
+ other => Err(LiterLlmError::InternalError {
351
+ message: format!("expected Embed response, got {other:?}"),
352
+ }),
353
+ }
354
+ })
355
+ }
356
+
357
+ fn list_models(&self) -> BoxFuture<'_, ModelsListResponse> {
358
+ if self.service.is_none() {
359
+ return self.inner.list_models();
360
+ }
361
+ let fut = self.call_service(LlmRequest::ListModels);
362
+ Box::pin(async move {
363
+ match fut.await? {
364
+ LlmResponse::ListModels(r) => Ok(r),
365
+ other => Err(LiterLlmError::InternalError {
366
+ message: format!("expected ListModels response, got {other:?}"),
367
+ }),
368
+ }
369
+ })
370
+ }
371
+
372
+ fn image_generate(&self, req: CreateImageRequest) -> BoxFuture<'_, ImagesResponse> {
373
+ if self.service.is_none() {
374
+ return self.inner.image_generate(req);
375
+ }
376
+ let fut = self.call_service(LlmRequest::ImageGenerate(req));
377
+ Box::pin(async move {
378
+ match fut.await? {
379
+ LlmResponse::ImageGenerate(r) => Ok(r),
380
+ other => Err(LiterLlmError::InternalError {
381
+ message: format!("expected ImageGenerate response, got {other:?}"),
382
+ }),
383
+ }
384
+ })
385
+ }
386
+
387
+ fn speech(&self, req: CreateSpeechRequest) -> BoxFuture<'_, bytes::Bytes> {
388
+ if self.service.is_none() {
389
+ return self.inner.speech(req);
390
+ }
391
+ let fut = self.call_service(LlmRequest::Speech(req));
392
+ Box::pin(async move {
393
+ match fut.await? {
394
+ LlmResponse::Speech(r) => Ok(r),
395
+ other => Err(LiterLlmError::InternalError {
396
+ message: format!("expected Speech response, got {other:?}"),
397
+ }),
398
+ }
399
+ })
400
+ }
401
+
402
+ fn transcribe(&self, req: CreateTranscriptionRequest) -> BoxFuture<'_, TranscriptionResponse> {
403
+ if self.service.is_none() {
404
+ return self.inner.transcribe(req);
405
+ }
406
+ let fut = self.call_service(LlmRequest::Transcribe(req));
407
+ Box::pin(async move {
408
+ match fut.await? {
409
+ LlmResponse::Transcribe(r) => Ok(r),
410
+ other => Err(LiterLlmError::InternalError {
411
+ message: format!("expected Transcribe response, got {other:?}"),
412
+ }),
413
+ }
414
+ })
415
+ }
416
+
417
+ fn moderate(&self, req: ModerationRequest) -> BoxFuture<'_, ModerationResponse> {
418
+ if self.service.is_none() {
419
+ return self.inner.moderate(req);
420
+ }
421
+ let fut = self.call_service(LlmRequest::Moderate(req));
422
+ Box::pin(async move {
423
+ match fut.await? {
424
+ LlmResponse::Moderate(r) => Ok(r),
425
+ other => Err(LiterLlmError::InternalError {
426
+ message: format!("expected Moderate response, got {other:?}"),
427
+ }),
428
+ }
429
+ })
430
+ }
431
+
432
+ fn rerank(&self, req: RerankRequest) -> BoxFuture<'_, RerankResponse> {
433
+ if self.service.is_none() {
434
+ return self.inner.rerank(req);
435
+ }
436
+ let fut = self.call_service(LlmRequest::Rerank(req));
437
+ Box::pin(async move {
438
+ match fut.await? {
439
+ LlmResponse::Rerank(r) => Ok(r),
440
+ other => Err(LiterLlmError::InternalError {
441
+ message: format!("expected Rerank response, got {other:?}"),
442
+ }),
443
+ }
444
+ })
445
+ }
446
+
447
+ fn search(&self, req: SearchRequest) -> BoxFuture<'_, SearchResponse> {
448
+ if self.service.is_none() {
449
+ return self.inner.search(req);
450
+ }
451
+ let fut = self.call_service(LlmRequest::Search(req));
452
+ Box::pin(async move {
453
+ match fut.await? {
454
+ LlmResponse::Search(r) => Ok(r),
455
+ other => Err(LiterLlmError::InternalError {
456
+ message: format!("expected Search response, got {other:?}"),
457
+ }),
458
+ }
459
+ })
460
+ }
461
+
462
+ fn ocr(&self, req: OcrRequest) -> BoxFuture<'_, OcrResponse> {
463
+ if self.service.is_none() {
464
+ return self.inner.ocr(req);
465
+ }
466
+ let fut = self.call_service(LlmRequest::Ocr(req));
467
+ Box::pin(async move {
468
+ match fut.await? {
469
+ LlmResponse::Ocr(r) => Ok(r),
470
+ other => Err(LiterLlmError::InternalError {
471
+ message: format!("expected Ocr response, got {other:?}"),
472
+ }),
473
+ }
474
+ })
475
+ }
476
+ }
477
+
478
+ // ---------------------------------------------------------------------------
479
+ // FileClient implementation — delegates directly to the inner DefaultClient.
480
+ // File operations are not routed through the Tower middleware stack because
481
+ // they are administrative and should not be subject to cache/budget/hooks.
482
+ // ---------------------------------------------------------------------------
483
+
484
+ impl FileClient for ManagedClient {
485
+ fn create_file(&self, req: CreateFileRequest) -> BoxFuture<'_, FileObject> {
486
+ self.inner.create_file(req)
487
+ }
488
+
489
+ fn retrieve_file(&self, file_id: &str) -> BoxFuture<'_, FileObject> {
490
+ self.inner.retrieve_file(file_id)
491
+ }
492
+
493
+ fn delete_file(&self, file_id: &str) -> BoxFuture<'_, DeleteResponse> {
494
+ self.inner.delete_file(file_id)
495
+ }
496
+
497
+ fn list_files(&self, query: Option<FileListQuery>) -> BoxFuture<'_, FileListResponse> {
498
+ self.inner.list_files(query)
499
+ }
500
+
501
+ fn file_content(&self, file_id: &str) -> BoxFuture<'_, bytes::Bytes> {
502
+ self.inner.file_content(file_id)
503
+ }
504
+ }
505
+
506
+ // ---------------------------------------------------------------------------
507
+ // BatchClient implementation — delegates directly to the inner DefaultClient.
508
+ // ---------------------------------------------------------------------------
509
+
510
+ impl BatchClient for ManagedClient {
511
+ fn create_batch(&self, req: CreateBatchRequest) -> BoxFuture<'_, BatchObject> {
512
+ self.inner.create_batch(req)
513
+ }
514
+
515
+ fn retrieve_batch(&self, batch_id: &str) -> BoxFuture<'_, BatchObject> {
516
+ self.inner.retrieve_batch(batch_id)
517
+ }
518
+
519
+ fn list_batches(&self, query: Option<BatchListQuery>) -> BoxFuture<'_, BatchListResponse> {
520
+ self.inner.list_batches(query)
521
+ }
522
+
523
+ fn cancel_batch(&self, batch_id: &str) -> BoxFuture<'_, BatchObject> {
524
+ self.inner.cancel_batch(batch_id)
525
+ }
526
+ }
527
+
528
+ // ---------------------------------------------------------------------------
529
+ // ResponseClient implementation — delegates directly to the inner DefaultClient.
530
+ // ---------------------------------------------------------------------------
531
+
532
+ impl ResponseClient for ManagedClient {
533
+ fn create_response(&self, req: CreateResponseRequest) -> BoxFuture<'_, ResponseObject> {
534
+ self.inner.create_response(req)
535
+ }
536
+
537
+ fn retrieve_response(&self, id: &str) -> BoxFuture<'_, ResponseObject> {
538
+ self.inner.retrieve_response(id)
539
+ }
540
+
541
+ fn cancel_response(&self, id: &str) -> BoxFuture<'_, ResponseObject> {
542
+ self.inner.cancel_response(id)
543
+ }
544
+ }
545
+
546
#[cfg(test)]
mod tests {
    use std::time::Duration;

    use super::*;
    use crate::client::ClientConfigBuilder;
    use crate::tower::{BudgetConfig, CacheConfig};

    /// Build a `ManagedClient` from `config` with no model hint, failing the
    /// test if construction fails.
    fn managed(config: ClientConfig) -> ManagedClient {
        ManagedClient::new(config, None).expect("should build")
    }

    /// A plain config yields no service stack and no budget state.
    #[test]
    fn no_middleware_when_config_is_plain() {
        let subject = managed(ClientConfig::new("test-key"));
        assert!(!subject.has_middleware());
        assert!(subject.budget_state().is_none());
    }

    /// A cache config alone is enough to activate the middleware stack.
    #[test]
    fn middleware_active_with_cache_config() {
        let builder = ClientConfigBuilder::new("test-key").cache(CacheConfig::default());
        let subject = managed(builder.build());
        assert!(subject.has_middleware());
    }

    /// A budget config activates the middleware stack and exposes the budget
    /// state handle.
    #[test]
    fn middleware_active_with_budget_config() {
        let builder = ClientConfigBuilder::new("test-key").budget(BudgetConfig::default());
        let subject = managed(builder.build());
        assert!(subject.has_middleware());
        assert!(subject.budget_state().is_some());
    }

    /// A cooldown duration alone is enough to activate the middleware stack.
    #[test]
    fn middleware_active_with_cooldown() {
        let builder = ClientConfigBuilder::new("test-key").cooldown(Duration::from_secs(30));
        let subject = managed(builder.build());
        assert!(subject.has_middleware());
    }

    /// Enabling tracing alone is enough to activate the middleware stack.
    #[test]
    fn middleware_active_with_tracing() {
        let builder = ClientConfigBuilder::new("test-key").tracing(true);
        let subject = managed(builder.build());
        assert!(subject.has_middleware());
    }

    /// Enabling cost tracking alone is enough to activate the middleware
    /// stack.
    #[test]
    fn middleware_active_with_cost_tracking() {
        let builder = ClientConfigBuilder::new("test-key").cost_tracking(true);
        let subject = managed(builder.build());
        assert!(subject.has_middleware());
    }

    /// Explicitly disabling both tracing and cost tracking leaves the client
    /// without a middleware stack.
    #[test]
    fn no_middleware_when_tracing_false() {
        let builder = ClientConfigBuilder::new("test-key")
            .tracing(false)
            .cost_tracking(false);
        let subject = managed(builder.build());
        assert!(!subject.has_middleware());
    }
}