liter_llm 1.0.0.pre.rc.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +239 -0
- data/ext/liter_llm_rb/extconf.rb +65 -0
- data/ext/liter_llm_rb/native/.cargo/config.toml +23 -0
- data/ext/liter_llm_rb/native/Cargo.lock +3713 -0
- data/ext/liter_llm_rb/native/Cargo.toml +32 -0
- data/ext/liter_llm_rb/native/build.rs +15 -0
- data/ext/liter_llm_rb/native/src/lib.rs +1079 -0
- data/lib/liter_llm.rb +8 -0
- data/sig/liter_llm.rbs +416 -0
- data/vendor/Cargo.toml +54 -0
- data/vendor/liter-llm/Cargo.toml +92 -0
- data/vendor/liter-llm/README.md +252 -0
- data/vendor/liter-llm/schemas/pricing.json +40 -0
- data/vendor/liter-llm/schemas/providers.json +1662 -0
- data/vendor/liter-llm/src/auth/azure_ad.rs +264 -0
- data/vendor/liter-llm/src/auth/bedrock_sts.rs +353 -0
- data/vendor/liter-llm/src/auth/mod.rs +68 -0
- data/vendor/liter-llm/src/auth/vertex_oauth.rs +353 -0
- data/vendor/liter-llm/src/client/config.rs +351 -0
- data/vendor/liter-llm/src/client/managed.rs +622 -0
- data/vendor/liter-llm/src/client/mod.rs +864 -0
- data/vendor/liter-llm/src/cost.rs +212 -0
- data/vendor/liter-llm/src/error.rs +190 -0
- data/vendor/liter-llm/src/http/eventstream.rs +860 -0
- data/vendor/liter-llm/src/http/mod.rs +12 -0
- data/vendor/liter-llm/src/http/request.rs +438 -0
- data/vendor/liter-llm/src/http/retry.rs +72 -0
- data/vendor/liter-llm/src/http/streaming.rs +289 -0
- data/vendor/liter-llm/src/lib.rs +37 -0
- data/vendor/liter-llm/src/provider/anthropic.rs +2250 -0
- data/vendor/liter-llm/src/provider/azure.rs +579 -0
- data/vendor/liter-llm/src/provider/bedrock.rs +1543 -0
- data/vendor/liter-llm/src/provider/cohere.rs +654 -0
- data/vendor/liter-llm/src/provider/custom.rs +404 -0
- data/vendor/liter-llm/src/provider/google_ai.rs +281 -0
- data/vendor/liter-llm/src/provider/mistral.rs +188 -0
- data/vendor/liter-llm/src/provider/mod.rs +616 -0
- data/vendor/liter-llm/src/provider/vertex.rs +1504 -0
- data/vendor/liter-llm/src/tests.rs +1425 -0
- data/vendor/liter-llm/src/tokenizer.rs +281 -0
- data/vendor/liter-llm/src/tower/budget.rs +599 -0
- data/vendor/liter-llm/src/tower/cache.rs +502 -0
- data/vendor/liter-llm/src/tower/cache_opendal.rs +270 -0
- data/vendor/liter-llm/src/tower/cooldown.rs +231 -0
- data/vendor/liter-llm/src/tower/cost.rs +404 -0
- data/vendor/liter-llm/src/tower/fallback.rs +121 -0
- data/vendor/liter-llm/src/tower/health.rs +219 -0
- data/vendor/liter-llm/src/tower/hooks.rs +369 -0
- data/vendor/liter-llm/src/tower/mod.rs +77 -0
- data/vendor/liter-llm/src/tower/rate_limit.rs +300 -0
- data/vendor/liter-llm/src/tower/router.rs +436 -0
- data/vendor/liter-llm/src/tower/service.rs +181 -0
- data/vendor/liter-llm/src/tower/tests.rs +539 -0
- data/vendor/liter-llm/src/tower/tests_common.rs +252 -0
- data/vendor/liter-llm/src/tower/tracing.rs +209 -0
- data/vendor/liter-llm/src/tower/types.rs +170 -0
- data/vendor/liter-llm/src/types/audio.rs +52 -0
- data/vendor/liter-llm/src/types/batch.rs +77 -0
- data/vendor/liter-llm/src/types/chat.rs +214 -0
- data/vendor/liter-llm/src/types/common.rs +244 -0
- data/vendor/liter-llm/src/types/embedding.rs +84 -0
- data/vendor/liter-llm/src/types/files.rs +58 -0
- data/vendor/liter-llm/src/types/image.rs +40 -0
- data/vendor/liter-llm/src/types/mod.rs +27 -0
- data/vendor/liter-llm/src/types/models.rs +21 -0
- data/vendor/liter-llm/src/types/moderation.rs +80 -0
- data/vendor/liter-llm/src/types/ocr.rs +87 -0
- data/vendor/liter-llm/src/types/rerank.rs +46 -0
- data/vendor/liter-llm/src/types/responses.rs +55 -0
- data/vendor/liter-llm/src/types/search.rs +45 -0
- data/vendor/liter-llm/tests/contract.rs +332 -0
- data/vendor/liter-llm-ffi/Cargo.toml +30 -0
- data/vendor/liter-llm-ffi/build.rs +66 -0
- data/vendor/liter-llm-ffi/cbindgen.toml +60 -0
- data/vendor/liter-llm-ffi/liter_llm.h +850 -0
- data/vendor/liter-llm-ffi/src/lib.rs +2488 -0
- metadata +286 -0
|
@@ -0,0 +1,622 @@
|
|
|
1
|
+
//! A managed LLM client that optionally routes requests through a Tower
|
|
2
|
+
//! middleware stack (cache, budget, hooks, cooldown, rate limiting, health
|
|
3
|
+
//! checks, cost tracking, tracing) when the corresponding [`ClientConfig`]
|
|
4
|
+
//! fields are set.
|
|
5
|
+
//!
|
|
6
|
+
//! When no middleware is configured the client delegates directly to the
|
|
7
|
+
//! underlying [`DefaultClient`], adding zero overhead. When middleware *is*
|
|
8
|
+
//! configured, each [`LlmClient`] method converts its typed request into an
|
|
9
|
+
//! [`LlmRequest`], sends it through a cloned Tower service stack, and extracts
|
|
10
|
+
//! the typed response from the resulting [`LlmResponse`].
|
|
11
|
+
//!
|
|
12
|
+
//! # Tower `Service::call` takes `&mut self`
|
|
13
|
+
//!
|
|
14
|
+
//! The [`LlmClient`] trait requires `&self` receivers but Tower's
|
|
15
|
+
//! `Service::call` takes `&mut self`. All our middleware services are `Clone`
|
|
16
|
+
//! (state is behind `Arc`) so we clone the service per call — this is a cheap
|
|
17
|
+
//! series of `Arc` reference-count bumps.
|
|
18
|
+
//!
|
|
19
|
+
//! Tower's [`BoxCloneService`](tower::util::BoxCloneService) is `Send` but not
|
|
20
|
+
//! `Sync` (its inner trait object is `dyn ... + Send`). Since [`LlmClient`]
|
|
21
|
+
//! requires `Sync`, we wrap the service in a [`std::sync::Mutex`] that is held
|
|
22
|
+
//! only for the brief duration of `Clone::clone` (a few `Arc` ref-count bumps).
|
|
23
|
+
//! This makes `ManagedClient` `Sync` with negligible contention.
|
|
24
|
+
|
|
25
|
+
use std::sync::{Arc, Mutex};
|
|
26
|
+
|
|
27
|
+
use tower::{Layer, Service};
|
|
28
|
+
|
|
29
|
+
use super::config::ClientConfig;
|
|
30
|
+
use super::{BatchClient, BoxFuture, BoxStream, DefaultClient, FileClient, LlmClient, ResponseClient};
|
|
31
|
+
use crate::error::{LiterLlmError, Result};
|
|
32
|
+
#[cfg(feature = "opendal-cache")]
|
|
33
|
+
use crate::tower::OpenDalCacheStore;
|
|
34
|
+
use crate::tower::types::{LlmRequest, LlmResponse};
|
|
35
|
+
use crate::tower::{
|
|
36
|
+
BudgetLayer, BudgetState, CacheBackend, CacheLayer, CooldownLayer, CostTrackingLayer, HealthCheckLayer, HooksLayer,
|
|
37
|
+
LlmService, ModelRateLimitLayer, TracingLayer,
|
|
38
|
+
};
|
|
39
|
+
use crate::types::audio::{CreateSpeechRequest, CreateTranscriptionRequest, TranscriptionResponse};
|
|
40
|
+
use crate::types::batch::{BatchListQuery, BatchListResponse, BatchObject, CreateBatchRequest};
|
|
41
|
+
use crate::types::files::{CreateFileRequest, DeleteResponse, FileListQuery, FileListResponse, FileObject};
|
|
42
|
+
use crate::types::image::{CreateImageRequest, ImagesResponse};
|
|
43
|
+
use crate::types::moderation::{ModerationRequest, ModerationResponse};
|
|
44
|
+
use crate::types::ocr::{OcrRequest, OcrResponse};
|
|
45
|
+
use crate::types::rerank::{RerankRequest, RerankResponse};
|
|
46
|
+
use crate::types::responses::{CreateResponseRequest, ResponseObject};
|
|
47
|
+
use crate::types::search::{SearchRequest, SearchResponse};
|
|
48
|
+
use crate::types::{
|
|
49
|
+
ChatCompletionChunk, ChatCompletionRequest, ChatCompletionResponse, EmbeddingRequest, EmbeddingResponse,
|
|
50
|
+
ModelsListResponse,
|
|
51
|
+
};
|
|
52
|
+
|
|
53
|
+
// ---------------------------------------------------------------------------
|
|
54
|
+
// Type-erased Tower service wrapper
|
|
55
|
+
// ---------------------------------------------------------------------------
|
|
56
|
+
|
|
57
|
+
/// A `Send + Sync` wrapper around [`tower::util::BoxCloneService`].
|
|
58
|
+
///
|
|
59
|
+
/// `BoxCloneService` is `Send` but not `Sync` because its inner trait object
|
|
60
|
+
/// only requires `Send`. All our concrete middleware services *are* `Sync`
|
|
61
|
+
/// (they store shared state behind `Arc`), so wrapping in a `Mutex` is safe
|
|
62
|
+
/// and incurs negligible overhead — the lock is held only for the duration of
|
|
63
|
+
/// `Clone::clone` (a handful of `Arc` ref-count bumps).
|
|
64
|
+
struct SyncService {
|
|
65
|
+
inner: Mutex<tower::util::BoxCloneService<LlmRequest, LlmResponse, LiterLlmError>>,
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
impl SyncService {
|
|
69
|
+
/// Clone the inner service out of the mutex, returning an owned mutable
|
|
70
|
+
/// service that can be `.call()`-ed.
|
|
71
|
+
fn clone_service(&self) -> tower::util::BoxCloneService<LlmRequest, LlmResponse, LiterLlmError> {
|
|
72
|
+
self.inner.lock().expect("ManagedClient service mutex poisoned").clone()
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
// ---------------------------------------------------------------------------
|
|
77
|
+
// ManagedClient
|
|
78
|
+
// ---------------------------------------------------------------------------
|
|
79
|
+
|
|
80
|
+
/// A managed LLM client that wraps [`DefaultClient`] with optional Tower
|
|
81
|
+
/// middleware (cache, cooldown, rate limiting, health checks, cost tracking,
|
|
82
|
+
/// budget, hooks, tracing).
|
|
83
|
+
///
|
|
84
|
+
/// Construct via [`ManagedClient::new`]. If the provided [`ClientConfig`]
|
|
85
|
+
/// contains any middleware configuration the corresponding Tower layers are
|
|
86
|
+
/// composed into a service stack. Otherwise requests pass straight through
|
|
87
|
+
/// to the inner [`DefaultClient`].
|
|
88
|
+
///
|
|
89
|
+
/// `ManagedClient` implements [`LlmClient`] and can be used everywhere a
|
|
90
|
+
/// `DefaultClient` is expected.
|
|
91
|
+
pub struct ManagedClient {
|
|
92
|
+
/// The raw client — used directly when no middleware is configured, and
|
|
93
|
+
/// also wrapped by the Tower service when middleware *is* configured.
|
|
94
|
+
inner: Arc<DefaultClient>,
|
|
95
|
+
|
|
96
|
+
/// When `Some`, requests are routed through this Tower service stack
|
|
97
|
+
/// instead of going directly to `inner`.
|
|
98
|
+
service: Option<SyncService>,
|
|
99
|
+
|
|
100
|
+
/// Budget state handle, exposed so callers can query accumulated spend.
|
|
101
|
+
/// `None` when no budget middleware is configured.
|
|
102
|
+
budget_state: Option<Arc<BudgetState>>,
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
// NOTE: no `unsafe impl Send`/`Sync` is required for `ManagedClient`.
// `SyncService` wraps a `Mutex<BoxCloneService>`, and `Mutex<T>` is `Sync`
// whenever `T: Send`. `Arc<DefaultClient>` and `Arc<BudgetState>` are both
// `Send + Sync`, so the compiler derives `Send + Sync` automatically.
|
|
109
|
+
|
|
110
|
+
impl ManagedClient {
|
|
111
|
+
/// Build a managed client.
|
|
112
|
+
///
|
|
113
|
+
/// `model_hint` guides provider auto-detection — see
|
|
114
|
+
/// [`DefaultClient::new`] for details.
|
|
115
|
+
///
|
|
116
|
+
/// If the config contains any middleware settings (cache, budget, hooks,
|
|
117
|
+
/// cooldown, rate limit, health check, cost tracking, tracing) the
|
|
118
|
+
/// corresponding Tower layers are composed into a service stack.
|
|
119
|
+
/// Otherwise requests pass straight through to the inner client.
|
|
120
|
+
///
|
|
121
|
+
/// # Errors
|
|
122
|
+
///
|
|
123
|
+
/// Returns an error if the underlying [`DefaultClient`] cannot be
|
|
124
|
+
/// constructed (e.g. invalid headers or HTTP client build failure).
|
|
125
|
+
pub fn new(config: ClientConfig, model_hint: Option<&str>) -> Result<Self> {
|
|
126
|
+
let client = DefaultClient::new(config.clone(), model_hint)?;
|
|
127
|
+
let inner = Arc::new(client);
|
|
128
|
+
|
|
129
|
+
let (service, budget_state) = build_service_stack(&config, Arc::clone(&inner));
|
|
130
|
+
|
|
131
|
+
Ok(Self {
|
|
132
|
+
inner,
|
|
133
|
+
service,
|
|
134
|
+
budget_state,
|
|
135
|
+
})
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
/// Return a reference to the underlying [`DefaultClient`].
|
|
139
|
+
#[must_use]
|
|
140
|
+
pub fn inner(&self) -> &DefaultClient {
|
|
141
|
+
&self.inner
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
/// Return the budget state handle, if budget middleware is configured.
|
|
145
|
+
///
|
|
146
|
+
/// Use this to query accumulated spend at runtime.
|
|
147
|
+
#[must_use]
|
|
148
|
+
pub fn budget_state(&self) -> Option<&Arc<BudgetState>> {
|
|
149
|
+
self.budget_state.as_ref()
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
/// Return `true` when middleware is active (requests go through the Tower
|
|
153
|
+
/// service stack).
|
|
154
|
+
#[must_use]
|
|
155
|
+
pub fn has_middleware(&self) -> bool {
|
|
156
|
+
self.service.is_some()
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
// -- helpers ----------------------------------------------------------
|
|
160
|
+
|
|
161
|
+
/// Clone the Tower service and call it with `req`, returning the raw
|
|
162
|
+
/// [`LlmResponse`].
|
|
163
|
+
fn call_service(&self, req: LlmRequest) -> BoxFuture<'static, LlmResponse> {
|
|
164
|
+
let mut svc = match self.service.as_ref() {
|
|
165
|
+
Some(s) => s.clone_service(),
|
|
166
|
+
None => {
|
|
167
|
+
return Box::pin(async {
|
|
168
|
+
Err(LiterLlmError::InternalError {
|
|
169
|
+
message: "call_service called without middleware stack".into(),
|
|
170
|
+
})
|
|
171
|
+
});
|
|
172
|
+
}
|
|
173
|
+
};
|
|
174
|
+
Box::pin(async move { svc.call(req).await })
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
/// Inspect the config and, when at least one middleware option is set,
|
|
179
|
+
/// compose a Tower service stack wrapping the given client.
|
|
180
|
+
///
|
|
181
|
+
/// Returns `(Some(service), budget_state)` when middleware is configured,
|
|
182
|
+
/// or `(None, None)` when the config has no middleware.
|
|
183
|
+
fn build_service_stack(
|
|
184
|
+
config: &ClientConfig,
|
|
185
|
+
client: Arc<DefaultClient>,
|
|
186
|
+
) -> (Option<SyncService>, Option<Arc<BudgetState>>) {
|
|
187
|
+
let has_cache = config.cache_config.is_some();
|
|
188
|
+
let has_budget = config.budget_config.is_some();
|
|
189
|
+
let has_hooks = !config.hooks.is_empty();
|
|
190
|
+
let has_cooldown = config.cooldown_duration.is_some();
|
|
191
|
+
let has_rate_limit = config.rate_limit_config.is_some();
|
|
192
|
+
let has_health_check = config.health_check_interval.is_some();
|
|
193
|
+
let has_cost = config.enable_cost_tracking;
|
|
194
|
+
let has_tracing = config.enable_tracing;
|
|
195
|
+
|
|
196
|
+
if !has_cache
|
|
197
|
+
&& !has_budget
|
|
198
|
+
&& !has_hooks
|
|
199
|
+
&& !has_cooldown
|
|
200
|
+
&& !has_rate_limit
|
|
201
|
+
&& !has_health_check
|
|
202
|
+
&& !has_cost
|
|
203
|
+
&& !has_tracing
|
|
204
|
+
{
|
|
205
|
+
return (None, None);
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
// Start with the base LlmService wrapping the DefaultClient.
|
|
209
|
+
let base = LlmService::new_from_arc(client);
|
|
210
|
+
|
|
211
|
+
let mut budget_state: Option<Arc<BudgetState>> = None;
|
|
212
|
+
|
|
213
|
+
// We cannot use ServiceBuilder generics easily when layers are optional,
|
|
214
|
+
// so we type-erase into BoxCloneService at each step.
|
|
215
|
+
type Bcs = tower::util::BoxCloneService<LlmRequest, LlmResponse, LiterLlmError>;
|
|
216
|
+
|
|
217
|
+
// Start by boxing the base service.
|
|
218
|
+
let svc: Bcs = tower::util::BoxCloneService::new(base);
|
|
219
|
+
|
|
220
|
+
// 1. Cache (innermost — avoids hitting downstream for cached responses).
|
|
221
|
+
let svc = if let Some(ref cache_cfg) = config.cache_config {
|
|
222
|
+
let layer = if let Some(ref store) = config.cache_store {
|
|
223
|
+
CacheLayer::with_store(Arc::clone(store))
|
|
224
|
+
} else {
|
|
225
|
+
match &cache_cfg.backend {
|
|
226
|
+
CacheBackend::Memory => CacheLayer::new(cache_cfg.clone()),
|
|
227
|
+
#[cfg(feature = "opendal-cache")]
|
|
228
|
+
CacheBackend::OpenDal {
|
|
229
|
+
scheme,
|
|
230
|
+
config: backend_config,
|
|
231
|
+
} => {
|
|
232
|
+
match OpenDalCacheStore::from_config(scheme, backend_config.clone(), "llm-cache/", cache_cfg.ttl) {
|
|
233
|
+
Ok(store) => CacheLayer::with_store(Arc::new(store)),
|
|
234
|
+
Err(e) => {
|
|
235
|
+
tracing::warn!("Failed to create OpenDAL cache store, falling back to in-memory: {e}");
|
|
236
|
+
CacheLayer::new(cache_cfg.clone())
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
};
|
|
242
|
+
tower::util::BoxCloneService::new(layer.layer(svc))
|
|
243
|
+
} else {
|
|
244
|
+
svc
|
|
245
|
+
};
|
|
246
|
+
|
|
247
|
+
// 2. Health check — rejects requests when provider is unhealthy.
|
|
248
|
+
let svc = if let Some(interval) = config.health_check_interval {
|
|
249
|
+
let layer = HealthCheckLayer::new(interval);
|
|
250
|
+
tower::util::BoxCloneService::new(layer.layer(svc))
|
|
251
|
+
} else {
|
|
252
|
+
svc
|
|
253
|
+
};
|
|
254
|
+
|
|
255
|
+
// 3. Cooldown — rejects requests during cooldown after transient errors.
|
|
256
|
+
let svc = if let Some(duration) = config.cooldown_duration {
|
|
257
|
+
let layer = CooldownLayer::new(duration);
|
|
258
|
+
tower::util::BoxCloneService::new(layer.layer(svc))
|
|
259
|
+
} else {
|
|
260
|
+
svc
|
|
261
|
+
};
|
|
262
|
+
|
|
263
|
+
// 4. Rate limit — enforces per-model RPM/TPM limits.
|
|
264
|
+
let svc = if let Some(ref rl_cfg) = config.rate_limit_config {
|
|
265
|
+
let layer = ModelRateLimitLayer::new(rl_cfg.clone());
|
|
266
|
+
tower::util::BoxCloneService::new(layer.layer(svc))
|
|
267
|
+
} else {
|
|
268
|
+
svc
|
|
269
|
+
};
|
|
270
|
+
|
|
271
|
+
// 5. Cost tracking — records estimated USD cost on tracing spans.
|
|
272
|
+
let svc = if has_cost {
|
|
273
|
+
tower::util::BoxCloneService::new(CostTrackingLayer.layer(svc))
|
|
274
|
+
} else {
|
|
275
|
+
svc
|
|
276
|
+
};
|
|
277
|
+
|
|
278
|
+
// 6. Budget — enforces spending limits.
|
|
279
|
+
let svc = if let Some(ref budget_cfg) = config.budget_config {
|
|
280
|
+
let state = Arc::new(BudgetState::new());
|
|
281
|
+
budget_state = Some(Arc::clone(&state));
|
|
282
|
+
let layer = BudgetLayer::new(budget_cfg.clone(), state);
|
|
283
|
+
tower::util::BoxCloneService::new(layer.layer(svc))
|
|
284
|
+
} else {
|
|
285
|
+
svc
|
|
286
|
+
};
|
|
287
|
+
|
|
288
|
+
// 7. Hooks — user-defined pre/post request callbacks.
|
|
289
|
+
let svc = if has_hooks {
|
|
290
|
+
let layer = HooksLayer::new(config.hooks.clone());
|
|
291
|
+
tower::util::BoxCloneService::new(layer.layer(svc))
|
|
292
|
+
} else {
|
|
293
|
+
svc
|
|
294
|
+
};
|
|
295
|
+
|
|
296
|
+
// 8. Tracing (outermost — wraps everything in an OpenTelemetry span).
|
|
297
|
+
let svc = if has_tracing {
|
|
298
|
+
tower::util::BoxCloneService::new(TracingLayer.layer(svc))
|
|
299
|
+
} else {
|
|
300
|
+
svc
|
|
301
|
+
};
|
|
302
|
+
|
|
303
|
+
// Wrap in SyncService so ManagedClient is Sync.
|
|
304
|
+
(Some(SyncService { inner: Mutex::new(svc) }), budget_state)
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
// ---------------------------------------------------------------------------
|
|
308
|
+
// LlmClient implementation
|
|
309
|
+
// ---------------------------------------------------------------------------
|
|
310
|
+
|
|
311
|
+
impl LlmClient for ManagedClient {
|
|
312
|
+
fn chat(&self, req: ChatCompletionRequest) -> BoxFuture<'_, ChatCompletionResponse> {
|
|
313
|
+
if self.service.is_none() {
|
|
314
|
+
return self.inner.chat(req);
|
|
315
|
+
}
|
|
316
|
+
let fut = self.call_service(LlmRequest::Chat(req));
|
|
317
|
+
Box::pin(async move {
|
|
318
|
+
match fut.await? {
|
|
319
|
+
LlmResponse::Chat(r) => Ok(r),
|
|
320
|
+
other => Err(LiterLlmError::InternalError {
|
|
321
|
+
message: format!("expected Chat response, got {other:?}"),
|
|
322
|
+
}),
|
|
323
|
+
}
|
|
324
|
+
})
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
fn chat_stream(&self, req: ChatCompletionRequest) -> BoxFuture<'_, BoxStream<'_, ChatCompletionChunk>> {
|
|
328
|
+
if self.service.is_none() {
|
|
329
|
+
return self.inner.chat_stream(req);
|
|
330
|
+
}
|
|
331
|
+
let fut = self.call_service(LlmRequest::ChatStream(req));
|
|
332
|
+
Box::pin(async move {
|
|
333
|
+
match fut.await? {
|
|
334
|
+
LlmResponse::ChatStream(s) => Ok(s),
|
|
335
|
+
other => Err(LiterLlmError::InternalError {
|
|
336
|
+
message: format!("expected ChatStream response, got {other:?}"),
|
|
337
|
+
}),
|
|
338
|
+
}
|
|
339
|
+
})
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
fn embed(&self, req: EmbeddingRequest) -> BoxFuture<'_, EmbeddingResponse> {
|
|
343
|
+
if self.service.is_none() {
|
|
344
|
+
return self.inner.embed(req);
|
|
345
|
+
}
|
|
346
|
+
let fut = self.call_service(LlmRequest::Embed(req));
|
|
347
|
+
Box::pin(async move {
|
|
348
|
+
match fut.await? {
|
|
349
|
+
LlmResponse::Embed(r) => Ok(r),
|
|
350
|
+
other => Err(LiterLlmError::InternalError {
|
|
351
|
+
message: format!("expected Embed response, got {other:?}"),
|
|
352
|
+
}),
|
|
353
|
+
}
|
|
354
|
+
})
|
|
355
|
+
}
|
|
356
|
+
|
|
357
|
+
fn list_models(&self) -> BoxFuture<'_, ModelsListResponse> {
|
|
358
|
+
if self.service.is_none() {
|
|
359
|
+
return self.inner.list_models();
|
|
360
|
+
}
|
|
361
|
+
let fut = self.call_service(LlmRequest::ListModels);
|
|
362
|
+
Box::pin(async move {
|
|
363
|
+
match fut.await? {
|
|
364
|
+
LlmResponse::ListModels(r) => Ok(r),
|
|
365
|
+
other => Err(LiterLlmError::InternalError {
|
|
366
|
+
message: format!("expected ListModels response, got {other:?}"),
|
|
367
|
+
}),
|
|
368
|
+
}
|
|
369
|
+
})
|
|
370
|
+
}
|
|
371
|
+
|
|
372
|
+
fn image_generate(&self, req: CreateImageRequest) -> BoxFuture<'_, ImagesResponse> {
|
|
373
|
+
if self.service.is_none() {
|
|
374
|
+
return self.inner.image_generate(req);
|
|
375
|
+
}
|
|
376
|
+
let fut = self.call_service(LlmRequest::ImageGenerate(req));
|
|
377
|
+
Box::pin(async move {
|
|
378
|
+
match fut.await? {
|
|
379
|
+
LlmResponse::ImageGenerate(r) => Ok(r),
|
|
380
|
+
other => Err(LiterLlmError::InternalError {
|
|
381
|
+
message: format!("expected ImageGenerate response, got {other:?}"),
|
|
382
|
+
}),
|
|
383
|
+
}
|
|
384
|
+
})
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
fn speech(&self, req: CreateSpeechRequest) -> BoxFuture<'_, bytes::Bytes> {
|
|
388
|
+
if self.service.is_none() {
|
|
389
|
+
return self.inner.speech(req);
|
|
390
|
+
}
|
|
391
|
+
let fut = self.call_service(LlmRequest::Speech(req));
|
|
392
|
+
Box::pin(async move {
|
|
393
|
+
match fut.await? {
|
|
394
|
+
LlmResponse::Speech(r) => Ok(r),
|
|
395
|
+
other => Err(LiterLlmError::InternalError {
|
|
396
|
+
message: format!("expected Speech response, got {other:?}"),
|
|
397
|
+
}),
|
|
398
|
+
}
|
|
399
|
+
})
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
fn transcribe(&self, req: CreateTranscriptionRequest) -> BoxFuture<'_, TranscriptionResponse> {
|
|
403
|
+
if self.service.is_none() {
|
|
404
|
+
return self.inner.transcribe(req);
|
|
405
|
+
}
|
|
406
|
+
let fut = self.call_service(LlmRequest::Transcribe(req));
|
|
407
|
+
Box::pin(async move {
|
|
408
|
+
match fut.await? {
|
|
409
|
+
LlmResponse::Transcribe(r) => Ok(r),
|
|
410
|
+
other => Err(LiterLlmError::InternalError {
|
|
411
|
+
message: format!("expected Transcribe response, got {other:?}"),
|
|
412
|
+
}),
|
|
413
|
+
}
|
|
414
|
+
})
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
fn moderate(&self, req: ModerationRequest) -> BoxFuture<'_, ModerationResponse> {
|
|
418
|
+
if self.service.is_none() {
|
|
419
|
+
return self.inner.moderate(req);
|
|
420
|
+
}
|
|
421
|
+
let fut = self.call_service(LlmRequest::Moderate(req));
|
|
422
|
+
Box::pin(async move {
|
|
423
|
+
match fut.await? {
|
|
424
|
+
LlmResponse::Moderate(r) => Ok(r),
|
|
425
|
+
other => Err(LiterLlmError::InternalError {
|
|
426
|
+
message: format!("expected Moderate response, got {other:?}"),
|
|
427
|
+
}),
|
|
428
|
+
}
|
|
429
|
+
})
|
|
430
|
+
}
|
|
431
|
+
|
|
432
|
+
fn rerank(&self, req: RerankRequest) -> BoxFuture<'_, RerankResponse> {
|
|
433
|
+
if self.service.is_none() {
|
|
434
|
+
return self.inner.rerank(req);
|
|
435
|
+
}
|
|
436
|
+
let fut = self.call_service(LlmRequest::Rerank(req));
|
|
437
|
+
Box::pin(async move {
|
|
438
|
+
match fut.await? {
|
|
439
|
+
LlmResponse::Rerank(r) => Ok(r),
|
|
440
|
+
other => Err(LiterLlmError::InternalError {
|
|
441
|
+
message: format!("expected Rerank response, got {other:?}"),
|
|
442
|
+
}),
|
|
443
|
+
}
|
|
444
|
+
})
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
fn search(&self, req: SearchRequest) -> BoxFuture<'_, SearchResponse> {
|
|
448
|
+
if self.service.is_none() {
|
|
449
|
+
return self.inner.search(req);
|
|
450
|
+
}
|
|
451
|
+
let fut = self.call_service(LlmRequest::Search(req));
|
|
452
|
+
Box::pin(async move {
|
|
453
|
+
match fut.await? {
|
|
454
|
+
LlmResponse::Search(r) => Ok(r),
|
|
455
|
+
other => Err(LiterLlmError::InternalError {
|
|
456
|
+
message: format!("expected Search response, got {other:?}"),
|
|
457
|
+
}),
|
|
458
|
+
}
|
|
459
|
+
})
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
fn ocr(&self, req: OcrRequest) -> BoxFuture<'_, OcrResponse> {
|
|
463
|
+
if self.service.is_none() {
|
|
464
|
+
return self.inner.ocr(req);
|
|
465
|
+
}
|
|
466
|
+
let fut = self.call_service(LlmRequest::Ocr(req));
|
|
467
|
+
Box::pin(async move {
|
|
468
|
+
match fut.await? {
|
|
469
|
+
LlmResponse::Ocr(r) => Ok(r),
|
|
470
|
+
other => Err(LiterLlmError::InternalError {
|
|
471
|
+
message: format!("expected Ocr response, got {other:?}"),
|
|
472
|
+
}),
|
|
473
|
+
}
|
|
474
|
+
})
|
|
475
|
+
}
|
|
476
|
+
}
|
|
477
|
+
|
|
478
|
+
// ---------------------------------------------------------------------------
|
|
479
|
+
// FileClient implementation — delegates directly to the inner DefaultClient.
|
|
480
|
+
// File operations are not routed through the Tower middleware stack because
|
|
481
|
+
// they are administrative and should not be subject to cache/budget/hooks.
|
|
482
|
+
// ---------------------------------------------------------------------------
|
|
483
|
+
|
|
484
|
+
impl FileClient for ManagedClient {
|
|
485
|
+
fn create_file(&self, req: CreateFileRequest) -> BoxFuture<'_, FileObject> {
|
|
486
|
+
self.inner.create_file(req)
|
|
487
|
+
}
|
|
488
|
+
|
|
489
|
+
fn retrieve_file(&self, file_id: &str) -> BoxFuture<'_, FileObject> {
|
|
490
|
+
self.inner.retrieve_file(file_id)
|
|
491
|
+
}
|
|
492
|
+
|
|
493
|
+
fn delete_file(&self, file_id: &str) -> BoxFuture<'_, DeleteResponse> {
|
|
494
|
+
self.inner.delete_file(file_id)
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
fn list_files(&self, query: Option<FileListQuery>) -> BoxFuture<'_, FileListResponse> {
|
|
498
|
+
self.inner.list_files(query)
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
fn file_content(&self, file_id: &str) -> BoxFuture<'_, bytes::Bytes> {
|
|
502
|
+
self.inner.file_content(file_id)
|
|
503
|
+
}
|
|
504
|
+
}
|
|
505
|
+
|
|
506
|
+
// ---------------------------------------------------------------------------
|
|
507
|
+
// BatchClient implementation — delegates directly to the inner DefaultClient.
|
|
508
|
+
// ---------------------------------------------------------------------------
|
|
509
|
+
|
|
510
|
+
impl BatchClient for ManagedClient {
|
|
511
|
+
fn create_batch(&self, req: CreateBatchRequest) -> BoxFuture<'_, BatchObject> {
|
|
512
|
+
self.inner.create_batch(req)
|
|
513
|
+
}
|
|
514
|
+
|
|
515
|
+
fn retrieve_batch(&self, batch_id: &str) -> BoxFuture<'_, BatchObject> {
|
|
516
|
+
self.inner.retrieve_batch(batch_id)
|
|
517
|
+
}
|
|
518
|
+
|
|
519
|
+
fn list_batches(&self, query: Option<BatchListQuery>) -> BoxFuture<'_, BatchListResponse> {
|
|
520
|
+
self.inner.list_batches(query)
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
fn cancel_batch(&self, batch_id: &str) -> BoxFuture<'_, BatchObject> {
|
|
524
|
+
self.inner.cancel_batch(batch_id)
|
|
525
|
+
}
|
|
526
|
+
}
|
|
527
|
+
|
|
528
|
+
// ---------------------------------------------------------------------------
|
|
529
|
+
// ResponseClient implementation — delegates directly to the inner DefaultClient.
|
|
530
|
+
// ---------------------------------------------------------------------------
|
|
531
|
+
|
|
532
|
+
impl ResponseClient for ManagedClient {
|
|
533
|
+
fn create_response(&self, req: CreateResponseRequest) -> BoxFuture<'_, ResponseObject> {
|
|
534
|
+
self.inner.create_response(req)
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
fn retrieve_response(&self, id: &str) -> BoxFuture<'_, ResponseObject> {
|
|
538
|
+
self.inner.retrieve_response(id)
|
|
539
|
+
}
|
|
540
|
+
|
|
541
|
+
fn cancel_response(&self, id: &str) -> BoxFuture<'_, ResponseObject> {
|
|
542
|
+
self.inner.cancel_response(id)
|
|
543
|
+
}
|
|
544
|
+
}
|
|
545
|
+
|
|
546
|
+
#[cfg(test)]
mod tests {
    use std::time::Duration;

    use super::*;
    use crate::client::ClientConfigBuilder;
    use crate::tower::{BudgetConfig, CacheConfig};

    /// Construct a `ManagedClient` from `config` with no model hint,
    /// panicking if construction fails.
    fn managed(config: ClientConfig) -> ManagedClient {
        ManagedClient::new(config, None).expect("should build")
    }

    /// A plain config yields no service stack and no budget state.
    #[test]
    fn no_middleware_when_config_is_plain() {
        let client = managed(ClientConfig::new("test-key"));
        assert!(!client.has_middleware());
        assert!(client.budget_state().is_none());
    }

    /// Supplying a cache config switches middleware on.
    #[test]
    fn middleware_active_with_cache_config() {
        let builder = ClientConfigBuilder::new("test-key").cache(CacheConfig::default());
        let client = managed(builder.build());
        assert!(client.has_middleware());
    }

    /// Supplying a budget config switches middleware on and exposes the
    /// budget state handle.
    #[test]
    fn middleware_active_with_budget_config() {
        let builder = ClientConfigBuilder::new("test-key").budget(BudgetConfig::default());
        let client = managed(builder.build());
        assert!(client.has_middleware());
        assert!(client.budget_state().is_some());
    }

    /// Supplying a cooldown duration switches middleware on.
    #[test]
    fn middleware_active_with_cooldown() {
        let builder = ClientConfigBuilder::new("test-key").cooldown(Duration::from_secs(30));
        let client = managed(builder.build());
        assert!(client.has_middleware());
    }

    /// Enabling tracing switches middleware on.
    #[test]
    fn middleware_active_with_tracing() {
        let builder = ClientConfigBuilder::new("test-key").tracing(true);
        let client = managed(builder.build());
        assert!(client.has_middleware());
    }

    /// Enabling cost tracking switches middleware on.
    #[test]
    fn middleware_active_with_cost_tracking() {
        let builder = ClientConfigBuilder::new("test-key").cost_tracking(true);
        let client = managed(builder.build());
        assert!(client.has_middleware());
    }

    /// Explicitly disabling tracing and cost tracking leaves middleware off.
    #[test]
    fn no_middleware_when_tracing_false() {
        let builder = ClientConfigBuilder::new("test-key")
            .tracing(false)
            .cost_tracking(false);
        let client = managed(builder.build());
        assert!(!client.has_middleware());
    }
}
|