liter_llm 1.0.0.pre.rc.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +239 -0
  3. data/ext/liter_llm_rb/extconf.rb +65 -0
  4. data/ext/liter_llm_rb/native/.cargo/config.toml +23 -0
  5. data/ext/liter_llm_rb/native/Cargo.lock +3713 -0
  6. data/ext/liter_llm_rb/native/Cargo.toml +32 -0
  7. data/ext/liter_llm_rb/native/build.rs +15 -0
  8. data/ext/liter_llm_rb/native/src/lib.rs +1079 -0
  9. data/lib/liter_llm.rb +8 -0
  10. data/sig/liter_llm.rbs +416 -0
  11. data/vendor/Cargo.toml +54 -0
  12. data/vendor/liter-llm/Cargo.toml +92 -0
  13. data/vendor/liter-llm/README.md +252 -0
  14. data/vendor/liter-llm/schemas/pricing.json +40 -0
  15. data/vendor/liter-llm/schemas/providers.json +1662 -0
  16. data/vendor/liter-llm/src/auth/azure_ad.rs +264 -0
  17. data/vendor/liter-llm/src/auth/bedrock_sts.rs +353 -0
  18. data/vendor/liter-llm/src/auth/mod.rs +68 -0
  19. data/vendor/liter-llm/src/auth/vertex_oauth.rs +353 -0
  20. data/vendor/liter-llm/src/client/config.rs +351 -0
  21. data/vendor/liter-llm/src/client/managed.rs +622 -0
  22. data/vendor/liter-llm/src/client/mod.rs +864 -0
  23. data/vendor/liter-llm/src/cost.rs +212 -0
  24. data/vendor/liter-llm/src/error.rs +190 -0
  25. data/vendor/liter-llm/src/http/eventstream.rs +860 -0
  26. data/vendor/liter-llm/src/http/mod.rs +12 -0
  27. data/vendor/liter-llm/src/http/request.rs +438 -0
  28. data/vendor/liter-llm/src/http/retry.rs +72 -0
  29. data/vendor/liter-llm/src/http/streaming.rs +289 -0
  30. data/vendor/liter-llm/src/lib.rs +37 -0
  31. data/vendor/liter-llm/src/provider/anthropic.rs +2250 -0
  32. data/vendor/liter-llm/src/provider/azure.rs +579 -0
  33. data/vendor/liter-llm/src/provider/bedrock.rs +1543 -0
  34. data/vendor/liter-llm/src/provider/cohere.rs +654 -0
  35. data/vendor/liter-llm/src/provider/custom.rs +404 -0
  36. data/vendor/liter-llm/src/provider/google_ai.rs +281 -0
  37. data/vendor/liter-llm/src/provider/mistral.rs +188 -0
  38. data/vendor/liter-llm/src/provider/mod.rs +616 -0
  39. data/vendor/liter-llm/src/provider/vertex.rs +1504 -0
  40. data/vendor/liter-llm/src/tests.rs +1425 -0
  41. data/vendor/liter-llm/src/tokenizer.rs +281 -0
  42. data/vendor/liter-llm/src/tower/budget.rs +599 -0
  43. data/vendor/liter-llm/src/tower/cache.rs +502 -0
  44. data/vendor/liter-llm/src/tower/cache_opendal.rs +270 -0
  45. data/vendor/liter-llm/src/tower/cooldown.rs +231 -0
  46. data/vendor/liter-llm/src/tower/cost.rs +404 -0
  47. data/vendor/liter-llm/src/tower/fallback.rs +121 -0
  48. data/vendor/liter-llm/src/tower/health.rs +219 -0
  49. data/vendor/liter-llm/src/tower/hooks.rs +369 -0
  50. data/vendor/liter-llm/src/tower/mod.rs +77 -0
  51. data/vendor/liter-llm/src/tower/rate_limit.rs +300 -0
  52. data/vendor/liter-llm/src/tower/router.rs +436 -0
  53. data/vendor/liter-llm/src/tower/service.rs +181 -0
  54. data/vendor/liter-llm/src/tower/tests.rs +539 -0
  55. data/vendor/liter-llm/src/tower/tests_common.rs +252 -0
  56. data/vendor/liter-llm/src/tower/tracing.rs +209 -0
  57. data/vendor/liter-llm/src/tower/types.rs +170 -0
  58. data/vendor/liter-llm/src/types/audio.rs +52 -0
  59. data/vendor/liter-llm/src/types/batch.rs +77 -0
  60. data/vendor/liter-llm/src/types/chat.rs +214 -0
  61. data/vendor/liter-llm/src/types/common.rs +244 -0
  62. data/vendor/liter-llm/src/types/embedding.rs +84 -0
  63. data/vendor/liter-llm/src/types/files.rs +58 -0
  64. data/vendor/liter-llm/src/types/image.rs +40 -0
  65. data/vendor/liter-llm/src/types/mod.rs +27 -0
  66. data/vendor/liter-llm/src/types/models.rs +21 -0
  67. data/vendor/liter-llm/src/types/moderation.rs +80 -0
  68. data/vendor/liter-llm/src/types/ocr.rs +87 -0
  69. data/vendor/liter-llm/src/types/rerank.rs +46 -0
  70. data/vendor/liter-llm/src/types/responses.rs +55 -0
  71. data/vendor/liter-llm/src/types/search.rs +45 -0
  72. data/vendor/liter-llm/tests/contract.rs +332 -0
  73. data/vendor/liter-llm-ffi/Cargo.toml +30 -0
  74. data/vendor/liter-llm-ffi/build.rs +66 -0
  75. data/vendor/liter-llm-ffi/cbindgen.toml +60 -0
  76. data/vendor/liter-llm-ffi/liter_llm.h +850 -0
  77. data/vendor/liter-llm-ffi/src/lib.rs +2488 -0
  78. metadata +286 -0
@@ -0,0 +1,351 @@
1
+ use std::sync::Arc;
2
+ use std::time::Duration;
3
+
4
+ use secrecy::SecretString;
5
+
6
+ use crate::auth::CredentialProvider;
7
+ #[cfg(feature = "native-http")]
8
+ use crate::error::{LiterLlmError, Result};
9
+ #[cfg(feature = "tower")]
10
+ use crate::tower::{BudgetConfig, CacheConfig, CacheStore, LlmHook, RateLimitConfig};
11
+
12
+ /// Configuration for an LLM client.
13
+ ///
14
+ /// `api_key` is stored as a [`SecretString`] so it is zeroed on drop and never
15
+ /// printed accidentally. Access it via [`secrecy::ExposeSecret`].
16
+ #[derive(Clone)]
17
+ pub struct ClientConfig {
18
+ /// API key for authentication (stored as a secret).
19
+ pub api_key: SecretString,
20
+ /// Override base URL. When set, all requests go here regardless of model
21
+ /// name, and provider auto-detection is skipped.
22
+ pub base_url: Option<String>,
23
+ /// Request timeout.
24
+ pub timeout: Duration,
25
+ /// Maximum number of retries on 429 / 5xx responses.
26
+ pub max_retries: u32,
27
+ /// Extra headers sent on every request.
28
+ ///
29
+ /// Use `Vec<(String, String)>` rather than `HashMap` to preserve insertion
30
+ /// order and avoid non-deterministic iteration when building the reqwest
31
+ /// `HeaderMap`. Access via [`ClientConfig::headers`]; do not mutate
32
+ /// directly from outside this crate.
33
+ pub(crate) extra_headers: Vec<(String, String)>,
34
+ /// Optional dynamic credential provider for token-based auth
35
+ /// (Azure AD, Vertex OAuth2) or refreshable credentials (AWS STS).
36
+ ///
37
+ /// When set, the client calls `resolve()` before each request to obtain
38
+ /// a fresh credential. When `None`, the static `api_key` is used.
39
+ pub credential_provider: Option<Arc<dyn CredentialProvider>>,
40
+
41
+ /// Configuration for the response cache Tower middleware layer.
42
+ ///
43
+ /// When set, bindings and advanced Rust users can use this to construct
44
+ /// a [`CacheLayer`](crate::tower::CacheLayer) in their Tower stack.
45
+ #[cfg(feature = "tower")]
46
+ pub cache_config: Option<CacheConfig>,
47
+
48
+ /// Custom cache store backend for the cache Tower middleware layer.
49
+ ///
50
+ /// When set alongside `cache_config`, the cache layer will use this
51
+ /// store instead of the default in-memory LRU.
52
+ #[cfg(feature = "tower")]
53
+ pub cache_store: Option<Arc<dyn CacheStore>>,
54
+
55
+ /// Configuration for the budget enforcement Tower middleware layer.
56
+ ///
57
+ /// When set, bindings and advanced Rust users can use this to construct
58
+ /// a [`BudgetLayer`](crate::tower::BudgetLayer) in their Tower stack.
59
+ #[cfg(feature = "tower")]
60
+ pub budget_config: Option<BudgetConfig>,
61
+
62
+ /// User-defined hooks for the hooks Tower middleware layer.
63
+ ///
64
+ /// These hooks are invoked at request lifecycle points (pre-request,
65
+ /// post-response, on-error) when a
66
+ /// [`HooksLayer`](crate::tower::HooksLayer) is constructed from this
67
+ /// config.
68
+ #[cfg(feature = "tower")]
69
+ pub hooks: Vec<Arc<dyn LlmHook>>,
70
+
71
+ /// Cooldown duration after transient errors (rate limit, timeout, server error).
72
+ /// When set, the client rejects requests with `ServiceUnavailable` during cooldown.
73
+ #[cfg(feature = "tower")]
74
+ pub cooldown_duration: Option<Duration>,
75
+
76
+ /// Per-model rate limiting configuration (RPM/TPM).
77
+ #[cfg(feature = "tower")]
78
+ pub rate_limit_config: Option<RateLimitConfig>,
79
+
80
+ /// Background health check interval. When set, periodically probes the provider
81
+ /// and rejects requests when the provider is unhealthy.
82
+ #[cfg(feature = "tower")]
83
+ pub health_check_interval: Option<Duration>,
84
+
85
+ /// Enable per-request cost tracking. Costs are accumulated atomically and
86
+ /// logged via `tracing::info`.
87
+ #[cfg(feature = "tower")]
88
+ pub enable_cost_tracking: bool,
89
+
90
+ /// Enable OpenTelemetry-compatible tracing spans for every request.
91
+ #[cfg(feature = "tower")]
92
+ pub enable_tracing: bool,
93
+ }
94
+
95
+ impl ClientConfig {
96
+ /// Create a config with the given API key and sensible defaults.
97
+ pub fn new(api_key: impl Into<String>) -> Self {
98
+ Self {
99
+ api_key: SecretString::from(api_key.into()),
100
+ base_url: None,
101
+ timeout: Duration::from_secs(60),
102
+ max_retries: 3,
103
+ extra_headers: Vec::new(),
104
+ credential_provider: None,
105
+ #[cfg(feature = "tower")]
106
+ cache_config: None,
107
+ #[cfg(feature = "tower")]
108
+ cache_store: None,
109
+ #[cfg(feature = "tower")]
110
+ budget_config: None,
111
+ #[cfg(feature = "tower")]
112
+ hooks: Vec::new(),
113
+ #[cfg(feature = "tower")]
114
+ cooldown_duration: None,
115
+ #[cfg(feature = "tower")]
116
+ rate_limit_config: None,
117
+ #[cfg(feature = "tower")]
118
+ health_check_interval: None,
119
+ #[cfg(feature = "tower")]
120
+ enable_cost_tracking: false,
121
+ #[cfg(feature = "tower")]
122
+ enable_tracing: false,
123
+ }
124
+ }
125
+
126
+ /// Return the extra headers as an ordered slice of `(name, value)` pairs.
127
+ pub fn headers(&self) -> &[(String, String)] {
128
+ &self.extra_headers
129
+ }
130
+ }
131
+
132
+ /// Note: intentionally does *not* implement `Debug` so the secret key is never
133
+ /// accidentally logged via `{:?}`.
134
+ impl std::fmt::Debug for ClientConfig {
135
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
136
+ // Redact all header values — they may contain API keys or secrets.
137
+ let redacted_headers: Vec<(&str, &str)> = self
138
+ .extra_headers
139
+ .iter()
140
+ .map(|(k, _v)| (k.as_str(), "[redacted]"))
141
+ .collect();
142
+ let mut dbg = f.debug_struct("ClientConfig");
143
+ dbg.field("api_key", &"[redacted]")
144
+ .field("base_url", &self.base_url)
145
+ .field("timeout", &self.timeout)
146
+ .field("max_retries", &self.max_retries)
147
+ .field("extra_headers", &redacted_headers)
148
+ .field(
149
+ "credential_provider",
150
+ &self.credential_provider.as_ref().map(|_| "[configured]"),
151
+ );
152
+
153
+ #[cfg(feature = "tower")]
154
+ {
155
+ dbg.field("cache_config", &self.cache_config)
156
+ .field("cache_store", &self.cache_store.as_ref().map(|_| "[configured]"))
157
+ .field("budget_config", &self.budget_config)
158
+ .field("hooks_count", &self.hooks.len())
159
+ .field("cooldown_duration", &self.cooldown_duration)
160
+ .field("rate_limit_config", &self.rate_limit_config)
161
+ .field("health_check_interval", &self.health_check_interval)
162
+ .field("enable_cost_tracking", &self.enable_cost_tracking)
163
+ .field("enable_tracing", &self.enable_tracing);
164
+ }
165
+
166
+ dbg.finish()
167
+ }
168
+ }
169
+
170
+ /// Builder for [`ClientConfig`].
171
+ ///
172
+ /// Construct with [`ClientConfigBuilder::new`] and call builder methods to
173
+ /// customise the configuration, then call [`ClientConfigBuilder::build`] to
174
+ /// obtain a [`ClientConfig`].
175
+ #[must_use]
176
+ pub struct ClientConfigBuilder {
177
+ config: ClientConfig,
178
+ }
179
+
180
+ impl ClientConfigBuilder {
181
+ /// Create a new builder with the given API key and sensible defaults.
182
+ pub fn new(api_key: impl Into<String>) -> Self {
183
+ Self {
184
+ config: ClientConfig::new(api_key),
185
+ }
186
+ }
187
+
188
+ /// Override the provider base URL for all requests.
189
+ pub fn base_url(mut self, url: impl Into<String>) -> Self {
190
+ self.config.base_url = Some(url.into());
191
+ self
192
+ }
193
+
194
+ /// Set the per-request timeout (default: 60 s).
195
+ pub fn timeout(mut self, timeout: Duration) -> Self {
196
+ self.config.timeout = timeout;
197
+ self
198
+ }
199
+
200
+ /// Set the maximum number of retries on 429 / 5xx responses (default: 3).
201
+ pub fn max_retries(mut self, retries: u32) -> Self {
202
+ self.config.max_retries = retries;
203
+ self
204
+ }
205
+
206
+ /// Set a dynamic credential provider for token-based or refreshable auth.
207
+ ///
208
+ /// When configured, the client calls `resolve()` before each request
209
+ /// instead of using the static `api_key` for authentication.
210
+ pub fn credential_provider(mut self, provider: Arc<dyn CredentialProvider>) -> Self {
211
+ self.config.credential_provider = Some(provider);
212
+ self
213
+ }
214
+
215
+ /// Add a custom header sent on every request.
216
+ ///
217
+ /// Returns an error if either `key` or `value` is not a valid HTTP header
218
+ /// name / value.
219
+ ///
220
+ /// This method is only available when the `native-http` feature is enabled
221
+ /// because header validation relies on `reqwest`'s header types.
222
+ #[cfg(feature = "native-http")]
223
+ pub fn header(mut self, key: impl Into<String>, value: impl Into<String>) -> Result<Self> {
224
+ let key = key.into();
225
+ let value = value.into();
226
+
227
+ // Validate header name.
228
+ reqwest::header::HeaderName::from_bytes(key.as_bytes()).map_err(|e| LiterLlmError::InvalidHeader {
229
+ name: key.clone(),
230
+ reason: e.to_string(),
231
+ })?;
232
+
233
+ // Validate header value.
234
+ reqwest::header::HeaderValue::from_str(&value).map_err(|e| LiterLlmError::InvalidHeader {
235
+ name: key.clone(),
236
+ reason: e.to_string(),
237
+ })?;
238
+
239
+ self.config.extra_headers.push((key, value));
240
+ Ok(self)
241
+ }
242
+
243
+ /// Set the response cache configuration for the Tower middleware stack.
244
+ ///
245
+ /// When set, bindings and advanced Rust users can read this from the
246
+ /// built [`ClientConfig`] to construct a
247
+ /// [`CacheLayer`](crate::tower::CacheLayer).
248
+ #[cfg(feature = "tower")]
249
+ pub fn cache(mut self, config: CacheConfig) -> Self {
250
+ self.config.cache_config = Some(config);
251
+ self
252
+ }
253
+
254
+ /// Set a custom cache store backend for the Tower cache middleware.
255
+ ///
256
+ /// When set alongside [`cache`](Self::cache), the cache layer will use
257
+ /// this store instead of the default in-memory LRU.
258
+ #[cfg(feature = "tower")]
259
+ pub fn cache_store(mut self, store: Arc<dyn CacheStore>) -> Self {
260
+ self.config.cache_store = Some(store);
261
+ self
262
+ }
263
+
264
+ /// Set the budget enforcement configuration for the Tower middleware stack.
265
+ ///
266
+ /// When set, bindings and advanced Rust users can read this from the
267
+ /// built [`ClientConfig`] to construct a
268
+ /// [`BudgetLayer`](crate::tower::BudgetLayer).
269
+ #[cfg(feature = "tower")]
270
+ pub fn budget(mut self, config: BudgetConfig) -> Self {
271
+ self.config.budget_config = Some(config);
272
+ self
273
+ }
274
+
275
+ /// Add a single hook to the Tower hooks middleware stack.
276
+ ///
277
+ /// Hooks are invoked sequentially in registration order at request
278
+ /// lifecycle points (pre-request, post-response, on-error).
279
+ #[cfg(feature = "tower")]
280
+ pub fn hook(mut self, hook: Arc<dyn LlmHook>) -> Self {
281
+ self.config.hooks.push(hook);
282
+ self
283
+ }
284
+
285
+ /// Set the full list of hooks for the Tower hooks middleware stack,
286
+ /// replacing any previously registered hooks.
287
+ ///
288
+ /// Hooks are invoked sequentially in registration order.
289
+ #[cfg(feature = "tower")]
290
+ pub fn hooks(mut self, hooks: Vec<Arc<dyn LlmHook>>) -> Self {
291
+ self.config.hooks = hooks;
292
+ self
293
+ }
294
+
295
+ /// Set the cooldown duration after transient errors.
296
+ ///
297
+ /// When set, the client rejects requests with `ServiceUnavailable` for
298
+ /// the given duration after a transient error (rate limit, timeout,
299
+ /// server error).
300
+ #[cfg(feature = "tower")]
301
+ pub fn cooldown(mut self, duration: Duration) -> Self {
302
+ self.config.cooldown_duration = Some(duration);
303
+ self
304
+ }
305
+
306
+ /// Set per-model rate limiting configuration.
307
+ ///
308
+ /// When set, requests exceeding the configured RPM or TPM limits are
309
+ /// rejected with [`LiterLlmError::RateLimited`](crate::error::LiterLlmError::RateLimited).
310
+ #[cfg(feature = "tower")]
311
+ pub fn rate_limit(mut self, config: RateLimitConfig) -> Self {
312
+ self.config.rate_limit_config = Some(config);
313
+ self
314
+ }
315
+
316
+ /// Set the background health check interval.
317
+ ///
318
+ /// When set, the client periodically probes the provider and rejects
319
+ /// requests when the provider is unhealthy.
320
+ #[cfg(feature = "tower")]
321
+ pub fn health_check(mut self, interval: Duration) -> Self {
322
+ self.config.health_check_interval = Some(interval);
323
+ self
324
+ }
325
+
326
+ /// Enable or disable per-request cost tracking.
327
+ ///
328
+ /// When enabled, estimated USD cost is recorded on the current tracing
329
+ /// span as `gen_ai.usage.cost`.
330
+ #[cfg(feature = "tower")]
331
+ pub fn cost_tracking(mut self, enabled: bool) -> Self {
332
+ self.config.enable_cost_tracking = enabled;
333
+ self
334
+ }
335
+
336
+ /// Enable or disable OpenTelemetry-compatible tracing spans.
337
+ ///
338
+ /// When enabled, every request is wrapped in a `gen_ai` tracing span
339
+ /// with semantic convention attributes.
340
+ #[cfg(feature = "tower")]
341
+ pub fn tracing(mut self, enabled: bool) -> Self {
342
+ self.config.enable_tracing = enabled;
343
+ self
344
+ }
345
+
346
+ /// Consume the builder and return the completed [`ClientConfig`].
347
+ #[must_use]
348
+ pub fn build(self) -> ClientConfig {
349
+ self.config
350
+ }
351
+ }