liter_llm 1.0.0.pre.rc.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +239 -0
  3. data/ext/liter_llm_rb/extconf.rb +65 -0
  4. data/ext/liter_llm_rb/native/.cargo/config.toml +23 -0
  5. data/ext/liter_llm_rb/native/Cargo.lock +3713 -0
  6. data/ext/liter_llm_rb/native/Cargo.toml +32 -0
  7. data/ext/liter_llm_rb/native/build.rs +15 -0
  8. data/ext/liter_llm_rb/native/src/lib.rs +1079 -0
  9. data/lib/liter_llm.rb +8 -0
  10. data/sig/liter_llm.rbs +416 -0
  11. data/vendor/Cargo.toml +54 -0
  12. data/vendor/liter-llm/Cargo.toml +92 -0
  13. data/vendor/liter-llm/README.md +252 -0
  14. data/vendor/liter-llm/schemas/pricing.json +40 -0
  15. data/vendor/liter-llm/schemas/providers.json +1662 -0
  16. data/vendor/liter-llm/src/auth/azure_ad.rs +264 -0
  17. data/vendor/liter-llm/src/auth/bedrock_sts.rs +353 -0
  18. data/vendor/liter-llm/src/auth/mod.rs +68 -0
  19. data/vendor/liter-llm/src/auth/vertex_oauth.rs +353 -0
  20. data/vendor/liter-llm/src/client/config.rs +351 -0
  21. data/vendor/liter-llm/src/client/managed.rs +622 -0
  22. data/vendor/liter-llm/src/client/mod.rs +864 -0
  23. data/vendor/liter-llm/src/cost.rs +212 -0
  24. data/vendor/liter-llm/src/error.rs +190 -0
  25. data/vendor/liter-llm/src/http/eventstream.rs +860 -0
  26. data/vendor/liter-llm/src/http/mod.rs +12 -0
  27. data/vendor/liter-llm/src/http/request.rs +438 -0
  28. data/vendor/liter-llm/src/http/retry.rs +72 -0
  29. data/vendor/liter-llm/src/http/streaming.rs +289 -0
  30. data/vendor/liter-llm/src/lib.rs +37 -0
  31. data/vendor/liter-llm/src/provider/anthropic.rs +2250 -0
  32. data/vendor/liter-llm/src/provider/azure.rs +579 -0
  33. data/vendor/liter-llm/src/provider/bedrock.rs +1543 -0
  34. data/vendor/liter-llm/src/provider/cohere.rs +654 -0
  35. data/vendor/liter-llm/src/provider/custom.rs +404 -0
  36. data/vendor/liter-llm/src/provider/google_ai.rs +281 -0
  37. data/vendor/liter-llm/src/provider/mistral.rs +188 -0
  38. data/vendor/liter-llm/src/provider/mod.rs +616 -0
  39. data/vendor/liter-llm/src/provider/vertex.rs +1504 -0
  40. data/vendor/liter-llm/src/tests.rs +1425 -0
  41. data/vendor/liter-llm/src/tokenizer.rs +281 -0
  42. data/vendor/liter-llm/src/tower/budget.rs +599 -0
  43. data/vendor/liter-llm/src/tower/cache.rs +502 -0
  44. data/vendor/liter-llm/src/tower/cache_opendal.rs +270 -0
  45. data/vendor/liter-llm/src/tower/cooldown.rs +231 -0
  46. data/vendor/liter-llm/src/tower/cost.rs +404 -0
  47. data/vendor/liter-llm/src/tower/fallback.rs +121 -0
  48. data/vendor/liter-llm/src/tower/health.rs +219 -0
  49. data/vendor/liter-llm/src/tower/hooks.rs +369 -0
  50. data/vendor/liter-llm/src/tower/mod.rs +77 -0
  51. data/vendor/liter-llm/src/tower/rate_limit.rs +300 -0
  52. data/vendor/liter-llm/src/tower/router.rs +436 -0
  53. data/vendor/liter-llm/src/tower/service.rs +181 -0
  54. data/vendor/liter-llm/src/tower/tests.rs +539 -0
  55. data/vendor/liter-llm/src/tower/tests_common.rs +252 -0
  56. data/vendor/liter-llm/src/tower/tracing.rs +209 -0
  57. data/vendor/liter-llm/src/tower/types.rs +170 -0
  58. data/vendor/liter-llm/src/types/audio.rs +52 -0
  59. data/vendor/liter-llm/src/types/batch.rs +77 -0
  60. data/vendor/liter-llm/src/types/chat.rs +214 -0
  61. data/vendor/liter-llm/src/types/common.rs +244 -0
  62. data/vendor/liter-llm/src/types/embedding.rs +84 -0
  63. data/vendor/liter-llm/src/types/files.rs +58 -0
  64. data/vendor/liter-llm/src/types/image.rs +40 -0
  65. data/vendor/liter-llm/src/types/mod.rs +27 -0
  66. data/vendor/liter-llm/src/types/models.rs +21 -0
  67. data/vendor/liter-llm/src/types/moderation.rs +80 -0
  68. data/vendor/liter-llm/src/types/ocr.rs +87 -0
  69. data/vendor/liter-llm/src/types/rerank.rs +46 -0
  70. data/vendor/liter-llm/src/types/responses.rs +55 -0
  71. data/vendor/liter-llm/src/types/search.rs +45 -0
  72. data/vendor/liter-llm/tests/contract.rs +332 -0
  73. data/vendor/liter-llm-ffi/Cargo.toml +30 -0
  74. data/vendor/liter-llm-ffi/build.rs +66 -0
  75. data/vendor/liter-llm-ffi/cbindgen.toml +60 -0
  76. data/vendor/liter-llm-ffi/liter_llm.h +850 -0
  77. data/vendor/liter-llm-ffi/src/lib.rs +2488 -0
  78. metadata +286 -0
@@ -0,0 +1,252 @@
1
+ //! Shared test helpers for tower middleware tests.
2
+ //!
3
+ //! This module provides a [`MockClient`] and helper functions used across
4
+ //! multiple middleware test modules.
5
+ #![allow(dead_code)]
6
+
7
+ use std::pin::Pin;
8
+ use std::sync::Arc;
9
+ use std::sync::atomic::{AtomicUsize, Ordering};
10
+ use std::task::{Context, Poll};
11
+
12
+ use futures_core::Stream;
13
+
14
+ use crate::client::{BoxFuture, BoxStream, LlmClient};
15
+ use crate::error::{LiterLlmError, Result};
16
+ use crate::types::audio::{CreateSpeechRequest, CreateTranscriptionRequest, TranscriptionResponse};
17
+ use crate::types::image::{CreateImageRequest, ImagesResponse};
18
+ use crate::types::moderation::{ModerationRequest, ModerationResponse};
19
+ use crate::types::ocr::{OcrRequest, OcrResponse};
20
+ use crate::types::rerank::{RerankRequest, RerankResponse};
21
+ use crate::types::search::{SearchRequest, SearchResponse};
22
+ use crate::types::{
23
+ AssistantMessage, ChatCompletionChunk, ChatCompletionRequest, ChatCompletionResponse, Choice, EmbeddingObject,
24
+ EmbeddingRequest, EmbeddingResponse, FinishReason, Message, ModelsListResponse, SystemMessage, Usage,
25
+ };
26
+
27
+ /// A stream that yields no items.
28
+ pub struct EmptyStream;
29
+
30
+ impl Stream for EmptyStream {
31
+ type Item = Result<ChatCompletionChunk>;
32
+ fn poll_next(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
33
+ Poll::Ready(None)
34
+ }
35
+ }
36
+
37
/// The failure modes a test double can be configured with.
///
/// `LiterLlmError` is not `Clone`, so instead of storing an error value we
/// store one of these descriptions and build a fresh [`LiterLlmError`] on
/// demand with [`LiterLlmErrorKind::to_error`]. This enum derives no traits
/// itself (in particular it is neither `Clone` nor serializable).
pub enum LiterLlmErrorKind {
    /// A rate-limit failure carrying the given message.
    RateLimited { message: String },
    /// A service-unavailable failure carrying the given message.
    ServiceUnavailable { message: String },
    /// A timeout; carries no message.
    Timeout,
    /// An authentication failure carrying the given message.
    Authentication { message: String },
}
45
+
46
+ impl LiterLlmErrorKind {
47
+ pub fn to_error(&self) -> LiterLlmError {
48
+ match self {
49
+ Self::RateLimited { message } => LiterLlmError::RateLimited {
50
+ message: message.clone(),
51
+ retry_after: None,
52
+ },
53
+ Self::ServiceUnavailable { message } => LiterLlmError::ServiceUnavailable {
54
+ message: message.clone(),
55
+ },
56
+ Self::Timeout => LiterLlmError::Timeout,
57
+ Self::Authentication { message } => LiterLlmError::BadRequest {
58
+ message: message.clone(),
59
+ },
60
+ }
61
+ }
62
+ }
63
+
64
/// A mock client for middleware tests. Its methods perform no I/O and
/// resolve with canned responses or a configured error.
///
/// The type is `Clone`; because `call_count` lives behind an [`Arc`], every
/// clone shares the same counter.
#[derive(Clone)]
pub struct MockClient {
    /// When set, `chat` returns this error instead of the canned response.
    /// `chat_stream` does not consult this field.
    chat_error: Option<Arc<LiterLlmErrorKind>>,
    /// Number of times `chat` / `chat_stream` has been called.
    pub call_count: Arc<AtomicUsize>,
}
73
+
74
+ pub fn make_chat_response(model: &str) -> ChatCompletionResponse {
75
+ ChatCompletionResponse {
76
+ id: "test-id".into(),
77
+ object: "chat.completion".into(),
78
+ created: 0,
79
+ model: model.into(),
80
+ choices: vec![Choice {
81
+ index: 0,
82
+ message: AssistantMessage {
83
+ content: Some("Hello!".into()),
84
+ name: None,
85
+ tool_calls: None,
86
+ refusal: None,
87
+ function_call: None,
88
+ },
89
+ finish_reason: Some(FinishReason::Stop),
90
+ }],
91
+ usage: Some(Usage {
92
+ prompt_tokens: 10,
93
+ completion_tokens: 5,
94
+ total_tokens: 15,
95
+ }),
96
+ system_fingerprint: None,
97
+ service_tier: None,
98
+ }
99
+ }
100
+
101
+ impl MockClient {
102
+ fn new_with_error(error: Option<LiterLlmErrorKind>) -> Self {
103
+ Self {
104
+ chat_error: error.map(Arc::new),
105
+ call_count: Arc::new(AtomicUsize::new(0)),
106
+ }
107
+ }
108
+
109
+ pub fn ok() -> Self {
110
+ Self::new_with_error(None)
111
+ }
112
+
113
+ pub fn failing_rate_limited() -> Self {
114
+ Self::new_with_error(Some(LiterLlmErrorKind::RateLimited {
115
+ message: "too many requests".into(),
116
+ }))
117
+ }
118
+
119
+ pub fn failing_service_unavailable() -> Self {
120
+ Self::new_with_error(Some(LiterLlmErrorKind::ServiceUnavailable { message: "503".into() }))
121
+ }
122
+
123
+ pub fn failing_auth() -> Self {
124
+ Self::new_with_error(Some(LiterLlmErrorKind::Authentication {
125
+ message: "invalid key".into(),
126
+ }))
127
+ }
128
+
129
+ pub fn failing_timeout() -> Self {
130
+ Self::new_with_error(Some(LiterLlmErrorKind::Timeout))
131
+ }
132
+ }
133
+
134
+ impl LlmClient for MockClient {
135
+ fn chat(&self, req: ChatCompletionRequest) -> BoxFuture<'_, ChatCompletionResponse> {
136
+ self.call_count.fetch_add(1, Ordering::SeqCst);
137
+ let result = match &self.chat_error {
138
+ Some(kind) => Err(kind.to_error()),
139
+ None => Ok(make_chat_response(&req.model)),
140
+ };
141
+ Box::pin(async move { result })
142
+ }
143
+
144
+ fn chat_stream(&self, _req: ChatCompletionRequest) -> BoxFuture<'_, BoxStream<'_, ChatCompletionChunk>> {
145
+ self.call_count.fetch_add(1, Ordering::SeqCst);
146
+ Box::pin(async move {
147
+ let stream: BoxStream<'_, ChatCompletionChunk> = Box::pin(EmptyStream);
148
+ Ok(stream)
149
+ })
150
+ }
151
+
152
+ fn embed(&self, req: EmbeddingRequest) -> BoxFuture<'_, EmbeddingResponse> {
153
+ let resp = EmbeddingResponse {
154
+ object: "list".into(),
155
+ data: vec![EmbeddingObject {
156
+ object: "embedding".into(),
157
+ embedding: vec![0.1, 0.2, 0.3],
158
+ index: 0,
159
+ }],
160
+ model: req.model.clone(),
161
+ usage: Some(Usage {
162
+ prompt_tokens: 4,
163
+ completion_tokens: 0,
164
+ total_tokens: 4,
165
+ }),
166
+ };
167
+ Box::pin(async move { Ok(resp) })
168
+ }
169
+
170
+ fn list_models(&self) -> BoxFuture<'_, ModelsListResponse> {
171
+ Box::pin(async move {
172
+ Ok(ModelsListResponse {
173
+ object: "list".into(),
174
+ data: vec![],
175
+ })
176
+ })
177
+ }
178
+
179
+ fn image_generate(&self, _req: CreateImageRequest) -> BoxFuture<'_, ImagesResponse> {
180
+ Box::pin(async move {
181
+ Ok(ImagesResponse {
182
+ created: 0,
183
+ data: vec![],
184
+ })
185
+ })
186
+ }
187
+
188
+ fn speech(&self, _req: CreateSpeechRequest) -> BoxFuture<'_, bytes::Bytes> {
189
+ Box::pin(async move { Ok(bytes::Bytes::new()) })
190
+ }
191
+
192
+ fn transcribe(&self, _req: CreateTranscriptionRequest) -> BoxFuture<'_, TranscriptionResponse> {
193
+ Box::pin(async move {
194
+ Ok(TranscriptionResponse {
195
+ text: String::new(),
196
+ language: None,
197
+ duration: None,
198
+ segments: None,
199
+ })
200
+ })
201
+ }
202
+
203
+ fn moderate(&self, _req: ModerationRequest) -> BoxFuture<'_, ModerationResponse> {
204
+ Box::pin(async move {
205
+ Ok(ModerationResponse {
206
+ id: String::new(),
207
+ model: String::new(),
208
+ results: vec![],
209
+ })
210
+ })
211
+ }
212
+
213
+ fn rerank(&self, _req: RerankRequest) -> BoxFuture<'_, RerankResponse> {
214
+ Box::pin(async move {
215
+ Ok(RerankResponse {
216
+ id: None,
217
+ results: vec![],
218
+ meta: None,
219
+ })
220
+ })
221
+ }
222
+
223
+ fn search(&self, _req: SearchRequest) -> BoxFuture<'_, SearchResponse> {
224
+ Box::pin(async {
225
+ Err(LiterLlmError::EndpointNotSupported {
226
+ endpoint: "search".into(),
227
+ provider: "mock".into(),
228
+ })
229
+ })
230
+ }
231
+
232
+ fn ocr(&self, _req: OcrRequest) -> BoxFuture<'_, OcrResponse> {
233
+ Box::pin(async {
234
+ Err(LiterLlmError::EndpointNotSupported {
235
+ endpoint: "ocr".into(),
236
+ provider: "mock".into(),
237
+ })
238
+ })
239
+ }
240
+ }
241
+
242
+ /// Build a [`ChatCompletionRequest`] with the given model name.
243
+ pub fn chat_req(model: &str) -> ChatCompletionRequest {
244
+ ChatCompletionRequest {
245
+ model: model.into(),
246
+ messages: vec![Message::System(SystemMessage {
247
+ content: "test".into(),
248
+ name: None,
249
+ })],
250
+ ..Default::default()
251
+ }
252
+ }
@@ -0,0 +1,209 @@
1
+ use std::task::{Context, Poll};
2
+
3
+ use tower::Layer;
4
+ use tower::Service;
5
+ use tracing::Instrument as _;
6
+
7
+ use super::types::{LlmRequest, LlmResponse};
8
+ use crate::client::BoxFuture;
9
+ use crate::error::{LiterLlmError, Result};
10
+ use crate::types::FinishReason;
11
+
12
/// Tower [`Layer`] that wraps a service with OpenTelemetry GenAI semantic
/// convention tracing spans.
///
/// Each call creates a [`tracing::info_span`] named `"gen_ai"` with the
/// following attributes:
///
/// - `gen_ai.operation.name` — the value of [`LlmRequest::operation_name`]
///   (e.g. `"chat"`, `"embeddings"`, `"list_models"`).
/// - `gen_ai.request.model` — the model name from the request, or `""` when
///   the request carries none (e.g. [`LlmRequest::ListModels`]).
/// - `gen_ai.system` — the provider prefix extracted from the model name (e.g.
///   `"openai"` for `"openai/gpt-4"`), or `""` when absent.
/// - `gen_ai.usage.input_tokens` / `gen_ai.usage.output_tokens` — recorded on
///   success whenever [`LlmResponse::usage`] returns usage data.
/// - `gen_ai.response.id` — the completion ID from a chat response.
/// - `gen_ai.response.model` — the model reported by a chat or embedding
///   response (may differ from the requested one).
/// - `gen_ai.response.finish_reasons` — space-separated finish reasons from
///   all choices of a chat response (e.g. `"stop"`); left empty when no choice
///   has a finish reason.
/// - `gen_ai.usage.cost` — declared on the span but not recorded anywhere in
///   this module.
/// - `error.type` — set to the error's `error_type()` value if the inner
///   service returns an error.
pub struct TracingLayer;
33
+
34
+ impl<S> Layer<S> for TracingLayer {
35
+ type Service = TracingService<S>;
36
+
37
+ fn layer(&self, inner: S) -> Self::Service {
38
+ TracingService { inner }
39
+ }
40
+ }
41
+
42
/// Tower service produced by [`TracingLayer`].
///
/// It holds the wrapped service and is `Clone` whenever that service is.
/// The derive generates the same `S: Clone` bound and field-wise clone that
/// a hand-written impl would.
#[derive(Clone)]
pub struct TracingService<S> {
    /// The wrapped service that actually handles the request.
    inner: S,
}
57
+
58
+ impl<S> Service<LlmRequest> for TracingService<S>
59
+ where
60
+ S: Service<LlmRequest, Response = LlmResponse, Error = LiterLlmError> + Send + 'static,
61
+ S::Future: Send + 'static,
62
+ {
63
+ type Response = LlmResponse;
64
+ type Error = LiterLlmError;
65
+ type Future = BoxFuture<'static, LlmResponse>;
66
+
67
+ fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<()>> {
68
+ self.inner.poll_ready(cx)
69
+ }
70
+
71
+ fn call(&mut self, req: LlmRequest) -> Self::Future {
72
+ let operation_name = req.operation_name();
73
+ // Borrow the model string from the request; split_once gives a &str
74
+ // slice so we avoid an extra allocation for the provider prefix.
75
+ let model_str = req.model().unwrap_or("");
76
+ let system = model_str.split_once('/').map_or("", |(prefix, _)| prefix);
77
+ // Clone once so the span owns the string values (required by tracing
78
+ // macros, which store field values inside the span).
79
+ let model = model_str.to_owned();
80
+
81
+ let span = tracing::info_span!(
82
+ "gen_ai",
83
+ gen_ai.operation.name = operation_name,
84
+ gen_ai.request.model = %model,
85
+ gen_ai.system = system,
86
+ gen_ai.usage.input_tokens = tracing::field::Empty,
87
+ gen_ai.usage.output_tokens = tracing::field::Empty,
88
+ gen_ai.response.id = tracing::field::Empty,
89
+ gen_ai.response.model = tracing::field::Empty,
90
+ gen_ai.usage.cost = tracing::field::Empty,
91
+ gen_ai.response.finish_reasons = tracing::field::Empty,
92
+ error.type = tracing::field::Empty,
93
+ );
94
+
95
+ let fut = self.inner.call(req);
96
+
97
+ // Use `.instrument(span)` rather than `span.enter()` in the async
98
+ // block. `span.enter()` in an async context is incorrect because the
99
+ // guard is dropped when the future suspends at an await point, causing
100
+ // the span to close prematurely. `Instrument` attaches the span to
101
+ // the future so it is entered and exited correctly around each poll.
102
+ Box::pin(
103
+ async move {
104
+ match fut.await {
105
+ Ok(resp) => {
106
+ // Record usage statistics and response metadata from the response when available.
107
+ record_response(&tracing::Span::current(), &resp);
108
+ Ok(resp)
109
+ }
110
+ Err(e) => {
111
+ tracing::Span::current().record("error.type", e.error_type());
112
+ Err(e)
113
+ }
114
+ }
115
+ }
116
+ .instrument(span),
117
+ )
118
+ }
119
+ }
120
+
121
+ /// Re-export `tracing_opentelemetry` when the `otel` feature is active.
122
+ ///
123
+ /// This lets callers compose a subscriber that exports spans to an
124
+ /// OpenTelemetry collector without taking a direct dependency on the crate:
125
+ ///
126
+ /// ```rust,ignore
127
+ /// use liter_llm::tower::tracing::tracing_opentelemetry::OpenTelemetryLayer;
128
+ /// ```
129
+ #[cfg(feature = "otel")]
130
+ pub use tracing_opentelemetry;
131
+
132
+ /// Re-export `opentelemetry` when the `otel` feature is active.
133
+ ///
134
+ /// Provides access to tracer/provider types needed to build a full
135
+ /// OpenTelemetry pipeline (e.g. `opentelemetry::global::tracer`).
136
+ #[cfg(feature = "otel")]
137
+ pub use opentelemetry;
138
+
139
+ /// Record span attributes from the response according to GenAI semantic conventions.
140
+ fn record_response(span: &tracing::Span, resp: &LlmResponse) {
141
+ match resp {
142
+ LlmResponse::Chat(r) => {
143
+ span.record("gen_ai.response.id", r.id.as_str());
144
+ span.record("gen_ai.response.model", r.model.as_str());
145
+
146
+ let finish_reasons = finish_reasons_str(r.choices.iter().map(|c| c.finish_reason.as_ref()));
147
+ if !finish_reasons.is_empty() {
148
+ span.record("gen_ai.response.finish_reasons", finish_reasons.as_str());
149
+ }
150
+ }
151
+ LlmResponse::Embed(r) => {
152
+ span.record("gen_ai.response.model", r.model.as_str());
153
+ }
154
+ // Other response variants do not carry aggregated usage or response metadata.
155
+ LlmResponse::ChatStream(_)
156
+ | LlmResponse::ListModels(_)
157
+ | LlmResponse::ImageGenerate(_)
158
+ | LlmResponse::Speech(_)
159
+ | LlmResponse::Transcribe(_)
160
+ | LlmResponse::Moderate(_)
161
+ | LlmResponse::Rerank(_)
162
+ | LlmResponse::Search(_)
163
+ | LlmResponse::Ocr(_) => {}
164
+ }
165
+
166
+ // Record usage tokens from the shared accessor — avoids duplicating the
167
+ // match arms that extract `Option<&Usage>` from each response variant.
168
+ if let Some(usage) = resp.usage() {
169
+ span.record("gen_ai.usage.input_tokens", usage.prompt_tokens);
170
+ span.record("gen_ai.usage.output_tokens", usage.completion_tokens);
171
+ }
172
+ }
173
+
174
+ /// Build a space-separated string of finish reason names from an iterator of
175
+ /// optional [`FinishReason`] values. `None` entries are skipped.
176
+ ///
177
+ /// Optimised for the common single-choice case: when there is exactly one
178
+ /// reason, the static `&str` is returned directly as an owned `String` without
179
+ /// an intermediate `Vec` or repeated `push_str` calls.
180
+ fn finish_reasons_str<'a>(reasons: impl Iterator<Item = Option<&'a FinishReason>>) -> String {
181
+ // Fast path: single reason (the overwhelmingly common case).
182
+ let first = reasons.filter_map(|r| r.map(finish_reason_name));
183
+ // We need to re-bind after filter_map, so use a peekable to check length.
184
+ let mut iter = first.peekable();
185
+ let Some(first_name) = iter.next() else {
186
+ return String::new();
187
+ };
188
+ if iter.peek().is_none() {
189
+ return first_name.to_owned();
190
+ }
191
+ // Multi-choice path: fold remaining names with space separator.
192
+ iter.fold(first_name.to_owned(), |mut acc, name| {
193
+ acc.push(' ');
194
+ acc.push_str(name);
195
+ acc
196
+ })
197
+ }
198
+
199
+ /// Map a [`FinishReason`] variant to its GenAI semantic convention string.
200
+ const fn finish_reason_name(reason: &FinishReason) -> &'static str {
201
+ match reason {
202
+ FinishReason::Stop => "stop",
203
+ FinishReason::Length => "length",
204
+ FinishReason::ToolCalls => "tool_calls",
205
+ FinishReason::ContentFilter => "content_filter",
206
+ FinishReason::FunctionCall => "function_call",
207
+ FinishReason::Other => "other",
208
+ }
209
+ }
@@ -0,0 +1,170 @@
1
+ use serde::Serialize;
2
+
3
+ use crate::client::BoxStream;
4
+ use crate::types::audio::{CreateSpeechRequest, CreateTranscriptionRequest, TranscriptionResponse};
5
+ use crate::types::image::{CreateImageRequest, ImagesResponse};
6
+ use crate::types::moderation::{ModerationRequest, ModerationResponse};
7
+ use crate::types::ocr::{OcrRequest, OcrResponse};
8
+ use crate::types::rerank::{RerankRequest, RerankResponse};
9
+ use crate::types::search::{SearchRequest, SearchResponse};
10
+ use crate::types::{
11
+ ChatCompletionChunk, ChatCompletionRequest, ChatCompletionResponse, EmbeddingRequest, EmbeddingResponse,
12
+ ModelsListResponse, Usage,
13
+ };
14
+
15
/// The request variant passed through the tower `Service` stack.
///
/// Each variant corresponds to one method on [`crate::client::LlmClient`] and
/// wraps that method's request payload. `ListModels` is the only variant
/// without a payload.
#[derive(Debug, Clone, Serialize)]
pub enum LlmRequest {
    /// Non-streaming chat completion.
    Chat(ChatCompletionRequest),
    /// Streaming chat completion — yields a stream of chunks. Uses the same
    /// request payload type as [`LlmRequest::Chat`].
    ChatStream(ChatCompletionRequest),
    /// Text embedding.
    Embed(EmbeddingRequest),
    /// List available models from the provider.
    ListModels,
    /// Image generation.
    ImageGenerate(CreateImageRequest),
    /// Text-to-speech audio generation.
    Speech(CreateSpeechRequest),
    /// Audio transcription.
    Transcribe(CreateTranscriptionRequest),
    /// Content moderation.
    Moderate(ModerationRequest),
    /// Document reranking.
    Rerank(RerankRequest),
    /// Web/document search.
    Search(SearchRequest),
    /// Document OCR.
    Ocr(OcrRequest),
}
43
+
44
+ impl LlmRequest {
45
+ /// OpenTelemetry GenAI `gen_ai.operation.name` value for this request.
46
+ ///
47
+ /// Maps each variant to one of the canonical GenAI semantic convention
48
+ /// operation names: `"chat"`, `"embeddings"`, or `"list_models"`.
49
+ /// Both streaming and non-streaming chat map to `"chat"`.
50
+ #[must_use]
51
+ pub fn operation_name(&self) -> &'static str {
52
+ match self {
53
+ Self::Chat(_) | Self::ChatStream(_) => "chat",
54
+ Self::Embed(_) => "embeddings",
55
+ Self::ListModels => "list_models",
56
+ Self::ImageGenerate(_) => "image_generate",
57
+ Self::Speech(_) => "speech",
58
+ Self::Transcribe(_) => "transcribe",
59
+ Self::Moderate(_) => "moderate",
60
+ Self::Rerank(_) => "rerank",
61
+ Self::Search(_) => "search",
62
+ Self::Ocr(_) => "ocr",
63
+ }
64
+ }
65
+
66
+ /// Human-readable name of the request type; used as a span / metric label.
67
+ #[must_use]
68
+ pub fn request_type(&self) -> &'static str {
69
+ match self {
70
+ Self::Chat(_) => "chat",
71
+ Self::ChatStream(_) => "chat_stream",
72
+ Self::Embed(_) => "embeddings",
73
+ Self::ListModels => "list_models",
74
+ Self::ImageGenerate(_) => "image_generate",
75
+ Self::Speech(_) => "speech",
76
+ Self::Transcribe(_) => "transcribe",
77
+ Self::Moderate(_) => "moderate",
78
+ Self::Rerank(_) => "rerank",
79
+ Self::Search(_) => "search",
80
+ Self::Ocr(_) => "ocr",
81
+ }
82
+ }
83
+
84
+ /// Return the model name embedded in the request, if any.
85
+ #[must_use]
86
+ pub fn model(&self) -> Option<&str> {
87
+ match self {
88
+ Self::Chat(r) | Self::ChatStream(r) => Some(r.model.as_str()),
89
+ Self::Embed(r) => Some(r.model.as_str()),
90
+ Self::ImageGenerate(r) => r.model.as_deref(),
91
+ Self::Speech(r) => Some(r.model.as_str()),
92
+ Self::Transcribe(r) => Some(r.model.as_str()),
93
+ Self::Moderate(r) => r.model.as_deref(),
94
+ Self::Rerank(r) => Some(r.model.as_str()),
95
+ Self::Search(r) => Some(r.model.as_str()),
96
+ Self::Ocr(r) => Some(r.model.as_str()),
97
+ Self::ListModels => None,
98
+ }
99
+ }
100
+ }
101
+
102
/// The response variant returned through the tower `Service` stack.
///
/// Variants mirror those of [`LlmRequest`]. No traits are derived on this
/// type.
pub enum LlmResponse {
    /// Non-streaming chat completion.
    Chat(ChatCompletionResponse),
    /// Streaming chat completion: a boxed `'static` stream of chunks.
    ChatStream(BoxStream<'static, ChatCompletionChunk>),
    /// Text embedding.
    Embed(EmbeddingResponse),
    /// Model list.
    ListModels(ModelsListResponse),
    /// Image generation.
    ImageGenerate(ImagesResponse),
    /// Text-to-speech audio (raw bytes).
    Speech(bytes::Bytes),
    /// Audio transcription.
    Transcribe(TranscriptionResponse),
    /// Content moderation.
    Moderate(ModerationResponse),
    /// Document reranking.
    Rerank(RerankResponse),
    /// Search results.
    Search(SearchResponse),
    /// OCR results.
    Ocr(OcrResponse),
}
127
+
128
+ impl LlmResponse {
129
+ /// Return the usage data from the response, if present.
130
+ ///
131
+ /// Streaming, model-list, and non-chat responses do not carry aggregated
132
+ /// usage data and always return `None`.
133
+ #[must_use]
134
+ pub fn usage(&self) -> Option<&Usage> {
135
+ match self {
136
+ Self::Chat(r) => r.usage.as_ref(),
137
+ Self::Embed(r) => r.usage.as_ref(),
138
+ Self::Ocr(r) => r.usage.as_ref(),
139
+ Self::ChatStream(_)
140
+ | Self::ListModels(_)
141
+ | Self::ImageGenerate(_)
142
+ | Self::Speech(_)
143
+ | Self::Transcribe(_)
144
+ | Self::Moderate(_)
145
+ | Self::Rerank(_)
146
+ | Self::Search(_) => None,
147
+ }
148
+ }
149
+ }
150
+
151
+ impl std::fmt::Debug for LlmResponse {
152
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
153
+ match self {
154
+ Self::Chat(r) => f.debug_tuple("Chat").field(r).finish(),
155
+ Self::ChatStream(_) => f.write_str("ChatStream(<stream>)"),
156
+ Self::Embed(r) => f.debug_tuple("Embed").field(r).finish(),
157
+ Self::ListModels(r) => f.debug_tuple("ListModels").field(r).finish(),
158
+ Self::ImageGenerate(r) => f.debug_tuple("ImageGenerate").field(r).finish(),
159
+ Self::Speech(b) => f
160
+ .debug_tuple("Speech")
161
+ .field(&format_args!("<{} bytes>", b.len()))
162
+ .finish(),
163
+ Self::Transcribe(r) => f.debug_tuple("Transcribe").field(r).finish(),
164
+ Self::Moderate(r) => f.debug_tuple("Moderate").field(r).finish(),
165
+ Self::Rerank(r) => f.debug_tuple("Rerank").field(r).finish(),
166
+ Self::Search(r) => f.debug_tuple("Search").field(r).finish(),
167
+ Self::Ocr(r) => f.debug_tuple("Ocr").field(r).finish(),
168
+ }
169
+ }
170
+ }
@@ -0,0 +1,52 @@
1
+ use serde::{Deserialize, Serialize};
2
+
3
/// Request to generate speech audio from text.
///
/// Deserialization rejects unknown fields. Optional fields default to `None`
/// when absent and are omitted from serialized output when `None`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct CreateSpeechRequest {
    /// Model name (required).
    pub model: String,
    /// Input text (required).
    pub input: String,
    /// Voice identifier (required).
    pub voice: String,
    /// Optional response format, passed through as a free-form string.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub response_format: Option<String>,
    /// Optional speed value.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub speed: Option<f64>,
}
15
+
16
/// Request to transcribe audio into text.
///
/// Deserialization rejects unknown fields. Optional fields default to `None`
/// when absent and are omitted from serialized output when `None`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct CreateTranscriptionRequest {
    /// Model name (required).
    pub model: String,
    /// Base64-encoded audio file data.
    pub file: String,
    /// Optional language value.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub language: Option<String>,
    /// Optional prompt text.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub prompt: Option<String>,
    /// Optional response format, passed through as a free-form string.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub response_format: Option<String>,
    /// Optional temperature value.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f64>,
}
32
+
33
/// Response from a transcription request.
///
/// Only `text` is required. The optional fields default to `None` when
/// absent and are omitted from serialized output when `None`. Unlike the
/// request types in this module, unknown fields are not rejected.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TranscriptionResponse {
    /// The transcribed text.
    pub text: String,
    /// Optional language value.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub language: Option<String>,
    /// Optional duration value (units not specified here).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub duration: Option<f64>,
    /// Optional list of timed segments.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub segments: Option<Vec<TranscriptionSegment>>,
}
44
+
45
/// A segment of transcribed audio with timing information.
///
/// All four fields are required when deserializing.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TranscriptionSegment {
    /// Segment identifier.
    pub id: u32,
    /// Start of the segment (presumably seconds from the start of the audio — confirm).
    pub start: f64,
    /// End of the segment, in the same unit as `start`.
    pub end: f64,
    /// Text transcribed for this segment.
    pub text: String,
}