liter_llm 1.0.0.pre.rc.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +239 -0
- data/ext/liter_llm_rb/extconf.rb +65 -0
- data/ext/liter_llm_rb/native/.cargo/config.toml +23 -0
- data/ext/liter_llm_rb/native/Cargo.lock +3713 -0
- data/ext/liter_llm_rb/native/Cargo.toml +32 -0
- data/ext/liter_llm_rb/native/build.rs +15 -0
- data/ext/liter_llm_rb/native/src/lib.rs +1079 -0
- data/lib/liter_llm.rb +8 -0
- data/sig/liter_llm.rbs +416 -0
- data/vendor/Cargo.toml +54 -0
- data/vendor/liter-llm/Cargo.toml +92 -0
- data/vendor/liter-llm/README.md +252 -0
- data/vendor/liter-llm/schemas/pricing.json +40 -0
- data/vendor/liter-llm/schemas/providers.json +1662 -0
- data/vendor/liter-llm/src/auth/azure_ad.rs +264 -0
- data/vendor/liter-llm/src/auth/bedrock_sts.rs +353 -0
- data/vendor/liter-llm/src/auth/mod.rs +68 -0
- data/vendor/liter-llm/src/auth/vertex_oauth.rs +353 -0
- data/vendor/liter-llm/src/client/config.rs +351 -0
- data/vendor/liter-llm/src/client/managed.rs +622 -0
- data/vendor/liter-llm/src/client/mod.rs +864 -0
- data/vendor/liter-llm/src/cost.rs +212 -0
- data/vendor/liter-llm/src/error.rs +190 -0
- data/vendor/liter-llm/src/http/eventstream.rs +860 -0
- data/vendor/liter-llm/src/http/mod.rs +12 -0
- data/vendor/liter-llm/src/http/request.rs +438 -0
- data/vendor/liter-llm/src/http/retry.rs +72 -0
- data/vendor/liter-llm/src/http/streaming.rs +289 -0
- data/vendor/liter-llm/src/lib.rs +37 -0
- data/vendor/liter-llm/src/provider/anthropic.rs +2250 -0
- data/vendor/liter-llm/src/provider/azure.rs +579 -0
- data/vendor/liter-llm/src/provider/bedrock.rs +1543 -0
- data/vendor/liter-llm/src/provider/cohere.rs +654 -0
- data/vendor/liter-llm/src/provider/custom.rs +404 -0
- data/vendor/liter-llm/src/provider/google_ai.rs +281 -0
- data/vendor/liter-llm/src/provider/mistral.rs +188 -0
- data/vendor/liter-llm/src/provider/mod.rs +616 -0
- data/vendor/liter-llm/src/provider/vertex.rs +1504 -0
- data/vendor/liter-llm/src/tests.rs +1425 -0
- data/vendor/liter-llm/src/tokenizer.rs +281 -0
- data/vendor/liter-llm/src/tower/budget.rs +599 -0
- data/vendor/liter-llm/src/tower/cache.rs +502 -0
- data/vendor/liter-llm/src/tower/cache_opendal.rs +270 -0
- data/vendor/liter-llm/src/tower/cooldown.rs +231 -0
- data/vendor/liter-llm/src/tower/cost.rs +404 -0
- data/vendor/liter-llm/src/tower/fallback.rs +121 -0
- data/vendor/liter-llm/src/tower/health.rs +219 -0
- data/vendor/liter-llm/src/tower/hooks.rs +369 -0
- data/vendor/liter-llm/src/tower/mod.rs +77 -0
- data/vendor/liter-llm/src/tower/rate_limit.rs +300 -0
- data/vendor/liter-llm/src/tower/router.rs +436 -0
- data/vendor/liter-llm/src/tower/service.rs +181 -0
- data/vendor/liter-llm/src/tower/tests.rs +539 -0
- data/vendor/liter-llm/src/tower/tests_common.rs +252 -0
- data/vendor/liter-llm/src/tower/tracing.rs +209 -0
- data/vendor/liter-llm/src/tower/types.rs +170 -0
- data/vendor/liter-llm/src/types/audio.rs +52 -0
- data/vendor/liter-llm/src/types/batch.rs +77 -0
- data/vendor/liter-llm/src/types/chat.rs +214 -0
- data/vendor/liter-llm/src/types/common.rs +244 -0
- data/vendor/liter-llm/src/types/embedding.rs +84 -0
- data/vendor/liter-llm/src/types/files.rs +58 -0
- data/vendor/liter-llm/src/types/image.rs +40 -0
- data/vendor/liter-llm/src/types/mod.rs +27 -0
- data/vendor/liter-llm/src/types/models.rs +21 -0
- data/vendor/liter-llm/src/types/moderation.rs +80 -0
- data/vendor/liter-llm/src/types/ocr.rs +87 -0
- data/vendor/liter-llm/src/types/rerank.rs +46 -0
- data/vendor/liter-llm/src/types/responses.rs +55 -0
- data/vendor/liter-llm/src/types/search.rs +45 -0
- data/vendor/liter-llm/tests/contract.rs +332 -0
- data/vendor/liter-llm-ffi/Cargo.toml +30 -0
- data/vendor/liter-llm-ffi/build.rs +66 -0
- data/vendor/liter-llm-ffi/cbindgen.toml +60 -0
- data/vendor/liter-llm-ffi/liter_llm.h +850 -0
- data/vendor/liter-llm-ffi/src/lib.rs +2488 -0
- metadata +286 -0
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
use std::pin::Pin;
|
|
2
|
+
use std::task::{Context, Poll};
|
|
3
|
+
|
|
4
|
+
use bytes::Bytes;
|
|
5
|
+
use futures_core::Stream;
|
|
6
|
+
use memchr::memchr;
|
|
7
|
+
use pin_project_lite::pin_project;
|
|
8
|
+
|
|
9
|
+
use crate::error::{LiterLlmError, Result};
|
|
10
|
+
use crate::http::request::with_retry;
|
|
11
|
+
use crate::types::ChatCompletionChunk;
|
|
12
|
+
|
|
13
|
+
/// Upper bound, in bytes, on the SSE parse buffer. A chunk that would push
/// the buffer past this size makes the stream fail with a streaming error.
const MAX_BUFFER_BYTES: usize = 1 << 20; // 1 MiB
|
|
15
|
+
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
// Public entry point
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
|
|
20
|
+
/// Send a streaming POST request and return an SSE stream of
|
|
21
|
+
/// `ChatCompletionChunk`s.
|
|
22
|
+
///
|
|
23
|
+
/// Before opening the stream, retries on 429 / 500 / 502 / 503 / 504 up to
|
|
24
|
+
/// `max_retries` times honouring any `Retry-After` header. Once the stream
|
|
25
|
+
/// is open, individual chunk errors are yielded as `Err` items rather than
|
|
26
|
+
/// causing a retry.
|
|
27
|
+
///
|
|
28
|
+
/// `auth_header` is `Some((name, value))` when the provider requires
|
|
29
|
+
/// authentication, or `None` when no auth header should be added.
|
|
30
|
+
///
|
|
31
|
+
/// `extra_headers` carries provider-specific mandatory headers (e.g.
|
|
32
|
+
/// `anthropic-version`) beyond the single auth header.
|
|
33
|
+
///
|
|
34
|
+
/// `parse_event` translates a raw SSE `data:` payload string into a
|
|
35
|
+
/// `ChatCompletionChunk`. Pass the provider's `parse_stream_event` method
|
|
36
|
+
/// to support non-OpenAI SSE formats.
|
|
37
|
+
#[cfg_attr(
|
|
38
|
+
feature = "tracing",
|
|
39
|
+
tracing::instrument(
|
|
40
|
+
skip_all,
|
|
41
|
+
fields(
|
|
42
|
+
http.method = "POST",
|
|
43
|
+
http.url = %url,
|
|
44
|
+
http.status_code = tracing::field::Empty,
|
|
45
|
+
http.retry_count = tracing::field::Empty,
|
|
46
|
+
)
|
|
47
|
+
)
|
|
48
|
+
)]
|
|
49
|
+
pub async fn post_stream<P>(
|
|
50
|
+
client: &reqwest::Client,
|
|
51
|
+
url: &str,
|
|
52
|
+
auth_header: Option<(&str, &str)>,
|
|
53
|
+
extra_headers: &[(&str, &str)],
|
|
54
|
+
body: Bytes,
|
|
55
|
+
max_retries: u32,
|
|
56
|
+
parse_event: P,
|
|
57
|
+
) -> Result<Pin<Box<dyn Stream<Item = Result<ChatCompletionChunk>> + Send>>>
|
|
58
|
+
where
|
|
59
|
+
P: Fn(&str) -> Result<Option<ChatCompletionChunk>> + Send + 'static,
|
|
60
|
+
{
|
|
61
|
+
let mut retry_count = 0u32;
|
|
62
|
+
|
|
63
|
+
let resp = with_retry(max_retries, || {
|
|
64
|
+
// Clone is a zero-copy ref-count bump on `Bytes`.
|
|
65
|
+
let mut builder = client
|
|
66
|
+
.post(url)
|
|
67
|
+
.header(reqwest::header::CONTENT_TYPE, "application/json")
|
|
68
|
+
.body(body.clone());
|
|
69
|
+
if let Some((name, value)) = auth_header {
|
|
70
|
+
builder = builder.header(name, value);
|
|
71
|
+
}
|
|
72
|
+
for (name, value) in extra_headers {
|
|
73
|
+
builder = builder.header(*name, *value);
|
|
74
|
+
}
|
|
75
|
+
retry_count += 1;
|
|
76
|
+
builder.send()
|
|
77
|
+
})
|
|
78
|
+
.await?;
|
|
79
|
+
|
|
80
|
+
#[cfg(feature = "tracing")]
|
|
81
|
+
{
|
|
82
|
+
let span = tracing::Span::current();
|
|
83
|
+
span.record("http.status_code", resp.status().as_u16());
|
|
84
|
+
span.record("http.retry_count", retry_count.saturating_sub(1));
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
let byte_stream = resp.bytes_stream();
|
|
88
|
+
let stream = SseParser::new(byte_stream, parse_event);
|
|
89
|
+
Ok(Box::pin(stream))
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
// ---------------------------------------------------------------------------
|
|
93
|
+
// SSE parser
|
|
94
|
+
// ---------------------------------------------------------------------------
|
|
95
|
+
|
|
96
|
+
pin_project! {
|
|
97
|
+
/// Wraps a `bytes::Bytes` stream and yields parsed `ChatCompletionChunk`s.
|
|
98
|
+
///
|
|
99
|
+
/// The `P` type parameter is the parse function used to translate a raw
|
|
100
|
+
/// SSE `data:` payload string into a `ChatCompletionChunk`. This allows
|
|
101
|
+
/// non-OpenAI SSE formats (e.g. Anthropic, Vertex) to plug in their own
|
|
102
|
+
/// event parsers without duplicating the byte-buffering and line-splitting
|
|
103
|
+
/// logic.
|
|
104
|
+
struct SseParser<S, P> {
|
|
105
|
+
#[pin]
|
|
106
|
+
inner: S,
|
|
107
|
+
buffer: String,
|
|
108
|
+
// Read cursor into `buffer`. All bytes before `cursor` have already
|
|
109
|
+
// been processed. We compact (drain) only when the cursor exceeds
|
|
110
|
+
// half the buffer length, amortising memmove cost to O(total_bytes).
|
|
111
|
+
cursor: usize,
|
|
112
|
+
// Set to true once the inner stream is exhausted.
|
|
113
|
+
done: bool,
|
|
114
|
+
// Provider-supplied event parser; translates raw SSE data payloads.
|
|
115
|
+
parse_event: P,
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
impl<S, P> SseParser<S, P>
|
|
120
|
+
where
|
|
121
|
+
P: Fn(&str) -> Result<Option<ChatCompletionChunk>>,
|
|
122
|
+
{
|
|
123
|
+
fn new(inner: S, parse_event: P) -> Self {
|
|
124
|
+
Self {
|
|
125
|
+
inner,
|
|
126
|
+
// Pre-allocate 4 KiB — a reasonable size for SSE lines to
|
|
127
|
+
// reduce reallocations during the first few chunks.
|
|
128
|
+
buffer: String::with_capacity(4096),
|
|
129
|
+
cursor: 0,
|
|
130
|
+
done: false,
|
|
131
|
+
parse_event,
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
impl<S, P> Stream for SseParser<S, P>
|
|
137
|
+
where
|
|
138
|
+
S: Stream<Item = std::result::Result<Bytes, reqwest::Error>> + Send,
|
|
139
|
+
P: Fn(&str) -> Result<Option<ChatCompletionChunk>>,
|
|
140
|
+
{
|
|
141
|
+
type Item = Result<ChatCompletionChunk>;
|
|
142
|
+
|
|
143
|
+
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
|
144
|
+
let mut this = self.project();
|
|
145
|
+
|
|
146
|
+
loop {
|
|
147
|
+
// --- Process any complete lines already in the buffer ---
|
|
148
|
+
// Search for `\n` only in the unprocessed portion (from cursor onward).
|
|
149
|
+
if let Some(offset) = memchr(b'\n', &this.buffer.as_bytes()[*this.cursor..]) {
|
|
150
|
+
let newline_pos = *this.cursor + offset;
|
|
151
|
+
|
|
152
|
+
// Borrow the line slice from cursor..newline_pos — zero allocation
|
|
153
|
+
// on the hot path. All decisions (empty check, prefix match, JSON
|
|
154
|
+
// parse) operate on this borrowed `&str`.
|
|
155
|
+
let line = this.buffer[*this.cursor..newline_pos].trim_end_matches('\r').trim();
|
|
156
|
+
|
|
157
|
+
// Skip empty lines and SSE comments.
|
|
158
|
+
if line.is_empty() || line.starts_with(':') {
|
|
159
|
+
*this.cursor = newline_pos + 1;
|
|
160
|
+
compact_if_needed(this.buffer, this.cursor);
|
|
161
|
+
continue;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
if let Some(raw) = line.strip_prefix("data:") {
|
|
165
|
+
// Strip exactly one optional leading space (RFC 8895 §3.3).
|
|
166
|
+
let data = raw.strip_prefix(' ').unwrap_or(raw).trim();
|
|
167
|
+
|
|
168
|
+
// Handle the OpenAI `[DONE]` sentinel at the SSE parser
|
|
169
|
+
// level — this terminates the stream regardless of provider.
|
|
170
|
+
if data == "[DONE]" {
|
|
171
|
+
*this.cursor = newline_pos + 1;
|
|
172
|
+
compact_if_needed(this.buffer, this.cursor);
|
|
173
|
+
return Poll::Ready(None);
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
// Delegate to the provider-supplied parser.
|
|
177
|
+
// - `Ok(Some(chunk))` → yield the chunk.
|
|
178
|
+
// - `Ok(None)` → skip this event (e.g. Anthropic ping,
|
|
179
|
+
// content_block_stop, message_stop) and continue parsing.
|
|
180
|
+
// - `Err(e)` → yield the error to the consumer.
|
|
181
|
+
let result = (this.parse_event)(data);
|
|
182
|
+
*this.cursor = newline_pos + 1;
|
|
183
|
+
compact_if_needed(this.buffer, this.cursor);
|
|
184
|
+
match result {
|
|
185
|
+
Ok(None) => continue,
|
|
186
|
+
Ok(Some(chunk)) => return Poll::Ready(Some(Ok(chunk))),
|
|
187
|
+
Err(e) => return Poll::Ready(Some(Err(e))),
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
// Ignore other SSE fields (event:, id:, retry:).
|
|
192
|
+
*this.cursor = newline_pos + 1;
|
|
193
|
+
compact_if_needed(this.buffer, this.cursor);
|
|
194
|
+
continue;
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
// --- Buffer has only a partial line (or nothing unprocessed); fetch more bytes ---
|
|
198
|
+
|
|
199
|
+
if *this.done {
|
|
200
|
+
// Any bytes remaining in the buffer after the stream ends were
|
|
201
|
+
// not terminated by a newline — they form an incomplete SSE
|
|
202
|
+
// line that would be silently dropped. Emit a warning so that
|
|
203
|
+
// protocol bugs or truncated responses are visible in logs.
|
|
204
|
+
let remaining = this.buffer.len() - *this.cursor;
|
|
205
|
+
if remaining > 0 {
|
|
206
|
+
#[cfg(feature = "tracing")]
|
|
207
|
+
tracing::warn!(
|
|
208
|
+
leftover_bytes = remaining,
|
|
209
|
+
preview = &this.buffer[*this.cursor..(*this.cursor + remaining.min(64))],
|
|
210
|
+
"SSE stream ended with unterminated data in buffer; dropping partial line"
|
|
211
|
+
);
|
|
212
|
+
this.buffer.clear();
|
|
213
|
+
*this.cursor = 0;
|
|
214
|
+
}
|
|
215
|
+
return Poll::Ready(None);
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
match this.inner.as_mut().poll_next(cx) {
|
|
219
|
+
Poll::Ready(Some(Ok(bytes))) => {
|
|
220
|
+
// Guard against unbounded growth.
|
|
221
|
+
if this.buffer.len() + bytes.len() > MAX_BUFFER_BYTES {
|
|
222
|
+
// Mark done so subsequent polls don't continue reading.
|
|
223
|
+
*this.done = true;
|
|
224
|
+
return Poll::Ready(Some(Err(LiterLlmError::Streaming {
|
|
225
|
+
message: format!("SSE buffer exceeded {MAX_BUFFER_BYTES} bytes; stream aborted"),
|
|
226
|
+
})));
|
|
227
|
+
}
|
|
228
|
+
match std::str::from_utf8(&bytes) {
|
|
229
|
+
Ok(s) => this.buffer.push_str(s),
|
|
230
|
+
Err(e) => {
|
|
231
|
+
// Mark done so the next poll does not try to read
|
|
232
|
+
// more data from the (now-corrupt) stream.
|
|
233
|
+
*this.done = true;
|
|
234
|
+
return Poll::Ready(Some(Err(LiterLlmError::Streaming {
|
|
235
|
+
message: format!("invalid UTF-8 in SSE stream: {e}"),
|
|
236
|
+
})));
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
Poll::Ready(Some(Err(e))) => {
|
|
241
|
+
return Poll::Ready(Some(Err(LiterLlmError::from(e))));
|
|
242
|
+
}
|
|
243
|
+
Poll::Ready(None) => {
|
|
244
|
+
*this.done = true;
|
|
245
|
+
// Loop once more to flush any remaining buffered line.
|
|
246
|
+
continue;
|
|
247
|
+
}
|
|
248
|
+
Poll::Pending => {
|
|
249
|
+
return Poll::Pending;
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
/// Compact the buffer when the cursor has advanced past half the buffer length.
|
|
257
|
+
///
|
|
258
|
+
/// This amortises the O(n) memmove cost: instead of shifting bytes on every
|
|
259
|
+
/// line, we only compact when at least half the buffer is consumed, giving
|
|
260
|
+
/// amortised O(total_bytes) cost across the entire stream.
|
|
261
|
+
fn compact_if_needed(buffer: &mut String, cursor: &mut usize) {
|
|
262
|
+
if *cursor > buffer.len() / 2 {
|
|
263
|
+
buffer.drain(..*cursor);
|
|
264
|
+
*cursor = 0;
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
// ---------------------------------------------------------------------------
|
|
269
|
+
// Utility
|
|
270
|
+
// ---------------------------------------------------------------------------
|
|
271
|
+
|
|
272
|
+
/// Decode one SSE `data:` line into a `ChatCompletionChunk` by parsing its
/// payload as JSON.
///
/// Returns `None` when `line` does not start with `data:` or when its payload
/// is the terminal `[DONE]` sentinel. Otherwise returns `Some` holding either
/// the deserialized chunk or a `LiterLlmError::Streaming` describing the JSON
/// failure.
///
/// Test-only helper; external consumers should use the streaming API instead.
#[cfg(test)]
pub(crate) fn parse_sse_line(line: &str) -> Option<Result<ChatCompletionChunk>> {
    // Drop the field name and one optional leading space, trim what is left,
    // and bail out on the end-of-stream sentinel.
    let payload = line
        .strip_prefix("data:")
        .map(|rest| rest.strip_prefix(' ').unwrap_or(rest).trim())
        .filter(|payload| *payload != "[DONE]")?;

    let parsed = match serde_json::from_str::<ChatCompletionChunk>(payload) {
        Ok(chunk) => Ok(chunk),
        Err(e) => Err(LiterLlmError::Streaming {
            message: format!("failed to parse SSE data: {e}"),
        }),
    };
    Some(parsed)
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
// Provider, HTTP, and retry infrastructure are only active with native-http.
|
|
2
|
+
// Suppress dead_code lints on the wasm / no-native-http target so that the
|
|
3
|
+
// type-only surface compiles cleanly.
|
|
4
|
+
#![cfg_attr(not(feature = "native-http"), allow(dead_code, unused_imports))]
|
|
5
|
+
|
|
6
|
+
pub mod auth;
|
|
7
|
+
pub mod client;
|
|
8
|
+
pub mod cost;
|
|
9
|
+
pub mod error;
|
|
10
|
+
pub(crate) mod http;
|
|
11
|
+
pub(crate) mod provider;
|
|
12
|
+
#[cfg(test)]
|
|
13
|
+
mod tests;
|
|
14
|
+
#[cfg(feature = "tokenizer")]
|
|
15
|
+
pub mod tokenizer;
|
|
16
|
+
#[cfg(feature = "tower")]
|
|
17
|
+
pub mod tower;
|
|
18
|
+
pub mod types;
|
|
19
|
+
|
|
20
|
+
// Re-export key types at crate root.
|
|
21
|
+
pub use client::{
|
|
22
|
+
BatchClient, BoxFuture, BoxStream, ClientConfig, ClientConfigBuilder, FileClient, LlmClient, ResponseClient,
|
|
23
|
+
};
|
|
24
|
+
// DefaultClient requires the native HTTP stack (reqwest + tokio).
|
|
25
|
+
#[cfg(feature = "native-http")]
|
|
26
|
+
pub use client::DefaultClient;
|
|
27
|
+
// ManagedClient requires both the native HTTP stack and Tower middleware.
|
|
28
|
+
#[cfg(all(feature = "native-http", feature = "tower"))]
|
|
29
|
+
pub use client::managed::ManagedClient;
|
|
30
|
+
pub use error::{LiterLlmError, Result};
|
|
31
|
+
// Re-export the public provider helper functions that are part of the crate's
|
|
32
|
+
// public API even though the `provider` module itself is pub(crate).
|
|
33
|
+
pub use provider::custom::{
|
|
34
|
+
AuthHeaderFormat, CustomProviderConfig, register_custom_provider, unregister_custom_provider,
|
|
35
|
+
};
|
|
36
|
+
pub use provider::{ProviderConfig, all_providers, complex_provider_names};
|
|
37
|
+
pub use types::*;
|