liter_llm 1.0.0.pre.rc.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +239 -0
  3. data/ext/liter_llm_rb/extconf.rb +65 -0
  4. data/ext/liter_llm_rb/native/.cargo/config.toml +23 -0
  5. data/ext/liter_llm_rb/native/Cargo.lock +3713 -0
  6. data/ext/liter_llm_rb/native/Cargo.toml +32 -0
  7. data/ext/liter_llm_rb/native/build.rs +15 -0
  8. data/ext/liter_llm_rb/native/src/lib.rs +1079 -0
  9. data/lib/liter_llm.rb +8 -0
  10. data/sig/liter_llm.rbs +416 -0
  11. data/vendor/Cargo.toml +54 -0
  12. data/vendor/liter-llm/Cargo.toml +92 -0
  13. data/vendor/liter-llm/README.md +252 -0
  14. data/vendor/liter-llm/schemas/pricing.json +40 -0
  15. data/vendor/liter-llm/schemas/providers.json +1662 -0
  16. data/vendor/liter-llm/src/auth/azure_ad.rs +264 -0
  17. data/vendor/liter-llm/src/auth/bedrock_sts.rs +353 -0
  18. data/vendor/liter-llm/src/auth/mod.rs +68 -0
  19. data/vendor/liter-llm/src/auth/vertex_oauth.rs +353 -0
  20. data/vendor/liter-llm/src/client/config.rs +351 -0
  21. data/vendor/liter-llm/src/client/managed.rs +622 -0
  22. data/vendor/liter-llm/src/client/mod.rs +864 -0
  23. data/vendor/liter-llm/src/cost.rs +212 -0
  24. data/vendor/liter-llm/src/error.rs +190 -0
  25. data/vendor/liter-llm/src/http/eventstream.rs +860 -0
  26. data/vendor/liter-llm/src/http/mod.rs +12 -0
  27. data/vendor/liter-llm/src/http/request.rs +438 -0
  28. data/vendor/liter-llm/src/http/retry.rs +72 -0
  29. data/vendor/liter-llm/src/http/streaming.rs +289 -0
  30. data/vendor/liter-llm/src/lib.rs +37 -0
  31. data/vendor/liter-llm/src/provider/anthropic.rs +2250 -0
  32. data/vendor/liter-llm/src/provider/azure.rs +579 -0
  33. data/vendor/liter-llm/src/provider/bedrock.rs +1543 -0
  34. data/vendor/liter-llm/src/provider/cohere.rs +654 -0
  35. data/vendor/liter-llm/src/provider/custom.rs +404 -0
  36. data/vendor/liter-llm/src/provider/google_ai.rs +281 -0
  37. data/vendor/liter-llm/src/provider/mistral.rs +188 -0
  38. data/vendor/liter-llm/src/provider/mod.rs +616 -0
  39. data/vendor/liter-llm/src/provider/vertex.rs +1504 -0
  40. data/vendor/liter-llm/src/tests.rs +1425 -0
  41. data/vendor/liter-llm/src/tokenizer.rs +281 -0
  42. data/vendor/liter-llm/src/tower/budget.rs +599 -0
  43. data/vendor/liter-llm/src/tower/cache.rs +502 -0
  44. data/vendor/liter-llm/src/tower/cache_opendal.rs +270 -0
  45. data/vendor/liter-llm/src/tower/cooldown.rs +231 -0
  46. data/vendor/liter-llm/src/tower/cost.rs +404 -0
  47. data/vendor/liter-llm/src/tower/fallback.rs +121 -0
  48. data/vendor/liter-llm/src/tower/health.rs +219 -0
  49. data/vendor/liter-llm/src/tower/hooks.rs +369 -0
  50. data/vendor/liter-llm/src/tower/mod.rs +77 -0
  51. data/vendor/liter-llm/src/tower/rate_limit.rs +300 -0
  52. data/vendor/liter-llm/src/tower/router.rs +436 -0
  53. data/vendor/liter-llm/src/tower/service.rs +181 -0
  54. data/vendor/liter-llm/src/tower/tests.rs +539 -0
  55. data/vendor/liter-llm/src/tower/tests_common.rs +252 -0
  56. data/vendor/liter-llm/src/tower/tracing.rs +209 -0
  57. data/vendor/liter-llm/src/tower/types.rs +170 -0
  58. data/vendor/liter-llm/src/types/audio.rs +52 -0
  59. data/vendor/liter-llm/src/types/batch.rs +77 -0
  60. data/vendor/liter-llm/src/types/chat.rs +214 -0
  61. data/vendor/liter-llm/src/types/common.rs +244 -0
  62. data/vendor/liter-llm/src/types/embedding.rs +84 -0
  63. data/vendor/liter-llm/src/types/files.rs +58 -0
  64. data/vendor/liter-llm/src/types/image.rs +40 -0
  65. data/vendor/liter-llm/src/types/mod.rs +27 -0
  66. data/vendor/liter-llm/src/types/models.rs +21 -0
  67. data/vendor/liter-llm/src/types/moderation.rs +80 -0
  68. data/vendor/liter-llm/src/types/ocr.rs +87 -0
  69. data/vendor/liter-llm/src/types/rerank.rs +46 -0
  70. data/vendor/liter-llm/src/types/responses.rs +55 -0
  71. data/vendor/liter-llm/src/types/search.rs +45 -0
  72. data/vendor/liter-llm/tests/contract.rs +332 -0
  73. data/vendor/liter-llm-ffi/Cargo.toml +30 -0
  74. data/vendor/liter-llm-ffi/build.rs +66 -0
  75. data/vendor/liter-llm-ffi/cbindgen.toml +60 -0
  76. data/vendor/liter-llm-ffi/liter_llm.h +850 -0
  77. data/vendor/liter-llm-ffi/src/lib.rs +2488 -0
  78. metadata +286 -0
@@ -0,0 +1,289 @@
1
+ use std::pin::Pin;
2
+ use std::task::{Context, Poll};
3
+
4
+ use bytes::Bytes;
5
+ use futures_core::Stream;
6
+ use memchr::memchr;
7
+ use pin_project_lite::pin_project;
8
+
9
+ use crate::error::{LiterLlmError, Result};
10
+ use crate::http::request::with_retry;
11
+ use crate::types::ChatCompletionChunk;
12
+
13
+ /// Maximum number of bytes buffered before declaring a streaming error.
14
+ const MAX_BUFFER_BYTES: usize = 1024 * 1024; // 1 MiB
15
+
16
+ // ---------------------------------------------------------------------------
17
+ // Public entry point
18
+ // ---------------------------------------------------------------------------
19
+
20
+ /// Send a streaming POST request and return an SSE stream of
21
+ /// `ChatCompletionChunk`s.
22
+ ///
23
+ /// Before opening the stream, retries on 429 / 500 / 502 / 503 / 504 up to
24
+ /// `max_retries` times honouring any `Retry-After` header. Once the stream
25
+ /// is open, individual chunk errors are yielded as `Err` items rather than
26
+ /// causing a retry.
27
+ ///
28
+ /// `auth_header` is `Some((name, value))` when the provider requires
29
+ /// authentication, or `None` when no auth header should be added.
30
+ ///
31
+ /// `extra_headers` carries provider-specific mandatory headers (e.g.
32
+ /// `anthropic-version`) beyond the single auth header.
33
+ ///
34
+ /// `parse_event` translates a raw SSE `data:` payload string into a
35
+ /// `ChatCompletionChunk`. Pass the provider's `parse_stream_event` method
36
+ /// to support non-OpenAI SSE formats.
37
+ #[cfg_attr(
38
+ feature = "tracing",
39
+ tracing::instrument(
40
+ skip_all,
41
+ fields(
42
+ http.method = "POST",
43
+ http.url = %url,
44
+ http.status_code = tracing::field::Empty,
45
+ http.retry_count = tracing::field::Empty,
46
+ )
47
+ )
48
+ )]
49
+ pub async fn post_stream<P>(
50
+ client: &reqwest::Client,
51
+ url: &str,
52
+ auth_header: Option<(&str, &str)>,
53
+ extra_headers: &[(&str, &str)],
54
+ body: Bytes,
55
+ max_retries: u32,
56
+ parse_event: P,
57
+ ) -> Result<Pin<Box<dyn Stream<Item = Result<ChatCompletionChunk>> + Send>>>
58
+ where
59
+ P: Fn(&str) -> Result<Option<ChatCompletionChunk>> + Send + 'static,
60
+ {
61
+ let mut retry_count = 0u32;
62
+
63
+ let resp = with_retry(max_retries, || {
64
+ // Clone is a zero-copy ref-count bump on `Bytes`.
65
+ let mut builder = client
66
+ .post(url)
67
+ .header(reqwest::header::CONTENT_TYPE, "application/json")
68
+ .body(body.clone());
69
+ if let Some((name, value)) = auth_header {
70
+ builder = builder.header(name, value);
71
+ }
72
+ for (name, value) in extra_headers {
73
+ builder = builder.header(*name, *value);
74
+ }
75
+ retry_count += 1;
76
+ builder.send()
77
+ })
78
+ .await?;
79
+
80
+ #[cfg(feature = "tracing")]
81
+ {
82
+ let span = tracing::Span::current();
83
+ span.record("http.status_code", resp.status().as_u16());
84
+ span.record("http.retry_count", retry_count.saturating_sub(1));
85
+ }
86
+
87
+ let byte_stream = resp.bytes_stream();
88
+ let stream = SseParser::new(byte_stream, parse_event);
89
+ Ok(Box::pin(stream))
90
+ }
91
+
92
+ // ---------------------------------------------------------------------------
93
+ // SSE parser
94
+ // ---------------------------------------------------------------------------
95
+
96
+ pin_project! {
97
+ /// Wraps a `bytes::Bytes` stream and yields parsed `ChatCompletionChunk`s.
98
+ ///
99
+ /// The `P` type parameter is the parse function used to translate a raw
100
+ /// SSE `data:` payload string into a `ChatCompletionChunk`. This allows
101
+ /// non-OpenAI SSE formats (e.g. Anthropic, Vertex) to plug in their own
102
+ /// event parsers without duplicating the byte-buffering and line-splitting
103
+ /// logic.
104
+ struct SseParser<S, P> {
105
+ #[pin]
106
+ inner: S,
107
+ buffer: String,
108
+ // Read cursor into `buffer`. All bytes before `cursor` have already
109
+ // been processed. We compact (drain) only when the cursor exceeds
110
+ // half the buffer length, amortising memmove cost to O(total_bytes).
111
+ cursor: usize,
112
+ // Set to true once the inner stream is exhausted.
113
+ done: bool,
114
+ // Provider-supplied event parser; translates raw SSE data payloads.
115
+ parse_event: P,
116
+ }
117
+ }
118
+
119
+ impl<S, P> SseParser<S, P>
120
+ where
121
+ P: Fn(&str) -> Result<Option<ChatCompletionChunk>>,
122
+ {
123
+ fn new(inner: S, parse_event: P) -> Self {
124
+ Self {
125
+ inner,
126
+ // Pre-allocate 4 KiB — a reasonable size for SSE lines to
127
+ // reduce reallocations during the first few chunks.
128
+ buffer: String::with_capacity(4096),
129
+ cursor: 0,
130
+ done: false,
131
+ parse_event,
132
+ }
133
+ }
134
+ }
135
+
136
+ impl<S, P> Stream for SseParser<S, P>
137
+ where
138
+ S: Stream<Item = std::result::Result<Bytes, reqwest::Error>> + Send,
139
+ P: Fn(&str) -> Result<Option<ChatCompletionChunk>>,
140
+ {
141
+ type Item = Result<ChatCompletionChunk>;
142
+
143
+ fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
144
+ let mut this = self.project();
145
+
146
+ loop {
147
+ // --- Process any complete lines already in the buffer ---
148
+ // Search for `\n` only in the unprocessed portion (from cursor onward).
149
+ if let Some(offset) = memchr(b'\n', &this.buffer.as_bytes()[*this.cursor..]) {
150
+ let newline_pos = *this.cursor + offset;
151
+
152
+ // Borrow the line slice from cursor..newline_pos — zero allocation
153
+ // on the hot path. All decisions (empty check, prefix match, JSON
154
+ // parse) operate on this borrowed `&str`.
155
+ let line = this.buffer[*this.cursor..newline_pos].trim_end_matches('\r').trim();
156
+
157
+ // Skip empty lines and SSE comments.
158
+ if line.is_empty() || line.starts_with(':') {
159
+ *this.cursor = newline_pos + 1;
160
+ compact_if_needed(this.buffer, this.cursor);
161
+ continue;
162
+ }
163
+
164
+ if let Some(raw) = line.strip_prefix("data:") {
165
+ // Strip exactly one optional leading space (RFC 8895 §3.3).
166
+ let data = raw.strip_prefix(' ').unwrap_or(raw).trim();
167
+
168
+ // Handle the OpenAI `[DONE]` sentinel at the SSE parser
169
+ // level — this terminates the stream regardless of provider.
170
+ if data == "[DONE]" {
171
+ *this.cursor = newline_pos + 1;
172
+ compact_if_needed(this.buffer, this.cursor);
173
+ return Poll::Ready(None);
174
+ }
175
+
176
+ // Delegate to the provider-supplied parser.
177
+ // - `Ok(Some(chunk))` → yield the chunk.
178
+ // - `Ok(None)` → skip this event (e.g. Anthropic ping,
179
+ // content_block_stop, message_stop) and continue parsing.
180
+ // - `Err(e)` → yield the error to the consumer.
181
+ let result = (this.parse_event)(data);
182
+ *this.cursor = newline_pos + 1;
183
+ compact_if_needed(this.buffer, this.cursor);
184
+ match result {
185
+ Ok(None) => continue,
186
+ Ok(Some(chunk)) => return Poll::Ready(Some(Ok(chunk))),
187
+ Err(e) => return Poll::Ready(Some(Err(e))),
188
+ }
189
+ }
190
+
191
+ // Ignore other SSE fields (event:, id:, retry:).
192
+ *this.cursor = newline_pos + 1;
193
+ compact_if_needed(this.buffer, this.cursor);
194
+ continue;
195
+ }
196
+
197
+ // --- Buffer has only a partial line (or nothing unprocessed); fetch more bytes ---
198
+
199
+ if *this.done {
200
+ // Any bytes remaining in the buffer after the stream ends were
201
+ // not terminated by a newline — they form an incomplete SSE
202
+ // line that would be silently dropped. Emit a warning so that
203
+ // protocol bugs or truncated responses are visible in logs.
204
+ let remaining = this.buffer.len() - *this.cursor;
205
+ if remaining > 0 {
206
+ #[cfg(feature = "tracing")]
207
+ tracing::warn!(
208
+ leftover_bytes = remaining,
209
+ preview = &this.buffer[*this.cursor..(*this.cursor + remaining.min(64))],
210
+ "SSE stream ended with unterminated data in buffer; dropping partial line"
211
+ );
212
+ this.buffer.clear();
213
+ *this.cursor = 0;
214
+ }
215
+ return Poll::Ready(None);
216
+ }
217
+
218
+ match this.inner.as_mut().poll_next(cx) {
219
+ Poll::Ready(Some(Ok(bytes))) => {
220
+ // Guard against unbounded growth.
221
+ if this.buffer.len() + bytes.len() > MAX_BUFFER_BYTES {
222
+ // Mark done so subsequent polls don't continue reading.
223
+ *this.done = true;
224
+ return Poll::Ready(Some(Err(LiterLlmError::Streaming {
225
+ message: format!("SSE buffer exceeded {MAX_BUFFER_BYTES} bytes; stream aborted"),
226
+ })));
227
+ }
228
+ match std::str::from_utf8(&bytes) {
229
+ Ok(s) => this.buffer.push_str(s),
230
+ Err(e) => {
231
+ // Mark done so the next poll does not try to read
232
+ // more data from the (now-corrupt) stream.
233
+ *this.done = true;
234
+ return Poll::Ready(Some(Err(LiterLlmError::Streaming {
235
+ message: format!("invalid UTF-8 in SSE stream: {e}"),
236
+ })));
237
+ }
238
+ }
239
+ }
240
+ Poll::Ready(Some(Err(e))) => {
241
+ return Poll::Ready(Some(Err(LiterLlmError::from(e))));
242
+ }
243
+ Poll::Ready(None) => {
244
+ *this.done = true;
245
+ // Loop once more to flush any remaining buffered line.
246
+ continue;
247
+ }
248
+ Poll::Pending => {
249
+ return Poll::Pending;
250
+ }
251
+ }
252
+ }
253
+ }
254
+ }
255
+
256
/// Drop the already-consumed prefix of `buffer` once it dominates the buffer.
///
/// Nothing happens while `cursor` is at most half of `buffer.len()`. Once it
/// is strictly greater, the first `cursor` bytes are removed and `cursor` is
/// reset to `0`, so the unread remainder starts at the front of the buffer.
/// Deferring the shift this way avoids moving bytes on every consumed line.
fn compact_if_needed(buffer: &mut String, cursor: &mut usize) {
    let consumed = *cursor;
    if consumed <= buffer.len() / 2 {
        return;
    }
    buffer.drain(..consumed);
    *cursor = 0;
}
267
+
268
+ // ---------------------------------------------------------------------------
269
+ // Utility
270
+ // ---------------------------------------------------------------------------
271
+
272
/// Decode one SSE `data:` line into a `ChatCompletionChunk`.
///
/// Returns `None` when the line does not start with `data:` or when its
/// payload is the terminal `[DONE]` sentinel. Otherwise returns
/// `Some(result)`, where a JSON decoding failure is reported as
/// `LiterLlmError::Streaming`.
///
/// Compiled for crate-internal tests only; external consumers should use the
/// streaming API instead.
#[cfg(test)]
pub(crate) fn parse_sse_line(line: &str) -> Option<Result<ChatCompletionChunk>> {
    let payload = line.strip_prefix("data:")?;
    // Drop the single optional space after the colon, then surrounding whitespace.
    let payload = payload.strip_prefix(' ').unwrap_or(payload).trim();

    match payload {
        "[DONE]" => None,
        json => Some(serde_json::from_str(json).map_err(|err| LiterLlmError::Streaming {
            message: format!("failed to parse SSE data: {err}"),
        })),
    }
}
@@ -0,0 +1,37 @@
1
+ // Provider, HTTP, and retry infrastructure are only active with native-http.
2
+ // Suppress dead_code lints on the wasm / no-native-http target so that the
3
+ // type-only surface compiles cleanly.
4
+ #![cfg_attr(not(feature = "native-http"), allow(dead_code, unused_imports))]
5
+
6
+ pub mod auth;
7
+ pub mod client;
8
+ pub mod cost;
9
+ pub mod error;
10
+ pub(crate) mod http;
11
+ pub(crate) mod provider;
12
+ #[cfg(test)]
13
+ mod tests;
14
+ #[cfg(feature = "tokenizer")]
15
+ pub mod tokenizer;
16
+ #[cfg(feature = "tower")]
17
+ pub mod tower;
18
+ pub mod types;
19
+
20
+ // Re-export key types at crate root.
21
+ pub use client::{
22
+ BatchClient, BoxFuture, BoxStream, ClientConfig, ClientConfigBuilder, FileClient, LlmClient, ResponseClient,
23
+ };
24
+ // DefaultClient requires the native HTTP stack (reqwest + tokio).
25
+ #[cfg(feature = "native-http")]
26
+ pub use client::DefaultClient;
27
+ // ManagedClient requires both the native HTTP stack and Tower middleware.
28
+ #[cfg(all(feature = "native-http", feature = "tower"))]
29
+ pub use client::managed::ManagedClient;
30
+ pub use error::{LiterLlmError, Result};
31
+ // Re-export the public provider helper functions that are part of the crate's
32
+ // public API even though the `provider` module itself is pub(crate).
33
+ pub use provider::custom::{
34
+ AuthHeaderFormat, CustomProviderConfig, register_custom_provider, unregister_custom_provider,
35
+ };
36
+ pub use provider::{ProviderConfig, all_providers, complex_provider_names};
37
+ pub use types::*;