RubyGems - liter_llm - Versions diffs - 1.0.0.pre.rc.6 - Mend

liter_llm 1.0.0.pre.rc.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78) hide show

checksums.yaml +7 -0
data/README.md +239 -0
data/ext/liter_llm_rb/extconf.rb +65 -0
data/ext/liter_llm_rb/native/.cargo/config.toml +23 -0
data/ext/liter_llm_rb/native/Cargo.lock +3713 -0
data/ext/liter_llm_rb/native/Cargo.toml +32 -0
data/ext/liter_llm_rb/native/build.rs +15 -0
data/ext/liter_llm_rb/native/src/lib.rs +1079 -0
data/lib/liter_llm.rb +8 -0
data/sig/liter_llm.rbs +416 -0
data/vendor/Cargo.toml +54 -0
data/vendor/liter-llm/Cargo.toml +92 -0
data/vendor/liter-llm/README.md +252 -0
data/vendor/liter-llm/schemas/pricing.json +40 -0
data/vendor/liter-llm/schemas/providers.json +1662 -0
data/vendor/liter-llm/src/auth/azure_ad.rs +264 -0
data/vendor/liter-llm/src/auth/bedrock_sts.rs +353 -0
data/vendor/liter-llm/src/auth/mod.rs +68 -0
data/vendor/liter-llm/src/auth/vertex_oauth.rs +353 -0
data/vendor/liter-llm/src/client/config.rs +351 -0
data/vendor/liter-llm/src/client/managed.rs +622 -0
data/vendor/liter-llm/src/client/mod.rs +864 -0
data/vendor/liter-llm/src/cost.rs +212 -0
data/vendor/liter-llm/src/error.rs +190 -0
data/vendor/liter-llm/src/http/eventstream.rs +860 -0
data/vendor/liter-llm/src/http/mod.rs +12 -0
data/vendor/liter-llm/src/http/request.rs +438 -0
data/vendor/liter-llm/src/http/retry.rs +72 -0
data/vendor/liter-llm/src/http/streaming.rs +289 -0
data/vendor/liter-llm/src/lib.rs +37 -0
data/vendor/liter-llm/src/provider/anthropic.rs +2250 -0
data/vendor/liter-llm/src/provider/azure.rs +579 -0
data/vendor/liter-llm/src/provider/bedrock.rs +1543 -0
data/vendor/liter-llm/src/provider/cohere.rs +654 -0
data/vendor/liter-llm/src/provider/custom.rs +404 -0
data/vendor/liter-llm/src/provider/google_ai.rs +281 -0
data/vendor/liter-llm/src/provider/mistral.rs +188 -0
data/vendor/liter-llm/src/provider/mod.rs +616 -0
data/vendor/liter-llm/src/provider/vertex.rs +1504 -0
data/vendor/liter-llm/src/tests.rs +1425 -0
data/vendor/liter-llm/src/tokenizer.rs +281 -0
data/vendor/liter-llm/src/tower/budget.rs +599 -0
data/vendor/liter-llm/src/tower/cache.rs +502 -0
data/vendor/liter-llm/src/tower/cache_opendal.rs +270 -0
data/vendor/liter-llm/src/tower/cooldown.rs +231 -0
data/vendor/liter-llm/src/tower/cost.rs +404 -0
data/vendor/liter-llm/src/tower/fallback.rs +121 -0
data/vendor/liter-llm/src/tower/health.rs +219 -0
data/vendor/liter-llm/src/tower/hooks.rs +369 -0
data/vendor/liter-llm/src/tower/mod.rs +77 -0
data/vendor/liter-llm/src/tower/rate_limit.rs +300 -0
data/vendor/liter-llm/src/tower/router.rs +436 -0
data/vendor/liter-llm/src/tower/service.rs +181 -0
data/vendor/liter-llm/src/tower/tests.rs +539 -0
data/vendor/liter-llm/src/tower/tests_common.rs +252 -0
data/vendor/liter-llm/src/tower/tracing.rs +209 -0
data/vendor/liter-llm/src/tower/types.rs +170 -0
data/vendor/liter-llm/src/types/audio.rs +52 -0
data/vendor/liter-llm/src/types/batch.rs +77 -0
data/vendor/liter-llm/src/types/chat.rs +214 -0
data/vendor/liter-llm/src/types/common.rs +244 -0
data/vendor/liter-llm/src/types/embedding.rs +84 -0
data/vendor/liter-llm/src/types/files.rs +58 -0
data/vendor/liter-llm/src/types/image.rs +40 -0
data/vendor/liter-llm/src/types/mod.rs +27 -0
data/vendor/liter-llm/src/types/models.rs +21 -0
data/vendor/liter-llm/src/types/moderation.rs +80 -0
data/vendor/liter-llm/src/types/ocr.rs +87 -0
data/vendor/liter-llm/src/types/rerank.rs +46 -0
data/vendor/liter-llm/src/types/responses.rs +55 -0
data/vendor/liter-llm/src/types/search.rs +45 -0
data/vendor/liter-llm/tests/contract.rs +332 -0
data/vendor/liter-llm-ffi/Cargo.toml +30 -0
data/vendor/liter-llm-ffi/build.rs +66 -0
data/vendor/liter-llm-ffi/cbindgen.toml +60 -0
data/vendor/liter-llm-ffi/liter_llm.h +850 -0
data/vendor/liter-llm-ffi/src/lib.rs +2488 -0
metadata +286 -0

data/vendor/liter-llm/src/cost.rs ADDED Viewed

@@ -0,0 +1,212 @@
+//! Cost estimation for LLM API calls.
+//!
+//! Pricing data is embedded at compile time from `schemas/pricing.json` and
+//! covers the most commonly used models across major providers.  Prices are
+//! approximate and derived from the [litellm](https://github.com/BerriAI/litellm)
+//! project (MIT License, Copyright 2023 Berri AI).
+//!
+//! # Example
+//!
+//! ```rust
+//! use liter_llm::cost;
+//!
+//! // Returns None for unknown models.
+//! assert!(cost::completion_cost("unknown-model", 100, 50).is_none());
+//!
+//! // Returns Some(cost_in_usd) for known models.
+//! let cost = cost::completion_cost("gpt-4o", 1000, 500).unwrap();
+//! assert!(cost > 0.0);
+//! ```
+use std::collections::HashMap;
+use std::sync::LazyLock;
+use serde::Deserialize;
+// The pricing table is baked into the binary via `include_str!`, so nothing
+// has to be read from the file system when the registry is first used.
+const PRICING_JSON: &str = include_str!("../schemas/pricing.json");
+/// Process-wide pricing registry, parsed from [`PRICING_JSON`] on first access.
+///
+/// The parse outcome is stored as a `Result` (with the error flattened to a
+/// `String`) so that a malformed table is reported to callers at lookup time
+/// instead of panicking during initialisation.
+static PRICING: LazyLock<std::result::Result<PricingRegistry, String>> = LazyLock::new(|| {
+    serde_json::from_str::<PricingRegistry>(PRICING_JSON).map_err(|parse_err| parse_err.to_string())
+});
+/// Borrow the parsed pricing registry.
+///
+/// Yields `None` when the embedded JSON failed to deserialize, so callers see
+/// a broken table exactly as they would an unknown model: no pricing available.
+fn pricing() -> Option<&'static PricingRegistry> {
+    match &*PRICING {
+        Ok(registry) => Some(registry),
+        Err(_) => None,
+    }
+}
+// ─── Registry ─────────────────────────────────────────────────────────────────
+#[derive(Debug, Deserialize)]
+struct PricingRegistry { // deserialized form of the embedded pricing JSON (see `PRICING`)
+    models: HashMap<String, ModelPricing>, // keyed by model name; queried by `model_pricing`
+}
+/// Per-token pricing for a single model (USD per token), deserialized from one registry entry.
+#[derive(Debug, Clone, Deserialize)]
+pub struct ModelPricing {
+    /// Cost in USD per input (prompt) token; multiplied by `prompt_tokens` in `completion_cost`.
+    pub input_cost_per_token: f64,
+    /// Cost in USD per output (completion) token; multiplied by `completion_tokens`.  Zero for embedding models.
+    pub output_cost_per_token: f64,
+}
+// ─── Public API ───────────────────────────────────────────────────────────────
+/// Estimate the USD cost of one completion from its token usage.
+///
+/// The model's per-token rates are resolved through [`model_pricing`], so the
+/// same fallback applies: if `model` has no exact entry, trailing `-` / `.`
+/// delimited segments are dropped one at a time until a known name is found
+/// (for example `gpt-4-0613` resolves to `gpt-4` when only the latter exists).
+///
+/// Returns `None` when no pricing entry can be resolved, otherwise
+/// `Some(cost_usd)` computed as
+/// `prompt_tokens * input_rate + completion_tokens * output_rate`.
+///
+/// # Example
+///
+/// ```rust
+/// use liter_llm::cost;
+///
+/// let usd = cost::completion_cost("gpt-4o", 1_000, 500).unwrap();
+/// // 1000 * 0.0000025 + 500 * 0.00001 = 0.0025 + 0.005 = 0.0075
+/// assert!((usd - 0.0075).abs() < 1e-9);
+/// ```
+#[must_use]
+pub fn completion_cost(model: &str, prompt_tokens: u64, completion_tokens: u64) -> Option<f64> {
+    model_pricing(model).map(|rates| {
+        let prompt_usd = (prompt_tokens as f64) * rates.input_cost_per_token;
+        let completion_usd = (completion_tokens as f64) * rates.output_cost_per_token;
+        prompt_usd + completion_usd
+    })
+}
+/// Resolve the per-token pricing entry for `model`.
+///
+/// Candidates are tried from most to least specific: first `model` verbatim,
+/// then the name with everything from its last `-` or `.` onwards removed, and
+/// so on until no separator remains (`gpt-4-0613` → `gpt-4` → `gpt`).  The
+/// first candidate present in the registry wins.
+///
+/// Returns `None` when no candidate matches or when the embedded registry
+/// could not be parsed.  The returned reference lives for the whole process
+/// (`'static`).
+#[must_use]
+pub fn model_pricing(model: &str) -> Option<&'static ModelPricing> {
+    let models = &pricing()?.models;
+    // Lazily yields `model` itself, then each progressively shorter prefix.
+    std::iter::successors(Some(model), |&name| {
+        name.rfind(['-', '.']).map(|cut| &name[..cut])
+    })
+    .find_map(|name| models.get(name))
+}
+// ─── Tests ────────────────────────────────────────────────────────────────────
+#[cfg(test)]
+mod tests { // unit tests exercising the lookups against the embedded pricing table
+    use super::*;
+    #[test]
+    fn completion_cost_known_model_returns_expected_value() {
+        // Expected registry rates for gpt-4 (USD per token): input=0.00003, output=0.00006
+        // 100 * 0.00003 + 50 * 0.00006 = 0.003 + 0.003 = 0.006
+        let cost = completion_cost("gpt-4", 100, 50).expect("gpt-4 must be in registry");
+        let expected = 100.0 * 0.00003 + 50.0 * 0.00006;
+        assert!((cost - expected).abs() < 1e-12, "expected {expected}, got {cost}");
+    }
+    #[test]
+    fn completion_cost_unknown_model_returns_none() {
+        assert!(
+            completion_cost("unknown-model-xyz", 100, 50).is_none(),
+            "unknown model should return None"
+        );
+    }
+    #[test]
+    fn completion_cost_gpt4o_matches_published_pricing() {
+        // gpt-4o: input=$2.50/1M tokens = 0.0000025/token
+        //         output=$10/1M tokens  = 0.00001/token
+        let cost = completion_cost("gpt-4o", 1_000, 500).expect("gpt-4o must be in registry");
+        let expected = 1_000.0 * 0.0000025 + 500.0 * 0.00001;
+        assert!((cost - expected).abs() < 1e-12, "expected {expected}, got {cost}");
+    }
+    #[test]
+    fn completion_cost_embedding_model_has_zero_output_cost() {
+        // Embedding models only charge for input tokens, so the output rate must be exactly zero.
+        let cost =
+            completion_cost("text-embedding-3-small", 100, 0).expect("text-embedding-3-small must be in registry");
+        assert!(cost > 0.0, "input tokens must have a positive cost");
+        let pricing = model_pricing("text-embedding-3-small").unwrap();
+        assert_eq!(pricing.output_cost_per_token, 0.0, "embedding output cost must be zero");
+    }
+    #[test]
+    fn model_pricing_returns_none_for_unknown_model() {
+        assert!(model_pricing("does-not-exist").is_none());
+    }
+    #[test]
+    fn model_pricing_prefix_fallback_matches_shorter_name() {
+        // gpt-4 is in the registry; gpt-4-0613 is a versioned variant that
+        // should fall back to the gpt-4 entry via prefix stripping.
+        let exact = model_pricing("gpt-4").expect("gpt-4 must be in registry");
+        let prefix = model_pricing("gpt-4-0613").expect("gpt-4-0613 should match gpt-4 via prefix");
+        assert!(
+            (exact.input_cost_per_token - prefix.input_cost_per_token).abs() < 1e-15,
+            "prefix match should return the same pricing as exact match"
+        );
+    }
+    #[test]
+    fn completion_cost_prefix_fallback() {
+        // Versioned model name should resolve via prefix stripping (same fallback as `model_pricing`).
+        let cost = completion_cost("gpt-4-0613", 100, 50);
+        assert!(cost.is_some(), "gpt-4-0613 should resolve via prefix fallback to gpt-4");
+    }
+    #[test]
+    fn model_pricing_returns_correct_fields_for_known_model() {
+        let p = model_pricing("gpt-4o-mini").expect("gpt-4o-mini must be in registry");
+        // Published: input $0.15/1M = 0.00000015, output $0.60/1M = 0.0000006
+        assert!(
+            (p.input_cost_per_token - 0.00000015).abs() < 1e-12,
+            "unexpected input_cost_per_token: {}",
+            p.input_cost_per_token
+        );
+        assert!(
+            (p.output_cost_per_token - 0.0000006).abs() < 1e-12,
+            "unexpected output_cost_per_token: {}",
+            p.output_cost_per_token
+        );
+    }
+    #[test]
+    fn pricing_registry_embedded_json_is_valid() {
+        // Confirm the embedded JSON parses correctly — PRICING holds Ok(...); on failure the parse error is printed.
+        assert!(
+            PRICING.as_ref().is_ok(),
+            "embedded schemas/pricing.json failed to parse: {:?}",
+            PRICING.as_ref().err()
+        );
+    }
+}

data/vendor/liter-llm/src/error.rs ADDED Viewed

@@ -0,0 +1,190 @@
+use std::time::Duration;
+use serde::{Deserialize, Serialize};
+/// Error response from an OpenAI-compatible API: a JSON object whose `error` key holds the details.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ErrorResponse {
+    pub error: ApiError, // read by `LiterLlmError::from_status` to extract `message` and `code`
+}
+/// Inner error object nested under the `error` key of an [`ErrorResponse`].
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ApiError {
+    pub message: String, // human-readable description; becomes the error message in `from_status`
+    #[serde(rename = "type")] // `type` is a Rust keyword, so the JSON key is mapped onto `error_type`
+    pub error_type: String, // NOTE(review): no serde default here or on `message`, so a body lacking either fails to parse and `from_status` falls back to the raw body text
+    #[serde(default)] // a missing `param` key deserializes to `None`
+    pub param: Option<String>,
+    #[serde(default)] // a missing `code` key deserializes to `None`
+    pub code: Option<String>, // machine-readable code, e.g. "context_length_exceeded", checked first by `from_status`
+}
+/// All errors that can occur when using `liter-llm`; `#[non_exhaustive]`, so downstream matches need a wildcard arm.
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum LiterLlmError {
+    #[error("authentication failed: {message}")]
+    Authentication { message: String }, // HTTP 401 / 403 in `from_status`
+    #[error("rate limited: {message}")]
+    RateLimited { // HTTP 429 in `from_status`
+        message: String,
+        retry_after: Option<Duration>, // `Retry-After` delay forwarded by the caller of `from_status`, if any
+    },
+    #[error("bad request: {message}")]
+    BadRequest { message: String }, // HTTP 400 / 422 without a more specific classification, and any other 4xx
+    #[error("context window exceeded: {message}")]
+    ContextWindowExceeded { message: String }, // HTTP 400 / 422 whose `code` or message indicates a context-length overflow
+    #[error("content policy violation: {message}")]
+    ContentPolicy { message: String }, // HTTP 400 / 422 whose `code` or message indicates a content policy / filter hit
+    #[error("not found: {message}")]
+    NotFound { message: String }, // HTTP 404 in `from_status`
+    #[error("server error: {message}")]
+    ServerError { message: String }, // HTTP 500 and any status not matched elsewhere in `from_status`
+    #[error("service unavailable: {message}")]
+    ServiceUnavailable { message: String }, // HTTP 502, 503, 504 in `from_status`
+    #[error("request timeout")]
+    Timeout, // HTTP 408 in `from_status`; carries no message
+    #[cfg(feature = "native-http")] // variant only exists when the `native-http` feature is enabled
+    #[error(transparent)]
+    Network(#[from] reqwest::Error), // converted automatically from `reqwest::Error` via `#[from]`
+    /// A catch-all for errors that occur during streaming response processing.
+    ///
+    /// This variant covers multiple sub-conditions including UTF-8 decoding
+    /// failures, CRC/checksum mismatches (AWS EventStream), JSON parse errors
+    /// in individual SSE chunks, and buffer overflow conditions.  The `message`
+    /// field contains a human-readable description of the specific failure.
+    #[error("streaming error: {message}")]
+    Streaming { message: String },
+    #[error("provider {provider} does not support {endpoint}")]
+    EndpointNotSupported { endpoint: String, provider: String },
+    #[error("invalid header {name:?}: {reason}")]
+    InvalidHeader { name: String, reason: String }, // `name` is rendered with `{:?}`, i.e. quoted and escaped
+    #[error("serialization error: {0}")]
+    Serialization(#[from] serde_json::Error), // converted automatically from `serde_json::Error` via `#[from]`
+    #[error("budget exceeded: {message}")]
+    BudgetExceeded { message: String, model: Option<String> }, // `model` is not included in the Display output
+    #[error("hook rejected: {message}")]
+    HookRejected { message: String },
+    /// An internal logic error (e.g. unexpected Tower response variant).
+    ///
+    /// This should never surface in normal operation — if it does, it
+    /// indicates a bug in the library.
+    #[error("internal error: {message}")]
+    InternalError { message: String },
+}
+impl LiterLlmError {
+    /// Whether this error is a transient failure, i.e. one worth retrying on a
+    /// different service or deployment.
+    ///
+    /// Consulted by [`crate::tower::fallback::FallbackService`] and
+    /// [`crate::tower::router::Router`] when deciding whether to route to an
+    /// alternative endpoint.
+    #[must_use]
+    pub fn is_transient(&self) -> bool {
+        match self {
+            // Transport-level failures (only present with the `native-http` feature).
+            #[cfg(feature = "native-http")]
+            Self::Network(_) => true,
+            // The request never completed in time, or the caller was throttled.
+            Self::Timeout | Self::RateLimited { .. } => true,
+            // The upstream itself is failing.
+            Self::ServerError { .. } | Self::ServiceUnavailable { .. } => true,
+            _ => false,
+        }
+    }
+    /// The OpenTelemetry `error.type` string for this variant.
+    ///
+    /// The tracing middleware records it as the `error.type` span attribute on
+    /// failed requests, per the GenAI semantic conventions.  Each string is the
+    /// variant's own name.
+    #[must_use]
+    pub fn error_type(&self) -> &'static str {
+        match self {
+            Self::Authentication { .. } => "Authentication",
+            Self::RateLimited { .. } => "RateLimited",
+            Self::BadRequest { .. } => "BadRequest",
+            Self::ContextWindowExceeded { .. } => "ContextWindowExceeded",
+            Self::ContentPolicy { .. } => "ContentPolicy",
+            Self::NotFound { .. } => "NotFound",
+            Self::ServerError { .. } => "ServerError",
+            Self::ServiceUnavailable { .. } => "ServiceUnavailable",
+            Self::Timeout => "Timeout",
+            #[cfg(feature = "native-http")]
+            Self::Network(_) => "Network",
+            Self::Streaming { .. } => "Streaming",
+            Self::EndpointNotSupported { .. } => "EndpointNotSupported",
+            Self::InvalidHeader { .. } => "InvalidHeader",
+            Self::Serialization(_) => "Serialization",
+            Self::BudgetExceeded { .. } => "BudgetExceeded",
+            Self::HookRejected { .. } => "HookRejected",
+            Self::InternalError { .. } => "InternalError",
+        }
+    }
+    /// Build an error from an HTTP status code, the response body, and an
+    /// optional `Retry-After` duration already parsed from the response header.
+    ///
+    /// `body` is parsed as an [`ErrorResponse`]; when that fails, the raw body
+    /// text is used as the message.  `retry_after` is forwarded into
+    /// [`LiterLlmError::RateLimited`] so callers can honour the server-requested
+    /// delay without re-parsing the header.
+    pub fn from_status(status: u16, body: &str, retry_after: Option<Duration>) -> Self {
+        let (code, message) = match serde_json::from_str::<ErrorResponse>(body) {
+            Ok(envelope) => (envelope.error.code, envelope.error.message),
+            Err(_) => (None, body.to_string()),
+        };
+        match status {
+            400 | 422 => Self::classify_bad_request(code.as_deref(), message),
+            401 | 403 => Self::Authentication { message },
+            404 => Self::NotFound { message },
+            408 => Self::Timeout,
+            429 => Self::RateLimited { message, retry_after },
+            // Every other 4xx (405 and 413 included) is a plain client error.
+            400..=499 => Self::BadRequest { message },
+            502..=504 => Self::ServiceUnavailable { message },
+            // 500, the remaining 5xx codes, and anything unrecognised.
+            _ => Self::ServerError { message },
+        }
+    }
+    /// Refine a 400 / 422 response into the most specific matching variant.
+    ///
+    /// The structured `code` is checked first because it is more reliable than
+    /// substring matching; the message heuristics cover providers that do not
+    /// populate `code`.
+    fn classify_bad_request(code: Option<&str>, message: String) -> Self {
+        const CONTEXT_HINTS: &[&str] = &["context_length_exceeded", "context window", "maximum context length"];
+        const POLICY_HINTS: &[&str] = &["content_policy", "content_filter"];
+        match code {
+            Some("context_length_exceeded") => return Self::ContextWindowExceeded { message },
+            Some("content_policy_violation" | "content_filter") => return Self::ContentPolicy { message },
+            _ => {}
+        }
+        let mentions_any = |hints: &[&str]| hints.iter().any(|hint| message.contains(*hint));
+        let about_context = mentions_any(CONTEXT_HINTS);
+        let about_policy = mentions_any(POLICY_HINTS);
+        if about_context {
+            Self::ContextWindowExceeded { message }
+        } else if about_policy {
+            Self::ContentPolicy { message }
+        } else {
+            Self::BadRequest { message }
+        }
+    }
+}
+pub type Result<T> = std::result::Result<T, LiterLlmError>; // shorthand for results whose error type is `LiterLlmError`