RubyGems - liter_llm - Versions diffs - 1.0.0.pre.rc.6 - Mend

liter_llm 1.0.0.pre.rc.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78) hide show

checksums.yaml +7 -0
data/README.md +239 -0
data/ext/liter_llm_rb/extconf.rb +65 -0
data/ext/liter_llm_rb/native/.cargo/config.toml +23 -0
data/ext/liter_llm_rb/native/Cargo.lock +3713 -0
data/ext/liter_llm_rb/native/Cargo.toml +32 -0
data/ext/liter_llm_rb/native/build.rs +15 -0
data/ext/liter_llm_rb/native/src/lib.rs +1079 -0
data/lib/liter_llm.rb +8 -0
data/sig/liter_llm.rbs +416 -0
data/vendor/Cargo.toml +54 -0
data/vendor/liter-llm/Cargo.toml +92 -0
data/vendor/liter-llm/README.md +252 -0
data/vendor/liter-llm/schemas/pricing.json +40 -0
data/vendor/liter-llm/schemas/providers.json +1662 -0
data/vendor/liter-llm/src/auth/azure_ad.rs +264 -0
data/vendor/liter-llm/src/auth/bedrock_sts.rs +353 -0
data/vendor/liter-llm/src/auth/mod.rs +68 -0
data/vendor/liter-llm/src/auth/vertex_oauth.rs +353 -0
data/vendor/liter-llm/src/client/config.rs +351 -0
data/vendor/liter-llm/src/client/managed.rs +622 -0
data/vendor/liter-llm/src/client/mod.rs +864 -0
data/vendor/liter-llm/src/cost.rs +212 -0
data/vendor/liter-llm/src/error.rs +190 -0
data/vendor/liter-llm/src/http/eventstream.rs +860 -0
data/vendor/liter-llm/src/http/mod.rs +12 -0
data/vendor/liter-llm/src/http/request.rs +438 -0
data/vendor/liter-llm/src/http/retry.rs +72 -0
data/vendor/liter-llm/src/http/streaming.rs +289 -0
data/vendor/liter-llm/src/lib.rs +37 -0
data/vendor/liter-llm/src/provider/anthropic.rs +2250 -0
data/vendor/liter-llm/src/provider/azure.rs +579 -0
data/vendor/liter-llm/src/provider/bedrock.rs +1543 -0
data/vendor/liter-llm/src/provider/cohere.rs +654 -0
data/vendor/liter-llm/src/provider/custom.rs +404 -0
data/vendor/liter-llm/src/provider/google_ai.rs +281 -0
data/vendor/liter-llm/src/provider/mistral.rs +188 -0
data/vendor/liter-llm/src/provider/mod.rs +616 -0
data/vendor/liter-llm/src/provider/vertex.rs +1504 -0
data/vendor/liter-llm/src/tests.rs +1425 -0
data/vendor/liter-llm/src/tokenizer.rs +281 -0
data/vendor/liter-llm/src/tower/budget.rs +599 -0
data/vendor/liter-llm/src/tower/cache.rs +502 -0
data/vendor/liter-llm/src/tower/cache_opendal.rs +270 -0
data/vendor/liter-llm/src/tower/cooldown.rs +231 -0
data/vendor/liter-llm/src/tower/cost.rs +404 -0
data/vendor/liter-llm/src/tower/fallback.rs +121 -0
data/vendor/liter-llm/src/tower/health.rs +219 -0
data/vendor/liter-llm/src/tower/hooks.rs +369 -0
data/vendor/liter-llm/src/tower/mod.rs +77 -0
data/vendor/liter-llm/src/tower/rate_limit.rs +300 -0
data/vendor/liter-llm/src/tower/router.rs +436 -0
data/vendor/liter-llm/src/tower/service.rs +181 -0
data/vendor/liter-llm/src/tower/tests.rs +539 -0
data/vendor/liter-llm/src/tower/tests_common.rs +252 -0
data/vendor/liter-llm/src/tower/tracing.rs +209 -0
data/vendor/liter-llm/src/tower/types.rs +170 -0
data/vendor/liter-llm/src/types/audio.rs +52 -0
data/vendor/liter-llm/src/types/batch.rs +77 -0
data/vendor/liter-llm/src/types/chat.rs +214 -0
data/vendor/liter-llm/src/types/common.rs +244 -0
data/vendor/liter-llm/src/types/embedding.rs +84 -0
data/vendor/liter-llm/src/types/files.rs +58 -0
data/vendor/liter-llm/src/types/image.rs +40 -0
data/vendor/liter-llm/src/types/mod.rs +27 -0
data/vendor/liter-llm/src/types/models.rs +21 -0
data/vendor/liter-llm/src/types/moderation.rs +80 -0
data/vendor/liter-llm/src/types/ocr.rs +87 -0
data/vendor/liter-llm/src/types/rerank.rs +46 -0
data/vendor/liter-llm/src/types/responses.rs +55 -0
data/vendor/liter-llm/src/types/search.rs +45 -0
data/vendor/liter-llm/tests/contract.rs +332 -0
data/vendor/liter-llm-ffi/Cargo.toml +30 -0
data/vendor/liter-llm-ffi/build.rs +66 -0
data/vendor/liter-llm-ffi/cbindgen.toml +60 -0
data/vendor/liter-llm-ffi/liter_llm.h +850 -0
data/vendor/liter-llm-ffi/src/lib.rs +2488 -0
metadata +286 -0

data/lib/liter_llm.rb ADDED Viewed

@@ -0,0 +1,8 @@
+# frozen_string_literal: true
+require 'liter_llm_rb'
+# LiterLlm is a Ruby binding for the Rust core library providing a unified
+# LLM client interface with streaming, tool calling, and provider routing.
+module LiterLlm
+end

data/sig/liter_llm.rbs ADDED Viewed

@@ -0,0 +1,416 @@
+# Type signatures for the LiterLlm Ruby binding.
+# Generated from crates/liter-llm-rb Rust source — keep in sync with the
+# Magnus extension and lib/liter_llm.rb public API.
+module LiterLlm
+  VERSION: String
+  # ─── Shared types ────────────────────────────────────────────────────────────
+  # Token usage counts shared by chat, embedding, rerank, and Responses API results.
+  type usage_response = { prompt_tokens: Integer, completion_tokens: Integer, total_tokens: Integer }
+  # ─── Content types ───────────────────────────────────────────────────────────
+  type image_url_param = { url: String, detail: ("low" | "high" | "auto")? }
+  # A single part of a multipart user message.
+  type content_part_param =
+    { type: "text", text: String } |
+    { type: "image_url", image_url: image_url_param }
+  # ─── Message types ───────────────────────────────────────────────────────────
+  # A single message in the conversation history.
+  # The `content` field is a plain string for system/tool/developer/function
+  # roles and either a string or a list of content parts for user messages.
+  type message_param = {
+    role: ("system" | "user" | "assistant" | "tool" | "developer" | "function"),
+    content: String | Array[content_part_param],
+    name: String?,
+    tool_call_id: String?
+  }
+  # ─── Tool / function call types ──────────────────────────────────────────────
+  type function_definition = {
+    name: String,
+    description: String?,
+    parameters: Hash[String, untyped]?,
+    strict: bool?
+  }
+  type tool_param = { type: "function", function: function_definition }
+  type specific_tool_choice = { type: "function", function: { name: String } }
+  type tool_choice_param = ("auto" | "required" | "none") | specific_tool_choice
+  type function_call = { name: String, arguments: String }
+  type tool_call = { id: String, type: "function", function: function_call }
+  # ─── Response format ─────────────────────────────────────────────────────────
+  type json_schema_format = {
+    name: String,
+    description: String?,
+    schema: Hash[String, untyped],
+    strict: bool?
+  }
+  type response_format_param =
+    { type: "text" } |
+    { type: "json_object" } |
+    { type: "json_schema", json_schema: json_schema_format }
+  # ─── Chat request / response ─────────────────────────────────────────────────
+  type stream_options = { include_usage: bool? }
+  # Full OpenAI-compatible chat completion request.
+  type chat_request = {
+    model: String,
+    messages: Array[message_param],
+    temperature: Float?,
+    top_p: Float?,
+    n: Integer?,
+    stream: bool?,
+    stop: (String | Array[String])?,
+    max_tokens: Integer?,
+    presence_penalty: Float?,
+    frequency_penalty: Float?,
+    logit_bias: Hash[String, Float]?,
+    user: String?,
+    tools: Array[tool_param]?,
+    tool_choice: tool_choice_param?,
+    parallel_tool_calls: bool?,
+    response_format: response_format_param?,
+    stream_options: stream_options?,
+    seed: Integer?
+  }
+  type assistant_message = {
+    content: String?,
+    name: String?,
+    tool_calls: Array[tool_call]?,
+    refusal: String?,
+    function_call: function_call?
+  }
+  type choice_response = {
+    index: Integer,
+    message: assistant_message,
+    finish_reason: ("stop" | "length" | "tool_calls" | "content_filter" | "function_call" | String)?
+  }
+  # Full OpenAI-compatible chat completion response.
+  type chat_response = {
+    id: String,
+    object: String,
+    created: Integer,
+    model: String,
+    choices: Array[choice_response],
+    usage: usage_response?,
+    system_fingerprint: String?,
+    service_tier: String?
+  }
+  # ─── Streaming chunk types ───────────────────────────────────────────────────
+  type stream_function_call = { name: String?, arguments: String? }
+  type stream_tool_call = {
+    index: Integer,
+    id: String?,
+    type: "function"?,
+    function: stream_function_call?
+  }
+  type stream_delta = {
+    role: String?,
+    content: String?,
+    tool_calls: Array[stream_tool_call]?,
+    function_call: stream_function_call?,
+    refusal: String?
+  }
+  type stream_choice = { index: Integer, delta: stream_delta, finish_reason: String? }
+  type chat_completion_chunk = {
+    id: String,
+    object: String,
+    created: Integer,
+    model: String,
+    choices: Array[stream_choice],
+    usage: usage_response?,
+    service_tier: String?
+  }
+  # ─── Embedding types ─────────────────────────────────────────────────────────
+  type embedding_request = {
+    model: String,
+    input: String | Array[String],
+    encoding_format: String?,
+    dimensions: Integer?,
+    user: String?
+  }
+  type embedding_object = { object: String, embedding: Array[Float], index: Integer }
+  type embedding_response = {
+    object: String,
+    data: Array[embedding_object],
+    model: String,
+    usage: usage_response
+  }
+  # ─── Models types ────────────────────────────────────────────────────────────
+  type model_object = { id: String, object: String, created: Integer, owned_by: String }
+  type models_response = { object: String, data: Array[model_object] }
+  # ─── Image Generation types ──────────────────────────────────────────────────
+  type create_image_request = {
+    prompt: String,
+    model: String?,
+    n: Integer?,
+    quality: String?,
+    response_format: String?,
+    size: String?,
+    style: String?,
+    user: String?
+  }
+  type image_data = { url: String?, b64_json: String?, revised_prompt: String? }
+  type images_response = { created: Integer, data: Array[image_data] }
+  # ─── Speech types ──────────────────────────────────────────────────────────
+  type create_speech_request = {
+    model: String,
+    input: String,
+    voice: String,
+    response_format: String?,
+    speed: Float?
+  }
+  # ─── Transcription types ───────────────────────────────────────────────────
+  type create_transcription_request = {
+    file: String,
+    model: String,
+    language: String?,
+    prompt: String?,
+    response_format: String?,
+    temperature: Float?
+  }
+  type transcription_response = { text: String }
+  # ─── Moderation types ──────────────────────────────────────────────────────
+  type moderation_request = {
+    input: untyped,
+    model: String?
+  }
+  type moderation_categories = {
+    sexual: bool,
+    hate: bool,
+    harassment: bool,
+    violence: bool
+  }
+  type moderation_category_scores = {
+    sexual: Float,
+    hate: Float,
+    harassment: Float,
+    violence: Float
+  }
+  type moderation_result = {
+    flagged: bool,
+    categories: moderation_categories,
+    category_scores: moderation_category_scores
+  }
+  type moderation_response = { id: String, model: String, results: Array[moderation_result] }
+  # ─── Rerank types ──────────────────────────────────────────────────────────
+  type rerank_request = {
+    model: String,
+    query: String,
+    documents: untyped,
+    top_n: Integer?
+  }
+  type rerank_result = { index: Integer, relevance_score: Float }
+  type rerank_response = { results: Array[rerank_result], model: String, usage: usage_response? }
+  # ─── File types ────────────────────────────────────────────────────────────
+  type create_file_request = { file: String, purpose: String, filename: String? }
+  type file_object = {
+    id: String,
+    object: String,
+    bytes: Integer,
+    created_at: Integer,
+    filename: String,
+    purpose: String,
+    status: String?,
+    status_details: String?
+  }
+  type delete_response = { id: String, object: String, deleted: bool }
+  type file_list_query = { purpose: String?, limit: Integer?, after: String? }
+  type file_list_response = { object: String, data: Array[file_object] }
+  # ─── Batch types ───────────────────────────────────────────────────────────
+  type create_batch_request = {
+    input_file_id: String,
+    endpoint: String,
+    completion_window: String,
+    metadata: Hash[String, String]?
+  }
+  type batch_request_counts = { total: Integer, completed: Integer, failed: Integer }
+  type batch_object = {
+    id: String,
+    object: String,
+    endpoint: String,
+    input_file_id: String,
+    completion_window: String,
+    status: String,
+    output_file_id: String?,
+    error_file_id: String?,
+    created_at: Integer,
+    request_counts: batch_request_counts?,
+    metadata: Hash[String, String]?
+  }
+  type batch_list_query = { limit: Integer?, after: String? }
+  type batch_list_response = { object: String, data: Array[batch_object] }
+  # ─── Response types ────────────────────────────────────────────────────────
+  type create_response_request = {
+    model: String,
+    input: untyped,
+    instructions: String?,
+    max_output_tokens: Integer?,
+    temperature: Float?,
+    top_p: Float?,
+    stream: bool?,
+    metadata: Hash[String, String]?
+  }
+  type response_object = {
+    id: String,
+    object: String,
+    created_at: Integer,
+    status: String,
+    model: String,
+    output: untyped?,
+    usage: usage_response?,
+    metadata: Hash[String, String]?,
+    error: untyped?
+  }
+  # ─── LlmClient ───────────────────────────────────────────────────────────────
+  # Unified LLM client backed by the Rust core.
+  #
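+    # Most I/O methods accept a JSON-encoded request string and return a
+    # JSON-encoded response string; retrieve/delete/cancel methods take a plain
+    # ID instead, and `speech` / `file_content` return base64-encoded data.
+    # The thin Ruby layer is responsible for serialising/deserialising as needed.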
+  class LlmClient
+    # Create a new client.
+    #
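+    # @param api_key     API key for authentication.
+    # @param base_url    Optional provider base URL override.
+    # @param model_hint  Optional model-name hint (NOTE(review): purpose is not
+    #                    documented here — confirm against the Magnus extension).
+    # @param max_retries Retries on 429 / 5xx (default: 3).
+    # @param timeout_secs Request timeout in seconds (default: 60).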
+    def initialize: (
+      String api_key,
+      ?base_url: String?,
+      ?model_hint: String?,
+      ?max_retries: Integer,
+      ?timeout_secs: Integer
+    ) -> void
+    # Send a chat completion request.
+    def chat: (String request_json) -> String
+    # Send an embedding request.
+    def embed: (String request_json) -> String
+    # List models available from the configured provider.
+    def list_models: () -> String
+    # Generate an image from a text prompt.
+    def image_generate: (String request_json) -> String
+    # Generate audio speech from text, returning base64-encoded audio bytes.
+    def speech: (String request_json) -> String
+    # Transcribe audio to text.
+    def transcribe: (String request_json) -> String
+    # Check content against moderation policies.
+    def moderate: (String request_json) -> String
+    # Rerank documents by relevance to a query.
+    def rerank: (String request_json) -> String
+    # Upload a file.
+    def create_file: (String request_json) -> String
+    # Retrieve metadata for a file by ID.
+    def retrieve_file: (String file_id) -> String
+    # Delete a file by ID.
+    def delete_file: (String file_id) -> String
+    # List files, optionally filtered by query parameters.
+    def list_files: (String? query_json) -> String
+    # Retrieve the raw content of a file (base64-encoded).
+    def file_content: (String file_id) -> String
+    # Create a new batch job.
+    def create_batch: (String request_json) -> String
+    # Retrieve a batch by ID.
+    def retrieve_batch: (String batch_id) -> String
+    # List batches, optionally filtered by query parameters.
+    def list_batches: (String? query_json) -> String
+    # Cancel an in-progress batch.
+    def cancel_batch: (String batch_id) -> String
+    # Create a new response via the Responses API.
+    def create_response: (String request_json) -> String
+    # Retrieve a response by ID.
+    def retrieve_response: (String response_id) -> String
+    # Cancel an in-progress response.
+    def cancel_response: (String response_id) -> String
+    def inspect: () -> String
+  end
+end

data/vendor/Cargo.toml ADDED Viewed

@@ -0,0 +1,54 @@
+[workspace]
+members = ["liter-llm", "liter-llm-ffi"]
+[workspace.package]
+version = "1.0.0-rc.6"
+edition = "2024"
+authors = ["Na'aman Hirschfeld <naaman@kreuzberg.dev>"]
+license = "MIT"
+repository = "https://github.com/kreuzberg-dev/liter-llm"
+homepage = "https://kreuzberg.dev"
+[workspace.dependencies]
+anyhow = "1"
+base64 = "0.22"
+bytes = "1"
+clap = { version = "4", features = ["derive"] }
+dashmap = "6"
+futures-core = "0.3"
+futures-util = "0.3"
+jsonschema = "0.45"
+jsonwebtoken = { version = "10", features = ["use_pem"], default-features = false }
+magnus = "0.8"
+memchr = "2"
+napi = { version = "3", features = ["napi4", "serde-json", "async"] }
+napi-build = "2"
+napi-derive = "3"
+opendal = { version = "0.53", features = ["services-memory"], default-features = false }
+opentelemetry = "0.31"
+pin-project-lite = "0.2"
+pyo3 = { version = "0.28", features = ["abi3-py310"] }
+pyo3-async-runtimes = { version = "0.28", features = ["tokio-runtime"] }
+rayon = "1"
+rb-sys = "0.9"
+reqwest = { version = "0.13", features = ["json", "stream", "rustls", "multipart", "form"], default-features = false }
+rustler = "0.37"
+secrecy = { version = "0.10", features = ["serde"] }
+serde = { version = "1", features = ["derive"] }
+serde_json = "1"
+serial_test = "3"
+tempfile = "3"
+thiserror = "2"
+tokenizers = { version = "0.22", features = ["http", "fancy-regex"], default-features = false }
+tokio = { version = "1", features = ["full"] }
+toml = "1.1"
+tower = { version = "0.5", features = ["retry", "limit", "timeout", "buffer", "load-shed", "steer", "util"] }
+tower-http = { version = "0.6", features = ["follow-redirect", "set-header", "sensitive-headers", "trace", "request-id"] }
+tower-layer = "0.3"
+tower-service = "0.3"
+tracing = "0.1"
+tracing-opentelemetry = "0.32"
+walkdir = "2.5"
+wasm-bindgen = "0.2"
+wasm-bindgen-test = "0.3"
+which = "8"

data/vendor/liter-llm/Cargo.toml ADDED Viewed

@@ -0,0 +1,92 @@
+[package]
+name = "liter-llm"
+version = "1.0.0-rc.6"
+edition = "2024"
+license = "MIT"
+repository.workspace = true
+homepage.workspace = true
+authors = ["Na'aman Hirschfeld <naaman@kreuzberg.dev>"]
+description = "Universal LLM API client — 142+ providers, streaming, tool calling. Rust-powered, type-safe, compiled."
+readme = "README.md"
+keywords = ["llm", "openai", "api-client", "ai", "kreuzberg"]
+categories = ["api-bindings", "web-programming::http-client"]
+[package.metadata.cargo-machete]
+ignored = ["dashmap", "tower-http"]
+[features]
+# Enable the native HTTP stack (reqwest + tokio).  Disable this feature when
+# compiling for WebAssembly, where the browser / Node.js fetch API is used
+# instead and the tokio multi-thread runtime is unavailable.
+native-http = ["dep:reqwest", "dep:tokio", "dep:memchr", "dep:base64"]
+# Structured tracing via the `tracing` crate.  Adds `#[instrument]` spans on
+# HTTP functions and SSE parser warnings.  Can be enabled independently of the
+# Tower middleware integration.
+tracing = ["dep:tracing"]
+# Tower middleware integration (LlmService, TracingLayer, FallbackLayer).
+tower = ["dep:tower", "dep:tower-http", "dep:dashmap", "dep:futures-util", "tracing"]
+# OpenTelemetry export bridge via `tracing-opentelemetry`.  Depends on `tracing`
+# and re-exports `tracing_opentelemetry` so callers can compose their own
+# subscriber stack without a hard dependency on the crate themselves.
+otel = ["tracing", "dep:tracing-opentelemetry", "dep:opentelemetry"]
+# AWS Bedrock SigV4 signing support.  Enables the `aws-sigv4` dependency and
+# activates real SigV4 signing in `BedrockProvider`.  When this feature is
+# disabled, `BedrockProvider` still routes `bedrock/` prefixed model names
+# and strips the prefix, but sends requests unsigned (suitable for tests
+# using a `base_url` override pointing to a mock server).
+bedrock = ["native-http", "dep:aws-credential-types", "dep:aws-sigv4"]
+# Azure AD OAuth2 credential provider (client-credentials flow).
+azure-auth = ["native-http"]
+# Google Vertex AI OAuth2 credential provider (service-account JWT flow).
+vertex-auth = ["native-http", "dep:jsonwebtoken"]
+# AWS STS Web Identity credential provider (EKS / IRSA token exchange).
+bedrock-auth = ["native-http"]
+# Token counting via HuggingFace tokenizers.  Provides lazy-cached tokenizer
+# loading and token counting for text and chat completion requests.
+tokenizer = ["dep:tokenizers"]
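+# OpenDAL-backed cache (see `src/tower/cache_opendal.rs`).  Enables the
+# optional `opendal` dependency and requires the `tower` feature.
+opendal-cache = ["dep:opendal", "tower"]
+# Full feature set: native HTTP + tower + tracing + otel + bedrock + tokenizer + auth providers + opendal-cache.
+full = [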
+    "native-http",
+    "tower",
+    "tracing",
+    "otel",
+    "bedrock",
+    "tokenizer",
+    "azure-auth",
+    "vertex-auth",
+    "bedrock-auth",
+    "opendal-cache",
+]
+default = ["native-http"]
+[dependencies]
+aws-credential-types = { version = "1", optional = true }
+aws-sigv4 = { version = "1", features = ["sign-http", "http1"], optional = true }
+base64 = { version = "0.22", optional = true }
+bytes = "1"
+dashmap = { version = "6", optional = true }
+futures-core = "0.3"
+futures-util = { version = "0.3", optional = true }
+jsonwebtoken = { version = "10", features = ["use_pem"], default-features = false, optional = true }
+memchr = { version = "2", optional = true }
+opendal = { version = "0.53", features = ["services-memory"], default-features = false, optional = true }
+opentelemetry = { version = "0.31", optional = true }
+pin-project-lite = "0.2"
+reqwest = { version = "0.13", features = ["json", "stream", "rustls", "multipart", "form"], default-features = false, optional = true }
+secrecy = { version = "0.10", features = ["serde"] }
+serde = { version = "1", features = ["derive"] }
+serde_json = "1"
+thiserror = "2"
+tokenizers = { version = "0.22", features = ["http", "fancy-regex"], default-features = false, optional = true }
+tokio = { version = "1", features = ["time", "rt", "macros"], optional = true }
+tower = { version = "0.5", features = ["retry", "limit", "timeout", "buffer", "load-shed", "steer", "util"], optional = true }
+tower-http = { version = "0.6", features = ["follow-redirect", "set-header", "sensitive-headers", "trace", "request-id"], optional = true }
+tracing = { version = "0.1", optional = true }
+tracing-opentelemetry = { version = "0.32", optional = true }
+[dev-dependencies]
+jsonschema = "0.45"
+serial_test = "3"
+tokio = { version = "1", features = ["test-util", "macros"] }