llmist 1.6.2 → 2.0.0
This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- package/dist/{chunk-T3DIKQWU.js → chunk-LBHWVCZ2.js} +374 -55
- package/dist/chunk-LBHWVCZ2.js.map +1 -0
- package/dist/{chunk-TDRPJP2Q.js → chunk-LFSIEPAE.js} +10 -3
- package/dist/chunk-LFSIEPAE.js.map +1 -0
- package/dist/cli.cjs +384 -61
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +28 -15
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +368 -49
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +70 -13
- package/dist/index.d.ts +70 -13
- package/dist/index.js +4 -2
- package/dist/{mock-stream-Cc47j12U.d.cts → mock-stream-BQHut0lQ.d.cts} +595 -303
- package/dist/{mock-stream-Cc47j12U.d.ts → mock-stream-BQHut0lQ.d.ts} +595 -303
- package/dist/testing/index.cjs +369 -51
- package/dist/testing/index.cjs.map +1 -1
- package/dist/testing/index.d.cts +4 -2
- package/dist/testing/index.d.ts +4 -2
- package/dist/testing/index.js +1 -1
- package/package.json +1 -1
- package/dist/chunk-T3DIKQWU.js.map +0 -1
- package/dist/chunk-TDRPJP2Q.js.map +0 -1
````diff
@@ -315,6 +315,271 @@ interface ResolvedCompactionConfig {
     onCompaction?: (event: CompactionEvent) => void;
 }
 
+/**
+ * Model Registry
+ *
+ * Centralized registry for querying LLM model specifications,
+ * validating configurations, and estimating costs.
+ *
+ * Model data is provided by ProviderAdapter implementations and
+ * automatically populated when providers are registered.
+ */
+
+declare class ModelRegistry {
+    private modelSpecs;
+    private providerMap;
+    /**
+     * Register a provider and collect its model specifications
+     */
+    registerProvider(provider: ProviderAdapter): void;
+    /**
+     * Register a custom model specification at runtime
+     *
+     * Use this to add models that aren't in the built-in catalog, such as:
+     * - Fine-tuned models with custom pricing
+     * - New models not yet supported by llmist
+     * - Custom deployments with different configurations
+     *
+     * @param spec - Complete model specification
+     * @throws {Error} If spec is missing required fields
+     *
+     * @example
+     * ```ts
+     * client.modelRegistry.registerModel({
+     *   provider: "openai",
+     *   modelId: "ft:gpt-4o-2024-08-06:my-org:custom:abc123",
+     *   displayName: "My Fine-tuned GPT-4o",
+     *   contextWindow: 128_000,
+     *   maxOutputTokens: 16_384,
+     *   pricing: { input: 7.5, output: 30.0 },
+     *   knowledgeCutoff: "2024-08",
+     *   features: { streaming: true, functionCalling: true, vision: true }
+     * });
+     * ```
+     */
+    registerModel(spec: ModelSpec): void;
+    /**
+     * Register multiple custom model specifications at once
+     *
+     * @param specs - Array of complete model specifications
+     *
+     * @example
+     * ```ts
+     * client.modelRegistry.registerModels([
+     *   { provider: "openai", modelId: "gpt-5", ... },
+     *   { provider: "openai", modelId: "gpt-5-mini", ... }
+     * ]);
+     * ```
+     */
+    registerModels(specs: ModelSpec[]): void;
+    /**
+     * Get model specification by model ID
+     * @param modelId - Full model identifier (e.g., 'gpt-5', 'claude-sonnet-4-5-20250929')
+     * @returns ModelSpec if found, undefined otherwise
+     */
+    getModelSpec(modelId: string): ModelSpec | undefined;
+    /**
+     * List all models, optionally filtered by provider
+     * @param providerId - Optional provider ID to filter by (e.g., 'openai', 'anthropic')
+     * @returns Array of ModelSpec objects
+     */
+    listModels(providerId?: string): ModelSpec[];
+    /**
+     * Get context window and output limits for a model
+     * @param modelId - Full model identifier
+     * @returns ModelLimits if model found, undefined otherwise
+     */
+    getModelLimits(modelId: string): ModelLimits | undefined;
+    /**
+     * Estimate API cost for a given model and token usage
+     * @param modelId - Full model identifier
+     * @param inputTokens - Number of input tokens (total, including cached and cache creation)
+     * @param outputTokens - Number of output tokens
+     * @param cachedInputTokens - Number of cached input tokens (subset of inputTokens)
+     * @param cacheCreationInputTokens - Number of cache creation tokens (subset of inputTokens, Anthropic only)
+     * @returns CostEstimate if model found, undefined otherwise
+     */
+    estimateCost(modelId: string, inputTokens: number, outputTokens: number, cachedInputTokens?: number, cacheCreationInputTokens?: number): CostEstimate | undefined;
+    /**
+     * Validate that requested token count fits within model limits
+     * @param modelId - Full model identifier
+     * @param requestedTokens - Total tokens requested (input + output)
+     * @returns true if valid, false if model not found or exceeds limits
+     */
+    validateModelConfig(modelId: string, requestedTokens: number): boolean;
+    /**
+     * Check if a model supports a specific feature
+     * @param modelId - Full model identifier
+     * @param feature - Feature to check ('streaming', 'functionCalling', 'vision', etc.)
+     * @returns true if model supports feature, false otherwise
+     */
+    supportsFeature(modelId: string, feature: keyof ModelSpec["features"]): boolean;
+    /**
+     * Get all models that support a specific feature
+     * @param feature - Feature to filter by
+     * @param providerId - Optional provider ID to filter by
+     * @returns Array of ModelSpec objects that support the feature
+     */
+    getModelsByFeature(feature: keyof ModelSpec["features"], providerId?: string): ModelSpec[];
+    /**
+     * Get the most cost-effective model for a given provider and token budget
+     * @param inputTokens - Expected input tokens
+     * @param outputTokens - Expected output tokens
+     * @param providerId - Optional provider ID to filter by
+     * @returns ModelSpec with lowest total cost, or undefined if no models found
+     */
+    getCheapestModel(inputTokens: number, outputTokens: number, providerId?: string): ModelSpec | undefined;
+}
+
+interface LLMGenerationOptions {
+    model: string;
+    messages: LLMMessage[];
+    maxTokens?: number;
+    temperature?: number;
+    topP?: number;
+    stopSequences?: string[];
+    responseFormat?: "text";
+    metadata?: Record<string, unknown>;
+    extra?: Record<string, unknown>;
+    /**
+     * Optional abort signal for cancelling the request mid-flight.
+     *
+     * When the signal is aborted, the provider will attempt to cancel
+     * the underlying HTTP request and the stream will terminate with
+     * an abort error. Use `isAbortError()` from `@/core/errors` to
+     * detect cancellation in error handling.
+     *
+     * @example
+     * ```typescript
+     * const controller = new AbortController();
+     *
+     * const stream = client.stream({
+     *   model: "claude-3-5-sonnet-20241022",
+     *   messages: [{ role: "user", content: "Tell me a long story" }],
+     *   signal: controller.signal,
+     * });
+     *
+     * // Cancel after 5 seconds
+     * setTimeout(() => controller.abort(), 5000);
+     *
+     * try {
+     *   for await (const chunk of stream) {
+     *     process.stdout.write(chunk.text);
+     *   }
+     * } catch (error) {
+     *   if (isAbortError(error)) {
+     *     console.log("\nRequest was cancelled");
+     *   } else {
+     *     throw error;
+     *   }
+     * }
+     * ```
+     */
+    signal?: AbortSignal;
+}
+interface TokenUsage {
+    inputTokens: number;
+    outputTokens: number;
+    totalTokens: number;
+    /** Number of input tokens served from cache (subset of inputTokens) */
+    cachedInputTokens?: number;
+    /** Number of input tokens written to cache (subset of inputTokens, Anthropic only) */
+    cacheCreationInputTokens?: number;
+}
+interface LLMStreamChunk {
+    text: string;
+    /**
+     * Indicates that the provider has finished producing output and includes the reason if available.
+     */
+    finishReason?: string | null;
+    /**
+     * Token usage information, typically available in the final chunk when the stream completes.
+     */
+    usage?: TokenUsage;
+    /**
+     * Provider specific payload emitted at the same time as the text chunk. This is useful for debugging and tests.
+     */
+    rawEvent?: unknown;
+}
+interface LLMStream extends AsyncIterable<LLMStreamChunk> {
+}
+type ProviderIdentifier = string;
+interface ModelDescriptor {
+    provider: string;
+    name: string;
+}
+declare class ModelIdentifierParser {
+    private readonly defaultProvider;
+    constructor(defaultProvider?: string);
+    parse(identifier: string): ModelDescriptor;
+}
+
+/**
+ * Quick execution methods for simple use cases.
+ *
+ * These methods provide convenient shortcuts for common operations
+ * without requiring full agent setup.
+ *
+ * @example
+ * ```typescript
+ * // Quick completion
+ * const answer = await llmist.complete("What is 2+2?");
+ *
+ * // Quick streaming
+ * for await (const chunk of llmist.stream("Tell me a story")) {
+ *   process.stdout.write(chunk);
+ * }
+ * ```
+ */
+
+/**
+ * Options for quick execution methods.
+ */
+interface QuickOptions {
+    /** Model to use (supports aliases like "gpt4", "sonnet", "flash") */
+    model?: string;
+    /** Temperature (0-1) */
+    temperature?: number;
+    /** System prompt */
+    systemPrompt?: string;
+    /** Max tokens to generate */
+    maxTokens?: number;
+}
+/**
+ * Quick completion - returns final text response.
+ *
+ * @param client - LLMist client instance
+ * @param prompt - User prompt
+ * @param options - Optional configuration
+ * @returns Complete text response
+ *
+ * @example
+ * ```typescript
+ * const client = new LLMist();
+ * const answer = await complete(client, "What is 2+2?");
+ * console.log(answer); // "4" or "2+2 equals 4"
+ * ```
+ */
+declare function complete(client: LLMist, prompt: string, options?: QuickOptions): Promise<string>;
+/**
+ * Quick streaming - returns async generator of text chunks.
+ *
+ * @param client - LLMist client instance
+ * @param prompt - User prompt
+ * @param options - Optional configuration
+ * @returns Async generator yielding text chunks
+ *
+ * @example
+ * ```typescript
+ * const client = new LLMist();
+ *
+ * for await (const chunk of stream(client, "Tell me a story")) {
+ *   process.stdout.write(chunk);
+ * }
+ * ```
+ */
+declare function stream(client: LLMist, prompt: string, options?: QuickOptions): AsyncGenerator<string>;
+
 /**
  * Example of gadget usage to help LLMs understand proper invocation.
  *
````
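Together, `ModelRegistry` and the quick helpers make the cost path usable end to end. A minimal sketch built only from the declarations above; `new LLMist()` comes from the JSDoc examples, while the root-level re-export of `LLMist` and `complete` from the package entry point is an assumption (this diff only shows the chunk-level export map):

```typescript
import { LLMist, complete } from "llmist"; // root re-exports assumed

const client = new LLMist();

// Register a fine-tuned model so limits and pricing are known to the registry.
client.modelRegistry.registerModel({
  provider: "openai",
  modelId: "ft:gpt-4o-2024-08-06:my-org:custom:abc123",
  displayName: "My Fine-tuned GPT-4o",
  contextWindow: 128_000,
  maxOutputTokens: 16_384,
  pricing: { input: 7.5, output: 30.0 },
  knowledgeCutoff: "2024-08",
  features: { streaming: true, functionCalling: true, vision: true },
});

// Estimate the cost of a call before making it.
const estimate = client.modelRegistry.estimateCost(
  "ft:gpt-4o-2024-08-06:my-org:custom:abc123",
  1_200, // input tokens
  400,   // output tokens
);

const answer = await complete(client, "What is 2+2?", { maxTokens: 16 });
```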
````diff
@@ -350,7 +615,34 @@ interface GadgetExecutionResult {
     error?: string;
     executionTimeMs: number;
     breaksLoop?: boolean;
+    /** Cost of gadget execution in USD. Defaults to 0 if not provided by gadget. */
+    cost?: number;
 }
+/**
+ * Result returned by gadget execute() method.
+ * Can be a simple string or an object with result and optional cost.
+ *
+ * @example
+ * ```typescript
+ * // Simple string return (free gadget)
+ * execute: () => "result"
+ *
+ * // Object return with cost
+ * execute: () => ({ result: "data", cost: 0.001 })
+ * ```
+ */
+interface GadgetExecuteResult {
+    /** The execution result as a string */
+    result: string;
+    /** Optional cost in USD (e.g., 0.001 for $0.001) */
+    cost?: number;
+}
+/**
+ * Union type for backwards-compatible execute() return type.
+ * Gadgets can return either a string (legacy, cost = 0) or
+ * an object with result and optional cost.
+ */
+type GadgetExecuteReturn = string | GadgetExecuteResult;
 interface ParsedGadgetCall {
     gadgetName: string;
     invocationId: string;
````
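The new `cost` channel lets a gadget bill its own work. A sketch under stated assumptions: `createGadget` appears only in the JSDoc above, so its import path is assumed, and `searchApi` is a hypothetical paid API:

```typescript
import { z } from "zod";
import { createGadget } from "llmist"; // import path assumed

// Hypothetical paid search API.
declare function searchApi(query: string): Promise<string[]>;

const searchGadget = createGadget({
  description: "Searches a paid web-search API",
  schema: z.object({ query: z.string() }),
  execute: async ({ query }) => {
    const hits = await searchApi(query);
    // New in 2.0: attach a cost to the result; returning a bare string still works (cost = 0).
    return { result: hits.join("\n"), cost: 0.002 };
  },
});
```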
````diff
@@ -433,6 +725,174 @@ type TextOnlyAction = {
     name: string;
     parameters: Record<string, unknown>;
 };
+/**
+ * LLMist client interface for use within gadgets.
+ *
+ * Provides LLM completion methods that automatically report costs
+ * via the execution context. All LLM calls made through this client
+ * will have their costs tracked and included in the gadget's total cost.
+ *
+ * @example
+ * ```typescript
+ * execute: async ({ text }, ctx) => {
+ *   // LLM costs are automatically reported
+ *   const summary = await ctx.llmist.complete('Summarize: ' + text, {
+ *     model: 'haiku',
+ *   });
+ *   return summary;
+ * }
+ * ```
+ */
+interface CostReportingLLMist {
+    /**
+     * Quick completion - returns final text response.
+     * Costs are automatically reported to the execution context.
+     */
+    complete(prompt: string, options?: QuickOptions): Promise<string>;
+    /**
+     * Quick streaming - returns async generator of text chunks.
+     * Costs are automatically reported when the stream completes.
+     */
+    streamText(prompt: string, options?: QuickOptions): AsyncGenerator<string>;
+    /**
+     * Low-level stream access for full control.
+     * Costs are automatically reported based on usage metadata in chunks.
+     */
+    stream(options: LLMGenerationOptions): LLMStream;
+    /**
+     * Access to model registry for cost estimation.
+     */
+    readonly modelRegistry: ModelRegistry;
+}
+/**
+ * Execution context provided to gadgets during execution.
+ *
+ * Contains utilities for cost reporting and LLM access.
+ * This parameter is optional for backwards compatibility -
+ * existing gadgets without the context parameter continue to work.
+ *
+ * @example
+ * ```typescript
+ * // Using reportCost() for manual cost reporting
+ * const apiGadget = createGadget({
+ *   description: 'Calls external API',
+ *   schema: z.object({ query: z.string() }),
+ *   execute: async ({ query }, ctx) => {
+ *     const result = await callExternalAPI(query);
+ *     ctx.reportCost(0.001); // Report $0.001 cost
+ *     return result;
+ *   },
+ * });
+ *
+ * // Using ctx.llmist for automatic LLM cost tracking
+ * const summarizer = createGadget({
+ *   description: 'Summarizes text using LLM',
+ *   schema: z.object({ text: z.string() }),
+ *   execute: async ({ text }, ctx) => {
+ *     // LLM costs are automatically reported!
+ *     return ctx.llmist.complete('Summarize: ' + text);
+ *   },
+ * });
+ * ```
+ */
+interface ExecutionContext {
+    /**
+     * Report a cost incurred during gadget execution.
+     *
+     * Costs are accumulated and added to the gadget's total cost.
+     * Can be called multiple times during execution.
+     * This is summed with any cost returned from the execute() method
+     * and any costs from ctx.llmist calls.
+     *
+     * @param amount - Cost in USD (e.g., 0.001 for $0.001)
+     *
+     * @example
+     * ```typescript
+     * execute: async (params, ctx) => {
+     *   await callExternalAPI(params.query);
+     *   ctx.reportCost(0.001); // $0.001 per API call
+     *
+     *   await callAnotherAPI(params.data);
+     *   ctx.reportCost(0.002); // Can be called multiple times
+     *
+     *   return 'done';
+     *   // Total cost: $0.003
+     * }
+     * ```
+     */
+    reportCost(amount: number): void;
+    /**
+     * Pre-configured LLMist client that automatically reports LLM costs
+     * as gadget costs via the reportCost() callback.
+     *
+     * All LLM calls made through this client will have their costs
+     * automatically tracked and included in the gadget's total cost.
+     *
+     * This property is optional - it will be `undefined` if:
+     * - The gadget is executed via CLI `gadget run` command
+     * - The gadget is tested directly without agent context
+     * - No LLMist client was provided to the executor
+     *
+     * Always check for availability before use: `ctx.llmist?.complete(...)`
+     *
+     * @example
+     * ```typescript
+     * execute: async ({ text }, ctx) => {
+     *   // Check if llmist is available
+     *   if (!ctx.llmist) {
+     *     return 'LLM not available in this context';
+     *   }
+     *
+     *   // LLM costs are automatically reported
+     *   const summary = await ctx.llmist.complete('Summarize: ' + text, {
+     *     model: 'haiku',
+     *   });
+     *
+     *   // Additional manual costs can still be reported
+     *   ctx.reportCost(0.0001); // Processing overhead
+     *
+     *   return summary;
+     * }
+     * ```
+     */
+    llmist?: CostReportingLLMist;
+    /**
+     * Abort signal for cancellation support.
+     *
+     * When a gadget times out, this signal is aborted before the TimeoutException
+     * is thrown. Gadgets can use this to clean up resources (close browsers,
+     * cancel HTTP requests, etc.) when execution is cancelled.
+     *
+     * The signal is always provided (never undefined) to simplify gadget code.
+     *
+     * @example
+     * ```typescript
+     * // Check for abort at key checkpoints
+     * execute: async (params, ctx) => {
+     *   if (ctx.signal.aborted) return 'Aborted';
+     *
+     *   await doExpensiveWork();
+     *
+     *   if (ctx.signal.aborted) return 'Aborted';
+     *   return result;
+     * }
+     *
+     * // Register cleanup handlers
+     * execute: async (params, ctx) => {
+     *   const browser = await chromium.launch();
+     *   ctx.signal.addEventListener('abort', () => browser.close(), { once: true });
+     *   // ... use browser
+     * }
+     *
+     * // Pass to fetch for automatic cancellation
+     * execute: async ({ url }, ctx) => {
+     *   const response = await fetch(url, { signal: ctx.signal });
+     *   return await response.text();
+     * }
+     * ```
+     */
+    signal: AbortSignal;
+}
 
 /**
  * Internal base class for gadgets. Most users should use the `Gadget` class
````
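The three context facilities compose naturally in one gadget: guard on `ctx.llmist`, check `ctx.signal`, let nested LLM costs flow through, and top up with `reportCost()`. A sketch with assumed import path (only the JSDoc above shows `createGadget`):

```typescript
import { z } from "zod";
import { createGadget } from "llmist"; // import path assumed

const translator = createGadget({
  description: "Translates text via a nested LLM call",
  schema: z.object({ text: z.string() }),
  execute: async ({ text }, ctx) => {
    if (!ctx?.llmist) return "LLM not available in this context";
    if (ctx.signal.aborted) return "Aborted";
    // The nested call's token cost is credited to this gadget automatically.
    const out = await ctx.llmist.complete(`Translate to French: ${text}`, { model: "haiku" });
    ctx.reportCost(0.0001); // hypothetical fixed overhead on top of the LLM cost
    return out;
  },
});
```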
````diff
@@ -477,9 +937,67 @@ declare abstract class BaseGadget {
      * Can be synchronous or asynchronous.
      *
      * @param params - Parameters passed from the LLM
-     * @
+     * @param ctx - Optional execution context for cost reporting and LLM access
+     * @returns Result as a string, or an object with result and optional cost
+     *
+     * @example
+     * ```typescript
+     * // Simple string return (free gadget)
+     * execute(params) {
+     *   return "result";
+     * }
+     *
+     * // Object return with cost tracking
+     * execute(params) {
+     *   return { result: "data", cost: 0.001 };
+     * }
+     *
+     * // Using context for callback-based cost reporting
+     * execute(params, ctx) {
+     *   ctx.reportCost(0.001);
+     *   return "result";
+     * }
+     *
+     * // Using wrapped LLMist for automatic cost tracking
+     * async execute(params, ctx) {
+     *   const summary = await ctx.llmist.complete('Summarize: ' + params.text);
+     *   return summary;
+     * }
+     * ```
+     */
+    abstract execute(params: Record<string, unknown>, ctx?: ExecutionContext): GadgetExecuteReturn | Promise<GadgetExecuteReturn>;
+    /**
+     * Throws an AbortError if the execution has been aborted.
+     *
+     * Call this at key checkpoints in long-running gadgets to allow early exit
+     * when the gadget has been cancelled (e.g., due to timeout). This enables
+     * resource cleanup and prevents unnecessary work after cancellation.
+     *
+     * @param ctx - The execution context containing the abort signal
+     * @throws AbortError if ctx.signal.aborted is true
+     *
+     * @example
+     * ```typescript
+     * class DataProcessor extends Gadget({
+     *   description: 'Processes data in multiple steps',
+     *   schema: z.object({ items: z.array(z.string()) }),
+     * }) {
+     *   async execute(params: this['params'], ctx?: ExecutionContext): Promise<string> {
+     *     const results: string[] = [];
+     *
+     *     for (const item of params.items) {
+     *       // Check before each expensive operation
+     *       this.throwIfAborted(ctx);
+     *
+     *       results.push(await this.processItem(item));
+     *     }
+     *
+     *     return results.join(', ');
+     *   }
+     * }
+     * ```
      */
-
+    throwIfAborted(ctx?: ExecutionContext): void;
     /**
      * Auto-generated instruction text for the LLM.
      * Combines name, description, and parameter schema into a formatted instruction.
````
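The widened `execute()` signature is backwards compatible: a 1.x gadget that returns a bare string keeps working, and a 2.0 gadget can opt into the context and cost channel. A migration sketch; the `Gadget({...})` factory and `this["params"]` typing come from the JSDoc above, the import paths are assumed, and `EchoGadget`/`BilledEchoGadget` are illustrative names:

```typescript
import { z } from "zod";
import { Gadget, type ExecutionContext, type GadgetExecuteReturn } from "llmist"; // paths assumed

// 1.x style still compiles: a bare string return implies cost = 0.
class EchoGadget extends Gadget({
  description: "Echoes input",
  schema: z.object({ text: z.string() }),
}) {
  execute(params: this["params"]): string {
    return params.text;
  }
}

// 2.0 style: honour cancellation and attach a per-call cost.
class BilledEchoGadget extends Gadget({
  description: "Echoes input, billed per call",
  schema: z.object({ text: z.string() }),
}) {
  async execute(params: this["params"], ctx?: ExecutionContext): Promise<GadgetExecuteReturn> {
    this.throwIfAborted(ctx); // throws AbortError if ctx?.signal is aborted
    return { result: params.text, cost: 0.0005 };
  }
}
```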
````diff
@@ -630,129 +1148,46 @@ declare function resolveRulesTemplate(rules: PromptConfig["rules"] | undefined,
  * @param context - Context for rendering the template
  * @returns The resolved hint string
  */
-declare function resolveHintTemplate(template: HintTemplate | undefined, defaultValue: string, context: HintContext): string;
-
-type LLMRole = "system" | "user" | "assistant";
-interface LLMMessage {
-    role: LLMRole;
-    content: string;
-    name?: string;
-    metadata?: Record<string, unknown>;
-}
-declare class LLMMessageBuilder {
-    private readonly messages;
-    private startPrefix;
-    private endPrefix;
-    private argPrefix;
-    private promptConfig;
-    constructor(promptConfig?: PromptConfig);
-    /**
-     * Set custom prefixes for gadget markers.
-     * Used to configure history builder to match system prompt markers.
-     */
-    withPrefixes(startPrefix: string, endPrefix: string, argPrefix?: string): this;
-    addSystem(content: string, metadata?: Record<string, unknown>): this;
-    addGadgets(gadgets: BaseGadget[], options?: {
-        startPrefix?: string;
-        endPrefix?: string;
-        argPrefix?: string;
-    }): this;
-    private buildGadgetsSection;
-    private buildUsageSection;
-    private buildExamplesSection;
-    private buildRulesSection;
-    addUser(content: string, metadata?: Record<string, unknown>): this;
-    addAssistant(content: string, metadata?: Record<string, unknown>): this;
-    addGadgetCall(gadget: string, parameters: Record<string, unknown>, result: string): this;
-    /**
-     * Format parameters as Block format with JSON Pointer paths.
-     * Uses the configured argPrefix for consistency with system prompt.
-     */
-    private formatBlockParameters;
-    build(): LLMMessage[];
-}
-
-interface LLMGenerationOptions {
-    model: string;
-    messages: LLMMessage[];
-    maxTokens?: number;
-    temperature?: number;
-    topP?: number;
-    stopSequences?: string[];
-    responseFormat?: "text";
-    metadata?: Record<string, unknown>;
-    extra?: Record<string, unknown>;
-    /**
-     * Optional abort signal for cancelling the request mid-flight.
-     *
-     * When the signal is aborted, the provider will attempt to cancel
-     * the underlying HTTP request and the stream will terminate with
-     * an abort error. Use `isAbortError()` from `@/core/errors` to
-     * detect cancellation in error handling.
-     *
-     * @example
-     * ```typescript
-     * const controller = new AbortController();
-     *
-     * const stream = client.stream({
-     *   model: "claude-3-5-sonnet-20241022",
-     *   messages: [{ role: "user", content: "Tell me a long story" }],
-     *   signal: controller.signal,
-     * });
-     *
-     * // Cancel after 5 seconds
-     * setTimeout(() => controller.abort(), 5000);
-     *
-     * try {
-     *   for await (const chunk of stream) {
-     *     process.stdout.write(chunk.text);
-     *   }
-     * } catch (error) {
-     *   if (isAbortError(error)) {
-     *     console.log("\nRequest was cancelled");
-     *   } else {
-     *     throw error;
-     *   }
-     * }
-     * ```
-     */
-    signal?: AbortSignal;
-}
-interface TokenUsage {
-    inputTokens: number;
-    outputTokens: number;
-    totalTokens: number;
-    /** Number of input tokens served from cache (subset of inputTokens) */
-    cachedInputTokens?: number;
-    /** Number of input tokens written to cache (subset of inputTokens, Anthropic only) */
-    cacheCreationInputTokens?: number;
+declare function resolveHintTemplate(template: HintTemplate | undefined, defaultValue: string, context: HintContext): string;
+
+type LLMRole = "system" | "user" | "assistant";
+interface LLMMessage {
+    role: LLMRole;
+    content: string;
+    name?: string;
+    metadata?: Record<string, unknown>;
 }
-
-
-
-
-
-
+declare class LLMMessageBuilder {
+    private readonly messages;
+    private startPrefix;
+    private endPrefix;
+    private argPrefix;
+    private promptConfig;
+    constructor(promptConfig?: PromptConfig);
     /**
-     *
+     * Set custom prefixes for gadget markers.
+     * Used to configure history builder to match system prompt markers.
      */
-
+    withPrefixes(startPrefix: string, endPrefix: string, argPrefix?: string): this;
+    addSystem(content: string, metadata?: Record<string, unknown>): this;
+    addGadgets(gadgets: BaseGadget[], options?: {
+        startPrefix?: string;
+        endPrefix?: string;
+        argPrefix?: string;
+    }): this;
+    private buildGadgetsSection;
+    private buildUsageSection;
+    private buildExamplesSection;
+    private buildRulesSection;
+    addUser(content: string, metadata?: Record<string, unknown>): this;
+    addAssistant(content: string, metadata?: Record<string, unknown>): this;
+    addGadgetCall(gadget: string, parameters: Record<string, unknown>, result: string): this;
     /**
-     *
+     * Format parameters as Block format with JSON Pointer paths.
+     * Uses the configured argPrefix for consistency with system prompt.
      */
-
-
-interface LLMStream extends AsyncIterable<LLMStreamChunk> {
-}
-type ProviderIdentifier = string;
-interface ModelDescriptor {
-    provider: string;
-    name: string;
-}
-declare class ModelIdentifierParser {
-    private readonly defaultProvider;
-    constructor(defaultProvider?: string);
-    parse(identifier: string): ModelDescriptor;
+    private formatBlockParameters;
+    build(): LLMMessage[];
 }
 
 interface ProviderAdapter {
````
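This hunk mostly relocates the message types and `LLMMessageBuilder` within the bundle and fills in previously empty doc comments; the builder API itself is unchanged. For reference, a usage sketch based on the declarations (the chunk exports the builder as `ar`; a root re-export under its real name is assumed):

```typescript
import { LLMMessageBuilder, type LLMMessage } from "llmist"; // root re-export assumed

// Build a message list suitable for LLMGenerationOptions.messages.
const messages: LLMMessage[] = new LLMMessageBuilder()
  .addSystem("You are a terse assistant.")
  .addUser("Summarize the changelog in one sentence.")
  .build();
```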
````diff
@@ -789,188 +1224,6 @@ interface ProviderAdapter {
     countTokens?(messages: LLMMessage[], descriptor: ModelDescriptor, spec?: ModelSpec): Promise<number>;
 }
 
-/**
- * Model Registry
- *
- * Centralized registry for querying LLM model specifications,
- * validating configurations, and estimating costs.
- *
- * Model data is provided by ProviderAdapter implementations and
- * automatically populated when providers are registered.
- */
-
-declare class ModelRegistry {
-    private modelSpecs;
-    private providerMap;
-    /**
-     * Register a provider and collect its model specifications
-     */
-    registerProvider(provider: ProviderAdapter): void;
-    /**
-     * Register a custom model specification at runtime
-     *
-     * Use this to add models that aren't in the built-in catalog, such as:
-     * - Fine-tuned models with custom pricing
-     * - New models not yet supported by llmist
-     * - Custom deployments with different configurations
-     *
-     * @param spec - Complete model specification
-     * @throws {Error} If spec is missing required fields
-     *
-     * @example
-     * ```ts
-     * client.modelRegistry.registerModel({
-     *   provider: "openai",
-     *   modelId: "ft:gpt-4o-2024-08-06:my-org:custom:abc123",
-     *   displayName: "My Fine-tuned GPT-4o",
-     *   contextWindow: 128_000,
-     *   maxOutputTokens: 16_384,
-     *   pricing: { input: 7.5, output: 30.0 },
-     *   knowledgeCutoff: "2024-08",
-     *   features: { streaming: true, functionCalling: true, vision: true }
-     * });
-     * ```
-     */
-    registerModel(spec: ModelSpec): void;
-    /**
-     * Register multiple custom model specifications at once
-     *
-     * @param specs - Array of complete model specifications
-     *
-     * @example
-     * ```ts
-     * client.modelRegistry.registerModels([
-     *   { provider: "openai", modelId: "gpt-5", ... },
-     *   { provider: "openai", modelId: "gpt-5-mini", ... }
-     * ]);
-     * ```
-     */
-    registerModels(specs: ModelSpec[]): void;
-    /**
-     * Get model specification by model ID
-     * @param modelId - Full model identifier (e.g., 'gpt-5', 'claude-sonnet-4-5-20250929')
-     * @returns ModelSpec if found, undefined otherwise
-     */
-    getModelSpec(modelId: string): ModelSpec | undefined;
-    /**
-     * List all models, optionally filtered by provider
-     * @param providerId - Optional provider ID to filter by (e.g., 'openai', 'anthropic')
-     * @returns Array of ModelSpec objects
-     */
-    listModels(providerId?: string): ModelSpec[];
-    /**
-     * Get context window and output limits for a model
-     * @param modelId - Full model identifier
-     * @returns ModelLimits if model found, undefined otherwise
-     */
-    getModelLimits(modelId: string): ModelLimits | undefined;
-    /**
-     * Estimate API cost for a given model and token usage
-     * @param modelId - Full model identifier
-     * @param inputTokens - Number of input tokens (total, including cached and cache creation)
-     * @param outputTokens - Number of output tokens
-     * @param cachedInputTokens - Number of cached input tokens (subset of inputTokens)
-     * @param cacheCreationInputTokens - Number of cache creation tokens (subset of inputTokens, Anthropic only)
-     * @returns CostEstimate if model found, undefined otherwise
-     */
-    estimateCost(modelId: string, inputTokens: number, outputTokens: number, cachedInputTokens?: number, cacheCreationInputTokens?: number): CostEstimate | undefined;
-    /**
-     * Validate that requested token count fits within model limits
-     * @param modelId - Full model identifier
-     * @param requestedTokens - Total tokens requested (input + output)
-     * @returns true if valid, false if model not found or exceeds limits
-     */
-    validateModelConfig(modelId: string, requestedTokens: number): boolean;
-    /**
-     * Check if a model supports a specific feature
-     * @param modelId - Full model identifier
-     * @param feature - Feature to check ('streaming', 'functionCalling', 'vision', etc.)
-     * @returns true if model supports feature, false otherwise
-     */
-    supportsFeature(modelId: string, feature: keyof ModelSpec["features"]): boolean;
-    /**
-     * Get all models that support a specific feature
-     * @param feature - Feature to filter by
-     * @param providerId - Optional provider ID to filter by
-     * @returns Array of ModelSpec objects that support the feature
-     */
-    getModelsByFeature(feature: keyof ModelSpec["features"], providerId?: string): ModelSpec[];
-    /**
-     * Get the most cost-effective model for a given provider and token budget
-     * @param inputTokens - Expected input tokens
-     * @param outputTokens - Expected output tokens
-     * @param providerId - Optional provider ID to filter by
-     * @returns ModelSpec with lowest total cost, or undefined if no models found
-     */
-    getCheapestModel(inputTokens: number, outputTokens: number, providerId?: string): ModelSpec | undefined;
-}
-
-/**
- * Quick execution methods for simple use cases.
- *
- * These methods provide convenient shortcuts for common operations
- * without requiring full agent setup.
- *
- * @example
- * ```typescript
- * // Quick completion
- * const answer = await llmist.complete("What is 2+2?");
- *
- * // Quick streaming
- * for await (const chunk of llmist.stream("Tell me a story")) {
- *   process.stdout.write(chunk);
- * }
- * ```
- */
-
-/**
- * Options for quick execution methods.
- */
-interface QuickOptions {
-    /** Model to use (supports aliases like "gpt4", "sonnet", "flash") */
-    model?: string;
-    /** Temperature (0-1) */
-    temperature?: number;
-    /** System prompt */
-    systemPrompt?: string;
-    /** Max tokens to generate */
-    maxTokens?: number;
-}
-/**
- * Quick completion - returns final text response.
- *
- * @param client - LLMist client instance
- * @param prompt - User prompt
- * @param options - Optional configuration
- * @returns Complete text response
- *
- * @example
- * ```typescript
- * const client = new LLMist();
- * const answer = await complete(client, "What is 2+2?");
- * console.log(answer); // "4" or "2+2 equals 4"
- * ```
- */
-declare function complete(client: LLMist, prompt: string, options?: QuickOptions): Promise<string>;
-/**
- * Quick streaming - returns async generator of text chunks.
- *
- * @param client - LLMist client instance
- * @param prompt - User prompt
- * @param options - Optional configuration
- * @returns Async generator yielding text chunks
- *
- * @example
- * ```typescript
- * const client = new LLMist();
- *
- * for await (const chunk of stream(client, "Tell me a story")) {
- *   process.stdout.write(chunk);
- * }
- * ```
- */
-declare function stream(client: LLMist, prompt: string, options?: QuickOptions): AsyncGenerator<string>;
-
 interface LLMistOptions {
     /**
      * Provider adapters to register manually.
````
````diff
@@ -1508,6 +1761,8 @@ interface ObserveGadgetCompleteContext {
     error?: string;
     executionTimeMs: number;
     breaksLoop?: boolean;
+    /** Cost of gadget execution in USD. 0 if gadget didn't report cost. */
+    cost?: number;
     logger: Logger<ILogObj>;
 }
 /**
````
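Observers can now read the reported cost when a gadget finishes. A sketch of a handler body typed against this context; only the context type comes from the diff, and the handler name `onGadgetComplete` is an assumption inferred from the exported `Observers`/`ObserveGadgetCompleteContext` names rather than a documented hook signature:

```typescript
import type { ObserveGadgetCompleteContext } from "llmist"; // root re-export assumed

function onGadgetComplete(ctx: ObserveGadgetCompleteContext): void {
  // `cost` is optional; treat a missing value as $0.
  ctx.logger.info(
    `gadget done in ${ctx.executionTimeMs}ms, cost $${(ctx.cost ?? 0).toFixed(4)}`,
  );
}
```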
````diff
@@ -2071,6 +2326,16 @@ type HistoryMessage = {
 } | {
     system: string;
 };
+/**
+ * Context available to trailing message functions.
+ * Provides iteration information for dynamic message generation.
+ */
+type TrailingMessageContext = Pick<LLMCallControllerContext, "iteration" | "maxIterations">;
+/**
+ * Trailing message can be a static string or a function that generates the message.
+ * The function receives context about the current iteration.
+ */
+type TrailingMessage = string | ((ctx: TrailingMessageContext) => string);
 /**
  * Fluent builder for creating agents.
  *
````
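Because the context is a `Pick` of just `iteration` and `maxIterations`, a trailing message function can only vary by loop position. A small sketch (root re-export of the type assumed):

```typescript
import type { TrailingMessage } from "llmist"; // root re-export assumed

const trailing: TrailingMessage = (ctx) =>
  ctx.iteration === ctx.maxIterations
    ? "Final iteration: produce your answer now."
    : `Iteration ${ctx.iteration} of ${ctx.maxIterations}.`;
```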
````diff
@@ -2101,6 +2366,7 @@ declare class AgentBuilder {
     private gadgetOutputLimitPercent?;
     private compactionConfig?;
     private signal?;
+    private trailingMessage?;
     constructor(client?: LLMist);
     /**
      * Set the model to use.
````
````diff
@@ -2498,6 +2764,28 @@ declare class AgentBuilder {
      * ```
      */
     withSignal(signal: AbortSignal): this;
+    /**
+     * Add an ephemeral trailing message that appears at the end of each LLM request.
+     *
+     * The message is NOT persisted to conversation history - it only appears in the
+     * current LLM call. This is useful for injecting context-specific instructions
+     * or reminders without polluting the conversation history.
+     *
+     * @param message - Static string or function that generates the message
+     * @returns This builder for chaining
+     *
+     * @example
+     * ```typescript
+     * // Static message
+     * .withTrailingMessage("Always respond in JSON format.")
+     *
+     * // Dynamic message based on iteration
+     * .withTrailingMessage((ctx) =>
+     *   `[Iteration ${ctx.iteration}/${ctx.maxIterations}] Stay focused on the task.`
+     * )
+     * ```
+     */
+    withTrailingMessage(message: TrailingMessage): this;
     /**
      * Add a synthetic gadget call to the conversation history.
      *
````
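Putting the builder additions together: `withSignal`, `withTrailingMessage`, and `withSyntheticGadgetCall` all appear in this diff, while the client construction and the gadget name/result in the synthetic call are illustrative:

```typescript
import { AgentBuilder, LLMist } from "llmist"; // root re-exports assumed

const client = new LLMist();
const controller = new AbortController();

const builder = new AgentBuilder(client)
  .withSignal(controller.signal)
  .withTrailingMessage((ctx) =>
    `[Iteration ${ctx.iteration}/${ctx.maxIterations}] Stay focused on the task.`,
  )
  .withSyntheticGadgetCall("search", { query: "llmist" }, "3 results found");
```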
````diff
@@ -2524,6 +2812,10 @@ declare class AgentBuilder {
      * ```
      */
     withSyntheticGadgetCall(gadgetName: string, parameters: Record<string, unknown>, result: string): this;
+    /**
+     * Compose the final hooks, including trailing message if configured.
+     */
+    private composeHooks;
     /**
      * Format parameters as block format with JSON Pointer paths.
      */
````
````diff
@@ -3227,4 +3519,4 @@ declare function createTextMockStream(text: string, options?: {
     usage?: MockResponse["usage"];
 }): LLMStream;
 
-export { type
+export { type AfterLLMCallAction as $, type AgentHooks as A, BaseGadget as B, type CompactionStrategy as C, type ProviderAdapter as D, type ExecutionContext as E, type ModelDescriptor as F, GadgetRegistry as G, type HintTemplate as H, type IConversationManager as I, type ModelSpec as J, type LLMGenerationOptions as K, type LLMStream as L, MockProviderAdapter as M, type HistoryMessage as N, type TrailingMessage as O, type ParsedGadgetCall as P, type TrailingMessageContext as Q, type ResolvedCompactionConfig as R, type StreamEvent as S, type TokenUsage as T, AgentBuilder as U, type EventHandlers as V, collectEvents as W, collectText as X, runWithHandlers as Y, type AfterGadgetExecutionAction as Z, type AfterGadgetExecutionControllerContext as _, type LLMStreamChunk as a, type AfterLLMCallControllerContext as a0, type AfterLLMErrorAction as a1, type AgentOptions as a2, type BeforeGadgetExecutionAction as a3, type BeforeLLMCallAction as a4, type ChunkInterceptorContext as a5, type Controllers as a6, type GadgetExecutionControllerContext as a7, type GadgetParameterInterceptorContext as a8, type GadgetResultInterceptorContext as a9, type PromptContext as aA, type PromptTemplate as aB, DEFAULT_HINTS as aC, DEFAULT_PROMPTS as aD, resolveHintTemplate as aE, resolvePromptTemplate as aF, resolveRulesTemplate as aG, type QuickOptions as aH, complete as aI, stream as aJ, type GadgetClass as aK, type GadgetOrClass as aL, type CostReportingLLMist as aM, type GadgetExecuteResult as aN, type TextOnlyAction as aO, type TextOnlyContext as aP, type TextOnlyCustomHandler as aQ, type TextOnlyGadgetConfig as aR, type TextOnlyHandler as aS, type TextOnlyStrategy as aT, type Interceptors as aa, type LLMCallControllerContext as ab, type LLMErrorControllerContext as ac, type MessageInterceptorContext as ad, type ObserveChunkContext as ae, type ObserveGadgetCompleteContext as af, type ObserveGadgetStartContext as ag, type ObserveLLMCallContext as ah, type ObserveLLMCompleteContext as ai, type ObserveLLMErrorContext as aj, type Observers as ak, type MessageTurn as al, type ObserveCompactionContext as am, DEFAULT_COMPACTION_CONFIG as an, DEFAULT_SUMMARIZATION_PROMPT as ao, type LLMistOptions as ap, type LLMRole as aq, LLMMessageBuilder as ar, type CostEstimate as as, type ModelFeatures as at, type ModelLimits as au, type ModelPricing as av, type ProviderIdentifier as aw, ModelIdentifierParser as ax, type HintContext as ay, type PromptConfig as az, type LLMMessage as b, createMockAdapter as c, MockBuilder as d, createMockClient as e, MockManager as f, getMockManager as g, createMockStream as h, createTextMockStream as i, type MockMatcher as j, type MockMatcherContext as k, type MockOptions as l, mockLLM as m, type MockRegistration as n, type MockResponse as o, type MockStats as p, ModelRegistry as q, LLMist as r, type CompactionContext as s, type CompactionResult as t, type CompactionConfig as u, type CompactionEvent as v, type CompactionStats as w, type GadgetExecuteReturn as x, type GadgetExample as y, type GadgetExecutionResult as z };
````
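The single-letter names above are internal chunk bindings produced by the bundler; consumers import the public identifiers. A sketch, assuming `dist/index.*` re-exports them unchanged under their real names:

```typescript
import {
  LLMist,
  AgentBuilder,
  LLMMessageBuilder,
  ModelRegistry,
  complete,
  stream,
  type ExecutionContext,
  type GadgetExecuteReturn,
  type TrailingMessage,
} from "llmist";
```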