npm - illuma-agents - Versions diffs - 1.0.10 → 1.0.12 - Mend

illuma-agents 1.0.10 → 1.0.12

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (117) hide show

package/LICENSE +1 -1
package/dist/cjs/agents/AgentContext.cjs +236 -27
package/dist/cjs/agents/AgentContext.cjs.map +1 -1
package/dist/cjs/common/enum.cjs +2 -0
package/dist/cjs/common/enum.cjs.map +1 -1
package/dist/cjs/events.cjs +3 -11
package/dist/cjs/events.cjs.map +1 -1
package/dist/cjs/graphs/Graph.cjs +44 -18
package/dist/cjs/graphs/Graph.cjs.map +1 -1
package/dist/cjs/instrumentation.cjs +1 -3
package/dist/cjs/instrumentation.cjs.map +1 -1
package/dist/cjs/llm/bedrock/index.cjs +121 -6
package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
package/dist/cjs/main.cjs +18 -0
package/dist/cjs/main.cjs.map +1 -1
package/dist/cjs/messages/cache.cjs +149 -54
package/dist/cjs/messages/cache.cjs.map +1 -1
package/dist/cjs/messages/tools.cjs +85 -0
package/dist/cjs/messages/tools.cjs.map +1 -0
package/dist/cjs/run.cjs +0 -8
package/dist/cjs/run.cjs.map +1 -1
package/dist/cjs/tools/CodeExecutor.cjs +4 -0
package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
package/dist/cjs/tools/ProgrammaticToolCalling.cjs +438 -0
package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -0
package/dist/cjs/tools/ToolNode.cjs +53 -15
package/dist/cjs/tools/ToolNode.cjs.map +1 -1
package/dist/cjs/tools/ToolSearchRegex.cjs +455 -0
package/dist/cjs/tools/ToolSearchRegex.cjs.map +1 -0
package/dist/cjs/tools/search/schema.cjs +7 -9
package/dist/cjs/tools/search/schema.cjs.map +1 -1
package/dist/cjs/utils/run.cjs +5 -1
package/dist/cjs/utils/run.cjs.map +1 -1
package/dist/esm/agents/AgentContext.mjs +236 -27
package/dist/esm/agents/AgentContext.mjs.map +1 -1
package/dist/esm/common/enum.mjs +2 -0
package/dist/esm/common/enum.mjs.map +1 -1
package/dist/esm/events.mjs +4 -12
package/dist/esm/events.mjs.map +1 -1
package/dist/esm/graphs/Graph.mjs +45 -19
package/dist/esm/graphs/Graph.mjs.map +1 -1
package/dist/esm/instrumentation.mjs +1 -3
package/dist/esm/instrumentation.mjs.map +1 -1
package/dist/esm/llm/bedrock/index.mjs +121 -6
package/dist/esm/llm/bedrock/index.mjs.map +1 -1
package/dist/esm/main.mjs +3 -0
package/dist/esm/main.mjs.map +1 -1
package/dist/esm/messages/cache.mjs +149 -54
package/dist/esm/messages/cache.mjs.map +1 -1
package/dist/esm/messages/tools.mjs +82 -0
package/dist/esm/messages/tools.mjs.map +1 -0
package/dist/esm/run.mjs +0 -8
package/dist/esm/run.mjs.map +1 -1
package/dist/esm/tools/CodeExecutor.mjs +4 -0
package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
package/dist/esm/tools/ProgrammaticToolCalling.mjs +430 -0
package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -0
package/dist/esm/tools/ToolNode.mjs +53 -15
package/dist/esm/tools/ToolNode.mjs.map +1 -1
package/dist/esm/tools/ToolSearchRegex.mjs +448 -0
package/dist/esm/tools/ToolSearchRegex.mjs.map +1 -0
package/dist/esm/tools/search/schema.mjs +7 -9
package/dist/esm/tools/search/schema.mjs.map +1 -1
package/dist/esm/utils/run.mjs +5 -1
package/dist/esm/utils/run.mjs.map +1 -1
package/dist/types/agents/AgentContext.d.ts +72 -5
package/dist/types/common/enum.d.ts +2 -0
package/dist/types/graphs/Graph.d.ts +3 -2
package/dist/types/index.d.ts +2 -0
package/dist/types/llm/bedrock/index.d.ts +31 -4
package/dist/types/messages/cache.d.ts +23 -8
package/dist/types/messages/index.d.ts +1 -0
package/dist/types/messages/tools.d.ts +17 -0
package/dist/types/test/mockTools.d.ts +28 -0
package/dist/types/tools/ProgrammaticToolCalling.d.ts +91 -0
package/dist/types/tools/ToolNode.d.ts +10 -2
package/dist/types/tools/ToolSearchRegex.d.ts +80 -0
package/dist/types/types/graph.d.ts +14 -1
package/dist/types/types/tools.d.ts +138 -0
package/package.json +7 -8
package/src/agents/AgentContext.ts +278 -27
package/src/agents/__tests__/AgentContext.test.ts +805 -0
package/src/common/enum.ts +2 -0
package/src/events.ts +5 -12
package/src/graphs/Graph.ts +57 -19
package/src/index.ts +2 -0
package/src/instrumentation.ts +1 -4
package/src/llm/bedrock/__tests__/bedrock-caching.test.ts +473 -0
package/src/llm/bedrock/index.ts +149 -12
package/src/messages/__tests__/tools.test.ts +473 -0
package/src/messages/cache.ts +163 -61
package/src/messages/index.ts +1 -0
package/src/messages/tools.ts +99 -0
package/src/run.ts +0 -9
package/src/scripts/code_exec_ptc.ts +334 -0
package/src/scripts/image.ts +178 -0
package/src/scripts/programmatic_exec.ts +396 -0
package/src/scripts/programmatic_exec_agent.ts +231 -0
package/src/scripts/test-tools-before-handoff.ts +5 -1
package/src/scripts/tool_search_regex.ts +162 -0
package/src/scripts/tools.ts +4 -1
package/src/specs/thinking-prune.test.ts +52 -118
package/src/test/mockTools.ts +366 -0
package/src/tools/CodeExecutor.ts +4 -0
package/src/tools/ProgrammaticToolCalling.ts +558 -0
package/src/tools/ToolNode.ts +59 -18
package/src/tools/ToolSearchRegex.ts +535 -0
package/src/tools/__tests__/ProgrammaticToolCalling.integration.test.ts +318 -0
package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +853 -0
package/src/tools/__tests__/ToolSearchRegex.integration.test.ts +161 -0
package/src/tools/__tests__/ToolSearchRegex.test.ts +232 -0
package/src/tools/search/jina-reranker.test.ts +16 -16
package/src/tools/search/schema.ts +7 -9
package/src/types/graph.ts +14 -1
package/src/types/tools.ts +166 -0
package/src/utils/run.ts +5 -1
package/src/tools/search/direct-url.test.ts +0 -530

package/dist/types/types/tools.d.ts CHANGED Viewed

@@ -25,6 +25,8 @@ export type ToolNodeOptions = {
     loadRuntimeTools?: ToolRefGenerator;
     toolCallStepIds?: Map<string, string>;
     errorHandler?: (data: ToolErrorData, metadata?: Record<string, unknown>) => Promise<void>;
+    /** Tool registry for lazy computation of programmatic tools and tool search */
+    toolRegistry?: LCToolRegistry;
 };
 export type ToolNodeConstructorParams = ToolRefs & ToolNodeOptions;
 export type ToolEndEvent = {
@@ -59,3 +61,139 @@ export type ExecuteResult = {
     stderr: string;
     files?: FileRefs;
 };
+/** JSON Schema type definition for tool parameters */
+export type JsonSchemaType = {
+    type: 'string' | 'number' | 'integer' | 'float' | 'boolean' | 'array' | 'object';
+    enum?: string[];
+    items?: JsonSchemaType;
+    properties?: Record<string, JsonSchemaType>;
+    required?: string[];
+    description?: string;
+    additionalProperties?: boolean | JsonSchemaType;
+};
+/**
+ * Specifies which contexts can invoke a tool (inspired by Anthropic's allowed_callers)
+ * - 'direct': Only callable directly by the LLM (default if omitted)
+ * - 'code_execution': Only callable from within programmatic code execution
+ */
+export type AllowedCaller = 'direct' | 'code_execution';
+/** Tool definition with optional deferred loading and caller restrictions */
+export type LCTool = {
+    name: string;
+    description?: string;
+    parameters?: JsonSchemaType;
+    /** When true, tool is not loaded into context initially (for tool search) */
+    defer_loading?: boolean;
+    /**
+     * Which contexts can invoke this tool.
+     * Default: ['direct'] (only callable directly by LLM)
+     * Options: 'direct', 'code_execution'
+     */
+    allowed_callers?: AllowedCaller[];
+};
+/** Map of tool names to tool definitions */
+export type LCToolRegistry = Map<string, LCTool>;
+export type ProgrammaticCache = {
+    toolMap: ToolMap;
+    toolDefs: LCTool[];
+};
+/** Parameters for creating a Tool Search Regex tool */
+export type ToolSearchRegexParams = {
+    apiKey?: string;
+    toolRegistry?: LCToolRegistry;
+    onlyDeferred?: boolean;
+    baseUrl?: string;
+    [key: string]: unknown;
+};
+/** Simplified tool metadata for search purposes */
+export type ToolMetadata = {
+    name: string;
+    description: string;
+    parameters?: JsonSchemaType;
+};
+/** Individual search result for a matching tool */
+export type ToolSearchResult = {
+    tool_name: string;
+    match_score: number;
+    matched_field: string;
+    snippet: string;
+};
+/** Response from the tool search operation */
+export type ToolSearchResponse = {
+    tool_references: ToolSearchResult[];
+    total_tools_searched: number;
+    pattern_used: string;
+};
+/** Artifact returned alongside the formatted search results */
+export type ToolSearchArtifact = {
+    tool_references: ToolSearchResult[];
+    metadata: {
+        total_searched: number;
+        pattern: string;
+        error?: string;
+    };
+};
+/**
+ * Tool call requested by the Code API during programmatic execution
+ */
+export type PTCToolCall = {
+    /** Unique ID like "call_001" */
+    id: string;
+    /** Tool name */
+    name: string;
+    /** Input parameters */
+    input: Record<string, any>;
+};
+/**
+ * Tool result sent back to the Code API
+ */
+export type PTCToolResult = {
+    /** Matches PTCToolCall.id */
+    call_id: string;
+    /** Tool execution result (any JSON-serializable value) */
+    result: any;
+    /** Whether tool execution failed */
+    is_error: boolean;
+    /** Error details if is_error=true */
+    error_message?: string;
+};
+/**
+ * Response from the Code API for programmatic execution
+ */
+export type ProgrammaticExecutionResponse = {
+    status: 'tool_call_required' | 'completed' | 'error' | unknown;
+    session_id?: string;
+    /** Present when status='tool_call_required' */
+    continuation_token?: string;
+    tool_calls?: PTCToolCall[];
+    /** Present when status='completed' */
+    stdout?: string;
+    stderr?: string;
+    files?: FileRefs;
+    /** Present when status='error' */
+    error?: string;
+};
+/**
+ * Artifact returned by the PTC tool
+ */
+export type ProgrammaticExecutionArtifact = {
+    session_id?: string;
+    files?: FileRefs;
+};
+/**
+ * Initialization parameters for the PTC tool
+ */
+export type ProgrammaticToolCallingParams = {
+    /** Code API key (or use CODE_API_KEY env var) */
+    apiKey?: string;
+    /** Code API base URL (or use CODE_BASEURL env var) */
+    baseUrl?: string;
+    /** Safety limit for round-trips (default: 20) */
+    maxRoundTrips?: number;
+    /** HTTP proxy URL */
+    proxy?: string;
+    /** Enable debug logging (or set PTC_DEBUG=true env var) */
+    debug?: boolean;
+    /** Environment variable key for API key */
+    [key: string]: unknown;
+};

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "illuma-agents",
-  "version": "1.0.10",
+  "version": "1.0.12",
   "main": "./dist/cjs/main.cjs",
   "module": "./dist/esm/main.mjs",
   "types": "./dist/types/index.d.ts",
@@ -15,12 +15,6 @@
   "description": "Illuma AI Agents Library",
   "author": "Varun Muppidi",
   "license": "MIT",
-  "licenses": [
-    {
-      "type": "MIT",
-      "url": "https://github.com/danny-avila/agents/blob/main/LICENSE"
-    }
-  ],
   "packageManager": "npm@10.5.2",
   "engines": {
     "node": ">=14.0.0"
@@ -34,7 +28,7 @@
   "scripts": {
     "prepare": "node husky-setup.js",
     "prepublishOnly": "npm run build",
-    "build": "set NODE_ENV=production && rollup -c && tsc -p tsconfig.build.json",
+    "build": "cross-env NODE_ENV=production rollup -c && tsc -p tsconfig.build.json",
     "build:dev": "rollup -c",
     "start": "node dist/esm/main.js",
     "clean": "node ./config/clean.js",
@@ -53,6 +47,10 @@
     "memory": "node -r dotenv/config --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/memory.ts --provider 'openAI' --name 'Jo' --location 'New York, NY'",
     "tool": "node --trace-warnings -r dotenv/config --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/tools.ts --provider 'openrouter' --name 'Jo' --location 'New York, NY'",
     "search": "node -r dotenv/config --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/search.ts --provider 'bedrock' --name 'Jo' --location 'New York, NY'",
+    "tool_search_regex": "node -r dotenv/config --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/tool_search_regex.ts",
+    "programmatic_exec": "node -r dotenv/config --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/programmatic_exec.ts",
+    "code_exec_ptc": "node -r dotenv/config --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/code_exec_ptc.ts --provider 'openAI' --name 'Jo' --location 'New York, NY'",
+    "programmatic_exec_agent": "node -r dotenv/config --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/programmatic_exec_agent.ts --provider 'openAI' --name 'Jo' --location 'New York, NY'",
     "ant_web_search": "node -r dotenv/config --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/ant_web_search.ts --name 'Jo' --location 'New York, NY'",
     "ant_web_search_edge_case": "node -r dotenv/config --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/ant_web_search_edge_case.ts --name 'Jo' --location 'New York, NY'",
     "ant_web_search_error_edge_case": "node -r dotenv/config --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/ant_web_search_error_edge_case.ts --name 'Jo' --location 'New York, NY'",
@@ -140,6 +138,7 @@
     "@types/yargs-parser": "^21.0.3",
     "@typescript-eslint/eslint-plugin": "^8.24.0",
     "@typescript-eslint/parser": "^8.24.0",
+    "cross-env": "^10.1.0",
     "eslint": "^9.39.1",
     "eslint-import-resolver-typescript": "^3.7.0",
     "eslint-plugin-import": "^2.31.0",

package/src/agents/AgentContext.ts CHANGED Viewed

@@ -32,12 +32,14 @@ export class AgentContext {
       tools,
       toolMap,
       toolEnd,
+      toolRegistry,
       instructions,
       additional_instructions,
       streamBuffer,
       maxContextTokens,
       reasoningKey,
       useLegacyContent,
+      dynamicContext,
     } = agentConfig;
     const agentContext = new AgentContext({
@@ -48,6 +50,7 @@ export class AgentContext {
       streamBuffer,
       tools,
       toolMap,
+      toolRegistry,
       instructions,
       additionalInstructions: additional_instructions,
       reasoningKey,
@@ -55,15 +58,21 @@ export class AgentContext {
       instructionTokens: 0,
       tokenCounter,
       useLegacyContent,
+      dynamicContext,
     });
     if (tokenCounter) {
+      // Initialize system runnable BEFORE async tool token calculation
+      // This ensures system message tokens are in instructionTokens before
+      // updateTokenMapWithInstructions is called
+      agentContext.initializeSystemRunnable();
       const tokenMap = indexTokenCountMap || {};
       agentContext.indexTokenCountMap = tokenMap;
       agentContext.tokenCalculationPromise = agentContext
         .calculateInstructionTokens(tokenCounter)
         .then(() => {
-          // Update token map with instruction tokens
+          // Update token map with instruction tokens (includes system + tool tokens)
           agentContext.updateTokenMapWithInstructions(tokenMap);
         })
         .catch((err) => {
@@ -102,10 +111,23 @@ export class AgentContext {
   tools?: t.GraphTools;
   /** Tool map for this agent */
   toolMap?: t.ToolMap;
+  /**
+   * Tool definitions registry (includes deferred and programmatic tool metadata).
+   * Used for tool search and programmatic tool calling.
+   */
+  toolRegistry?: t.LCToolRegistry;
+  /** Set of tool names discovered via tool search (to be loaded) */
+  discoveredToolNames: Set<string> = new Set();
   /** Instructions for this agent */
   instructions?: string;
   /** Additional instructions for this agent */
   additionalInstructions?: string;
+  /**
+   * Dynamic context that changes per-request (e.g., current time, user info).
+   * This is NOT included in the system message to preserve cache.
+   * Instead, it's injected as a user message at the start of the conversation.
+   */
+  dynamicContext?: string;
   /** Reasoning key for this agent */
   reasoningKey: 'reasoning_content' | 'reasoning' = 'reasoning_content';
   /** Last token for reasoning detection */
@@ -117,12 +139,16 @@ export class AgentContext {
     ContentTypes.TEXT;
   /** Whether tools should end the workflow */
   toolEnd: boolean = false;
-  /** System runnable for this agent */
-  systemRunnable?: Runnable<
+  /** Cached system runnable (created lazily) */
+  private cachedSystemRunnable?: Runnable<
     BaseMessage[],
     (BaseMessage | SystemMessage)[],
     RunnableConfig<Record<string, unknown>>
   >;
+  /** Whether system runnable needs rebuild (set when discovered tools change) */
+  private systemRunnableStale: boolean = true;
+  /** Cached system message token count (separate from tool tokens) */
+  private systemMessageTokens: number = 0;
   /** Promise for token calculation initialization */
   tokenCalculationPromise?: Promise<void>;
   /** Format content blocks as strings (for legacy compatibility) */
@@ -137,8 +163,10 @@ export class AgentContext {
     tokenCounter,
     tools,
     toolMap,
+    toolRegistry,
     instructions,
     additionalInstructions,
+    dynamicContext,
     reasoningKey,
     toolEnd,
     instructionTokens,
@@ -152,8 +180,10 @@ export class AgentContext {
     tokenCounter?: t.TokenCounter;
     tools?: t.GraphTools;
     toolMap?: t.ToolMap;
+    toolRegistry?: t.LCToolRegistry;
     instructions?: string;
     additionalInstructions?: string;
+    dynamicContext?: string;
     reasoningKey?: 'reasoning_content' | 'reasoning';
     toolEnd?: boolean;
     instructionTokens?: number;
@@ -167,8 +197,10 @@ export class AgentContext {
     this.tokenCounter = tokenCounter;
     this.tools = tools;
     this.toolMap = toolMap;
+    this.toolRegistry = toolRegistry;
     this.instructions = instructions;
     this.additionalInstructions = additionalInstructions;
+    this.dynamicContext = dynamicContext;
     if (reasoningKey) {
       this.reasoningKey = reasoningKey;
     }
@@ -180,39 +212,145 @@ export class AgentContext {
     }
     this.useLegacyContent = useLegacyContent ?? false;
+  }
-    this.systemRunnable = this.createSystemRunnable();
+  /**
+   * Builds instructions text for tools that are ONLY callable via programmatic code execution.
+   * These tools cannot be called directly by the LLM but are available through the
+   * run_tools_with_code tool.
+   *
+   * Includes:
+   * - Code_execution-only tools that are NOT deferred
+   * - Code_execution-only tools that ARE deferred but have been discovered via tool search
+   */
+  private buildProgrammaticOnlyToolsInstructions(): string {
+    if (!this.toolRegistry) return '';
+    const programmaticOnlyTools: t.LCTool[] = [];
+    for (const [name, toolDef] of this.toolRegistry) {
+      const allowedCallers = toolDef.allowed_callers ?? ['direct'];
+      const isCodeExecutionOnly =
+        allowedCallers.includes('code_execution') &&
+        !allowedCallers.includes('direct');
+      if (!isCodeExecutionOnly) continue;
+      // Include if: not deferred OR deferred but discovered
+      const isDeferred = toolDef.defer_loading === true;
+      const isDiscovered = this.discoveredToolNames.has(name);
+      if (!isDeferred || isDiscovered) {
+        programmaticOnlyTools.push(toolDef);
+      }
+    }
+    if (programmaticOnlyTools.length === 0) return '';
+    const toolDescriptions = programmaticOnlyTools
+      .map((tool) => {
+        let desc = `- **${tool.name}**`;
+        if (tool.description != null && tool.description !== '') {
+          desc += `: ${tool.description}`;
+        }
+        if (tool.parameters) {
+          desc += `\n  Parameters: ${JSON.stringify(tool.parameters, null, 2).replace(/\n/g, '\n  ')}`;
+        }
+        return desc;
+      })
+      .join('\n\n');
+    return (
+      '\n\n## Programmatic-Only Tools\n\n' +
+      'The following tools are available exclusively through the `run_tools_with_code` tool. ' +
+      'You cannot call these tools directly; instead, use `run_tools_with_code` with Python code that invokes them.\n\n' +
+      toolDescriptions
+    );
   }
   /**
-   * Create system runnable from instructions and calculate tokens if tokenCounter is available
+   * Gets the system runnable, creating it lazily if needed.
+   * Includes instructions, additional instructions, and programmatic-only tools documentation.
+   * Only rebuilds when marked stale (via markToolsAsDiscovered).
    */
-  private createSystemRunnable():
+  get systemRunnable():
     | Runnable<
         BaseMessage[],
         (BaseMessage | SystemMessage)[],
         RunnableConfig<Record<string, unknown>>
       >
     | undefined {
-    let finalInstructions: string | BaseMessageFields | undefined =
-      this.instructions;
+    // Return cached if not stale
+    if (!this.systemRunnableStale && this.cachedSystemRunnable !== undefined) {
+      return this.cachedSystemRunnable;
+    }
+    // Stale or first access - rebuild
+    const instructionsString = this.buildInstructionsString();
+    this.cachedSystemRunnable = this.buildSystemRunnable(instructionsString);
+    this.systemRunnableStale = false;
+    return this.cachedSystemRunnable;
+  }
+  /**
+   * Explicitly initializes the system runnable.
+   * Call this before async token calculation to ensure system message tokens are counted first.
+   */
+  initializeSystemRunnable(): void {
+    if (this.systemRunnableStale || this.cachedSystemRunnable === undefined) {
+      const instructionsString = this.buildInstructionsString();
+      this.cachedSystemRunnable = this.buildSystemRunnable(instructionsString);
+      this.systemRunnableStale = false;
+    }
+  }
+  /**
+   * Builds the raw instructions string (without creating SystemMessage).
+   */
+  private buildInstructionsString(): string {
+    let result = this.instructions ?? '';
     if (
       this.additionalInstructions != null &&
       this.additionalInstructions !== ''
     ) {
-      finalInstructions =
-        finalInstructions != null && finalInstructions
-          ? `${finalInstructions}\n\n${this.additionalInstructions}`
-          : this.additionalInstructions;
+      result = result
+        ? `${result}\n\n${this.additionalInstructions}`
+        : this.additionalInstructions;
     }
-    // Handle Anthropic prompt caching
-    if (
-      finalInstructions != null &&
-      finalInstructions !== '' &&
-      this.provider === Providers.ANTHROPIC
-    ) {
+    const programmaticToolsDoc = this.buildProgrammaticOnlyToolsInstructions();
+    if (programmaticToolsDoc) {
+      result = result
+        ? `${result}${programmaticToolsDoc}`
+        : programmaticToolsDoc;
+    }
+    return result;
+  }
+  /**
+   * Build system runnable from pre-built instructions string.
+   * Only called when content has actually changed.
+   */
+  private buildSystemRunnable(
+    instructionsString: string
+  ):
+    | Runnable<
+        BaseMessage[],
+        (BaseMessage | SystemMessage)[],
+        RunnableConfig<Record<string, unknown>>
+      >
+    | undefined {
+    if (!instructionsString) {
+      // Remove previous tokens if we had a system message before
+      this.instructionTokens -= this.systemMessageTokens;
+      this.systemMessageTokens = 0;
+      return undefined;
+    }
+    let finalInstructions: string | BaseMessageFields = instructionsString;
+    // Handle Anthropic prompt caching (Direct API)
+    if (this.provider === Providers.ANTHROPIC) {
       const anthropicOptions = this.clientOptions as
         | t.AnthropicClientOptions
         | undefined;
@@ -228,7 +366,7 @@ export class AgentContext {
           content: [
             {
               type: 'text',
-              text: this.instructions,
+              text: instructionsString,
               cache_control: { type: 'ephemeral' },
             },
           ],
@@ -236,19 +374,47 @@ export class AgentContext {
       }
     }
-    if (finalInstructions != null && finalInstructions !== '') {
-      const systemMessage = new SystemMessage(finalInstructions);
+    // Handle Bedrock prompt caching (Converse API)
+    // Adds cachePoint block after text content for system message caching
+    // NOTE: Both Claude and Nova models support cachePoint in system and messages
+    // (Nova does NOT support cachePoint in tools - that check is in bedrock/index.ts)
+    if (this.provider === Providers.BEDROCK) {
+      const bedrockOptions = this.clientOptions as
+        | t.BedrockAnthropicInput
+        | undefined;
+      const modelId = bedrockOptions?.model?.toLowerCase() ?? '';
+      const supportsCaching = modelId.includes('claude') || modelId.includes('anthropic') || modelId.includes('nova');
-      if (this.tokenCounter) {
-        this.instructionTokens += this.tokenCounter(systemMessage);
+      if (bedrockOptions?.promptCache === true && supportsCaching) {
+        // Always log system cache structure
+        console.log(`[Cache] 📝 System | chars=${instructionsString.length} | tokens=${this.systemMessageTokens} | model=${modelId}`);
+        finalInstructions = {
+          content: [
+            {
+              type: 'text',
+              text: instructionsString,
+            },
+            {
+              cachePoint: { type: 'default' },
+            },
+          ],
+        };
       }
+    }
+    const systemMessage = new SystemMessage(finalInstructions);
-      return RunnableLambda.from((messages: BaseMessage[]) => {
-        return [systemMessage, ...messages];
-      }).withConfig({ runName: 'prompt' });
+    // Update token counts (subtract old, add new)
+    if (this.tokenCounter) {
+      this.instructionTokens -= this.systemMessageTokens;
+      this.systemMessageTokens = this.tokenCounter(systemMessage);
+      this.instructionTokens += this.systemMessageTokens;
     }
-    return undefined;
+    return RunnableLambda.from((messages: BaseMessage[]) => {
+      return [systemMessage, ...messages];
+    }).withConfig({ runName: 'prompt' });
   }
   /**
@@ -256,6 +422,9 @@ export class AgentContext {
    */
   reset(): void {
     this.instructionTokens = 0;
+    this.systemMessageTokens = 0;
+    this.cachedSystemRunnable = undefined;
+    this.systemRunnableStale = true;
     this.lastToken = undefined;
     this.indexTokenCountMap = {};
     this.currentUsage = undefined;
@@ -263,6 +432,7 @@ export class AgentContext {
     this.lastStreamCall = undefined;
     this.tokenTypeSwitch = undefined;
     this.currentTokenType = ContentTypes.TEXT;
+    this.discoveredToolNames.clear();
   }
   /**
@@ -320,4 +490,85 @@ export class AgentContext {
     // Add tool tokens to existing instruction tokens (which may already include system message tokens)
     this.instructionTokens += toolTokens;
   }
+  /**
+   * Gets the tool registry for deferred tools (for tool search).
+   * @param onlyDeferred If true, only returns tools with defer_loading=true
+   * @returns LCToolRegistry with tool definitions
+   */
+  getDeferredToolRegistry(onlyDeferred: boolean = true): t.LCToolRegistry {
+    const registry: t.LCToolRegistry = new Map();
+    if (!this.toolRegistry) {
+      return registry;
+    }
+    for (const [name, toolDef] of this.toolRegistry) {
+      if (!onlyDeferred || toolDef.defer_loading === true) {
+        registry.set(name, toolDef);
+      }
+    }
+    return registry;
+  }
+  /**
+   * Marks tools as discovered via tool search.
+   * Discovered tools will be included in the next model binding.
+   * Only marks system runnable stale if NEW tools were actually added.
+   * @param toolNames - Array of discovered tool names
+   * @returns true if any new tools were discovered
+   */
+  markToolsAsDiscovered(toolNames: string[]): boolean {
+    let hasNewDiscoveries = false;
+    for (const name of toolNames) {
+      if (!this.discoveredToolNames.has(name)) {
+        this.discoveredToolNames.add(name);
+        hasNewDiscoveries = true;
+      }
+    }
+    if (hasNewDiscoveries) {
+      this.systemRunnableStale = true;
+    }
+    return hasNewDiscoveries;
+  }
+  /**
+   * Gets tools that should be bound to the LLM.
+   * Includes:
+   * 1. Non-deferred tools with allowed_callers: ['direct']
+   * 2. Discovered tools (from tool search)
+   * @returns Array of tools to bind to model
+   */
+  getToolsForBinding(): t.GraphTools | undefined {
+    if (!this.tools || !this.toolRegistry) {
+      return this.tools;
+    }
+    const toolsToInclude = this.tools.filter((tool) => {
+      if (!('name' in tool)) {
+        return true; // No name, include by default
+      }
+      const toolDef = this.toolRegistry?.get(tool.name);
+      if (!toolDef) {
+        return true; // Not in registry, include by default
+      }
+      // Check if discovered (overrides defer_loading)
+      if (this.discoveredToolNames.has(tool.name)) {
+        // Discovered tools must still have allowed_callers: ['direct']
+        const allowedCallers = toolDef.allowed_callers ?? ['direct'];
+        return allowedCallers.includes('direct');
+      }
+      // Not discovered: must be direct-callable AND not deferred
+      const allowedCallers = toolDef.allowed_callers ?? ['direct'];
+      return (
+        allowedCallers.includes('direct') && toolDef.defer_loading !== true
+      );
+    });
+    return toolsToInclude;
+  }
 }