npm - @librechat/agents - Versions diffs - 3.1.81 → 3.1.83 - Mend

@librechat/agents 3.1.81 → 3.1.83

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

package/dist/cjs/agents/AgentContext.cjs +125 -36
package/dist/cjs/agents/AgentContext.cjs.map +1 -1
package/dist/cjs/graphs/Graph.cjs +13 -0
package/dist/cjs/graphs/Graph.cjs.map +1 -1
package/dist/cjs/llm/openai/index.cjs +50 -13
package/dist/cjs/llm/openai/index.cjs.map +1 -1
package/dist/cjs/llm/openrouter/index.cjs +17 -7
package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
package/dist/cjs/llm/openrouter/toolCache.cjs +55 -0
package/dist/cjs/llm/openrouter/toolCache.cjs.map +1 -0
package/dist/cjs/main.cjs +1 -0
package/dist/cjs/main.cjs.map +1 -1
package/dist/cjs/messages/cache.cjs +96 -0
package/dist/cjs/messages/cache.cjs.map +1 -1
package/dist/cjs/tools/ToolNode.cjs +70 -12
package/dist/cjs/tools/ToolNode.cjs.map +1 -1
package/dist/esm/agents/AgentContext.mjs +125 -36
package/dist/esm/agents/AgentContext.mjs.map +1 -1
package/dist/esm/graphs/Graph.mjs +13 -0
package/dist/esm/graphs/Graph.mjs.map +1 -1
package/dist/esm/llm/openai/index.mjs +50 -14
package/dist/esm/llm/openai/index.mjs.map +1 -1
package/dist/esm/llm/openrouter/index.mjs +17 -7
package/dist/esm/llm/openrouter/index.mjs.map +1 -1
package/dist/esm/llm/openrouter/toolCache.mjs +53 -0
package/dist/esm/llm/openrouter/toolCache.mjs.map +1 -0
package/dist/esm/main.mjs +1 -1
package/dist/esm/messages/cache.mjs +96 -1
package/dist/esm/messages/cache.mjs.map +1 -1
package/dist/esm/tools/ToolNode.mjs +70 -12
package/dist/esm/tools/ToolNode.mjs.map +1 -1
package/dist/types/agents/AgentContext.d.ts +8 -1
package/dist/types/agents/__tests__/promptCacheLiveHelpers.d.ts +6 -2
package/dist/types/llm/openrouter/index.d.ts +1 -0
package/dist/types/llm/openrouter/toolCache.d.ts +2 -0
package/dist/types/messages/cache.d.ts +1 -0
package/dist/types/tools/ToolNode.d.ts +5 -0
package/dist/types/types/run.d.ts +2 -0
package/package.json +2 -1
package/src/agents/AgentContext.ts +191 -40
package/src/agents/__tests__/AgentContext.anthropic.live.test.ts +0 -4
package/src/agents/__tests__/AgentContext.openrouter.live.test.ts +128 -0
package/src/agents/__tests__/AgentContext.test.ts +355 -18
package/src/agents/__tests__/promptCacheLiveHelpers.ts +8 -2
package/src/graphs/Graph.ts +24 -0
package/src/llm/custom-chat-models.smoke.test.ts +76 -0
package/src/llm/openai/deepseek.test.ts +14 -1
package/src/llm/openai/index.ts +38 -12
package/src/llm/openrouter/index.ts +22 -7
package/src/llm/openrouter/reasoning.test.ts +33 -0
package/src/llm/openrouter/toolCache.test.ts +83 -0
package/src/llm/openrouter/toolCache.ts +89 -0
package/src/messages/cache.test.ts +127 -0
package/src/messages/cache.ts +143 -0
package/src/scripts/openrouter_prompt_cache_live.ts +310 -0
package/src/specs/agent-handoffs.live.test.ts +140 -0
package/src/specs/agent-handoffs.test.ts +266 -2
package/src/specs/openrouter.simple.test.ts +15 -8
package/src/tools/ToolNode.ts +92 -13
package/src/types/run.ts +2 -0

package/src/scripts/openrouter_prompt_cache_live.ts ADDED Viewed

@@ -0,0 +1,310 @@
+import { config as loadEnv } from 'dotenv';
+import { HumanMessage, SystemMessage } from '@langchain/core/messages';
+import type { AIMessage, BaseMessage } from '@langchain/core/messages';
+import type { ClientOptions } from '@langchain/openai';
+import type { GraphTools } from '@/types';
+import type { ChatOpenRouterInput } from '@/llm/openrouter';
+import { addCacheControl } from '@/messages/cache';
+import { ChatOpenRouter } from '@/llm/openrouter';
+import { partitionAndMarkOpenRouterToolCache } from '@/llm/openrouter/toolCache';
+loadEnv({ path: process.env.DOTENV_CONFIG_PATH ?? '.env' });
+/** One model to exercise: `label` is only printed in log output, `model` is the identifier handed to ChatOpenRouter. */
+type ModelCase = {
+  label: string;
+  model: string;
+};
+/** Token counters read from a single response's `usage_metadata`; see getCacheUsage for how absent fields are defaulted. */
+type CacheUsage = {
+  cacheCreation: number;
+  cacheRead: number;
+  inputTokens: number;
+  outputTokens: number;
+  totalTokens: number;
+};
+/** Minimal view of a tool entry: only the fields this script inspects after cache marking (function name and optional `cache_control`). */
+type OpenRouterTool = {
+  type: 'function';
+  function: {
+    name: string;
+  };
+  cache_control?: { type: 'ephemeral' };
+};
+/** Models exercised when OPENROUTER_PROMPT_CACHE_MODELS is not set. */
+const DEFAULT_MODEL_CASES: ModelCase[] = [
+  { label: 'Anthropic Claude', model: 'anthropic/claude-haiku-4.5' },
+  { label: 'Google Gemini', model: 'google/gemini-2.5-flash' },
+  { label: 'Alibaba Qwen', model: 'qwen/qwen3-coder-flash' },
+];
+/** Attempts per case when OPENROUTER_PROMPT_CACHE_ATTEMPTS is unset or blank. */
+const DEFAULT_ATTEMPTS = 3;
+/**
+ * Parses the per-case attempt count from its environment value.
+ *
+ * @param raw - Raw value of OPENROUTER_PROMPT_CACHE_ATTEMPTS, if any.
+ * @returns DEFAULT_ATTEMPTS when `raw` is undefined or blank, otherwise the parsed count.
+ * @throws Error when `raw` is not a positive integer. Without this check a value such as
+ * "abc" (NaN) or "0" makes every attempt loop run zero times, and the script then reports a
+ * misleading "Prompt caching was not confirmed" failure instead of the real configuration error.
+ */
+function parseAttempts(raw: string | undefined): number {
+  const trimmed = raw?.trim() ?? '';
+  if (trimmed.length === 0) {
+    return DEFAULT_ATTEMPTS;
+  }
+  const parsed = Number(trimmed);
+  if (!Number.isInteger(parsed) || parsed < 1) {
+    throw new Error(
+      `OPENROUTER_PROMPT_CACHE_ATTEMPTS must be a positive integer, received "${trimmed}"`
+    );
+  }
+  return parsed;
+}
+const apiKey = process.env.OPENROUTER_API_KEY;
+const baseURL =
+  process.env.OPENROUTER_BASE_URL ?? 'https://openrouter.ai/api/v1';
+const attempts = parseAttempts(process.env.OPENROUTER_PROMPT_CACHE_ATTEMPTS);
+// A comma-separated override replaces the defaults; every override entry is labelled
+// 'Custom' and blank entries (e.g. from a trailing comma) are dropped.
+const modelCases = (
+  process.env.OPENROUTER_PROMPT_CACHE_MODELS?.split(',').map((model) => ({
+    label: 'Custom',
+    model: model.trim(),
+  })) ?? DEFAULT_MODEL_CASES
+).filter(({ model }) => model.length > 0);
+// Fail at load time: every request below needs the key.
+if (apiKey == null || apiKey.length === 0) {
+  throw new Error('OPENROUTER_API_KEY is required');
+}
+/**
+ * Builds the request-invariant reference text embedded in the user prompt.
+ *
+ * @returns 90 numbered sections joined by newlines; each repeats the same fixed paragraph
+ * followed by a per-section verification key, so the output is identical on every call.
+ */
+function buildStableReference(): string {
+  const paragraph =
+    'LibreChat OpenRouter prompt caching live validation reference. This paragraph is deliberately stable across repeated requests so OpenRouter can route the conversation to the same provider endpoint and reuse cached prompt tokens. It describes cache breakpoints, provider sticky routing, cache write metrics, cache read metrics, model-specific minimum prompt sizes, and the expected behavior of explicit per-message cache_control markers for supported OpenRouter providers.';
+  const sections: string[] = [];
+  for (let section = 1; section <= 90; section++) {
+    sections.push(
+      `Section ${section}. ${paragraph} Verification key ${section}: OPENROUTER_PROMPT_CACHE_LIVE_REFERENCE_${section}.`
+    );
+  }
+  return sections.join('\n');
+}
+/**
+ * Builds the long, request-invariant description used for the static tool.
+ *
+ * @returns 90 numbered sections joined by newlines; each repeats the same fixed paragraph
+ * followed by a per-section key, so the output is identical on every call.
+ */
+function buildStableToolDescription(): string {
+  const paragraph =
+    'Static OpenRouter tool contract for prompt cache validation. This tool description is stable across requests and intentionally verbose so provider-side prompt caching can write and then read a meaningful static tool-schema prefix while dynamic tools vary after the cache breakpoint.';
+  const sections: string[] = [];
+  for (let section = 1; section <= 90; section++) {
+    sections.push(
+      `Tool section ${section}. ${paragraph} Stable tool key ${section}: OPENROUTER_STATIC_TOOL_CACHE_REFERENCE_${section}.`
+    );
+  }
+  return sections.join('\n');
+}
+/**
+ * Builds the two-tool set bound to the model in the static-tool case.
+ *
+ * The first entry (`stable_reference_lookup`) is the same on every call; the second entry's
+ * name and description embed `attempt`, so it differs between attempts.
+ *
+ * @param attempt - Attempt number embedded in the second tool's name and description.
+ * @returns The static tool followed by the attempt-specific tool.
+ */
+function buildToolSet(attempt: number): GraphTools {
+  const staticDescription = buildStableToolDescription();
+  const dynamicName = `dynamic_runtime_tool_${attempt}`;
+  const dynamicDescription = `Dynamic runtime tool ${attempt}; this varies between attempts and should sit after the cached static tool prefix.`;
+  return [
+    {
+      type: 'function',
+      function: {
+        name: 'stable_reference_lookup',
+        description: staticDescription,
+        parameters: {
+          type: 'object',
+          properties: {
+            query: { type: 'string', description: 'Stable lookup query.' },
+          },
+          required: ['query'],
+          additionalProperties: false,
+        },
+      },
+    },
+    {
+      type: 'function',
+      function: {
+        name: dynamicName,
+        description: dynamicDescription,
+        parameters: {
+          type: 'object',
+          properties: {
+            value: { type: 'string' },
+          },
+          required: ['value'],
+          additionalProperties: false,
+        },
+      },
+    },
+  ] as GraphTools;
+}
+/**
+ * Builds the system + user message pair sent on every attempt of a model case.
+ *
+ * @param model - Model identifier interpolated into the first line of the user prompt.
+ * @returns The two messages after being passed through `addCacheControl`.
+ */
+function buildMessages(model: string): BaseMessage[] {
+  const promptParts = [
+    `For model ${model}, reply with exactly this format: cache live check ok.`,
+    'Use the stable reference below only to make this request large enough to cache.',
+    buildStableReference(),
+  ];
+  const systemMessage = new SystemMessage(
+    'You are validating prompt caching. Answer with one concise sentence.'
+  );
+  const humanMessage = new HumanMessage(promptParts.join('\n\n'));
+  const messages: BaseMessage[] = [systemMessage, humanMessage];
+  return addCacheControl<BaseMessage>(messages);
+}
+/**
+ * Extracts token and cache counters from a response.
+ *
+ * @param message - Response whose `usage_metadata` is read.
+ * @returns The counters, with every missing field reported as 0.
+ */
+function getCacheUsage(message: AIMessage): CacheUsage {
+  const { usage_metadata: metadata } = message;
+  const details = metadata?.input_token_details;
+  const inputTokens = metadata?.input_tokens ?? 0;
+  const outputTokens = metadata?.output_tokens ?? 0;
+  const totalTokens = metadata?.total_tokens ?? 0;
+  const cacheRead = details?.cache_read ?? 0;
+  const cacheCreation = details?.cache_creation ?? 0;
+  return { inputTokens, outputTokens, totalTokens, cacheRead, cacheCreation };
+}
+/** Returns true when at least one recorded usage reports cached tokens being read. */
+function hasCacheHit(usages: CacheUsage[]): boolean {
+  for (const usage of usages) {
+    if (usage.cacheRead > 0) {
+      return true;
+    }
+  }
+  return false;
+}
+/** Returns true when at least one recorded usage reports a cache write or a cache read. */
+function hasCacheActivity(usages: CacheUsage[]): boolean {
+  for (const usage of usages) {
+    const idle = usage.cacheCreation <= 0 && usage.cacheRead <= 0;
+    if (!idle) {
+      return true;
+    }
+  }
+  return false;
+}
+/** Writes `message` plus a trailing newline to stdout; with no argument it emits a blank line. */
+function log(message = ''): void {
+  const line = message + '\n';
+  process.stdout.write(line);
+}
+/** Writes `message` plus a trailing newline to stderr. */
+function logError(message: string): void {
+  const line = message + '\n';
+  process.stderr.write(line);
+}
+/**
+ * Sends the same cached prompt to one model up to `attempts` times.
+ *
+ * One client and one message list are created up front and reused for every attempt.
+ * Each attempt's usage is logged on a single line, and the loop stops as soon as an
+ * attempt reports cached tokens being read.
+ *
+ * @param param0 - Model case; `label` is only used for logging.
+ * @returns The usage recorded for each attempt that was made, in order.
+ */
+async function runCase({ label, model }: ModelCase): Promise<CacheUsage[]> {
+  const defaultHeaders = {
+    'HTTP-Referer': 'https://librechat.ai',
+    'X-Title': 'LibreChat OpenRouter Prompt Cache Live Test',
+  };
+  const llmInput: ChatOpenRouterInput & { configuration: ClientOptions } = {
+    model,
+    apiKey,
+    maxTokens: 12,
+    temperature: 0,
+    promptCache: true,
+    streamUsage: true,
+    configuration: { baseURL, defaultHeaders },
+  };
+  const llm = new ChatOpenRouter(llmInput);
+  const messages = buildMessages(model);
+  const usages: CacheUsage[] = [];
+  log(`\n${label}: ${model}`);
+  for (let attempt = 1; attempt <= attempts; attempt++) {
+    const started = Date.now();
+    const response = (await llm.invoke(messages)) as AIMessage;
+    const usage = getCacheUsage(response);
+    usages.push(usage);
+    const elapsedMs = Date.now() - started;
+    log(
+      `attempt=${attempt} ms=${elapsedMs} input=${usage.inputTokens} output=${usage.outputTokens} write=${usage.cacheCreation} read=${usage.cacheRead} total=${usage.totalTokens}`
+    );
+    // Earlier attempts cannot have had a read (the loop would already have ended),
+    // so checking the latest usage is the same as scanning the whole list.
+    if (usage.cacheRead > 0) {
+      break;
+    }
+  }
+  return usages;
+}
+/**
+ * Exercises tool-schema caching against a fixed model with a stable tool plus a
+ * per-attempt tool.
+ *
+ * A fresh client and a fresh tool set are created on every attempt. Before each request the
+ * marked tool list is checked: the first entry carrying `cache_control` must be
+ * `stable_reference_lookup`. The loop stops as soon as an attempt reports cached tokens read.
+ *
+ * @returns The usage recorded for each attempt that was made, in order.
+ * @throws Error when the cache marker is not on the expected tool.
+ */
+async function runStaticToolCase(): Promise<CacheUsage[]> {
+  const model = 'anthropic/claude-haiku-4.5';
+  const usages: CacheUsage[] = [];
+  log(`\nStatic tools through OpenRouter: ${model}`);
+  for (let attempt = 1; attempt <= attempts; attempt++) {
+    const defaultHeaders = {
+      'HTTP-Referer': 'https://librechat.ai',
+      'X-Title': 'LibreChat OpenRouter Prompt Cache Live Test',
+    };
+    const llmInput: ChatOpenRouterInput & { configuration: ClientOptions } = {
+      model,
+      apiKey,
+      maxTokens: 12,
+      temperature: 0,
+      promptCache: true,
+      streamUsage: true,
+      configuration: { baseURL, defaultHeaders },
+    };
+    const llm = new ChatOpenRouter(llmInput);
+    // The predicate matches the per-attempt tool by its name prefix
+    // (presumably flagging it as dynamic — confirm against toolCache).
+    const isDynamicTool = (name: string): boolean =>
+      name.startsWith('dynamic_runtime_tool_');
+    const tools = partitionAndMarkOpenRouterToolCache(
+      buildToolSet(attempt),
+      isDynamicTool
+    ) as OpenRouterTool[];
+    const markedTool = tools.find((entry) => entry.cache_control != null);
+    if (markedTool?.function.name !== 'stable_reference_lookup') {
+      throw new Error('Static tool cache marker was not applied as expected');
+    }
+    const modelWithTools = llm.bindTools(tools);
+    const started = Date.now();
+    const prompt = [
+      new SystemMessage('Reply with exactly: cache live check ok.'),
+      new HumanMessage(
+        `Attempt ${attempt}. Do not call tools; only answer with the requested text.`
+      ),
+    ];
+    const response = (await modelWithTools.invoke(prompt)) as AIMessage;
+    const usage = getCacheUsage(response);
+    usages.push(usage);
+    const elapsedMs = Date.now() - started;
+    log(
+      `attempt=${attempt} ms=${elapsedMs} input=${usage.inputTokens} output=${usage.outputTokens} write=${usage.cacheCreation} read=${usage.cacheRead} total=${usage.totalTokens}`
+    );
+    // Earlier attempts cannot have had a read (the loop would already have ended),
+    // so checking the latest usage is the same as scanning the whole list.
+    if (usage.cacheRead > 0) {
+      break;
+    }
+  }
+  return usages;
+}
+/**
+ * Runs every configured model case sequentially, then the static-tool case, prints a
+ * summary of cache writes/reads per case, and fails if any case never produced a cache read.
+ *
+ * @throws Error listing the models for which prompt caching was not confirmed.
+ */
+async function main(): Promise<void> {
+  const results: Array<ModelCase & { usages: CacheUsage[] }> = [];
+  for (const modelCase of modelCases) {
+    results.push({ ...modelCase, usages: await runCase(modelCase) });
+  }
+  results.push({
+    label: 'Static tools',
+    model: 'anthropic/claude-haiku-4.5',
+    usages: await runStaticToolCase(),
+  });
+  // A case passes only when it shows cache activity and at least one cache read.
+  const failedModels = results
+    .filter(({ usages }) => !(hasCacheActivity(usages) && hasCacheHit(usages)))
+    .map(({ model }) => model);
+  log('\nSummary');
+  for (const result of results) {
+    const writes = result.usages.map((usage) => usage.cacheCreation).join(',');
+    const reads = result.usages.map((usage) => usage.cacheRead).join(',');
+    log(`${result.label} ${result.model}: writes=[${writes}] reads=[${reads}]`);
+  }
+  if (failedModels.length > 0) {
+    throw new Error(
+      `Prompt caching was not confirmed for: ${failedModels.join(', ')}`
+    );
+  }
+}
+// Script entry point: report any failure on stderr and exit with a non-zero status.
+main().catch((error: unknown) => {
+  // A rejection is not guaranteed to be an Error instance; narrow before reading
+  // `.message` so a non-Error rejection is still reported instead of printing "undefined".
+  logError(error instanceof Error ? error.message : String(error));
+  process.exit(1);
+});

package/src/specs/agent-handoffs.live.test.ts ADDED Viewed

@@ -0,0 +1,140 @@
+// src/specs/agent-handoffs.live.test.ts
+/**
+ * Live handoff integration verification.
+ *
+ * Run with:
+ * RUN_HANDOFF_LIVE_TESTS=1 ANTHROPIC_API_KEY=... npm test -- agent-handoffs.live.test.ts --runInBand
+ */
+import { config as dotenvConfig } from 'dotenv';
+dotenvConfig();
+import { HumanMessage } from '@langchain/core/messages';
+import { describe, expect, it, jest } from '@jest/globals';
+import type { BaseMessage, ToolMessage } from '@langchain/core/messages';
+import type { RunnableConfig } from '@langchain/core/runnables';
+import type * as t from '@/types';
+import { Constants, Providers } from '@/common';
+import { Run } from '@/run';
+// The live suite runs only when explicitly enabled AND a non-empty Anthropic key is present;
+// otherwise it is registered as skipped.
+const liveFlagEnabled = process.env.RUN_HANDOFF_LIVE_TESTS === '1';
+const hasAnthropicKey = (process.env.ANTHROPIC_API_KEY ?? '') !== '';
+const shouldRunLive = liveFlagEnabled && hasAnthropicKey;
+const describeIfLive = shouldRunLive ? describe : describe.skip;
+// Model used by every agent in this file; overridable through the environment.
+const modelName =
+  process.env.ANTHROPIC_HANDOFF_LIVE_MODEL ?? 'claude-sonnet-4-6';
+/**
+ * Builds the inputs for one Anthropic-backed agent.
+ *
+ * @param agentId - Identifier of the agent within the graph.
+ * @param instructions - Instruction text for the agent.
+ * @returns Agent inputs using the module-level `modelName`, the key from
+ * `ANTHROPIC_API_KEY`, temperature 0, streaming enabled, 256 max output tokens and an
+ * 8000-token context limit.
+ */
+function createAnthropicAgent(
+  agentId: string,
+  instructions: string
+): t.AgentInputs {
+  const clientOptions = {
+    modelName,
+    apiKey: process.env.ANTHROPIC_API_KEY,
+    temperature: 0,
+    maxTokens: 256,
+    streaming: true,
+  };
+  return {
+    agentId,
+    provider: Providers.ANTHROPIC,
+    clientOptions,
+    instructions,
+    maxContextTokens: 8000,
+  };
+}
+/**
+ * Builds the stream configuration passed to `run.processStream`.
+ *
+ * @param threadId - Value stored as `configurable.thread_id`.
+ * @returns A config with stream mode 'values' and event version 'v2'.
+ */
+function createStreamConfig(threadId: string): Partial<RunnableConfig> & {
+  version: 'v1' | 'v2';
+  streamMode: string;
+} {
+  const configurable = { thread_id: threadId };
+  return { configurable, streamMode: 'values', version: 'v2' };
+}
+/**
+ * Flattens a message's content into plain text.
+ *
+ * @param message - Message whose `content` is read.
+ * @returns The content itself when it is a string; for array content, the concatenation
+ * (no separator) of string parts and of parts with a string `text` field; '' otherwise.
+ */
+function contentToText(message: BaseMessage): string {
+  const { content } = message;
+  if (typeof content === 'string') {
+    return content;
+  }
+  if (!Array.isArray(content)) {
+    return '';
+  }
+  let text = '';
+  for (const part of content) {
+    if (typeof part === 'string') {
+      text += part;
+      continue;
+    }
+    // Parts without a string `text` field contribute nothing.
+    if ('text' in part && typeof part.text === 'string') {
+      text += part.text;
+    }
+  }
+  return text;
+}
+// Live suite: registered via describeIfLive, so it is skipped unless the gating
+// environment variables checked above are set.
+describeIfLive('Agent handoffs live integration', () => {
+  // Allow up to two minutes for the run.
+  jest.setTimeout(120_000);
+  it('routes through a real Anthropic handoff and preserves instructions', async () => {
+    // A per-run nonce makes the expected marker unique to this execution.
+    const nonce = `live-handoff-${Date.now()}`;
+    const expectedReply = `${nonce}-specialist-confirmed`;
+    const handoffToolName = `${Constants.LC_TRANSFER_TO_}specialist`;
+    // Two agents: a router told to only call the handoff tool, and a specialist told to
+    // echo the marker it receives in the handoff instructions.
+    const agents: t.AgentInputs[] = [
+      createAnthropicAgent(
+        'router',
+        `You are a routing agent. For every user request, your only valid action is to call the handoff tool named ${handoffToolName}. Do not answer directly.
+When you call the handoff tool, include instructions telling the specialist to reply exactly with this marker and no extra words: ${expectedReply}`
+      ),
+      createAnthropicAgent(
+        'specialist',
+        'You are the specialist. When you receive handoff instructions with a marker, reply exactly with that marker and no extra words.'
+      ),
+    ];
+    // Single handoff edge router -> specialist; `prompt` / `promptKey` describe the
+    // 'instructions' value the router is asked to supply (presumably surfaced as a
+    // handoff tool parameter — confirm against the graph implementation).
+    const edges: t.GraphEdge[] = [
+      {
+        from: 'router',
+        to: 'specialist',
+        edgeType: 'handoff',
+        description: 'Transfer to the specialist for the final response',
+        prompt:
+          'Instructions for the specialist. Include any exact marker that must be returned.',
+        promptKey: 'instructions',
+      },
+    ];
+    const run = await Run.create({
+      runId: `${nonce}-run`,
+      graphConfig: { type: 'multi-agent', agents, edges },
+      returnContent: true,
+      skipCleanup: true,
+    });
+    await run.processStream(
+      {
+        messages: [
+          new HumanMessage(
+            `Please delegate this to the specialist. The final answer must be exactly: ${expectedReply}`
+          ),
+        ],
+      },
+      createStreamConfig(`${nonce}-thread`)
+    );
+    const messages = run.getRunMessages() ?? [];
+    // The handoff is confirmed by a tool message named after the handoff tool.
+    const handoffMessage = messages.find(
+      (message): message is ToolMessage =>
+        message.getType() === 'tool' &&
+        (message as ToolMessage).name === handoffToolName
+    );
+    // NOTE(review): text from every AI message is joined here, not only the specialist's,
+    // so the marker check below would also pass if the router echoed the marker as text.
+    const finalText = messages
+      .filter((message) => message.getType() === 'ai')
+      .map(contentToText)
+      .join('\n');
+    expect(handoffMessage).toBeDefined();
+    expect(finalText).toContain(expectedReply);
+  });
+});