npm - @exulu/backend - Versions diffs - 1.55.0 → 1.56.0 - Mend

@exulu/backend 1.55.0 → 1.56.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/dist/index.cjs +297 -146
package/dist/index.d.cts +4 -3
package/dist/index.d.ts +4 -3
package/dist/index.js +297 -146
package/ee/agentic-retrieval/v3/classifier.ts +19 -5
package/ee/agentic-retrieval/v3/context-sampler.ts +10 -1
package/ee/agentic-retrieval/v3/index.ts +110 -28
package/ee/agentic-retrieval/v3/tools.ts +13 -5
package/ee/agentic-retrieval/v4/agent-loop.ts +208 -0
package/ee/agentic-retrieval/v4/context-sampler.ts +79 -0
package/ee/agentic-retrieval/v4/index.ts +690 -0
package/ee/agentic-retrieval/v4/types.ts +58 -0
package/package.json +1 -1

package/ee/agentic-retrieval/v3/classifier.ts CHANGED Viewed

@@ -20,11 +20,25 @@ export async function classifyQuery(
     .map((ctx) => {
       const sample = samples.find((s) => s.contextId === ctx.id);
       const fieldList = sample?.fields.join(", ") ?? "name, external_id";
-      const exampleStr =
-        sample?.exampleItems.length
-          ? `\n    Example records: ${JSON.stringify(sample.exampleItems.slice(0, 2))}`
-          : "";
-      return `  - ${ctx.id}: ${ctx.name}\n    Description: ${ctx.description}\n    Fields: ${fieldList}${exampleStr}`;
+      return `
+      <context>
+        <id>
+          ${ctx.id}
+        </id>
+        <name>
+          ${ctx.name}
+        </name>
+        <description>
+          ${ctx.description}
+        </description>
+        <fields>
+          ${fieldList}
+        </fields>
+        <example_items>
+          ${sample?.exampleItems.map((item) => JSON.stringify(item)).join("\n")}
+        </example_items>
+       </context>
+       `;
     })
     .join("\n\n");

package/ee/agentic-retrieval/v3/context-sampler.ts CHANGED Viewed

@@ -3,10 +3,19 @@ import { postgresClient } from "@SRC/postgres/client";
 import { applyAccessControl } from "@SRC/graphql/utilities/access-control";
 import { convertContextToTableDefinition } from "@SRC/graphql/utilities/convert-context-to-table-definition";
 import type { User } from "@EXULU_TYPES/models/user";
-import type { ContextSample } from "./types";
 const CACHE_TTL_MS = 60 * 60 * 1000; // 1 hour
+export interface ContextSample {
+  contextId: string;
+  contextName: string;
+  /** All field names available on items (standard + custom) */
+  fields: string[];
+  /** Up to 2 example item records */
+  exampleItems: Array<Record<string, any>>;
+  sampledAt: number;
+}
 /**
  * Pulls 1–2 example item records per context at agent initialization and caches
  * them in memory. These samples are injected into the classifier prompt so the

package/ee/agentic-retrieval/v3/index.ts CHANGED Viewed

@@ -6,13 +6,14 @@ import type { ExuluReranker } from "@SRC/exulu/reranker";
 import { ExuluTool } from "@SRC/exulu/tool";
 import type { User } from "@EXULU_TYPES/models/user";
 import { checkLicense } from "@EE/entitlements";
-import { ContextSampler } from "./context-sampler";
 import { classifyQuery } from "./classifier";
 import { createRetrievalTools, parseGlobalItemIds } from "./tools";
 import { STRATEGIES } from "./strategies";
 import { runAgentLoop } from "./agent-loop";
 import { TrajectoryLogger } from "./trajectory";
 import type { AgenticRetrievalOutput, QueryType } from "./types";
+import type { ExuluItem } from "@SRC/index";
+import { ContextSampler } from "./context-sampler";
 // Module-level sampler — shared across all tool instances so the cache is warm
 // across requests within the same process.
@@ -23,6 +24,7 @@ async function* executeV3({
   contexts,
   reranker,
   model,
+  toolVariablesConfig,
   user,
   role,
   customInstructions,
@@ -33,6 +35,7 @@ async function* executeV3({
   query: string;
   contexts: ExuluContext[];
   reranker?: ExuluReranker;
+  toolVariablesConfig?: Record<string, any>;
   model: LanguageModel;
   user?: User;
   role?: string;
@@ -72,22 +75,45 @@ async function* executeV3({
   // ── 4. Select strategy ────────────────────────────────────────────────────
   const strategy = STRATEGIES[classification.queryType];
+  const contextSpecificInstructions = activeContexts.map(ctx => {
+    const instructions = toolVariablesConfig?.[`${ctx.id}_|_instructions`] ?? "";
+    if (instructions) {
+      return `
+      <${ctx.id}>
+      ${instructions}
+      </${ctx.id}>
+    `;
+    } else {
+      return null;
+    }
+  }).filter(Boolean).join("\n");
   // Build context guidance: the classifier is a priority hint, not a hard filter.
   // All contexts remain available so the agent can fall back if suggested ones miss.
   const suggestedIds = classification.suggestedContextIds;
   const fallbackIds = activeContexts
     .filter((c) => !suggestedIds.includes(c.id))
     .map((c) => c.id);
-  const contextBase =
+  let contextBase =
     suggestedIds.length > 0
-      ? `Suggested priority contexts: [${suggestedIds.join(", ")}]. Also available: [${fallbackIds.join(", ")}]. Custom instructions may require searching additional or all contexts — follow them.`
+      ? `
+      Suggested priority contexts: [${suggestedIds.join(", ")}].
+      Also available: [${fallbackIds.join(", ")}].
+      Custom instructions may require searching additional or all contexts — follow them.`
       : `All contexts available: [${activeContexts.map((c) => c.id).join(", ")}].`;
   const preselectedNote = preselectedByContext?.size
     ? `\nSCOPE CONSTRAINT: Retrieval is scoped to preselected items/contexts. Per context: ${[...preselectedByContext.entries()].map(([ctx, ids]) => ids === null ? `${ctx} (full context)` : `${ctx} (${ids.length} item${ids.length === 1 ? "" : "s"})`).join(", ")}. All tools enforce this scope automatically. For full-context entries you may search freely; for item-restricted entries do NOT use search_items_by_name for discovery — go directly to search_content or save_search_results.`
     : "";
+  if (contextSpecificInstructions?.length) {
+    contextBase += `
+      Context specific instructions:
+      ${contextSpecificInstructions}
+      `;
+  }
   const contextGuidance = contextBase + preselectedNote;
   // ── 5. Initialize tools ───────────────────────────────────────────────────
@@ -95,6 +121,7 @@ async function* executeV3({
   const retrievalTools = createRetrievalTools({
     contexts: activeContexts,
+    toolVariablesConfig,
     user,
     role,
     updateVirtualFiles: (files) => bashToolkit.sandbox.writeFiles(files),
@@ -166,7 +193,8 @@ export function createAgenticRetrievalToolV3({
   user,
   role,
   model,
-  preselectedItemIds,
+  preselected,
+  memoryItems
 }: {
   contexts: ExuluContext[];
   rerankers: ExuluReranker[];
@@ -174,7 +202,8 @@ export function createAgenticRetrievalToolV3({
   role?: string;
   model?: LanguageModel;
   instructions?: string;
-  preselectedItemIds?: string[];
+  preselected?: string[];
+  memoryItems?: ExuluItem[];
 }): ExuluTool | undefined {
   const license = checkLicense();
   if (!license["agentic-retrieval"]) {
@@ -229,20 +258,51 @@ export function createAgenticRetrievalToolV3({
         default: false,
       },
       {
-        name: "log_trajectories",
+        name: "logging",
         description: "Save a detailed markdown + JSON log of every retrieval execution to disk. Useful for debugging and evaluation.",
         type: "boolean",
         default: false,
       },
       ...contexts.map((ctx) => ({
-        name: ctx.id,
+        name: ctx.id + "_|_enabled",
         description: `Enable search in "${ctx.name}". ${ctx.description}`,
         type: "boolean" as const,
         default: true,
+      }
+      )),
+      ...contexts.map((ctx) => ({
+        name: `${ctx.id}_|_instructions`,
+        description: `Instructions for the retrieval agent about how to search in the ${ctx.name} context`,
+        type: "string" as const,
+        default: "",
       })),
+      ...contexts.map((ctx) => ({
+        name: `${ctx.id}_|_priority`,
+        description: `Defines in which order the context should be searched in, the higher the number the higher the priority, if contexts have the same priority they are searched in parallel`,
+        type: "number" as const,
+        default: 0,
+      })),
+      ...contexts.map((ctx) => ({
+        name: `${ctx.id}_|_max_results`,
+        description: `Defines the maximum number of results to return for the ${ctx.name} context`,
+        type: "number" as const,
+        default: 0,
+      })),
+      ...contexts.map((ctx) => ({
+        name: `${ctx.id}_|_max_steps`,
+        description: `Defines the maximum number of steps the agent is allowed to take when searching the ${ctx.name} context`,
+        type: "number" as const,
+        default: 0,
+      })),
+      ...contexts.map((ctx) => ({
+        name: `${ctx.id}_|_expand_chunks`,
+        description: `Defines if the agent automatically retrieves nearby chunks around the matched chunks, usefull if relevant content might be split up`,
+        type: "number" as const,
+        default: 0,
+      }))
     ],
     inputSchema: z.object({
-      query: z.string().describe("The question or query to answer"),
+      userQuery: z.string().describe("The original unaltered question from the user"),
       userInstructions: z
         .string()
         .optional()
@@ -256,24 +316,24 @@ export function createAgenticRetrievalToolV3({
         )
     }),
     execute: async function* ({
-      query,
+      userQuery,
       userInstructions,
       confirmedContextIds,
       toolVariablesConfig,
       sessionID,
     }: {
-      query: string;
+      userQuery: string;
       userInstructions?: string;
       confirmedContextIds?: string[];
       toolVariablesConfig?: Record<string, any>;
       sessionID?: string;
     }) {
       /* ROADMAP:
       const app = exuluApp.get();
       let reasoningModel: LanguageModel | undefined = model;
       let searchModel: LanguageModel | undefined = model;
        if (toolVariablesConfig?.reasoning_model) {
         reasoningModel = app.provider(toolVariablesConfig.reasoning_model)?.model?.create({});
@@ -281,7 +341,7 @@ export function createAgenticRetrievalToolV3({
           throw new Error("Reasoning model not found");
         }
       }
       if (toolVariablesConfig?.search_model) {
         searchModel = app.provider(toolVariablesConfig.search_model);
         if (!searchModel) {
@@ -304,38 +364,38 @@ export function createAgenticRetrievalToolV3({
       if (toolVariablesConfig) {
         configInstructions = toolVariablesConfig["instructions"] ?? "";
         logTrajectory =
-          toolVariablesConfig["log_trajectories"] === true ||
-          toolVariablesConfig["log_trajectories"] === "true";
+          toolVariablesConfig["logging"] === true ||
+          toolVariablesConfig["logging"] === "true";
         managedContextEnabled = toolVariablesConfig["managed_context"] === true || toolVariablesConfig["managed_context"] === "true";
         activeContexts = contexts.filter(
           (ctx) =>
-            toolVariablesConfig[ctx.id] === true ||
-            toolVariablesConfig[ctx.id] === "true" ||
-            toolVariablesConfig[ctx.id] === 1,
+            toolVariablesConfig[ctx.id + "_|_enabled"] === true ||
+            toolVariablesConfig[ctx.id + "_|_enabled"] === "true" ||
+            toolVariablesConfig[ctx.id + "_|_enabled"] === 1,
         );
         if (activeContexts.length === 0) activeContexts = contexts;
         requiresPreselectedContexts = toolVariablesConfig["require_preselected_contexts"] === true || toolVariablesConfig["require_preselected_contexts"] === "true";
         const rerankerId = toolVariablesConfig["reranker"];
         if (rerankerId && rerankerId !== "none") {
           configuredReranker = rerankers.find((r) => r.id === rerankerId);
         }
       }
       console.log("[EXULU] Managed context enabled:", managedContextEnabled);
-      console.log("[EXULU] Preselected item IDs:", preselectedItemIds);
+      console.log("[EXULU] Preselected item IDs:", preselected);
-      if (managedContextEnabled && !preselectedItemIds?.length) {
+      if (managedContextEnabled && !preselected?.length) {
         console.log("[EXULU] Managed context was enabled for the agentic retrieval tool. This means that the user must preselect items that the agentic retrieval tool will search in, please notify the user to preselect items before executing the tool.");
         yield { result: "Managed context was enabled for the agentic retrieval tool. This means that the user must preselect items that the agentic retrieval tool will search in, please notify the user to preselect items before executing the tool." };
         return;
       }
-      if (requiresPreselectedContexts && !confirmedContextIds?.length && !preselectedItemIds?.length) {
+      if (requiresPreselectedContexts && !confirmedContextIds?.length && !preselected?.length) {
         console.log("[EXULU] The user must choose between the available contexts before executing the tool. The available contexts are: " + activeContexts.map((c) => c.id).join(", ") + ". If the question_ask tool is available use that to ask the user which contexts they want to search in, otherwise just ask them in plain text.");
         yield { result: "The user must choose between the available contexts before executing the tool, the available contexts are: " + activeContexts.map((c) => c.id).join(", ") + ". If the question_ask tool is available use that to ask the user which contexts they want to search in, otherwise just ask them in plain text." };
         return;
@@ -348,24 +408,46 @@ export function createAgenticRetrievalToolV3({
       }
       const combinedInstructions = [
-        configInstructions ? `Configuration instructions: ${configInstructions}` : "",
-        adminInstructions ? `Admin instructions: ${adminInstructions}` : "",
-        userInstructions ? `User instructions: ${userInstructions}` : "",
+        configInstructions ? `
+        Configuration instructions:
+        <configuration_instructions>
+        ${configInstructions}
+        </configuration_instructions>
+        ` : "",
+        adminInstructions ? `
+        Admin instructions:
+        <admin_instructions>
+        ${adminInstructions}
+        </admin_instructions>
+        ` : "",
+        userInstructions ? `
+        User instructions:
+        <user_instructions>
+        ${userInstructions}
+        </user_instructions>
+        ` : "",
+        memoryItems ? `
+        Relevant memories (these are items that the agent has retrieved from the memory context and are relevant to the query):
+        <relevant_memories>
+        ${memoryItems?.map(item => JSON.stringify(item)).join("\n")}
+        </relevant_memories>
+        ` : "",
       ]
         .filter(Boolean)
         .join("\n");
       for await (const output of executeV3({
-        query,
+        query: userQuery,
         contexts: activeContexts,
         reranker: configuredReranker,
+        toolVariablesConfig,
         model,
         user,
         role,
         customInstructions: combinedInstructions || undefined,
         logTrajectory,
         sessionId: sessionID,
-        preselectedItemIds,
+        preselectedItemIds: preselected,
       })) {
         yield { result: JSON.stringify(output) };
       }

package/ee/agentic-retrieval/v3/tools.ts CHANGED Viewed

@@ -77,6 +77,7 @@ export function parseGlobalItemIds(globalIds: string[]): Map<string, string[] |
 export type RetrievalToolParams = {
   contexts: ExuluContext[];
+  toolVariablesConfig?: Record<string, any>;
   user?: User;
   role?: string;
   updateVirtualFiles: (files: Array<{ path: string; content: string }>) => Promise<void>;
@@ -94,7 +95,7 @@ export type RetrievalToolParams = {
  * and filtered per strategy.
  */
 export function createRetrievalTools(params: RetrievalToolParams) {
-  const { contexts, user, role, updateVirtualFiles, preselectedItemsByContext } = params;
+  const { contexts, toolVariablesConfig, user, role, updateVirtualFiles, preselectedItemsByContext } = params;
   const ctxEnum = buildContextEnum(contexts);
   // ──────────────────────────────────────────────────────────
@@ -278,7 +279,7 @@ Use includeContent: true when you need the ACTUAL text to answer a question.
 For listing queries: always start with includeContent: false, then use dynamic tools to fetch specific pages.`,
     inputSchema: z.object({
-      query: z.string().describe("Search query about the content you're looking for"),
+      userQuery: z.string().describe("The original unaltered question from the user"),
       knowledge_base_id: z
         .enum(contexts.map((c) => c.id) as [string, ...string[]])
         .describe(
@@ -318,7 +319,7 @@ For listing queries: always start with includeContent: false, then use dynamic t
         .describe("Max chunks with content (max 20). Without content, up to 200 are returned."),
     }),
     execute: async ({
-      query,
+      userQuery,
       knowledge_base_id,
       keywords,
       searchMethod,
@@ -329,7 +330,8 @@ For listing queries: always start with includeContent: false, then use dynamic t
       limit,
     }) => {
       const [ctx] = resolveContexts([knowledge_base_id], contexts) as [ExuluContext];
-      const effectiveLimit = includeContent ? Math.min(limit ?? 20, 20) : Math.min((limit ?? 20) * 20, 400);
+      const maxResults = toolVariablesConfig?.[`${ctx.id}_|_max_results`] || 20;
+      const effectiveLimit = includeContent ? Math.min(limit ?? maxResults, maxResults) : Math.min((limit ?? maxResults) * maxResults, 400);
       const itemFilters: SearchFilters = [];
@@ -361,7 +363,7 @@ For listing queries: always start with includeContent: false, then use dynamic t
         itemFilters.push({ name: { or: item_names.map((n) => ({ contains: n })) } });
       if (item_external_ids) itemFilters.push({ external_id: { in: item_external_ids } });
-      const effectiveQuery = query || keywords?.join(" ") || "";
+      const effectiveQuery = userQuery || keywords?.join(" ") || "";
       let method = mapSearchMethod(searchMethod ?? "hybrid");
@@ -372,6 +374,8 @@ For listing queries: always start with includeContent: false, then use dynamic t
         }
       }
+      const expandChunks = toolVariablesConfig?.[`${ctx.id}_|_expand_chunks`] || 0;
       try {
         const { chunks } = await ctx.search({
           query: effectiveQuery,
@@ -385,6 +389,10 @@ For listing queries: always start with includeContent: false, then use dynamic t
           user,
           role,
           trigger: "tool",
+          expand: expandChunks > 0 ? {
+            before: expandChunks,
+            after: expandChunks,
+          } : undefined,
         });
         return JSON.stringify(

package/ee/agentic-retrieval/v4/agent-loop.ts ADDED Viewed

@@ -0,0 +1,208 @@
+import { generateText, stepCountIs, tool } from "ai";
+import type { LanguageModel, Tool as AITool, ModelMessage } from "ai";
+import { z } from "zod";
+import { withRetry } from "@SRC/utils/with-retry";
+import type { ExuluReranker } from "@SRC/exulu/reranker";
+import type { AgenticRetrievalOutput, ChunkResult } from "./types";
+import { DEFAULT_MAX_STEPS, type AgenticRetrievalLog, type ContextRetrievalConfig } from ".";
+const FINISH_TOOL_NAME = "finish_retrieval";
+const finishRetrievalTool = tool({
+  description:
+    "Call this tool when you have retrieved sufficient information and no further searches are needed. " +
+    "You MUST call this tool to signal that retrieval is complete — do not write a text conclusion.",
+  inputSchema: z.object({
+    reasoning: z.string().describe("One sentence explaining why retrieval is complete"),
+  }),
+  execute: async ({ reasoning }) => JSON.stringify({ finished: true, reasoning }),
+});
+function extractChunksFromToolResults(toolResults: any[]): ChunkResult[] {
+  const chunks: ChunkResult[] = [];
+  for (const result of toolResults ?? []) {
+    // AI SDK v6 uses `output` (not `result`) for tool result values
+    const rawOutput = result.output ?? result.result;
+    let parsed: any;
+    try {
+      parsed = typeof rawOutput === "string" ? JSON.parse(rawOutput) : rawOutput;
+    } catch {
+      continue;
+    }
+    if (Array.isArray(parsed)) {
+      for (const item of parsed) {
+        if (item?.item_id && item?.context) {
+          chunks.push({
+            item_name: item.item_name,
+            item_id: item.item_id,
+            context: item.context?.id ?? item.context,
+            chunk_id: item.chunk_id,
+            chunk_index: item.chunk_index,
+            chunk_content: item.chunk_content,
+            metadata: item.metadata,
+          });
+        }
+      }
+    }
+  }
+  return chunks;
+}
+/**
+ * Core agent loop: one generateText call per step.
+ *
+ * Unlike v2 (which split each step into a reasoning call + a separate tool
+ * execution call), here a single call with toolChoice: "required" has the model
+ * reason and call at least one tool in one pass. The model sees tool results
+ * from the previous step via the conversation history (messages array).
+ *
+ * The loop stops when:
+ * - The model calls the finish_retrieval tool, OR
+ * - The step budget (config.maxSteps, falling back to DEFAULT_MAX_STEPS) is exhausted
+ */
+export async function* runAgentLoop(params: {
+  config: ContextRetrievalConfig;
+  userQuery: string;
+  log: AgenticRetrievalLog;
+  todos: {
+    status: "planned" | "completed";
+    description: string;
+    current: boolean;
+  }[];
+  tools: Record<string, AITool>;
+  model: LanguageModel;
+  reranker?: ExuluReranker;
+  sessionID?: string;
+  onStepComplete?: (step: AgenticRetrievalOutput["steps"][0]) => void;
+}): AsyncGenerator<AgenticRetrievalOutput> {
+  const { userQuery, tools, model, reranker, sessionID, onStepComplete, config, log, todos } = params;
+  const output: AgenticRetrievalOutput = {
+    steps: [],
+    reasoning: [],
+    chunks: [],
+    usage: [],
+    totalTokens: 0,
+  };
+  const messages: ModelMessage[] = [{ role: "user", content: userQuery }];
+  const stepBudget = config.maxSteps || DEFAULT_MAX_STEPS
+  const SYSTEM_PROMPT = `
+  You are a helpful assistant that can search the knowledge base and retrieve information.
+  You are searching for information that is relevant to the following question:
+  <user_query>
+  ${userQuery}
+  </user_query>
+  You have the following instructions for this knowledge base:
+  <instructions>
+  ${config.instructions}
+  </instructions>
+  A first search strategy was drafted as a todo list:
+  <todo_list>
+  ${todos.map((todo, index) => `${index + 1}. ${todo.status} - ${todo.description}`).join("\n")}
+  </todo_list>
+  `;
+  for (let step = 0; step < stepBudget; step++) {
+    log.entries.push({
+      label: "Agent loop step",
+      timestamp: new Date().toISOString(),
+      message: `[EXULU] v3 agent loop — step ${step + 1}/${stepBudget}`,
+    });
+    let result: Awaited<ReturnType<typeof generateText>>;
+    const stepTools = { ...tools, [FINISH_TOOL_NAME]: finishRetrievalTool };
+    try {
+      result = await withRetry(() =>
+        generateText({
+          model,
+          temperature: 0,
+          system: SYSTEM_PROMPT,
+          messages,
+          tools: stepTools,
+          toolChoice: "required",
+          stopWhen: stepCountIs(1),
+        }),
+      );
+    } catch (err) {
+      console.error("[EXULU] v3 generateText failed:", err);
+      throw err;
+    }
+    // Carry conversation forward: assistant message + tool results go into history
+    // so the model sees them on the next iteration.
+    messages.push(...(result.response.messages as ModelMessage[]));
+    // Extract chunks from tool results
+    let stepChunks: any[] = extractChunksFromToolResults(result.toolResults as any[]);
+    // Deduplicate by chunk_id within this step (parallel tool calls can return the same chunk
+    // if the agent searches the same context twice, or the same chunk is indexed in two contexts).
+    const seenChunkIds = new Set<string>();
+    stepChunks = stepChunks.filter((c) => {
+      if (!c.chunk_id) return true;
+      if (seenChunkIds.has(c.chunk_id)) return false;
+      seenChunkIds.add(c.chunk_id);
+      return true;
+    });
+    // Record step
+    const stepRecord = {
+      stepNumber: step + 1,
+      text: result.text ?? "",
+      toolCalls: (result.toolCalls as any[])?.map((tc) => ({
+        name: tc.toolName,
+        id: tc.toolCallId,
+        input: tc.input,
+      })) ?? [],
+      chunks: stepChunks,
+      tokens: result.usage?.totalTokens ?? 0,
+    };
+    log.entries.push({
+      label: "Step completed",
+      timestamp: new Date().toISOString(),
+      message: JSON.stringify(stepRecord),
+    });
+    output.steps.push(stepRecord);
+    output.reasoning.push({
+      text: result.text ?? "",
+      tools: (result.toolCalls as any[])?.map((tc) => ({
+        name: tc.toolName,
+        id: tc.toolCallId,
+        input: tc.input,
+        output: stepChunks,
+      })) ?? [],
+    });
+    // Deduplicate against chunks already accumulated from prior steps
+    const existingChunkIds = new Set(output.chunks.map((c) => c.chunk_id).filter(Boolean));
+    output.chunks.push(...stepChunks.filter((c) => !c.chunk_id || !existingChunkIds.has(c.chunk_id)));
+    output.usage.push(result.usage);
+    onStepComplete?.(stepRecord);
+    yield { ...output };
+    // Stop once the model has called finish_retrieval in this step
+    const calledFinish = (result.toolCalls as any[])?.some(
+      (tc) => tc.toolName === FINISH_TOOL_NAME,
+    );
+    if (calledFinish) {
+      console.log(`[EXULU] v3 model called finish_retrieval after step ${step + 1}`);
+      break;
+    }
+  }
+  output.totalTokens = output.usage.reduce((sum, u) => sum + (u?.totalTokens ?? 0), 0);
+}