npm - @exulu/backend - Version diff - 1.54.0 → 1.56.0 - Mend

@exulu/backend 1.54.0 → 1.56.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (26)

package/dist/index.cjs +2275 -1330
package/dist/index.d.cts +8 -30
package/dist/index.d.ts +8 -30
package/dist/index.js +2256 -1306
package/ee/agentic-retrieval/v3/agent-loop.ts +49 -3
package/ee/agentic-retrieval/v3/classifier.ts +61 -42
package/ee/agentic-retrieval/v3/context-sampler.ts +10 -1
package/ee/agentic-retrieval/v3/index.ts +211 -35
package/ee/agentic-retrieval/v3/session-tools-registry.ts +20 -0
package/ee/agentic-retrieval/v3/strategies.ts +28 -24
package/ee/agentic-retrieval/v3/tools.ts +236 -113
package/ee/agentic-retrieval/v3/trajectory.ts +227 -14
package/ee/agentic-retrieval/v4/agent-loop.ts +142 -55
package/ee/agentic-retrieval/v4/context-sampler.ts +79 -0
package/ee/agentic-retrieval/v4/index.ts +673 -164
package/ee/agentic-retrieval/v4/types.ts +33 -4
package/ee/invoke-skills/create-sandbox.ts +119 -0
package/ee/python/documents/processing/doc_processor.ts +106 -14
package/package.json +4 -2
package/ee/agentic-retrieval/ANALYSIS.md +0 -658
package/ee/agentic-retrieval/index.ts +0 -1109
package/ee/agentic-retrieval/logs/README.md +0 -198
package/ee/agentic-retrieval/v2.ts +0 -1628
package/ee/agentic-retrieval/v4/embed-preprocessor.ts +0 -76
package/ee/agentic-retrieval/v4/system-prompt.ts +0 -248
package/ee/agentic-retrieval/v4/tools.ts +0 -241

package/ee/agentic-retrieval/v3/agent-loop.ts CHANGED Viewed

@@ -6,6 +6,8 @@ import type { ExuluReranker } from "@SRC/exulu/reranker";
 import type { AgenticRetrievalOutput, ChunkResult, ClassificationResult } from "./types";
 import type { StrategyConfig } from "./strategies";
 import { createDynamicTools } from "./dynamic-tools";
+import { registerSessionTools } from "./session-tools-registry";
+import type { TrajectoryStepData } from "./trajectory";
 const FINISH_TOOL_NAME = "finish_retrieval";
@@ -71,9 +73,11 @@ export async function* runAgentLoop(params: {
   contextGuidance?: string;
   customInstructions?: string;
   classification: ClassificationResult;
+  sessionId?: string;
   onStepComplete?: (step: AgenticRetrievalOutput["steps"][0]) => void;
+  onTrajectoryStep?: (data: TrajectoryStepData) => void;
 }): AsyncGenerator<AgenticRetrievalOutput> {
-  const { query, strategy, tools, model, reranker, contextGuidance, customInstructions, onStepComplete } = params;
+  const { query, strategy, tools, model, reranker, contextGuidance, customInstructions, sessionId, onStepComplete, onTrajectoryStep } = params;
   const output: AgenticRetrievalOutput = {
     steps: [],
@@ -147,6 +151,16 @@ export async function* runAgentLoop(params: {
     // Extract chunks from tool results
     let stepChunks: any[] = extractChunksFromToolResults(result.toolResults as any[]);
+    // Deduplicate by chunk_id within this step (parallel tool calls can return the same chunk
+    // if the agent searches the same context twice, or the same chunk is indexed in two contexts).
+    const seenChunkIds = new Set<string>();
+    stepChunks = stepChunks.filter((c) => {
+      if (!c.chunk_id) return true;
+      if (seenChunkIds.has(c.chunk_id)) return false;
+      seenChunkIds.add(c.chunk_id);
+      return true;
+    });
     // Check if any search_content call excluded content (triggers page-load dynamic tools)
     // AI SDK v6 uses `input` (not `args`) for tool call arguments
     const hadExcludedContent = (result.toolCalls as any[])?.some(
@@ -164,6 +178,9 @@ export async function* runAgentLoop(params: {
     // Create dynamic tools (browse adjacent pages, load specific pages)
     const newDynamic = await createDynamicTools(stepChunks as ChunkResult[], hadExcludedContent);
     Object.assign(dynamicTools, newDynamic);
+    if (sessionId && Object.keys(newDynamic).length > 0) {
+      registerSessionTools(sessionId, newDynamic);
+    }
     // If relevant content was found but fewer than 5 chunks, withhold finish_retrieval
     // on the next step to force depth exploration via dynamic tools.
@@ -175,9 +192,14 @@ export async function* runAgentLoop(params: {
       Object.keys(newDynamic).length > 0 &&
       step < strategy.stepBudget - 2;
-    // Track which suggested contexts have been searched this step
+    // Track which suggested contexts have been searched this step.
+    // search_content and save_search_results now use knowledge_base_id (singular);
+    // count_items_or_chunks and search_items_by_name still use knowledge_base_ids (plural array).
     for (const tc of (result.toolCalls as any[]) ?? []) {
       if (SEARCH_TOOL_NAMES.has(tc.toolName)) {
+        if (tc.input?.knowledge_base_id) {
+          searchedContextIds.add(tc.input.knowledge_base_id);
+        }
         for (const id of (tc.input?.knowledge_base_ids ?? [])) {
           searchedContextIds.add(id);
         }
@@ -217,11 +239,35 @@ export async function* runAgentLoop(params: {
         output: stepChunks,
       })) ?? [],
     });
-    output.chunks.push(...stepChunks);
+    // Deduplicate against chunks already accumulated from prior steps
+    const existingChunkIds = new Set(output.chunks.map((c) => c.chunk_id).filter(Boolean));
+    output.chunks.push(...stepChunks.filter((c) => !c.chunk_id || !existingChunkIds.has(c.chunk_id)));
     output.usage.push(result.usage);
     onStepComplete?.(stepRecord);
+    if (onTrajectoryStep) {
+      const toolResultMap = new Map<string, any>();
+      for (const tr of (result.toolResults as any[]) ?? []) {
+        toolResultMap.set(tr.toolCallId, tr.output ?? tr.result);
+      }
+      onTrajectoryStep({
+        stepNumber: step + 1,
+        systemPrompt: stepSystemPrompt,
+        text: result.text ?? "",
+        toolCalls:
+          (result.toolCalls as any[])?.map((tc) => ({
+            name: tc.toolName,
+            id: tc.toolCallId,
+            input: tc.input,
+            output: toolResultMap.get(tc.toolCallId),
+          })) ?? [],
+        chunks: stepChunks,
+        dynamicToolsCreated: Object.keys(newDynamic),
+        tokens: result.usage?.totalTokens ?? 0,
+      });
+    }
     yield { ...output };
     // Stop if the model called finish_retrieval AND no forced continuation is needed

package/ee/agentic-retrieval/v3/classifier.ts CHANGED Viewed

@@ -3,6 +3,7 @@ import type { LanguageModel } from "ai";
 import { z } from "zod";
 import type { ExuluContext } from "@SRC/exulu/context";
 import type { ClassificationResult, ContextSample } from "./types";
+import { withRetry } from "@SRC/utils/with-retry";
 /**
  * Classifies a query into one of four types and identifies which contexts are
@@ -19,55 +20,73 @@ export async function classifyQuery(
     .map((ctx) => {
       const sample = samples.find((s) => s.contextId === ctx.id);
       const fieldList = sample?.fields.join(", ") ?? "name, external_id";
-      const exampleStr =
-        sample?.exampleItems.length
-          ? `\n    Example records: ${JSON.stringify(sample.exampleItems.slice(0, 2))}`
-          : "";
-      return `  - ${ctx.id}: ${ctx.name}\n    Description: ${ctx.description}\n    Fields: ${fieldList}${exampleStr}`;
+      return `
+      <context>
+        <id>
+          ${ctx.id}
+        </id>
+        <name>
+          ${ctx.name}
+        </name>
+        <description>
+          ${ctx.description}
+        </description>
+        <fields>
+          ${fieldList}
+        </fields>
+        <example_items>
+          ${sample?.exampleItems.map((item) => JSON.stringify(item)).join("\n")}
+        </example_items>
+       </context>
+       `;
     })
     .join("\n\n");
-  const result = await generateText({
-    model,
-    temperature: 0,
-    output: Output.object({
-      schema: z.object({
-        queryType: z
-          .enum(["aggregate", "list", "targeted", "exploratory"])
-          .describe(
-            "aggregate: ONLY use when the user explicitly asks to COUNT how many documents/items/tickets exist in the knowledge base (e.g. 'how many documents about X?', 'total number of tickets'). NEVER use for: real-world statistics stored in a document, intent statements, how-to questions, error/fault descriptions, configuration questions, or any query that does not explicitly ask for a count of knowledge base entries. When in doubt, choose targeted. " +
+  const result: ClassificationResult = await withRetry(async () => {
+    const result = await generateText({
+      model,
+      temperature: 0,
+      output: Output.object({
+        schema: z.object({
+          queryType: z
+            .enum(["aggregate", "list", "targeted", "exploratory"])
+            .describe(
+              "aggregate: ONLY use when the user explicitly asks to COUNT how many documents/items/tickets exist in the knowledge base (e.g. 'how many documents about X?', 'total number of tickets'). NEVER use for: real-world statistics stored in a document, intent statements, how-to questions, error/fault descriptions, configuration questions, or any query that does not explicitly ask for a count of knowledge base entries. When in doubt, choose targeted. " +
               "list: user wants to enumerate matching items/documents (show me all, list documents about). " +
               "targeted: use for almost everything — specific fact, answer, configuration, how-to, error/fault, feature/behavior question. Also use for intent statements and short commands describing a desired state (phrases that state what the user wants to do or achieve, even without an explicit question word). Real-world statistics stored in documents also go here. When in doubt, choose targeted over aggregate or exploratory. " +
               "exploratory: only for broad conceptual questions needing multi-source synthesis (what is the process for Z, explain how X works, general overview of topic Y).",
-          ),
-        language: z
-          .string()
-          .describe("ISO 639-3 language code of the query (e.g. eng, deu, fra)"),
-        suggestedContextIds: z
-          .array(z.string())
-          .describe(
-            "IDs of knowledge bases most likely to contain the answer. Return empty array to search all contexts.",
-          ),
+            ),
+          language: z
+            .string()
+            .describe("ISO 639-3 language code of the query (e.g. eng, deu, fra)"),
+          suggestedContextIds: z
+            .array(z.enum(contexts.map((c) => c.id)))
+            .describe(
+              "IDs of knowledge bases most likely to contain the answer. Return empty array to search all contexts.",
+            ),
+        }),
       }),
-    }),
-    toolChoice: "none",
-    system: `You are a query classifier for a multi-knowledge-base retrieval system.
-Classify the query and identify which knowledge bases are most relevant.
+      toolChoice: "none",
+      system: `You are a query classifier for a multi-knowledge-base retrieval system.
+    Classify the query and identify which knowledge bases are most relevant.
+    Available knowledge bases:
+    ${contextDescriptions}
+    Guidelines for queryType:
+    - Use "aggregate" ONLY when the query contains explicit counting language (e.g., "how many", "count", "total number", "wie viele"). Short statements, commands, or phrases without a question word are NEVER aggregate — classify them as targeted.
+    - When in doubt between aggregate and targeted: always choose targeted.
+    Guidelines for suggestedContextIds:
+    - Be conservative: only suggest contexts that are genuinely likely to contain the answer.
+      Aim for 2–3 focused suggestions rather than listing everything.
+    - Use each knowledge base's name and description (shown above) to judge relevance.
+    - Return an empty array only if you truly cannot determine which contexts are relevant.`,
+      prompt: `Query: ${query}`,
+    });
-Available knowledge bases:
-${contextDescriptions}
+    return result.output as ClassificationResult;
+  }, 3)
-Guidelines for queryType:
-- Use "aggregate" ONLY when the query contains explicit counting language (e.g., "how many", "count", "total number", "wie viele"). Short statements, commands, or phrases without a question word are NEVER aggregate — classify them as targeted.
-- When in doubt between aggregate and targeted: always choose targeted.
-Guidelines for suggestedContextIds:
-- Be conservative: only suggest contexts that are genuinely likely to contain the answer.
-  Aim for 2–3 focused suggestions rather than listing everything.
-- Use each knowledge base's name and description (shown above) to judge relevance.
-- Return an empty array only if you truly cannot determine which contexts are relevant.`,
-    prompt: `Query: ${query}`,
-  });
-  return result.output as ClassificationResult;
+  return result;
 }

package/ee/agentic-retrieval/v3/context-sampler.ts CHANGED Viewed

@@ -3,10 +3,19 @@ import { postgresClient } from "@SRC/postgres/client";
 import { applyAccessControl } from "@SRC/graphql/utilities/access-control";
 import { convertContextToTableDefinition } from "@SRC/graphql/utilities/convert-context-to-table-definition";
 import type { User } from "@EXULU_TYPES/models/user";
-import type { ContextSample } from "./types";
 const CACHE_TTL_MS = 60 * 60 * 1000; // 1 hour
+export interface ContextSample {
+  contextId: string;
+  contextName: string;
+  /** All field names available on items (standard + custom) */
+  fields: string[];
+  /** Up to 2 example item records */
+  exampleItems: Array<Record<string, any>>;
+  sampledAt: number;
+}
 /**
  * Pulls 1–2 example item records per context at agent initialization and caches
  * them in memory. These samples are injected into the classifier prompt so the