@exulu/backend 1.54.0 → 1.56.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,6 +6,8 @@ import type { ExuluReranker } from "@SRC/exulu/reranker";
6
6
  import type { AgenticRetrievalOutput, ChunkResult, ClassificationResult } from "./types";
7
7
  import type { StrategyConfig } from "./strategies";
8
8
  import { createDynamicTools } from "./dynamic-tools";
9
+ import { registerSessionTools } from "./session-tools-registry";
10
+ import type { TrajectoryStepData } from "./trajectory";
9
11
 
10
12
  const FINISH_TOOL_NAME = "finish_retrieval";
11
13
 
@@ -71,9 +73,11 @@ export async function* runAgentLoop(params: {
71
73
  contextGuidance?: string;
72
74
  customInstructions?: string;
73
75
  classification: ClassificationResult;
76
+ sessionId?: string;
74
77
  onStepComplete?: (step: AgenticRetrievalOutput["steps"][0]) => void;
78
+ onTrajectoryStep?: (data: TrajectoryStepData) => void;
75
79
  }): AsyncGenerator<AgenticRetrievalOutput> {
76
- const { query, strategy, tools, model, reranker, contextGuidance, customInstructions, onStepComplete } = params;
80
+ const { query, strategy, tools, model, reranker, contextGuidance, customInstructions, sessionId, onStepComplete, onTrajectoryStep } = params;
77
81
 
78
82
  const output: AgenticRetrievalOutput = {
79
83
  steps: [],
@@ -147,6 +151,16 @@ export async function* runAgentLoop(params: {
147
151
  // Extract chunks from tool results
148
152
  let stepChunks: any[] = extractChunksFromToolResults(result.toolResults as any[]);
149
153
 
154
+ // Deduplicate by chunk_id within this step (parallel tool calls can return the same chunk
155
+ // if the agent searches the same context twice, or the same chunk is indexed in two contexts).
156
+ const seenChunkIds = new Set<string>();
157
+ stepChunks = stepChunks.filter((c) => {
158
+ if (!c.chunk_id) return true;
159
+ if (seenChunkIds.has(c.chunk_id)) return false;
160
+ seenChunkIds.add(c.chunk_id);
161
+ return true;
162
+ });
163
+
150
164
  // Check if any search_content call excluded content (triggers page-load dynamic tools)
151
165
  // AI SDK v6 uses `input` (not `args`) for tool call arguments
152
166
  const hadExcludedContent = (result.toolCalls as any[])?.some(
@@ -164,6 +178,9 @@ export async function* runAgentLoop(params: {
164
178
  // Create dynamic tools (browse adjacent pages, load specific pages)
165
179
  const newDynamic = await createDynamicTools(stepChunks as ChunkResult[], hadExcludedContent);
166
180
  Object.assign(dynamicTools, newDynamic);
181
+ if (sessionId && Object.keys(newDynamic).length > 0) {
182
+ registerSessionTools(sessionId, newDynamic);
183
+ }
167
184
 
168
185
  // If relevant content was found but fewer than 5 chunks, withhold finish_retrieval
169
186
  // on the next step to force depth exploration via dynamic tools.
@@ -175,9 +192,14 @@ export async function* runAgentLoop(params: {
175
192
  Object.keys(newDynamic).length > 0 &&
176
193
  step < strategy.stepBudget - 2;
177
194
 
178
- // Track which suggested contexts have been searched this step
195
+ // Track which suggested contexts have been searched this step.
196
+ // search_content and save_search_results now use knowledge_base_id (singular);
197
+ // count_items_or_chunks and search_items_by_name still use knowledge_base_ids (plural array).
179
198
  for (const tc of (result.toolCalls as any[]) ?? []) {
180
199
  if (SEARCH_TOOL_NAMES.has(tc.toolName)) {
200
+ if (tc.input?.knowledge_base_id) {
201
+ searchedContextIds.add(tc.input.knowledge_base_id);
202
+ }
181
203
  for (const id of (tc.input?.knowledge_base_ids ?? [])) {
182
204
  searchedContextIds.add(id);
183
205
  }
@@ -217,11 +239,35 @@ export async function* runAgentLoop(params: {
217
239
  output: stepChunks,
218
240
  })) ?? [],
219
241
  });
220
- output.chunks.push(...stepChunks);
242
+ // Deduplicate against chunks already accumulated from prior steps
243
+ const existingChunkIds = new Set(output.chunks.map((c) => c.chunk_id).filter(Boolean));
244
+ output.chunks.push(...stepChunks.filter((c) => !c.chunk_id || !existingChunkIds.has(c.chunk_id)));
221
245
  output.usage.push(result.usage);
222
246
 
223
247
  onStepComplete?.(stepRecord);
224
248
 
249
+ if (onTrajectoryStep) {
250
+ const toolResultMap = new Map<string, any>();
251
+ for (const tr of (result.toolResults as any[]) ?? []) {
252
+ toolResultMap.set(tr.toolCallId, tr.output ?? tr.result);
253
+ }
254
+ onTrajectoryStep({
255
+ stepNumber: step + 1,
256
+ systemPrompt: stepSystemPrompt,
257
+ text: result.text ?? "",
258
+ toolCalls:
259
+ (result.toolCalls as any[])?.map((tc) => ({
260
+ name: tc.toolName,
261
+ id: tc.toolCallId,
262
+ input: tc.input,
263
+ output: toolResultMap.get(tc.toolCallId),
264
+ })) ?? [],
265
+ chunks: stepChunks,
266
+ dynamicToolsCreated: Object.keys(newDynamic),
267
+ tokens: result.usage?.totalTokens ?? 0,
268
+ });
269
+ }
270
+
225
271
  yield { ...output };
226
272
 
227
273
  // Stop if the model called finish_retrieval AND no forced continuation is needed
@@ -3,6 +3,7 @@ import type { LanguageModel } from "ai";
3
3
  import { z } from "zod";
4
4
  import type { ExuluContext } from "@SRC/exulu/context";
5
5
  import type { ClassificationResult, ContextSample } from "./types";
6
+ import { withRetry } from "@SRC/utils/with-retry";
6
7
 
7
8
  /**
8
9
  * Classifies a query into one of four types and identifies which contexts are
@@ -19,55 +20,73 @@ export async function classifyQuery(
19
20
  .map((ctx) => {
20
21
  const sample = samples.find((s) => s.contextId === ctx.id);
21
22
  const fieldList = sample?.fields.join(", ") ?? "name, external_id";
22
- const exampleStr =
23
- sample?.exampleItems.length
24
- ? `\n Example records: ${JSON.stringify(sample.exampleItems.slice(0, 2))}`
25
- : "";
26
- return ` - ${ctx.id}: ${ctx.name}\n Description: ${ctx.description}\n Fields: ${fieldList}${exampleStr}`;
23
+ return `
24
+ <context>
25
+ <id>
26
+ ${ctx.id}
27
+ </id>
28
+ <name>
29
+ ${ctx.name}
30
+ </name>
31
+ <description>
32
+ ${ctx.description}
33
+ </description>
34
+ <fields>
35
+ ${fieldList}
36
+ </fields>
37
+ <example_items>
38
+ ${sample?.exampleItems.map((item) => JSON.stringify(item)).join("\n")}
39
+ </example_items>
40
+ </context>
41
+ `;
27
42
  })
28
43
  .join("\n\n");
29
44
 
30
- const result = await generateText({
31
- model,
32
- temperature: 0,
33
- output: Output.object({
34
- schema: z.object({
35
- queryType: z
36
- .enum(["aggregate", "list", "targeted", "exploratory"])
37
- .describe(
38
- "aggregate: ONLY use when the user explicitly asks to COUNT how many documents/items/tickets exist in the knowledge base (e.g. 'how many documents about X?', 'total number of tickets'). NEVER use for: real-world statistics stored in a document, intent statements, how-to questions, error/fault descriptions, configuration questions, or any query that does not explicitly ask for a count of knowledge base entries. When in doubt, choose targeted. " +
45
+ const result: ClassificationResult = await withRetry(async () => {
46
+ const result = await generateText({
47
+ model,
48
+ temperature: 0,
49
+ output: Output.object({
50
+ schema: z.object({
51
+ queryType: z
52
+ .enum(["aggregate", "list", "targeted", "exploratory"])
53
+ .describe(
54
+ "aggregate: ONLY use when the user explicitly asks to COUNT how many documents/items/tickets exist in the knowledge base (e.g. 'how many documents about X?', 'total number of tickets'). NEVER use for: real-world statistics stored in a document, intent statements, how-to questions, error/fault descriptions, configuration questions, or any query that does not explicitly ask for a count of knowledge base entries. When in doubt, choose targeted. " +
39
55
  "list: user wants to enumerate matching items/documents (show me all, list documents about). " +
40
56
  "targeted: use for almost everything — specific fact, answer, configuration, how-to, error/fault, feature/behavior question. Also use for intent statements and short commands describing a desired state (phrases that state what the user wants to do or achieve, even without an explicit question word). Real-world statistics stored in documents also go here. When in doubt, choose targeted over aggregate or exploratory. " +
41
57
  "exploratory: only for broad conceptual questions needing multi-source synthesis (what is the process for Z, explain how X works, general overview of topic Y).",
42
- ),
43
- language: z
44
- .string()
45
- .describe("ISO 639-3 language code of the query (e.g. eng, deu, fra)"),
46
- suggestedContextIds: z
47
- .array(z.string())
48
- .describe(
49
- "IDs of knowledge bases most likely to contain the answer. Return empty array to search all contexts.",
50
- ),
58
+ ),
59
+ language: z
60
+ .string()
61
+ .describe("ISO 639-3 language code of the query (e.g. eng, deu, fra)"),
62
+ suggestedContextIds: z
63
+ .array(z.enum(contexts.map((c) => c.id)))
64
+ .describe(
65
+ "IDs of knowledge bases most likely to contain the answer. Return empty array to search all contexts.",
66
+ ),
67
+ }),
51
68
  }),
52
- }),
53
- toolChoice: "none",
54
- system: `You are a query classifier for a multi-knowledge-base retrieval system.
55
- Classify the query and identify which knowledge bases are most relevant.
69
+ toolChoice: "none",
70
+ system: `You are a query classifier for a multi-knowledge-base retrieval system.
71
+ Classify the query and identify which knowledge bases are most relevant.
72
+
73
+ Available knowledge bases:
74
+ ${contextDescriptions}
75
+
76
+ Guidelines for queryType:
77
+ - Use "aggregate" ONLY when the query contains explicit counting language (e.g., "how many", "count", "total number", "wie viele"). Short statements, commands, or phrases without a question word are NEVER aggregate — classify them as targeted.
78
+ - When in doubt between aggregate and targeted: always choose targeted.
79
+
80
+ Guidelines for suggestedContextIds:
81
+ - Be conservative: only suggest contexts that are genuinely likely to contain the answer.
82
+ Aim for 2–3 focused suggestions rather than listing everything.
83
+ - Use each knowledge base's name and description (shown above) to judge relevance.
84
+ - Return an empty array only if you truly cannot determine which contexts are relevant.`,
85
+ prompt: `Query: ${query}`,
86
+ });
56
87
 
57
- Available knowledge bases:
58
- ${contextDescriptions}
88
+ return result.output as ClassificationResult;
89
+ }, 3)
59
90
 
60
- Guidelines for queryType:
61
- - Use "aggregate" ONLY when the query contains explicit counting language (e.g., "how many", "count", "total number", "wie viele"). Short statements, commands, or phrases without a question word are NEVER aggregate — classify them as targeted.
62
- - When in doubt between aggregate and targeted: always choose targeted.
63
-
64
- Guidelines for suggestedContextIds:
65
- - Be conservative: only suggest contexts that are genuinely likely to contain the answer.
66
- Aim for 2–3 focused suggestions rather than listing everything.
67
- - Use each knowledge base's name and description (shown above) to judge relevance.
68
- - Return an empty array only if you truly cannot determine which contexts are relevant.`,
69
- prompt: `Query: ${query}`,
70
- });
71
-
72
- return result.output as ClassificationResult;
91
+ return result;
73
92
  }
@@ -3,10 +3,19 @@ import { postgresClient } from "@SRC/postgres/client";
3
3
  import { applyAccessControl } from "@SRC/graphql/utilities/access-control";
4
4
  import { convertContextToTableDefinition } from "@SRC/graphql/utilities/convert-context-to-table-definition";
5
5
  import type { User } from "@EXULU_TYPES/models/user";
6
- import type { ContextSample } from "./types";
7
6
 
8
7
  const CACHE_TTL_MS = 60 * 60 * 1000; // 1 hour
9
8
 
9
+ export interface ContextSample {
10
+ contextId: string;
11
+ contextName: string;
12
+ /** All field names available on items (standard + custom) */
13
+ fields: string[];
14
+ /** Up to 2 example item records */
15
+ exampleItems: Array<Record<string, any>>;
16
+ sampledAt: number;
17
+ }
18
+
10
19
  /**
11
20
  * Pulls 1–2 example item records per context at agent initialization and caches
12
21
  * them in memory. These samples are injected into the classifier prompt so the