@exulu/backend 1.53.0 → 1.54.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,242 @@
1
+ import { generateText, stepCountIs, tool } from "ai";
2
+ import type { LanguageModel, Tool as AITool, ModelMessage } from "ai";
3
+ import { z } from "zod";
4
+ import { withRetry } from "@SRC/utils/with-retry";
5
+ import type { ExuluReranker } from "@SRC/exulu/reranker";
6
+ import type { AgenticRetrievalOutput, ChunkResult, ClassificationResult } from "./types";
7
+ import type { StrategyConfig } from "./strategies";
8
+ import { createDynamicTools } from "./dynamic-tools";
9
+
10
/** Name under which the completion-signal tool is exposed to the model. */
const FINISH_TOOL_NAME = "finish_retrieval";

/** Input accepted by the completion-signal tool: one short justification. */
const finishRetrievalInput = z.object({
  reasoning: z.string().describe("One sentence explaining why retrieval is complete"),
});

/**
 * Tool the model calls to declare that retrieval is complete.
 * Executing it echoes the reasoning back as a JSON string flagged `finished: true`.
 */
const finishRetrievalTool = tool({
  description: [
    "Call this tool when you have retrieved sufficient information and no further searches are needed. ",
    "You MUST call this tool to signal that retrieval is complete — do not write a text conclusion.",
  ].join(""),
  inputSchema: finishRetrievalInput,
  execute: async ({ reasoning }) => {
    const payload = { finished: true, reasoning };
    return JSON.stringify(payload);
  },
});
21
+
22
/**
 * Collects chunk records from the tool results of one generateText step.
 *
 * For every tool result the payload is read from `output` (falling back to
 * `result`), JSON-parsed when it is a string, and scanned for entries that
 * carry both an `item_id` and a `context`. A payload may be an array of
 * entries or a single entry object (the page-load dynamic tools return one
 * object rather than a list). Everything else — unparseable strings, null
 * entries, payloads without chunk fields — is ignored.
 *
 * @param toolResults Tool results of a step; `null`/`undefined` is treated as empty.
 * @returns The chunk records found, in encounter order. `context` is
 *   normalised to `context.id` when the entry carries an object with an `id`.
 */
function extractChunksFromToolResults(toolResults: any[]): ChunkResult[] {
  const chunks: ChunkResult[] = [];
  for (const result of toolResults ?? []) {
    // AI SDK v6 uses `output` (not `result`) for tool result values
    const rawOutput = result?.output ?? result?.result;
    let parsed: any;
    try {
      parsed = typeof rawOutput === "string" ? JSON.parse(rawOutput) : rawOutput;
    } catch {
      // Not JSON — this tool result cannot contain chunks.
      continue;
    }

    // Treat a single record like a one-element list so both shapes share one path.
    const candidates: any[] = Array.isArray(parsed) ? parsed : [parsed];
    for (const item of candidates) {
      if (item?.item_id && item?.context) {
        chunks.push({
          item_name: item.item_name,
          item_id: item.item_id,
          context: item.context?.id ?? item.context,
          chunk_id: item.chunk_id,
          chunk_index: item.chunk_index,
          chunk_content: item.chunk_content,
          metadata: item.metadata,
        });
      }
    }
  }
  return chunks;
}
52
+
53
/**
 * Core agent loop: one generateText call per step.
 *
 * Each step issues a single generateText call with toolChoice: "required" and
 * a one-step limit, so the model has to answer with tool calls. The response
 * messages (assistant message + tool results) are appended to the
 * conversation history so the model sees them on the next step.
 *
 * After every step the accumulated output — steps, reasoning, chunks, usage
 * and the running totalTokens — is yielded as a shallow copy, and
 * onStepComplete (if given) is invoked with the step record.
 *
 * The finish_retrieval tool is withheld for a step when the previous step
 * either found only a few chunks while dynamic tools were created (depth
 * exploration) or left suggested contexts unsearched (context coverage).
 *
 * The loop stops when:
 *  - The model calls finish_retrieval and no suggested context coverage is
 *    still being enforced, OR
 *  - The strategy's stepBudget is exhausted
 *
 * @param params.query User query; becomes the first message and the rerank query.
 * @param params.strategy Supplies the base instructions and the stepBudget.
 * @param params.tools Static tools offered to the model on every step.
 * @param params.classification Its suggestedContextIds drive coverage enforcement.
 * @throws Rethrows whatever the withRetry-wrapped generateText call throws.
 */
export async function* runAgentLoop(params: {
  query: string;
  strategy: StrategyConfig;
  tools: Record<string, AITool>;
  model: LanguageModel;
  reranker?: ExuluReranker;
  contextGuidance?: string;
  customInstructions?: string;
  classification: ClassificationResult;
  onStepComplete?: (step: AgenticRetrievalOutput["steps"][0]) => void;
}): AsyncGenerator<AgenticRetrievalOutput> {
  const { query, strategy, tools, model, reranker, contextGuidance, customInstructions, onStepComplete } = params;

  const output: AgenticRetrievalOutput = {
    steps: [],
    reasoning: [],
    chunks: [],
    usage: [],
    totalTokens: 0,
  };

  const messages: ModelMessage[] = [{ role: "user", content: query }];
  // Dynamic tools accumulate across steps; later steps may override same-named entries.
  const dynamicTools: Record<string, AITool> = {};
  let forceDepthExploration = false;
  let forceContextCoverage = false;

  // Track which suggested contexts have been searched to enforce coverage
  const suggestedContextIds = params.classification.suggestedContextIds ?? [];
  const searchedContextIds = new Set<string>();

  const baseSystemPrompt = [
    strategy.instructions,
    contextGuidance ? `\nCONTEXT GUIDANCE:\n${contextGuidance}` : "",
    customInstructions ? `\nCUSTOM INSTRUCTIONS (override context guidance above where they conflict):\n${customInstructions}` : "",
  ]
    .filter(Boolean)
    .join("\n");

  const SEARCH_TOOL_NAMES = new Set([
    "search_content",
    "save_search_results",
    "count_items_or_chunks",
    "search_items_by_name",
  ]);

  for (let step = 0; step < strategy.stepBudget; step++) {
    console.log(`[EXULU] v3 agent loop — step ${step + 1}/${strategy.stepBudget}`);

    // Build dynamic system prompt: add unsearched-context note after the first step
    const unsearchedNow = suggestedContextIds.filter((id) => !searchedContextIds.has(id));
    const contextCoverageNote =
      unsearchedNow.length > 0 && step > 0
        ? `\n\n⚠️ MANDATORY: The following suggested contexts have NOT been searched yet: [${unsearchedNow.join(", ")}]. You MUST include ALL of them in your next search call. Note: support/ticket contexts use document names like "Ticket #XXXX" — do NOT use item_names when searching them.`
        : "";
    const stepSystemPrompt = baseSystemPrompt + contextCoverageNote;

    let result: Awaited<ReturnType<typeof generateText>>;
    try {
      const stepTools = forceDepthExploration || forceContextCoverage
        ? { ...tools, ...dynamicTools } // finish_retrieval withheld — model must search/explore more
        : { ...tools, ...dynamicTools, [FINISH_TOOL_NAME]: finishRetrievalTool };

      result = await withRetry(() =>
        generateText({
          model,
          temperature: 0,
          system: stepSystemPrompt,
          messages,
          tools: stepTools,
          toolChoice: "required",
          stopWhen: stepCountIs(1),
        }),
      );
    } catch (err) {
      console.error("[EXULU] v3 generateText failed:", err);
      throw err;
    }

    // Carry conversation forward: assistant message + tool results go into history
    // so the model sees them on the next iteration.
    messages.push(...(result.response.messages as ModelMessage[]));

    // AI SDK v6 uses `input` (not `args`) for tool call arguments
    const toolCalls: any[] = (result.toolCalls as any[]) ?? [];

    // Extract chunks from tool results
    let stepChunks: any[] = extractChunksFromToolResults(result.toolResults as any[]);

    // Check if any search_content call excluded content (triggers page-load dynamic tools)
    const hadExcludedContent = toolCalls.some(
      (tc) =>
        (tc.toolName === "search_content" && tc.input?.includeContent === false) ||
        tc.toolName === "search_items_by_name",
    );

    // Rerank if reranker is available
    if (reranker && stepChunks.length > 0) {
      console.log(`[EXULU] v3 reranking ${stepChunks.length} chunks with ${reranker.name}`);
      stepChunks = await reranker.run(query, stepChunks as any);
    }

    // Create dynamic tools (browse adjacent pages, load specific pages)
    const newDynamic = await createDynamicTools(stepChunks as ChunkResult[], hadExcludedContent);
    Object.assign(dynamicTools, newDynamic);

    // If relevant content was found but fewer than 5 chunks, withhold finish_retrieval
    // on the next step to force depth exploration via dynamic tools.
    // Only applies when dynamic tools exist and there's budget remaining for both
    // a depth step and a finish step.
    forceDepthExploration =
      stepChunks.length > 0 &&
      stepChunks.length < 5 &&
      Object.keys(newDynamic).length > 0 &&
      step < strategy.stepBudget - 2;

    // Track which suggested contexts have been searched this step
    for (const tc of toolCalls) {
      if (SEARCH_TOOL_NAMES.has(tc.toolName)) {
        for (const id of (tc.input?.knowledge_base_ids ?? [])) {
          searchedContextIds.add(id);
        }
      }
    }

    // Withhold finish_retrieval on the next step if suggested contexts remain unsearched
    const unsearchedAfterStep = suggestedContextIds.filter((id) => !searchedContextIds.has(id));
    forceContextCoverage = unsearchedAfterStep.length > 0 && step < strategy.stepBudget - 1;
    if (forceContextCoverage) {
      console.log(
        `[EXULU] v3 forceContextCoverage — unsearched suggested: [${unsearchedAfterStep.join(", ")}]`,
      );
    }

    // Record step
    const stepRecord = {
      stepNumber: step + 1,
      text: result.text ?? "",
      toolCalls: toolCalls.map((tc) => ({
        name: tc.toolName,
        id: tc.toolCallId,
        input: tc.input,
      })),
      chunks: stepChunks,
      dynamicToolsCreated: Object.keys(newDynamic),
      tokens: result.usage?.totalTokens ?? 0,
    };

    output.steps.push(stepRecord);
    output.reasoning.push({
      text: result.text ?? "",
      tools: toolCalls.map((tc) => ({
        name: tc.toolName,
        id: tc.toolCallId,
        input: tc.input,
        output: stepChunks,
      })),
    });
    output.chunks.push(...stepChunks);
    output.usage.push(result.usage);

    // Keep the running total current BEFORE yielding: consumers only ever see
    // the yielded snapshots, so a total computed after the loop would never
    // reach them.
    output.totalTokens = output.usage.reduce((sum, u) => sum + (u?.totalTokens ?? 0), 0);

    onStepComplete?.(stepRecord);

    yield { ...output };

    // Stop if the model called finish_retrieval AND no forced continuation is needed
    const calledFinish = toolCalls.some((tc) => tc.toolName === FINISH_TOOL_NAME);
    if (calledFinish && !forceContextCoverage) {
      console.log(`[EXULU] v3 model called finish_retrieval after step ${step + 1}`);
      break;
    } else if (calledFinish && forceContextCoverage) {
      console.log(
        `[EXULU] v3 model called finish_retrieval but overriding — unsearched suggested contexts remain`,
      );
    }
  }
}
@@ -0,0 +1,73 @@
1
+ import { generateText, Output } from "ai";
2
+ import type { LanguageModel } from "ai";
3
+ import { z } from "zod";
4
+ import type { ExuluContext } from "@SRC/exulu/context";
5
+ import type { ClassificationResult, ContextSample } from "./types";
6
+
7
/**
 * Classifies a query into one of four types and identifies which contexts are
 * most relevant. This is a single LLM call that runs before the main agent
 * loop, enabling strategy-based routing.
 *
 * The model's `suggestedContextIds` are de-duplicated and restricted to the
 * IDs of the contexts passed in. An ID the model invented could never be
 * marked as searched, which would keep coverage enforcement active until the
 * step budget runs out. If nothing valid remains the list is empty, which the
 * schema defines as "search all contexts".
 *
 * @param query The user query to classify.
 * @param contexts Knowledge bases the classifier may choose from.
 * @param samples Per-context field lists and example records shown to the model.
 * @param model Language model used for the classification call.
 * @returns The structured classification with validated `suggestedContextIds`.
 */
export async function classifyQuery(
  query: string,
  contexts: ExuluContext[],
  samples: ContextSample[],
  model: LanguageModel,
): Promise<ClassificationResult> {
  const contextDescriptions = contexts
    .map((ctx) => {
      const sample = samples.find((s) => s.contextId === ctx.id);
      const fieldList = sample?.fields.join(", ") ?? "name, external_id";
      // At most two example records are shown per context.
      const exampleStr =
        sample?.exampleItems.length
          ? `\n Example records: ${JSON.stringify(sample.exampleItems.slice(0, 2))}`
          : "";
      return ` - ${ctx.id}: ${ctx.name}\n Description: ${ctx.description}\n Fields: ${fieldList}${exampleStr}`;
    })
    .join("\n\n");

  const result = await generateText({
    model,
    temperature: 0,
    output: Output.object({
      schema: z.object({
        queryType: z
          .enum(["aggregate", "list", "targeted", "exploratory"])
          .describe(
            "aggregate: ONLY use when the user explicitly asks to COUNT how many documents/items/tickets exist in the knowledge base (e.g. 'how many documents about X?', 'total number of tickets'). NEVER use for: real-world statistics stored in a document, intent statements, how-to questions, error/fault descriptions, configuration questions, or any query that does not explicitly ask for a count of knowledge base entries. When in doubt, choose targeted. " +
              "list: user wants to enumerate matching items/documents (show me all, list documents about). " +
              "targeted: use for almost everything — specific fact, answer, configuration, how-to, error/fault, feature/behavior question. Also use for intent statements and short commands describing a desired state (phrases that state what the user wants to do or achieve, even without an explicit question word). Real-world statistics stored in documents also go here. When in doubt, choose targeted over aggregate or exploratory. " +
              "exploratory: only for broad conceptual questions needing multi-source synthesis (what is the process for Z, explain how X works, general overview of topic Y).",
          ),
        language: z
          .string()
          .describe("ISO 639-3 language code of the query (e.g. eng, deu, fra)"),
        suggestedContextIds: z
          .array(z.string())
          .describe(
            "IDs of knowledge bases most likely to contain the answer. Return empty array to search all contexts.",
          ),
      }),
    }),
    toolChoice: "none",
    system: `You are a query classifier for a multi-knowledge-base retrieval system.
Classify the query and identify which knowledge bases are most relevant.

Available knowledge bases:
${contextDescriptions}

Guidelines for queryType:
- Use "aggregate" ONLY when the query contains explicit counting language (e.g., "how many", "count", "total number", "wie viele"). Short statements, commands, or phrases without a question word are NEVER aggregate — classify them as targeted.
- When in doubt between aggregate and targeted: always choose targeted.

Guidelines for suggestedContextIds:
- Be conservative: only suggest contexts that are genuinely likely to contain the answer.
Aim for 2–3 focused suggestions rather than listing everything.
- Use each knowledge base's name and description (shown above) to judge relevance.
- Return an empty array only if you truly cannot determine which contexts are relevant.`,
    prompt: `Query: ${query}`,
  });

  const classification = result.output as ClassificationResult;

  // Keep only IDs that refer to a context we actually offered, without duplicates.
  const knownContextIds = new Set<string>(contexts.map((ctx) => ctx.id));
  const suggestedContextIds = [...new Set(classification.suggestedContextIds ?? [])].filter((id) =>
    knownContextIds.has(id),
  );

  return { ...classification, suggestedContextIds };
}
@@ -0,0 +1,70 @@
1
+ import { ExuluContext, getTableName } from "@SRC/exulu/context";
2
+ import { postgresClient } from "@SRC/postgres/client";
3
+ import { applyAccessControl } from "@SRC/graphql/utilities/access-control";
4
+ import { convertContextToTableDefinition } from "@SRC/graphql/utilities/convert-context-to-table-definition";
5
+ import type { User } from "@EXULU_TYPES/models/user";
6
+ import type { ContextSample } from "./types";
7
+
8
const CACHE_TTL_MS = 60 * 60 * 1000; // 1 hour

/**
 * Pulls up to two example item records per context and caches them in memory
 * for CACHE_TTL_MS. These samples are injected into the classifier prompt so
 * the model understands what data is actually stored (not just field names).
 *
 * Example rows are fetched with access control applied for the requesting
 * user, so the cache is partitioned per context AND per viewer: rows sampled
 * for one user are never handed to another.
 */
export class ContextSampler {
  /** contextId → viewer key → cached sample. */
  private cache = new Map<string, Map<string, ContextSample>>();

  /**
   * Returns one sample per context, in the same order as `contexts`.
   * Contexts are sampled concurrently.
   */
  async getSamples(
    contexts: ExuluContext[],
    user?: User,
    role?: string,
  ): Promise<ContextSample[]> {
    return Promise.all(contexts.map((ctx) => this.getSample(ctx, user, role)));
  }

  /**
   * Returns the cached sample for this context/viewer while it is younger
   * than CACHE_TTL_MS; otherwise queries the context table and caches the
   * result. Query failures are logged and produce a sample with no example
   * items (best effort).
   */
  private async getSample(
    ctx: ExuluContext,
    user?: User,
    role?: string,
  ): Promise<ContextSample> {
    // NOTE(review): assumes User exposes a stable `id` — confirm against the User model.
    // `role` is not consulted by the query below, so it is not part of the key.
    const viewerKey = user?.id != null ? `user:${String(user.id)}` : "anonymous";

    const cached = this.cache.get(ctx.id)?.get(viewerKey);
    if (cached && Date.now() - cached.sampledAt < CACHE_TTL_MS) {
      return cached;
    }

    const { db } = await postgresClient();
    const tableName = getTableName(ctx.id);
    const tableDefinition = convertContextToTableDefinition(ctx);

    const customFieldNames = ctx.fields.map((f) => f.name);
    const selectFields = ["id", "name", "external_id", ...customFieldNames];

    let exampleItems: Record<string, any>[] = [];
    try {
      let query = db(tableName).select(selectFields).whereNull("archived").limit(2);
      query = applyAccessControl(tableDefinition, query, user, tableName);
      exampleItems = await query;
    } catch (err) {
      // If table doesn't exist yet or column mismatch, return empty samples
      console.warn(`[EXULU] context sampler — could not sample context ${ctx.id}:`, err);
    }

    const sample: ContextSample = {
      contextId: ctx.id,
      contextName: ctx.name,
      fields: ["name", "external_id", ...customFieldNames],
      exampleItems,
      sampledAt: Date.now(),
    };

    let perViewer = this.cache.get(ctx.id);
    if (!perViewer) {
      perViewer = new Map<string, ContextSample>();
      this.cache.set(ctx.id, perViewer);
    }
    // Drop expired entries of other viewers so the per-viewer map cannot grow without bound.
    for (const [key, entry] of perViewer) {
      if (sample.sampledAt - entry.sampledAt >= CACHE_TTL_MS) {
        perViewer.delete(key);
      }
    }
    perViewer.set(viewerKey, sample);

    return sample;
  }

  /** Evict a context (for all viewers) from cache so it's re-sampled on next use */
  invalidate(contextId: string): void {
    this.cache.delete(contextId);
  }
}
@@ -0,0 +1,115 @@
1
+ import { z } from "zod";
2
+ import { tool } from "ai";
3
+ import type { Tool as AITool } from "ai";
4
+ import { postgresClient } from "@SRC/postgres/client";
5
+ import { getChunksTableName } from "@SRC/exulu/context";
6
+ import { sanitizeToolName } from "@SRC/utils/sanitize-tool-name.ts";
7
+ import type { ChunkResult } from "./types";
8
+
9
/**
 * Creates per-chunk navigation tools from the results of a search step.
 *
 * Two types of dynamic tools are created:
 *  - get_more_content_from_{item}: Browse a range of chunks of an item that
 *    has more than one chunk (one tool per item)
 *  - get_{item}_page_{n}_content: Load the full text of a specific page/chunk
 *    (created only when `hadExcludedContent` is true)
 *
 * Chunks without an `item_id` or `context` are skipped. Tool names are built
 * from the item name (falling back to the item id when the name is missing)
 * and passed through sanitizeToolName; if two chunks map to the same tool
 * name, the first one wins.
 *
 * @param chunks Chunks found in the current step.
 * @param hadExcludedContent Whether page-load tools should be created.
 * @returns Map of tool name → tool; empty when there is nothing to navigate.
 */
export async function createDynamicTools(
  chunks: ChunkResult[],
  hadExcludedContent: boolean,
): Promise<Record<string, AITool>> {
  const tools: Record<string, AITool> = {};
  // Nothing to build tools for — avoid acquiring a database client at all.
  if (!chunks || chunks.length === 0) return tools;

  const { db } = await postgresClient();
  const seenItems = new Set<string>();

  for (const chunk of chunks) {
    if (!chunk.item_id || !chunk.context) continue;

    // Narrowed copies for the closures below (no non-null assertions needed).
    const itemId = chunk.item_id;
    const contextId = chunk.context;
    const itemName = chunk.item_name;
    // Avoid "undefined" in tool names when a chunk carries no item name.
    const itemLabel = itemName ?? itemId;
    const chunksTable = getChunksTableName(contextId);

    // ── get_more_content_from_{item} ──────────────────────────
    const browseToolName = sanitizeToolName(`get_more_content_from_${itemLabel}`);
    if (!seenItems.has(itemId) && !tools[browseToolName]) {
      seenItems.add(itemId);

      try {
        const countResult = await db(chunksTable)
          .count("id as count")
          .where("source", itemId)
          .first();
        const total = Number(countResult?.count ?? 0);

        if (total > 1) {
          tools[browseToolName] = tool({
            description: `"${itemLabel}" has ${total} pages/chunks. Use this to read a range of pages from it.`,
            // NOTE(review): the bounds assume chunk_index is 1-based — confirm against the chunk writer.
            inputSchema: z.object({
              from_index: z.number().min(1).default(1).describe("Starting chunk index (1-based)"),
              to_index: z
                .number()
                .max(total)
                .describe(`Ending chunk index (max ${total})`),
            }),
            execute: async ({ from_index, to_index }) => {
              const { db: db2 } = await postgresClient();
              // Only the columns that are returned are fetched.
              const rows = await db2(chunksTable)
                .select("id", "content", "chunk_index")
                .where("source", itemId)
                .whereBetween("chunk_index", [from_index, to_index])
                .orderBy("chunk_index", "asc");

              return JSON.stringify(
                rows.map((r) => ({
                  chunk_content: r.content,
                  chunk_index: r.chunk_index,
                  chunk_id: r.id,
                  item_id: itemId,
                  item_name: itemName,
                  context: contextId,
                })),
              );
            },
          });
        }
      } catch {
        // Skip if table not accessible
      }
    }

    // ── get_{item}_page_{n}_content ───────────────────────────
    if (hadExcludedContent && chunk.chunk_id) {
      const chunkId = chunk.chunk_id;
      const pageToolName = sanitizeToolName(
        `get_${itemLabel}_page_${chunk.chunk_index}_content`,
      );
      if (!tools[pageToolName]) {
        tools[pageToolName] = tool({
          description: `Load the full text of page ${chunk.chunk_index} from "${itemLabel}"`,
          inputSchema: z.object({
            reasoning: z.string().describe("Why you need this specific page's content"),
          }),
          execute: async () => {
            const { db: db2 } = await postgresClient();
            const rows = await db2(chunksTable)
              .select("id", "content", "chunk_index")
              .where("id", chunkId)
              .limit(1);

            if (!rows[0]) return JSON.stringify({ error: "Chunk not found" });

            return JSON.stringify({
              chunk_content: rows[0].content,
              chunk_index: rows[0].chunk_index,
              chunk_id: rows[0].id,
              item_id: itemId,
              item_name: itemName,
              context: contextId,
            });
          },
        });
      }
    }
  }

  return tools;
}