@exulu/backend 1.54.0 → 1.56.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +2275 -1330
- package/dist/index.d.cts +8 -30
- package/dist/index.d.ts +8 -30
- package/dist/index.js +2256 -1306
- package/ee/agentic-retrieval/v3/agent-loop.ts +49 -3
- package/ee/agentic-retrieval/v3/classifier.ts +61 -42
- package/ee/agentic-retrieval/v3/context-sampler.ts +10 -1
- package/ee/agentic-retrieval/v3/index.ts +211 -35
- package/ee/agentic-retrieval/v3/session-tools-registry.ts +20 -0
- package/ee/agentic-retrieval/v3/strategies.ts +28 -24
- package/ee/agentic-retrieval/v3/tools.ts +236 -113
- package/ee/agentic-retrieval/v3/trajectory.ts +227 -14
- package/ee/agentic-retrieval/v4/agent-loop.ts +142 -55
- package/ee/agentic-retrieval/v4/context-sampler.ts +79 -0
- package/ee/agentic-retrieval/v4/index.ts +673 -164
- package/ee/agentic-retrieval/v4/types.ts +33 -4
- package/ee/invoke-skills/create-sandbox.ts +119 -0
- package/ee/python/documents/processing/doc_processor.ts +106 -14
- package/package.json +4 -2
- package/ee/agentic-retrieval/ANALYSIS.md +0 -658
- package/ee/agentic-retrieval/index.ts +0 -1109
- package/ee/agentic-retrieval/logs/README.md +0 -198
- package/ee/agentic-retrieval/v2.ts +0 -1628
- package/ee/agentic-retrieval/v4/embed-preprocessor.ts +0 -76
- package/ee/agentic-retrieval/v4/system-prompt.ts +0 -248
- package/ee/agentic-retrieval/v4/tools.ts +0 -241
|
@@ -1,20 +1,31 @@
|
|
|
1
1
|
import * as fs from "fs/promises";
|
|
2
2
|
import * as path from "path";
|
|
3
|
-
import type { AgenticRetrievalOutput, ClassificationResult } from "./types";
|
|
3
|
+
import type { AgenticRetrievalOutput, ClassificationResult, ChunkResult } from "./types";
|
|
4
4
|
|
|
5
|
-
/**
|
|
6
|
-
* Module-level registry so external callers (e.g. test scripts) can read
|
|
7
|
-
* the path of the most recently saved trajectory file.
|
|
8
|
-
* Works because both the trajectory logger and the test run in the same process.
|
|
9
|
-
*/
|
|
10
5
|
/**
 * Mutable module-level holder for the path of the most recently written
 * trajectory JSON file. The value is `undefined` until a trajectory file has
 * been written successfully.
 */
export const trajectoryRegistry: { lastFile: string | undefined } = {
  lastFile: undefined,
};
|
|
13
8
|
|
|
9
|
+
/**
 * Rich record of a single agent step. Instances are collected by the
 * trajectory logger and rendered into the Markdown trajectory report.
 */
export interface TrajectoryStepData {
  /** Number printed in the step heading of the report. */
  stepNumber: number;
  /** System prompt associated with this step. */
  systemPrompt: string;
  /** Text rendered under the "Reasoning" heading when non-empty. */
  text: string;
  /** Tool invocations of the step; `output` is optional and only rendered when present. */
  toolCalls: {
    name: string;
    id: string;
    input: any;
    output?: any;
  }[];
  /** Chunks listed in the step's "Chunks Retrieved" table. */
  chunks: ChunkResult[];
  /** Names listed as "Dynamic tools created" for the step. */
  dynamicToolsCreated: string[];
  /** Token count of the step; summed across steps for the report totals. */
  tokens: number;
}
|
|
23
|
+
|
|
14
24
|
interface TrajectoryData {
|
|
15
25
|
timestamp: string;
|
|
16
26
|
query: string;
|
|
17
27
|
classification: ClassificationResult;
|
|
28
|
+
preselectedItemIds?: string[];
|
|
18
29
|
steps: {
|
|
19
30
|
step_number: number;
|
|
20
31
|
text: string;
|
|
@@ -35,6 +46,7 @@ interface TrajectoryData {
|
|
|
35
46
|
|
|
36
47
|
export class TrajectoryLogger {
|
|
37
48
|
private data: TrajectoryData;
|
|
49
|
+
private richSteps: TrajectoryStepData[] = [];
|
|
38
50
|
private startTime = Date.now();
|
|
39
51
|
private logDir: string;
|
|
40
52
|
|
|
@@ -42,12 +54,14 @@ export class TrajectoryLogger {
|
|
|
42
54
|
query: string,
|
|
43
55
|
classification: ClassificationResult,
|
|
44
56
|
logDir = path.join(process.cwd(), "ee/agentic-retrieval/logs"),
|
|
57
|
+
preselectedItemIds?: string[],
|
|
45
58
|
) {
|
|
46
59
|
this.logDir = logDir;
|
|
47
60
|
this.data = {
|
|
48
61
|
timestamp: new Date().toISOString(),
|
|
49
62
|
query,
|
|
50
63
|
classification,
|
|
64
|
+
preselectedItemIds: preselectedItemIds?.length ? preselectedItemIds : undefined,
|
|
51
65
|
steps: [],
|
|
52
66
|
final: {
|
|
53
67
|
total_chunks: 0,
|
|
@@ -70,24 +84,223 @@ export class TrajectoryLogger {
|
|
|
70
84
|
});
|
|
71
85
|
}
|
|
72
86
|
|
|
73
|
-
|
|
87
|
+
/**
 * Appends one rich step record to the logger's in-memory list; the collected
 * records are what the Markdown report is rendered from.
 *
 * @param data - The step record to store (kept by reference, not copied).
 */
recordRichStep(data: TrajectoryStepData): void {
  const { richSteps } = this;
  richSteps.push(data);
}
|
|
90
|
+
|
|
91
|
+
/**
 * Renders the recorded rich steps as a human-readable Markdown report.
 *
 * Sections, in order: header (query, duration, tokens, status), classification,
 * the system prompt of the first recorded step (collapsed), one section per
 * step (reasoning, tool calls, retrieved chunks, optional prompt addendum),
 * and a summary table.
 *
 * @param durationMs - Elapsed time in milliseconds; printed in seconds with one decimal.
 * @param success - Selects the success or failure status label.
 * @param error - Optional error whose message is appended to the failure status and summary.
 * @returns The complete Markdown document as a single newline-joined string.
 */
private toMarkdown(durationMs: number, success: boolean, error?: Error): string {
  // A table cell must stay on one line and must not contain a raw pipe,
  // otherwise the row is split into extra columns or cut short.
  const cell = (value: unknown): string =>
    String(value).replace(/\|/g, "\\|").replace(/\r?\n/g, " ");

  // JSON.stringify returns undefined for functions/symbols and throws on
  // circular structures or BigInt; neither may abort the whole report.
  const pretty = (value: unknown): string => {
    try {
      return JSON.stringify(value, null, 2) ?? String(value);
    } catch {
      return String(value);
    }
  };

  const totalTokens = this.richSteps.reduce((sum, s) => sum + s.tokens, 0);
  const totalChunks = this.richSteps.reduce((sum, s) => sum + s.chunks.length, 0);
  const status = success ? "✓ Success" : `✗ Failed${error ? `: ${error.message}` : ""}`;
  const lines: string[] = [];

  // ── Header ──────────────────────────────────────────────────────────────
  lines.push(`# Agentic Retrieval — ${this.data.timestamp}`);
  lines.push("");
  // Two trailing spaces form a Markdown hard line break before the next line.
  lines.push(`**Query:** ${this.data.query}  `);
  lines.push(
    `**Duration:** ${(durationMs / 1000).toFixed(1)}s | **Tokens:** ${totalTokens} | **Status:** ${status}`,
  );
  lines.push("");

  // ── Classification ───────────────────────────────────────────────────────
  lines.push("## Classification");
  lines.push("");
  lines.push(`- **Type:** \`${this.data.classification.queryType}\``);
  lines.push(`- **Language:** \`${this.data.classification.language}\``);
  const suggested = this.data.classification.suggestedContextIds;
  lines.push(
    `- **Suggested contexts:** ${suggested.length > 0 ? suggested.map((id) => `\`${id}\``).join(", ") : "*(all)*"}`,
  );
  if (this.data.preselectedItemIds?.length) {
    lines.push(
      `- **Preselected item IDs:** ${this.data.preselectedItemIds.map((id) => `\`${id}\``).join(", ")}`,
    );
  }
  lines.push("");
  lines.push("---");
  lines.push("");

  // ── System prompt (from the first recorded step, collapsed) ─────────────
  const firstStep = this.richSteps[0];
  if (firstStep) {
    lines.push("## System Prompt");
    lines.push("");
    lines.push("<details>");
    lines.push("<summary>View system prompt</summary>");
    lines.push("");
    lines.push("```");
    lines.push(firstStep.systemPrompt);
    lines.push("```");
    lines.push("");
    lines.push("</details>");
    lines.push("");
    lines.push("---");
    lines.push("");
  }

  // ── Steps ────────────────────────────────────────────────────────────────
  for (const step of this.richSteps) {
    const toolLabel =
      step.toolCalls.map((tc) => `\`${tc.name}\``).join(", ") || "*(no tool calls)*";
    lines.push(`## Step ${step.stepNumber} — ${toolLabel}`);
    lines.push("");
    const dynLabel =
      step.dynamicToolsCreated.length > 0
        ? step.dynamicToolsCreated.map((t) => `\`${t}\``).join(", ")
        : "none";
    lines.push(
      `**Tokens:** ${step.tokens} | **Chunks retrieved:** ${step.chunks.length} | **Dynamic tools created:** ${dynLabel}`,
    );
    lines.push("");

    // Reasoning
    if (step.text) {
      lines.push("### Reasoning");
      lines.push("");
      lines.push(step.text);
      lines.push("");
    }

    // Tool calls
    if (step.toolCalls.length > 0) {
      lines.push("### Tool Calls");
      lines.push("");
      for (const [i, tc] of step.toolCalls.entries()) {
        lines.push(`#### ${i + 1}. \`${tc.name}\``);
        lines.push("");
        lines.push("**Input:**");
        lines.push("```json");
        lines.push(pretty(tc.input));
        lines.push("```");
        lines.push("");

        if (tc.output !== undefined) {
          // String outputs are treated as JSON when they parse, and shown
          // as a plain JSON string otherwise.
          let parsedOutput: any;
          try {
            parsedOutput = typeof tc.output === "string" ? JSON.parse(tc.output) : tc.output;
          } catch {
            parsedOutput = tc.output;
          }
          const outputStr = pretty(parsedOutput);
          const truncated = outputStr.length > 2000;
          lines.push("**Output:**");
          lines.push("```json");
          lines.push(truncated ? `${outputStr.slice(0, 2000)}\n… (truncated)` : outputStr);
          lines.push("```");
          lines.push("");
        }
      }
    }

    // Chunks table
    if (step.chunks.length > 0) {
      lines.push("### Chunks Retrieved");
      lines.push("");
      lines.push("| # | Item | Context | Chunk | Score |");
      lines.push("|---|------|---------|-------|-------|");
      for (const [i, c] of step.chunks.entries()) {
        // First available relevance figure wins; "—" when none is present.
        const score =
          c.metadata?.hybrid_score ??
          c.metadata?.cosine_distance ??
          c.metadata?.fts_rank ??
          "—";
        const scoreStr = typeof score === "number" ? score.toFixed(4) : String(score);
        lines.push(
          `| ${i + 1} | ${cell(c.item_name ?? "—")} | \`${cell(c.context)}\` | ${cell(c.chunk_index ?? "—")} | ${cell(scoreStr)} |`,
        );
      }
      lines.push("");

      const withContent = step.chunks.filter((c) => c.chunk_content);
      if (withContent.length > 0) {
        lines.push("<details>");
        lines.push("<summary>View chunk content</summary>");
        lines.push("");
        for (const c of withContent) {
          // Same "—" placeholder as the table instead of printing "undefined".
          lines.push(`**${c.item_name ?? "—"} (chunk ${c.chunk_index ?? "—"}):**`);
          lines.push("");
          const content = (c.chunk_content ?? "").trim();
          lines.push(`> ${content.split("\n").join("\n> ")}`);
          lines.push("");
        }
        lines.push("</details>");
        lines.push("");
      }
    }

    // Per-step system prompt (only when it differs from the first step's)
    if (firstStep && step.stepNumber > 1 && step.systemPrompt !== firstStep.systemPrompt) {
      // Only a prompt that extends the first one has a meaningful "addendum";
      // slicing an unrelated prompt at that offset would print a random tail,
      // so in that case the whole prompt is shown instead.
      const extendsFirst = step.systemPrompt.startsWith(firstStep.systemPrompt);
      const addendum = (
        extendsFirst ? step.systemPrompt.slice(firstStep.systemPrompt.length) : step.systemPrompt
      ).trim();
      if (addendum) {
        lines.push("<details>");
        lines.push(
          extendsFirst
            ? "<summary>System prompt addendum (this step only)</summary>"
            : "<summary>System prompt (changed for this step)</summary>",
        );
        lines.push("");
        lines.push("```");
        lines.push(addendum);
        lines.push("```");
        lines.push("");
        lines.push("</details>");
        lines.push("");
      }
    }

    lines.push("---");
    lines.push("");
  }

  // ── Summary ──────────────────────────────────────────────────────────────
  lines.push("## Summary");
  lines.push("");
  lines.push("| Metric | Value |");
  lines.push("|--------|-------|");
  lines.push(`| Steps | ${this.richSteps.length} |`);
  lines.push(`| Total chunks | ${totalChunks} |`);
  lines.push(`| Total tokens | ${totalTokens} |`);
  lines.push(`| Duration | ${(durationMs / 1000).toFixed(1)}s |`);
  lines.push(`| Status | ${cell(status)} |`);
  if (error) {
    lines.push(`| Error | ${cell(error.message)} |`);
  }
  lines.push("");

  return lines.join("\n");
}
|
|
270
|
+
|
|
271
|
+
async finalize(
|
|
272
|
+
output: AgenticRetrievalOutput,
|
|
273
|
+
success: boolean,
|
|
274
|
+
error?: Error,
|
|
275
|
+
writeFiles = false,
|
|
276
|
+
): Promise<string | undefined> {
|
|
277
|
+
const durationMs = Date.now() - this.startTime;
|
|
278
|
+
|
|
74
279
|
this.data.final = {
|
|
75
280
|
total_chunks: output.chunks.length,
|
|
76
281
|
total_steps: output.steps.length,
|
|
77
282
|
total_tokens: output.totalTokens,
|
|
78
|
-
duration_ms:
|
|
283
|
+
duration_ms: durationMs,
|
|
79
284
|
success,
|
|
80
285
|
error: error?.message,
|
|
81
286
|
};
|
|
82
287
|
|
|
288
|
+
if (!writeFiles) return undefined;
|
|
289
|
+
|
|
83
290
|
try {
|
|
84
291
|
await fs.mkdir(this.logDir, { recursive: true });
|
|
85
|
-
const
|
|
86
|
-
const
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
292
|
+
const ts = Date.now();
|
|
293
|
+
const jsonPath = path.join(this.logDir, `trajectory_${ts}.json`);
|
|
294
|
+
const mdPath = path.join(this.logDir, `trajectory_${ts}.md`);
|
|
295
|
+
|
|
296
|
+
await Promise.all([
|
|
297
|
+
fs.writeFile(jsonPath, JSON.stringify(this.data, null, 2), "utf-8"),
|
|
298
|
+
fs.writeFile(mdPath, this.toMarkdown(durationMs, success, error), "utf-8"),
|
|
299
|
+
]);
|
|
300
|
+
|
|
301
|
+
console.log(`[EXULU] v3 trajectory saved: trajectory_${ts}.json + trajectory_${ts}.md`);
|
|
302
|
+
trajectoryRegistry.lastFile = jsonPath;
|
|
303
|
+
return jsonPath;
|
|
91
304
|
} catch (e) {
|
|
92
305
|
console.error("[EXULU] v3 failed to write trajectory:", e);
|
|
93
306
|
return undefined;
|
|
@@ -1,32 +1,82 @@
|
|
|
1
|
-
import { generateText, stepCountIs } from "ai";
|
|
1
|
+
import { generateText, stepCountIs, tool } from "ai";
|
|
2
2
|
import type { LanguageModel, Tool as AITool, ModelMessage } from "ai";
|
|
3
|
+
import { z } from "zod";
|
|
3
4
|
import { withRetry } from "@SRC/utils/with-retry";
|
|
4
|
-
import {
|
|
5
|
+
import type { ExuluReranker } from "@SRC/exulu/reranker";
|
|
5
6
|
import type { AgenticRetrievalOutput, ChunkResult } from "./types";
|
|
7
|
+
import { DEFAULT_MAX_STEPS, type AgenticRetrievalLog, type ContextRetrievalConfig } from ".";
|
|
6
8
|
|
|
7
|
-
const
|
|
9
|
+
/** Tool name the agent loop watches for to detect that retrieval is complete. */
const FINISH_TOOL_NAME = "finish_retrieval";

/**
 * Sentinel tool the model calls to signal that no further searches are
 * needed. Executing it has no side effects: it echoes the supplied reasoning
 * back as a JSON string of the form `{ finished: true, reasoning }`.
 */
const finishRetrievalTool = tool({
  description: [
    "Call this tool when you have retrieved sufficient information and no further searches are needed. ",
    "You MUST call this tool to signal that retrieval is complete — do not write a text conclusion.",
  ].join(""),
  inputSchema: z.object({
    reasoning: z.string().describe("One sentence explaining why retrieval is complete"),
  }),
  execute: async (args) => {
    const { reasoning } = args;
    return JSON.stringify({ finished: true, reasoning });
  },
});
|
|
20
|
+
|
|
21
|
+
/**
 * Collects chunk-shaped rows from a list of tool results.
 *
 * Each result's payload is read from `output` (falling back to `result`).
 * String payloads are parsed as JSON; payloads that fail to parse, or that
 * are not arrays, are ignored. Within an array payload only rows carrying
 * both a truthy `item_id` and a truthy `context` are kept.
 *
 * @param toolResults - Tool results of one generation step; `null`/`undefined`
 *   (for the list itself or for individual entries) is tolerated.
 * @returns The extracted chunks in encounter order (no de-duplication).
 */
function extractChunksFromToolResults(toolResults: any[]): ChunkResult[] {
  const chunks: ChunkResult[] = [];

  for (const result of toolResults ?? []) {
    // A missing entry must not abort extraction for the remaining results.
    if (result == null) continue;

    // AI SDK v6 uses `output` (not `result`) for tool result values
    const rawOutput = result.output ?? result.result;

    let parsed: any;
    try {
      parsed = typeof rawOutput === "string" ? JSON.parse(rawOutput) : rawOutput;
    } catch {
      // Non-JSON text output cannot contain chunk rows.
      continue;
    }
    if (!Array.isArray(parsed)) continue;

    for (const item of parsed) {
      if (!item?.item_id || !item?.context) continue;
      chunks.push({
        item_name: item.item_name,
        item_id: item.item_id,
        // `context` may be an object carrying an `id`, or already the id itself.
        context: item.context?.id ?? item.context,
        chunk_id: item.chunk_id,
        chunk_index: item.chunk_index,
        chunk_content: item.chunk_content,
        metadata: item.metadata,
      });
    }
  }

  return chunks;
}
|
|
8
51
|
|
|
9
52
|
/**
|
|
10
|
-
*
|
|
53
|
+
* Core agent loop: one generateText call per step.
|
|
11
54
|
*
|
|
12
|
-
* Unlike
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
* 3. Harvests any chunk-shaped rows from query results
|
|
17
|
-
* 4. Repeats until the model produces a text response (no tool calls) or
|
|
18
|
-
* the MAX_STEPS budget is exhausted
|
|
55
|
+
* Unlike v2 (which split each step into a reasoning call + a separate tool
|
|
56
|
+
* execution call), here a single call with toolChoice: "required" has the model
|
|
57
|
+
* reason and call tools in one pass. The model sees tool results from the
|
|
58
|
+
* previous step via the conversation history (messages array).
|
|
19
59
|
*
|
|
20
|
-
* The
|
|
60
|
+
* The loop stops when:
|
|
61
|
+
* - The model calls the finish_retrieval tool (it's satisfied), OR
|
|
62
|
+
* - The strategy's stepBudget is exhausted
|
|
21
63
|
*/
|
|
22
64
|
export async function* runAgentLoop(params: {
|
|
23
|
-
|
|
24
|
-
|
|
65
|
+
config: ContextRetrievalConfig;
|
|
66
|
+
userQuery: string;
|
|
67
|
+
log: AgenticRetrievalLog;
|
|
68
|
+
todos: {
|
|
69
|
+
status: "planned" | "completed";
|
|
70
|
+
description: string;
|
|
71
|
+
current: boolean;
|
|
72
|
+
}[];
|
|
25
73
|
tools: Record<string, AITool>;
|
|
26
74
|
model: LanguageModel;
|
|
75
|
+
reranker?: ExuluReranker;
|
|
76
|
+
sessionID?: string;
|
|
27
77
|
onStepComplete?: (step: AgenticRetrievalOutput["steps"][0]) => void;
|
|
28
78
|
}): AsyncGenerator<AgenticRetrievalOutput> {
|
|
29
|
-
const {
|
|
79
|
+
const { userQuery, tools, model, reranker, sessionID, onStepComplete, config, log, todos } = params;
|
|
30
80
|
|
|
31
81
|
const output: AgenticRetrievalOutput = {
|
|
32
82
|
steps: [],
|
|
@@ -36,83 +86,120 @@ export async function* runAgentLoop(params: {
|
|
|
36
86
|
totalTokens: 0,
|
|
37
87
|
};
|
|
38
88
|
|
|
39
|
-
|
|
40
|
-
|
|
89
|
+
const messages: ModelMessage[] = [{ role: "user", content: userQuery }];
|
|
90
|
+
|
|
91
|
+
const stepBudget = config.maxSteps || DEFAULT_MAX_STEPS
|
|
92
|
+
|
|
93
|
+
const SYSTEM_PROMPT = `
|
|
94
|
+
You are a helpful assistant that can search the knowledge base and retrieve information.
|
|
95
|
+
|
|
96
|
+
You are searching for information that is relevant to the following question:
|
|
97
|
+
<user_query>
|
|
98
|
+
${userQuery}
|
|
99
|
+
</user_query>
|
|
41
100
|
|
|
42
|
-
|
|
101
|
+
You have the following instructions for this knowledge base:
|
|
102
|
+
<instructions>
|
|
103
|
+
${config.instructions}
|
|
104
|
+
</instructions>
|
|
43
105
|
|
|
44
|
-
|
|
45
|
-
|
|
106
|
+
A first search strategy was drafted as a todo list:
|
|
107
|
+
<todo_list>
|
|
108
|
+
${todos.map((todo, index) => `${index + 1}. ${todo.status} - ${todo.description}`).join("\n")}
|
|
109
|
+
</todo_list>
|
|
110
|
+
|
|
111
|
+
`;
|
|
112
|
+
|
|
113
|
+
for (let step = 0; step < stepBudget; step++) {
|
|
114
|
+
|
|
115
|
+
log.entries.push({
|
|
116
|
+
label: "Agent loop step",
|
|
117
|
+
timestamp: new Date().toISOString(),
|
|
118
|
+
message: `[EXULU] v3 agent loop — step ${step + 1}/${stepBudget}`,
|
|
119
|
+
});
|
|
46
120
|
|
|
47
121
|
let result: Awaited<ReturnType<typeof generateText>>;
|
|
122
|
+
|
|
123
|
+
const stepTools = { ...tools, [FINISH_TOOL_NAME]: finishRetrievalTool };
|
|
124
|
+
|
|
48
125
|
try {
|
|
49
126
|
result = await withRetry(() =>
|
|
50
127
|
generateText({
|
|
51
128
|
model,
|
|
52
129
|
temperature: 0,
|
|
53
|
-
system:
|
|
130
|
+
system: SYSTEM_PROMPT,
|
|
54
131
|
messages,
|
|
55
|
-
tools,
|
|
56
|
-
toolChoice: "
|
|
132
|
+
tools: stepTools,
|
|
133
|
+
toolChoice: "required",
|
|
57
134
|
stopWhen: stepCountIs(1),
|
|
58
135
|
}),
|
|
59
136
|
);
|
|
60
137
|
} catch (err) {
|
|
61
|
-
console.error("[EXULU]
|
|
138
|
+
console.error("[EXULU] v3 generateText failed:", err);
|
|
62
139
|
throw err;
|
|
63
140
|
}
|
|
64
141
|
|
|
65
|
-
//
|
|
142
|
+
// Carry conversation forward: assistant message + tool results go into history
|
|
143
|
+
// so the model sees them on the next iteration.
|
|
66
144
|
messages.push(...(result.response.messages as ModelMessage[]));
|
|
67
145
|
|
|
68
|
-
//
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
146
|
+
// Extract chunks from tool results
|
|
147
|
+
let stepChunks: any[] = extractChunksFromToolResults(result.toolResults as any[]);
|
|
148
|
+
|
|
149
|
+
// Deduplicate by chunk_id within this step (parallel tool calls can return the same chunk
|
|
150
|
+
// if the agent searches the same context twice, or the same chunk is indexed in two contexts).
|
|
151
|
+
const seenChunkIds = new Set<string>();
|
|
152
|
+
stepChunks = stepChunks.filter((c) => {
|
|
153
|
+
if (!c.chunk_id) return true;
|
|
154
|
+
if (seenChunkIds.has(c.chunk_id)) return false;
|
|
155
|
+
seenChunkIds.add(c.chunk_id);
|
|
156
|
+
return true;
|
|
157
|
+
});
|
|
77
158
|
|
|
78
159
|
// Record step
|
|
79
|
-
const stepRecord
|
|
160
|
+
const stepRecord = {
|
|
80
161
|
stepNumber: step + 1,
|
|
81
162
|
text: result.text ?? "",
|
|
82
|
-
toolCalls:
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
})) ?? [],
|
|
163
|
+
toolCalls: (result.toolCalls as any[])?.map((tc) => ({
|
|
164
|
+
name: tc.toolName,
|
|
165
|
+
id: tc.toolCallId,
|
|
166
|
+
input: tc.input,
|
|
167
|
+
})) ?? [],
|
|
88
168
|
chunks: stepChunks,
|
|
89
169
|
tokens: result.usage?.totalTokens ?? 0,
|
|
90
170
|
};
|
|
91
171
|
|
|
172
|
+
log.entries.push({
|
|
173
|
+
label: "Step completed",
|
|
174
|
+
timestamp: new Date().toISOString(),
|
|
175
|
+
message: JSON.stringify(stepRecord),
|
|
176
|
+
});
|
|
177
|
+
|
|
92
178
|
output.steps.push(stepRecord);
|
|
93
179
|
output.reasoning.push({
|
|
94
180
|
text: result.text ?? "",
|
|
95
|
-
tools:
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
(r: any) => (r.toolCallId ?? r.id) === tc.toolCallId,
|
|
102
|
-
)?.output,
|
|
103
|
-
})) ?? [],
|
|
181
|
+
tools: (result.toolCalls as any[])?.map((tc) => ({
|
|
182
|
+
name: tc.toolName,
|
|
183
|
+
id: tc.toolCallId,
|
|
184
|
+
input: tc.input,
|
|
185
|
+
output: stepChunks,
|
|
186
|
+
})) ?? [],
|
|
104
187
|
});
|
|
105
|
-
|
|
188
|
+
// Deduplicate against chunks already accumulated from prior steps
|
|
189
|
+
const existingChunkIds = new Set(output.chunks.map((c) => c.chunk_id).filter(Boolean));
|
|
190
|
+
output.chunks.push(...stepChunks.filter((c) => !c.chunk_id || !existingChunkIds.has(c.chunk_id)));
|
|
106
191
|
output.usage.push(result.usage);
|
|
107
192
|
|
|
108
193
|
onStepComplete?.(stepRecord);
|
|
109
194
|
|
|
110
195
|
yield { ...output };
|
|
111
196
|
|
|
112
|
-
// Stop
|
|
113
|
-
const
|
|
114
|
-
|
|
115
|
-
|
|
197
|
+
// Stop if the model called finish_retrieval AND no forced continuation is needed
|
|
198
|
+
const calledFinish = (result.toolCalls as any[])?.some(
|
|
199
|
+
(tc) => tc.toolName === FINISH_TOOL_NAME,
|
|
200
|
+
);
|
|
201
|
+
if (calledFinish) {
|
|
202
|
+
console.log(`[EXULU] v3 model called finish_retrieval after step ${step + 1}`);
|
|
116
203
|
break;
|
|
117
204
|
}
|
|
118
205
|
}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import { ExuluContext, getTableName } from "@SRC/exulu/context";
|
|
2
|
+
import { postgresClient } from "@SRC/postgres/client";
|
|
3
|
+
import { applyAccessControl } from "@SRC/graphql/utilities/access-control";
|
|
4
|
+
import { convertContextToTableDefinition } from "@SRC/graphql/utilities/convert-context-to-table-definition";
|
|
5
|
+
import type { User } from "@EXULU_TYPES/models/user";
|
|
6
|
+
|
|
7
|
+
const CACHE_TTL_MS = 60 * 60 * 1000; // 1 hour
|
|
8
|
+
|
|
9
|
+
/** Cached snapshot describing one context: its field names plus a few example rows. */
export interface ContextSample {
  /** Identifier of the sampled context. */
  contextId: string;
  /** Name of the sampled context. */
  contextName: string;
  /** All field names available on items (standard + custom) */
  fields: string[];
  /** Up to 2 example item records */
  exampleItems: Record<string, any>[];
  /** `Date.now()` timestamp taken when the sample was built; drives cache expiry. */
  sampledAt: number;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Pulls up to 2 example item records per context and caches them in memory
 * for `CACHE_TTL_MS`. The samples are intended for the classifier prompt so
 * the model sees what data is actually stored (not just field names).
 *
 * Example rows are fetched through `applyAccessControl`, so they depend on the
 * requesting user. The cache is therefore partitioned per context AND per
 * access scope; a sample fetched for one user is never served to another.
 */
export class ContextSampler {
  /** contextId → (access-scope key → cached sample). */
  private cache = new Map<string, Map<string, ContextSample>>();

  /**
   * Returns one sample per context, in the same order as `contexts`.
   * Contexts are sampled concurrently.
   */
  async getSamples(
    contexts: ExuluContext[],
    user?: User,
    role?: string,
  ): Promise<ContextSample[]> {
    return Promise.all(contexts.map((ctx) => this.getSample(ctx, user, role)));
  }

  /**
   * Returns the cached sample for this context and access scope while it is
   * younger than `CACHE_TTL_MS`; otherwise queries the context table again.
   * A failed query yields a sample with no example items, which is NOT cached
   * so the next call retries.
   */
  private async getSample(
    ctx: ExuluContext,
    user?: User,
    role?: string,
  ): Promise<ContextSample> {
    // NOTE(review): assumes `User` exposes a stable `id` — confirm against the model.
    // `role` is not forwarded to the query here, but is kept in the key so
    // entries stay separated per caller-supplied scope.
    const scopeKey = JSON.stringify([user?.id ?? null, role ?? null]);

    const cached = this.cache.get(ctx.id)?.get(scopeKey);
    if (cached && Date.now() - cached.sampledAt < CACHE_TTL_MS) {
      return cached;
    }

    const { db } = await postgresClient();
    const tableName = getTableName(ctx.id);
    const tableDefinition = convertContextToTableDefinition(ctx);

    const customFieldNames = ctx.fields.map((f) => f.name);
    const selectFields = ["id", "name", "external_id", ...customFieldNames];

    let exampleItems: Record<string, any>[] = [];
    let querySucceeded = true;
    try {
      let query = db(tableName).select(selectFields).whereNull("archived").limit(2);
      query = applyAccessControl(tableDefinition, query, user, tableName);
      exampleItems = await query;
    } catch (err) {
      // If table doesn't exist yet or column mismatch, return empty samples
      querySucceeded = false;
      console.warn(`[EXULU] context sampler: could not sample context ${ctx.id}:`, err);
    }

    const sample: ContextSample = {
      contextId: ctx.id,
      contextName: ctx.name,
      fields: ["name", "external_id", ...customFieldNames],
      exampleItems,
      sampledAt: Date.now(),
    };

    if (querySucceeded) {
      let scoped = this.cache.get(ctx.id);
      if (!scoped) {
        scoped = new Map<string, ContextSample>();
        this.cache.set(ctx.id, scoped);
      }
      // Drop expired entries of this context so per-scope caching stays bounded.
      for (const [key, entry] of scoped) {
        if (sample.sampledAt - entry.sampledAt >= CACHE_TTL_MS) scoped.delete(key);
      }
      scoped.set(scopeKey, sample);
    }

    return sample;
  }

  /** Evict a context (all access scopes) from cache so it's re-sampled on next use */
  invalidate(contextId: string): void {
    this.cache.delete(contextId);
  }
}
|