@exulu/backend 1.67.0 → 1.68.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-ZPZKOT6I.js → chunk-VPSLTGZF.js} +1428 -139
- package/dist/{convert-exulu-tools-to-ai-sdk-tools-4B7BQ5G2.js → convert-exulu-tools-to-ai-sdk-tools-CHQF36XW.js} +1 -1
- package/dist/index.cjs +24279 -22720
- package/dist/index.d.cts +256 -100
- package/dist/index.d.ts +256 -100
- package/dist/index.js +2837 -2645
- package/ee/agentic-retrieval/v3/agent-loop.ts +4 -4
- package/ee/agentic-retrieval/v3/index.ts +20 -6
- package/ee/python/documents/processing/doc_processor.ts +79 -34
- package/ee/workers.ts +3 -17
- package/package.json +1 -1
- package/ee/agentic-retrieval/v4/agent-loop.ts +0 -208
- package/ee/agentic-retrieval/v4/context-sampler.ts +0 -79
- package/ee/agentic-retrieval/v4/index.ts +0 -690
- package/ee/agentic-retrieval/v4/types.ts +0 -58
|
@@ -2,7 +2,7 @@ import { generateText, stepCountIs, tool } from "ai";
|
|
|
2
2
|
import type { LanguageModel, Tool as AITool, ModelMessage } from "ai";
|
|
3
3
|
import { z } from "zod";
|
|
4
4
|
import { withRetry } from "@SRC/utils/with-retry";
|
|
5
|
-
import type {
|
|
5
|
+
import type { ResolvedReranker } from "@SRC/exulu/resolve-reranker";
|
|
6
6
|
import type { AgenticRetrievalOutput, ChunkResult, ClassificationResult } from "./types";
|
|
7
7
|
import type { StrategyConfig } from "./strategies";
|
|
8
8
|
import { createDynamicTools } from "./dynamic-tools";
|
|
@@ -69,7 +69,7 @@ export async function* runAgentLoop(params: {
|
|
|
69
69
|
strategy: StrategyConfig;
|
|
70
70
|
tools: Record<string, AITool>;
|
|
71
71
|
model: LanguageModel;
|
|
72
|
-
reranker?:
|
|
72
|
+
reranker?: ResolvedReranker;
|
|
73
73
|
contextGuidance?: string;
|
|
74
74
|
customInstructions?: string;
|
|
75
75
|
classification: ClassificationResult;
|
|
@@ -171,8 +171,8 @@ export async function* runAgentLoop(params: {
|
|
|
171
171
|
|
|
172
172
|
// Rerank if reranker is available
|
|
173
173
|
if (reranker && stepChunks.length > 0) {
|
|
174
|
-
console.log(`[EXULU] v3 reranking ${stepChunks.length} chunks with ${reranker.
|
|
175
|
-
stepChunks = await reranker.
|
|
174
|
+
console.log(`[EXULU] v3 reranking ${stepChunks.length} chunks with ${reranker.model}`);
|
|
175
|
+
stepChunks = await reranker.rerank(query, stepChunks);
|
|
176
176
|
}
|
|
177
177
|
|
|
178
178
|
// Create dynamic tools (browse adjacent pages, load specific pages)
|
|
@@ -2,7 +2,8 @@ import { z } from "zod";
|
|
|
2
2
|
import { createBashTool } from "bash-tool";
|
|
3
3
|
import type { LanguageModel, Tool } from "ai";
|
|
4
4
|
import type { ExuluContext } from "@SRC/exulu/context";
|
|
5
|
-
import
|
|
5
|
+
import { resolveReranker } from "@SRC/exulu/resolve-reranker";
|
|
6
|
+
import type { ResolvedReranker } from "@SRC/exulu/resolve-reranker";
|
|
6
7
|
import { ExuluTool } from "@SRC/exulu/tool";
|
|
7
8
|
import type { User } from "@EXULU_TYPES/models/user";
|
|
8
9
|
import { checkLicense } from "@EE/entitlements";
|
|
@@ -34,7 +35,7 @@ async function* executeV3({
|
|
|
34
35
|
}: {
|
|
35
36
|
query: string;
|
|
36
37
|
contexts: ExuluContext[];
|
|
37
|
-
reranker?:
|
|
38
|
+
reranker?: ResolvedReranker;
|
|
38
39
|
toolVariablesConfig?: Record<string, any>;
|
|
39
40
|
model: LanguageModel;
|
|
40
41
|
user?: User;
|
|
@@ -189,7 +190,6 @@ async function* executeV3({
|
|
|
189
190
|
export function createAgenticRetrievalToolV3({
|
|
190
191
|
contexts,
|
|
191
192
|
instructions: adminInstructions,
|
|
192
|
-
rerankers,
|
|
193
193
|
user,
|
|
194
194
|
role,
|
|
195
195
|
model,
|
|
@@ -197,7 +197,6 @@ export function createAgenticRetrievalToolV3({
|
|
|
197
197
|
memoryItems
|
|
198
198
|
}: {
|
|
199
199
|
contexts: ExuluContext[];
|
|
200
|
-
rerankers: ExuluReranker[];
|
|
201
200
|
user?: User;
|
|
202
201
|
role?: string;
|
|
203
202
|
model?: LanguageModel;
|
|
@@ -355,7 +354,7 @@ export function createAgenticRetrievalToolV3({
|
|
|
355
354
|
}
|
|
356
355
|
|
|
357
356
|
let activeContexts = contexts;
|
|
358
|
-
let configuredReranker:
|
|
357
|
+
let configuredReranker: ResolvedReranker | undefined;
|
|
359
358
|
let configInstructions = "";
|
|
360
359
|
let logTrajectory = false;
|
|
361
360
|
let requiresPreselectedContexts = false;
|
|
@@ -382,7 +381,22 @@ export function createAgenticRetrievalToolV3({
|
|
|
382
381
|
const rerankerId = toolVariablesConfig["reranker"];
|
|
383
382
|
|
|
384
383
|
if (rerankerId && rerankerId !== "none") {
|
|
385
|
-
|
|
384
|
+
// rerankerId is a LiteLLM model_name from config.litellm.yaml
|
|
385
|
+
// (model_info.type: reranker). Resolution is best-effort: a
|
|
386
|
+
// misconfigured model or an unready proxy must not break retrieval —
|
|
387
|
+
// it just runs unreranked, matching the old find()→undefined path.
|
|
388
|
+
try {
|
|
389
|
+
configuredReranker = await resolveReranker({
|
|
390
|
+
model: rerankerId,
|
|
391
|
+
user,
|
|
392
|
+
roleId: role,
|
|
393
|
+
});
|
|
394
|
+
} catch (err) {
|
|
395
|
+
console.warn(
|
|
396
|
+
`[EXULU] v3 — could not resolve reranker "${rerankerId}", continuing without reranking:`,
|
|
397
|
+
err,
|
|
398
|
+
);
|
|
399
|
+
}
|
|
386
400
|
}
|
|
387
401
|
}
|
|
388
402
|
|
|
@@ -14,17 +14,41 @@ import { checkLicense } from '@EE/entitlements';
|
|
|
14
14
|
import { executePythonScript } from '@SRC/utils/python-executor';
|
|
15
15
|
import { setupPythonEnvironment, validatePythonEnvironment } from '@SRC/utils/python-setup';
|
|
16
16
|
import { LiteParse } from '@llamaindex/liteparse';
|
|
17
|
-
import {
|
|
18
|
-
import {
|
|
17
|
+
import { resolveOcr } from '@SRC/exulu/resolve-ocr';
|
|
18
|
+
import type { ResolveOcrInput } from '@SRC/exulu/resolve-ocr';
|
|
19
|
+
import { resolveModel } from '@SRC/exulu/resolve-model';
|
|
19
20
|
|
|
20
21
|
type DocumentProcessorConfig = {
|
|
21
22
|
vlm?: {
|
|
22
|
-
|
|
23
|
+
/**
|
|
24
|
+
* LiteLLM model_name for the VLM page-validation pass (declared in
|
|
25
|
+
* config.litellm.yaml, e.g. "vertex-gemini-2.5-flash"). Resolved via
|
|
26
|
+
* resolveModel() so the VLM pass shares the same tag-based cost controls
|
|
27
|
+
* and provider-switching as chat / embeddings / OCR, and the underlying
|
|
28
|
+
* provider can be swapped without code changes.
|
|
29
|
+
*/
|
|
30
|
+
model: string;
|
|
23
31
|
concurrency: number;
|
|
24
32
|
},
|
|
25
33
|
processor: {
|
|
26
34
|
name: "docling" | "liteparse" | "mistral" | "officeparser"
|
|
35
|
+
/**
|
|
36
|
+
* LiteLLM model_name for the "mistral" OCR processor (declared in
|
|
37
|
+
* config.litellm.yaml). Defaults to "mistral-ocr". OCR is routed through
|
|
38
|
+
* the LiteLLM proxy so it shares the same tag-based cost controls as chat
|
|
39
|
+
* and embeddings, and the underlying provider (mistral / azure_ai /
|
|
40
|
+
* vertex_ai) can be switched without code changes.
|
|
41
|
+
*/
|
|
42
|
+
model?: string
|
|
27
43
|
}
|
|
44
|
+
/**
|
|
45
|
+
* Optional cost-attribution context, forwarded to LiteLLM as spend tags
|
|
46
|
+
* (user / role / project / context) for both the OCR pass (resolveOcr) and
|
|
47
|
+
* the VLM page-validation pass (resolveModel). Not yet populated by callers;
|
|
48
|
+
* the wiring is in place so per-user/per-context budgets work the moment
|
|
49
|
+
* attribution is threaded through.
|
|
50
|
+
*/
|
|
51
|
+
attribution?: Omit<ResolveOcrInput, "model">
|
|
28
52
|
debugging?: {
|
|
29
53
|
deleteTempFiles?: boolean;
|
|
30
54
|
}
|
|
@@ -94,6 +118,38 @@ async function processWord(file: Buffer): Promise<ProcessorOutput> {
|
|
|
94
118
|
}
|
|
95
119
|
}
|
|
96
120
|
|
|
121
|
+
/**
|
|
122
|
+
* Resolve the dev-supplied VLM `model` string (a LiteLLM model_name from
|
|
123
|
+
* config.litellm.yaml, e.g. "vertex-gemini-2.5-flash") into an `ai` SDK
|
|
124
|
+
* LanguageModel via resolveModel. This routes the VLM page-validation pass
|
|
125
|
+
* through the LiteLLM proxy — same tag-based cost controls and provider
|
|
126
|
+
* switching as chat / embeddings / OCR — and keeps the internal VLM helpers
|
|
127
|
+
* (validateWithVLM / validatePageWithVLM) working with a LanguageModel.
|
|
128
|
+
*
|
|
129
|
+
* Returns undefined when no VLM model is configured. Attribution (user /
|
|
130
|
+
* project / agent / routine) is forwarded for spend tagging when callers
|
|
131
|
+
* populate config.attribution; rbacBypass is set because this is a background
|
|
132
|
+
* package call where model-level access control is delegated to LiteLLM.
|
|
133
|
+
*/
|
|
134
|
+
async function resolveVlmModel(
|
|
135
|
+
config?: DocumentProcessorConfig,
|
|
136
|
+
): Promise<LanguageModel | undefined> {
|
|
137
|
+
const modelId = config?.vlm?.model;
|
|
138
|
+
if (!modelId) return undefined;
|
|
139
|
+
|
|
140
|
+
const { languageModel } = await resolveModel({
|
|
141
|
+
modelId,
|
|
142
|
+
providers: [], // unused in LiteLLM mode; resolveModel ignores it there
|
|
143
|
+
user: config?.attribution?.user,
|
|
144
|
+
project: config?.attribution?.project,
|
|
145
|
+
agent: config?.attribution?.agent,
|
|
146
|
+
routine: config?.attribution?.routine,
|
|
147
|
+
rbacBypass: true,
|
|
148
|
+
});
|
|
149
|
+
|
|
150
|
+
return languageModel;
|
|
151
|
+
}
|
|
152
|
+
|
|
97
153
|
/**
|
|
98
154
|
* Processes a standalone image file by optionally extracting content using VLM
|
|
99
155
|
*/
|
|
@@ -122,14 +178,15 @@ async function processImage(
|
|
|
122
178
|
}];
|
|
123
179
|
|
|
124
180
|
// If VLM is enabled, use it to extract content from the image
|
|
125
|
-
|
|
181
|
+
const vlmModel = await resolveVlmModel(config);
|
|
182
|
+
if (vlmModel) {
|
|
126
183
|
console.log('[EXULU] Extracting content from image using VLM...');
|
|
127
184
|
|
|
128
185
|
json = await validateWithVLM(
|
|
129
186
|
json,
|
|
130
|
-
|
|
187
|
+
vlmModel,
|
|
131
188
|
verbose,
|
|
132
|
-
config
|
|
189
|
+
config!.vlm!.concurrency
|
|
133
190
|
);
|
|
134
191
|
|
|
135
192
|
// Save the processed result
|
|
@@ -679,15 +736,6 @@ async function processDocument(
|
|
|
679
736
|
};
|
|
680
737
|
}
|
|
681
738
|
|
|
682
|
-
const getMistralApiKey = async () => {
|
|
683
|
-
if (process.env.MISTRAL_API_KEY) {
|
|
684
|
-
return process.env.MISTRAL_API_KEY;
|
|
685
|
-
} else {
|
|
686
|
-
const variable = await ExuluVariables.get("MISTRAL_API_KEY");
|
|
687
|
-
return variable;
|
|
688
|
-
}
|
|
689
|
-
}
|
|
690
|
-
|
|
691
739
|
async function processPdf(
|
|
692
740
|
buffer: Buffer,
|
|
693
741
|
paths: ProcessingPaths,
|
|
@@ -759,28 +807,25 @@ async function processPdf(
|
|
|
759
807
|
|
|
760
808
|
} else if (config?.processor.name === "mistral") {
|
|
761
809
|
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
810
|
+
// OCR is routed through the LiteLLM proxy's Mistral-compatible /v1/ocr
|
|
811
|
+
// endpoint (see resolveOcr) rather than the Mistral SDK directly. This
|
|
812
|
+
// gives us tag-based cost control and lets us switch the OCR provider
|
|
813
|
+
// (mistral / azure_ai / vertex_ai) from config.litellm.yaml.
|
|
814
|
+
const resolved = await resolveOcr({
|
|
815
|
+
model: config.processor.model ?? "mistral-ocr",
|
|
816
|
+
...config.attribution,
|
|
817
|
+
});
|
|
766
818
|
|
|
767
819
|
// Wait a random time between 1 and 5 seconds to prevent rate limiting
|
|
768
820
|
await new Promise(resolve => setTimeout(resolve, Math.floor(Math.random() * 4000) + 1000));
|
|
769
821
|
|
|
770
822
|
const base64Pdf = buffer.toString('base64');
|
|
771
|
-
const client = new Mistral({ apiKey: MISTRAL_API_KEY });
|
|
772
823
|
|
|
773
824
|
const ocrResponse = await withRetry(async () => {
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
documentUrl: "data:application/pdf;base64," + base64Pdf
|
|
779
|
-
},
|
|
780
|
-
model: "mistral-ocr-latest",
|
|
781
|
-
includeImageBase64: false
|
|
782
|
-
});
|
|
783
|
-
return ocrResponse;
|
|
825
|
+
return await resolved.ocr({
|
|
826
|
+
type: "document_url",
|
|
827
|
+
document_url: "data:application/pdf;base64," + base64Pdf,
|
|
828
|
+
}, { includeImageBase64: false });
|
|
784
829
|
}, 10);
|
|
785
830
|
|
|
786
831
|
const parser = new LiteParse();
|
|
@@ -838,13 +883,14 @@ async function processPdf(
|
|
|
838
883
|
}
|
|
839
884
|
|
|
840
885
|
// Apply VLM validation if enabled
|
|
841
|
-
|
|
886
|
+
const vlmModel = config?.vlm?.model ? await resolveVlmModel(config) : undefined;
|
|
887
|
+
if (vlmModel && json.length > 0) {
|
|
842
888
|
|
|
843
889
|
json = await validateWithVLM(
|
|
844
890
|
json,
|
|
845
|
-
|
|
891
|
+
vlmModel,
|
|
846
892
|
verbose,
|
|
847
|
-
config
|
|
893
|
+
config!.vlm!.concurrency
|
|
848
894
|
);
|
|
849
895
|
|
|
850
896
|
console.log('[EXULU] \n📊 Processing Summary:');
|
|
@@ -1046,7 +1092,6 @@ export async function documentProcessor({
|
|
|
1046
1092
|
} catch (error) {
|
|
1047
1093
|
console.error('Error during chunking:', error);
|
|
1048
1094
|
throw error;
|
|
1049
|
-
|
|
1050
1095
|
} finally {
|
|
1051
1096
|
if (config?.debugging?.deleteTempFiles !== false) {
|
|
1052
1097
|
// Delete the temp directory using the local array to avoid race conditions
|
package/ee/workers.ts
CHANGED
|
@@ -7,7 +7,6 @@ import { ExuluStorage } from "@SRC/exulu/storage.ts";
|
|
|
7
7
|
import type { ExuluAgent } from "@EXULU_TYPES/models/agent.ts";
|
|
8
8
|
import type { ExuluQueueConfig } from "@EXULU_TYPES/queue-config.ts";
|
|
9
9
|
import { getTableName, type ExuluContext } from "@SRC/exulu/context.ts";
|
|
10
|
-
import type { ExuluReranker } from "@SRC/exulu/reranker.ts";
|
|
11
10
|
import type { ExuluEval } from "@SRC/exulu/evals.ts";
|
|
12
11
|
import type { ExuluTool } from "@SRC/exulu/tool.ts";
|
|
13
12
|
import { resolveModel } from "@SRC/exulu/resolve-model.ts";
|
|
@@ -115,7 +114,6 @@ export const createWorkers = async (
|
|
|
115
114
|
queues: ExuluQueueConfig[],
|
|
116
115
|
config: ExuluConfig,
|
|
117
116
|
contexts: ExuluContext[],
|
|
118
|
-
rerankers: ExuluReranker[],
|
|
119
117
|
evals: ExuluEval[],
|
|
120
118
|
tools: ExuluTool[],
|
|
121
119
|
tracer?: Tracer,
|
|
@@ -284,14 +282,8 @@ export const createWorkers = async (
|
|
|
284
282
|
throw new Error(`Context ${data.context} not found in the registry.`);
|
|
285
283
|
}
|
|
286
284
|
|
|
287
|
-
if (!
|
|
288
|
-
throw new Error(`No embedder
|
|
289
|
-
}
|
|
290
|
-
|
|
291
|
-
const embedder = contexts.find((context) => context.embedder?.id === data.embedder);
|
|
292
|
-
|
|
293
|
-
if (!embedder) {
|
|
294
|
-
throw new Error(`Embedder ${data.embedder} not found in the registry.`);
|
|
285
|
+
if (!context.embedder) {
|
|
286
|
+
throw new Error(`No embedder configured for context ${data.context}.`);
|
|
295
287
|
}
|
|
296
288
|
|
|
297
289
|
const result = await context.createAndUpsertEmbeddings(
|
|
@@ -299,7 +291,7 @@ export const createWorkers = async (
|
|
|
299
291
|
config,
|
|
300
292
|
data.user,
|
|
301
293
|
{
|
|
302
|
-
label: embedder.
|
|
294
|
+
label: context.embedder.model,
|
|
303
295
|
trigger: data.trigger,
|
|
304
296
|
},
|
|
305
297
|
data.role,
|
|
@@ -520,7 +512,6 @@ export const createWorkers = async (
|
|
|
520
512
|
provider,
|
|
521
513
|
inputMessages,
|
|
522
514
|
contexts,
|
|
523
|
-
rerankers,
|
|
524
515
|
user,
|
|
525
516
|
tools,
|
|
526
517
|
config,
|
|
@@ -623,7 +614,6 @@ export const createWorkers = async (
|
|
|
623
614
|
provider,
|
|
624
615
|
inputMessages,
|
|
625
616
|
contexts,
|
|
626
|
-
rerankers,
|
|
627
617
|
user,
|
|
628
618
|
tools,
|
|
629
619
|
config,
|
|
@@ -1324,7 +1314,6 @@ export const processUiMessagesFlow = async ({
|
|
|
1324
1314
|
provider,
|
|
1325
1315
|
inputMessages,
|
|
1326
1316
|
contexts,
|
|
1327
|
-
rerankers,
|
|
1328
1317
|
user,
|
|
1329
1318
|
tools,
|
|
1330
1319
|
config,
|
|
@@ -1336,7 +1325,6 @@ export const processUiMessagesFlow = async ({
|
|
|
1336
1325
|
provider: ExuluProvider;
|
|
1337
1326
|
inputMessages: UIMessage[];
|
|
1338
1327
|
contexts: ExuluContext[];
|
|
1339
|
-
rerankers: ExuluReranker[];
|
|
1340
1328
|
user: User;
|
|
1341
1329
|
tools: ExuluTool[];
|
|
1342
1330
|
config: ExuluConfig;
|
|
@@ -1376,7 +1364,6 @@ export const processUiMessagesFlow = async ({
|
|
|
1376
1364
|
agent,
|
|
1377
1365
|
tools,
|
|
1378
1366
|
contexts,
|
|
1379
|
-
rerankers,
|
|
1380
1367
|
disabledTools,
|
|
1381
1368
|
providers,
|
|
1382
1369
|
user,
|
|
@@ -1495,7 +1482,6 @@ export const processUiMessagesFlow = async ({
|
|
|
1495
1482
|
try {
|
|
1496
1483
|
const result = await provider.generateStream({
|
|
1497
1484
|
contexts,
|
|
1498
|
-
rerankers,
|
|
1499
1485
|
agent: agent,
|
|
1500
1486
|
user,
|
|
1501
1487
|
approvedTools: tools.map((tool) => "tool-" + sanitizeToolName(tool.name)),
|
package/package.json
CHANGED
|
@@ -1,208 +0,0 @@
|
|
|
1
|
-
import { generateText, stepCountIs, tool } from "ai";
|
|
2
|
-
import type { LanguageModel, Tool as AITool, ModelMessage } from "ai";
|
|
3
|
-
import { z } from "zod";
|
|
4
|
-
import { withRetry } from "@SRC/utils/with-retry";
|
|
5
|
-
import type { ExuluReranker } from "@SRC/exulu/reranker";
|
|
6
|
-
import type { AgenticRetrievalOutput, ChunkResult } from "./types";
|
|
7
|
-
import { DEFAULT_MAX_STEPS, type AgenticRetrievalLog, type ContextRetrievalConfig } from ".";
|
|
8
|
-
|
|
9
|
-
const FINISH_TOOL_NAME = "finish_retrieval";
|
|
10
|
-
|
|
11
|
-
const finishRetrievalTool = tool({
|
|
12
|
-
description:
|
|
13
|
-
"Call this tool when you have retrieved sufficient information and no further searches are needed. " +
|
|
14
|
-
"You MUST call this tool to signal that retrieval is complete — do not write a text conclusion.",
|
|
15
|
-
inputSchema: z.object({
|
|
16
|
-
reasoning: z.string().describe("One sentence explaining why retrieval is complete"),
|
|
17
|
-
}),
|
|
18
|
-
execute: async ({ reasoning }) => JSON.stringify({ finished: true, reasoning }),
|
|
19
|
-
});
|
|
20
|
-
|
|
21
|
-
function extractChunksFromToolResults(toolResults: any[]): ChunkResult[] {
|
|
22
|
-
const chunks: ChunkResult[] = [];
|
|
23
|
-
for (const result of toolResults ?? []) {
|
|
24
|
-
// AI SDK v6 uses `output` (not `result`) for tool result values
|
|
25
|
-
const rawOutput = result.output ?? result.result;
|
|
26
|
-
let parsed: any;
|
|
27
|
-
try {
|
|
28
|
-
parsed = typeof rawOutput === "string" ? JSON.parse(rawOutput) : rawOutput;
|
|
29
|
-
} catch {
|
|
30
|
-
continue;
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
if (Array.isArray(parsed)) {
|
|
34
|
-
for (const item of parsed) {
|
|
35
|
-
if (item?.item_id && item?.context) {
|
|
36
|
-
chunks.push({
|
|
37
|
-
item_name: item.item_name,
|
|
38
|
-
item_id: item.item_id,
|
|
39
|
-
context: item.context?.id ?? item.context,
|
|
40
|
-
chunk_id: item.chunk_id,
|
|
41
|
-
chunk_index: item.chunk_index,
|
|
42
|
-
chunk_content: item.chunk_content,
|
|
43
|
-
metadata: item.metadata,
|
|
44
|
-
});
|
|
45
|
-
}
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
|
-
return chunks;
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
/**
|
|
53
|
-
* Core agent loop: one generateText call per step.
|
|
54
|
-
*
|
|
55
|
-
* Unlike v2 (which split each step into a reasoning call + a separate tool
|
|
56
|
-
* execution call), here a single call with toolChoice: "auto" lets the model
|
|
57
|
-
* reason and call tools in one pass. The model sees tool results from the
|
|
58
|
-
* previous step via the conversation history (messages array).
|
|
59
|
-
*
|
|
60
|
-
* The loop stops when:
|
|
61
|
-
* - The model makes no tool calls (it's satisfied), OR
|
|
62
|
-
* - The strategy's stepBudget is exhausted
|
|
63
|
-
*/
|
|
64
|
-
export async function* runAgentLoop(params: {
|
|
65
|
-
config: ContextRetrievalConfig;
|
|
66
|
-
userQuery: string;
|
|
67
|
-
log: AgenticRetrievalLog;
|
|
68
|
-
todos: {
|
|
69
|
-
status: "planned" | "completed";
|
|
70
|
-
description: string;
|
|
71
|
-
current: boolean;
|
|
72
|
-
}[];
|
|
73
|
-
tools: Record<string, AITool>;
|
|
74
|
-
model: LanguageModel;
|
|
75
|
-
reranker?: ExuluReranker;
|
|
76
|
-
sessionID?: string;
|
|
77
|
-
onStepComplete?: (step: AgenticRetrievalOutput["steps"][0]) => void;
|
|
78
|
-
}): AsyncGenerator<AgenticRetrievalOutput> {
|
|
79
|
-
const { userQuery, tools, model, reranker, sessionID, onStepComplete, config, log, todos } = params;
|
|
80
|
-
|
|
81
|
-
const output: AgenticRetrievalOutput = {
|
|
82
|
-
steps: [],
|
|
83
|
-
reasoning: [],
|
|
84
|
-
chunks: [],
|
|
85
|
-
usage: [],
|
|
86
|
-
totalTokens: 0,
|
|
87
|
-
};
|
|
88
|
-
|
|
89
|
-
const messages: ModelMessage[] = [{ role: "user", content: userQuery }];
|
|
90
|
-
|
|
91
|
-
const stepBudget = config.maxSteps || DEFAULT_MAX_STEPS
|
|
92
|
-
|
|
93
|
-
const SYSTEM_PROMPT = `
|
|
94
|
-
You are a helpful assistant that can search the knowledge base and retrieve information.
|
|
95
|
-
|
|
96
|
-
You are searching for information that is relevant to the following question:
|
|
97
|
-
<user_query>
|
|
98
|
-
${userQuery}
|
|
99
|
-
</user_query>
|
|
100
|
-
|
|
101
|
-
You have the following instructions for this knowledge base:
|
|
102
|
-
<instructions>
|
|
103
|
-
${config.instructions}
|
|
104
|
-
</instructions>
|
|
105
|
-
|
|
106
|
-
A first search strategy was drafted as a todo list:
|
|
107
|
-
<todo_list>
|
|
108
|
-
${todos.map((todo, index) => `${index + 1}. ${todo.status} - ${todo.description}`).join("\n")}
|
|
109
|
-
</todo_list>
|
|
110
|
-
|
|
111
|
-
`;
|
|
112
|
-
|
|
113
|
-
for (let step = 0; step < stepBudget; step++) {
|
|
114
|
-
|
|
115
|
-
log.entries.push({
|
|
116
|
-
label: "Agent loop step",
|
|
117
|
-
timestamp: new Date().toISOString(),
|
|
118
|
-
message: `[EXULU] v3 agent loop — step ${step + 1}/${stepBudget}`,
|
|
119
|
-
});
|
|
120
|
-
|
|
121
|
-
let result: Awaited<ReturnType<typeof generateText>>;
|
|
122
|
-
|
|
123
|
-
const stepTools = { ...tools, [FINISH_TOOL_NAME]: finishRetrievalTool };
|
|
124
|
-
|
|
125
|
-
try {
|
|
126
|
-
result = await withRetry(() =>
|
|
127
|
-
generateText({
|
|
128
|
-
model,
|
|
129
|
-
temperature: 0,
|
|
130
|
-
system: SYSTEM_PROMPT,
|
|
131
|
-
messages,
|
|
132
|
-
tools: stepTools,
|
|
133
|
-
toolChoice: "required",
|
|
134
|
-
stopWhen: stepCountIs(1),
|
|
135
|
-
}),
|
|
136
|
-
);
|
|
137
|
-
} catch (err) {
|
|
138
|
-
console.error("[EXULU] v3 generateText failed:", err);
|
|
139
|
-
throw err;
|
|
140
|
-
}
|
|
141
|
-
|
|
142
|
-
// Carry conversation forward: assistant message + tool results go into history
|
|
143
|
-
// so the model sees them on the next iteration.
|
|
144
|
-
messages.push(...(result.response.messages as ModelMessage[]));
|
|
145
|
-
|
|
146
|
-
// Extract chunks from tool results
|
|
147
|
-
let stepChunks: any[] = extractChunksFromToolResults(result.toolResults as any[]);
|
|
148
|
-
|
|
149
|
-
// Deduplicate by chunk_id within this step (parallel tool calls can return the same chunk
|
|
150
|
-
// if the agent searches the same context twice, or the same chunk is indexed in two contexts).
|
|
151
|
-
const seenChunkIds = new Set<string>();
|
|
152
|
-
stepChunks = stepChunks.filter((c) => {
|
|
153
|
-
if (!c.chunk_id) return true;
|
|
154
|
-
if (seenChunkIds.has(c.chunk_id)) return false;
|
|
155
|
-
seenChunkIds.add(c.chunk_id);
|
|
156
|
-
return true;
|
|
157
|
-
});
|
|
158
|
-
|
|
159
|
-
// Record step
|
|
160
|
-
const stepRecord = {
|
|
161
|
-
stepNumber: step + 1,
|
|
162
|
-
text: result.text ?? "",
|
|
163
|
-
toolCalls: (result.toolCalls as any[])?.map((tc) => ({
|
|
164
|
-
name: tc.toolName,
|
|
165
|
-
id: tc.toolCallId,
|
|
166
|
-
input: tc.input,
|
|
167
|
-
})) ?? [],
|
|
168
|
-
chunks: stepChunks,
|
|
169
|
-
tokens: result.usage?.totalTokens ?? 0,
|
|
170
|
-
};
|
|
171
|
-
|
|
172
|
-
log.entries.push({
|
|
173
|
-
label: "Step completed",
|
|
174
|
-
timestamp: new Date().toISOString(),
|
|
175
|
-
message: JSON.stringify(stepRecord),
|
|
176
|
-
});
|
|
177
|
-
|
|
178
|
-
output.steps.push(stepRecord);
|
|
179
|
-
output.reasoning.push({
|
|
180
|
-
text: result.text ?? "",
|
|
181
|
-
tools: (result.toolCalls as any[])?.map((tc) => ({
|
|
182
|
-
name: tc.toolName,
|
|
183
|
-
id: tc.toolCallId,
|
|
184
|
-
input: tc.input,
|
|
185
|
-
output: stepChunks,
|
|
186
|
-
})) ?? [],
|
|
187
|
-
});
|
|
188
|
-
// Deduplicate against chunks already accumulated from prior steps
|
|
189
|
-
const existingChunkIds = new Set(output.chunks.map((c) => c.chunk_id).filter(Boolean));
|
|
190
|
-
output.chunks.push(...stepChunks.filter((c) => !c.chunk_id || !existingChunkIds.has(c.chunk_id)));
|
|
191
|
-
output.usage.push(result.usage);
|
|
192
|
-
|
|
193
|
-
onStepComplete?.(stepRecord);
|
|
194
|
-
|
|
195
|
-
yield { ...output };
|
|
196
|
-
|
|
197
|
-
// Stop if the model called finish_retrieval AND no forced continuation is needed
|
|
198
|
-
const calledFinish = (result.toolCalls as any[])?.some(
|
|
199
|
-
(tc) => tc.toolName === FINISH_TOOL_NAME,
|
|
200
|
-
);
|
|
201
|
-
if (calledFinish) {
|
|
202
|
-
console.log(`[EXULU] v3 model called finish_retrieval after step ${step + 1}`);
|
|
203
|
-
break;
|
|
204
|
-
}
|
|
205
|
-
}
|
|
206
|
-
|
|
207
|
-
output.totalTokens = output.usage.reduce((sum, u) => sum + (u?.totalTokens ?? 0), 0);
|
|
208
|
-
}
|
|
@@ -1,79 +0,0 @@
|
|
|
1
|
-
import { ExuluContext, getTableName } from "@SRC/exulu/context";
|
|
2
|
-
import { postgresClient } from "@SRC/postgres/client";
|
|
3
|
-
import { applyAccessControl } from "@SRC/graphql/utilities/access-control";
|
|
4
|
-
import { convertContextToTableDefinition } from "@SRC/graphql/utilities/convert-context-to-table-definition";
|
|
5
|
-
import type { User } from "@EXULU_TYPES/models/user";
|
|
6
|
-
|
|
7
|
-
const CACHE_TTL_MS = 60 * 60 * 1000; // 1 hour
|
|
8
|
-
|
|
9
|
-
export interface ContextSample {
|
|
10
|
-
contextId: string;
|
|
11
|
-
contextName: string;
|
|
12
|
-
/** All field names available on items (standard + custom) */
|
|
13
|
-
fields: string[];
|
|
14
|
-
/** Up to 2 example item records */
|
|
15
|
-
exampleItems: Array<Record<string, any>>;
|
|
16
|
-
sampledAt: number;
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
/**
|
|
20
|
-
* Pulls 1–2 example item records per context at agent initialization and caches
|
|
21
|
-
* them in memory. These samples are injected into the classifier prompt so the
|
|
22
|
-
* model understands what data is actually stored (not just field names).
|
|
23
|
-
*/
|
|
24
|
-
export class ContextSampler {
|
|
25
|
-
private cache = new Map<string, ContextSample>();
|
|
26
|
-
|
|
27
|
-
async getSamples(
|
|
28
|
-
contexts: ExuluContext[],
|
|
29
|
-
user?: User,
|
|
30
|
-
role?: string,
|
|
31
|
-
): Promise<ContextSample[]> {
|
|
32
|
-
return Promise.all(contexts.map((ctx) => this.getSample(ctx, user, role)));
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
private async getSample(
|
|
36
|
-
ctx: ExuluContext,
|
|
37
|
-
user?: User,
|
|
38
|
-
role?: string,
|
|
39
|
-
): Promise<ContextSample> {
|
|
40
|
-
const cached = this.cache.get(ctx.id);
|
|
41
|
-
if (cached && Date.now() - cached.sampledAt < CACHE_TTL_MS) {
|
|
42
|
-
return cached;
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
const { db } = await postgresClient();
|
|
46
|
-
const tableName = getTableName(ctx.id);
|
|
47
|
-
const tableDefinition = convertContextToTableDefinition(ctx);
|
|
48
|
-
|
|
49
|
-
const customFieldNames = ctx.fields.map((f) => f.name);
|
|
50
|
-
const selectFields = ["id", "name", "external_id", ...customFieldNames];
|
|
51
|
-
|
|
52
|
-
let exampleItems: Record<string, any>[] = [];
|
|
53
|
-
try {
|
|
54
|
-
let query = db(tableName).select(selectFields).whereNull("archived").limit(2);
|
|
55
|
-
query = applyAccessControl(tableDefinition, query, user, tableName);
|
|
56
|
-
exampleItems = await query;
|
|
57
|
-
} catch {
|
|
58
|
-
// If table doesn't exist yet or column mismatch, return empty samples
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
const sample: ContextSample = {
|
|
62
|
-
contextId: ctx.id,
|
|
63
|
-
contextName: ctx.name,
|
|
64
|
-
fields: ["name", "external_id", ...customFieldNames],
|
|
65
|
-
exampleItems,
|
|
66
|
-
sampledAt: Date.now(),
|
|
67
|
-
};
|
|
68
|
-
|
|
69
|
-
this.cache.set(ctx.id, sample);
|
|
70
|
-
|
|
71
|
-
// Refresh in background after TTL without blocking the caller
|
|
72
|
-
return sample;
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
/** Evict a context from cache so it's re-sampled on next use */
|
|
76
|
-
invalidate(contextId: string): void {
|
|
77
|
-
this.cache.delete(contextId);
|
|
78
|
-
}
|
|
79
|
-
}
|