@exulu/backend 1.67.0 → 1.68.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@ import { generateText, stepCountIs, tool } from "ai";
2
2
  import type { LanguageModel, Tool as AITool, ModelMessage } from "ai";
3
3
  import { z } from "zod";
4
4
  import { withRetry } from "@SRC/utils/with-retry";
5
- import type { ExuluReranker } from "@SRC/exulu/reranker";
5
+ import type { ResolvedReranker } from "@SRC/exulu/resolve-reranker";
6
6
  import type { AgenticRetrievalOutput, ChunkResult, ClassificationResult } from "./types";
7
7
  import type { StrategyConfig } from "./strategies";
8
8
  import { createDynamicTools } from "./dynamic-tools";
@@ -69,7 +69,7 @@ export async function* runAgentLoop(params: {
69
69
  strategy: StrategyConfig;
70
70
  tools: Record<string, AITool>;
71
71
  model: LanguageModel;
72
- reranker?: ExuluReranker;
72
+ reranker?: ResolvedReranker;
73
73
  contextGuidance?: string;
74
74
  customInstructions?: string;
75
75
  classification: ClassificationResult;
@@ -171,8 +171,8 @@ export async function* runAgentLoop(params: {
171
171
 
172
172
  // Rerank if reranker is available
173
173
  if (reranker && stepChunks.length > 0) {
174
- console.log(`[EXULU] v3 reranking ${stepChunks.length} chunks with ${reranker.name}`);
175
- stepChunks = await reranker.run(query, stepChunks as any);
174
+ console.log(`[EXULU] v3 reranking ${stepChunks.length} chunks with ${reranker.model}`);
175
+ stepChunks = await reranker.rerank(query, stepChunks);
176
176
  }
177
177
 
178
178
  // Create dynamic tools (browse adjacent pages, load specific pages)
@@ -2,7 +2,8 @@ import { z } from "zod";
2
2
  import { createBashTool } from "bash-tool";
3
3
  import type { LanguageModel, Tool } from "ai";
4
4
  import type { ExuluContext } from "@SRC/exulu/context";
5
- import type { ExuluReranker } from "@SRC/exulu/reranker";
5
+ import { resolveReranker } from "@SRC/exulu/resolve-reranker";
6
+ import type { ResolvedReranker } from "@SRC/exulu/resolve-reranker";
6
7
  import { ExuluTool } from "@SRC/exulu/tool";
7
8
  import type { User } from "@EXULU_TYPES/models/user";
8
9
  import { checkLicense } from "@EE/entitlements";
@@ -34,7 +35,7 @@ async function* executeV3({
34
35
  }: {
35
36
  query: string;
36
37
  contexts: ExuluContext[];
37
- reranker?: ExuluReranker;
38
+ reranker?: ResolvedReranker;
38
39
  toolVariablesConfig?: Record<string, any>;
39
40
  model: LanguageModel;
40
41
  user?: User;
@@ -189,7 +190,6 @@ async function* executeV3({
189
190
  export function createAgenticRetrievalToolV3({
190
191
  contexts,
191
192
  instructions: adminInstructions,
192
- rerankers,
193
193
  user,
194
194
  role,
195
195
  model,
@@ -197,7 +197,6 @@ export function createAgenticRetrievalToolV3({
197
197
  memoryItems
198
198
  }: {
199
199
  contexts: ExuluContext[];
200
- rerankers: ExuluReranker[];
201
200
  user?: User;
202
201
  role?: string;
203
202
  model?: LanguageModel;
@@ -355,7 +354,7 @@ export function createAgenticRetrievalToolV3({
355
354
  }
356
355
 
357
356
  let activeContexts = contexts;
358
- let configuredReranker: ExuluReranker | undefined;
357
+ let configuredReranker: ResolvedReranker | undefined;
359
358
  let configInstructions = "";
360
359
  let logTrajectory = false;
361
360
  let requiresPreselectedContexts = false;
@@ -382,7 +381,22 @@ export function createAgenticRetrievalToolV3({
382
381
  const rerankerId = toolVariablesConfig["reranker"];
383
382
 
384
383
  if (rerankerId && rerankerId !== "none") {
385
- configuredReranker = rerankers.find((r) => r.id === rerankerId);
384
+ // rerankerId is a LiteLLM model_name from config.litellm.yaml
385
+ // (model_info.type: reranker). Resolution is best-effort: a
386
+ // misconfigured model or an unready proxy must not break retrieval —
387
+ // it just runs unreranked, matching the old find()→undefined path.
388
+ try {
389
+ configuredReranker = await resolveReranker({
390
+ model: rerankerId,
391
+ user,
392
+ roleId: role,
393
+ });
394
+ } catch (err) {
395
+ console.warn(
396
+ `[EXULU] v3 — could not resolve reranker "${rerankerId}", continuing without reranking:`,
397
+ err,
398
+ );
399
+ }
386
400
  }
387
401
  }
388
402
 
@@ -14,17 +14,41 @@ import { checkLicense } from '@EE/entitlements';
14
14
  import { executePythonScript } from '@SRC/utils/python-executor';
15
15
  import { setupPythonEnvironment, validatePythonEnvironment } from '@SRC/utils/python-setup';
16
16
  import { LiteParse } from '@llamaindex/liteparse';
17
- import { Mistral } from '@mistralai/mistralai';
18
- import { ExuluVariables } from '@SRC/index';
17
+ import { resolveOcr } from '@SRC/exulu/resolve-ocr';
18
+ import type { ResolveOcrInput } from '@SRC/exulu/resolve-ocr';
19
+ import { resolveModel } from '@SRC/exulu/resolve-model';
19
20
 
20
21
  type DocumentProcessorConfig = {
21
22
  vlm?: {
22
- model: LanguageModel;
23
+ /**
24
+ * LiteLLM model_name for the VLM page-validation pass (declared in
25
+ * config.litellm.yaml, e.g. "vertex-gemini-2.5-flash"). Resolved via
26
+ * resolveModel() so the VLM pass shares the same tag-based cost controls
27
+ * and provider-switching as chat / embeddings / OCR, and the underlying
28
+ * provider can be swapped without code changes.
29
+ */
30
+ model: string;
23
31
  concurrency: number;
24
32
  },
25
33
  processor: {
26
34
  name: "docling" | "liteparse" | "mistral" | "officeparser"
35
+ /**
36
+ * LiteLLM model_name for the "mistral" OCR processor (declared in
37
+ * config.litellm.yaml). Defaults to "mistral-ocr". OCR is routed through
38
+ * the LiteLLM proxy so it shares the same tag-based cost controls as chat
39
+ * and embeddings, and the underlying provider (mistral / azure_ai /
40
+ * vertex_ai) can be switched without code changes.
41
+ */
42
+ model?: string
27
43
  }
44
+ /**
45
+ * Optional cost-attribution context, forwarded to LiteLLM as spend tags
46
+ * (user / role / project / context) for both the OCR pass (resolveOcr) and
47
+ * the VLM page-validation pass (resolveModel). Not yet populated by callers;
48
+ * the wiring is in place so per-user/per-context budgets work the moment
49
+ * attribution is threaded through.
50
+ */
51
+ attribution?: Omit<ResolveOcrInput, "model">
28
52
  debugging?: {
29
53
  deleteTempFiles?: boolean;
30
54
  }
@@ -94,6 +118,38 @@ async function processWord(file: Buffer): Promise<ProcessorOutput> {
94
118
  }
95
119
  }
96
120
 
121
+ /**
122
+ * Resolve the dev-supplied VLM `model` string (a LiteLLM model_name from
123
+ * config.litellm.yaml, e.g. "vertex-gemini-2.5-flash") into an `ai` SDK
124
+ * LanguageModel via resolveModel. This routes the VLM page-validation pass
125
+ * through the LiteLLM proxy — same tag-based cost controls and provider
126
+ * switching as chat / embeddings / OCR — and keeps the internal VLM helpers
127
+ * (validateWithVLM / validatePageWithVLM) working with a LanguageModel.
128
+ *
129
+ * Returns undefined when no VLM model is configured. Attribution (user /
130
+ * project / agent / routine) is forwarded for spend tagging when callers
131
+ * populate config.attribution; rbacBypass is set because this is a background
132
+ * package call where model-level access control is delegated to LiteLLM.
133
+ */
134
+ async function resolveVlmModel(
135
+ config?: DocumentProcessorConfig,
136
+ ): Promise<LanguageModel | undefined> {
137
+ const modelId = config?.vlm?.model;
138
+ if (!modelId) return undefined;
139
+
140
+ const { languageModel } = await resolveModel({
141
+ modelId,
142
+ providers: [], // unused in LiteLLM mode; resolveModel ignores it there
143
+ user: config?.attribution?.user,
144
+ project: config?.attribution?.project,
145
+ agent: config?.attribution?.agent,
146
+ routine: config?.attribution?.routine,
147
+ rbacBypass: true,
148
+ });
149
+
150
+ return languageModel;
151
+ }
152
+
97
153
  /**
98
154
  * Processes a standalone image file by optionally extracting content using VLM
99
155
  */
@@ -122,14 +178,15 @@ async function processImage(
122
178
  }];
123
179
 
124
180
  // If VLM is enabled, use it to extract content from the image
125
- if (config?.vlm?.model) {
181
+ const vlmModel = await resolveVlmModel(config);
182
+ if (vlmModel) {
126
183
  console.log('[EXULU] Extracting content from image using VLM...');
127
184
 
128
185
  json = await validateWithVLM(
129
186
  json,
130
- config.vlm.model,
187
+ vlmModel,
131
188
  verbose,
132
- config.vlm.concurrency
189
+ config!.vlm!.concurrency
133
190
  );
134
191
 
135
192
  // Save the processed result
@@ -679,15 +736,6 @@ async function processDocument(
679
736
  };
680
737
  }
681
738
 
682
- const getMistralApiKey = async () => {
683
- if (process.env.MISTRAL_API_KEY) {
684
- return process.env.MISTRAL_API_KEY;
685
- } else {
686
- const variable = await ExuluVariables.get("MISTRAL_API_KEY");
687
- return variable;
688
- }
689
- }
690
-
691
739
  async function processPdf(
692
740
  buffer: Buffer,
693
741
  paths: ProcessingPaths,
@@ -759,28 +807,25 @@ async function processPdf(
759
807
 
760
808
  } else if (config?.processor.name === "mistral") {
761
809
 
762
- const MISTRAL_API_KEY = await getMistralApiKey();
763
- if (!MISTRAL_API_KEY) {
764
- throw new Error('[EXULU] MISTRAL_API_KEY is not set, please set it in the environment variable via process.env or via an Exulu variable named "MISTRAL_API_KEY".');
765
- }
810
+ // OCR is routed through the LiteLLM proxy's Mistral-compatible /v1/ocr
811
+ // endpoint (see resolveOcr) rather than the Mistral SDK directly. This
812
+ // gives us tag-based cost control and lets us switch the OCR provider
813
+ // (mistral / azure_ai / vertex_ai) from config.litellm.yaml.
814
+ const resolved = await resolveOcr({
815
+ model: config.processor.model ?? "mistral-ocr",
816
+ ...config.attribution,
817
+ });
766
818
 
767
819
  // Wait a random time between 1 and 5 seconds to prevent rate limiting
768
820
  await new Promise(resolve => setTimeout(resolve, Math.floor(Math.random() * 4000) + 1000));
769
821
 
770
822
  const base64Pdf = buffer.toString('base64');
771
- const client = new Mistral({ apiKey: MISTRAL_API_KEY });
772
823
 
773
824
  const ocrResponse = await withRetry(async () => {
774
- type MistralOCRResponse = Awaited<ReturnType<typeof client.ocr.process>>;
775
- const ocrResponse: MistralOCRResponse = await client.ocr.process({
776
- document: {
777
- type: "document_url",
778
- documentUrl: "data:application/pdf;base64," + base64Pdf
779
- },
780
- model: "mistral-ocr-latest",
781
- includeImageBase64: false
782
- });
783
- return ocrResponse;
825
+ return await resolved.ocr({
826
+ type: "document_url",
827
+ document_url: "data:application/pdf;base64," + base64Pdf,
828
+ }, { includeImageBase64: false });
784
829
  }, 10);
785
830
 
786
831
  const parser = new LiteParse();
@@ -838,13 +883,14 @@ async function processPdf(
838
883
  }
839
884
 
840
885
  // Apply VLM validation if enabled
841
- if (config?.vlm?.model && json.length > 0) {
886
+ const vlmModel = config?.vlm?.model ? await resolveVlmModel(config) : undefined;
887
+ if (vlmModel && json.length > 0) {
842
888
 
843
889
  json = await validateWithVLM(
844
890
  json,
845
- config.vlm.model,
891
+ vlmModel,
846
892
  verbose,
847
- config.vlm.concurrency
893
+ config!.vlm!.concurrency
848
894
  );
849
895
 
850
896
  console.log('[EXULU] \n📊 Processing Summary:');
@@ -1046,7 +1092,6 @@ export async function documentProcessor({
1046
1092
  } catch (error) {
1047
1093
  console.error('Error during chunking:', error);
1048
1094
  throw error;
1049
-
1050
1095
  } finally {
1051
1096
  if (config?.debugging?.deleteTempFiles !== false) {
1052
1097
  // Delete the temp directory using the local array to avoid race conditions
package/ee/workers.ts CHANGED
@@ -7,7 +7,6 @@ import { ExuluStorage } from "@SRC/exulu/storage.ts";
7
7
  import type { ExuluAgent } from "@EXULU_TYPES/models/agent.ts";
8
8
  import type { ExuluQueueConfig } from "@EXULU_TYPES/queue-config.ts";
9
9
  import { getTableName, type ExuluContext } from "@SRC/exulu/context.ts";
10
- import type { ExuluReranker } from "@SRC/exulu/reranker.ts";
11
10
  import type { ExuluEval } from "@SRC/exulu/evals.ts";
12
11
  import type { ExuluTool } from "@SRC/exulu/tool.ts";
13
12
  import { resolveModel } from "@SRC/exulu/resolve-model.ts";
@@ -115,7 +114,6 @@ export const createWorkers = async (
115
114
  queues: ExuluQueueConfig[],
116
115
  config: ExuluConfig,
117
116
  contexts: ExuluContext[],
118
- rerankers: ExuluReranker[],
119
117
  evals: ExuluEval[],
120
118
  tools: ExuluTool[],
121
119
  tracer?: Tracer,
@@ -284,14 +282,8 @@ export const createWorkers = async (
284
282
  throw new Error(`Context ${data.context} not found in the registry.`);
285
283
  }
286
284
 
287
- if (!data.embedder) {
288
- throw new Error(`No embedder set for embedder job.`);
289
- }
290
-
291
- const embedder = contexts.find((context) => context.embedder?.id === data.embedder);
292
-
293
- if (!embedder) {
294
- throw new Error(`Embedder ${data.embedder} not found in the registry.`);
285
+ if (!context.embedder) {
286
+ throw new Error(`No embedder configured for context ${data.context}.`);
295
287
  }
296
288
 
297
289
  const result = await context.createAndUpsertEmbeddings(
@@ -299,7 +291,7 @@ export const createWorkers = async (
299
291
  config,
300
292
  data.user,
301
293
  {
302
- label: embedder.name,
294
+ label: context.embedder.model,
303
295
  trigger: data.trigger,
304
296
  },
305
297
  data.role,
@@ -520,7 +512,6 @@ export const createWorkers = async (
520
512
  provider,
521
513
  inputMessages,
522
514
  contexts,
523
- rerankers,
524
515
  user,
525
516
  tools,
526
517
  config,
@@ -623,7 +614,6 @@ export const createWorkers = async (
623
614
  provider,
624
615
  inputMessages,
625
616
  contexts,
626
- rerankers,
627
617
  user,
628
618
  tools,
629
619
  config,
@@ -1324,7 +1314,6 @@ export const processUiMessagesFlow = async ({
1324
1314
  provider,
1325
1315
  inputMessages,
1326
1316
  contexts,
1327
- rerankers,
1328
1317
  user,
1329
1318
  tools,
1330
1319
  config,
@@ -1336,7 +1325,6 @@ export const processUiMessagesFlow = async ({
1336
1325
  provider: ExuluProvider;
1337
1326
  inputMessages: UIMessage[];
1338
1327
  contexts: ExuluContext[];
1339
- rerankers: ExuluReranker[];
1340
1328
  user: User;
1341
1329
  tools: ExuluTool[];
1342
1330
  config: ExuluConfig;
@@ -1376,7 +1364,6 @@ export const processUiMessagesFlow = async ({
1376
1364
  agent,
1377
1365
  tools,
1378
1366
  contexts,
1379
- rerankers,
1380
1367
  disabledTools,
1381
1368
  providers,
1382
1369
  user,
@@ -1495,7 +1482,6 @@ export const processUiMessagesFlow = async ({
1495
1482
  try {
1496
1483
  const result = await provider.generateStream({
1497
1484
  contexts,
1498
- rerankers,
1499
1485
  agent: agent,
1500
1486
  user,
1501
1487
  approvedTools: tools.map((tool) => "tool-" + sanitizeToolName(tool.name)),
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@exulu/backend",
3
3
  "author": "Qventu Bv.",
4
- "version": "1.67.0",
4
+ "version": "1.68.0",
5
5
  "main": "./dist/index.js",
6
6
  "private": false,
7
7
  "publishConfig": {
@@ -1,208 +0,0 @@
1
- import { generateText, stepCountIs, tool } from "ai";
2
- import type { LanguageModel, Tool as AITool, ModelMessage } from "ai";
3
- import { z } from "zod";
4
- import { withRetry } from "@SRC/utils/with-retry";
5
- import type { ExuluReranker } from "@SRC/exulu/reranker";
6
- import type { AgenticRetrievalOutput, ChunkResult } from "./types";
7
- import { DEFAULT_MAX_STEPS, type AgenticRetrievalLog, type ContextRetrievalConfig } from ".";
8
-
9
- const FINISH_TOOL_NAME = "finish_retrieval";
10
-
11
- const finishRetrievalTool = tool({
12
- description:
13
- "Call this tool when you have retrieved sufficient information and no further searches are needed. " +
14
- "You MUST call this tool to signal that retrieval is complete — do not write a text conclusion.",
15
- inputSchema: z.object({
16
- reasoning: z.string().describe("One sentence explaining why retrieval is complete"),
17
- }),
18
- execute: async ({ reasoning }) => JSON.stringify({ finished: true, reasoning }),
19
- });
20
-
21
- function extractChunksFromToolResults(toolResults: any[]): ChunkResult[] {
22
- const chunks: ChunkResult[] = [];
23
- for (const result of toolResults ?? []) {
24
- // AI SDK v6 uses `output` (not `result`) for tool result values
25
- const rawOutput = result.output ?? result.result;
26
- let parsed: any;
27
- try {
28
- parsed = typeof rawOutput === "string" ? JSON.parse(rawOutput) : rawOutput;
29
- } catch {
30
- continue;
31
- }
32
-
33
- if (Array.isArray(parsed)) {
34
- for (const item of parsed) {
35
- if (item?.item_id && item?.context) {
36
- chunks.push({
37
- item_name: item.item_name,
38
- item_id: item.item_id,
39
- context: item.context?.id ?? item.context,
40
- chunk_id: item.chunk_id,
41
- chunk_index: item.chunk_index,
42
- chunk_content: item.chunk_content,
43
- metadata: item.metadata,
44
- });
45
- }
46
- }
47
- }
48
- }
49
- return chunks;
50
- }
51
-
52
- /**
53
- * Core agent loop: one generateText call per step.
54
- *
55
- * Unlike v2 (which split each step into a reasoning call + a separate tool
56
- * execution call), here a single call with toolChoice: "required" makes the model
57
- * reason and call tools in one pass. The model sees tool results from the
58
- * previous step via the conversation history (messages array).
59
- *
60
- * The loop stops when:
61
- * - The model calls the finish_retrieval tool (it's satisfied), OR
62
- * - The strategy's stepBudget is exhausted
63
- */
64
- export async function* runAgentLoop(params: {
65
- config: ContextRetrievalConfig;
66
- userQuery: string;
67
- log: AgenticRetrievalLog;
68
- todos: {
69
- status: "planned" | "completed";
70
- description: string;
71
- current: boolean;
72
- }[];
73
- tools: Record<string, AITool>;
74
- model: LanguageModel;
75
- reranker?: ExuluReranker;
76
- sessionID?: string;
77
- onStepComplete?: (step: AgenticRetrievalOutput["steps"][0]) => void;
78
- }): AsyncGenerator<AgenticRetrievalOutput> {
79
- const { userQuery, tools, model, reranker, sessionID, onStepComplete, config, log, todos } = params;
80
-
81
- const output: AgenticRetrievalOutput = {
82
- steps: [],
83
- reasoning: [],
84
- chunks: [],
85
- usage: [],
86
- totalTokens: 0,
87
- };
88
-
89
- const messages: ModelMessage[] = [{ role: "user", content: userQuery }];
90
-
91
- const stepBudget = config.maxSteps || DEFAULT_MAX_STEPS
92
-
93
- const SYSTEM_PROMPT = `
94
- You are a helpful assistant that can search the knowledge base and retrieve information.
95
-
96
- You are searching for information that is relevant to the following question:
97
- <user_query>
98
- ${userQuery}
99
- </user_query>
100
-
101
- You have the following instructions for this knowledge base:
102
- <instructions>
103
- ${config.instructions}
104
- </instructions>
105
-
106
- A first search strategy was drafted as a todo list:
107
- <todo_list>
108
- ${todos.map((todo, index) => `${index + 1}. ${todo.status} - ${todo.description}`).join("\n")}
109
- </todo_list>
110
-
111
- `;
112
-
113
- for (let step = 0; step < stepBudget; step++) {
114
-
115
- log.entries.push({
116
- label: "Agent loop step",
117
- timestamp: new Date().toISOString(),
118
- message: `[EXULU] v3 agent loop — step ${step + 1}/${stepBudget}`,
119
- });
120
-
121
- let result: Awaited<ReturnType<typeof generateText>>;
122
-
123
- const stepTools = { ...tools, [FINISH_TOOL_NAME]: finishRetrievalTool };
124
-
125
- try {
126
- result = await withRetry(() =>
127
- generateText({
128
- model,
129
- temperature: 0,
130
- system: SYSTEM_PROMPT,
131
- messages,
132
- tools: stepTools,
133
- toolChoice: "required",
134
- stopWhen: stepCountIs(1),
135
- }),
136
- );
137
- } catch (err) {
138
- console.error("[EXULU] v3 generateText failed:", err);
139
- throw err;
140
- }
141
-
142
- // Carry conversation forward: assistant message + tool results go into history
143
- // so the model sees them on the next iteration.
144
- messages.push(...(result.response.messages as ModelMessage[]));
145
-
146
- // Extract chunks from tool results
147
- let stepChunks: any[] = extractChunksFromToolResults(result.toolResults as any[]);
148
-
149
- // Deduplicate by chunk_id within this step (parallel tool calls can return the same chunk
150
- // if the agent searches the same context twice, or the same chunk is indexed in two contexts).
151
- const seenChunkIds = new Set<string>();
152
- stepChunks = stepChunks.filter((c) => {
153
- if (!c.chunk_id) return true;
154
- if (seenChunkIds.has(c.chunk_id)) return false;
155
- seenChunkIds.add(c.chunk_id);
156
- return true;
157
- });
158
-
159
- // Record step
160
- const stepRecord = {
161
- stepNumber: step + 1,
162
- text: result.text ?? "",
163
- toolCalls: (result.toolCalls as any[])?.map((tc) => ({
164
- name: tc.toolName,
165
- id: tc.toolCallId,
166
- input: tc.input,
167
- })) ?? [],
168
- chunks: stepChunks,
169
- tokens: result.usage?.totalTokens ?? 0,
170
- };
171
-
172
- log.entries.push({
173
- label: "Step completed",
174
- timestamp: new Date().toISOString(),
175
- message: JSON.stringify(stepRecord),
176
- });
177
-
178
- output.steps.push(stepRecord);
179
- output.reasoning.push({
180
- text: result.text ?? "",
181
- tools: (result.toolCalls as any[])?.map((tc) => ({
182
- name: tc.toolName,
183
- id: tc.toolCallId,
184
- input: tc.input,
185
- output: stepChunks,
186
- })) ?? [],
187
- });
188
- // Deduplicate against chunks already accumulated from prior steps
189
- const existingChunkIds = new Set(output.chunks.map((c) => c.chunk_id).filter(Boolean));
190
- output.chunks.push(...stepChunks.filter((c) => !c.chunk_id || !existingChunkIds.has(c.chunk_id)));
191
- output.usage.push(result.usage);
192
-
193
- onStepComplete?.(stepRecord);
194
-
195
- yield { ...output };
196
-
197
- // Stop as soon as the model has called finish_retrieval in this step
198
- const calledFinish = (result.toolCalls as any[])?.some(
199
- (tc) => tc.toolName === FINISH_TOOL_NAME,
200
- );
201
- if (calledFinish) {
202
- console.log(`[EXULU] v3 model called finish_retrieval after step ${step + 1}`);
203
- break;
204
- }
205
- }
206
-
207
- output.totalTokens = output.usage.reduce((sum, u) => sum + (u?.totalTokens ?? 0), 0);
208
- }
@@ -1,79 +0,0 @@
1
- import { ExuluContext, getTableName } from "@SRC/exulu/context";
2
- import { postgresClient } from "@SRC/postgres/client";
3
- import { applyAccessControl } from "@SRC/graphql/utilities/access-control";
4
- import { convertContextToTableDefinition } from "@SRC/graphql/utilities/convert-context-to-table-definition";
5
- import type { User } from "@EXULU_TYPES/models/user";
6
-
7
- const CACHE_TTL_MS = 60 * 60 * 1000; // 1 hour
8
-
9
- export interface ContextSample {
10
- contextId: string;
11
- contextName: string;
12
- /** All field names available on items (standard + custom) */
13
- fields: string[];
14
- /** Up to 2 example item records */
15
- exampleItems: Array<Record<string, any>>;
16
- sampledAt: number;
17
- }
18
-
19
- /**
20
- * Pulls 1–2 example item records per context at agent initialization and caches
21
- * them in memory. These samples are injected into the classifier prompt so the
22
- * model understands what data is actually stored (not just field names).
23
- */
24
- export class ContextSampler {
25
- private cache = new Map<string, ContextSample>();
26
-
27
- async getSamples(
28
- contexts: ExuluContext[],
29
- user?: User,
30
- role?: string,
31
- ): Promise<ContextSample[]> {
32
- return Promise.all(contexts.map((ctx) => this.getSample(ctx, user, role)));
33
- }
34
-
35
- private async getSample(
36
- ctx: ExuluContext,
37
- user?: User,
38
- role?: string,
39
- ): Promise<ContextSample> {
40
- const cached = this.cache.get(ctx.id);
41
- if (cached && Date.now() - cached.sampledAt < CACHE_TTL_MS) {
42
- return cached;
43
- }
44
-
45
- const { db } = await postgresClient();
46
- const tableName = getTableName(ctx.id);
47
- const tableDefinition = convertContextToTableDefinition(ctx);
48
-
49
- const customFieldNames = ctx.fields.map((f) => f.name);
50
- const selectFields = ["id", "name", "external_id", ...customFieldNames];
51
-
52
- let exampleItems: Record<string, any>[] = [];
53
- try {
54
- let query = db(tableName).select(selectFields).whereNull("archived").limit(2);
55
- query = applyAccessControl(tableDefinition, query, user, tableName);
56
- exampleItems = await query;
57
- } catch {
58
- // If table doesn't exist yet or column mismatch, return empty samples
59
- }
60
-
61
- const sample: ContextSample = {
62
- contextId: ctx.id,
63
- contextName: ctx.name,
64
- fields: ["name", "external_id", ...customFieldNames],
65
- exampleItems,
66
- sampledAt: Date.now(),
67
- };
68
-
69
- this.cache.set(ctx.id, sample);
70
-
71
- // No background refresh: a stale entry is re-sampled on the next call after the TTL expires
72
- return sample;
73
- }
74
-
75
- /** Evict a context from cache so it's re-sampled on next use */
76
- invalidate(contextId: string): void {
77
- this.cache.delete(contextId);
78
- }
79
- }