@exulu/backend 1.54.0 → 1.56.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -44,11 +44,50 @@ function mapSearchMethod(method: "hybrid" | "keyword" | "semantic"): "hybridSear
44
44
  return "cosineDistance";
45
45
  }
46
46
 
47
/**
 * Parses session item entries into a per-context selection map.
 *
 * Two entry formats are supported:
 *   "<context_id>/<item_id>"  → a specific item; the map value is a non-empty string[]
 *   "<context_id>"            → the full context (no item filter); the map value is null
 *
 * Only the first "/" separates the context ID from the item ID, so an item ID
 * may itself contain slashes. Empty entries and entries with an empty context
 * or item part are ignored.
 *
 * If both a full-context entry and specific-item entries exist for the same
 * context, full-context (null) wins regardless of the order they appear in.
 * Repeated item entries are collapsed, keeping first-seen order.
 *
 * @param globalIds Raw selection entries in either of the formats above.
 * @returns Map keyed by context ID: null for a full-context selection,
 *          otherwise the distinct item IDs selected in that context.
 */
export function parseGlobalItemIds(globalIds: string[]): Map<string, string[] | null> {
  // Collect into Sets first so repeated entries do not yield duplicate IDs.
  const selections = new Map<string, Set<string> | null>();

  for (const gid of globalIds) {
    const slashIdx = gid.indexOf("/");

    if (slashIdx === -1) {
      // No slash → entire context selected; overrides any items gathered so far.
      if (gid) selections.set(gid, null);
      continue;
    }

    const contextId = gid.slice(0, slashIdx);
    const itemId = gid.slice(slashIdx + 1);
    if (!contextId || !itemId) continue;

    const existing = selections.get(contextId);
    // Full-context entry already wins — don't downgrade to specific items.
    if (existing === null) continue;

    if (existing === undefined) {
      selections.set(contextId, new Set([itemId]));
    } else {
      existing.add(itemId);
    }
  }

  // Map and Set both iterate in insertion order, so output order is stable.
  const result = new Map<string, string[] | null>();
  for (const [contextId, itemIds] of selections) {
    result.set(contextId, itemIds === null ? null : Array.from(itemIds));
  }
  return result;
}
77
+
47
78
  export type RetrievalToolParams = {
48
79
  contexts: ExuluContext[];
80
+ toolVariablesConfig?: Record<string, any>;
49
81
  user?: User;
50
82
  role?: string;
51
83
  updateVirtualFiles: (files: Array<{ path: string; content: string }>) => Promise<void>;
84
+ /**
85
+ * Preselected scope keyed by context ID. When set, every tool is scoped accordingly:
86
+ * null → full context access (no item filter)
87
+ * string[] → only these specific item IDs
88
+ * missing key → context was not selected; return empty results
89
+ */
90
+ preselectedItemsByContext?: Map<string, string[] | null>;
52
91
  };
53
92
 
54
93
  /**
@@ -56,7 +95,7 @@ export type RetrievalToolParams = {
56
95
  * and filtered per strategy.
57
96
  */
58
97
  export function createRetrievalTools(params: RetrievalToolParams) {
59
- const { contexts, user, role, updateVirtualFiles } = params;
98
+ const { contexts, toolVariablesConfig, user, role, updateVirtualFiles, preselectedItemsByContext } = params;
60
99
  const ctxEnum = buildContextEnum(contexts);
61
100
 
62
101
  // ──────────────────────────────────────────────────────────
@@ -87,6 +126,13 @@ export function createRetrievalTools(params: RetrievalToolParams) {
87
126
 
88
127
  const counts = await Promise.all(
89
128
  ctxList.map(async (ctx) => {
129
+ const contextItemIds = preselectedItemsByContext?.get(ctx.id);
130
+ // undefined = context not in preselection map → skip
131
+ if (preselectedItemsByContext && contextItemIds === undefined) {
132
+ return { context: ctx.id, context_name: ctx.name, count: 0 };
133
+ }
134
+ // null = full context; string[] = specific items
135
+
90
136
  let count = 0;
91
137
 
92
138
  if (count_what === "items") {
@@ -95,19 +141,25 @@ export function createRetrievalTools(params: RetrievalToolParams) {
95
141
  if (name_contains) {
96
142
  q = q.whereRaw("LOWER(name) LIKE ?", [`%${name_contains.toLowerCase()}%`]);
97
143
  }
144
+ if (Array.isArray(contextItemIds)) {
145
+ q = q.whereIn("id", contextItemIds);
146
+ }
98
147
  const tableDefinition = convertContextToTableDefinition(ctx);
99
148
  q = applyAccessControl(tableDefinition, q, user, tableName);
100
149
  const result = await q.first();
101
150
  count = Number(result?.count ?? 0);
102
151
  } else {
103
152
  const chunksTable = getChunksTableName(ctx.id);
153
+ const baseItemFilters: SearchFilters = Array.isArray(contextItemIds)
154
+ ? [{ id: { in: contextItemIds } }]
155
+ : [];
104
156
  if (content_query) {
105
157
  const searchResults = await ctx.search({
106
158
  query: content_query,
107
159
  method: "hybridSearch",
108
160
  limit: 10000,
109
161
  page: 1,
110
- itemFilters: [],
162
+ itemFilters: baseItemFilters,
111
163
  chunkFilters: [],
112
164
  sort: { field: "updatedAt", direction: "desc" },
113
165
  user,
@@ -115,6 +167,9 @@ export function createRetrievalTools(params: RetrievalToolParams) {
115
167
  trigger: "tool",
116
168
  });
117
169
  count = searchResults.chunks.length;
170
+ } else if (Array.isArray(contextItemIds)) {
171
+ const result = await db(chunksTable).count("id as count").whereIn("source", contextItemIds).first();
172
+ count = Number(result?.count ?? 0);
118
173
  } else {
119
174
  const result = await db(chunksTable).count("id as count").first();
120
175
  count = Number(result?.count ?? 0);
@@ -137,10 +192,18 @@ export function createRetrievalTools(params: RetrievalToolParams) {
137
192
  // ──────────────────────────────────────────────────────────
138
193
  const search_items_by_name = tool({
139
194
  description:
140
- "Search for items by their name or external ID. Use only when the user is asking for documents BY TITLE, not by content topic.",
195
+ "Search for items by their name or external ID. Use when:\n" +
196
+ "• The user asks for a document BY TITLE or NAME\n" +
197
+ "• The user asks whether a specific named document EXISTS (e.g. 'do you have the X manual?', 'is there a document for Y?')\n" +
198
+ "• Any query that references a specific document, manual, or resource by its name rather than by topic\n" +
199
+ "Do NOT use for topic-based content queries (e.g. 'what are the parameters for X?', 'how do I configure Y?').",
141
200
  inputSchema: z.object({
142
201
  knowledge_base_ids: ctxEnum,
143
- item_name: z.string().describe("The name or partial name to search for"),
202
+ item_name: z.string().describe(
203
+ "The name or partial name to search for. Uses substring matching, so shorter and more specific terms work better than full phrases. " +
204
+ "Extract only the core identifying part — typically the product model, document title, or unique identifier. " +
205
+ "Do NOT include surrounding descriptors like type words ('manual', 'guide', 'document') or manufacturer names unless they are likely part of the actual document title."
206
+ ),
144
207
  limit: z
145
208
  .number()
146
209
  .default(100)
@@ -152,10 +215,16 @@ export function createRetrievalTools(params: RetrievalToolParams) {
152
215
  const { db } = await postgresClient();
153
216
  const ctxList = resolveContexts(knowledge_base_ids, contexts);
154
217
  const safeLimit = Math.min(limit ?? 100, 400);
155
- const itemFilters: SearchFilters = item_name ? [{ name: { contains: item_name } }] : [];
156
218
 
157
219
  const results = await Promise.all(
158
220
  ctxList.map(async (ctx) => {
221
+ const contextItemIds = preselectedItemsByContext?.get(ctx.id);
222
+ // undefined = context not in preselection map → skip
223
+ if (preselectedItemsByContext && contextItemIds === undefined) return [];
224
+
225
+ const itemFilters: SearchFilters = item_name ? [{ name: { contains: item_name } }] : [];
226
+ if (Array.isArray(contextItemIds)) itemFilters.push({ id: { in: contextItemIds } });
227
+
159
228
  const tableName = getTableName(ctx.id);
160
229
  const tableDefinition = convertContextToTableDefinition(ctx);
161
230
 
@@ -202,15 +271,25 @@ export function createRetrievalTools(params: RetrievalToolParams) {
202
271
  // search_content
203
272
  // ──────────────────────────────────────────────────────────
204
273
  const search_content = tool({
205
- description: `Search across document content using hybrid, keyword, or semantic search.
274
+ description: `Search ONE knowledge base for document content using hybrid, keyword, or semantic search.
275
+ Always make a separate call for each knowledge base you want to search — never bundle multiple in one call.
206
276
 
207
277
  Use includeContent: false when you only need to know WHICH documents match (listing, overview, navigation).
208
278
  Use includeContent: true when you need the ACTUAL text to answer a question.
209
279
 
210
280
  For listing queries: always start with includeContent: false, then use dynamic tools to fetch specific pages.`,
211
281
  inputSchema: z.object({
212
- query: z.string().describe("Search query about the content you're looking for"),
213
- knowledge_base_ids: ctxEnum,
282
+ userQuery: z.string().describe("The original unaltered question from the user"),
283
+ knowledge_base_id: z
284
+ .enum(contexts.map((c) => c.id) as [string, ...string[]])
285
+ .describe(
286
+ contexts
287
+ .map(
288
+ (c) =>
289
+ `<knowledge_base id="${c.id}" name="${c.name}">${c.description}</knowledge_base>`,
290
+ )
291
+ .join("\n"),
292
+ ),
214
293
  keywords: z.array(z.string()).optional().describe("Keywords extracted from the query"),
215
294
  searchMethod: z
216
295
  .enum(["hybrid", "keyword", "semantic"])
@@ -236,12 +315,12 @@ For listing queries: always start with includeContent: false, then use dynamic t
236
315
  .describe("Filter results to specific external IDs"),
237
316
  limit: z
238
317
  .number()
239
- .default(10)
240
- .describe("Max chunks with content (max 10). Without content, up to 200 are returned."),
318
+ .default(20)
319
+ .describe("Max chunks with content (max 20). Without content, up to 200 are returned."),
241
320
  }),
242
321
  execute: async ({
243
- query,
244
- knowledge_base_ids,
322
+ userQuery,
323
+ knowledge_base_id,
245
324
  keywords,
246
325
  searchMethod,
247
326
  includeContent,
@@ -250,70 +329,94 @@ For listing queries: always start with includeContent: false, then use dynamic t
250
329
  item_external_ids,
251
330
  limit,
252
331
  }) => {
253
- const ctxList = resolveContexts(knowledge_base_ids, contexts);
254
- const effectiveLimit = includeContent ? Math.min(limit ?? 10, 10) : Math.min((limit ?? 10) * 20, 400);
255
-
256
- const results = await Promise.all(
257
- ctxList.map(async (ctx) => {
258
- const itemFilters: SearchFilters = [];
259
- if (item_ids) itemFilters.push({ id: { in: item_ids } });
260
- if (item_names)
261
- itemFilters.push({ name: { or: item_names.map((n) => ({ contains: n })) } });
262
- if (item_external_ids) itemFilters.push({ external_id: { in: item_external_ids } });
263
-
264
- const effectiveQuery = query || keywords?.join(" ") || "";
265
-
266
- let method = mapSearchMethod(searchMethod ?? "hybrid")
267
-
268
- if (
269
- method === "hybridSearch" ||
270
- method === "cosineDistance"
271
- ) {
272
- if (!ctx.embedder) {
273
- console.error(`[EXULU] context "${ctx.id}" does not have an embedder, falling back to tsvector search`);
274
- method = "tsvector"
275
- }
332
+ const [ctx] = resolveContexts([knowledge_base_id], contexts) as [ExuluContext];
333
+ const maxResults = toolVariablesConfig?.[`${ctx.id}_|_max_results`] || 20;
334
+ const effectiveLimit = includeContent ? Math.min(limit ?? maxResults, maxResults) : Math.min((limit ?? maxResults) * maxResults, 400);
335
+
336
+ const itemFilters: SearchFilters = [];
337
+
338
+ if (preselectedItemsByContext) {
339
+ const contextItemIds = preselectedItemsByContext.get(knowledge_base_id);
340
+ if (contextItemIds === undefined) {
341
+ // Context not in preselection map — nothing to search
342
+ return JSON.stringify([]);
343
+ }
344
+ if (Array.isArray(contextItemIds)) {
345
+ const intersection = item_ids?.length
346
+ ? item_ids.filter((id) => contextItemIds.includes(id))
347
+ : contextItemIds;
348
+ if (!intersection.length) {
349
+ // Agent specified item_ids entirely outside the preselected scope
350
+ return JSON.stringify([]);
276
351
  }
277
-
278
- try {
279
- const { chunks } = await ctx.search({
280
- query: effectiveQuery,
281
- keywords,
282
- method: method,
283
- limit: effectiveLimit,
284
- page: 1,
285
- itemFilters,
286
- chunkFilters: [],
287
- sort: { field: "updatedAt", direction: "desc" },
288
- user,
289
- role,
290
- trigger: "tool",
291
- });
292
-
293
- return chunks.map(
294
- (chunk): ChunkResult => ({
295
- item_name: chunk.item_name,
296
- item_id: chunk.item_id,
297
- context: chunk.context?.id ?? ctx.id,
298
- chunk_id: chunk.chunk_id,
299
- chunk_index: chunk.chunk_index,
300
- chunk_content: includeContent ? chunk.chunk_content : undefined,
301
- metadata: {
302
- ...chunk.chunk_metadata,
303
- cosine_distance: chunk.chunk_cosine_distance,
304
- fts_rank: chunk.chunk_fts_rank,
305
- hybrid_score: chunk.chunk_hybrid_score,
306
- },
307
- }),
308
- );
309
- } catch (err) {
310
- console.error(`[EXULU] search_content failed for context "${ctx.id}":`, err);
311
- return [];
312
- }
313
- }),
314
- );
315
-
316
- return JSON.stringify(results.flat());
352
+ itemFilters.push({ id: { in: intersection } });
353
+ }
354
+ // null = full context → no item filter; agent's item_ids are still respected if provided
355
+ else if (item_ids?.length) {
356
+ itemFilters.push({ id: { in: item_ids } });
357
+ }
358
+ } else if (item_ids?.length) {
359
+ itemFilters.push({ id: { in: item_ids } });
360
+ }
361
+
362
+ if (item_names)
363
+ itemFilters.push({ name: { or: item_names.map((n) => ({ contains: n })) } });
364
+ if (item_external_ids) itemFilters.push({ external_id: { in: item_external_ids } });
365
+
366
+ const effectiveQuery = userQuery || keywords?.join(" ") || "";
367
+
368
+ let method = mapSearchMethod(searchMethod ?? "hybrid");
369
+
370
+ if (method === "hybridSearch" || method === "cosineDistance") {
371
+ if (!ctx.embedder) {
372
+ console.error(`[EXULU] context "${ctx.id}" does not have an embedder, falling back to tsvector search`);
373
+ method = "tsvector";
374
+ }
375
+ }
376
+
377
+ const expandChunks = toolVariablesConfig?.[`${ctx.id}_|_expand_chunks`] || 0;
378
+
379
+ try {
380
+ const { chunks } = await ctx.search({
381
+ query: effectiveQuery,
382
+ keywords,
383
+ method,
384
+ limit: effectiveLimit,
385
+ page: 1,
386
+ itemFilters,
387
+ chunkFilters: [],
388
+ sort: { field: "updatedAt", direction: "desc" },
389
+ user,
390
+ role,
391
+ trigger: "tool",
392
+ expand: expandChunks > 0 ? {
393
+ before: expandChunks,
394
+ after: expandChunks,
395
+ } : undefined,
396
+ });
397
+
398
+ return JSON.stringify(
399
+ chunks.map(
400
+ (chunk): ChunkResult => ({
401
+ item_name: chunk.item_name,
402
+ item_id: chunk.item_id,
403
+ context: chunk.context?.id ?? ctx.id,
404
+ chunk_id: chunk.chunk_id,
405
+ chunk_index: chunk.chunk_index,
406
+ chunk_content: includeContent ? chunk.chunk_content : undefined,
407
+ metadata: {
408
+ ...chunk.chunk_metadata,
409
+ cosine_distance: chunk.chunk_cosine_distance,
410
+ fts_rank: chunk.chunk_fts_rank,
411
+ hybrid_score: chunk.chunk_hybrid_score,
412
+ },
413
+ }),
414
+ ),
415
+ );
416
+ } catch (err) {
417
+ console.error(`[EXULU] search_content failed for context "${ctx.id}":`, err);
418
+ return JSON.stringify([]);
419
+ }
317
420
  },
318
421
  });
319
422
 
@@ -321,10 +424,11 @@ For listing queries: always start with includeContent: false, then use dynamic t
321
424
  // save_search_results
322
425
  // ──────────────────────────────────────────────────────────
323
426
  const save_search_results = tool({
324
- description: `Execute a search and save ALL results to the virtual filesystem WITHOUT loading them into context.
427
+ description: `Execute a search on ONE knowledge base and save ALL results to the virtual filesystem WITHOUT loading them into context.
428
+ Always make a separate call for each knowledge base you want to search.
325
429
 
326
430
  Use this when you expect many results (>20) and need to filter iteratively:
327
- 1. Call save_search_results to save up to 1000 results to /search_results.txt
431
+ 1. Call save_search_results (once per knowledge base) to save up to 1000 results to /search_results_{knowledge_base_id}.txt
328
432
  2. Use bash grep/awk to identify relevant chunks by pattern
329
433
  3. Use dynamic get_content tools to load only the specific chunks you need
330
434
 
@@ -340,7 +444,16 @@ SCORE: ...
340
444
  (content or placeholder)
341
445
  ---CONTENT END---`,
342
446
  inputSchema: z.object({
343
- knowledge_base_ids: ctxEnum,
447
+ knowledge_base_id: z
448
+ .enum(contexts.map((c) => c.id) as [string, ...string[]])
449
+ .describe(
450
+ contexts
451
+ .map(
452
+ (c) =>
453
+ `<knowledge_base id="${c.id}" name="${c.name}">${c.description}</knowledge_base>`,
454
+ )
455
+ .join("\n"),
456
+ ),
344
457
  query: z.string().describe("Search query"),
345
458
  searchMethod: z.enum(["hybrid", "keyword", "semantic"]).default("hybrid"),
346
459
  limit: z
@@ -355,34 +468,44 @@ SCORE: ...
355
468
  "Whether to include chunk text in the saved file. False saves tokens — use true only if you need to grep content.",
356
469
  ),
357
470
  }),
358
- execute: async ({ query, knowledge_base_ids, searchMethod, limit, includeContent }) => {
359
- const ctxList = resolveContexts(knowledge_base_ids, contexts);
360
-
361
- const results = await Promise.all(
362
- ctxList.map(async (ctx) => {
363
- try {
364
- const { chunks } = await ctx.search({
365
- query,
366
- method: mapSearchMethod(searchMethod ?? "hybrid"),
367
- limit: Math.min(limit ?? 100, 1000),
368
- page: 1,
369
- itemFilters: [],
370
- chunkFilters: [],
371
- sort: { field: "updatedAt", direction: "desc" },
372
- user,
373
- role,
374
- trigger: "tool",
375
- });
376
- return chunks;
377
- } catch (err) {
378
- console.error(`[EXULU] save_search_results failed for context "${ctx.id}":`, err);
379
- return [];
380
- }
381
- }),
382
- );
383
-
384
- const chunks: VectorSearchChunkResult[] = results.flat();
385
-
471
+ execute: async ({ query, knowledge_base_id, searchMethod, limit, includeContent }) => {
472
+ const [ctx] = resolveContexts([knowledge_base_id], contexts) as [ExuluContext];
473
+
474
+ const contextItemIds = preselectedItemsByContext?.get(knowledge_base_id);
475
+ // undefined = context not in preselection map → skip
476
+ if (preselectedItemsByContext && contextItemIds === undefined) {
477
+ return JSON.stringify({
478
+ success: true,
479
+ results_count: 0,
480
+ message: `Context "${knowledge_base_id}" not in preselected scope — skipped.`,
481
+ });
482
+ }
483
+
484
+ // null = full context (no filter); string[] = specific items
485
+ const itemFilters: SearchFilters = Array.isArray(contextItemIds)
486
+ ? [{ id: { in: contextItemIds } }]
487
+ : [];
488
+
489
+ let chunks: VectorSearchChunkResult[] = [];
490
+ try {
491
+ const result = await ctx.search({
492
+ query,
493
+ method: mapSearchMethod(searchMethod ?? "hybrid"),
494
+ limit: Math.min(limit ?? 100, 1000),
495
+ page: 1,
496
+ itemFilters,
497
+ chunkFilters: [],
498
+ sort: { field: "updatedAt", direction: "desc" },
499
+ user,
500
+ role,
501
+ trigger: "tool",
502
+ });
503
+ chunks = result.chunks;
504
+ } catch (err) {
505
+ console.error(`[EXULU] save_search_results failed for context "${ctx.id}":`, err);
506
+ }
507
+
508
+ const fileName = `search_results_${ctx.id}.txt`;
386
509
  const fileContent = chunks
387
510
  .map(
388
511
  (chunk, i) =>
@@ -400,14 +523,14 @@ SCORE: ...
400
523
  .join("\n");
401
524
 
402
525
  await updateVirtualFiles([
403
- { path: "search_results.txt", content: fileContent },
526
+ { path: fileName, content: fileContent },
404
527
  {
405
- path: "search_metadata.json",
528
+ path: `search_metadata_${ctx.id}.json`,
406
529
  content: JSON.stringify({
407
530
  query,
408
531
  timestamp: new Date().toISOString(),
409
532
  results_count: chunks.length,
410
- contexts: ctxList.map((c) => c.id),
533
+ context: ctx.id,
411
534
  method: searchMethod,
412
535
  }),
413
536
  },
@@ -416,11 +539,11 @@ SCORE: ...
416
539
  return JSON.stringify({
417
540
  success: true,
418
541
  results_count: chunks.length,
419
- message: `Saved ${chunks.length} results to /search_results.txt`,
542
+ message: `Saved ${chunks.length} results to /${fileName}`,
420
543
  grep_examples: [
421
- "grep -i 'keyword' search_results.txt | head -20",
422
- "grep 'ITEM_NAME:' search_results.txt",
423
- "grep -B 5 'pattern' search_results.txt | grep 'CHUNK_ID:'",
544
+ `grep -i 'keyword' ${fileName} | head -20`,
545
+ `grep 'ITEM_NAME:' ${fileName}`,
546
+ `grep -B 5 'pattern' ${fileName} | grep 'CHUNK_ID:'`,
424
547
  ],
425
548
  });
426
549
  },