@exulu/backend 1.54.0 → 1.55.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -44,11 +44,49 @@ function mapSearchMethod(method: "hybrid" | "keyword" | "semantic"): "hybridSear
44
44
  return "cosineDistance";
45
45
  }
46
46
 
47
/**
 * Parses session item entries into a per-context selection map.
 *
 * Two entry formats are supported:
 *   "<context_id>/<item_id>" → a specific item; the map value is a non-empty string[]
 *   "<context_id>"           → the full context (no item filter); the map value is null
 *
 * Rules:
 *  - Only the first "/" separates the context from the item, so everything after
 *    it (including further slashes) is kept as the item ID.
 *  - Entries with an empty context part or an empty item part ("", "/x", "ctx/")
 *    are ignored.
 *  - A full-context entry (null) always wins over specific-item entries for the
 *    same context, regardless of the order in which they appear.
 *  - Repeated item IDs for the same context are collapsed; first-seen order is kept.
 *
 * @param globalIds Raw entries in either of the formats above.
 * @returns Map from context ID to `null` (whole context) or the selected item IDs.
 */
export function parseGlobalItemIds(globalIds: string[]): Map<string, string[] | null> {
  const selection = new Map<string, string[] | null>();

  for (const entry of globalIds) {
    const slashIdx = entry.indexOf("/");

    if (slashIdx === -1) {
      // No slash → entire context selected. This also overwrites any item list
      // collected earlier for the same context.
      if (entry) selection.set(entry, null);
      continue;
    }

    const contextId = entry.slice(0, slashIdx);
    const itemId = entry.slice(slashIdx + 1);
    if (!contextId || !itemId) continue;

    const existing = selection.get(contextId);
    // Full-context entry already wins — don't downgrade to specific items.
    if (existing === null) continue;

    if (existing === undefined) {
      selection.set(contextId, [itemId]);
    } else if (!existing.includes(itemId)) {
      // Skip IDs that were already recorded so downstream filters get a unique list.
      existing.push(itemId);
    }
  }

  return selection;
}
77
+
47
78
  export type RetrievalToolParams = {
48
79
  contexts: ExuluContext[];
49
80
  user?: User;
50
81
  role?: string;
51
82
  updateVirtualFiles: (files: Array<{ path: string; content: string }>) => Promise<void>;
83
+ /**
84
+ * Preselected scope keyed by context ID. When set, every tool is scoped accordingly:
85
+ * null → full context access (no item filter)
86
+ * string[] → only these specific item IDs
87
+ * missing key → context was not selected; return empty results
88
+ */
89
+ preselectedItemsByContext?: Map<string, string[] | null>;
52
90
  };
53
91
 
54
92
  /**
@@ -56,7 +94,7 @@ export type RetrievalToolParams = {
56
94
  * and filtered per strategy.
57
95
  */
58
96
  export function createRetrievalTools(params: RetrievalToolParams) {
59
- const { contexts, user, role, updateVirtualFiles } = params;
97
+ const { contexts, user, role, updateVirtualFiles, preselectedItemsByContext } = params;
60
98
  const ctxEnum = buildContextEnum(contexts);
61
99
 
62
100
  // ──────────────────────────────────────────────────────────
@@ -87,6 +125,13 @@ export function createRetrievalTools(params: RetrievalToolParams) {
87
125
 
88
126
  const counts = await Promise.all(
89
127
  ctxList.map(async (ctx) => {
128
+ const contextItemIds = preselectedItemsByContext?.get(ctx.id);
129
+ // undefined = context not in preselection map → skip
130
+ if (preselectedItemsByContext && contextItemIds === undefined) {
131
+ return { context: ctx.id, context_name: ctx.name, count: 0 };
132
+ }
133
+ // null = full context; string[] = specific items
134
+
90
135
  let count = 0;
91
136
 
92
137
  if (count_what === "items") {
@@ -95,19 +140,25 @@ export function createRetrievalTools(params: RetrievalToolParams) {
95
140
  if (name_contains) {
96
141
  q = q.whereRaw("LOWER(name) LIKE ?", [`%${name_contains.toLowerCase()}%`]);
97
142
  }
143
+ if (Array.isArray(contextItemIds)) {
144
+ q = q.whereIn("id", contextItemIds);
145
+ }
98
146
  const tableDefinition = convertContextToTableDefinition(ctx);
99
147
  q = applyAccessControl(tableDefinition, q, user, tableName);
100
148
  const result = await q.first();
101
149
  count = Number(result?.count ?? 0);
102
150
  } else {
103
151
  const chunksTable = getChunksTableName(ctx.id);
152
+ const baseItemFilters: SearchFilters = Array.isArray(contextItemIds)
153
+ ? [{ id: { in: contextItemIds } }]
154
+ : [];
104
155
  if (content_query) {
105
156
  const searchResults = await ctx.search({
106
157
  query: content_query,
107
158
  method: "hybridSearch",
108
159
  limit: 10000,
109
160
  page: 1,
110
- itemFilters: [],
161
+ itemFilters: baseItemFilters,
111
162
  chunkFilters: [],
112
163
  sort: { field: "updatedAt", direction: "desc" },
113
164
  user,
@@ -115,6 +166,9 @@ export function createRetrievalTools(params: RetrievalToolParams) {
115
166
  trigger: "tool",
116
167
  });
117
168
  count = searchResults.chunks.length;
169
+ } else if (Array.isArray(contextItemIds)) {
170
+ const result = await db(chunksTable).count("id as count").whereIn("source", contextItemIds).first();
171
+ count = Number(result?.count ?? 0);
118
172
  } else {
119
173
  const result = await db(chunksTable).count("id as count").first();
120
174
  count = Number(result?.count ?? 0);
@@ -137,10 +191,18 @@ export function createRetrievalTools(params: RetrievalToolParams) {
137
191
  // ──────────────────────────────────────────────────────────
138
192
  const search_items_by_name = tool({
139
193
  description:
140
- "Search for items by their name or external ID. Use only when the user is asking for documents BY TITLE, not by content topic.",
194
+ "Search for items by their name or external ID. Use when:\n" +
195
+ "• The user asks for a document BY TITLE or NAME\n" +
196
+ "• The user asks whether a specific named document EXISTS (e.g. 'do you have the X manual?', 'is there a document for Y?')\n" +
197
+ "• Any query that references a specific document, manual, or resource by its name rather than by topic\n" +
198
+ "Do NOT use for topic-based content queries (e.g. 'what are the parameters for X?', 'how do I configure Y?').",
141
199
  inputSchema: z.object({
142
200
  knowledge_base_ids: ctxEnum,
143
- item_name: z.string().describe("The name or partial name to search for"),
201
+ item_name: z.string().describe(
202
+ "The name or partial name to search for. Uses substring matching, so shorter and more specific terms work better than full phrases. " +
203
+ "Extract only the core identifying part — typically the product model, document title, or unique identifier. " +
204
+ "Do NOT include surrounding descriptors like type words ('manual', 'guide', 'document') or manufacturer names unless they are likely part of the actual document title."
205
+ ),
144
206
  limit: z
145
207
  .number()
146
208
  .default(100)
@@ -152,10 +214,16 @@ export function createRetrievalTools(params: RetrievalToolParams) {
152
214
  const { db } = await postgresClient();
153
215
  const ctxList = resolveContexts(knowledge_base_ids, contexts);
154
216
  const safeLimit = Math.min(limit ?? 100, 400);
155
- const itemFilters: SearchFilters = item_name ? [{ name: { contains: item_name } }] : [];
156
217
 
157
218
  const results = await Promise.all(
158
219
  ctxList.map(async (ctx) => {
220
+ const contextItemIds = preselectedItemsByContext?.get(ctx.id);
221
+ // undefined = context not in preselection map → skip
222
+ if (preselectedItemsByContext && contextItemIds === undefined) return [];
223
+
224
+ const itemFilters: SearchFilters = item_name ? [{ name: { contains: item_name } }] : [];
225
+ if (Array.isArray(contextItemIds)) itemFilters.push({ id: { in: contextItemIds } });
226
+
159
227
  const tableName = getTableName(ctx.id);
160
228
  const tableDefinition = convertContextToTableDefinition(ctx);
161
229
 
@@ -202,7 +270,8 @@ export function createRetrievalTools(params: RetrievalToolParams) {
202
270
  // search_content
203
271
  // ──────────────────────────────────────────────────────────
204
272
  const search_content = tool({
205
- description: `Search across document content using hybrid, keyword, or semantic search.
273
+ description: `Search ONE knowledge base for document content using hybrid, keyword, or semantic search.
274
+ Always make a separate call for each knowledge base you want to search — never bundle multiple in one call.
206
275
 
207
276
  Use includeContent: false when you only need to know WHICH documents match (listing, overview, navigation).
208
277
  Use includeContent: true when you need the ACTUAL text to answer a question.
@@ -210,7 +279,16 @@ Use includeContent: true when you need the ACTUAL text to answer a question.
210
279
  For listing queries: always start with includeContent: false, then use dynamic tools to fetch specific pages.`,
211
280
  inputSchema: z.object({
212
281
  query: z.string().describe("Search query about the content you're looking for"),
213
- knowledge_base_ids: ctxEnum,
282
+ knowledge_base_id: z
283
+ .enum(contexts.map((c) => c.id) as [string, ...string[]])
284
+ .describe(
285
+ contexts
286
+ .map(
287
+ (c) =>
288
+ `<knowledge_base id="${c.id}" name="${c.name}">${c.description}</knowledge_base>`,
289
+ )
290
+ .join("\n"),
291
+ ),
214
292
  keywords: z.array(z.string()).optional().describe("Keywords extracted from the query"),
215
293
  searchMethod: z
216
294
  .enum(["hybrid", "keyword", "semantic"])
@@ -236,12 +314,12 @@ For listing queries: always start with includeContent: false, then use dynamic t
236
314
  .describe("Filter results to specific external IDs"),
237
315
  limit: z
238
316
  .number()
239
- .default(10)
240
- .describe("Max chunks with content (max 10). Without content, up to 200 are returned."),
317
+ .default(20)
318
+ .describe("Max chunks with content (max 20). Without content, up to 200 are returned."),
241
319
  }),
242
320
  execute: async ({
243
321
  query,
244
- knowledge_base_ids,
322
+ knowledge_base_id,
245
323
  keywords,
246
324
  searchMethod,
247
325
  includeContent,
@@ -250,70 +328,87 @@ For listing queries: always start with includeContent: false, then use dynamic t
250
328
  item_external_ids,
251
329
  limit,
252
330
  }) => {
253
- const ctxList = resolveContexts(knowledge_base_ids, contexts);
254
- const effectiveLimit = includeContent ? Math.min(limit ?? 10, 10) : Math.min((limit ?? 10) * 20, 400);
255
-
256
- const results = await Promise.all(
257
- ctxList.map(async (ctx) => {
258
- const itemFilters: SearchFilters = [];
259
- if (item_ids) itemFilters.push({ id: { in: item_ids } });
260
- if (item_names)
261
- itemFilters.push({ name: { or: item_names.map((n) => ({ contains: n })) } });
262
- if (item_external_ids) itemFilters.push({ external_id: { in: item_external_ids } });
263
-
264
- const effectiveQuery = query || keywords?.join(" ") || "";
265
-
266
- let method = mapSearchMethod(searchMethod ?? "hybrid")
267
-
268
- if (
269
- method === "hybridSearch" ||
270
- method === "cosineDistance"
271
- ) {
272
- if (!ctx.embedder) {
273
- console.error(`[EXULU] context "${ctx.id}" does not have an embedder, falling back to tsvector search`);
274
- method = "tsvector"
275
- }
331
+ const [ctx] = resolveContexts([knowledge_base_id], contexts) as [ExuluContext];
332
+ const effectiveLimit = includeContent ? Math.min(limit ?? 20, 20) : Math.min((limit ?? 20) * 20, 400);
333
+
334
+ const itemFilters: SearchFilters = [];
335
+
336
+ if (preselectedItemsByContext) {
337
+ const contextItemIds = preselectedItemsByContext.get(knowledge_base_id);
338
+ if (contextItemIds === undefined) {
339
+ // Context not in preselection map → nothing to search
340
+ return JSON.stringify([]);
341
+ }
342
+ if (Array.isArray(contextItemIds)) {
343
+ const intersection = item_ids?.length
344
+ ? item_ids.filter((id) => contextItemIds.includes(id))
345
+ : contextItemIds;
346
+ if (!intersection.length) {
347
+ // Agent specified item_ids entirely outside the preselected scope
348
+ return JSON.stringify([]);
276
349
  }
277
-
278
- try {
279
- const { chunks } = await ctx.search({
280
- query: effectiveQuery,
281
- keywords,
282
- method: method,
283
- limit: effectiveLimit,
284
- page: 1,
285
- itemFilters,
286
- chunkFilters: [],
287
- sort: { field: "updatedAt", direction: "desc" },
288
- user,
289
- role,
290
- trigger: "tool",
291
- });
292
-
293
- return chunks.map(
294
- (chunk): ChunkResult => ({
295
- item_name: chunk.item_name,
296
- item_id: chunk.item_id,
297
- context: chunk.context?.id ?? ctx.id,
298
- chunk_id: chunk.chunk_id,
299
- chunk_index: chunk.chunk_index,
300
- chunk_content: includeContent ? chunk.chunk_content : undefined,
301
- metadata: {
302
- ...chunk.chunk_metadata,
303
- cosine_distance: chunk.chunk_cosine_distance,
304
- fts_rank: chunk.chunk_fts_rank,
305
- hybrid_score: chunk.chunk_hybrid_score,
306
- },
307
- }),
308
- );
309
- } catch (err) {
310
- console.error(`[EXULU] search_content failed for context "${ctx.id}":`, err);
311
- return [];
312
- }
313
- }),
314
- );
315
-
316
- return JSON.stringify(results.flat());
350
+ itemFilters.push({ id: { in: intersection } });
351
+ }
352
+ // null = full context → no item filter; agent's item_ids still respected if provided
353
+ else if (item_ids?.length) {
354
+ itemFilters.push({ id: { in: item_ids } });
355
+ }
356
+ } else if (item_ids?.length) {
357
+ itemFilters.push({ id: { in: item_ids } });
358
+ }
359
+
360
+ if (item_names)
361
+ itemFilters.push({ name: { or: item_names.map((n) => ({ contains: n })) } });
362
+ if (item_external_ids) itemFilters.push({ external_id: { in: item_external_ids } });
363
+
364
+ const effectiveQuery = query || keywords?.join(" ") || "";
365
+
366
+ let method = mapSearchMethod(searchMethod ?? "hybrid");
367
+
368
+ if (method === "hybridSearch" || method === "cosineDistance") {
369
+ if (!ctx.embedder) {
370
+ console.error(`[EXULU] context "${ctx.id}" does not have an embedder, falling back to tsvector search`);
371
+ method = "tsvector";
372
+ }
373
+ }
374
+
375
+ try {
376
+ const { chunks } = await ctx.search({
377
+ query: effectiveQuery,
378
+ keywords,
379
+ method,
380
+ limit: effectiveLimit,
381
+ page: 1,
382
+ itemFilters,
383
+ chunkFilters: [],
384
+ sort: { field: "updatedAt", direction: "desc" },
385
+ user,
386
+ role,
387
+ trigger: "tool",
388
+ });
389
+
390
+ return JSON.stringify(
391
+ chunks.map(
392
+ (chunk): ChunkResult => ({
393
+ item_name: chunk.item_name,
394
+ item_id: chunk.item_id,
395
+ context: chunk.context?.id ?? ctx.id,
396
+ chunk_id: chunk.chunk_id,
397
+ chunk_index: chunk.chunk_index,
398
+ chunk_content: includeContent ? chunk.chunk_content : undefined,
399
+ metadata: {
400
+ ...chunk.chunk_metadata,
401
+ cosine_distance: chunk.chunk_cosine_distance,
402
+ fts_rank: chunk.chunk_fts_rank,
403
+ hybrid_score: chunk.chunk_hybrid_score,
404
+ },
405
+ }),
406
+ ),
407
+ );
408
+ } catch (err) {
409
+ console.error(`[EXULU] search_content failed for context "${ctx.id}":`, err);
410
+ return JSON.stringify([]);
411
+ }
317
412
  },
318
413
  });
319
414
 
@@ -321,10 +416,11 @@ For listing queries: always start with includeContent: false, then use dynamic t
321
416
  // save_search_results
322
417
  // ──────────────────────────────────────────────────────────
323
418
  const save_search_results = tool({
324
- description: `Execute a search and save ALL results to the virtual filesystem WITHOUT loading them into context.
419
+ description: `Execute a search on ONE knowledge base and save ALL results to the virtual filesystem WITHOUT loading them into context.
420
+ Always make a separate call for each knowledge base you want to search.
325
421
 
326
422
  Use this when you expect many results (>20) and need to filter iteratively:
327
- 1. Call save_search_results to save up to 1000 results to /search_results.txt
423
+ 1. Call save_search_results (once per knowledge base) to save up to 1000 results to /search_results_{knowledge_base_id}.txt
328
424
  2. Use bash grep/awk to identify relevant chunks by pattern
329
425
  3. Use dynamic get_content tools to load only the specific chunks you need
330
426
 
@@ -340,7 +436,16 @@ SCORE: ...
340
436
  (content or placeholder)
341
437
  ---CONTENT END---`,
342
438
  inputSchema: z.object({
343
- knowledge_base_ids: ctxEnum,
439
+ knowledge_base_id: z
440
+ .enum(contexts.map((c) => c.id) as [string, ...string[]])
441
+ .describe(
442
+ contexts
443
+ .map(
444
+ (c) =>
445
+ `<knowledge_base id="${c.id}" name="${c.name}">${c.description}</knowledge_base>`,
446
+ )
447
+ .join("\n"),
448
+ ),
344
449
  query: z.string().describe("Search query"),
345
450
  searchMethod: z.enum(["hybrid", "keyword", "semantic"]).default("hybrid"),
346
451
  limit: z
@@ -355,34 +460,44 @@ SCORE: ...
355
460
  "Whether to include chunk text in the saved file. False saves tokens — use true only if you need to grep content.",
356
461
  ),
357
462
  }),
358
- execute: async ({ query, knowledge_base_ids, searchMethod, limit, includeContent }) => {
359
- const ctxList = resolveContexts(knowledge_base_ids, contexts);
360
-
361
- const results = await Promise.all(
362
- ctxList.map(async (ctx) => {
363
- try {
364
- const { chunks } = await ctx.search({
365
- query,
366
- method: mapSearchMethod(searchMethod ?? "hybrid"),
367
- limit: Math.min(limit ?? 100, 1000),
368
- page: 1,
369
- itemFilters: [],
370
- chunkFilters: [],
371
- sort: { field: "updatedAt", direction: "desc" },
372
- user,
373
- role,
374
- trigger: "tool",
375
- });
376
- return chunks;
377
- } catch (err) {
378
- console.error(`[EXULU] save_search_results failed for context "${ctx.id}":`, err);
379
- return [];
380
- }
381
- }),
382
- );
383
-
384
- const chunks: VectorSearchChunkResult[] = results.flat();
385
-
463
+ execute: async ({ query, knowledge_base_id, searchMethod, limit, includeContent }) => {
464
+ const [ctx] = resolveContexts([knowledge_base_id], contexts) as [ExuluContext];
465
+
466
+ const contextItemIds = preselectedItemsByContext?.get(knowledge_base_id);
467
+ // undefined = context not in preselection map → skip
468
+ if (preselectedItemsByContext && contextItemIds === undefined) {
469
+ return JSON.stringify({
470
+ success: true,
471
+ results_count: 0,
472
+ message: `Context "${knowledge_base_id}" not in preselected scope — skipped.`,
473
+ });
474
+ }
475
+
476
+ // null = full context (no filter); string[] = specific items
477
+ const itemFilters: SearchFilters = Array.isArray(contextItemIds)
478
+ ? [{ id: { in: contextItemIds } }]
479
+ : [];
480
+
481
+ let chunks: VectorSearchChunkResult[] = [];
482
+ try {
483
+ const result = await ctx.search({
484
+ query,
485
+ method: mapSearchMethod(searchMethod ?? "hybrid"),
486
+ limit: Math.min(limit ?? 100, 1000),
487
+ page: 1,
488
+ itemFilters,
489
+ chunkFilters: [],
490
+ sort: { field: "updatedAt", direction: "desc" },
491
+ user,
492
+ role,
493
+ trigger: "tool",
494
+ });
495
+ chunks = result.chunks;
496
+ } catch (err) {
497
+ console.error(`[EXULU] save_search_results failed for context "${ctx.id}":`, err);
498
+ }
499
+
500
+ const fileName = `search_results_${ctx.id}.txt`;
386
501
  const fileContent = chunks
387
502
  .map(
388
503
  (chunk, i) =>
@@ -400,14 +515,14 @@ SCORE: ...
400
515
  .join("\n");
401
516
 
402
517
  await updateVirtualFiles([
403
- { path: "search_results.txt", content: fileContent },
518
+ { path: fileName, content: fileContent },
404
519
  {
405
- path: "search_metadata.json",
520
+ path: `search_metadata_${ctx.id}.json`,
406
521
  content: JSON.stringify({
407
522
  query,
408
523
  timestamp: new Date().toISOString(),
409
524
  results_count: chunks.length,
410
- contexts: ctxList.map((c) => c.id),
525
+ context: ctx.id,
411
526
  method: searchMethod,
412
527
  }),
413
528
  },
@@ -416,11 +531,11 @@ SCORE: ...
416
531
  return JSON.stringify({
417
532
  success: true,
418
533
  results_count: chunks.length,
419
- message: `Saved ${chunks.length} results to /search_results.txt`,
534
+ message: `Saved ${chunks.length} results to /${fileName}`,
420
535
  grep_examples: [
421
- "grep -i 'keyword' search_results.txt | head -20",
422
- "grep 'ITEM_NAME:' search_results.txt",
423
- "grep -B 5 'pattern' search_results.txt | grep 'CHUNK_ID:'",
536
+ `grep -i 'keyword' ${fileName} | head -20`,
537
+ `grep 'ITEM_NAME:' ${fileName}`,
538
+ `grep -B 5 'pattern' ${fileName} | grep 'CHUNK_ID:'`,
424
539
  ],
425
540
  });
426
541
  },