illuma-agents 1.0.37 → 1.0.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. package/dist/cjs/agents/AgentContext.cjs +69 -14
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/enum.cjs +3 -1
  4. package/dist/cjs/common/enum.cjs.map +1 -1
  5. package/dist/cjs/graphs/Graph.cjs +50 -8
  6. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  7. package/dist/cjs/graphs/MultiAgentGraph.cjs +277 -11
  8. package/dist/cjs/graphs/MultiAgentGraph.cjs.map +1 -1
  9. package/dist/cjs/llm/bedrock/index.cjs +128 -61
  10. package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
  11. package/dist/cjs/main.cjs +16 -7
  12. package/dist/cjs/main.cjs.map +1 -1
  13. package/dist/cjs/messages/cache.cjs +1 -0
  14. package/dist/cjs/messages/cache.cjs.map +1 -1
  15. package/dist/cjs/messages/core.cjs +1 -1
  16. package/dist/cjs/messages/core.cjs.map +1 -1
  17. package/dist/cjs/messages/tools.cjs +2 -2
  18. package/dist/cjs/messages/tools.cjs.map +1 -1
  19. package/dist/cjs/stream.cjs +4 -2
  20. package/dist/cjs/stream.cjs.map +1 -1
  21. package/dist/cjs/tools/BrowserTools.cjs.map +1 -1
  22. package/dist/cjs/tools/CodeExecutor.cjs +22 -21
  23. package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
  24. package/dist/cjs/tools/ProgrammaticToolCalling.cjs +14 -11
  25. package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -1
  26. package/dist/cjs/tools/ToolNode.cjs +101 -2
  27. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  28. package/dist/cjs/tools/ToolSearch.cjs +862 -0
  29. package/dist/cjs/tools/ToolSearch.cjs.map +1 -0
  30. package/dist/esm/agents/AgentContext.mjs +69 -14
  31. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  32. package/dist/esm/common/enum.mjs +3 -1
  33. package/dist/esm/common/enum.mjs.map +1 -1
  34. package/dist/esm/graphs/Graph.mjs +51 -9
  35. package/dist/esm/graphs/Graph.mjs.map +1 -1
  36. package/dist/esm/graphs/MultiAgentGraph.mjs +278 -12
  37. package/dist/esm/graphs/MultiAgentGraph.mjs.map +1 -1
  38. package/dist/esm/llm/bedrock/index.mjs +127 -60
  39. package/dist/esm/llm/bedrock/index.mjs.map +1 -1
  40. package/dist/esm/main.mjs +1 -1
  41. package/dist/esm/messages/cache.mjs +1 -0
  42. package/dist/esm/messages/cache.mjs.map +1 -1
  43. package/dist/esm/messages/core.mjs +1 -1
  44. package/dist/esm/messages/core.mjs.map +1 -1
  45. package/dist/esm/messages/tools.mjs +2 -2
  46. package/dist/esm/messages/tools.mjs.map +1 -1
  47. package/dist/esm/stream.mjs +4 -2
  48. package/dist/esm/stream.mjs.map +1 -1
  49. package/dist/esm/tools/BrowserTools.mjs.map +1 -1
  50. package/dist/esm/tools/CodeExecutor.mjs +22 -21
  51. package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
  52. package/dist/esm/tools/ProgrammaticToolCalling.mjs +14 -11
  53. package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -1
  54. package/dist/esm/tools/ToolNode.mjs +102 -3
  55. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  56. package/dist/esm/tools/ToolSearch.mjs +827 -0
  57. package/dist/esm/tools/ToolSearch.mjs.map +1 -0
  58. package/dist/types/agents/AgentContext.d.ts +33 -1
  59. package/dist/types/common/enum.d.ts +4 -2
  60. package/dist/types/graphs/Graph.d.ts +6 -0
  61. package/dist/types/graphs/MultiAgentGraph.d.ts +16 -0
  62. package/dist/types/index.d.ts +1 -1
  63. package/dist/types/llm/bedrock/index.d.ts +89 -11
  64. package/dist/types/llm/bedrock/types.d.ts +27 -0
  65. package/dist/types/llm/bedrock/utils/index.d.ts +5 -0
  66. package/dist/types/llm/bedrock/utils/message_inputs.d.ts +31 -0
  67. package/dist/types/llm/bedrock/utils/message_outputs.d.ts +33 -0
  68. package/dist/types/tools/CodeExecutor.d.ts +0 -3
  69. package/dist/types/tools/ProgrammaticToolCalling.d.ts +0 -3
  70. package/dist/types/tools/ToolNode.d.ts +3 -1
  71. package/dist/types/tools/ToolSearch.d.ts +148 -0
  72. package/dist/types/types/graph.d.ts +2 -0
  73. package/dist/types/types/llm.d.ts +3 -1
  74. package/dist/types/types/tools.d.ts +42 -2
  75. package/package.json +12 -5
  76. package/src/agents/AgentContext.ts +88 -16
  77. package/src/common/enum.ts +3 -1
  78. package/src/graphs/Graph.ts +64 -13
  79. package/src/graphs/MultiAgentGraph.ts +350 -13
  80. package/src/index.ts +1 -1
  81. package/src/llm/bedrock/index.ts +221 -99
  82. package/src/llm/bedrock/llm.spec.ts +616 -0
  83. package/src/llm/bedrock/types.ts +51 -0
  84. package/src/llm/bedrock/utils/index.ts +18 -0
  85. package/src/llm/bedrock/utils/message_inputs.ts +563 -0
  86. package/src/llm/bedrock/utils/message_outputs.ts +310 -0
  87. package/src/messages/__tests__/tools.test.ts +21 -21
  88. package/src/messages/cache.test.ts +259 -0
  89. package/src/messages/cache.ts +104 -1
  90. package/src/messages/core.ts +1 -1
  91. package/src/messages/tools.ts +2 -2
  92. package/src/scripts/caching.ts +27 -19
  93. package/src/scripts/code_exec_files.ts +58 -15
  94. package/src/scripts/code_exec_multi_session.ts +241 -0
  95. package/src/scripts/code_exec_session.ts +282 -0
  96. package/src/scripts/multi-agent-conditional.ts +1 -0
  97. package/src/scripts/multi-agent-supervisor.ts +1 -0
  98. package/src/scripts/programmatic_exec_agent.ts +4 -4
  99. package/src/scripts/test-handoff-preamble.ts +277 -0
  100. package/src/scripts/test-parallel-handoffs.ts +291 -0
  101. package/src/scripts/test-tools-before-handoff.ts +8 -4
  102. package/src/scripts/test_code_api.ts +361 -0
  103. package/src/scripts/thinking-bedrock.ts +159 -0
  104. package/src/scripts/thinking.ts +39 -18
  105. package/src/scripts/{tool_search_regex.ts → tool_search.ts} +5 -5
  106. package/src/scripts/tools.ts +7 -3
  107. package/src/stream.ts +4 -2
  108. package/src/tools/BrowserTools.ts +39 -17
  109. package/src/tools/CodeExecutor.ts +26 -23
  110. package/src/tools/ProgrammaticToolCalling.ts +18 -14
  111. package/src/tools/ToolNode.ts +114 -1
  112. package/src/tools/ToolSearch.ts +1041 -0
  113. package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +0 -2
  114. package/src/tools/__tests__/{ToolSearchRegex.integration.test.ts → ToolSearch.integration.test.ts} +6 -6
  115. package/src/tools/__tests__/ToolSearch.test.ts +1003 -0
  116. package/src/types/graph.ts +2 -0
  117. package/src/types/llm.ts +3 -1
  118. package/src/types/tools.ts +51 -2
  119. package/dist/cjs/tools/ToolSearchRegex.cjs +0 -455
  120. package/dist/cjs/tools/ToolSearchRegex.cjs.map +0 -1
  121. package/dist/esm/tools/ToolSearchRegex.mjs +0 -448
  122. package/dist/esm/tools/ToolSearchRegex.mjs.map +0 -1
  123. package/dist/types/tools/ToolSearchRegex.d.ts +0 -80
  124. package/src/tools/ToolSearchRegex.ts +0 -535
  125. package/src/tools/__tests__/ToolSearchRegex.test.ts +0 -232
@@ -0,0 +1,1041 @@
1
+ // src/tools/ToolSearch.ts
2
+ import { z } from 'zod';
3
+ import * as okapibm25Module from 'okapibm25';
4
+ import { config } from 'dotenv';
5
+
6
/** Call signature of the BM25 scoring function exported by `okapibm25`. */
type BM25Fn = (
  documents: string[],
  keywords: string[],
  constants?: { k1?: number; b?: number }
) => number[];

/**
 * Locates the BM25 scoring function on the imported `okapibm25` namespace.
 * Checks, in order: the namespace itself, its `default` export, and a
 * `default` nested inside that `default` export.
 * @returns The resolved BM25 function
 * @throws Error when none of the probed locations holds a function
 */
function getBM25Function(): BM25Fn {
  const isBM25 = (value: unknown): value is BM25Fn =>
    typeof value === 'function';

  const moduleExport: unknown = okapibm25Module;
  if (isBM25(moduleExport)) {
    return moduleExport;
  }

  const firstDefault = (moduleExport as { default?: unknown }).default;
  if (isBM25(firstDefault)) {
    return firstDefault;
  }

  const nestedDefault =
    firstDefault != null
      ? (firstDefault as { default?: unknown }).default
      : undefined;
  if (isBM25(nestedDefault)) {
    return nestedDefault;
  }

  throw new Error('Could not resolve BM25 function from okapibm25 module');
}

const BM25 = getBM25Function();
24
+ import fetch, { RequestInit } from 'node-fetch';
25
+ import { HttpsProxyAgent } from 'https-proxy-agent';
26
+ import { getEnvironmentVariable } from '@langchain/core/utils/env';
27
+ import { tool, DynamicStructuredTool } from '@langchain/core/tools';
28
+ import type * as t from '@/types';
29
+ import { getCodeBaseURL } from './CodeExecutor';
30
+ import { EnvVar, Constants } from '@/common';
31
+
32
+ config();
33
+
34
+ /** Maximum allowed regex pattern length */
35
+ const MAX_PATTERN_LENGTH = 200;
36
+
37
+ /** Maximum allowed regex nesting depth */
38
+ const MAX_REGEX_COMPLEXITY = 5;
39
+
40
+ /** Default search timeout in milliseconds */
41
+ const SEARCH_TIMEOUT = 5000;
42
+
43
/** Zod schema type for tool search parameters */
type ToolSearchSchema = z.ZodObject<{
  query: z.ZodDefault<z.ZodOptional<z.ZodString>>;
  fields: z.ZodDefault<
    z.ZodOptional<z.ZodArray<z.ZodEnum<['name', 'description', 'parameters']>>>
  >;
  max_results: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
  mcp_server: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodArray<z.ZodString>]>>;
}>;

/**
 * Builds the Zod schema for the tool-search input. Only the description of
 * the `query` field depends on the mode; every other field is identical.
 * @param mode - The search mode; `'local'` selects the search-term wording,
 *   anything else selects the regex wording
 * @returns Zod schema for tool search parameters
 */
function createToolSearchSchema(mode: t.ToolSearchMode): ToolSearchSchema {
  let queryDescription: string;
  if (mode === 'local') {
    queryDescription =
      'Search term to find in tool names and descriptions. Case-insensitive substring matching. Optional if mcp_server is provided.';
  } else {
    queryDescription =
      'Regex pattern to search tool names and descriptions. Optional if mcp_server is provided.';
  }

  const querySchema = z
    .string()
    .max(MAX_PATTERN_LENGTH)
    .optional()
    .default('')
    .describe(queryDescription);

  const fieldsSchema = z
    .array(z.enum(['name', 'description', 'parameters']))
    .optional()
    .default(['name', 'description'])
    .describe('Which fields to search. Default: name and description');

  const maxResultsSchema = z
    .number()
    .int()
    .min(1)
    .max(50)
    .optional()
    .default(10)
    .describe('Maximum number of matching tools to return');

  const mcpServerSchema = z
    .union([z.string(), z.array(z.string())])
    .optional()
    .describe(
      'Filter to tools from specific MCP server(s). Can be a single server name or array of names. If provided without a query, lists all tools from those servers.'
    );

  return z.object({
    query: querySchema,
    fields: fieldsSchema,
    max_results: maxResultsSchema,
    mcp_server: mcpServerSchema,
  });
}
92
+
93
/**
 * Returns the part of a tool name that follows the first MCP delimiter.
 * MCP tools follow the pattern: toolName_mcp_serverName
 * @param toolName - The full tool name
 * @returns The text after the delimiter, or undefined when the name
 *   contains no delimiter
 */
function extractMcpServerName(toolName: string): string | undefined {
  const delimiter = Constants.MCP_DELIMITER;
  const delimiterAt = toolName.indexOf(delimiter);
  return delimiterAt === -1
    ? undefined
    : toolName.slice(delimiterAt + delimiter.length);
}
106
+
107
/**
 * Tells whether a tool name carries exactly the given MCP server suffix.
 * @param toolName - The full tool name
 * @param serverName - The server name to compare against
 * @returns True when the server extracted from the tool name equals serverName
 */
function isFromMcpServer(toolName: string, serverName: string): boolean {
  return extractMcpServerName(toolName) === serverName;
}
117
+
118
/**
 * Tells whether a tool name carries one of the given MCP server suffixes.
 * @param toolName - The full tool name
 * @param serverNames - Server names to accept
 * @returns True when the tool has a server suffix and it is in serverNames;
 *   false for names without an MCP delimiter
 */
function isFromAnyMcpServer(toolName: string, serverNames: string[]): boolean {
  const owningServer = extractMcpServerName(toolName);
  return owningServer !== undefined && serverNames.includes(owningServer);
}
131
+
132
/**
 * Coerces the `mcp_server` filter into a list of non-empty server names.
 * @param serverFilter - A single name, a list of names, or undefined
 * @returns A new array of names with empty strings removed (empty when
 *   nothing usable was supplied)
 */
function normalizeServerFilter(
  serverFilter: string | string[] | undefined
): string[] {
  let candidates: string[];
  if (serverFilter === undefined) {
    candidates = [];
  } else if (typeof serverFilter === 'string') {
    candidates = [serverFilter];
  } else {
    candidates = serverFilter;
  }
  return candidates.filter((name) => name !== '');
}
148
+
149
/**
 * Collects the distinct MCP server names found among the registry's tools.
 * @param toolRegistry - The tool registry to scan; undefined yields []
 * @param onlyDeferred - When true, tools whose `defer_loading` is not
 *   strictly `true` are ignored
 * @returns Unique, non-empty server names in default `sort()` order
 */
function getAvailableMcpServers(
  toolRegistry: t.LCToolRegistry | undefined,
  onlyDeferred: boolean = true
): string[] {
  if (!toolRegistry) {
    return [];
  }

  const uniqueServers = new Set<string>();
  for (const toolDef of toolRegistry.values()) {
    const isEligible = !onlyDeferred || toolDef.defer_loading === true;
    if (!isEligible) {
      continue;
    }
    const serverName = extractMcpServerName(toolDef.name);
    if (serverName !== undefined && serverName !== '') {
      uniqueServers.add(serverName);
    }
  }

  const sortedServers = Array.from(uniqueServers);
  sortedServers.sort();
  return sortedServers;
}
176
+
177
/**
 * Backslash-escapes every regex metacharacter so the input can be embedded
 * in a RegExp and matched literally.
 * @param pattern - The raw text to escape
 * @returns The text with each of . * + ? ^ $ { } ( ) | [ ] \ prefixed by a backslash
 */
function escapeRegexSpecialChars(pattern: string): string {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  const escaped = pattern.replace(metaCharacters, '\\$&');
  return escaped;
}
185
+
186
/**
 * Counts the maximum nesting depth of groups in a regex pattern.
 *
 * Only structural parentheses are counted:
 * - a backslash consumes the character that follows it, so `\(` is literal
 *   while `\\(` (escaped backslash, then a real group) is a group;
 * - parentheses inside a character class (`[(]`) are literal.
 *
 * Unbalanced closing parentheses never push the depth below zero.
 * @param pattern - The regex pattern to analyze
 * @returns The maximum nesting depth (0 when the pattern has no groups)
 */
function countNestedGroups(pattern: string): number {
  let maxDepth = 0;
  let currentDepth = 0;
  let inCharClass = false;

  for (let i = 0; i < pattern.length; i++) {
    const char = pattern[i];

    if (char === '\\') {
      // Skip the escaped character so it is never read as syntax.
      i++;
      continue;
    }

    if (inCharClass) {
      if (char === ']') {
        inCharClass = false;
      }
      continue;
    }

    if (char === '[') {
      inCharClass = true;
    } else if (char === '(') {
      currentDepth++;
      maxDepth = Math.max(maxDepth, currentDepth);
    } else if (char === ')') {
      currentDepth = Math.max(0, currentDepth - 1);
    }
  }

  return maxDepth;
}
206
+
207
/**
 * Detects nested quantifiers that can cause catastrophic backtracking.
 * Patterns like (a+)+, (a*)*, (a+)*, etc.
 *
 * Two checks are combined:
 * 1. The original textual check: a group whose text contains `+` or `*`
 *    and which is directly followed by `+`, `*` or `?`.
 * 2. A structural scan that also catches what the textual check misses:
 *    a quantifier buried in an inner group (`((a+))+`) and a brace
 *    quantifier on the outer group (`(a+){2,}`). The scan ignores escaped
 *    characters and the contents of character classes.
 *
 * Everything flagged by check 1 alone is still flagged.
 * @param pattern - The regex pattern to check
 * @returns True if nested quantifiers are detected
 */
function hasNestedQuantifiers(pattern: string): boolean {
  const nestedQuantifierPattern = /\([^)]*[+*][^)]*\)[+*?]/;
  if (nestedQuantifierPattern.test(pattern)) {
    return true;
  }

  // One flag per currently open group: does it contain a `+` or `*`?
  const openGroups: boolean[] = [];
  let inCharClass = false;

  const markEnclosingGroup = (): void => {
    if (openGroups.length > 0) {
      openGroups[openGroups.length - 1] = true;
    }
  };

  for (let i = 0; i < pattern.length; i++) {
    const char = pattern[i];

    if (char === '\\') {
      // Skip the escaped character so it is never read as syntax.
      i++;
      continue;
    }

    if (inCharClass) {
      if (char === ']') {
        inCharClass = false;
      }
      continue;
    }

    if (char === '[') {
      inCharClass = true;
    } else if (char === '(') {
      openGroups.push(false);
    } else if (char === '+' || char === '*') {
      markEnclosingGroup();
    } else if (char === ')') {
      const containsQuantifier = openGroups.pop() === true;
      if (!containsQuantifier) {
        continue;
      }
      const next = pattern.charAt(i + 1);
      const isBraceQuantifier =
        next === '{' && /\d/.test(pattern.charAt(i + 2));
      if (next === '+' || next === '*' || next === '?' || isBraceQuantifier) {
        return true;
      }
      // A quantifier inside this group is also inside its parent.
      markEnclosingGroup();
    }
  }

  return false;
}
217
+
218
/**
 * Decides whether a regex pattern should be refused as a live regex.
 * A pattern is dangerous when it has nested quantifiers, nests groups
 * deeper than MAX_REGEX_COMPLEXITY, or matches one of the known risky shapes
 * listed below.
 * @param pattern - The regex pattern to validate
 * @returns True if the pattern is dangerous
 */
function isDangerousPattern(pattern: string): boolean {
  const exceedsStructuralLimits =
    hasNestedQuantifiers(pattern) ||
    countNestedGroups(pattern) > MAX_REGEX_COMPLEXITY;
  if (exceedsStructuralLimits) {
    return true;
  }

  const riskyShapes: RegExp[] = [
    /\.\{1000,\}/, // Excessive wildcards
    /\(\?=\.\{100,\}\)/, // Runaway lookaheads
    /\([^)]*\|\s*\){20,}/, // Excessive alternation (rough check)
    /\(\.\*\)\+/, // (.*)+
    /\(\.\+\)\+/, // (.+)+
    /\(\.\*\)\*/, // (.*)*
    /\(\.\+\)\*/, // (.+)*
  ];

  return riskyShapes.some((shape) => shape.test(pattern));
}
250
+
251
/**
 * Produces a pattern that is safe to hand to `new RegExp`.
 * Dangerous patterns and patterns that fail to compile are converted to an
 * escaped literal; everything else is returned untouched.
 * @param pattern - The regex pattern to sanitize
 * @returns The usable pattern and a flag telling whether it was escaped
 */
function sanitizeRegex(pattern: string): { safe: string; wasEscaped: boolean } {
  const asLiteral = (): { safe: string; wasEscaped: boolean } => ({
    safe: escapeRegexSpecialChars(pattern),
    wasEscaped: true,
  });

  if (isDangerousPattern(pattern)) {
    return asLiteral();
  }

  try {
    // Compile only to validate syntax; the instance itself is discarded.
    new RegExp(pattern);
  } catch {
    return asLiteral();
  }

  return { safe: pattern, wasEscaped: false };
}
275
+
276
/**
 * Reduces a tool's JSON schema to what parameter-name search needs:
 * the top-level `type` and, for each property, only its `type`.
 * @param parameters - The tool's JSON schema parameters
 * @returns The reduced schema, or undefined when no parameters were given
 */
function simplifyParametersForSearch(
  parameters?: t.JsonSchemaType
): t.JsonSchemaType | undefined {
  if (!parameters) {
    return undefined;
  }

  const { type, properties } = parameters;
  if (!properties) {
    return { type };
  }

  const typeOnlyEntries = Object.entries(properties).map(([key, value]) => [
    key,
    { type: (value as t.JsonSchemaType).type },
  ]);

  return {
    type,
    properties: Object.fromEntries(typeOnlyEntries),
  } as t.JsonSchemaType;
}
303
+
304
/**
 * Breaks text into lowercase tokens for BM25.
 * After lowercasing, each maximal run of `a-z` / `0-9` is one token; every
 * other character (underscores included) acts as a separator.
 * @param text - The text to tokenize
 * @returns Array of lowercase tokens (empty when the text has none)
 */
function tokenize(text: string): string[] {
  const lowered = text.toLowerCase();
  const tokens = lowered.match(/[a-z0-9]+/g);
  return tokens === null ? [] : Array.from(tokens);
}
317
+
318
/**
 * Builds the text that represents a tool in the BM25 corpus.
 * The name (underscores turned into spaces) is added twice, followed by the
 * description and the space-joined parameter names, each only when its field
 * is requested and present.
 * @param tool - The tool metadata
 * @param fields - Which fields to include
 * @returns The selected parts joined by single spaces
 */
function createToolDocument(tool: t.ToolMetadata, fields: string[]): string {
  const wants = (field: string): boolean => fields.includes(field);
  const segments: string[] = [];

  if (wants('name')) {
    const spacedName = tool.name.split('_').join(' ');
    segments.push(spacedName);
    segments.push(spacedName);
  }

  if (wants('description') && tool.description) {
    segments.push(tool.description);
  }

  const properties = tool.parameters?.properties;
  if (wants('parameters') && properties) {
    segments.push(Object.keys(properties).join(' '));
  }

  return segments.join(' ');
}
343
+
344
/**
 * Reports the first field, in the order name, description, parameters, in
 * which any query token occurs as a case-insensitive substring.
 * @param tool - The tool to check
 * @param queryTokens - Tokenized query
 * @param fields - Fields to check
 * @returns The matched field and a snippet; when nothing matches, the field
 *   is 'unknown' and the snippet is the description's first 100 characters,
 *   or the name when the description is empty
 */
function findMatchedField(
  tool: t.ToolMetadata,
  queryTokens: string[],
  fields: string[]
): { field: string; snippet: string } {
  const containsAnyToken = (haystack: string): boolean =>
    queryTokens.some((token) => haystack.includes(token));

  if (fields.includes('name') && containsAnyToken(tool.name.toLowerCase())) {
    return { field: 'name', snippet: tool.name };
  }

  if (
    fields.includes('description') &&
    tool.description &&
    containsAnyToken(tool.description.toLowerCase())
  ) {
    return {
      field: 'description',
      snippet: tool.description.substring(0, 100),
    };
  }

  if (fields.includes('parameters') && tool.parameters?.properties) {
    const parameterNames = Object.keys(tool.parameters.properties);
    if (containsAnyToken(parameterNames.join(' ').toLowerCase())) {
      return { field: 'parameters', snippet: parameterNames.join(', ') };
    }
  }

  return {
    field: 'unknown',
    snippet: tool.description ? tool.description.substring(0, 100) : tool.name,
  };
}
392
+
393
/**
 * Ranks tools against a query with Okapi BM25 (k1 = 1.5, b = 0.75).
 *
 * Each tool with a positive BM25 score is kept. Scores are divided by the
 * larger of the best positive score and 1, then capped at 1. A tool whose
 * base name equals the trimmed, lowercased query gets score 1; one whose
 * base name starts with it gets at least 0.95.
 * @param tools - Array of tool metadata to search
 * @param query - The search query
 * @param fields - Which fields to search
 * @param maxResults - Maximum results to return
 * @returns Matching tools sorted by descending score, limited to maxResults;
 *   no matches when there are no tools or the query yields no tokens
 */
function performLocalSearch(
  tools: t.ToolMetadata[],
  query: string,
  fields: string[],
  maxResults: number
): t.ToolSearchResponse {
  const buildResponse = (
    matches: t.ToolSearchResult[]
  ): t.ToolSearchResponse => ({
    tool_references: matches,
    total_tools_searched: tools.length,
    pattern_used: query,
  });

  if (tools.length === 0 || !query.trim()) {
    return buildResponse([]);
  }

  const documents = tools.map((tool) => createToolDocument(tool, fields));
  const queryTokens = tokenize(query);
  if (queryTokens.length === 0) {
    return buildResponse([]);
  }

  const scores = BM25(documents, queryTokens, { k1: 1.5, b: 0.75 }) as number[];
  const positiveScores = scores.filter((score) => score > 0);
  // The floor of 1 keeps scores below 1 from being scaled up.
  const maxScore = Math.max(...positiveScores, 1);
  const normalizedQuery = query.toLowerCase().trim();

  const ranked: t.ToolSearchResult[] = [];
  tools.forEach((candidate, index) => {
    const rawScore = scores[index];
    if (!(rawScore > 0)) {
      return;
    }

    const { field, snippet } = findMatchedField(candidate, queryTokens, fields);
    let matchScore = Math.min(rawScore / maxScore, 1.0);

    // Boost score for exact base name match
    const baseName = getBaseToolName(candidate.name).toLowerCase();
    if (baseName === normalizedQuery) {
      matchScore = 1.0;
    } else if (baseName.startsWith(normalizedQuery)) {
      matchScore = Math.max(matchScore, 0.95);
    }

    ranked.push({
      tool_name: candidate.name,
      match_score: matchScore,
      matched_field: field,
      snippet,
    });
  });

  ranked.sort((left, right) => right.match_score - left.match_score);
  return buildResponse(ranked.slice(0, maxResults));
}
468
+
469
/**
 * Builds the JavaScript source of the search script run in the sandbox.
 * The script is plain ES5-style JavaScript: it embeds the tools, fields,
 * result limit and pattern as literals, tests the case-insensitive regex
 * against each requested field (name 0.95, description 0.75, parameters
 * 0.60; the first matching field wins), and prints the top results as JSON.
 * @param deferredTools - Array of tool metadata to search through
 * @param fields - Which fields to search
 * @param maxResults - Maximum number of results to return
 * @param sanitizedPattern - The sanitized regex pattern
 * @returns The JavaScript code string, one statement line per '\n'
 */
function generateSearchScript(
  deferredTools: t.ToolMetadata[],
  fields: string[],
  maxResults: number,
  sanitizedPattern: string
): string {
  const injectedInputs = [
    '// Tool definitions (injected)',
    `var tools = ${JSON.stringify(deferredTools)};`,
    `var searchFields = ${JSON.stringify(fields)};`,
    `var maxResults = ${maxResults};`,
    `var pattern = ${JSON.stringify(sanitizedPattern)};`,
    '',
  ];

  const regexSetup = [
    '// Compile regex (pattern is sanitized client-side)',
    'var regex;',
    'try {',
    ' regex = new RegExp(pattern, \'i\');',
    '} catch (e) {',
    ' regex = new RegExp(pattern.replace(/[.*+?^${}()[\\]\\\\|]/g, "\\\\$&"), "i");',
    '}',
    '',
  ];

  const searchLoop = [
    '// Search logic',
    'var results = [];',
    '',
    'for (var j = 0; j < tools.length; j++) {',
    ' var tool = tools[j];',
    ' var bestScore = 0;',
    ' var matchedField = \'\';',
    ' var snippet = \'\';',
    '',
    ' // Search name (highest priority)',
    ' if (searchFields.indexOf(\'name\') >= 0 && regex.test(tool.name)) {',
    ' bestScore = 0.95;',
    ' matchedField = \'name\';',
    ' snippet = tool.name;',
    ' }',
    '',
    ' // Search description (medium priority)',
    ' if (searchFields.indexOf(\'description\') >= 0 && tool.description && regex.test(tool.description)) {',
    ' if (bestScore === 0) {',
    ' bestScore = 0.75;',
    ' matchedField = \'description\';',
    ' snippet = tool.description.substring(0, 100);',
    ' }',
    ' }',
    '',
    ' // Search parameter names (lower priority)',
    ' if (searchFields.indexOf(\'parameters\') >= 0 && tool.parameters && tool.parameters.properties) {',
    ' var paramNames = Object.keys(tool.parameters.properties).join(\' \');',
    ' if (regex.test(paramNames)) {',
    ' if (bestScore === 0) {',
    ' bestScore = 0.60;',
    ' matchedField = \'parameters\';',
    ' snippet = paramNames;',
    ' }',
    ' }',
    ' }',
    '',
    ' if (bestScore > 0) {',
    ' results.push({',
    ' tool_name: tool.name,',
    ' match_score: bestScore,',
    ' matched_field: matchedField,',
    ' snippet: snippet',
    ' });',
    ' }',
    '}',
    '',
  ];

  const rankAndEmit = [
    '// Sort by score (descending) and limit results',
    'results.sort(function(a, b) { return b.match_score - a.match_score; });',
    'var topResults = results.slice(0, maxResults);',
    '',
    '// Output as JSON',
    'console.log(JSON.stringify({',
    ' tool_references: topResults.map(function(r) {',
    ' return {',
    ' tool_name: r.tool_name,',
    ' match_score: r.match_score,',
    ' matched_field: r.matched_field,',
    ' snippet: r.snippet',
    ' };',
    ' }),',
    ' total_tools_searched: tools.length,',
    ' pattern_used: pattern',
    '}));',
  ];

  return [
    ...injectedInputs,
    ...regexSetup,
    ...searchLoop,
    ...rankAndEmit,
  ].join('\n');
}
566
+
567
/**
 * Decodes the sandbox script's stdout into a search response.
 * Surrounding whitespace is trimmed and the remainder is parsed as one JSON
 * document; the shape of the parsed value is not validated.
 * @param stdout - The stdout string containing JSON results
 * @returns Parsed search response
 * @throws SyntaxError (from JSON.parse) when stdout is not valid JSON
 */
function parseSearchResults(stdout: string): t.ToolSearchResponse {
  return JSON.parse(stdout.trim()) as t.ToolSearchResponse;
}
577
+
578
/**
 * Serializes a search response into the JSON shown to the caller.
 * Field names are shortened (tool_name -> name, match_score -> score,
 * matched_field -> matched_in) and scores are rounded to two decimals.
 * @param searchResponse - The parsed search response
 * @returns Pretty-printed (2-space) JSON string with search results
 */
function formatSearchResults(searchResponse: t.ToolSearchResponse): string {
  const matches = searchResponse.tool_references;

  const tools = matches.map((match) => {
    const roundedScore = Number(match.match_score.toFixed(2));
    return {
      name: match.tool_name,
      score: roundedScore,
      matched_in: match.matched_field,
      snippet: match.snippet,
    };
  });

  return JSON.stringify(
    {
      found: matches.length,
      tools,
      total_searched: searchResponse.total_tools_searched,
      query: searchResponse.pattern_used,
    },
    null,
    2
  );
}
601
+
602
/**
 * Returns the part of a tool name that precedes the first MCP delimiter.
 * @param toolName - The full tool name
 * @returns The text before the delimiter, or the whole name when it
 *   contains no delimiter
 */
function getBaseToolName(toolName: string): string {
  const delimiterAt = toolName.indexOf(Constants.MCP_DELIMITER);
  return delimiterAt === -1 ? toolName : toolName.slice(0, delimiterAt);
}
614
+
615
/**
 * Generates a compact listing of deferred tools grouped by server.
 * Format: "server: tool1, tool2, tool3"
 * Non-MCP tools are grouped under "other", which is always listed last;
 * the remaining servers are ordered with `localeCompare`.
 *
 * Groups are kept in a Map rather than a plain object: server names come
 * from tool names, and a name such as "constructor" or "toString" would
 * otherwise be found on Object.prototype by the `in` check and never be
 * initialised as an array.
 * @param toolRegistry - The tool registry; undefined yields ''
 * @param onlyDeferred - Whether to only include tools whose `defer_loading`
 *   is strictly `true`
 * @returns One line per server joined by '\n', or '' when nothing is listed
 */
function getDeferredToolsListing(
  toolRegistry: t.LCToolRegistry | undefined,
  onlyDeferred: boolean
): string {
  if (!toolRegistry) {
    return '';
  }

  const toolsByServer = new Map<string, string[]>();

  for (const lcTool of toolRegistry.values()) {
    if (onlyDeferred && lcTool.defer_loading !== true) {
      continue;
    }

    const toolName = lcTool.name;
    const serverName = extractMcpServerName(toolName) ?? 'other';
    const baseName = getBaseToolName(toolName);

    const group = toolsByServer.get(serverName);
    if (group === undefined) {
      toolsByServer.set(serverName, [baseName]);
    } else {
      group.push(baseName);
    }
  }

  if (toolsByServer.size === 0) {
    return '';
  }

  const serverNames = Array.from(toolsByServer.keys()).sort((a, b) => {
    if (a === 'other') return 1;
    if (b === 'other') return -1;
    return a.localeCompare(b);
  });

  const lines = serverNames.map(
    (server) => `${server}: ${(toolsByServer.get(server) ?? []).join(', ')}`
  );

  return lines.join('\n');
}
664
+
665
/**
 * Formats a server listing response as structured JSON.
 * NOTE: This is a PREVIEW only - tools are NOT discovered/loaded.
 *
 * Tools are grouped by the server extracted from their name ("unknown" when
 * the name has no MCP delimiter). Descriptions longer than 100 characters
 * are cut to 97 characters plus "..."; a missing description is listed as
 * an empty string.
 *
 * Grouping uses a Map, converted with Object.fromEntries for output, so a
 * server named like an Object.prototype member (e.g. "constructor") is
 * stored as an ordinary key instead of colliding with the inherited one.
 * @param tools - Array of tool metadata from the server(s)
 * @param serverNames - The MCP server name(s)
 * @returns Pretty-printed JSON string showing all tools grouped by server
 */
function formatServerListing(
  tools: t.ToolMetadata[],
  serverNames: string | string[]
): string {
  const servers = Array.isArray(serverNames) ? serverNames : [serverNames];

  if (tools.length === 0) {
    return JSON.stringify(
      {
        listing_mode: true,
        servers,
        total_tools: 0,
        tools_by_server: {},
        hint: 'No tools found from the specified MCP server(s).',
      },
      null,
      2
    );
  }

  const toolsByServer = new Map<
    string,
    Array<{ name: string; description: string }>
  >();
  for (const tool of tools) {
    const server = extractMcpServerName(tool.name) ?? 'unknown';
    // Sibling helpers treat the description as possibly falsy; do the same.
    const description = tool.description ?? '';
    const entry = {
      name: getBaseToolName(tool.name),
      description:
        description.length > 100
          ? description.substring(0, 97) + '...'
          : description,
    };

    const group = toolsByServer.get(server);
    if (group === undefined) {
      toolsByServer.set(server, [entry]);
    } else {
      group.push(entry);
    }
  }

  const output = {
    listing_mode: true,
    servers,
    total_tools: tools.length,
    tools_by_server: Object.fromEntries(toolsByServer),
    hint: `To use a tool, search for it by name (e.g., query: "${getBaseToolName(tools[0]?.name ?? 'tool_name')}") to load it.`,
  };

  return JSON.stringify(output, null, 2);
}
720
+
721
+ /**
722
+ * Creates a Tool Search tool for discovering tools from a large registry.
723
+ *
724
+ * This tool enables AI agents to dynamically discover tools from a large library
725
+ * without loading all tool definitions into the LLM context window. The agent
726
+ * can search for relevant tools on-demand.
727
+ *
728
+ * **Modes:**
729
+ * - `code_interpreter` (default): Uses external sandbox for regex search. Safer for complex patterns.
730
+ * - `local`: Uses BM25 ranking over tokenized tool metadata locally. No network call, faster, completely safe from ReDoS.
731
+ *
732
+ * The tool registry can be provided either:
733
+ * 1. At initialization time via params.toolRegistry
734
+ * 2. At runtime via config.configurable.toolRegistry when invoking
735
+ *
736
+ * @param params - Configuration parameters for the tool (toolRegistry is optional)
737
+ * @returns A LangChain DynamicStructuredTool for tool searching
738
+ *
739
+ * @example
740
+ * // Option 1: Code interpreter mode (regex via sandbox)
741
+ * const tool = createToolSearch({ apiKey, toolRegistry });
742
+ * await tool.invoke({ query: 'expense.*report' });
743
+ *
744
+ * @example
745
+ * // Option 2: Local mode (BM25 keyword search, no API key needed)
746
+ * const tool = createToolSearch({ mode: 'local', toolRegistry });
747
+ * await tool.invoke({ query: 'expense' });
748
+ */
749
+ function createToolSearch(
750
+ initParams: t.ToolSearchParams = {}
751
+ ): DynamicStructuredTool<ReturnType<typeof createToolSearchSchema>> {
752
+ const mode: t.ToolSearchMode = initParams.mode ?? 'code_interpreter';
753
+ const defaultOnlyDeferred = initParams.onlyDeferred ?? true;
754
+ const schema = createToolSearchSchema(mode);
755
+
756
+ const apiKey: string =
757
+ mode === 'code_interpreter'
758
+ ? ((initParams[EnvVar.CODE_API_KEY] as string | undefined) ??
759
+ initParams.apiKey ??
760
+ getEnvironmentVariable(EnvVar.CODE_API_KEY) ??
761
+ '')
762
+ : '';
763
+
764
+ if (mode === 'code_interpreter' && !apiKey) {
765
+ throw new Error(
766
+ 'No API key provided for tool search in code_interpreter mode. Use mode: "local" to search without an API key.'
767
+ );
768
+ }
769
+
770
+ const baseEndpoint = initParams.baseUrl ?? getCodeBaseURL();
771
+ const EXEC_ENDPOINT = `${baseEndpoint}/exec`;
772
+
773
+ const deferredToolsListing = getDeferredToolsListing(
774
+ initParams.toolRegistry,
775
+ defaultOnlyDeferred
776
+ );
777
+
778
+ const toolsListSection =
779
+ deferredToolsListing.length > 0
780
+ ? `
781
+
782
+ Deferred tools (search to load):
783
+ ${deferredToolsListing}`
784
+ : '';
785
+
786
+ const mcpNote =
787
+ deferredToolsListing.includes(Constants.MCP_DELIMITER) ||
788
+ deferredToolsListing.split('\n').some((line) => !line.startsWith('other:'))
789
+ ? `
790
+ - MCP tools use format: toolName${Constants.MCP_DELIMITER}serverName
791
+ - Use mcp_server param to filter by server`
792
+ : '';
793
+
794
+ const description =
795
+ mode === 'local'
796
+ ? `
797
+ Searches deferred tools using BM25 ranking. Multi-word queries supported.
798
+ ${mcpNote}${toolsListSection}
799
+ `.trim()
800
+ : `
801
+ Searches deferred tools by regex pattern.
802
+ ${mcpNote}${toolsListSection}
803
+ `.trim();
804
+
805
+ return tool<typeof schema>(
806
+ async (params, config) => {
807
+ const {
808
+ query,
809
+ fields = ['name', 'description'],
810
+ max_results = 10,
811
+ mcp_server,
812
+ } = params;
813
+
814
+ const {
815
+ toolRegistry: paramToolRegistry,
816
+ onlyDeferred: paramOnlyDeferred,
817
+ mcpServer: paramMcpServer,
818
+ } = config.toolCall ?? {};
819
+
820
+ const toolRegistry = paramToolRegistry ?? initParams.toolRegistry;
821
+ const onlyDeferred =
822
+ paramOnlyDeferred !== undefined
823
+ ? paramOnlyDeferred
824
+ : defaultOnlyDeferred;
825
+ const rawServerFilter =
826
+ mcp_server ?? paramMcpServer ?? initParams.mcpServer;
827
+ const serverFilters = normalizeServerFilter(rawServerFilter);
828
+ const hasServerFilter = serverFilters.length > 0;
829
+
830
+ if (toolRegistry == null) {
831
+ return [
832
+ 'Error: No tool registry provided. Configure toolRegistry at agent level or initialization.',
833
+ {
834
+ tool_references: [],
835
+ metadata: {
836
+ total_searched: 0,
837
+ pattern: query,
838
+ error: 'No tool registry provided',
839
+ },
840
+ },
841
+ ];
842
+ }
843
+
844
+ const toolsArray: t.LCTool[] = Array.from(toolRegistry.values());
845
+ const deferredTools: t.ToolMetadata[] = toolsArray
846
+ .filter((lcTool) => {
847
+ if (onlyDeferred === true && lcTool.defer_loading !== true) {
848
+ return false;
849
+ }
850
+ if (
851
+ hasServerFilter &&
852
+ !isFromAnyMcpServer(lcTool.name, serverFilters)
853
+ ) {
854
+ return false;
855
+ }
856
+ return true;
857
+ })
858
+ .map((lcTool) => ({
859
+ name: lcTool.name,
860
+ description: lcTool.description ?? '',
861
+ parameters: simplifyParametersForSearch(lcTool.parameters),
862
+ }));
863
+
864
+ if (deferredTools.length === 0) {
865
+ const serverMsg = hasServerFilter
866
+ ? ` from MCP server(s): ${serverFilters.join(', ')}`
867
+ : '';
868
+ return [
869
+ `No tools available to search${serverMsg}. The tool registry is empty or no matching deferred tools are registered.`,
870
+ {
871
+ tool_references: [],
872
+ metadata: {
873
+ total_searched: 0,
874
+ pattern: query,
875
+ mcp_server: serverFilters,
876
+ },
877
+ },
878
+ ];
879
+ }
880
+
881
+ const isServerListing = hasServerFilter && query === '';
882
+
883
+ if (isServerListing) {
884
+ const formattedOutput = formatServerListing(
885
+ deferredTools,
886
+ serverFilters
887
+ );
888
+
889
+ return [
890
+ formattedOutput,
891
+ {
892
+ tool_references: [],
893
+ metadata: {
894
+ total_available: deferredTools.length,
895
+ mcp_server: serverFilters,
896
+ listing_mode: true,
897
+ },
898
+ },
899
+ ];
900
+ }
901
+
902
+ if (mode === 'local') {
903
+ const searchResponse = performLocalSearch(
904
+ deferredTools,
905
+ query,
906
+ fields,
907
+ max_results
908
+ );
909
+ const formattedOutput = formatSearchResults(searchResponse);
910
+
911
+ return [
912
+ formattedOutput,
913
+ {
914
+ tool_references: searchResponse.tool_references,
915
+ metadata: {
916
+ total_searched: searchResponse.total_tools_searched,
917
+ pattern: searchResponse.pattern_used,
918
+ mcp_server: serverFilters.length > 0 ? serverFilters : undefined,
919
+ },
920
+ },
921
+ ];
922
+ }
923
+
924
+ const { safe: sanitizedPattern, wasEscaped } = sanitizeRegex(query);
925
+ let warningMessage = '';
926
+ if (wasEscaped) {
927
+ warningMessage =
928
+ 'Note: The provided pattern was converted to a literal search for safety.\n\n';
929
+ }
930
+
931
+ const searchScript = generateSearchScript(
932
+ deferredTools,
933
+ fields,
934
+ max_results,
935
+ sanitizedPattern
936
+ );
937
+
938
+ const postData = {
939
+ lang: 'js',
940
+ code: searchScript,
941
+ timeout: SEARCH_TIMEOUT,
942
+ };
943
+
944
+ try {
945
+ const fetchOptions: RequestInit = {
946
+ method: 'POST',
947
+ headers: {
948
+ 'Content-Type': 'application/json',
949
+ 'User-Agent': 'LibreChat/1.0',
950
+ 'X-API-Key': apiKey,
951
+ },
952
+ body: JSON.stringify(postData),
953
+ };
954
+
955
+ if (process.env.PROXY != null && process.env.PROXY !== '') {
956
+ fetchOptions.agent = new HttpsProxyAgent(process.env.PROXY);
957
+ }
958
+
959
+ const response = await fetch(EXEC_ENDPOINT, fetchOptions);
960
+ if (!response.ok) {
961
+ throw new Error(`HTTP error! status: ${response.status}`);
962
+ }
963
+
964
+ const result: t.ExecuteResult = await response.json();
965
+
966
+ if (result.stderr && result.stderr.trim()) {
967
+ // eslint-disable-next-line no-console
968
+ console.warn('[ToolSearch] stderr:', result.stderr);
969
+ }
970
+
971
+ if (!result.stdout || !result.stdout.trim()) {
972
+ return [
973
+ `${warningMessage}No tools matched the pattern "${sanitizedPattern}".\nTotal tools searched: ${deferredTools.length}`,
974
+ {
975
+ tool_references: [],
976
+ metadata: {
977
+ total_searched: deferredTools.length,
978
+ pattern: sanitizedPattern,
979
+ },
980
+ },
981
+ ];
982
+ }
983
+
984
+ const searchResponse = parseSearchResults(result.stdout);
985
+ const formattedOutput = `${warningMessage}${formatSearchResults(searchResponse)}`;
986
+
987
+ return [
988
+ formattedOutput,
989
+ {
990
+ tool_references: searchResponse.tool_references,
991
+ metadata: {
992
+ total_searched: searchResponse.total_tools_searched,
993
+ pattern: searchResponse.pattern_used,
994
+ },
995
+ },
996
+ ];
997
+ } catch (error) {
998
+ // eslint-disable-next-line no-console
999
+ console.error('[ToolSearch] Error:', error);
1000
+
1001
+ const errorMessage =
1002
+ error instanceof Error ? error.message : String(error);
1003
+ return [
1004
+ `Tool search failed: ${errorMessage}\n\nSuggestion: Try a simpler search pattern or search for specific tool names.`,
1005
+ {
1006
+ tool_references: [],
1007
+ metadata: {
1008
+ total_searched: 0,
1009
+ pattern: sanitizedPattern,
1010
+ error: errorMessage,
1011
+ },
1012
+ },
1013
+ ];
1014
+ }
1015
+ },
1016
+ {
1017
+ name: Constants.TOOL_SEARCH,
1018
+ description,
1019
+ schema,
1020
+ responseFormat: Constants.CONTENT_AND_ARTIFACT,
1021
+ }
1022
+ );
1023
+ }
1024
+
1025
+ export {
1026
+ createToolSearch,
1027
+ performLocalSearch,
1028
+ extractMcpServerName,
1029
+ isFromMcpServer,
1030
+ isFromAnyMcpServer,
1031
+ normalizeServerFilter,
1032
+ getAvailableMcpServers,
1033
+ getDeferredToolsListing,
1034
+ getBaseToolName,
1035
+ formatServerListing,
1036
+ sanitizeRegex,
1037
+ escapeRegexSpecialChars,
1038
+ isDangerousPattern,
1039
+ countNestedGroups,
1040
+ hasNestedQuantifiers,
1041
+ };