@slashfi/agents-sdk 0.78.0 → 0.80.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/search.ts ADDED
@@ -0,0 +1,541 @@
1
+ /**
2
+ * adk search — BM25 over materialized ref/tool docs.
3
+ *
4
+ * Walks `${configDir}/refs/` and indexes every ref + tool + skill found:
5
+ *
6
+ * `<configDir>/refs/<ref>/agent.json` → ref-level fields
7
+ * `<configDir>/refs/<ref>/entrypoint.md` → ref-level body
8
+ * `<configDir>/refs/<ref>/tools/<t>.tool.md` → per-tool body
9
+ * `<configDir>/refs/<ref>/tools/<t>.tool.json` → per-tool param names/descs
10
+ * `<configDir>/refs/<ref>/skills/<file>` → per-resource body
11
+ *
12
+ * Platform agents nest under `refs/agents/<@name>/`; integration refs sit
13
+ * directly at `refs/<name>/`. The walker handles both layouts.
14
+ *
15
+ * One BM25 document per ref + one per tool + one per skill resource. Tool
16
+ * and ref names get inserted multiple times into the document text so they
17
+ * outweigh surrounding prose without the BM25 implementation needing
18
+ * per-field weighting.
19
+ *
20
+ * Persistence: `adk sync` calls `writeSearchIndex(configDir)` to dump the
21
+ * raw BM25 docs + per-doc result metadata to `<configDir>/.search-index.json`.
22
+ * `searchRefs` prefers that file when it exists — `adk search` becomes a
23
+ * single file read + BM25 build (a few ms) instead of a recursive walk.
24
+ * Falls back to a fresh walk when the persisted file is missing, unreadable, malformed, or written by an incompatible index version.
25
+ *
26
+ * The file is dot-prefixed so coding agents treat it as a hidden artifact
27
+ * and don't try to read it directly — they should use `adk search` instead.
28
+ */
29
+
30
+ import {
31
+ existsSync,
32
+ mkdirSync,
33
+ readFileSync,
34
+ readdirSync,
35
+ statSync,
36
+ writeFileSync,
37
+ } from "node:fs";
38
+ import { basename, dirname, join } from "node:path";
39
+ import { createBM25Index } from "./bm25.js";
40
+
41
+ // ============================================
42
+ // Types
43
+ // ============================================
44
+
45
/** Knobs accepted by `searchRefs` to bound and filter the result list. */
export interface SearchOptions {
  /** Upper bound on the number of results returned (`searchRefs` uses 10 when omitted). */
  limit?: number;
  /**
   * Keep only results owned by this ref. Accepts a bare name (`notion`)
   * or a platform-agent path (`/agents/@clock`). The filter is applied to
   * hits that have already been scored against the whole index, so it
   * removes other refs' documents from the output without re-ranking the
   * ones that remain.
   */
  ref?: string;
  /** When true, drop every result whose kind is not `tool`. */
  toolsOnly?: boolean;
  /** When true, drop every result whose kind is not `ref`. */
  refsOnly?: boolean;
}
60
+
61
/** A hit on a single tool exposed by a ref. */
interface ToolSearchResult {
  kind: "tool";
  /** Canonical ref name (e.g. `notion`, `/agents/@clock`). */
  ref: string;
  /** Tool name. */
  tool: string;
  /** Relevance score assigned at query time. */
  score: number;
  /** First non-blank, non-heading line of the tool's .tool.md. */
  summary: string;
  /** Path to the per-tool .tool.md. */
  docs: string;
  /** Path to the per-tool .tool.json. */
  schema: string;
  /** Suggested CLI snippet for invoking the tool. */
  call: string;
}

/** A hit on a ref as a whole. */
interface RefSearchResult {
  kind: "ref";
  /** Canonical ref name. */
  ref: string;
  /** Relevance score assigned at query time. */
  score: number;
  /** Description from agent.json (the ref name when there is none). */
  summary: string;
  /** Ref directory path. */
  docs: string;
  /** Path to entrypoint.md. */
  entrypoint: string;
  /** Number of tools the ref exposes. */
  toolCount: number;
}

/** A hit on a skill / resource file owned by a ref. */
interface ResourceSearchResult {
  kind: "resource";
  /** Canonical ref name that owns this resource. */
  ref: string;
  /** Resource name (file basename, e.g. `writing-pages.md`). */
  resource: string;
  /** Relevance score assigned at query time. */
  score: number;
  /** First non-blank, non-heading line of the resource. */
  summary: string;
  /** Absolute or `~`-rooted path to the resource file. */
  docs: string;
}

/** One ranked search hit, discriminated by `kind`. */
export type SearchResult =
  | ToolSearchResult
  | RefSearchResult
  | ResourceSearchResult;
104
+
105
+ // ============================================
106
+ // Persisted index format
107
+ // ============================================
108
+
109
/**
 * Strip `score` from every member of a union individually. A plain
 * `Omit<Union, "score">` would collapse the union to its shared keys; the
 * conditional type distributes over each variant so the `kind`
 * discriminant and the variant-specific fields survive.
 */
type WithoutScore<T> = T extends unknown ? Omit<T, "score"> : never;

/**
 * Per-document metadata needed to render a `SearchResult` — the score is
 * added at search time. Derived from `SearchResult` rather than declared
 * by hand so the stored shape cannot drift from the result shape that
 * `searchRefs` produces by spreading an item and attaching `score`.
 */
type IndexItem = WithoutScore<SearchResult>;
135
+
136
/**
 * On-disk shape of the search index. `writeSearchIndex` produces it and
 * `searchRefs` consumes it so a query does not have to walk the refs
 * directory again.
 *
 * `docs` is handed to `createBM25Index` as-is; `items` shares the same
 * `id` keys so a ranked hit can be turned back into a renderable result.
 */
export interface PersistedSearchIndex {
  /** Format version. Bumped on incompatible changes; readers seeing another value must rebuild. */
  version: 1;
  /** Timestamp string recorded when the index was built. */
  generatedAt: string;
  /** Input documents for BM25: a stable id plus the text to score. */
  docs: Array<{ id: string; text: string }>;
  /** Metadata per document id, used to render a `SearchResult`. */
  items: Record<string, IndexItem>;
}
153
+
154
/** Format version stamped into every index this module builds. */
const INDEX_VERSION: 1 = 1;

/**
 * File name of the persisted index. It lives in the config directory next
 * to `refs/` and `adk.d.ts`. The leading dot marks it as a hidden artifact:
 * coding agents are expected to go through `adk search` rather than open
 * the file themselves.
 */
export const SEARCH_INDEX_FILENAME = ".search-index.json";
161
+
162
+ // ============================================
163
+ // Index building (filesystem walk)
164
+ // ============================================
165
+
166
/**
 * Walk `refsRoot` recursively and build the BM25 documents plus the
 * per-document metadata used by both the live search path and the
 * persisted index writer. Every directory containing an `agent.json` is
 * treated as a materialized ref.
 *
 * Ids embed the kind so that kind filters can be applied after ranking
 * without re-running BM25:
 *
 *   `ref:<refName>`
 *   `tool:<refName>|<toolName>`
 *   `resource:<refName>|<fileName>`
 *
 * The `|` separator is safe because ref / tool / resource names use
 * letters / numbers / `-` / `_` / `/` / `@` / `.` only.
 *
 * Indexing is best-effort per file: an unreadable tool doc, skill file or
 * entrypoint only drops that one piece, never the rest of the ref. A ref
 * whose `agent.json` cannot be parsed into an object is skipped entirely,
 * but the walk still descends into its subdirectories.
 *
 * @param refsRoot Directory to walk. A missing or unreadable directory
 *   yields an index with no documents.
 * @returns A fresh index stamped with the current version and build time.
 */
export function buildSearchIndex(refsRoot: string): PersistedSearchIndex {
  const docs: { id: string; text: string }[] = [];
  const items: Record<string, IndexItem> = {};

  /**
   * Register one document. The first document seen for an id wins, which
   * keeps `docs` and `items` one-to-one even if two files resolve to the
   * same id.
   */
  function addDocument(id: string, text: string, item: IndexItem): void {
    if (Object.prototype.hasOwnProperty.call(items, id)) return;
    docs.push({ id, text });
    items[id] = item;
  }

  /** Read a UTF-8 file; `undefined` when it is missing or unreadable. */
  function readText(path: string): string | undefined {
    try {
      return readFileSync(path, "utf-8");
    } catch {
      return undefined;
    }
  }

  /** List a directory; empty when it is missing, unreadable or not a directory. */
  function listDir(dir: string): string[] {
    try {
      return readdirSync(dir);
    } catch {
      return [];
    }
  }

  /** Classify a path as a regular file, a directory, or neither / unreadable. */
  function pathKind(path: string): "file" | "dir" | "other" {
    try {
      const stats = statSync(path);
      if (stats.isDirectory()) return "dir";
      if (stats.isFile()) return "file";
      return "other";
    } catch {
      return "other";
    }
  }

  /** Index every `<tool>.tool.md` (plus its sibling `.tool.json`) under `refDir/tools`. */
  function indexTools(refDir: string, refName: string): void {
    const toolsDir = join(refDir, "tools");
    for (const file of listDir(toolsDir)) {
      if (!file.endsWith(".tool.md")) continue;
      const toolMdPath = join(toolsDir, file);
      const md = readText(toolMdPath);
      // One unreadable doc must not hide the ref's remaining tools.
      if (md === undefined) continue;
      const toolJsonPath = toolMdPath.replace(/\.tool\.md$/, ".tool.json");
      const tool = parseToolName(toolJsonPath, file);
      // Repeat the high-signal terms (tool + ref name) so BM25 ranks
      // exact-name matches above ambient body matches.
      const text = [
        tool,
        tool,
        tool,
        refName,
        refName,
        md,
        extractParamText(toolJsonPath),
      ].join(" \n ");
      addDocument(`tool:${refName}|${tool}`, text, {
        kind: "tool",
        ref: refName,
        tool,
        summary: firstNonBlankLine(md, refName, file),
        docs: toolMdPath,
        schema: toolJsonPath,
        call: `adk ref call ${refName} ${tool} '{...}'`,
      });
    }
  }

  /** Index every regular file under `refDir/skills` as a resource document. */
  function indexSkills(refDir: string, refName: string): void {
    const skillsDir = join(refDir, "skills");
    for (const file of listDir(skillsDir)) {
      const path = join(skillsDir, file);
      // Only regular files: directories and special files are skipped
      // before any read is attempted.
      if (pathKind(path) !== "file") continue;
      const body = readText(path);
      if (body === undefined) continue;
      addDocument(
        `resource:${refName}|${file}`,
        [file, file, refName, refName, body].join(" \n "),
        {
          kind: "resource",
          ref: refName,
          resource: file,
          summary: firstNonBlankLine(body, refName, file),
          docs: path,
        },
      );
    }
  }

  /** Index the ref rooted at `refDir`: the ref itself, then its tools and skills. */
  function indexRef(refDir: string, agentJsonPath: string): void {
    let manifest: unknown;
    try {
      manifest = JSON.parse(readFileSync(agentJsonPath, "utf-8"));
    } catch {
      // Unreadable or malformed agent.json — skip this ref.
      return;
    }
    if (
      typeof manifest !== "object" ||
      manifest === null ||
      Array.isArray(manifest)
    ) {
      return;
    }
    const fields = manifest as {
      name?: unknown;
      description?: unknown;
      tools?: unknown;
      toolCount?: unknown;
    };

    // Fall back to the directory name when the manifest has no usable name.
    const refName =
      typeof fields.name === "string" && fields.name.length > 0
        ? fields.name
        : basename(refDir);
    const description =
      typeof fields.description === "string" ? fields.description : "";
    let toolCount = 0;
    if (typeof fields.toolCount === "number") {
      toolCount = fields.toolCount;
    } else if (Array.isArray(fields.tools)) {
      toolCount = fields.tools.length;
    }

    const entrypointPath = join(refDir, "entrypoint.md");
    const entrypointBody = readText(entrypointPath) ?? "";

    addDocument(
      `ref:${refName}`,
      [refName, refName, description, entrypointBody].join(" \n "),
      {
        kind: "ref",
        ref: refName,
        summary: description || refName,
        docs: refDir,
        entrypoint: entrypointPath,
        toolCount,
      },
    );

    indexTools(refDir, refName);
    indexSkills(refDir, refName);
  }

  /** Visit `dir`, index it if it is a ref, then descend into its subdirectories. */
  function walk(dir: string): void {
    let entries: string[];
    try {
      entries = readdirSync(dir);
    } catch {
      // Missing or unreadable directory — nothing to index here.
      return;
    }

    const agentJsonPath = join(dir, "agent.json");
    if (existsSync(agentJsonPath)) indexRef(dir, agentJsonPath);

    // Recurse into subdirectories that aren't tool / skill / types output —
    // those have no nested agents. Anything else (e.g. `agents/`) might
    // hold platform-agent refs.
    for (const entry of entries) {
      if (entry === "tools" || entry === "skills" || entry === "types") {
        continue;
      }
      const full = join(dir, entry);
      if (pathKind(full) === "dir") walk(full);
    }
  }

  walk(refsRoot);
  return {
    version: INDEX_VERSION,
    generatedAt: new Date().toISOString(),
    docs,
    items,
  };
}
326
+
327
+ // ============================================
328
+ // Persistence
329
+ // ============================================
330
+
331
/**
 * Location of the persisted search index inside a config directory.
 *
 * @param configDir The config directory (the parent of `refs/`).
 * @returns `configDir` joined with `SEARCH_INDEX_FILENAME`.
 */
export function searchIndexPath(configDir: string): string {
  const indexFile = join(configDir, SEARCH_INDEX_FILENAME);
  return indexFile;
}
335
+
336
/**
 * Build the index from `<configDir>/refs` and write it to the path given
 * by `searchIndexPath(configDir)`. Called from `adk sync` so subsequent
 * `adk search` invocations can skip the recursive ref walk.
 *
 * @param configDir The config directory that contains `refs/`.
 * @returns The path written and the number of BM25 documents in the index.
 * @throws Whatever `mkdirSync` / `writeFileSync` throw when the target
 *   cannot be created or written.
 */
export function writeSearchIndex(configDir: string): {
  path: string;
  documentCount: number;
} {
  const index = buildSearchIndex(join(configDir, "refs"));
  const path = searchIndexPath(configDir);
  // `recursive: true` already tolerates an existing directory, so no
  // separate existence check is needed.
  mkdirSync(dirname(path), { recursive: true });
  // Always pretty-printed (2-space indent, trailing newline): the file is
  // expected to stay small, so readability is preferred over minified output.
  writeFileSync(path, `${JSON.stringify(index, null, 2)}\n`, "utf-8");
  return { path, documentCount: index.docs.length };
}
355
+
356
/**
 * Read a persisted index. Returns `null` if the file is missing,
 * unreadable, malformed, or written by an incompatible version.
 *
 * "Malformed" covers invalid JSON as well as structurally wrong content:
 * a non-object root, `docs` that is not an array of `{ id, text }` string
 * pairs, or `items` that is not a plain object. Rejecting those here lets
 * the caller fall back to a fresh build instead of feeding bad data to
 * the ranking step.
 *
 * @param configDir The config directory that holds the index file.
 * @returns The parsed index, or `null` when it cannot be used.
 */
export function readSearchIndex(
  configDir: string,
): PersistedSearchIndex | null {
  let parsed: unknown;
  try {
    // A missing file surfaces as a read error, so no separate existence
    // check is needed.
    parsed = JSON.parse(readFileSync(searchIndexPath(configDir), "utf-8"));
  } catch {
    return null;
  }
  if (typeof parsed !== "object" || parsed === null) return null;

  const candidate = parsed as {
    version?: unknown;
    docs?: unknown;
    items?: unknown;
  };
  if (candidate.version !== INDEX_VERSION) return null;

  const { docs, items } = candidate;
  if (!Array.isArray(docs)) return null;
  const docsAreValid = docs.every((doc: unknown) => {
    if (typeof doc !== "object" || doc === null) return false;
    const entry = doc as { id?: unknown; text?: unknown };
    return typeof entry.id === "string" && typeof entry.text === "string";
  });
  if (!docsAreValid) return null;

  // `typeof [] === "object"`, so arrays have to be excluded explicitly.
  if (typeof items !== "object" || items === null || Array.isArray(items)) {
    return null;
  }
  return parsed as PersistedSearchIndex;
}
376
+
377
/**
 * Derive a one-line summary from `body`: the first line that, once
 * trimmed, is neither empty nor starts with `#` (heading lines are
 * skipped). When no such line exists the summary falls back to
 * `<refName>/<fileName>`.
 */
function firstNonBlankLine(
  body: string,
  refName: string,
  fileName: string,
): string {
  const summary = body
    .split("\n")
    .map((rawLine) => rawLine.trim())
    .find((trimmed) => trimmed !== "" && !trimmed.startsWith("#"));
  return summary ?? `${refName}/${fileName}`;
}
391
+
392
/**
 * Resolve a tool's name. The `name` field of the `.tool.json` is used when
 * it is a non-empty string; otherwise (file missing, unreadable, invalid
 * JSON, or no usable `name`) the result is `fileName` with its `.tool.md`
 * suffix removed, which still works as a search anchor.
 */
function parseToolName(toolJsonPath: string, fileName: string): string {
  const fromFileName = fileName.replace(/\.tool\.md$/, "");
  if (!existsSync(toolJsonPath)) return fromFileName;
  try {
    const parsed = JSON.parse(readFileSync(toolJsonPath, "utf-8")) as {
      name?: string;
    };
    const declared = parsed.name;
    return typeof declared === "string" && declared.length > 0
      ? declared
      : fromFileName;
  } catch {
    // Unreadable or malformed json — use the file-derived name.
    return fromFileName;
  }
}
410
+
411
/**
 * Flatten a `.tool.json` into searchable text: the tool description (when
 * present) followed by each `inputSchema.properties` entry's name and, if
 * it is a string, its description — all joined with single spaces. This
 * lets queries such as "calendar event id" land on the right tool.
 * Returns an empty string when the file is missing, unreadable or not
 * valid JSON.
 */
function extractParamText(toolJsonPath: string): string {
  if (!existsSync(toolJsonPath)) return "";
  try {
    const schema = JSON.parse(readFileSync(toolJsonPath, "utf-8")) as {
      description?: string;
      inputSchema?: { properties?: Record<string, { description?: string }> };
    };
    const lead = schema.description ? [schema.description] : [];
    const paramParts = Object.entries(
      schema.inputSchema?.properties ?? {},
    ).flatMap(([paramName, paramInfo]) =>
      typeof paramInfo?.description === "string"
        ? [paramName, paramInfo.description]
        : [paramName],
    );
    return [...lead, ...paramParts].join(" ");
  } catch {
    return "";
  }
}
435
+
436
+ // ============================================
437
+ // Search
438
+ // ============================================
439
+
440
/**
 * Run a BM25 search over the materialized refs.
 *
 * Index source, in order of preference:
 *   1. `options.index`, when the caller supplies one (no disk I/O; used by tests);
 *   2. the persisted `<configDir>/.search-index.json` written by `adk sync`,
 *      which skips the recursive walk;
 *   3. a fresh walk of `refsRoot`, when the persisted file is missing,
 *      unreadable, malformed or written by an incompatible version.
 *
 * No freshness check is made against the refs directory: a persisted index
 * that loads successfully is used as-is.
 *
 * @param refsRoot The materialized refs directory (e.g. `~/.adk/refs`).
 *   `<configDir>` is derived from it as the parent directory so callers
 *   don't have to plumb both.
 * @param query Free-text query handed to the BM25 index.
 * @param options Result limit (default 10), ref / kind filters, and an
 *   optional pre-built `index`.
 * @returns At most `limit` results, in the order returned by the BM25 index.
 */
export function searchRefs(
  refsRoot: string,
  query: string,
  options: SearchOptions & { index?: PersistedSearchIndex } = {},
): SearchResult[] {
  const limit = options.limit ?? 10;
  // A non-positive limit can never yield results; skip loading the index.
  if (!(limit > 0)) return [];

  const index =
    options.index ??
    readSearchIndex(dirname(refsRoot)) ??
    buildSearchIndex(refsRoot);

  const bm25 = createBM25Index(index.docs);

  // Filters run after ranking. With a filter active, any fixed multiple of
  // `limit` can be exhausted by documents the filter is about to drop
  // (e.g. `ref` pointing at a ref whose documents rank low overall), so ask
  // for every document. Without filters 5x headroom is kept, which only has
  // to absorb hits that have no metadata entry.
  const hasFilter = Boolean(
    options.ref || options.toolsOnly || options.refsOnly,
  );
  const fetchCount = hasFilter ? index.docs.length : limit * 5;
  const raw = bm25.search(query, fetchCount);

  const results: SearchResult[] = [];
  for (const hit of raw) {
    if (results.length >= limit) break;
    const item = index.items[hit.id];
    if (!item) continue;
    if (options.toolsOnly && item.kind !== "tool") continue;
    if (options.refsOnly && item.kind !== "ref") continue;
    if (options.ref && !refMatches(options.ref, item.ref)) continue;
    // Spread the stored item and attach the query-time score. Each stored
    // item is one `SearchResult` variant minus `score`, so the spread
    // yields a complete result.
    results.push({ ...item, score: hit.score } as SearchResult);
  }

  return results;
}
486
+
487
/**
 * Loose comparison between a user-supplied ref filter and a stored ref
 * name. They match when they are equal, or when either one equals the
 * other prefixed with `/agents/` — so `@clock` and `/agents/@clock` are
 * interchangeable in both directions.
 */
function refMatches(filter: string, ref: string): boolean {
  return (
    filter === ref ||
    ref === `/agents/${filter}` ||
    filter === `/agents/${ref}`
  );
}
495
+
496
+ // ============================================
497
+ // CLI rendering
498
+ // ============================================
499
+
500
/**
 * Render results for the terminal: one numbered block per result, blocks
 * separated by a blank line. Each block has a header line carrying the
 * score (two decimals), then the summary and the docs path; tool results
 * add a suggested call line. An empty list renders as `No results.`.
 */
export function renderResults(results: SearchResult[]): string {
  if (results.length === 0) return "No results.";

  const blocks = results.map((result, position) => {
    const rank = position + 1;
    const score = result.score.toFixed(2);
    let lines: string[];
    switch (result.kind) {
      case "tool":
        lines = [
          `${rank}. ${result.ref}.${result.tool} score=${score}`,
          ` ${result.summary}`,
          ` Docs: ${result.docs}`,
          ` Call: ${result.call}`,
        ];
        break;
      case "ref":
        lines = [
          `${rank}. ${result.ref} (ref, ${result.toolCount} tools) score=${score}`,
          ` ${result.summary}`,
          ` Docs: ${result.docs}`,
        ];
        break;
      default:
        // Every remaining kind is rendered as a resource.
        lines = [
          `${rank}. ${result.ref}/${result.resource} (resource) score=${score}`,
          ` ${result.summary}`,
          ` Docs: ${result.docs}`,
        ];
        break;
    }
    return lines.join("\n");
  });

  return blocks.join("\n\n");
}
536
+
537
/**
 * Report whether `refsRoot` exists and is a directory. Exposed so callers
 * (the CLI) can detect a missing refs root and print a useful message
 * instead of an empty result list.
 *
 * Never throws: any failure to stat the path (missing, a parent that is
 * not a directory, permission denied, removed mid-check) is reported as
 * `false`.
 */
export function refsRootExists(refsRoot: string): boolean {
  try {
    // A single stat avoids the window between an existence check and the
    // follow-up stat in which the path could disappear.
    return statSync(refsRoot).isDirectory();
  } catch {
    return false;
  }
}
541
+ }