ada-agent 0.5.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ada-agent",
3
- "version": "0.5.0",
3
+ "version": "0.6.1",
4
4
  "description": "A from-zero terminal coding agent with a Cursor-style routing backend, ~285 skills, MCP connectors, and ask/plan/auto modes",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -50,7 +50,7 @@ function systemPrompt(includeProject: boolean): string {
50
50
  "You are ada, a minimal coding agent running in a terminal, in the spirit of pi, Codex, and Cursor.",
51
51
  `Working directory: ${process.cwd()}`,
52
52
  `Platform: ${process.platform}`,
53
- "Tools: read_file, write_file, edit_file, bash, ls, grep, glob, web_fetch, web_search, lsp_diagnostics. Use grep/glob/ls to explore the codebase; read a file before editing it; prefer edit_file for changes to existing files; web_fetch to read a URL, web_search to find one; lsp_diagnostics to check a file for errors after editing; apply_patch for multi-file changes; ask_user only when genuinely blocked.",
53
+ "Tools: read_file, write_file, edit_file, bash, ls, grep, glob, codebase_search, web_fetch, web_search, lsp_diagnostics. Use grep/glob/ls to explore the codebase — or codebase_search when you're looking for code by MEANING rather than an exact string; read a file before editing it; prefer edit_file for changes to existing files; web_fetch to read a URL, web_search to find one; lsp_diagnostics to check a file for errors after editing; apply_patch for multi-file changes; ask_user only when genuinely blocked.",
54
54
  "Specialized skills are available: call list_skills to browse them (by category or filter), then use_skill to load one before a specialized task.",
55
55
  "Be concise. Don't narrate routine actions or pad with preamble. When you have enough information to act, act. Ask only when genuinely blocked or before destructive, irreversible actions.",
56
56
  ].join("\n") + (includeProject ? projectContext() : "")
@@ -30,13 +30,22 @@ export function healthUrl(backendUrl: string): string {
30
30
  }
31
31
  }
32
32
 
33
- async function probe(url: string, timeoutMs = 800): Promise<boolean> {
34
- try {
35
- const res = await fetch(url, { signal: AbortSignal.timeout(timeoutMs) });
36
- return res.ok;
37
- } catch {
38
- return false;
39
- }
33
// Plain node:http / node:https with agent:false, NOT fetch: undici's keep-alive socket from a probe
// lingers into process teardown and deterministically prints "Assertion failed: !(handle->flags &
// UV_HANDLE_CLOSING)" on Windows at exit. agent:false closes the socket with the response.
/**
 * Check whether `url` answers with a non-error HTTP status.
 *
 * @param url Absolute http:// or https:// URL to request with GET.
 * @param timeoutMs Socket timeout in milliseconds; the request is destroyed when it fires.
 * @returns `true` when a response arrives with a status below 400; `false` on any error, on
 *   timeout, or when the URL cannot be requested at all. Never rejects.
 */
async function probe(url: string, timeoutMs = 800): Promise<boolean> {
  let settle: (up: boolean) => void = () => {};
  const outcome = new Promise<boolean>((resolve) => {
    settle = resolve;
  });
  const options = { agent: false, timeout: timeoutMs };
  const onResponse = (res: { statusCode?: number | undefined; resume(): unknown }): void => {
    res.resume(); // drain so the socket can close
    settle((res.statusCode ?? 500) < 400);
  };
  try {
    // node:http's get() throws on an https: URL, which would make every https backend look down,
    // so the client module is chosen from the URL scheme.
    const req = /^https:/i.test(url)
      ? (await import("node:https")).get(url, options, onResponse)
      : (await import("node:http")).get(url, options, onResponse);
    // The "timeout" event only signals; destroying the request then surfaces as an "error" event.
    req.on("timeout", () => req.destroy());
    req.on("error", () => settle(false));
  } catch {
    return false; // invalid URL, unsupported protocol, or the module failed to load
  }
  return outcome;
}
41
50
 
42
51
  /** Resolved path to bin/ada-server.mjs (sibling of bin/ada.mjs, packaged in the npm tarball). */
@@ -0,0 +1,198 @@
1
+ // @codebase semantic search. Chunks the working tree, embeds chunks through the backend's
2
+ // /v1/embeddings (which forwards to Ollama — `ollama pull nomic-embed-text`, or set
3
+ // ADA_EMBED_MODEL), caches vectors in .ada/index.json keyed by content hash, and answers queries
4
+ // by cosine similarity. Exposed to the model as the read-only `codebase_search` tool.
5
+ //
6
+ // ponytail: brute-force cosine over a JSON cache — fine to ~50k chunks; an ANN index and a binary
7
+ // vector format are the upgrade path if repos outgrow it.
8
+
9
+ import { createHash } from "node:crypto";
10
+ import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync } from "node:fs";
11
+ import { join, relative, resolve } from "node:path";
12
+
13
/** Embedding model name sent to the backend; ADA_EMBED_MODEL overrides the default. */
const EMBED_MODEL = process.env.ADA_EMBED_MODEL ?? "nomic-embed-text";
/** Base URL the embeddings request is sent to; ADA_BACKEND_URL overrides the default. */
const BACKEND = process.env.ADA_BACKEND_URL ?? "http://localhost:8787/v1";
/** Entry names that are never walked or indexed. */
const SKIP = new Set<string>(["node_modules", ".git", "dist", ".ada", ".next", "build", "coverage"]);
/** File extensions treated as indexable text. */
const TEXT_EXTENSIONS = [
  "ts", "tsx", "js", "jsx", "mjs", "cjs", "py", "go", "rs", "java", "kt", "rb", "php", "cs",
  "c", "h", "cpp", "hpp", "md", "txt", "json", "yaml", "yml", "toml", "css", "scss", "html",
  "sql", "sh", "svelte", "vue",
];
/** Case-insensitive match on a name ending in one of TEXT_EXTENSIONS. */
const TEXT_EXT = new RegExp("\\.(" + TEXT_EXTENSIONS.join("|") + ")$", "i");
/** Default number of lines per chunk. */
const CHUNK_LINES = 80;
/** Files larger than this many bytes are not indexed. */
const MAX_FILE_BYTES = 200_000;
19
+
20
/** One window of a file's text, as produced by chunkText. */
export interface Chunk {
  /** 1-based first line of the window. */
  start: number;
  /** 1-based last line of the window. */
  end: number;
  /** The window's text. */
  text: string;
}

/** A chunk as stored in the cache: its line range plus its embedding vector (no text). */
interface IndexedChunk {
  start: number;
  end: number;
  vec: number[];
}

/** Cache entry for one file. */
interface IndexedFile {
  /** Hash of the file text the chunks were computed from; a mismatch marks the file stale. */
  hash: string;
  chunks: IndexedChunk[];
}

/** Shape of the on-disk cache. */
interface Index {
  /** Embedding-scheme tag the vectors were produced with. */
  model: string;
  /** Cache entries keyed by path relative to the indexed root. */
  files: Record<string, IndexedFile>;
}
33
+
34
/**
 * Split file text into fixed-size line windows, char-capped so minified/long-line files can't
 * blow the embedding model's context window.
 *
 * @param text Full file contents; lines are separated on "\n".
 * @param lines Window height in lines. Floored to an integer and raised to at least 1.
 * @returns One chunk per window that contains non-whitespace text, with 1-based `start`/`end`
 *   line numbers and `text` truncated to 6000 characters.
 */
export function chunkText(text: string, lines = CHUNK_LINES): Chunk[] {
  // A window of 0 or less never advances the loop below (it would spin forever), and a
  // fractional window produces fractional line numbers — normalise to a positive integer.
  const step = Math.max(1, Math.floor(lines) || 1);
  const all = text.split("\n");
  const out: Chunk[] = [];
  for (let i = 0; i < all.length; i += step) {
    const slice = all.slice(i, i + step).join("\n");
    if (slice.trim()) out.push({ start: i + 1, end: Math.min(i + step, all.length), text: slice.slice(0, 6000) });
  }
  return out;
}
45
+
46
/**
 * Cosine similarity of two vectors.
 *
 * @returns A value in [-1, 1] up to rounding, or 0 when the vectors cannot be compared: different
 *   lengths, a zero-length or zero-magnitude vector, or non-finite components.
 */
export function cosine(a: number[], b: number[]): number {
  // Vectors of different dimension come from different embedding models; scoring only the shared
  // prefix would report a confident but meaningless similarity.
  if (a.length !== b.length) return 0;
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    const x = a[i]!;
    const y = b[i]!;
    dot += x * y;
    normA += x * x;
    normB += y * y;
  }
  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  if (!denom) return 0; // zero vector (or NaN magnitude) → 0, not NaN
  const score = dot / denom;
  return Number.isFinite(score) ? score : 0;
}
58
+
59
/** Hex-encoded SHA-1 digest of `s`. */
function sha1(s: string): string {
  const hasher = createHash("sha1");
  hasher.update(s);
  return hasher.digest("hex");
}
62
+
63
/**
 * Indexable text files under root (relative paths), matching the tool suite's skip list.
 *
 * @param root Directory the returned paths are relative to.
 * @param dir Directory currently being listed (recursion state; callers normally omit it).
 * @param out Accumulator the results are appended to (recursion state; callers normally omit it).
 * @returns `out`, holding forward-slash relative paths of regular files that match TEXT_EXT and
 *   are at most MAX_FILE_BYTES in size. Unlistable directories and unreadable entries are skipped.
 */
export function walkFiles(root: string, dir = root, out: string[] = []): string[] {
  let entries;
  try {
    entries = readdirSync(dir, { withFileTypes: true });
  } catch {
    return out;
  }
  for (const e of entries) {
    // Dot-entries are skipped, except .github.
    if (e.name.startsWith(".") && e.name !== ".github") continue;
    if (SKIP.has(e.name)) continue;
    const p = join(dir, e.name);
    if (e.isDirectory()) {
      walkFiles(root, p, out);
      continue;
    }
    if (!TEXT_EXT.test(e.name)) continue;
    try {
      const st = statSync(p);
      // Regular files only: a FIFO, socket or symlink-to-directory that happens to carry a text
      // extension must not be indexed — reading a FIFO would block the indexer.
      if (st.isFile() && st.size <= MAX_FILE_BYTES) out.push(relative(root, p).replace(/\\/g, "/"));
    } catch {
      /* unreadable — skip */
    }
  }
  return out;
}
86
+
87
/**
 * Embed `texts` through the backend's embeddings endpoint.
 *
 * @param texts Strings to embed; an empty list returns `[]` without a request.
 * @param kind Whether the texts are indexed documents or a search query (only affects the prefix
 *   added for nomic models).
 * @returns One vector per input, in input order.
 * @throws Error when the request fails, times out (60 s), returns a non-2xx status, or returns a
 *   payload whose vectors do not line up one-to-one with the inputs.
 */
async function embed(texts: string[], kind: "document" | "query" = "document"): Promise<number[][]> {
  if (texts.length === 0) return [];
  // nomic-embed models are trained asymmetric: prefixing queries/documents differently measurably
  // improves retrieval (code stops losing to prose). Other models get the raw text.
  const input = EMBED_MODEL.includes("nomic") ? texts.map((t) => `search_${kind}: ${t}`) : texts;
  const res = await fetch(`${BACKEND}/embeddings`, {
    method: "POST",
    headers: { "content-type": "application/json", authorization: `Bearer ${process.env.ADA_CLIENT_KEY ?? "dev"}` },
    body: JSON.stringify({ model: EMBED_MODEL, input }),
    signal: AbortSignal.timeout(60_000),
  });
  if (!res.ok) throw new Error(`embeddings HTTP ${res.status}: ${(await res.text().catch(() => "")).slice(0, 200)} — is the backend up, and is "${EMBED_MODEL}" pulled in Ollama? (ollama pull nomic-embed-text, or set ADA_EMBED_MODEL)`);
  const j = (await res.json()) as { data?: Array<{ index: number; embedding: number[] }> };
  if (!j.data?.length) throw new Error("embeddings response had no data");
  const vectors = [...j.data].sort((a, b) => a.index - b.index).map((d) => d.embedding);
  // Callers pair vectors with inputs by position; a short or malformed reply would otherwise be
  // stored as `undefined` vectors and only blow up later, during scoring.
  if (vectors.length !== texts.length || vectors.some((v) => !Array.isArray(v) || v.length === 0)) {
    throw new Error(`embeddings response had ${vectors.length} vectors for ${texts.length} inputs`);
  }
  return vectors;
}
102
+
103
/** Absolute path of the cache file, `<root>/.ada/index.json`. */
function indexPath(root: string): string {
  const cacheDir = resolve(root, ".ada");
  return resolve(cacheDir, "index.json");
}
106
+
107
// The cache is tagged with an embedding scheme rather than the bare model name. Vectors stop
// being comparable when the model changes OR when the text prefixing changes, and either case
// has to force a rebuild.
const usesNomicAffixes = EMBED_MODEL.includes("nomic");
const SCHEME = usesNomicAffixes ? `${EMBED_MODEL}#affix1` : EMBED_MODEL;
110
+
111
/** True when `entry` has the shape of a cache entry: a string hash and chunks that carry vectors. */
function isIndexedFile(entry: unknown): entry is IndexedFile {
  if (typeof entry !== "object" || entry === null) return false;
  const { hash, chunks } = entry as { hash?: unknown; chunks?: unknown };
  return (
    typeof hash === "string" &&
    Array.isArray(chunks) &&
    chunks.every((c: unknown) => typeof c === "object" && c !== null && Array.isArray((c as { vec?: unknown }).vec))
  );
}

/**
 * Read the cached index for `root`.
 *
 * @returns The cached entries when the cache exists, parses, and was written under the current
 *   SCHEME; otherwise an empty index. Entries that do not have the expected shape are dropped, so
 *   the corresponding files are simply re-embedded. Never throws.
 */
function loadIndex(root: string): Index {
  try {
    const parsed: unknown = JSON.parse(readFileSync(indexPath(root), "utf8"));
    if (typeof parsed === "object" && parsed !== null) {
      const { model, files } = parsed as { model?: unknown; files?: unknown };
      // scheme changed → vectors incomparable, rebuild
      if (model === SCHEME && typeof files === "object" && files !== null && !Array.isArray(files)) {
        const kept: Record<string, IndexedFile> = {};
        for (const [rel, entry] of Object.entries(files)) {
          if (isIndexedFile(entry)) kept[rel] = entry;
        }
        return { model: SCHEME, files: kept };
      }
    }
  } catch {
    /* no cache yet, or the file is not valid JSON */
  }
  return { model: SCHEME, files: {} };
}
120
+
121
/**
 * Write `idx` to the cache file under `root`, creating the `.ada` directory when needed.
 * Failures are swallowed on purpose: the cache is an optimisation, not a requirement.
 */
function saveIndex(root: string, idx: Index): void {
  try {
    const cacheDir = resolve(root, ".ada");
    mkdirSync(cacheDir, { recursive: true });
    const target = indexPath(root);
    writeFileSync(target, JSON.stringify(idx));
  } catch {
    /* cache is best-effort */
  }
}
129
+
130
/**
 * Bring the index up to date (embed new/changed files, drop deleted ones).
 *
 * @param root Directory to index; defaults to the current working directory.
 * @param onProgress Optional callback, invoked after every 20th re-embedded file.
 * @returns Total number of chunks in the index afterwards.
 * @throws Whatever `embed` throws. Files embedded before the failure are still written to the
 *   cache, so the next run resumes instead of starting over.
 */
export async function refreshIndex(root = process.cwd(), onProgress?: (msg: string) => void): Promise<number> {
  const idx = loadIndex(root);
  const files = walkFiles(root);
  const live = new Set(files);
  // Tracks every in-memory change, deletions included: searchCodebase re-reads the cache from
  // disk, so a dropped entry that is never saved would keep surfacing as a hit.
  let dirty = false;
  for (const known of Object.keys(idx.files)) {
    if (!live.has(known)) {
      delete idx.files[known];
      dirty = true;
    }
  }

  const stale: Array<{ rel: string; hash: string; chunks: Chunk[] }> = [];
  for (const rel of files) {
    let text: string;
    try {
      text = readFileSync(resolve(root, rel), "utf8");
    } catch {
      continue; // vanished or unreadable since the walk
    }
    const hash = sha1(text);
    if (idx.files[rel]?.hash === hash) continue; // unchanged since it was last embedded
    stale.push({ rel, hash, chunks: chunkText(text) });
  }

  let done = 0;
  try {
    for (const f of stale) {
      const vecs: number[][] = [];
      // Embed in batches of 32 chunks per request.
      for (let i = 0; i < f.chunks.length; i += 32) {
        const batch = f.chunks.slice(i, i + 32);
        const got = await embed(batch.map((c) => c.text));
        if (got.length !== batch.length) {
          throw new Error(`embedding backend returned ${got.length} vectors for ${batch.length} chunks of ${f.rel}`);
        }
        vecs.push(...got);
      }
      idx.files[f.rel] = { hash: f.hash, chunks: f.chunks.map((c, i) => ({ start: c.start, end: c.end, vec: vecs[i]! })) };
      dirty = true;
      done++;
      if (onProgress && done % 20 === 0) onProgress(`indexed ${done}/${stale.length} changed files…`);
    }
  } finally {
    // Runs on failure too, so a backend hiccup late in a large repo keeps the finished files.
    if (dirty) saveIndex(root, idx);
  }
  return Object.values(idx.files).reduce((n, f) => n + f.chunks.length, 0);
}
164
+
165
/** One search result. */
export interface Hit {
  /** Path of the file, relative to the searched root. */
  file: string;
  /** 1-based first line of the matching chunk. */
  start: number;
  /** 1-based last line of the matching chunk. */
  end: number;
  /** Cosine similarity between the query and the chunk. */
  score: number;
  /** Up to 1200 characters of the chunk's current text, or a placeholder when the file cannot be read. */
  snippet: string;
}

/**
 * Top-k chunks most similar to the query. Refreshes the index first (incremental).
 *
 * @param query Free-text description of what to look for.
 * @param k Maximum number of hits. Floored; negative values are treated as 0 and NaN as the
 *   default of 6.
 * @param root Directory to search; defaults to the current working directory.
 * @returns Hits sorted by descending score; empty when nothing is indexed or `k` is 0.
 * @throws Whatever `refreshIndex` or `embed` throws (for example when the backend is unreachable).
 */
export async function searchCodebase(query: string, k = 6, root = process.cwd()): Promise<Hit[]> {
  // Array.slice(0, k) with a negative k would return almost every chunk instead of none.
  const limit = Number.isNaN(k) ? 6 : Math.max(0, Math.floor(k));
  await refreshIndex(root);
  if (limit === 0) return [];
  const idx = loadIndex(root);
  const indexed = Object.entries(idx.files);
  // Nothing to score against — skip the query embedding round trip.
  if (!indexed.some(([, f]) => f.chunks.length > 0)) return [];

  const [qvec] = await embed([query], "query");
  if (!qvec) throw new Error("embedding backend returned no vector for the query");

  const hits: Hit[] = [];
  for (const [rel, f] of indexed) {
    for (const c of f.chunks) {
      hits.push({ file: rel, start: c.start, end: c.end, score: cosine(qvec, c.vec), snippet: "" });
    }
  }
  hits.sort((a, b) => b.score - a.score);
  const top = hits.slice(0, limit);

  // Several top hits often share a file; read each file once. `null` marks an unreadable file.
  const linesByFile = new Map<string, string[] | null>();
  for (const h of top) {
    let lines = linesByFile.get(h.file);
    if (lines === undefined) {
      try {
        lines = readFileSync(resolve(root, h.file), "utf8").split("\n");
      } catch {
        lines = null;
      }
      linesByFile.set(h.file, lines);
    }
    h.snippet = lines ? lines.slice(h.start - 1, h.end).join("\n").slice(0, 1200) : "(file changed since indexing)";
  }
  return top;
}
@@ -99,14 +99,21 @@ export function formatFile(abs: string): boolean {
99
99
  }
100
100
 
101
101
  // node-pty gives the bash tool a real terminal. It's a required dependency; if the native build is
102
- // ever broken on a platform, fall back to spawnSync so bash still works.
103
- const pty: typeof PtyType | null = (() => {
104
- try {
105
- return createRequire(import.meta.url)("node-pty") as typeof PtyType;
106
- } catch {
107
- return null;
102
+ // ever broken on a platform, fall back to spawnSync so bash still works. Loaded LAZILY on the first
103
+ // bash call: merely loading the native module on Windows sets up async handles whose teardown races
104
+ // process.exit and prints "Assertion failed: !(handle->flags & UV_HANDLE_CLOSING)" — commands that
105
+ // never spawn a PTY (--version, catalog, --list-models, …) shouldn't pay that.
106
// Load state of node-pty: undefined = not attempted yet, null = the load failed, otherwise the module.
let ptyMod: typeof PtyType | null | undefined;

/** Returns the node-pty module, loading it on the first call; null when it cannot be loaded. */
function getPty(): typeof PtyType | null {
  if (ptyMod !== undefined) return ptyMod;
  try {
    const load = createRequire(import.meta.url);
    ptyMod = load("node-pty") as typeof PtyType;
  } catch {
    ptyMod = null;
  }
  return ptyMod;
}
110
117
 
111
118
  // Built via new RegExp (string escapes) so no literal ESC/BEL bytes live in the source.
112
119
  const ANSI = new RegExp("[\\u001B\\u009B][\\[\\]()#;?]*(?:(?:[a-zA-Z\\d]*(?:;[a-zA-Z\\d]*)*)?\\u0007|(?:\\d{1,4}(?:;\\d{0,4})*)?[\\dA-PR-TZcf-ntqry=><~])", "g");
@@ -120,7 +127,7 @@ function runPty(command: string, timeoutMs = 120_000): Promise<{ output: string;
120
127
  const win = process.platform === "win32";
121
128
  const shell = win ? process.env.COMSPEC ?? "cmd.exe" : process.env.SHELL ?? "/bin/bash";
122
129
  const shellArgs = win ? ["/c", command] : ["-lc", command];
123
- const p = pty!.spawn(shell, shellArgs, { name: "xterm-256color", cols: 120, rows: 30, cwd: process.cwd(), env: process.env as Record<string, string> });
130
+ const p = getPty()!.spawn(shell, shellArgs, { name: "xterm-256color", cols: 120, rows: 30, cwd: process.cwd(), env: process.env as Record<string, string> });
124
131
  let out = "";
125
132
  const cap = 10 * 1024 * 1024;
126
133
  p.onData((d) => {
@@ -360,7 +367,7 @@ export const tools: Tool[] = [
360
367
  needsApproval: true,
361
368
  async run(args) {
362
369
  const command = String(args.command);
363
- if (pty) {
370
+ if (getPty()) {
364
371
  const { output, code } = await runPty(command);
365
372
  return { output: `exit ${code ?? "null"}\n${spillIfHuge(stripAnsi(output).trim() || "(no output)")}`, isError: code !== 0 };
366
373
  }
@@ -533,6 +540,31 @@ export const tools: Tool[] = [
533
540
  return { output: (matches.join("\n") || "(no matches)") + more };
534
541
  },
535
542
  },
543
{
  // Read-only semantic search tool backed by ./embed-index.ts.
  name: "codebase_search",
  description:
    "Semantic (meaning-based) search over the codebase — finds code by what it DOES, not by exact strings. Use when grep's literal matching won't work (\"where do we handle auth?\", \"how are sessions persisted?\"). First call indexes the repo (needs an Ollama embedding model, e.g. nomic-embed-text); later calls are incremental.",
  parameters: {
    type: "object",
    properties: {
      query: { type: "string", description: "What you're looking for, in plain words." },
      k: { type: "number", description: "How many results (default 6)." },
    },
    required: ["query"],
    additionalProperties: false,
  },
  needsApproval: false,
  async run(args) {
    try {
      // A missing query would otherwise be searched for as the literal string "undefined".
      const query = typeof args.query === "string" ? args.query.trim() : "";
      if (!query) return { output: "codebase_search needs a non-empty `query` string.", isError: true };
      // Clamp to 1..20, defaulting to 6. An upper bound alone lets negative values through, and
      // a negative count makes the search return almost every chunk.
      const requested = Math.floor(Number(args.k));
      const k = Number.isFinite(requested) && requested >= 1 ? Math.min(requested, 20) : 6;
      const { searchCodebase } = await import("./embed-index.ts"); // lazy — only pay for it when used
      const hits = await searchCodebase(query, k);
      if (!hits.length) return { output: "No indexed content matched. Is the repo empty, or all files skipped?" };
      return { output: hits.map((h) => `${h.file}:${h.start}-${h.end} (score ${h.score.toFixed(3)})\n${h.snippet}`).join("\n\n---\n\n") };
    } catch (e) {
      return { output: String(e instanceof Error ? e.message : e), isError: true };
    }
  },
},
536
568
  {
537
569
  name: "web_fetch",
538
570
  description: "Fetch an http(s) URL and return its content as readable text (HTML is stripped to text). Use to read docs, articles, changelogs, or JSON APIs.",
package/src/selfcheck.ts CHANGED
@@ -293,6 +293,27 @@ async function main(): Promise<void> {
293
293
  assert.equal(route("anything-else"), "openrouter", "unmatched → openrouter");
294
294
  }
295
295
 
296
+ // --- @codebase semantic search: pure parts (no network / no embedding model needed) ---
297
+ {
298
+ const { chunkText, cosine, walkFiles } = await import("./client/embed-index.ts");
299
+ const chunks = chunkText(Array.from({ length: 200 }, (_, i) => `line ${i + 1}`).join("\n"));
300
+ assert.equal(chunks.length, 3, "200 lines → 3 chunks of 80");
301
+ assert.equal(chunks[0]!.start, 1);
302
+ assert.equal(chunks[1]!.start, 81);
303
+ assert.equal(chunks[2]!.end, 200, "last chunk ends at the last line");
304
+ assert.equal(chunkText(" \n \n").length, 0, "whitespace-only text → no chunks");
305
+ assert.ok(chunkText(`x${"y".repeat(50_000)}`)[0]!.text.length <= 6000, "long-line chunks are char-capped");
306
+ assert.ok(Math.abs(cosine([1, 0], [1, 0]) - 1) < 1e-9, "cosine identical = 1");
307
+ assert.equal(cosine([1, 0], [0, 1]), 0, "cosine orthogonal = 0");
308
+ assert.equal(cosine([0, 0], [1, 1]), 0, "zero vector → 0, not NaN");
309
+ const walked = walkFiles(process.cwd());
310
+ assert.ok(walked.includes("src/selfcheck.ts"), "walkFiles finds source files");
311
+ assert.ok(!walked.some((f) => f.includes("node_modules")), "walkFiles skips node_modules");
312
+ // Offline: the tool must fail with a clear message, not hang or throw
313
+ const r = await toolByName.get("codebase_search")!.run({ query: "x" });
314
+ assert.ok(typeof r.output === "string", "codebase_search returns cleanly even when embeddings are unavailable");
315
+ }
316
+
296
317
  // --- `ada --version` prints the version and exits WITHOUT auto-starting a backend ---
297
318
  {
298
319
  const { spawnSync } = await import("node:child_process");
@@ -72,6 +72,25 @@ async function handleChat(req: IncomingMessage, res: ServerResponse): Promise<vo
72
72
  await adapterFor(provider).chat({ provider, model, body, res });
73
73
  }
74
74
 
75
/**
 * Embeddings for @codebase semantic search — forwarded to the ollama provider's
 * OpenAI-compatible endpoint (embedding models only live there for now).
 *
 * Responds 400 when the request body is not valid JSON, 502 when the upstream cannot be reached
 * or does not answer in time, and otherwise relays the upstream status and body unchanged.
 */
async function handleEmbeddings(req: IncomingMessage, res: ServerResponse): Promise<void> {
  const raw = await readBody(req);
  try {
    JSON.parse(raw); // validation only — the original text is what gets forwarded
  } catch {
    return json(res, 400, { error: { message: "invalid JSON body" } });
  }
  let status: number;
  let text: string;
  try {
    const upstream = await fetch(`${PROVIDERS.ollama.baseURL}/embeddings`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: raw,
      // Bound the upstream call so a stalled Ollama cannot pin this request open; kept just under
      // the 60 s the embeddings client waits, so the client receives this error rather than its
      // own abort.
      signal: AbortSignal.timeout(55_000),
    });
    status = upstream.status;
    text = await upstream.text();
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    return json(res, 502, { error: { message: `embeddings upstream unavailable: ${reason}` } });
  }
  res.writeHead(status, { "content-type": "application/json" });
  res.end(text);
}
93
+
75
94
  const server = createServer(async (req, res) => {
76
95
  try {
77
96
  const url = new URL(req.url ?? "/", "http://localhost");
@@ -91,6 +110,10 @@ const server = createServer(async (req, res) => {
91
110
  if (!(await authorized(req))) return json(res, 401, { error: { message: "unauthorized — invalid client key or login" } });
92
111
  return await handleChat(req, res);
93
112
  }
113
+ if (req.method === "POST" && url.pathname === "/v1/embeddings") {
114
+ if (!(await authorized(req))) return json(res, 401, { error: { message: "unauthorized — invalid client key or login" } });
115
+ return await handleEmbeddings(req, res);
116
+ }
94
117
  return json(res, 404, { error: { message: "not found" } });
95
118
  } catch (err) {
96
119
  if (!res.headersSent) json(res, 500, { error: { message: err instanceof Error ? err.message : String(err) } });