npm - ada-agent - Versions diffs - 0.5.0 → 0.6.0 - Mend

ada-agent 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/package.json +1 -1
package/src/client/agent.ts +1 -1
package/src/client/embed-index.ts +198 -0
package/src/client/tools.ts +25 -0
package/src/selfcheck.ts +21 -0
package/src/server/index.ts +23 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "ada-agent",
-  "version": "0.5.0",
+  "version": "0.6.0",
   "description": "A from-zero terminal coding agent with a Cursor-style routing backend, ~285 skills, MCP connectors, and ask/plan/auto modes",
   "type": "module",
   "license": "MIT",

package/src/client/agent.ts CHANGED Viewed

@@ -50,7 +50,7 @@ function systemPrompt(includeProject: boolean): string {
       "You are ada, a minimal coding agent running in a terminal, in the spirit of pi, Codex, and Cursor.",
       `Working directory: ${process.cwd()}`,
       `Platform: ${process.platform}`,
-      "Tools: read_file, write_file, edit_file, bash, ls, grep, glob, web_fetch, web_search, lsp_diagnostics. Use grep/glob/ls to explore the codebase; read a file before editing it; prefer edit_file for changes to existing files; web_fetch to read a URL, web_search to find one; lsp_diagnostics to check a file for errors after editing; apply_patch for multi-file changes; ask_user only when genuinely blocked.",
+      "Tools: read_file, write_file, edit_file, bash, ls, grep, glob, codebase_search, web_fetch, web_search, lsp_diagnostics. Use grep/glob/ls to explore the codebase — or codebase_search when you're looking for code by MEANING rather than an exact string; read a file before editing it; prefer edit_file for changes to existing files; web_fetch to read a URL, web_search to find one; lsp_diagnostics to check a file for errors after editing; apply_patch for multi-file changes; ask_user only when genuinely blocked.",
       "Specialized skills are available: call list_skills to browse them (by category or filter), then use_skill to load one before a specialized task.",
       "Be concise. Don't narrate routine actions or pad with preamble. When you have enough information to act, act. Ask only when genuinely blocked or before destructive, irreversible actions.",
     ].join("\n") + (includeProject ? projectContext() : "")

package/src/client/embed-index.ts ADDED Viewed

@@ -0,0 +1,198 @@
+// @codebase semantic search. Chunks the working tree, embeds chunks through the backend's
+// /v1/embeddings (which forwards to Ollama — `ollama pull nomic-embed-text`, or set
+// ADA_EMBED_MODEL), caches vectors in .ada/index.json keyed by content hash, and answers queries
+// by cosine similarity. Exposed to the model as the read-only `codebase_search` tool.
+//
+// ponytail: brute-force cosine over a JSON cache — fine to ~50k chunks; an ANN index and a binary
+// vector format are the upgrade path if repos outgrow it.
+import { createHash } from "node:crypto";
+import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync } from "node:fs";
+import { join, relative, resolve } from "node:path";
+const EMBED_MODEL = process.env.ADA_EMBED_MODEL ?? "nomic-embed-text"; // model name sent with every embeddings request; override with ADA_EMBED_MODEL
+const BACKEND = process.env.ADA_BACKEND_URL ?? "http://localhost:8787/v1"; // base URL that "/embeddings" is appended to; override with ADA_BACKEND_URL
+const SKIP = new Set(["node_modules", ".git", "dist", ".ada", ".next", "build", "coverage"]); // entry names walkFiles neither descends into nor indexes
+const TEXT_EXT = /\.(ts|tsx|js|jsx|mjs|cjs|py|go|rs|java|kt|rb|php|cs|c|h|cpp|hpp|md|txt|json|yaml|yml|toml|css|scss|html|sql|sh|svelte|vue)$/i; // case-insensitive extension allow-list; only matching file names are indexed
+const CHUNK_LINES = 80; // default window size (in lines) for chunkText
+const MAX_FILE_BYTES = 200_000; // files whose stat size exceeds this are left out of the index
+export interface Chunk { // one window of consecutive lines cut from a file by chunkText
+  start: number; // 1-based first line
+  end: number; // 1-based last line of the window (inclusive)
+  text: string; // the window's lines joined with "\n", truncated to 6000 characters
+}
+interface IndexedFile { // cached embeddings for a single file
+  hash: string; // SHA-1 hex of the file text the vectors were computed from; a mismatch marks the file stale
+  chunks: Array<{ start: number; end: number; vec: number[] }>; // line range plus embedding per chunk; the chunk text itself is not stored
+}
+interface Index { // shape of the JSON cache written to .ada/index.json
+  model: string; // embedding-scheme tag (SCHEME) the vectors were produced under; compared on load
+  files: Record<string, IndexedFile>; // keyed by root-relative path using "/" separators
+}
+/**
+ * Split file text into fixed-size line windows, char-capped so minified/long-line files can't
+ * blow the embedding model's context window.
+ *
+ * @param text   full file contents; lines are separated on "\n"
+ * @param lines  window size in lines (default CHUNK_LINES). Fractions are floored and anything
+ *               below 1 (including NaN) is treated as 1, so the loop always advances.
+ * @returns chunks in file order with 1-based inclusive `start`/`end`; windows that contain only
+ *          whitespace are omitted.
+ */
+export function chunkText(text: string, lines = CHUNK_LINES): Chunk[] {
+  // A step of 0, a negative step or NaN would never move `i` forward and loop forever.
+  const step = Math.max(1, Math.floor(lines) || 1);
+  // Per-chunk character cap; protects the embedding model from single enormous lines.
+  const maxChars = 6000;
+  const all = text.split("\n");
+  // A trailing newline produces a phantom empty last element; drop it so `end` never points one
+  // line past the real end of the file.
+  if (all.length > 1 && all[all.length - 1] === "") all.pop();
+  const out: Chunk[] = [];
+  for (let i = 0; i < all.length; i += step) {
+    const slice = all.slice(i, i + step).join("\n");
+    if (slice.trim()) out.push({ start: i + 1, end: Math.min(i + step, all.length), text: slice.slice(0, maxChars) });
+  }
+  return out;
+}
+/**
+ * Cosine similarity between two embedding vectors.
+ *
+ * @param a first vector
+ * @param b second vector
+ * @returns dot(a, b) / (|a| * |b|). Returns 0 — never NaN — when either vector has zero
+ *          magnitude or when the vectors differ in length (different dimensions mean different
+ *          embedding spaces, so no meaningful similarity exists).
+ */
+export function cosine(a: number[], b: number[]): number {
+  // Reading past the end of the shorter vector would multiply by `undefined` and yield NaN,
+  // which in turn makes any score-based sort comparator inconsistent.
+  if (a.length !== b.length) return 0;
+  let dot = 0;
+  let na = 0;
+  let nb = 0;
+  for (let i = 0; i < a.length; i++) {
+    const x = a[i]!;
+    const y = b[i]!;
+    dot += x * y;
+    na += x * x;
+    nb += y * y;
+  }
+  const d = Math.sqrt(na) * Math.sqrt(nb);
+  // d is 0 for an all-zero vector; avoid dividing by it.
+  return d ? dot / d : 0;
+}
+/** Hex-encoded SHA-1 digest of `s`; used as the per-file content fingerprint in the index. */
+function sha1(s: string): string {
+  const hasher = createHash("sha1");
+  hasher.update(s);
+  return hasher.digest("hex");
+}
+/**
+ * Recursively collect indexable text files beneath `root`.
+ *
+ * Dot-entries (except `.github`) and names in SKIP are ignored; a file is kept only when its
+ * name matches TEXT_EXT and its size is at most MAX_FILE_BYTES.
+ *
+ * @param root base directory; returned paths are relative to it and use "/" separators
+ * @param dir  directory currently being scanned (recursion state; defaults to `root`)
+ * @param out  accumulator shared across the recursion; also the return value
+ */
+export function walkFiles(root: string, dir = root, out: string[] = []): string[] {
+  let listing;
+  try {
+    listing = readdirSync(dir, { withFileTypes: true });
+  } catch {
+    // Directory can't be listed: contribute nothing and keep what was gathered so far.
+    return out;
+  }
+  for (const entry of listing) {
+    const { name } = entry;
+    const hidden = name.startsWith(".") && name !== ".github";
+    if (hidden || SKIP.has(name)) continue;
+    const full = join(dir, name);
+    if (entry.isDirectory()) {
+      walkFiles(root, full, out);
+      continue;
+    }
+    if (!TEXT_EXT.test(name)) continue;
+    try {
+      const smallEnough = statSync(full).size <= MAX_FILE_BYTES;
+      if (smallEnough) out.push(relative(root, full).replace(/\\/g, "/"));
+    } catch {
+      /* unreadable — skip */
+    }
+  }
+  return out;
+}
+/**
+ * Embed `texts` through the backend's `/embeddings` endpoint.
+ *
+ * @param texts strings to embed; an empty list returns `[]` without touching the network
+ * @param kind  "document" for indexed chunks, "query" for search queries (only affects the
+ *              prefix applied for nomic models)
+ * @returns one vector per input, in input order
+ * @throws Error on a non-2xx response, on an empty payload, or when the payload does not contain
+ *         exactly one array-valued embedding per input. Rejections from `fetch` itself (network
+ *         failure, the 60 s timeout) propagate unchanged.
+ */
+async function embed(texts: string[], kind: "document" | "query" = "document"): Promise<number[][]> {
+  if (texts.length === 0) return [];
+  // nomic-embed models are trained asymmetric: prefixing queries/documents differently measurably
+  // improves retrieval (code stops losing to prose). Other models get the raw text.
+  const input = EMBED_MODEL.includes("nomic") ? texts.map((t) => `search_${kind}: ${t}`) : texts;
+  const res = await fetch(`${BACKEND}/embeddings`, {
+    method: "POST",
+    headers: { "content-type": "application/json", authorization: `Bearer ${process.env.ADA_CLIENT_KEY ?? "dev"}` },
+    body: JSON.stringify({ model: EMBED_MODEL, input }),
+    signal: AbortSignal.timeout(60_000),
+  });
+  if (!res.ok) throw new Error(`embeddings HTTP ${res.status}: ${(await res.text().catch(() => "")).slice(0, 200)} — is the backend up, and is "${EMBED_MODEL}" pulled in Ollama? (ollama pull nomic-embed-text, or set ADA_EMBED_MODEL)`);
+  const j = (await res.json()) as { data?: Array<{ index: number; embedding: number[] }> };
+  if (!Array.isArray(j.data) || j.data.length === 0) throw new Error("embeddings response had no data");
+  // The backend may return items out of order; `index` says which input each one belongs to.
+  const vectors = [...j.data].sort((a, b) => a.index - b.index).map((d) => d.embedding);
+  // Callers pair vectors with inputs by position, so a short or malformed payload would silently
+  // attach missing vectors to chunks. Fail loudly instead.
+  if (vectors.length !== texts.length || vectors.some((v) => !Array.isArray(v))) {
+    throw new Error(`embeddings response had ${vectors.length} vectors for ${texts.length} inputs (or a malformed embedding)`);
+  }
+  return vectors;
+}
+/** Absolute path of the JSON vector cache for `root`: `<root>/.ada/index.json`. */
+function indexPath(root: string): string {
+  const cacheDir = resolve(root, ".ada");
+  return resolve(cacheDir, "index.json");
+}
+// Tag stored in the cache's `model` field and compared on load. It covers the model name AND how
+// text is prefixed: changing either makes old vectors incomparable, so both must force a rebuild.
+const SCHEME = EMBED_MODEL.includes("nomic") ? `${EMBED_MODEL}#affix1` : EMBED_MODEL; // "#affix1" presumably versions the search_document:/search_query: prefixing applied to nomic models — bump it if that prefixing changes
+/**
+ * Load the cached index for `root`, or start an empty one.
+ *
+ * The cache is reused only when it parses as JSON, has a `files` object, and was written under
+ * the current embedding scheme. Anything else — missing file, truncated or hand-edited JSON, a
+ * different model — yields a fresh empty index so the caller rebuilds.
+ *
+ * @param root project root whose `.ada/index.json` is read
+ * @returns an index whose `model` is always SCHEME and whose `files` is always an object
+ */
+function loadIndex(root: string): Index {
+  try {
+    const parsed: unknown = JSON.parse(readFileSync(indexPath(root), "utf8"));
+    if (typeof parsed === "object" && parsed !== null) {
+      const candidate = parsed as Partial<Index>;
+      // Without this check a cache such as {"model": "..."} would be returned as-is and crash
+      // later in Object.keys(idx.files).
+      const filesOk = typeof candidate.files === "object" && candidate.files !== null && !Array.isArray(candidate.files);
+      // scheme changed → vectors incomparable, rebuild
+      if (candidate.model === SCHEME && filesOk) return candidate as Index;
+    }
+  } catch {
+    /* no cache yet */
+  }
+  return { model: SCHEME, files: {} };
+}
+/**
+ * Persist `idx` as JSON under `<root>/.ada/`, creating the directory if needed.
+ * Failures are swallowed on purpose: the cache is an optimisation, not a requirement.
+ */
+function saveIndex(root: string, idx: Index): void {
+  try {
+    const cacheDir = resolve(root, ".ada");
+    mkdirSync(cacheDir, { recursive: true });
+    const target = indexPath(root);
+    const payload = JSON.stringify(idx);
+    writeFileSync(target, payload);
+  } catch {
+    /* cache is best-effort */
+  }
+}
+/**
+ * Bring the index for `root` up to date and return it in memory.
+ *
+ * Steps: load the cache, drop entries for files that no longer exist, re-chunk and re-embed every
+ * file whose SHA-1 differs from the cached one, then persist. Persistence happens in a `finally`
+ * so files embedded before a mid-run failure are not thrown away, and it also happens when the
+ * only change was pruning deleted files.
+ *
+ * @param root       project root to scan
+ * @param onProgress optional callback, invoked after every 20th re-embedded file
+ * @throws whatever `embed` throws; the partial index is saved before the error propagates
+ */
+async function syncIndex(root: string, onProgress?: (msg: string) => void): Promise<Index> {
+  const idx = loadIndex(root);
+  const files = walkFiles(root);
+  const live = new Set(files);
+  let dirty = false;
+  for (const known of Object.keys(idx.files)) {
+    if (live.has(known)) continue;
+    delete idx.files[known];
+    dirty = true;
+  }
+  const stale: Array<{ rel: string; hash: string; chunks: Chunk[] }> = [];
+  for (const rel of files) {
+    let text: string;
+    try {
+      text = readFileSync(resolve(root, rel), "utf8");
+    } catch {
+      continue; // vanished or unreadable since the walk — leave any cached entry untouched
+    }
+    const hash = sha1(text);
+    if (idx.files[rel]?.hash === hash) continue;
+    stale.push({ rel, hash, chunks: chunkText(text) });
+  }
+  let done = 0;
+  try {
+    for (const f of stale) {
+      const vecs: number[][] = [];
+      // Batches of 32 keep each embeddings request small.
+      for (let i = 0; i < f.chunks.length; i += 32) {
+        const batch = f.chunks.slice(i, i + 32);
+        const got = await embed(batch.map((c) => c.text));
+        // Vectors are matched to chunks by position; a short response must not be cached.
+        if (got.length !== batch.length) throw new Error(`embeddings response had ${got.length} vectors for ${batch.length} chunks of ${f.rel}`);
+        vecs.push(...got);
+      }
+      idx.files[f.rel] = { hash: f.hash, chunks: f.chunks.map((c, i) => ({ start: c.start, end: c.end, vec: vecs[i]! })) };
+      dirty = true;
+      done++;
+      if (onProgress && done % 20 === 0) onProgress(`indexed ${done}/${stale.length} changed files…`);
+    }
+  } finally {
+    if (dirty) saveIndex(root, idx);
+  }
+  return idx;
+}
+/** Bring the index up to date (embed new/changed files, drop deleted ones). Returns chunk count. */
+export async function refreshIndex(root = process.cwd(), onProgress?: (msg: string) => void): Promise<number> {
+  const idx = await syncIndex(root, onProgress);
+  return Object.values(idx.files).reduce((n, f) => n + f.chunks.length, 0);
+}
+/** One search result: a chunk's location, its similarity to the query, and its current text. */
+export interface Hit {
+  file: string; // root-relative path, "/" separated
+  start: number; // 1-based first line of the chunk
+  end: number; // 1-based last line of the chunk (inclusive)
+  score: number; // cosine similarity between the query vector and the chunk vector
+  snippet: string; // up to 1200 characters of the chunk's lines, read from disk at query time
+}
+/**
+ * Top-k chunks most similar to the query. Refreshes the index first (incremental).
+ *
+ * The refreshed index is used directly from memory rather than re-read from disk, so search
+ * still works when the best-effort cache write fails (e.g. read-only checkout).
+ *
+ * @param query free-text description of what to find
+ * @param k     maximum number of hits (default 6); fractions are floored, negatives become 0,
+ *              NaN falls back to 6
+ * @param root  project root to index and search
+ * @returns hits ordered by descending score; empty when nothing is indexed
+ * @throws whatever `embed` throws (backend down, model missing, timeout)
+ */
+export async function searchCodebase(query: string, k = 6, root = process.cwd()): Promise<Hit[]> {
+  // slice(0, k) with a negative k would return all-but-|k| hits; normalise before use.
+  const limit = Number.isNaN(k) ? 6 : Math.max(0, Math.floor(k));
+  const idx = await syncIndex(root);
+  const entries = Object.entries(idx.files);
+  // Nothing to rank: skip the query embedding round-trip entirely.
+  if (limit === 0 || !entries.some(([, f]) => f.chunks.length > 0)) return [];
+  const [qvec] = await embed([query], "query");
+  if (!qvec) throw new Error("embeddings response had no data");
+  const hits: Hit[] = [];
+  for (const [rel, f] of entries) {
+    for (const c of f.chunks) {
+      // A missing vector or one of another dimension (damaged cache) can't be compared.
+      if (!Array.isArray(c.vec) || c.vec.length !== qvec.length) continue;
+      hits.push({ file: rel, start: c.start, end: c.end, score: cosine(qvec, c.vec), snippet: "" });
+    }
+  }
+  hits.sort((a, b) => b.score - a.score);
+  const top = hits.slice(0, limit);
+  // Several hits often come from the same file; read and split each file once.
+  const linesByFile = new Map<string, string[] | null>();
+  for (const h of top) {
+    let lines = linesByFile.get(h.file);
+    if (lines === undefined) {
+      try {
+        lines = readFileSync(resolve(root, h.file), "utf8").split("\n");
+      } catch {
+        lines = null;
+      }
+      linesByFile.set(h.file, lines);
+    }
+    h.snippet = lines
+      ? lines
+          .slice(h.start - 1, h.end)
+          .join("\n")
+          .slice(0, 1200)
+      : "(file changed since indexing)";
+  }
+  return top;
+}

package/src/client/tools.ts CHANGED Viewed

@@ -533,6 +533,31 @@ export const tools: Tool[] = [
       return { output: (matches.join("\n") || "(no matches)") + more };
     },
   },
+  // Read-only semantic search tool backed by ./embed-index.ts.
+  {
+    name: "codebase_search",
+    description:
+      "Semantic (meaning-based) search over the codebase — finds code by what it DOES, not by exact strings. Use when grep's literal matching won't work (\"where do we handle auth?\", \"how are sessions persisted?\"). First call indexes the repo (needs an Ollama embedding model, e.g. nomic-embed-text); later calls are incremental.",
+    parameters: {
+      type: "object",
+      properties: {
+        query: { type: "string", description: "What you're looking for, in plain words." },
+        k: { type: "number", description: "How many results (default 6)." },
+      },
+      required: ["query"],
+      additionalProperties: false,
+    },
+    needsApproval: false,
+    /** Runs the search and formats hits as `file:start-end  (score)` headers followed by the snippet.
+     *  Never throws: any failure is returned as `{ output, isError: true }`. */
+    async run(args) {
+      try {
+        // A missing query would otherwise be stringified to "undefined" and searched for.
+        const query = args.query == null ? "" : String(args.query).trim();
+        if (!query) return { output: "codebase_search needs a non-empty `query`.", isError: true };
+        // k is model-supplied: NaN, zero and negatives fall back to the default (a negative value
+        // reaching slice(0, k) would return almost every chunk); everything else is floored and
+        // capped at 20.
+        const requested = Number(args.k);
+        const k = Number.isNaN(requested) || requested < 1 ? 6 : Math.min(Math.floor(requested), 20);
+        const { searchCodebase } = await import("./embed-index.ts"); // lazy — only pay for it when used
+        const hits = await searchCodebase(query, k);
+        if (!hits.length) return { output: "No indexed content matched. Is the repo empty, or all files skipped?" };
+        return { output: hits.map((h) => `${h.file}:${h.start}-${h.end}  (score ${h.score.toFixed(3)})\n${h.snippet}`).join("\n\n---\n\n") };
+      } catch (e) {
+        return { output: String(e instanceof Error ? e.message : e), isError: true };
+      }
+    },
+  },
   {
     name: "web_fetch",
     description: "Fetch an http(s) URL and return its content as readable text (HTML is stripped to text). Use to read docs, articles, changelogs, or JSON APIs.",

package/src/selfcheck.ts CHANGED Viewed

@@ -293,6 +293,27 @@ async function main(): Promise<void> {
     assert.equal(route("anything-else"), "openrouter", "unmatched → openrouter");
   }
+  // --- @codebase semantic search: pure parts (no network / no embedding model needed) ---
+  {
+    const { chunkText, cosine, walkFiles } = await import("./client/embed-index.ts");
+    const chunks = chunkText(Array.from({ length: 200 }, (_, i) => `line ${i + 1}`).join("\n"));
+    assert.equal(chunks.length, 3, "200 lines → 3 chunks of 80");
+    assert.equal(chunks[0]!.start, 1);
+    assert.equal(chunks[1]!.start, 81);
+    assert.equal(chunks[2]!.end, 200, "last chunk ends at the last line");
+    assert.equal(chunkText("   \n \n").length, 0, "whitespace-only text → no chunks");
+    assert.ok(chunkText(`x${"y".repeat(50_000)}`)[0]!.text.length <= 6000, "long-line chunks are char-capped");
+    assert.ok(Math.abs(cosine([1, 0], [1, 0]) - 1) < 1e-9, "cosine identical = 1");
+    assert.equal(cosine([1, 0], [0, 1]), 0, "cosine orthogonal = 0");
+    assert.equal(cosine([0, 0], [1, 1]), 0, "zero vector → 0, not NaN");
+    const walked = walkFiles(process.cwd());
+    assert.ok(walked.includes("src/selfcheck.ts"), "walkFiles finds source files");
+    assert.ok(!walked.some((f) => f.includes("node_modules")), "walkFiles skips node_modules");
+    // Offline: the tool must fail with a clear message, not hang or throw
+    const r = await toolByName.get("codebase_search")!.run({ query: "x" });
+    assert.ok(typeof r.output === "string", "codebase_search returns cleanly even when embeddings are unavailable");
+  }
   // --- `ada --version` prints the version and exits WITHOUT auto-starting a backend ---
   {
     const { spawnSync } = await import("node:child_process");

package/src/server/index.ts CHANGED Viewed

@@ -72,6 +72,25 @@ async function handleChat(req: IncomingMessage, res: ServerResponse): Promise<vo
   await adapterFor(provider).chat({ provider, model, body, res });
 }
+/**
+ * Embeddings for @codebase semantic search — forwarded to the ollama provider's
+ * OpenAI-compatible endpoint (embedding models only live there for now).
+ *
+ * The request body is validated as a JSON object and then passed through byte-for-byte; the
+ * upstream status and body are relayed to the caller.
+ *
+ * Responses produced here (not relayed):
+ *  - 400 when the body is not valid JSON or not a JSON object
+ *  - 502 when the upstream cannot be reached or does not answer within 60 s
+ */
+async function handleEmbeddings(req: IncomingMessage, res: ServerResponse): Promise<void> {
+  const raw = await readBody(req);
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(raw);
+  } catch {
+    return json(res, 400, { error: { message: "invalid JSON body" } });
+  }
+  // Bare scalars, null and arrays are valid JSON but never a valid embeddings request.
+  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
+    return json(res, 400, { error: { message: "request body must be a JSON object" } });
+  }
+  let upstream: Response;
+  try {
+    upstream = await fetch(`${PROVIDERS.ollama.baseURL}/embeddings`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: raw,
+      // Without a deadline a hung upstream would keep this request open indefinitely.
+      signal: AbortSignal.timeout(60_000),
+    });
+  } catch (err) {
+    // Connection refused / timeout: report a gateway failure instead of a generic 500.
+    return json(res, 502, { error: { message: `embeddings upstream unavailable: ${err instanceof Error ? err.message : String(err)}` } });
+  }
+  const text = await upstream.text();
+  res.writeHead(upstream.status, { "content-type": "application/json" });
+  res.end(text);
+}
 const server = createServer(async (req, res) => {
   try {
     const url = new URL(req.url ?? "/", "http://localhost");
@@ -91,6 +110,10 @@ const server = createServer(async (req, res) => {
       if (!(await authorized(req))) return json(res, 401, { error: { message: "unauthorized — invalid client key or login" } });
       return await handleChat(req, res);
     }
+    if (req.method === "POST" && url.pathname === "/v1/embeddings") {
+      if (!(await authorized(req))) return json(res, 401, { error: { message: "unauthorized — invalid client key or login" } });
+      return await handleEmbeddings(req, res);
+    }
     return json(res, 404, { error: { message: "not found" } });
   } catch (err) {
     if (!res.headersSent) json(res, 500, { error: { message: err instanceof Error ? err.message : String(err) } });