npm - wasper-cli - Versions diffs - 0.2.0 → 0.3.0 - Mend

wasper-cli 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/index.js CHANGED

@@ -3158,6 +3158,14 @@ var SCHEMA = `
     name TEXT NOT NULL DEFAULT '',
     created_at INTEGER NOT NULL DEFAULT (unixepoch())
   );
+  CREATE TABLE IF NOT EXISTS chat_memory (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    role TEXT NOT NULL,
+    content TEXT NOT NULL,
+    created_at INTEGER NOT NULL DEFAULT (unixepoch())
+  );
+  CREATE INDEX IF NOT EXISTS idx_memory_created ON chat_memory(created_at DESC);
 `;
 // src/db/index.ts
@@ -3337,6 +3345,19 @@ var init_db = __esm(() => {
     getSetting: (key) => (db.query("SELECT value FROM settings WHERE key = ?").get(key) ?? null)?.value ?? null,
     setSetting: (key, value) => {
       db.run("INSERT INTO settings(key,value) VALUES(?,?) ON CONFLICT(key) DO UPDATE SET value=excluded.value", [key, value]);
+    },
+    saveMemory: (role, content) => {
+      db.query("INSERT INTO chat_memory (role, content) VALUES (?, ?)").run(role, content);
+    },
+    getMemory: (limit = 20) => {
+      const rows = db.query("SELECT role, content FROM chat_memory ORDER BY created_at DESC LIMIT ?").all(limit);
+      return rows.reverse();
+    },
+    clearMemory: () => {
+      db.query("DELETE FROM chat_memory").run();
+    },
+    trimMemory: (keepLast = 40) => {
+      db.query("DELETE FROM chat_memory WHERE id NOT IN (SELECT id FROM chat_memory ORDER BY created_at DESC LIMIT ?)").run(keepLast);
     }
   };
 });
@@ -3586,7 +3607,7 @@ var package_default;
 var init_package = __esm(() => {
   package_default = {
     name: "wasper-cli",
-    version: "0.2.0",
+    version: "0.3.0",
     description: "Host an MCP server + API proxy from any OpenAPI spec. Like Drizzle Studio, but for APIs.",
     type: "module",
     homepage: "https://wasper.site",
@@ -4578,6 +4599,517 @@ var init_engine2 = __esm(() => {
   init_state();
 });
+// src/agent/harness.ts
+function mergeSignals(a, b) {
+  if (!a && !b)
+    return new AbortController().signal;
+  if (!a)
+    return b;
+  if (!b)
+    return a;
+  const ctrl = new AbortController;
+  const abort = () => ctrl.abort();
+  a.addEventListener("abort", abort, { once: true });
+  b.addEventListener("abort", abort, { once: true });
+  return ctrl.signal;
+}
+async function fetchWithRetry(url, opts, emit, signal, maxRetries = 4) {
+  for (let attempt = 0;attempt <= maxRetries; attempt++) {
+    const stepSignal = signal;
+    let res;
+    try {
+      res = await fetch(url, { ...opts, signal: stepSignal });
+    } catch (e) {
+      const msg = e instanceof Error ? e.message : String(e);
+      if (signal?.aborted)
+        throw e;
+      if (attempt === maxRetries)
+        throw e;
+      const isNetwork = msg.includes("ECONNREFUSED") || msg.includes("ENOTFOUND") || msg.includes("network") || msg.includes("fetch");
+      if (!isNetwork)
+        throw e;
+      const delay2 = Math.min(1000 * Math.pow(2, attempt) + Math.random() * 300, 15000);
+      emit({ type: "info", message: `Network error, retrying in ${Math.round(delay2 / 1000)}s\u2026` });
+      await new Promise((r) => setTimeout(r, delay2));
+      continue;
+    }
+    if (!RETRYABLE_STATUS.has(res.status) || attempt === maxRetries)
+      return res;
+    const retryAfter = parseInt(res.headers.get("retry-after") ?? "0", 10);
+    const delay = retryAfter > 0 ? retryAfter * 1000 : Math.min(1000 * Math.pow(2, attempt) + Math.random() * 300, 30000);
+    const label = res.status === 429 ? "Rate limited" : `Server error ${res.status}`;
+    emit({ type: "info", message: `${label} \u2014 retrying in ${Math.round(delay / 1000)}s\u2026 (attempt ${attempt + 1}/${maxRetries})` });
+    await new Promise((r) => setTimeout(r, delay));
+  }
+  return fetch(url, opts);
+}
+function trimContext(messages) {
+  if (JSON.stringify(messages).length <= MAX_CONTEXT_CHARS)
+    return { messages, trimmed: false };
+  const result = [...messages];
+  while (JSON.stringify(result).length > MAX_CONTEXT_CHARS && result.length > 2) {
+    let removed = false;
+    const toolIdx = result.findIndex((m) => m.role === "tool");
+    if (toolIdx !== -1) {
+      result.splice(toolIdx, 1);
+      if (toolIdx > 0) {
+        const prev = result[toolIdx - 1];
+        if (prev?.role === "assistant") {
+          const tc = prev.tool_calls;
+          if (Array.isArray(tc) && tc.length)
+            result.splice(toolIdx - 1, 1);
+        }
+      }
+      removed = true;
+    }
+    if (!removed) {
+      const anthropicIdx = result.findIndex((m) => {
+        if (m.role !== "user")
+          return false;
+        const c = m.content;
+        return Array.isArray(c) && c.some((b) => b.type === "tool_result");
+      });
+      if (anthropicIdx !== -1) {
+        result.splice(anthropicIdx, 1);
+        if (anthropicIdx > 0 && result[anthropicIdx - 1]?.role === "assistant") {
+          result.splice(anthropicIdx - 1, 1);
+        }
+        removed = true;
+      }
+    }
+    if (!removed)
+      break;
+  }
+  return { messages: result, trimmed: true };
+}
+async function* readSSE(body) {
+  const reader = body.getReader();
+  const decoder = new TextDecoder;
+  let buf = "";
+  try {
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done)
+        break;
+      buf += decoder.decode(value, { stream: true });
+      const parts = buf.split(`
+`);
+      buf = parts.pop() ?? "";
+      for (const part of parts) {
+        let data = "";
+        for (const line of part.split(`
+`)) {
+          if (line.startsWith("data: ")) {
+            data = line.slice(6);
+            break;
+          }
+        }
+        if (!data || data === "[DONE]")
+          continue;
+        try {
+          yield JSON.parse(data);
+        } catch {}
+      }
+    }
+  } finally {
+    reader.releaseLock();
+  }
+}
+function buildAnthropicTools(schemas) {
+  return schemas.map((s) => ({
+    name: s.name,
+    description: s.description,
+    input_schema: { type: "object", properties: s.params, required: s.required }
+  }));
+}
+async function streamAnthropic(cfg, system, messages, tools, emit, signal) {
+  const base = (cfg.baseUrl || "https://api.anthropic.com").replace(/\/$/, "");
+  const systemContent = cfg.enablePromptCache ? [{ type: "text", text: system, cache_control: { type: "ephemeral" } }] : system;
+  const stepSignal = mergeSignals(signal, AbortSignal.timeout(cfg.stepTimeoutMs));
+  const res = await fetchWithRetry(`${base}/v1/messages`, {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+      "x-api-key": cfg.apiKey,
+      "anthropic-version": "2023-06-01",
+      ...cfg.enablePromptCache ? { "anthropic-beta": "prompt-caching-1-0" } : {},
+      ...cfg.extraHeaders
+    },
+    body: JSON.stringify({
+      model: cfg.model,
+      max_tokens: cfg.maxTokens,
+      temperature: cfg.temperature,
+      ...cfg.topK > 0 ? { top_k: cfg.topK } : {},
+      system: systemContent,
+      messages,
+      tools,
+      stream: true
+    })
+  }, emit, stepSignal);
+  if (!res.ok) {
+    const body = await res.text();
+    const retryable = RETRYABLE_STATUS.has(res.status);
+    throw Object.assign(new Error(`Anthropic ${res.status}: ${body}`), { retryable });
+  }
+  const result = { text: "", thinking: "", stopReason: "", toolUses: [], usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 } };
+  const blocks = [];
+  const inputAccum = {};
+  for await (const ev of readSSE(res.body)) {
+    if (signal.aborted)
+      break;
+    const evType = ev.type;
+    if (evType === "message_start") {
+      const usage = ev.message?.usage;
+      if (usage) {
+        result.usage.input = usage.input_tokens ?? 0;
+        result.usage.cacheRead = usage.cache_read_input_tokens ?? 0;
+        result.usage.cacheWrite = usage.cache_creation_input_tokens ?? 0;
+      }
+    } else if (evType === "content_block_start") {
+      const idx = ev.index;
+      const cb = ev.content_block;
+      blocks[idx] = { type: cb.type, id: cb.id, name: cb.name };
+      if (cb.type === "tool_use")
+        inputAccum[idx] = "";
+    } else if (evType === "content_block_delta") {
+      const idx = ev.index;
+      const delta = ev.delta;
+      if (delta.type === "text_delta" && delta.text) {
+        result.text += delta.text;
+        if (!blocks[idx])
+          blocks[idx] = { type: "text" };
+        blocks[idx].text = (blocks[idx].text ?? "") + delta.text;
+        emit({ type: "text_delta", text: delta.text });
+      } else if (delta.type === "thinking_delta" && delta.thinking) {
+        result.thinking += delta.thinking;
+        emit({ type: "thinking", text: delta.thinking });
+      } else if (delta.type === "input_json_delta" && delta.partial_json) {
+        inputAccum[idx] = (inputAccum[idx] ?? "") + delta.partial_json;
+      }
+    } else if (evType === "content_block_stop") {
+      const idx = ev.index;
+      if (blocks[idx]?.type === "tool_use") {
+        try {
+          blocks[idx].input = JSON.parse(inputAccum[idx] ?? "{}");
+        } catch {
+          blocks[idx].input = {};
+        }
+      }
+    } else if (evType === "message_delta") {
+      const delta = ev.delta;
+      const usage = ev.usage;
+      if (delta.stop_reason)
+        result.stopReason = delta.stop_reason;
+      if (usage?.output_tokens)
+        result.usage.output = usage.output_tokens;
+    }
+  }
+  for (const b of blocks) {
+    if (b.type === "tool_use" && b.id && b.name) {
+      result.toolUses.push({ id: b.id, name: b.name, input: b.input ?? {} });
+    }
+  }
+  result._anthropicBlocks = blocks;
+  return result;
+}
+function buildOpenAITools(schemas) {
+  return schemas.map((s) => ({
+    type: "function",
+    function: { name: s.name, description: s.description, parameters: { type: "object", properties: s.params, required: s.required } }
+  }));
+}
+async function streamOpenAI(cfg, system, messages, tools, emit, signal) {
+  const providerBases = {
+    openai: "https://api.openai.com",
+    mistral: "https://api.mistral.ai",
+    groq: "https://api.groq.com/openai",
+    "github-copilot": "https://api.githubcopilot.com"
+  };
+  const base = (cfg.baseUrl || providerBases[cfg.provider] || "https://api.openai.com").replace(/\/$/, "");
+  const providerHeaders = cfg.provider === "github-copilot" ? { "Copilot-Integration-Id": "vscode-chat", "Editor-Version": "vscode/1.85.0" } : {};
+  const authHeaders = cfg.apiKey ? { Authorization: `Bearer ${cfg.apiKey}` } : {};
+  const stepSignal = mergeSignals(signal, AbortSignal.timeout(cfg.stepTimeoutMs));
+  const res = await fetchWithRetry(`${base}/v1/chat/completions`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json", ...authHeaders, ...providerHeaders, ...cfg.extraHeaders },
+    body: JSON.stringify({
+      model: cfg.model,
+      max_tokens: cfg.maxTokens,
+      temperature: cfg.temperature,
+      messages: [{ role: "system", content: system }, ...messages],
+      tools,
+      tool_choice: "auto",
+      stream: true,
+      stream_options: { include_usage: true }
+    })
+  }, emit, stepSignal);
+  if (!res.ok) {
+    const body = await res.text();
+    const retryable = RETRYABLE_STATUS.has(res.status);
+    throw Object.assign(new Error(body), { retryable });
+  }
+  const result = { text: "", thinking: "", stopReason: "", toolUses: [], usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 } };
+  const tcAccum = {};
+  for await (const ev of readSSE(res.body)) {
+    if (signal.aborted)
+      break;
+    if (ev.object === "error") {
+      const retryable = ev.code === "1300" || ev.raw_status_code === 429 || ev.raw_status_code === 503;
+      throw Object.assign(new Error(JSON.stringify(ev)), { retryable });
+    }
+    if (ev.usage) {
+      const u = ev.usage;
+      result.usage.input = u.prompt_tokens ?? 0;
+      result.usage.output = u.completion_tokens ?? 0;
+    }
+    const choices = ev.choices;
+    const choice = choices?.[0];
+    if (!choice)
+      continue;
+    const fr = choice.finish_reason;
+    if (fr)
+      result.stopReason = fr;
+    const delta = choice.delta;
+    if (!delta)
+      continue;
+    if (typeof delta.content === "string" && delta.content) {
+      result.text += delta.content;
+      emit({ type: "text_delta", text: delta.content });
+    }
+    const tcDeltas = delta.tool_calls;
+    if (tcDeltas) {
+      for (const tc of tcDeltas) {
+        if (!tcAccum[tc.index])
+          tcAccum[tc.index] = { id: "", name: "", args: "" };
+        const e = tcAccum[tc.index];
+        if (tc.id)
+          e.id += tc.id;
+        if (tc.function?.name)
+          e.name += tc.function.name;
+        if (tc.function?.arguments)
+          e.args += tc.function.arguments;
+      }
+    }
+  }
+  for (const tc of Object.values(tcAccum)) {
+    let input = {};
+    try {
+      input = JSON.parse(tc.args);
+    } catch {}
+    result.toolUses.push({ id: tc.id, name: tc.name, input });
+  }
+  result._openaiTcAccum = tcAccum;
+  return result;
+}
+async function callOllama(cfg, system, messages, emit, signal) {
+  const base = (cfg.baseUrl || "http://localhost:11434").replace(/\/$/, "");
+  const stepSignal = mergeSignals(signal, AbortSignal.timeout(cfg.stepTimeoutMs));
+  const res = await fetchWithRetry(`${base}/api/chat`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({ model: cfg.model, messages: [{ role: "system", content: system }, ...messages], stream: false })
+  }, emit, stepSignal);
+  if (!res.ok)
+    throw new Error(`Ollama ${res.status}: ${await res.text()}`);
+  const d = await res.json();
+  const text = d.message?.content ?? "";
+  emit({ type: "text_delta", text });
+  return { text, thinking: "", stopReason: "end_turn", toolUses: [], usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 } };
+}
+async function callGemini(cfg, system, messages, emit, signal) {
+  const base = (cfg.baseUrl || "https://generativelanguage.googleapis.com").replace(/\/$/, "");
+  const stepSignal = mergeSignals(signal, AbortSignal.timeout(cfg.stepTimeoutMs));
+  const res = await fetchWithRetry(`${base}/v1beta/models/${cfg.model}:generateContent?key=${cfg.apiKey}`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      systemInstruction: { parts: [{ text: system }] },
+      contents: messages.map((m) => ({
+        role: m.role === "assistant" ? "model" : "user",
+        parts: [{ text: m.content }]
+      })),
+      generationConfig: { maxOutputTokens: cfg.maxTokens }
+    })
+  }, emit, stepSignal);
+  if (!res.ok)
+    throw new Error(`Gemini ${res.status}: ${await res.text()}`);
+  const d = await res.json();
+  const text = d.candidates?.[0]?.content?.parts?.[0]?.text ?? "";
+  emit({ type: "text_delta", text });
+  return { text, thinking: "", stopReason: "end_turn", toolUses: [], usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 } };
+}
+async function runAgentLoop(config2, system, initialMessages, toolSchemas, executeTool, emit, signal = new AbortController().signal, toolCache = new Map) {
+  const cfg = { ...DEFAULTS, ...config2 };
+  const isAnthropic = cfg.provider === "anthropic";
+  const isOllama = cfg.provider === "ollama";
+  const isGemini = cfg.provider === "gemini";
+  const anthropicTools = buildAnthropicTools(toolSchemas);
+  const openaiTools = buildOpenAITools(toolSchemas);
+  const messages = [...initialMessages];
+  const allToolCalls = [];
+  const totalTokens = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
+  let totalToolsUsed = 0;
+  let consecutiveErrors = 0;
+  const endpointErrors = {};
+  for (let iter = 0;iter < cfg.maxIterations; iter++) {
+    if (signal.aborted) {
+      return { content: "", toolCalls: allToolCalls, stopReason: "cancelled", tokens: totalTokens };
+    }
+    const { messages: trimmed, trimmed: didTrim } = trimContext(messages);
+    if (didTrim) {
+      emit({ type: "info", message: "Context trimmed to fit within limits." });
+      messages.splice(0, messages.length, ...trimmed);
+    }
+    let turn;
+    try {
+      if (isAnthropic) {
+        turn = await streamAnthropic(cfg, system, messages, anthropicTools, emit, signal);
+      } else if (isOllama) {
+        turn = await callOllama(cfg, system, messages, emit, signal);
+      } else if (isGemini) {
+        turn = await callGemini(cfg, system, messages, emit, signal);
+      } else {
+        turn = await streamOpenAI(cfg, system, messages, openaiTools, emit, signal);
+      }
+    } catch (e) {
+      if (signal.aborted) {
+        return { content: "", toolCalls: allToolCalls, stopReason: "cancelled", tokens: totalTokens };
+      }
+      const msg = e instanceof Error ? e.message : String(e);
+      const retryable = e.retryable ?? false;
+      emit({ type: "error", message: msg, retryable });
+      throw e;
+    }
+    totalTokens.input += turn.usage.input;
+    totalTokens.output += turn.usage.output;
+    totalTokens.cacheRead += turn.usage.cacheRead;
+    totalTokens.cacheWrite += turn.usage.cacheWrite;
+    if (turn.usage.input || turn.usage.output) {
+      emit({ type: "token_usage", ...turn.usage });
+    }
+    const wantsTools = isAnthropic ? turn.stopReason === "tool_use" : turn.stopReason === "tool_calls";
+    if (!wantsTools || turn.toolUses.length === 0) {
+      return { content: turn.text, toolCalls: allToolCalls, stopReason: "end_turn", tokens: totalTokens };
+    }
+    if (totalToolsUsed >= cfg.maxTotalTools) {
+      return {
+        content: `Agent stopped: reached ${cfg.maxTotalTools} tool calls. Break your request into smaller steps.`,
+        toolCalls: allToolCalls,
+        stopReason: "max_tools",
+        tokens: totalTokens
+      };
+    }
+    const dedupeKey = (name, input) => `${name}:${JSON.stringify(input)}`;
+    const turnResults = [];
+    const executeOne = async (use) => {
+      totalToolsUsed++;
+      const key = dedupeKey(use.name, use.input);
+      const cachedResult = toolCache.get(key);
+      const isCached = !!cachedResult;
+      emit({ type: "tool_start", id: use.id, tool: use.name, input: use.input, cached: isCached });
+      let result;
+      let ms = 0;
+      if (isCached) {
+        result = cachedResult;
+      } else {
+        const t0 = Date.now();
+        result = await executeTool(use.name, use.input);
+        ms = Date.now() - t0;
+        if (!result.isError && use.name !== "execute_api_request" && use.name !== "fetch_url") {
+          toolCache.set(key, result);
+        }
+      }
+      emit({ type: "tool_done", id: use.id, tool: use.name, output: result.text, isError: result.isError, ms, cached: isCached });
+      return { id: use.id, name: use.name, text: result.text, isError: result.isError, ms, cached: isCached };
+    };
+    const toolUses = turn.toolUses;
+    if (cfg.parallelTools && toolUses.length > 1) {
+      const pure = toolUses.filter((u) => u.name !== "execute_api_request" && u.name !== "fetch_url");
+      const sideEffect = toolUses.filter((u) => u.name === "execute_api_request" || u.name === "fetch_url");
+      const pureResults = await Promise.all(pure.map((u) => executeOne(u)));
+      const sideEffectResults = [];
+      for (const u of sideEffect)
+        sideEffectResults.push(await executeOne(u));
+      const resultMap = new Map([...pureResults, ...sideEffectResults].map((r) => [r.id, r]));
+      for (const u of toolUses) {
+        const r = resultMap.get(u.id);
+        if (r)
+          turnResults.push(r);
+      }
+    } else {
+      for (const u of toolUses)
+        turnResults.push(await executeOne(u));
+    }
+    for (const r of turnResults) {
+      allToolCalls.push({ id: r.id, tool: r.name, input: turn.toolUses.find((u) => u.id === r.id)?.input ?? {}, output: r.text, isError: r.isError, ms: r.ms, cached: r.cached });
+      if (r.isError) {
+        consecutiveErrors++;
+        if (r.name === "execute_api_request") {
+          const eid = String(turn.toolUses.find((u) => u.id === r.id)?.input?.operationId ?? r.id);
+          endpointErrors[eid] = (endpointErrors[eid] ?? 0) + 1;
+          if (endpointErrors[eid] >= cfg.maxEndpointErrors) {
+            return {
+              content: `Endpoint "${eid}" failed ${cfg.maxEndpointErrors} times. Last error: ${r.text}`,
+              toolCalls: allToolCalls,
+              stopReason: "max_endpoint_errors",
+              tokens: totalTokens
+            };
+          }
+        }
+        if (consecutiveErrors >= cfg.maxConsecutiveErrors) {
+          return {
+            content: `Stopped after ${cfg.maxConsecutiveErrors} consecutive errors. Last: ${r.text}`,
+            toolCalls: allToolCalls,
+            stopReason: "max_errors",
+            tokens: totalTokens
+          };
+        }
+      } else {
+        consecutiveErrors = 0;
+      }
+    }
+    if (isAnthropic) {
+      const blocks = turn._anthropicBlocks ?? [];
+      messages.push({ role: "assistant", content: blocks });
+      messages.push({
+        role: "user",
+        content: turnResults.map((r) => ({ type: "tool_result", tool_use_id: r.id, content: r.text }))
+      });
+    } else {
+      const tc = turn._openaiTcAccum ?? {};
+      messages.push({
+        role: "assistant",
+        content: turn.text || null,
+        tool_calls: Object.values(tc).map((t) => ({ id: t.id, type: "function", function: { name: t.name, arguments: t.args } }))
+      });
+      for (const r of turnResults) {
+        messages.push({ role: "tool", tool_call_id: r.id, content: r.text });
+      }
+    }
+  }
+  return { content: "(max iterations reached)", toolCalls: allToolCalls, stopReason: "max_iterations", tokens: totalTokens };
+}
+var RETRYABLE_STATUS, MAX_CONTEXT_CHARS = 300000, DEFAULTS;
+var init_harness = __esm(() => {
+  RETRYABLE_STATUS = new Set([429, 500, 502, 503, 504]);
+  DEFAULTS = {
+    apiKey: "",
+    baseUrl: "",
+    extraHeaders: {},
+    maxTokens: 4096,
+    temperature: 1,
+    topK: 0,
+    maxIterations: 40,
+    maxTotalTools: 40,
+    maxConsecutiveErrors: 5,
+    maxEndpointErrors: 3,
+    stepTimeoutMs: 60000,
+    parallelTools: true,
+    enablePromptCache: true
+  };
+});
 // src/api/routes.ts
 import dns from "dns/promises";
 function json(data, status = 200) {
@@ -4641,6 +5173,12 @@ async function apiRouter(req) {
     return handleDeleteRule(path);
   if (path === "/api/ai/chat" && method === "POST")
     return handleAiChat(req);
+  if (path === "/api/ai/memory" && method === "GET")
+    return json({ memory: dbQueries.getMemory(40) });
+  if (path === "/api/ai/memory" && method === "DELETE") {
+    dbQueries.clearMemory();
+    return json({ success: true });
+  }
   if (path === "/api/debug/dns" && method === "GET")
     return handleDnsQuery(searchParams);
   if (path === "/api/debug/ping" && method === "GET")
@@ -4866,39 +5404,54 @@ async function handleSetSettings(req) {
   dbQueries.setSettings(body);
   return json(body);
 }
-async function executeTool(name, args) {
+async function executeTool(name, args, cache = new Map) {
   const { operations, spec } = getState();
   if (name === "search_endpoints") {
+    const cacheKey2 = `search:${String(args.query ?? "").toLowerCase()}`;
+    const hit = cache.get(cacheKey2);
+    if (hit)
+      return hit;
     const q = String(args.query ?? "").toLowerCase();
     const terms = q.split(/\s+/).filter(Boolean);
     const matches = operations.filter((op) => {
       const hay = [op.operationId, op.path, op.method, ...op.tags ?? [], op.summary ?? "", op.description ?? ""].join(" ").toLowerCase();
       return terms.every((t) => hay.includes(t));
     }).slice(0, 30).map((op) => ({ operationId: op.operationId, method: op.method.toUpperCase(), path: op.path, summary: op.summary ?? null, tags: op.tags }));
-    if (!matches.length)
-      return { text: `No endpoints found matching "${args.query}". Total: ${operations.length}.`, isError: false };
-    return { text: JSON.stringify({ count: matches.length, total: operations.length, endpoints: matches }, null, 2), isError: false };
+    const text = !matches.length ? `No endpoints found matching "${args.query}". Total: ${operations.length}.` : JSON.stringify({ count: matches.length, total: operations.length, endpoints: matches }, null, 2);
+    const result = { text, isError: false };
+    cache.set(cacheKey2, result);
+    return result;
   }
   if (name === "get_endpoint_schema") {
+    const cacheKey2 = `schema:${String(args.operationId ?? "")}`;
+    const hit = cache.get(cacheKey2);
+    if (hit)
+      return hit;
     const op = operations.find((o) => o.operationId === args.operationId);
     if (!op)
       return { text: `Endpoint not found: "${args.operationId}"`, isError: true };
-    return {
-      text: JSON.stringify({
-        operationId: op.operationId,
-        method: op.method.toUpperCase(),
-        path: op.path,
-        summary: op.summary ?? null,
-        description: op.description ?? null,
-        tags: op.tags,
-        parameters: op.parameters,
-        requestBody: op.requestBody ?? null,
-        responses: op.responses
-      }, null, 2),
-      isError: false
-    };
+    const text = JSON.stringify({
+      operationId: op.operationId,
+      method: op.method.toUpperCase(),
+      path: op.path,
+      summary: op.summary ?? null,
+      description: op.description ?? null,
+      tags: op.tags,
+      parameters: op.parameters,
+      requestBody: op.requestBody ?? null,
+      responses: op.responses
+    }, null, 2);
+    const result = { text, isError: false };
+    cache.set(cacheKey2, result);
+    return result;
   }
   if (name === "execute_api_request") {
+    const now = Date.now();
+    const gap = now - _lastApiCallMs;
+    if (gap < MIN_API_CALL_INTERVAL_MS) {
+      await new Promise((r) => setTimeout(r, MIN_API_CALL_INTERVAL_MS - gap));
+    }
+    _lastApiCallMs = Date.now();
     const op = operations.find((o) => o.operationId === args.operationId);
     if (!op)
       return { text: `Endpoint not found: "${args.operationId}"`, isError: true };
@@ -4929,20 +5482,81 @@ async function executeTool(name, args) {
     const bodyStr = reqBody !== undefined ? typeof reqBody === "string" ? reqBody : JSON.stringify(reqBody) : null;
     if (bodyStr !== null && op.requestBody?.contentType)
       authedHeaders["Content-Type"] = op.requestBody.contentType;
+    const logId = randomUUID2();
     try {
       const start = Date.now();
       const res = await fetch(authedUrl, { method: op.method.toUpperCase(), headers: authedHeaders, body: bodyStr ?? undefined });
-      const text = await res.text();
+      const responseText = await res.text();
       const latency = Date.now() - start;
-      let pretty = text;
+      const resHeaders = Object.fromEntries(res.headers.entries());
+      dbQueries.insertLog({
+        id: logId,
+        source: "ai",
+        tool_name: String(args.operationId ?? op.operationId),
+        method: op.method.toUpperCase(),
+        url: authedUrl,
+        request_headers: JSON.stringify(authedHeaders),
+        request_body: bodyStr,
+        status_code: res.status,
+        response_headers: JSON.stringify(resHeaders),
+        response_body: responseText.slice(0, 8192),
+        latency_ms: latency,
+        error: null
+      });
+      logBus.emit({
+        id: logId,
+        source: "ai",
+        tool_name: String(args.operationId ?? op.operationId),
+        method: op.method.toUpperCase(),
+        url: authedUrl,
+        request_headers: JSON.stringify(authedHeaders),
+        request_body: bodyStr,
+        status_code: res.status,
+        response_headers: JSON.stringify(resHeaders),
+        response_body: responseText.slice(0, 2048),
+        latency_ms: latency,
+        error: null,
+        created_at: Date.now()
+      });
+      let pretty = responseText;
       try {
-        pretty = JSON.stringify(JSON.parse(text), null, 2);
+        pretty = JSON.stringify(JSON.parse(responseText), null, 2);
       } catch {}
       return { text: `HTTP ${res.status} (${latency}ms)
 ${pretty}`, isError: !res.ok };
     } catch (e) {
-      return { text: `Network error: ${e instanceof Error ? e.message : String(e)}`, isError: true };
+      const errMsg = e instanceof Error ? e.message : String(e);
+      dbQueries.insertLog({
+        id: logId,
+        source: "ai",
+        tool_name: String(args.operationId ?? op.operationId),
+        method: op.method.toUpperCase(),
+        url: authedUrl,
+        request_headers: JSON.stringify(authedHeaders),
+        request_body: bodyStr,
+        status_code: null,
+        response_headers: null,
+        response_body: null,
+        latency_ms: null,
+        error: errMsg
+      });
+      logBus.emit({
+        id: logId,
+        source: "ai",
+        tool_name: String(args.operationId ?? op.operationId),
+        method: op.method.toUpperCase(),
+        url: authedUrl,
+        request_headers: null,
+        request_body: bodyStr,
+        status_code: null,
+        response_headers: null,
+        response_body: null,
+        latency_ms: null,
+        error: errMsg,
+        created_at: Date.now()
+      });
+      return { text: `Network error: ${errMsg}`, isError: true };
     }
   }
   if (name === "fetch_url") {
@@ -5092,10 +5706,25 @@ ${stripped}`, isError: !res.ok };
   }
   if (name === "save_auth_token") {
     const profileName = String(args.name ?? "AI Login").trim();
+    const tokenType = String(args.token_type ?? "bearer");
+    if (tokenType === "basic" || args.username && args.password) {
+      const username = String(args.username ?? "").trim();
+      const password = String(args.password ?? "").trim();
+      if (!username || !password)
+        return { text: "Error: username and password are required for basic auth", isError: true };
+      const authConfig2 = { type: "basic", username, password };
+      const profileId2 = randomUUID2();
+      try {
+        dbQueries.insertProfile({ id: profileId2, name: profileName, description: "Saved by AI", type: "basic", config: JSON.stringify(authConfig2), token_cache: null, is_active: 0 });
+        dbQueries.activateProfile(profileId2);
+        return { text: JSON.stringify({ success: true, message: `Saved and activated basic auth profile "${profileName}"`, id: profileId2 }), isError: false };
+      } catch (e) {
+        return { text: `Error saving profile: ${e instanceof Error ? e.message : String(e)}`, isError: true };
+      }
+    }
     const token = String(args.token ?? "").trim();
     if (!token)
-      return { text: "Error: token is required", isError: true };
-    const tokenType = String(args.token_type ?? "bearer");
+      return { text: "Error: token is required for bearer/apikey auth", isError: true };
     const headerName = String(args.header_name ?? "X-Api-Key");
     let authConfig;
     let type;
@@ -5120,274 +5749,6 @@ ${stripped}`, isError: !res.ok };
   }
   return { text: `Unknown tool: ${name}`, isError: true };
 }
-async function fetchWithRetry(url, opts, emit, maxRetries = 3) { // fetch() wrapper that retries only on HTTP 429 and resolves to the final Response
-  for (let attempt = 0;attempt <= maxRetries; attempt++) { // attempts 0..maxRetries inclusive, i.e. up to maxRetries + 1 requests
-    const res = await fetch(url, opts);
-    if (res.status !== 429 || attempt === maxRetries) // any non-429 response, or the last allowed attempt, is handed back unchanged
-      return res;
-    const retryAfter = parseInt(res.headers.get("retry-after") ?? "0", 10); // integer-seconds form only; a non-numeric value such as an HTTP-date parses to NaN and fails the > 0 test below
-    const delay = retryAfter > 0 ? retryAfter * 1000 : Math.min(1000 * Math.pow(2, attempt) + Math.random() * 500, 30000); // server hint (s -> ms) wins; otherwise 1 s * 2^attempt plus up to 500 ms of jitter, capped at 30 s
-    emit({ type: "info", message: `Rate limited \u2014 retrying in ${Math.round(delay / 1000)}s\u2026 (attempt ${attempt + 1}/${maxRetries})` });
-    await new Promise((r) => setTimeout(r, delay)); // sleep before the next attempt
-  }
-  return fetch(url, opts); // for integer maxRetries >= 0 the loop always returns first; reached only when the loop exits without returning (e.g. negative maxRetries)
-}
-async function anthropicAgentLoop(apiKey, model, system, initialMessages, emit) { // streams Anthropic Messages turns, runs requested tools via executeTool, and resolves to { content, toolCalls }
-  const msgs = [...initialMessages]; // shallow copy: pushes below do not touch the caller's array
-  const toolCalls = []; // log of every executed tool call, returned to the caller
-  let totalTools = 0;
-  let consecutiveErrors = 0;
-  const endpointErrors = {}; // operationId -> number of failed execute_api_request calls
-  for (let iter = 0;iter < 40; iter++) { // at most 40 model turns
-    const res = await fetchWithRetry("https://api.anthropic.com/v1/messages", {
-      method: "POST",
-      headers: { "Content-Type": "application/json", "x-api-key": apiKey, "anthropic-version": "2023-06-01" },
-      body: JSON.stringify({ model, max_tokens: 4096, system, messages: msgs, tools: ANTHROPIC_TOOLS, stream: true })
-    }, emit);
-    if (!res.ok)
-      throw new Error(`Anthropic error: ${await res.text()}`); // non-2xx: surface the raw response body to the caller
-    let fullText = ""; // all text deltas of this turn, concatenated
-    let stopReason = "";
-    const contentBlocks = []; // content blocks of this turn, indexed by the event's index field
-    const inputAccum = {}; // block index -> partial JSON text of a tool_use input
-    const reader = res.body.getReader();
-    const decoder = new TextDecoder;
-    let buf = ""; // undecoded remainder carried between chunks
-    while (true) {
-      const { done, value } = await reader.read();
-      if (done)
-        break;
-      buf += decoder.decode(value, { stream: true }); // stream mode keeps a split multi-byte sequence buffered until the next chunk
-      const parts = buf.split(`
-`);
-      buf = parts.pop() ?? ""; // the last piece may be incomplete; keep it for the next chunk
-      for (const part of parts) {
-        let dataLine = "";
-        for (const line of part.split(`
-`)) {
-          if (line.startsWith("data: ")) {
-            dataLine = line.slice(6); // payload after the 6-character prefix
-            break; // only the first data line of a part is used
-          }
-        }
-        if (!dataLine || dataLine === "[DONE]")
-          continue;
-        let ev;
-        try {
-          ev = JSON.parse(dataLine);
-        } catch {
-          continue; // unparsable payloads are skipped silently
-        }
-        const type = ev.type;
-        if (type === "content_block_start") {
-          const idx = ev.index;
-          const cb = ev.content_block;
-          contentBlocks[idx] = { type: cb.type, id: cb.id, name: cb.name };
-          if (cb.type === "tool_use")
-            inputAccum[idx] = ""; // start collecting the tool input JSON for this block
-        } else if (type === "content_block_delta") {
-          const idx = ev.index;
-          const delta = ev.delta;
-          if (delta.type === "text_delta" && delta.text) {
-            fullText += delta.text;
-            if (!contentBlocks[idx])
-              contentBlocks[idx] = { type: "text", text: "" }; // no start event was recorded for this index; create a text block on the fly
-            contentBlocks[idx].text = (contentBlocks[idx].text ?? "") + delta.text;
-            emit({ type: "text_delta", text: delta.text }); // forward the text to the client as it arrives
-          } else if (delta.type === "input_json_delta" && delta.partial_json) {
-            inputAccum[idx] = (inputAccum[idx] ?? "") + delta.partial_json;
-          }
-        } else if (type === "content_block_stop") {
-          const idx = ev.index;
-          if (contentBlocks[idx]?.type === "tool_use") {
-            try {
-              contentBlocks[idx].input = JSON.parse(inputAccum[idx] ?? "{}");
-            } catch {
-              contentBlocks[idx].input = {}; // malformed or truncated input JSON falls back to an empty object
-            }
-          }
-        } else if (type === "message_delta") {
-          const delta = ev.delta;
-          if (delta.stop_reason)
-            stopReason = delta.stop_reason;
-        }
-      }
-    }
-    if (stopReason !== "tool_use") // any other stop reason (or none) ends the loop with the streamed text
-      return { content: fullText, toolCalls };
-    if (totalTools >= MAX_TOTAL_TOOLS) { // checked once per turn, before running this turn's tools, so a multi-tool turn can push the count past the limit
-      return { content: `Agent stopped: reached ${MAX_TOTAL_TOOLS} tool calls. Please break your request into smaller steps.`, toolCalls };
-    }
-    msgs.push({ role: "assistant", content: contentBlocks }); // echo the assistant turn so the tool results below can refer to its tool_use ids
-    const toolResults = [];
-    for (const block of contentBlocks) {
-      if (block.type !== "tool_use" || !block.id || !block.name) // skip text blocks and tool blocks missing an id or name
-        continue;
-      totalTools++;
-      emit({ type: "tool_start", tool: block.name, input: block.input ?? {} });
-      const result = await executeTool(block.name, block.input ?? {}); // tools of one turn run sequentially
-      emit({ type: "tool_done", tool: block.name, input: block.input ?? {}, output: result.text, isError: result.isError });
-      toolCalls.push({ tool: block.name, input: block.input ?? {}, output: result.text, isError: result.isError });
-      if (result.isError) {
-        consecutiveErrors++;
-        if (block.name === "execute_api_request" && block.input?.operationId) { // per-endpoint failures are tracked only for API calls that carry an operationId
-          const eid = String(block.input.operationId);
-          endpointErrors[eid] = (endpointErrors[eid] ?? 0) + 1;
-          if (endpointErrors[eid] >= MAX_SAME_ENDPOINT_ERRORS) {
-            const stopContent = result.text + `
-[AGENT LOOP STOPPED: endpoint "${eid}" failed ${MAX_SAME_ENDPOINT_ERRORS} times \u2014 stopping to avoid loop]`;
-            toolResults.push({ type: "tool_result", tool_use_id: block.id, content: stopContent });
-            msgs.push({ role: "user", content: toolResults }); // msgs is local and is not read again before the return below
-            return { content: `Endpoint "${eid}" failed ${MAX_SAME_ENDPOINT_ERRORS} times. Last error: ${result.text}`, toolCalls };
-          }
-        }
-        if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) {
-          const stopMsg = `Stopped after ${MAX_CONSECUTIVE_ERRORS} consecutive errors. Last: ${result.text}`;
-          const stopContent = result.text + `
-[AGENT LOOP STOPPED: ${stopMsg}]`;
-          toolResults.push({ type: "tool_result", tool_use_id: block.id, content: stopContent });
-          msgs.push({ role: "user", content: toolResults }); // msgs is local and is not read again before the return below
-          return { content: stopMsg, toolCalls };
-        }
-      } else {
-        consecutiveErrors = 0; // any successful tool call resets the streak
-      }
-      toolResults.push({ type: "tool_result", tool_use_id: block.id, content: result.text });
-    }
-    if (!toolResults.length) // stop reason was tool_use but no usable tool block was found
-      return { content: fullText, toolCalls };
-    msgs.push({ role: "user", content: toolResults }); // all tool results of the turn go back in a single user message
-  }
-  return { content: "(max iterations reached)", toolCalls }; // the 40-turn cap was hit while the model was still requesting tools
-}
-async function openaiCompatibleLoop(base, apiKey, model, extraHeaders, system, initialMessages, emit) { // agent loop against a streaming /v1/chat/completions endpoint under base; resolves to { content, toolCalls }
-  const msgs = [{ role: "system", content: system }, ...initialMessages]; // new array with the system prompt first; the caller's array is not mutated
-  const toolCalls = []; // log of every executed tool call, returned to the caller
-  const authHeaders = {};
-  if (apiKey) // no Authorization header is sent when the key is empty or missing
-    authHeaders["Authorization"] = `Bearer ${apiKey}`;
-  let totalTools = 0;
-  let consecutiveErrors = 0;
-  const endpointErrors = {}; // operationId -> number of failed execute_api_request calls
-  for (let iter = 0;iter < 40; iter++) { // at most 40 model turns
-    const res = await fetchWithRetry(`${base}/v1/chat/completions`, {
-      method: "POST",
-      headers: { "Content-Type": "application/json", ...authHeaders, ...extraHeaders }, // extraHeaders are spread last, so they override the defaults on a key clash
-      body: JSON.stringify({ model, messages: msgs, tools: OPENAI_TOOLS, tool_choice: "auto", stream: true })
-    }, emit);
-    if (!res.ok)
-      throw new Error(await res.text()); // non-2xx: the raw response body becomes the error message
-    let fullContent = ""; // all content deltas of this turn, concatenated
-    let finishReason = "";
-    const tcAccum = {}; // tool-call index -> { id, name, args } assembled from streamed fragments
-    const reader = res.body.getReader();
-    const dec = new TextDecoder;
-    let buf = ""; // undecoded remainder carried between chunks
-    outer: // label lets the inner loop stop reading the stream altogether
-      while (true) {
-        const { done, value } = await reader.read();
-        if (done)
-          break;
-        buf += dec.decode(value, { stream: true }); // stream mode keeps a split multi-byte sequence buffered until the next chunk
-        const parts = buf.split(`
-`);
-        buf = parts.pop() ?? ""; // the last piece may be incomplete; keep it for the next chunk
-        for (const part of parts) {
-          let data = "";
-          for (const line of part.split(`
-`)) {
-            if (line.startsWith("data: ")) {
-              data = line.slice(6); // payload after the 6-character prefix
-              break; // only the first data line of a part is used
-            }
-          }
-          if (!data)
-            continue;
-          if (data === "[DONE]")
-            break outer; // end-of-stream sentinel: stop reading
-          let ev;
-          try {
-            ev = JSON.parse(data);
-          } catch {
-            continue; // unparsable payloads are skipped silently
-          }
-          if (ev.object === "error")
-            throw new Error(JSON.stringify(ev)); // an error object inside the stream aborts the whole loop
-          const choices = ev.choices;
-          const choice = choices?.[0]; // only the first choice is consumed
-          if (!choice)
-            continue;
-          const fr = choice.finish_reason;
-          if (fr)
-            finishReason = fr;
-          const delta = choice.delta;
-          if (!delta)
-            continue;
-          if (typeof delta.content === "string" && delta.content) {
-            fullContent += delta.content;
-            emit({ type: "text_delta", text: delta.content }); // forward the text to the client as it arrives
-          }
-          const tcDeltas = delta.tool_calls;
-          if (tcDeltas) {
-            for (const tc of tcDeltas) {
-              if (!tcAccum[tc.index])
-                tcAccum[tc.index] = { id: "", name: "", args: "" };
-              const entry = tcAccum[tc.index];
-              if (tc.id)
-                entry.id += tc.id; // id, name and arguments are all appended fragment by fragment
-              if (tc.function?.name)
-                entry.name += tc.function.name;
-              if (tc.function?.arguments)
-                entry.args += tc.function.arguments;
-            }
-          }
-        }
-      }
-    if (finishReason !== "tool_calls") // any other finish reason (or none) ends the loop with the streamed text
-      return { content: fullContent, toolCalls };
-    if (totalTools >= MAX_TOTAL_TOOLS) { // checked once per turn, before running this turn's tools, so a multi-tool turn can push the count past the limit
-      return { content: `Agent stopped: reached ${MAX_TOTAL_TOOLS} tool calls. Please break your request into smaller steps.`, toolCalls };
-    }
-    const msgToolCalls = Object.values(tcAccum).map((tc) => ({
-      id: tc.id,
-      type: "function",
-      function: { name: tc.name, arguments: tc.args } // arguments stay as the raw JSON string received from the model
-    }));
-    msgs.push({ role: "assistant", content: fullContent || null, tool_calls: msgToolCalls }); // empty text is sent as null alongside the tool calls
-    for (const tc of Object.values(tcAccum)) {
-      let args = {};
-      try {
-        args = JSON.parse(tc.args);
-      } catch {} // unparsable arguments leave args as an empty object
-      totalTools++;
-      emit({ type: "tool_start", tool: tc.name, input: args });
-      const result = await executeTool(tc.name, args); // tools of one turn run sequentially
-      emit({ type: "tool_done", tool: tc.name, input: args, output: result.text, isError: result.isError });
-      toolCalls.push({ tool: tc.name, input: args, output: result.text, isError: result.isError });
-      msgs.push({ role: "tool", tool_call_id: tc.id, content: result.text }); // one tool message per call, appended before the error checks
-      if (result.isError) {
-        consecutiveErrors++;
-        if (tc.name === "execute_api_request" && args.operationId) { // per-endpoint failures are tracked only for API calls that carry an operationId
-          const eid = String(args.operationId);
-          endpointErrors[eid] = (endpointErrors[eid] ?? 0) + 1;
-          if (endpointErrors[eid] >= MAX_SAME_ENDPOINT_ERRORS) {
-            return { content: `Endpoint "${eid}" failed ${MAX_SAME_ENDPOINT_ERRORS} times. Last error: ${result.text}`, toolCalls };
-          }
-        }
-        if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) {
-          return { content: `Stopped after ${MAX_CONSECUTIVE_ERRORS} consecutive errors. Last: ${result.text}`, toolCalls };
-        }
-      } else {
-        consecutiveErrors = 0; // any successful tool call resets the streak
-      }
-    }
-  }
-  return { content: "(max iterations reached)", toolCalls }; // the 40-turn cap was hit while the model was still requesting tools
-}
 async function handleAiChat(req) {
   let body;
   try {
@@ -5398,43 +5759,58 @@ async function handleAiChat(req) {
   const settingsRow = dbQueries.getSettings();
   const settings = settingsRow ? JSON.parse(settingsRow.value) : {};
   const ai = settings.ai ?? {};
+  const provider = ai.provider ?? "anthropic";
+  const providerDefaults = PROVIDER_DEFAULTS[provider] ?? { model: "" };
+  const requiresKey = provider !== "ollama" && provider !== "custom";
+  if (requiresKey && !ai.apiKey) {
+    return json({ error: "No AI API key configured. Go to Settings \u2192 AI Provider to add one." }, 400);
+  }
+  if (!hasState())
+    return json({ error: "No spec loaded." }, 400);
   const { spec, operations } = getState();
   const preview = operations.slice(0, 40).map((op) => `- ${op.method.toUpperCase()} ${op.path}${op.summary ? `: ${op.summary}` : ""}`).join(`
 `);
   const activeAuth = dbQueries.getActiveProfile();
-  const authLine = activeAuth ? `Active auth: "${activeAuth.name}" (${activeAuth.type})` : "No active auth profile. If the API requires auth, call list_auth_profiles first, then set_active_auth, or login and call save_auth_token.";
+  const authLine = activeAuth ? `Active auth: "${activeAuth.name}" (${activeAuth.type})` : "No active auth profile. Call list_auth_profiles, then set_active_auth or save_auth_token.";
+  const memory = dbQueries.getMemory(20);
+  const memorySection = memory.length ? `
+## Memory from previous sessions
+${memory.map((m) => `${m.role === "user" ? "User" : "Assistant"}: ${m.content.slice(0, 300)}${m.content.length > 300 ? "\u2026" : ""}`).join(`
+`)}
+` : "";
   const system = `You are an AI assistant for the "${spec.title}" API (v${spec.version}). Base URL: ${spec.baseUrl}.
 Total endpoints: ${operations.length}. Sample:
 ${preview}${operations.length > 40 ? `
 ... and ${operations.length - 40} more` : ""}
 ${authLine}
+${memorySection}
+Tools:
+- search_endpoints / get_endpoint_schema \u2014 explore API structure (results cached; never repeat the same query)
+- execute_api_request \u2014 call an endpoint
+- list_auth_profiles / set_active_auth / save_auth_token \u2014 manage credentials
+  \u2022 save_auth_token supports token_type="basic" with username+password for HTTP Basic auth
+- fetch_url \u2014 external docs
+- dns_lookup \u2014 connectivity diagnostics
+- get_recent_logs \u2014 proxy traffic history
+- run_security_check \u2014 static security analysis
-Tools available:
-- search_endpoints / get_endpoint_schema: explore API structure
-- execute_api_request: call an endpoint
-- list_auth_profiles: list all saved auth profiles (name, type, active)
-- set_active_auth(name): switch to a saved profile before making requests
-- save_auth_token(name, token): IMMEDIATELY call this after a successful login that returns a token \u2014 saves the token as a named profile and activates it so subsequent requests are authenticated
-- fetch_url: fetch external docs
-- dns_lookup: DNS resolution / connectivity
-- get_recent_logs: recent request/response traffic
-- run_security_check: security analysis on an endpoint
+Auth workflow: 401/403 \u2192 list_auth_profiles \u2192 set_active_auth OR find login endpoint \u2192 save_auth_token \u2192 retry.
-Authentication workflow: if requests return 401/403, call list_auth_profiles first. If a profile exists, call set_active_auth. If none, find and call the login endpoint, extract the token from the response, then call save_auth_token immediately. After saving, retry the original request.
+Rules:
+- Never repeat a search you already ran \u2014 results are cached.
+- Diagnose errors before retrying. Three failures on the same endpoint stops the agent.
+- Do not fire rapid successive API requests.
-IMPORTANT: if an endpoint returns an error, diagnose it (check the schema, check auth) and fix the root cause before retrying. If the same endpoint fails 3 times the agent will be forcibly stopped. Do not retry without changing something.
+Be concise. Format code and JSON in fenced blocks.${ai.customInstructions ? `
-Be concise and practical. Format code and JSON in code blocks.${body.extra_context ? `
+---
+## Custom instructions
+${ai.customInstructions}` : ""}${body.extra_context ? `
 ---
-## Current context
+## Context
 ${body.extra_context}` : ""}`;
-  const provider = ai.provider ?? "anthropic";
-  const requiresKey = provider !== "ollama" && provider !== "custom";
-  if (requiresKey && !ai.apiKey) {
-    return json({ error: "No AI API key configured. Go to Settings \u2192 AI Provider to add one." }, 400);
-  }
   const { readable, writable } = new TransformStream;
   const writer = writable.getWriter();
   const enc = new TextEncoder;
@@ -5444,62 +5820,39 @@ ${body.extra_context}` : ""}`;
 `)).catch(() => {});
   };
   const msgs = body.messages;
+  const toolCache = new Map;
+  const abortCtrl = new AbortController;
+  const lastUserMsg = [...msgs].reverse().find((m) => m.role === "user");
+  const userMemoryContent = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : null;
   (async () => {
     try {
-      let result;
-      if (provider === "anthropic") {
-        result = await anthropicAgentLoop(ai.apiKey, ai.model || "claude-haiku-4-5-20251001", system, msgs, emit);
-      } else if (provider === "openai") {
-        const base = (ai.baseUrl || "https://api.openai.com").replace(/\/$/, "");
-        result = await openaiCompatibleLoop(base, ai.apiKey, ai.model || "gpt-4o-mini", {}, system, msgs, emit);
-      } else if (provider === "mistral") {
-        const base = (ai.baseUrl || "https://api.mistral.ai").replace(/\/$/, "");
-        result = await openaiCompatibleLoop(base, ai.apiKey, ai.model || "mistral-small-latest", {}, system, msgs, emit);
-      } else if (provider === "github-copilot") {
-        const base = (ai.baseUrl || "https://api.githubcopilot.com").replace(/\/$/, "");
-        result = await openaiCompatibleLoop(base, ai.apiKey, ai.model || "gpt-4o", {
-          "Copilot-Integration-Id": "vscode-chat",
-          "Editor-Version": "vscode/1.85.0"
-        }, system, msgs, emit);
-      } else if (provider === "groq") {
-        const base = (ai.baseUrl || "https://api.groq.com/openai").replace(/\/$/, "");
-        result = await openaiCompatibleLoop(base, ai.apiKey, ai.model || "llama-3.1-70b-versatile", {}, system, msgs, emit);
-      } else if (provider === "custom") {
-        if (!ai.baseUrl) {
-          emit({ type: "error", message: "Custom provider requires a Base URL." });
-          await writer.close();
-          return;
-        }
-        result = await openaiCompatibleLoop(ai.baseUrl.replace(/\/$/, ""), ai.apiKey, ai.model || "", {}, system, msgs, emit);
-      } else if (provider === "ollama") {
-        const base = (ai.baseUrl || "http://localhost:11434").replace(/\/$/, "");
-        const res = await fetch(`${base}/api/chat`, {
-          method: "POST",
-          headers: { "Content-Type": "application/json" },
-          body: JSON.stringify({ model: ai.model || "llama3", messages: [{ role: "system", content: system }, ...msgs], stream: false })
-        });
-        const d = await res.json();
-        result = { content: d.message.content ?? "", toolCalls: [] };
-      } else if (provider === "gemini") {
-        const model = ai.model || "gemini-1.5-flash";
-        const base = (ai.baseUrl || "https://generativelanguage.googleapis.com").replace(/\/$/, "");
-        const res = await fetch(`${base}/v1beta/models/${model}:generateContent?key=${ai.apiKey}`, {
-          method: "POST",
-          headers: { "Content-Type": "application/json" },
-          body: JSON.stringify({
-            systemInstruction: { parts: [{ text: system }] },
-            contents: msgs.map((m) => ({ role: m.role === "assistant" ? "model" : "user", parts: [{ text: m.content }] })),
-            generationConfig: { maxOutputTokens: 4096 }
-          })
-        });
-        const d = await res.json();
-        result = { content: d.candidates[0]?.content.parts[0]?.text ?? "", toolCalls: [] };
-      } else {
-        emit({ type: "error", message: `Unknown provider: ${provider}` });
-        await writer.close();
-        return;
+      const result = await runAgentLoop({
+        provider,
+        apiKey: ai.apiKey,
+        model: ai.model || providerDefaults.model,
+        baseUrl: ai.baseUrl || providerDefaults.baseUrl,
+        maxTokens: ai.maxTokens ?? 4096,
+        stepTimeoutMs: ai.stepTimeoutMs ?? 60000,
+        temperature: ai.temperature,
+        topK: ai.topK && ai.topK > 0 ? ai.topK : undefined,
+        parallelTools: true,
+        enablePromptCache: true
+      }, system, msgs, TOOL_SCHEMAS, (name, args) => executeTool(name, args, toolCache), emit, abortCtrl.signal, toolCache);
+      if (result.content && result.stopReason !== "max_iterations") {
+        try {
+          if (userMemoryContent)
+            dbQueries.saveMemory("user", userMemoryContent.slice(0, 1000));
+          dbQueries.saveMemory("assistant", result.content.slice(0, 1000));
+          dbQueries.trimMemory(40);
+        } catch {}
       }
-      emit({ type: "done", content: result.content, toolCalls: result.toolCalls });
+      emit({
+        type: "done",
+        content: result.content,
+        toolCalls: result.toolCalls,
+        stopReason: result.stopReason,
+        tokens: result.tokens
+      });
     } catch (e) {
       emit({ type: "error", message: e instanceof Error ? e.message : String(e) });
     } finally {
@@ -5509,11 +5862,7 @@ ${body.extra_context}` : ""}`;
     }
   })();
   return new Response(readable, {
-    headers: {
-      "Content-Type": "text/event-stream",
-      "Cache-Control": "no-cache",
-      ...CORS4
-    }
+    headers: { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", ...CORS4 }
   });
 }
 function handleGetProfiles() {
@@ -6120,7 +6469,7 @@ function handleDeleteCaptureBin(path) {
   dbQueries.deleteCaptureBin(id);
   return json({ ok: true });
 }
-var CORS4, TOOL_DEFS, ANTHROPIC_TOOLS, OPENAI_TOOLS, MAX_TOTAL_TOOLS = 40, MAX_CONSECUTIVE_ERRORS = 5, MAX_SAME_ENDPOINT_ERRORS = 3; // old bindings: per-provider tool lists plus the limits read by the removed agent-loop functions
+var CORS4, TOOL_DEFS, _lastApiCallMs = 0, MIN_API_CALL_INTERVAL_MS = 400, TOOL_SCHEMAS, PROVIDER_DEFAULTS; // new bindings: CORS4, TOOL_SCHEMAS and PROVIDER_DEFAULTS are assigned inside init_routes below; the two call-interval names are not used in the visible hunks
 var init_routes = __esm(() => {
   init_db();
   init_engine();
@@ -6130,6 +6479,7 @@ var init_routes = __esm(() => {
   init_config();
   init_version();
   init_engine2();
+  init_harness();
   CORS4 = {
     "Access-Control-Allow-Origin": "*",
     "Access-Control-Allow-Methods": "GET, POST, PUT, PATCH, DELETE, OPTIONS",
@@ -6198,25 +6548,34 @@ var init_routes = __esm(() => {
       required: ["name"]
     },
     save_auth_token: {
-      description: "Save a bearer token or API key as a named auth profile and immediately activate it. Call this right after a successful login endpoint returns a token so all subsequent API requests are authenticated.",
+      description: "Save a bearer token, API key, or basic auth credentials as a named auth profile and immediately activate it. Call this right after a successful login endpoint returns a token so all subsequent API requests are authenticated.",
       params: {
         name: { type: "string", description: 'Profile name, e.g. "user session" or the username' },
-        token: { type: "string", description: "The bearer token or API key value to save" },
-        token_type: { type: "string", enum: ["bearer", "apikey_header", "apikey_query"], description: "Token type (default: bearer)" },
-        header_name: { type: "string", description: "Header name for apikey_header type (default: X-Api-Key)" }
+        token: { type: "string", description: "The bearer token or API key value (omit for basic auth)" },
+        token_type: { type: "string", enum: ["bearer", "apikey_header", "apikey_query", "basic"], description: "Token type (default: bearer)" },
+        header_name: { type: "string", description: "Header name for apikey_header type (default: X-Api-Key)" },
+        username: { type: "string", description: "Username for basic auth" },
+        password: { type: "string", description: "Password for basic auth" }
       },
-      required: ["name", "token"]
+      required: ["name"]
     }
   };
-  ANTHROPIC_TOOLS = Object.entries(TOOL_DEFS).map(([name, def]) => ({
+  TOOL_SCHEMAS = Object.entries(TOOL_DEFS).map(([name, def]) => ({
     name,
     description: def.description,
-    input_schema: { type: "object", properties: def.params, required: def.required }
-  }));
-  OPENAI_TOOLS = Object.entries(TOOL_DEFS).map(([name, def]) => ({
-    type: "function",
-    function: { name, description: def.description, parameters: { type: "object", properties: def.params, required: def.required } }
+    params: def.params,
+    required: def.required
   }));
+  PROVIDER_DEFAULTS = {
+    anthropic: { model: "claude-haiku-4-5-20251001" },
+    openai: { model: "gpt-4o-mini", baseUrl: "https://api.openai.com" },
+    mistral: { model: "mistral-small-latest", baseUrl: "https://api.mistral.ai" },
+    groq: { model: "llama-3.1-70b-versatile", baseUrl: "https://api.groq.com/openai" },
+    "github-copilot": { model: "gpt-4o", baseUrl: "https://api.githubcopilot.com" },
+    ollama: { model: "llama3", baseUrl: "http://localhost:11434" },
+    gemini: { model: "gemini-1.5-flash" },
+    custom: { model: "" }
+  };
 });
 // src/daemon.ts