npm - botholomew - Versions diffs - 0.16.4 → 0.18.0 - Mend

botholomew 0.16.4 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (98) hide show

package/README.md +46 -41
package/package.json +4 -9
package/src/chat/agent.ts +37 -40
package/src/chat/session.ts +10 -10
package/src/cli.ts +0 -2
package/src/commands/capabilities.ts +35 -33
package/src/commands/context.ts +133 -221
package/src/commands/init.ts +22 -1
package/src/commands/mcpx.ts +21 -8
package/src/commands/nuke.ts +52 -15
package/src/commands/prepare.ts +16 -13
package/src/config/loader.ts +1 -8
package/src/config/schemas.ts +6 -0
package/src/constants.ts +16 -32
package/src/init/index.ts +52 -27
package/src/mcpx/client.ts +21 -5
package/src/mem/client.ts +33 -0
package/src/{context → prompts}/capabilities.ts +11 -7
package/src/schedules/store.ts +1 -1
package/src/tasks/store.ts +1 -1
package/src/threads/store.ts +1 -1
package/src/tools/capabilities/refresh.ts +1 -1
package/src/tools/membot/adapter.ts +111 -0
package/src/tools/membot/copy.ts +59 -0
package/src/tools/membot/count_lines.ts +53 -0
package/src/tools/membot/edit.ts +72 -0
package/src/tools/membot/exists.ts +54 -0
package/src/tools/membot/index.ts +26 -0
package/src/tools/{context → membot}/pipe.ts +34 -32
package/src/tools/registry.ts +6 -37
package/src/tools/tool.ts +6 -8
package/src/tui/App.tsx +3 -4
package/src/tui/components/ContextPanel.tsx +109 -226
package/src/tui/components/HelpPanel.tsx +2 -2
package/src/tui/components/StatusBar.tsx +0 -6
package/src/tui/components/ThreadPanel.tsx +8 -7
package/src/tui/wrapDetail.ts +11 -0
package/src/worker/heartbeat.ts +0 -20
package/src/worker/index.ts +13 -13
package/src/worker/llm.ts +7 -9
package/src/worker/prompt.ts +25 -13
package/src/worker/spawn.ts +1 -1
package/src/worker/tick.ts +10 -9
package/src/commands/db.ts +0 -119
package/src/commands/with-db.ts +0 -22
package/src/context/chunker.ts +0 -275
package/src/context/embedder-impl.ts +0 -100
package/src/context/embedder.ts +0 -9
package/src/context/fetcher-errors.ts +0 -8
package/src/context/fetcher.ts +0 -515
package/src/context/locks.ts +0 -146
package/src/context/markdown-converter.ts +0 -186
package/src/context/reindex.ts +0 -198
package/src/context/store.ts +0 -841
package/src/context/url-utils.ts +0 -25
package/src/db/connection.ts +0 -255
package/src/db/doctor.ts +0 -235
package/src/db/embeddings.ts +0 -317
package/src/db/query.ts +0 -56
package/src/db/schema.ts +0 -93
package/src/db/sql/1-core_tables.sql +0 -53
package/src/db/sql/10-dedupe_context_items.sql +0 -26
package/src/db/sql/11-rebuild_hnsw.sql +0 -8
package/src/db/sql/12-workers.sql +0 -66
package/src/db/sql/13-drive-paths.sql +0 -47
package/src/db/sql/14-drop_hnsw_index.sql +0 -8
package/src/db/sql/15-fts_index.sql +0 -8
package/src/db/sql/16-source_url.sql +0 -7
package/src/db/sql/17-worker_log_path.sql +0 -3
package/src/db/sql/18-reset_embeddings_for_local.sql +0 -39
package/src/db/sql/19-disk_backed_index.sql +0 -36
package/src/db/sql/2-logging_tables.sql +0 -24
package/src/db/sql/20-drop_db_tables_for_files.sql +0 -19
package/src/db/sql/3-daemon_state.sql +0 -5
package/src/db/sql/4-unique_context_path.sql +0 -1
package/src/db/sql/5-reset_embeddings_for_openai.sql +0 -1
package/src/db/sql/6-vss_index.sql +0 -7
package/src/db/sql/7-drop_embeddings_fk.sql +0 -23
package/src/db/sql/8-task_output.sql +0 -1
package/src/db/sql/9-source-type.sql +0 -1
package/src/tools/context/read-large-result.ts +0 -33
package/src/tools/dir/create.ts +0 -47
package/src/tools/dir/size.ts +0 -77
package/src/tools/dir/tree.ts +0 -124
package/src/tools/file/copy.ts +0 -73
package/src/tools/file/count-lines.ts +0 -54
package/src/tools/file/delete.ts +0 -83
package/src/tools/file/edit.ts +0 -76
package/src/tools/file/exists.ts +0 -33
package/src/tools/file/info.ts +0 -66
package/src/tools/file/move.ts +0 -66
package/src/tools/file/read.ts +0 -67
package/src/tools/file/write.ts +0 -58
package/src/tools/search/fuse.ts +0 -96
package/src/tools/search/index.ts +0 -127
package/src/tools/search/regexp.ts +0 -82
package/src/tools/search/semantic.ts +0 -167
package/src/{db → utils}/uuid.ts +0 -0

package/src/context/markdown-converter.ts DELETED Viewed

@@ -1,186 +0,0 @@
-import type { BotholomewConfig } from "../config/schemas.ts";
-import { logger } from "../utils/logger.ts";
-import { createLlmClient } from "../worker/llm-client.ts";
-import { FetchFailureError } from "./fetcher-errors.ts";
-const CONVERTER_MAX_TOKENS = 16_384; // sent as max_tokens on the conversion request and quoted in the budget-exceeded error message
-const CONVERTER_SYSTEM_PROMPT = `You normalize documents to clean, well-structured Markdown.
-**If the input is already clean, valid Markdown, return it verbatim with no edits.** Look for ATX headings (#, ##), bullet/numbered lists, fenced code blocks, inline code, links in [text](url) form, blockquotes, GFM tables. If the structure is consistently markdown-shaped, echo it back unchanged.
-Otherwise, convert it. The input mime_type is a hint, not a guarantee — verify the actual content. Common non-markdown formats to recognize and convert:
-- **HTML** — strip tags, scripts, styles, navigation/footer chrome; preserve headings, paragraphs, lists, tables, links, code.
-- **JSON / XML / YAML** — render the structure as readable Markdown (headings/lists for objects, tables where appropriate, fenced code blocks for inline values).
-- **DocMD (Google Docs structured format)** — lines like \`[H1 1-31 HEADING_1 tabId=t.0 ...] Title text\` or \`[P5 884-937 PARAGRAPH ...] Body text\`. Strip the bracket annotations entirely; map H1→#, H2→##, H3→###, P→paragraph; preserve the trailing text content.
-- **RTF, plain text with mixed structure, ad-hoc formats** — extract the semantic content, drop the noise.
-Rules for the output:
-- Preserve all semantic content: headings, paragraphs, lists, tables, links, inline code, code blocks, blockquotes.
-- Use ATX headings (#, ##, ###), fenced code blocks (\`\`\`lang), GFM-style tables, and reference- or inline-style links — whichever is cleanest.
-- Strip metadata headers/IDs that aren't part of the document body (e.g. \`@document_id: ...\`, \`@revision_id: ...\`).
-- Output **only** the Markdown. No preamble ("Here is the converted markdown:"), no trailing commentary, no wrapping the entire output in a code fence.`; // passed as the system prompt of the conversion request in convertToMarkdown
-const MARKDOWN_MIME_TYPES = new Set([ // base mime types (parameters stripped, lowercased) that isMarkdownMimeType accepts
-  "text/markdown",
-  "text/x-markdown",
-  "text/md",
-]);
-export function isMarkdownMimeType(mimeType: string): boolean { // true when the base type of mimeType is one of MARKDOWN_MIME_TYPES
-  const base = mimeType.split(";")[0]?.trim().toLowerCase() ?? ""; // keep only the text before the first ";", then trim and lowercase it
-  return MARKDOWN_MIME_TYPES.has(base);
-}
-/**
- * Sniff content for a non-markdown structure. Returns a mime type when the
- * content has unmistakable markers of HTML / XML / JSON / etc., otherwise
- * null. Used to verify a tool's claim of `text/markdown` — if the agent (or
- * a defaulted mime type) lies about the format, we want to convert anyway.
- *
- * Markdown is a superset of plain text, so a null return ≠ "definitely
- * markdown". It just means we found no strong contradicting signal.
- *
- * Checks run in this order and the first hit wins: an HTML doctype, an
- * opening `<html` tag, an XML declaration, content that parses as a JSON
- * object or array, and finally a tag-density heuristic. All checks except
- * the JSON one look only at the first 4096 characters that remain after
- * leading whitespace is trimmed.
- *
- * @param content - Raw text to inspect.
- * @returns "text/html", "application/xml", "application/json", or null.
- */
-export function sniffNonMarkdownMimeType(content: string): string | null {
-  const head = content.trimStart().slice(0, 4096);
-  if (!head) return null; // empty or whitespace-only content: nothing to sniff
-  if (/^<!doctype\s+html/i.test(head)) return "text/html";
-  if (/^<html[\s>]/i.test(head)) return "text/html";
-  if (/^<\?xml[\s?]/i.test(head)) return "application/xml";
-  // JSON: parses as JSON top-to-bottom (use the full content, not the head).
-  const trimmed = content.trim();
-  if (
-    (trimmed.startsWith("{") && trimmed.endsWith("}")) ||
-    (trimmed.startsWith("[") && trimmed.endsWith("]"))
-  ) {
-    try {
-      JSON.parse(trimmed); // only success/failure matters; the parsed value is discarded
-      return "application/json";
-    } catch {
-      // fall through
-    }
-  }
-  // Heuristic HTML: dense tag markup. Markdown can contain occasional inline
-  // HTML, so we only flag it when tags dominate the sample.
-  const tagMatches = head.match(/<\/?[a-z][a-z0-9]*[\s/>]/gi) ?? [];
-  if (tagMatches.length >= 10) { // need at least 10 tag-like matches in the sample before judging density
-    const charsPerTag = head.length / tagMatches.length;
-    if (charsPerTag < 80) return "text/html"; // fewer than 80 sampled characters per tag on average
-  }
-  return null;
-}
-/**
- * Decide the effective mime type for a piece of content. If the claim is
- * markdown but the content sniffs as something else, trust the sniff so we
- * convert instead of saving mislabeled garbage.
- *
- * A non-markdown claim is returned as-is and the content is not inspected.
- *
- * @param claimedMimeType - Mime type reported for the content.
- * @param content - The content itself; only sniffed when the claim is a
- *   markdown mime type.
- * @returns The mime type to use, plus `sniffed: true` only when a sniffed
- *   type replaced a markdown claim.
- */
-export function resolveEffectiveMimeType(
-  claimedMimeType: string,
-  content: string,
-): { mimeType: string; sniffed: boolean } {
-  if (!isMarkdownMimeType(claimedMimeType)) {
-    return { mimeType: claimedMimeType, sniffed: false };
-  }
-  const sniffed = sniffNonMarkdownMimeType(content);
-  if (sniffed) return { mimeType: sniffed, sniffed: true };
-  return { mimeType: claimedMimeType, sniffed: false }; // markdown claim with no contradicting signal
-}
-function stripLeadingMarkdownFence(text: string): string { // unwraps text whose trimmed form starts with a triple-backtick fence line (optionally tagged markdown or md) and ends with a closing fence
-  const trimmed = text.trim();
-  const fenceMatch = trimmed.match(
-    /^```(?:markdown|md)?\s*\n([\s\S]*?)\n```\s*$/,
-  );
-  if (fenceMatch?.[1]) return fenceMatch[1]; // inner text only; an empty capture is falsy and falls through
-  return text; // no wrapping fence matched: the input is returned as given, not trimmed
-}
-/**
- * Convert arbitrary content to Markdown via a single-shot LLM call.
- *
- * Does **not** short-circuit on `mimeType === "text/markdown"` — tools
- * frequently mislabel their output (e.g. Google Docs' "DocMD" tool returns
- * structured `[H1 ...]` annotations, not real markdown). The mime type is
- * passed in as a hint for the model; the model decides whether the content
- * is already markdown (echo unchanged) or needs converting.
- *
- * - Throws FetchFailureError when the response hits max_tokens (silently
- *   truncating the saved file would be worse than failing loudly).
- * - On transient API errors, logs a warning and returns the raw content so
- *   the import still produces *something* the user can edit.
- * - Any other non-FetchFailureError thrown inside the try block is handled
- *   the same way: warning logged, raw content returned.
- * - Returns the raw content without calling the model when
- *   `config.anthropic_api_key` is falsy.
- * - Returns the raw content (with a warning) when the model's text output
- *   is empty or whitespace-only.
- *
- * @param content - Raw document text to convert.
- * @param mimeType - Claimed mime type; interpolated into the user message.
- * @param sourceUrl - Source URL; interpolated into the user message.
- * @param config - Supplies the API key check, the LLM client settings and
- *   the model (`chunker_model` when truthy, otherwise `model`).
- * @returns The model's text with a single wrapping code fence removed, or
- *   the original `content` in the fallback cases listed above.
- * @throws FetchFailureError when the final message's stop_reason is
- *   "max_tokens".
- */
-export async function convertToMarkdown(
-  content: string,
-  mimeType: string,
-  sourceUrl: string,
-  config: Required<BotholomewConfig>,
-): Promise<string> {
-  if (!config.anthropic_api_key) return content; // no API key: skip conversion entirely
-  const client = createLlmClient(config);
-  // Conversion is mechanical text-shaping — Haiku (the chunker model) is
-  // plenty smart for this and ~5x faster than Opus on long documents.
-  const model = config.chunker_model || config.model;
-  try {
-    const stream = client.messages.stream({
-      model,
-      max_tokens: CONVERTER_MAX_TOKENS,
-      system: CONVERTER_SYSTEM_PROMPT,
-      messages: [
-        {
-          role: "user",
-          content: `Convert this ${mimeType} content to Markdown. Source URL: ${sourceUrl}\n\n${content}`,
-        },
-      ],
-    });
-    let charsReceived = 0;
-    let lastLogged = 0;
-    const PROGRESS_INTERVAL_CHARS = 2_000;
-    for await (const event of stream) { // this loop only counts streamed characters for progress logging; the result text is read from finalMessage() below
-      if (
-        event.type === "content_block_delta" &&
-        event.delta.type === "text_delta"
-      ) {
-        charsReceived += event.delta.text.length;
-        if (charsReceived - lastLogged >= PROGRESS_INTERVAL_CHARS) {
-          logger.dim(`  ...converted ${charsReceived} chars`);
-          lastLogged = charsReceived;
-        }
-      }
-    }
-    const final = await stream.finalMessage();
-    if (final.stop_reason === "max_tokens") {
-      throw new FetchFailureError(
-        `Markdown conversion exceeded token budget (max_tokens=${CONVERTER_MAX_TOKENS}). The source document is too large to convert in one pass — try fetching a smaller section or a tool that supports pagination.`,
-      );
-    }
-    const text = final.content
-      .flatMap((block) => (block.type === "text" ? [block.text] : [])) // keep text blocks only; other block types are dropped
-      .join("");
-    if (!text.trim()) {
-      logger.warn(
-        "markdown conversion returned empty output — saving raw content",
-      );
-      return content;
-    }
-    return stripLeadingMarkdownFence(text);
-  } catch (err) {
-    if (err instanceof FetchFailureError) throw err; // the token-budget failure above must reach the caller
-    logger.warn(
-      `markdown conversion failed (${err instanceof Error ? err.message : String(err)}) — saving raw content`,
-    );
-    return content;
-  }
-}

package/src/context/reindex.ts DELETED Viewed

@@ -1,198 +0,0 @@
-import { createHash } from "node:crypto";
-import { readFile, stat } from "node:fs/promises";
-import { join } from "node:path";
-import type { BotholomewConfig } from "../config/schemas.ts";
-import { CONTEXT_DIR } from "../constants.ts";
-import { withDb } from "../db/connection.ts";
-import {
-  type ChunkInput,
-  deleteIndexedPath,
-  getIndexedPath,
-  listIndexedPaths,
-  rebuildSearchIndex,
-  upsertChunksForPath,
-} from "../db/embeddings.ts";
-import { logger } from "../utils/logger.ts";
-import { chunkByTextSplit } from "./chunker.ts";
-import { embed as defaultEmbed } from "./embedder.ts";
-import { isContextPathLocked } from "./locks.ts";
-import { listContextDir } from "./store.ts";
-/**
- * Embed function shape — exported for tests that want to inject a fake.
- * Receives the texts to embed plus the resolved config and resolves to an
- * array of number[] vectors; reindexContext pairs vectors[i] with the i-th
- * input text.
- */
-export type EmbedFn = (
-  texts: string[],
-  config: Required<BotholomewConfig>,
-) => Promise<number[][]>;
-/**
- * Walk every textual file under `<projectDir>/context/` and reconcile the
- * disk-backed search index. Adds new files, replaces stale ones whose
- * content_hash changed, and drops index rows for files that no longer exist.
- *
- * Uses the deterministic text splitter (`chunkByTextSplit`) — never the LLM
- * chunker — so a fresh project with no API key still indexes successfully.
- *
- * Files are processed one at a time, and each database operation runs in
- * its own `withDb` call. The search index is rebuilt once at the end, and
- * only when at least one path was added, updated or removed.
- *
- * @param projectDir - Project root; files are read from
- *   `join(projectDir, CONTEXT_DIR, <path>)`.
- * @param config - Passed to the embed function; `embedding_dimension` sizes
- *   the zero-vector fallback.
- * @param dbPath - Database path handed to every `withDb` call.
- * @param opts - `onProgress` receives short status strings (defaults to a
- *   no-op); `embedFn` replaces the default embedder.
- * @returns Counts of added / updated / unchanged / removed paths plus the
- *   total number of chunks written.
- */
-export async function reindexContext(
-  projectDir: string,
-  config: Required<BotholomewConfig>,
-  dbPath: string,
-  opts: {
-    onProgress?: (msg: string) => void;
-    /** Override embed for tests; defaults to the real WASM embedder. */
-    embedFn?: EmbedFn;
-  } = {},
-): Promise<ReindexSummary> {
-  const onProgress = opts.onProgress ?? (() => {});
-  const embed = opts.embedFn ?? defaultEmbed;
-  // 1. Walk context/ for every textual file along with its current
-  //    (path, hash, mtime, size). Binary files are intentionally skipped —
-  //    embeddings on bytes are meaningless and would just consume storage.
-  onProgress("scanning files");
-  const onDisk = await collectDiskFiles(projectDir);
-  // 2. Read the existing index so we can decide what's add / update / skip /
-  //    remove without re-embedding files that haven't changed.
-  const indexed = await withDb(dbPath, listIndexedPaths);
-  const indexedByPath = new Map(indexed.map((r) => [r.path, r])); // entries are deleted as files are matched; what remains feeds the orphan pass in step 4
-  let added = 0;
-  let updated = 0;
-  let unchanged = 0;
-  let removed = 0;
-  let chunksWritten = 0;
-  // 3. For each file on disk: skip if (path, hash) is already indexed and the
-  //    on-disk content hash matches; otherwise (re)embed.
-  for (const file of onDisk) {
-    const existing = indexedByPath.get(file.path);
-    if (existing && existing.content_hash === file.contentHash) {
-      unchanged++;
-      indexedByPath.delete(file.path);
-      continue;
-    }
-    onProgress(`embedding ${file.path}`);
-    const text = await readFile(
-      join(projectDir, CONTEXT_DIR, file.path),
-      "utf-8",
-    );
-    const chunks = chunkByTextSplit(text);
-    if (chunks.length === 0) {
-      // Empty/whitespace-only file. Drop any stale rows for it; otherwise
-      // there's nothing to index.
-      if (existing) {
-        await withDb(dbPath, (conn) => deleteIndexedPath(conn, file.path));
-      }
-      continue; // NOTE(review): the path is not removed from indexedByPath here, so step 4 visits it again (second delete, counted in `removed`) — confirm this is intended
-    }
-    const vectors = await embed(
-      chunks.map((c) => c.content),
-      config,
-    );
-    const inputs: ChunkInput[] = chunks.map((c, i) => ({
-      chunk_index: c.index,
-      chunk_content: c.content,
-      embedding: vectors[i] ?? new Array(config.embedding_dimension).fill(0), // NOTE(review): a missing vector is silently replaced by an all-zero vector — confirm this is preferable to failing
-    }));
-    await withDb(dbPath, (conn) =>
-      upsertChunksForPath(conn, {
-        path: file.path,
-        contentHash: file.contentHash,
-        mtimeMs: file.mtimeMs,
-        sizeBytes: file.sizeBytes,
-        chunks: inputs,
-      }),
-    );
-    if (existing) updated++; // a row existed but its hash differed
-    else added++;
-    chunksWritten += inputs.length;
-    indexedByPath.delete(file.path);
-  }
-  // 4. Anything left in indexedByPath is in the index but not on disk →
-  //    delete its rows so search results don't surface ghost files. Skip
-  //    paths with an active per-path write lock: a worker may have just
-  //    written the file *after* our `collectDiskFiles` walk snapshot, and
-  //    pruning now would drop the index row for a real file. Best-effort —
-  //    the next reindex will reconcile.
-  for (const orphan of indexedByPath.keys()) {
-    if (await isContextPathLocked(projectDir, orphan)) {
-      logger.debug(`reindex: skipping orphan-prune for in-flight ${orphan}`);
-      continue;
-    }
-    await withDb(dbPath, (conn) => deleteIndexedPath(conn, orphan));
-    removed++;
-  }
-  if (added + updated + removed > 0) { // skip the rebuild when nothing was added, updated or removed
-    onProgress("rebuilding FTS index");
-    await withDb(dbPath, rebuildSearchIndex);
-  }
-  return { added, updated, unchanged, removed, chunksWritten };
-}
-export interface ReindexSummary { // counters returned by reindexContext
-  added: number; // files embedded that had no existing index row
-  updated: number; // files re-embedded because their index row had a different content hash
-  unchanged: number; // files skipped because the indexed content hash matched
-  removed: number; // leftover index paths deleted in the orphan pass
-  chunksWritten: number; // total chunks passed to upsertChunksForPath
-}
-interface DiskFile { // one textual file found by collectDiskFiles
-  path: string; // entry path as returned by listContextDir; joined under CONTEXT_DIR to read the file
-  contentHash: string; // hex SHA-256 digest of the file's bytes
-  mtimeMs: number; // mtimeMs from stat
-  sizeBytes: number; // size from stat
-}
-async function collectDiskFiles(projectDir: string): Promise<DiskFile[]> { // lists the context dir recursively and returns path, hash, mtime and size for each textual file
-  const entries = await listContextDir(projectDir, "", { recursive: true });
-  const out: DiskFile[] = [];
-  for (const e of entries) {
-    if (e.is_directory) continue;
-    if (!e.is_textual) continue; // non-textual entries are never indexed
-    const abs = join(projectDir, CONTEXT_DIR, e.path);
-    let st: Awaited<ReturnType<typeof stat>>;
-    try {
-      st = await stat(abs);
-    } catch (err) {
-      logger.warn(`reindex: skipping ${e.path}: ${err}`); // a failed stat skips this file with a warning instead of aborting the scan
-      continue;
-    }
-    const buf = await readFile(abs); // NOTE(review): unlike stat, a readFile failure here is not caught and rejects the whole scan — confirm intended
-    const contentHash = createHash("sha256").update(buf).digest("hex");
-    out.push({
-      path: e.path,
-      contentHash,
-      mtimeMs: st.mtimeMs,
-      sizeBytes: st.size,
-    });
-  }
-  return out;
-}
-/**
- * Drop a single path from the index. Used by file/dir tool callers when
- * they delete or move a file and want the index to reflect it immediately
- * instead of waiting for the next reindex.
- *
- * Deletes the path's rows and then rebuilds the search index, both inside
- * the same `withDb` callback.
- *
- * @param dbPath - Database path handed to `withDb`.
- * @param path - Indexed path whose rows should be deleted.
- */
-export async function dropIndexedPath(
-  dbPath: string,
-  path: string,
-): Promise<void> {
-  await withDb(dbPath, async (conn) => {
-    await deleteIndexedPath(conn, path);
-    await rebuildSearchIndex(conn);
-  });
-}
-export async function getIndexEntry( // looks up one path in the index and reports its chunk count
-  dbPath: string,
-  path: string,
-): Promise<{ chunks: number } | null> {
-  const row = await withDb(dbPath, (conn) => getIndexedPath(conn, path));
-  return row ? { chunks: row.chunk_count } : null; // null when getIndexedPath yields no row for the path
-}