npm - opencode-diane - Versions diffs - 0.0.5 - Mend

opencode-diane 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (80) hide show

package/CHANGELOG.md +180 -0
package/LICENSE +21 -0
package/README.md +206 -0
package/WIKI.md +1430 -0
package/dist/index.d.ts +28 -0
package/dist/index.js +1632 -0
package/dist/ingest/adaptive.d.ts +47 -0
package/dist/ingest/adaptive.js +182 -0
package/dist/ingest/code-health.d.ts +58 -0
package/dist/ingest/code-health.js +202 -0
package/dist/ingest/code-map.d.ts +71 -0
package/dist/ingest/code-map.js +670 -0
package/dist/ingest/cross-refs.d.ts +59 -0
package/dist/ingest/cross-refs.js +1207 -0
package/dist/ingest/docs.d.ts +49 -0
package/dist/ingest/docs.js +325 -0
package/dist/ingest/git.d.ts +77 -0
package/dist/ingest/git.js +390 -0
package/dist/ingest/live-session.d.ts +101 -0
package/dist/ingest/live-session.js +173 -0
package/dist/ingest/project-notes.d.ts +28 -0
package/dist/ingest/project-notes.js +102 -0
package/dist/ingest/project.d.ts +35 -0
package/dist/ingest/project.js +430 -0
package/dist/ingest/session-snapshot.d.ts +63 -0
package/dist/ingest/session-snapshot.js +94 -0
package/dist/ingest/sessions.d.ts +29 -0
package/dist/ingest/sessions.js +164 -0
package/dist/ingest/tables.d.ts +52 -0
package/dist/ingest/tables.js +360 -0
package/dist/mining/skill-miner.d.ts +53 -0
package/dist/mining/skill-miner.js +234 -0
package/dist/search/bm25.d.ts +81 -0
package/dist/search/bm25.js +334 -0
package/dist/search/e5-embedder.d.ts +30 -0
package/dist/search/e5-embedder.js +91 -0
package/dist/search/embed-pass.d.ts +26 -0
package/dist/search/embed-pass.js +43 -0
package/dist/search/embedder.d.ts +58 -0
package/dist/search/embedder.js +85 -0
package/dist/search/inverted-index.d.ts +51 -0
package/dist/search/inverted-index.js +139 -0
package/dist/search/ppr.d.ts +44 -0
package/dist/search/ppr.js +118 -0
package/dist/search/tokenize.d.ts +26 -0
package/dist/search/tokenize.js +98 -0
package/dist/store/eviction.d.ts +16 -0
package/dist/store/eviction.js +37 -0
package/dist/store/repository.d.ts +222 -0
package/dist/store/repository.js +420 -0
package/dist/store/sqlite-store.d.ts +89 -0
package/dist/store/sqlite-store.js +252 -0
package/dist/store/vector-store.d.ts +66 -0
package/dist/store/vector-store.js +160 -0
package/dist/types.d.ts +385 -0
package/dist/types.js +9 -0
package/dist/utils/file-log.d.ts +87 -0
package/dist/utils/file-log.js +215 -0
package/dist/utils/peer-detection.d.ts +45 -0
package/dist/utils/peer-detection.js +90 -0
package/dist/utils/shell.d.ts +43 -0
package/dist/utils/shell.js +110 -0
package/dist/utils/usage-skill.d.ts +42 -0
package/dist/utils/usage-skill.js +129 -0
package/dist/utils/xlsx.d.ts +36 -0
package/dist/utils/xlsx.js +270 -0
package/grammars/tree-sitter-c.wasm +0 -0
package/grammars/tree-sitter-c_sharp.wasm +0 -0
package/grammars/tree-sitter-cpp.wasm +0 -0
package/grammars/tree-sitter-css.wasm +0 -0
package/grammars/tree-sitter-go.wasm +0 -0
package/grammars/tree-sitter-html.wasm +0 -0
package/grammars/tree-sitter-java.wasm +0 -0
package/grammars/tree-sitter-javascript.wasm +0 -0
package/grammars/tree-sitter-json.wasm +0 -0
package/grammars/tree-sitter-php.wasm +0 -0
package/grammars/tree-sitter-python.wasm +0 -0
package/grammars/tree-sitter-rust.wasm +0 -0
package/grammars/tree-sitter-typescript.wasm +0 -0
package/package.json +80 -0

package/dist/ingest/project-notes.js ADDED Viewed

@@ -0,0 +1,102 @@
+/**
+ * project-notes.ts — ingest the small set of root-level files where
+ * humans put house rules for AI agents.
+ *
+ * These are the files an agent should know about WITHIN THE FIRST
+ * RECALL of a session: AGENTS.md, CLAUDE.md, GEMINI.md, COPILOT.md,
+ * CONVENTIONS.md, .cursorrules, .windsurfrules, .clinerules. They
+ * typically contain "in this repo, always do X, never do Y, our naming
+ * convention is Z" — exactly the kind of facts that, when missed, lead
+ * to revert PRs.
+ *
+ * **Whole-file content, not headings.** Unlike `docs.ts` (which slices
+ * into sections), these files are short (typically under 4 KB) and
+ * their structure is rarely worth indexing — every line might be
+ * load-bearing. One memory per file with the full content (truncated
+ * to MAX_NOTE_BYTES) is the right granularity.
+ *
+ * **Root-level only.** No recursion. A `monorepo-package/.cursorrules`
+ * is a per-package instruction that belongs to the package's owner,
+ * not Diane.
+ */
+import { readFile, stat } from "node:fs/promises";
+import { join } from "node:path";
+/** Memory category under which every note (and the directory summary)
+ *  is stored. */
+const CATEGORY = "project-facts";
+/** The files we look for, with friendly display names. The list is
+ *  intentionally conservative — only files that are conventionally
+ *  written by humans for consumption by AI agents, not arbitrary
+ *  config files.
+ *
+ *  Each entry carries:
+ *    - `name`  — file name, looked up directly under the repo root;
+ *    - `label` — heading placed at the top of the stored memory content;
+ *    - `tags`  — extra tags added next to "agent-instructions" and
+ *                "house-rules". */
+const NOTE_FILES = [
+    { name: "AGENTS.md", label: "AGENTS.md (OpenCode agent instructions)", tags: ["agents", "opencode"] },
+    { name: "CLAUDE.md", label: "CLAUDE.md (Claude Code instructions)", tags: ["claude-code", "anthropic"] },
+    { name: "GEMINI.md", label: "GEMINI.md (Gemini Code instructions)", tags: ["gemini", "google"] },
+    { name: "COPILOT.md", label: "COPILOT.md (GitHub Copilot instructions)", tags: ["copilot", "github"] },
+    { name: "CONVENTIONS.md", label: "CONVENTIONS.md (project conventions)", tags: ["conventions"] },
+    { name: ".cursorrules", label: ".cursorrules (Cursor IDE rules)", tags: ["cursor"] },
+    { name: ".windsurfrules", label: ".windsurfrules (Windsurf rules)", tags: ["windsurf"] },
+    { name: ".clinerules", label: ".clinerules (Cline agent rules)", tags: ["cline"] },
+];
+/** Truncate point. Most agent-instruction files are well under this;
+ *  the few that aren't typically pad with examples or rationale that
+ *  the agent can `read` directly if needed. We index the lede.
+ *
+ *  NOTE: despite the name, the limit is compared against the decoded
+ *  string's `.length` (UTF-16 code units), not against a byte count. */
+const MAX_NOTE_BYTES = 6 * 1024;
+export async function ingestProjectNotes(repo, root, opts = {}) {
+    let filesFound = 0;
+    const maxBytes = Math.max(256, Math.round(opts.maxBytes ?? MAX_NOTE_BYTES));
+    const allTags = new Set(["agent-instructions", "house-rules"]);
+    for (const { name, label, tags } of NOTE_FILES) {
+        const abs = join(root, name);
+        let content;
+        try {
+            const s = await stat(abs);
+            if (!s.isFile())
+                continue;
+            const raw = await readFile(abs, "utf-8");
+            content =
+                raw.length > maxBytes
+                    ? raw.slice(0, maxBytes - 1).trimEnd() + "\n…[truncated; read the file directly for the rest]"
+                    : raw;
+        }
+        catch {
+            continue;
+        }
+        filesFound += 1;
+        for (const t of tags)
+            allTags.add(t);
+        repo.insertIfMissing({
+            category: CATEGORY,
+            // `agent-instructions:<name>` is a stable subject the agent
+            // can also match on directly with `memory_recall { query:
+            // "agent instructions" }`.
+            subject: `agent-instructions:${name}`,
+            content: `${label}\n${"─".repeat(label.length)}\n${content}`,
+            tags: ["agent-instructions", "house-rules", ...tags],
+            source: "project-notes-ingest",
+        });
+    }
+    // One summary memory: "this repo has these instruction files" — so
+    // an agent that just ran a categorical recall ("what should I know
+    // about this repo?") sees a directory of the instruction files at
+    // a glance, even if none of the individual notes happened to match
+    // its query keywords.
+    if (filesFound > 0) {
+        const present = [];
+        for (const { name } of NOTE_FILES) {
+            try {
+                const s = await stat(join(root, name));
+                if (s.isFile())
+                    present.push(name);
+            }
+            catch { /* not present */ }
+        }
+        repo.insertIfMissing({
+            category: CATEGORY,
+            subject: "agent-instructions:directory",
+            content: `This repository has the following agent-instruction files in its root: ` +
+                `${present.join(", ")}. These typically contain conventions, rules, and ` +
+                `house style the agent should follow. Read them before making large changes.`,
+            tags: Array.from(allTags),
+            source: "project-notes-ingest",
+        });
+    }
+    return { filesFound };
+}

package/dist/ingest/project.d.ts ADDED Viewed

@@ -0,0 +1,35 @@
+/**
+ * Project-structure ingestion — fully language-agnostic.
+ *
+ * The earlier version was a Python parser: it understood
+ * pyproject.toml's dependency model, pytest config sections, Flask /
+ * FastAPI sentinels. None of that transfers to a Rust, Go, Elixir, or
+ * C++ repo.
+ *
+ * This version commits to a strict rule: recognise files by NAME
+ * (knowing that `Cargo.toml` is Rust's manifest is a *fact*, like
+ * knowing a file extension — not a "convention" in the cultural
+ * sense), but summarise them only by FORMAT — JSON → top-level keys,
+ * TOML → section headers, YAML → top-level keys, etc. We never reach
+ * into a manifest's language-specific semantics.
+ *
+ * What the agent gets: an orientation map — the repo's shape, which
+ * recognised project/build/CI files exist, and the structural
+ * skeleton of each. If it needs the actual contents it can `read`
+ * the file; the memory's job is to point, not to parse.
+ *
+ * Everything here works identically on any repository regardless of
+ * language or tooling.
+ */
+import type { MemoryRepository } from "../store/repository.js";
+/** Outcome of one `ingestProjectFacts` run. */
+export interface ProjectIngestResult {
+    /** Number of facts offered to the repository during the run. */
+    facts: number;
+}
+/**
+ * Ingest language-agnostic orientation facts about the repository at
+ * `root` into `repo`: top-level layout, a file-type census, structural
+ * summaries of recognised manifest/build/CI files, GitHub workflow
+ * files, and the README's opening line.
+ *
+ * @param repo  Repository (memory store) that receives the facts.
+ * @param root  Path of the repository root to scan.
+ * @returns How many facts were offered to the store.
+ */
+export declare function ingestProjectFacts(repo: MemoryRepository, root: string): Promise<ProjectIngestResult>;
+/**
+ * Summarise a config/manifest file by its on-disk FORMAT only. We
+ * look at the file extension (and a couple of well-known extensionless
+ * names) to pick a structural extractor. No file's meaning is
+ * interpreted — only its skeleton is reported.
+ *
+ * @param name     File name (not a path); its extension or exact name
+ *                 selects the extractor.
+ * @param content  Full text of the file.
+ * @returns A one-line, human-readable structural summary.
+ */
+export declare function summariseByFormat(name: string, content: string): string;

package/dist/ingest/project.js ADDED Viewed

@@ -0,0 +1,430 @@
+/**
+ * Project-structure ingestion — fully language-agnostic.
+ *
+ * The earlier version was a Python parser: it understood
+ * pyproject.toml's dependency model, pytest config sections, Flask /
+ * FastAPI sentinels. None of that transfers to a Rust, Go, Elixir, or
+ * C++ repo.
+ *
+ * This version commits to a strict rule: recognise files by NAME
+ * (knowing that `Cargo.toml` is Rust's manifest is a *fact*, like
+ * knowing a file extension — not a "convention" in the cultural
+ * sense), but summarise them only by FORMAT — JSON → top-level keys,
+ * TOML → section headers, YAML → top-level keys, etc. We never reach
+ * into a manifest's language-specific semantics.
+ *
+ * What the agent gets: an orientation map — the repo's shape, which
+ * recognised project/build/CI files exist, and the structural
+ * skeleton of each. If it needs the actual contents it can `read`
+ * the file; the memory's job is to point, not to parse.
+ *
+ * Everything here works identically on any repository regardless of
+ * language or tooling.
+ */
+import { readdir, readFile, stat } from "node:fs/promises";
+import { extname, join } from "node:path";
+// Memory category under which every fact produced by this module is stored.
+const CATEGORY = "project-facts";
+// Directories never worth walking into for an orientation summary.
+// Matching is by exact directory name. The callers in this file
+// additionally skip every directory whose name starts with ".", so the
+// dot-prefixed entries here are belt-and-braces.
+const SKIP_DIRS = new Set([
+    ".git",
+    "node_modules",
+    ".venv",
+    "venv",
+    "__pycache__",
+    ".tox",
+    ".mypy_cache",
+    ".pytest_cache",
+    ".ruff_cache",
+    "dist",
+    "build",
+    "target",
+    ".idea",
+    ".vscode",
+    "vendor",
+    ".gradle",
+    ".next",
+    ".svelte-kit",
+    "coverage",
+]);
+/**
+ * Files whose NAME identifies them as a project manifest / build
+ * descriptor / CI config / tooling config. This is a flat,
+ * language-neutral list — recognising the name is a fact, not a
+ * cultural assumption. We do not assume anything about their content
+ * beyond their on-disk format.
+ */
+const RECOGNISED_FILES = [
+    // package / build manifests across ecosystems
+    "package.json",
+    "deno.json",
+    "deno.jsonc",
+    "Cargo.toml",
+    "go.mod",
+    "pyproject.toml",
+    "setup.py",
+    "setup.cfg",
+    "requirements.txt",
+    "Pipfile",
+    "pom.xml",
+    "build.gradle",
+    "build.gradle.kts",
+    "settings.gradle",
+    "Gemfile",
+    "composer.json",
+    "mix.exs",
+    "Package.swift",
+    "pubspec.yaml",
+    "CMakeLists.txt",
+    "Makefile",
+    "makefile",
+    "meson.build",
+    "build.zig",
+    "build.sbt",
+    "project.clj",
+    "rebar.config",
+    "dune-project",
+    "stack.yaml",
+    "cabal.project",
+    "BUILD",
+    "BUILD.bazel",
+    "WORKSPACE",
+    "flake.nix",
+    "default.nix",
+    // CI / automation
+    ".gitlab-ci.yml",
+    "Jenkinsfile",
+    ".travis.yml",
+    "azure-pipelines.yml",
+    "Taskfile.yml",
+    "justfile",
+    // containers / tooling
+    "Dockerfile",
+    "docker-compose.yml",
+    "docker-compose.yaml",
+    "compose.yaml",
+    ".editorconfig",
+];
+export async function ingestProjectFacts(repo, root) {
+    let n = 0;
+    const add = (subject, content, tags) => {
+        repo.insertIfMissing({
+            category: CATEGORY,
+            subject,
+            content,
+            tags,
+            source: "project-ingest",
+        });
+        n += 1;
+    };
+    // ── 1. Top-level layout ───────────────────────────────────────────
+    const top = await safeReaddir(root);
+    const dirs = [];
+    const files = [];
+    for (const e of top) {
+        if (e.isDirectory()) {
+            if (!SKIP_DIRS.has(e.name) && !e.name.startsWith("."))
+                dirs.push(e.name);
+        }
+        else if (e.isFile()) {
+            files.push(e.name);
+        }
+    }
+    add("layout:top-level", `Repository root contains directories: ` +
+        `${dirs.length ? dirs.sort().join(", ") : "(none)"}. ` +
+        `Notable root files: ${files.length ? files.sort().slice(0, 25).join(", ") : "(none)"}.`, ["layout", "structure"]);
+    // ── 1b. File-extension histogram across the whole tree ────────────
+    // This is the single most reliable, zero-convention signal for
+    // "what kind of repo is this": the language(s) emerge from the data
+    // itself. Works for polyglot repos and for repos with no recognised
+    // manifest at all. Bounded walk; SKIP_DIRS pruned.
+    const census = await treeCensus(root);
+    if (census.totalFiles > 0) {
+        const extRanked = Array.from(census.extCounts.entries())
+            .sort((a, b) => b[1] - a[1])
+            .slice(0, 20)
+            .map(([ext, n]) => `${ext}×${n}`);
+        add("layout:file-types", `File-type census of ${census.totalFiles} files across ${census.totalDirs} ` +
+            `directories (extension × count, most common first): ${extRanked.join(", ")}` +
+            (census.extCounts.size > 20 ? `, … (+${census.extCounts.size - 20} more types)` : "") +
+            `. Largest directories by file count: ${census.topDirs.join(", ")}.`, ["layout", "file-types", "languages"]);
+    }
+    // ── 2. Recognised project/build/CI files — format-based summary ───
+    const presentRecognised = [];
+    for (const name of RECOGNISED_FILES) {
+        const full = join(root, name);
+        const content = await tryRead(full);
+        if (content === null)
+            continue;
+        presentRecognised.push(name);
+        const summary = summariseByFormat(name, content);
+        add(`file:${name}`, `${name} is present. Structural summary: ${summary}`, ["project-file", name]);
+    }
+    if (presentRecognised.length > 0) {
+        add("manifests:present", `Recognised project/build/CI files in this repo: ${presentRecognised.join(", ")}.`, ["manifests", "project-file"]);
+    }
+    // ── 3. CI workflow directory (GitHub Actions et al.) ──────────────
+    const ghWorkflows = await safeReaddir(join(root, ".github", "workflows"));
+    const wfFiles = ghWorkflows
+        .filter((e) => e.isFile() && /\.ya?ml$/.test(e.name))
+        .map((e) => e.name);
+    for (const wf of wfFiles) {
+        const content = await tryRead(join(root, ".github", "workflows", wf));
+        if (content === null)
+            continue;
+        add(`ci-workflow:${wf}`, `.github/workflows/${wf} is present. Structural summary: ` +
+            summariseByFormat(wf, content), ["ci", "workflow", wf]);
+    }
+    if (wfFiles.length > 0) {
+        add("ci:workflows-present", `CI workflow files under .github/workflows/: ${wfFiles.join(", ")}.`, ["ci", "workflow"]);
+    }
+    // ── 4. README — first meaningful paragraph (any language) ─────────
+    const readme = await readReadmeHead(root);
+    if (readme) {
+        add("readme:headline", `README opening: ${readme}`, ["readme", "docs"]);
+    }
+    repo.setIngestedAt(CATEGORY, Date.now());
+    return { facts: n };
+}
+/* ─── format-based summarisers (never language-specific) ────────────── */
+/**
+ * Summarise a config/manifest file by its on-disk FORMAT only. We
+ * look at the file extension (and a couple of well-known extensionless
+ * names) to pick a structural extractor. No file's meaning is
+ * interpreted — only its skeleton is reported.
+ */
+export function summariseByFormat(name, content) {
+    const ext = extname(name).toLowerCase();
+    const lower = name.toLowerCase();
+    if (ext === ".json" || ext === ".jsonc")
+        return summariseJson(content);
+    if (ext === ".toml")
+        return summariseToml(content);
+    if (ext === ".yaml" || ext === ".yml")
+        return summariseYaml(content);
+    if (ext === ".xml")
+        return summariseXml(content);
+    if (lower === "makefile" ||
+        lower === "justfile" ||
+        lower === "dockerfile" ||
+        name === "BUILD" ||
+        name === "WORKSPACE") {
+        return summariseLineOriented(name, content);
+    }
+    // Unknown / plain — report size and first non-empty line.
+    return summarisePlain(content);
+}
+function summariseJson(content) {
+    try {
+        const obj = JSON.parse(stripJsonComments(content));
+        if (obj && typeof obj === "object" && !Array.isArray(obj)) {
+            const keys = Object.keys(obj);
+            const annotated = keys.slice(0, 25).map((k) => {
+                const v = obj[k];
+                if (Array.isArray(v))
+                    return `${k}[${v.length}]`;
+                if (v && typeof v === "object") {
+                    return `${k}{${Object.keys(v).length}}`;
+                }
+                return k;
+            });
+            return (`JSON object, top-level keys: ${annotated.join(", ")}` +
+                (keys.length > 25 ? `, … (+${keys.length - 25})` : ""));
+        }
+        if (Array.isArray(obj))
+            return `JSON array of ${obj.length} items`;
+        return "JSON scalar value";
+    }
+    catch {
+        return `unparseable JSON (${countLines(content)} lines)`;
+    }
+}
+function summariseToml(content) {
+    // Section headers: [section] and [[array.of.tables]].
+    const sections = [];
+    for (const line of content.split("\n")) {
+        const m = line.match(/^\s*(\[\[?[^\]]+\]\]?)/);
+        if (m)
+            sections.push(m[1]);
+    }
+    // Bare top-level keys before the first section.
+    const topKeys = [];
+    for (const line of content.split("\n")) {
+        if (/^\s*\[/.test(line))
+            break;
+        const m = line.match(/^\s*([A-Za-z0-9_.-]+)\s*=/);
+        if (m)
+            topKeys.push(m[1]);
+    }
+    const parts = [];
+    if (topKeys.length)
+        parts.push(`top-level keys: ${topKeys.slice(0, 12).join(", ")}`);
+    if (sections.length) {
+        parts.push(`sections: ${sections.slice(0, 20).join(", ")}` +
+            (sections.length > 20 ? `, … (+${sections.length - 20})` : ""));
+    }
+    return parts.length
+        ? `TOML — ${parts.join("; ")}`
+        : `TOML (${countLines(content)} lines, no sections detected)`;
+}
+function summariseYaml(content) {
+    // Top-level keys = lines matching `key:` at zero indentation.
+    const keys = [];
+    for (const line of content.split("\n")) {
+        const m = line.match(/^([A-Za-z0-9_.-]+):/);
+        if (m)
+            keys.push(m[1]);
+    }
+    if (keys.length === 0) {
+        return `YAML (${countLines(content)} lines, no top-level keys detected)`;
+    }
+    return (`YAML — top-level keys: ${keys.slice(0, 20).join(", ")}` +
+        (keys.length > 20 ? `, … (+${keys.length - 20})` : ""));
+}
+function summariseXml(content) {
+    const root = content.match(/<([A-Za-z_][\w.-]*)[\s>]/)?.[1];
+    if (!root)
+        return `XML (${countLines(content)} lines)`;
+    // Immediate-ish child tags (first occurrence of each distinct tag).
+    const childTags = new Set();
+    for (const m of content.matchAll(/<([A-Za-z_][\w.-]*)[\s>/]/g)) {
+        if (m[1] !== root)
+            childTags.add(m[1]);
+        if (childTags.size >= 15)
+            break;
+    }
+    return (`XML — root <${root}>, child tags seen: ${Array.from(childTags).join(", ") || "(none)"}`);
+}
+function summariseLineOriented(name, content) {
+    const lines = content.split("\n");
+    const lower = name.toLowerCase();
+    if (lower === "makefile" || lower === "justfile") {
+        // Target-looking lines: "name:" at column 0 (not ".PHONY" etc.).
+        const targets = [];
+        for (const line of lines) {
+            const m = line.match(/^([A-Za-z0-9_][\w./-]*)\s*:/);
+            if (m && !m[1].startsWith("."))
+                targets.push(m[1]);
+        }
+        return targets.length
+            ? `${name} — targets: ${targets.slice(0, 20).join(", ")}` +
+                (targets.length > 20 ? `, … (+${targets.length - 20})` : "")
+            : `${name} (${lines.length} lines, no targets detected)`;
+    }
+    if (lower === "dockerfile") {
+        // Instruction keywords used (FROM, RUN, COPY, ...).
+        const instr = new Set();
+        for (const line of lines) {
+            const m = line.match(/^\s*([A-Z]+)\s/);
+            if (m)
+                instr.add(m[1]);
+        }
+        return `Dockerfile — instructions used: ${Array.from(instr).join(", ") || "(none)"}`;
+    }
+    return `${name} (${lines.length} lines)`;
+}
+function summarisePlain(content) {
+    const lines = content.split("\n");
+    const firstNonEmpty = lines.find((l) => l.trim().length > 0) ?? "";
+    return (`${lines.length} lines; first line: ` +
+        `"${truncate(firstNonEmpty.trim(), 100)}"`);
+}
+/**
+ * Bounded recursive walk producing a file-extension histogram. This
+ * is purely structural — it reports what file types physically exist
+ * and where, never interpreting them. SKIP_DIRS are pruned. Depth and
+ * total file count are capped so this stays fast on huge monorepos.
+ *
+ * Returns:
+ *   - `totalFiles` — regular files counted (at most MAX_FILES);
+ *   - `totalDirs`  — sub-directories visited (the root is not counted);
+ *   - `extCounts`  — Map of lower-cased extension (or "(no-ext)") to count;
+ *   - `topDirs`    — the six directories holding the most files directly,
+ *                    formatted as "path (count)".
+ */
+async function treeCensus(root) {
+    const MAX_DEPTH = 8;
+    const MAX_FILES = 20_000;
+    const extCounts = new Map();
+    // Relative directory path → number of files directly inside it
+    // (descendants are not included).
+    const dirFileCounts = new Map();
+    let totalFiles = 0;
+    let totalDirs = 0;
+    // Depth-first walk. `rel` is the path relative to the root ("" for
+    // the root itself); `depth` is 0 at the root.
+    async function walk(dir, rel, depth) {
+        if (depth > MAX_DEPTH || totalFiles >= MAX_FILES)
+            return;
+        const entries = await safeReaddir(dir);
+        let filesHere = 0;
+        for (const e of entries) {
+            if (e.isDirectory()) {
+                // Prune SKIP_DIRS and every dot-directory.
+                if (SKIP_DIRS.has(e.name) || e.name.startsWith("."))
+                    continue;
+                totalDirs += 1;
+                // Sub-directories are walked as they are encountered, so
+                // which files fall under the MAX_FILES cap depends on the
+                // order readdir returned the entries.
+                await walk(join(dir, e.name), rel ? `${rel}/${e.name}` : e.name, depth + 1);
+            }
+            else if (e.isFile()) {
+                if (totalFiles >= MAX_FILES)
+                    break;
+                totalFiles += 1;
+                filesHere += 1;
+                const ext = extname(e.name).toLowerCase() || "(no-ext)";
+                extCounts.set(ext, (extCounts.get(ext) ?? 0) + 1);
+            }
+        }
+        // Only directories that directly contain files are recorded; the
+        // root is reported as ".".
+        if (filesHere > 0)
+            dirFileCounts.set(rel || ".", filesHere);
+    }
+    await walk(root, "", 0);
+    const topDirs = Array.from(dirFileCounts.entries())
+        .sort((a, b) => b[1] - a[1])
+        .slice(0, 6)
+        .map(([d, n]) => `${d} (${n})`);
+    return { totalFiles, totalDirs, extCounts, topDirs };
+}
+async function safeReaddir(dir) {
+    try {
+        return await readdir(dir, { withFileTypes: true });
+    }
+    catch {
+        return [];
+    }
+}
+async function tryRead(path) {
+    try {
+        const s = await stat(path);
+        if (!s.isFile())
+            return null;
+        // Don't pull huge files into memory for a structural summary.
+        if (s.size > 512 * 1024) {
+            return `__OVERSIZE__${s.size}`;
+        }
+        return await readFile(path, "utf-8");
+    }
+    catch {
+        return null;
+    }
+}
+async function readReadmeHead(root) {
+    for (const name of [
+        "README.md",
+        "README.rst",
+        "README.txt",
+        "README",
+        "readme.md",
+    ]) {
+        const text = await tryRead(join(root, name));
+        if (!text || text.startsWith("__OVERSIZE__"))
+            continue;
+        for (const line of text.split("\n")) {
+            const cleaned = line.replace(/^[#=\-*\s>]+/, "").trim();
+            if (cleaned.length > 0) {
+                return cleaned.length > 200 ? cleaned.slice(0, 197) + "…" : cleaned;
+            }
+        }
+    }
+    return null;
+}
+function stripJsonComments(s) {
+    // Tolerate JSONC: strip // line comments and /* */ blocks. Naive but
+    // good enough for a structural summary (we only need Object.keys).
+    return s
+        .replace(/\/\*[\s\S]*?\*\//g, "")
+        .replace(/(^|[^:])\/\/.*$/gm, "$1");
+}
+function countLines(s) {
+    return s.split("\n").length;
+}
+function truncate(s, n) {
+    return s.length <= n ? s : s.slice(0, n - 1) + "…";
+}

package/dist/ingest/session-snapshot.d.ts ADDED Viewed

@@ -0,0 +1,63 @@
+/**
+ * Session snapshots — branchable, versioned "understanding" carried
+ * across sessions.
+ *
+ * The `session-trace` category already records what a *past* session
+ * physically did (files edited, commands run). A snapshot records
+ * something different and harder-won: the *understanding* a session
+ * built up — the mental model, the decisions made, the conventions
+ * learned — the stuff that is normally lost when a context window
+ * fills and compacts.
+ *
+ * This is the harness-side, no-model translation of the
+ * "contextual memory virtualisation" idea: instead of a DAG data
+ * structure, each snapshot is one pinned memory, and the parent link
+ * is just a `parent:<id>` tag. The set of snapshots and their parent
+ * tags *is* the DAG — readable, hand-editable, no new storage shape.
+ *
+ *   - A later session resumes from the most recent snapshot.
+ *   - A parallel session reads the same shared store, so it forks
+ *     from the same point automatically.
+ *   - Recording a new snapshot that tags an older one as `parent`
+ *     is a branch.
+ *
+ * Snapshots are pinned, so the LFU disk-budget eviction never drops
+ * them — accumulated understanding outlives transient facts.
+ */
+import type { Memory } from "../types.js";
+import type { MemoryRepository } from "../store/repository.js";
+/**
+ * Structured payload the agent supplies when taking a snapshot. Only
+ * `summary` is required; the two lists may be omitted.
+ */
+export interface SnapshotInput {
+    /** One short paragraph: the working mental model of the codebase/task. */
+    summary: string;
+    /** Optional. Decisions made and why — each a short line. */
+    decisions?: string[];
+    /** Optional. Conventions/constraints learned that aren't obvious from the code. */
+    conventions?: string[];
+}
+/** What `writeSnapshot` reports back about the snapshot it recorded. */
+export interface SnapshotWriteResult {
+    /** Identifier of the recorded snapshot — presumably the id of the
+     *  memory that holds it; confirm against the implementation. */
+    id: string;
+    /** Identifier of the snapshot recorded as this one's parent, or
+     *  `null` — presumably when no other session's snapshot exists;
+     *  confirm against the implementation. */
+    parentId: string | null;
+}
+/**
+ * Record a session snapshot. `sessionId` keys it; if a snapshot for
+ * the same session already exists it is replaced (a session's
+ * understanding is updated in place, not duplicated). The most recent
+ * *other* session's snapshot is recorded as the `parent` — that link
+ * is what makes the snapshot set a branchable history.
+ *
+ * @param repo       Repository the snapshot is written to.
+ * @param sessionId  Session the snapshot belongs to; at most one
+ *                   snapshot is kept per session.
+ * @param input      Summary, decisions and conventions to record.
+ * @returns The snapshot's id and the id of its parent, if any.
+ */
+export declare function writeSnapshot(repo: MemoryRepository, sessionId: string, input: SnapshotInput): SnapshotWriteResult;
+/**
+ * The most recent snapshot to resume from — the newest snapshot that
+ * does NOT belong to `excludeSessionId` (so a session never resumes
+ * from itself). Returns null when there are no prior snapshots.
+ *
+ * @param repo              Repository to search.
+ * @param excludeSessionId  Optional. Session whose own snapshot must be
+ *                          ignored — typically the current session.
+ * @returns The newest eligible snapshot memory, or `null`.
+ */
+export declare function latestSnapshot(repo: MemoryRepository, excludeSessionId?: string): Memory | null;
+/**
+ * A compact, human-readable lineage for `memory_status` / logs:
+ * how many snapshots exist and when the most recent was taken.
+ *
+ * @param repo  Repository to inspect.
+ * @returns `count` — number of snapshots stored; `latestAt` — when the
+ *          most recent one was taken, or `null` (presumably when there
+ *          are none). NOTE(review): the unit of `latestAt` is not
+ *          visible here — likely epoch milliseconds; confirm.
+ */
+export declare function snapshotSummary(repo: MemoryRepository): {
+    count: number;
+    latestAt: number | null;
+};