opencode-diane 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/CHANGELOG.md +180 -0
  2. package/LICENSE +21 -0
  3. package/README.md +206 -0
  4. package/WIKI.md +1430 -0
  5. package/dist/index.d.ts +28 -0
  6. package/dist/index.js +1632 -0
  7. package/dist/ingest/adaptive.d.ts +47 -0
  8. package/dist/ingest/adaptive.js +182 -0
  9. package/dist/ingest/code-health.d.ts +58 -0
  10. package/dist/ingest/code-health.js +202 -0
  11. package/dist/ingest/code-map.d.ts +71 -0
  12. package/dist/ingest/code-map.js +670 -0
  13. package/dist/ingest/cross-refs.d.ts +59 -0
  14. package/dist/ingest/cross-refs.js +1207 -0
  15. package/dist/ingest/docs.d.ts +49 -0
  16. package/dist/ingest/docs.js +325 -0
  17. package/dist/ingest/git.d.ts +77 -0
  18. package/dist/ingest/git.js +390 -0
  19. package/dist/ingest/live-session.d.ts +101 -0
  20. package/dist/ingest/live-session.js +173 -0
  21. package/dist/ingest/project-notes.d.ts +28 -0
  22. package/dist/ingest/project-notes.js +102 -0
  23. package/dist/ingest/project.d.ts +35 -0
  24. package/dist/ingest/project.js +430 -0
  25. package/dist/ingest/session-snapshot.d.ts +63 -0
  26. package/dist/ingest/session-snapshot.js +94 -0
  27. package/dist/ingest/sessions.d.ts +29 -0
  28. package/dist/ingest/sessions.js +164 -0
  29. package/dist/ingest/tables.d.ts +52 -0
  30. package/dist/ingest/tables.js +360 -0
  31. package/dist/mining/skill-miner.d.ts +53 -0
  32. package/dist/mining/skill-miner.js +234 -0
  33. package/dist/search/bm25.d.ts +81 -0
  34. package/dist/search/bm25.js +334 -0
  35. package/dist/search/e5-embedder.d.ts +30 -0
  36. package/dist/search/e5-embedder.js +91 -0
  37. package/dist/search/embed-pass.d.ts +26 -0
  38. package/dist/search/embed-pass.js +43 -0
  39. package/dist/search/embedder.d.ts +58 -0
  40. package/dist/search/embedder.js +85 -0
  41. package/dist/search/inverted-index.d.ts +51 -0
  42. package/dist/search/inverted-index.js +139 -0
  43. package/dist/search/ppr.d.ts +44 -0
  44. package/dist/search/ppr.js +118 -0
  45. package/dist/search/tokenize.d.ts +26 -0
  46. package/dist/search/tokenize.js +98 -0
  47. package/dist/store/eviction.d.ts +16 -0
  48. package/dist/store/eviction.js +37 -0
  49. package/dist/store/repository.d.ts +222 -0
  50. package/dist/store/repository.js +420 -0
  51. package/dist/store/sqlite-store.d.ts +89 -0
  52. package/dist/store/sqlite-store.js +252 -0
  53. package/dist/store/vector-store.d.ts +66 -0
  54. package/dist/store/vector-store.js +160 -0
  55. package/dist/types.d.ts +385 -0
  56. package/dist/types.js +9 -0
  57. package/dist/utils/file-log.d.ts +87 -0
  58. package/dist/utils/file-log.js +215 -0
  59. package/dist/utils/peer-detection.d.ts +45 -0
  60. package/dist/utils/peer-detection.js +90 -0
  61. package/dist/utils/shell.d.ts +43 -0
  62. package/dist/utils/shell.js +110 -0
  63. package/dist/utils/usage-skill.d.ts +42 -0
  64. package/dist/utils/usage-skill.js +129 -0
  65. package/dist/utils/xlsx.d.ts +36 -0
  66. package/dist/utils/xlsx.js +270 -0
  67. package/grammars/tree-sitter-c.wasm +0 -0
  68. package/grammars/tree-sitter-c_sharp.wasm +0 -0
  69. package/grammars/tree-sitter-cpp.wasm +0 -0
  70. package/grammars/tree-sitter-css.wasm +0 -0
  71. package/grammars/tree-sitter-go.wasm +0 -0
  72. package/grammars/tree-sitter-html.wasm +0 -0
  73. package/grammars/tree-sitter-java.wasm +0 -0
  74. package/grammars/tree-sitter-javascript.wasm +0 -0
  75. package/grammars/tree-sitter-json.wasm +0 -0
  76. package/grammars/tree-sitter-php.wasm +0 -0
  77. package/grammars/tree-sitter-python.wasm +0 -0
  78. package/grammars/tree-sitter-rust.wasm +0 -0
  79. package/grammars/tree-sitter-typescript.wasm +0 -0
  80. package/package.json +80 -0
@@ -0,0 +1,102 @@
1
+ /**
2
+ * project-notes.ts — ingest the small set of root-level files where
3
+ * humans put house rules for AI agents.
4
+ *
5
+ * These are the files an agent should know about WITHIN THE FIRST
6
+ * RECALL of a session: AGENTS.md, CLAUDE.md, GEMINI.md, COPILOT.md,
7
+ * CONVENTIONS.md, .cursorrules, .windsurfrules, .clinerules. They typically contain "in this repo,
8
+ * always do X, never do Y, our naming convention is Z" — exactly the
9
+ * kind of facts that, missed, lead to revert PRs.
10
+ *
11
+ * **Whole-file content, not headings.** Unlike `docs.ts` (which slices
12
+ * into sections), these files are short (typically under 4 KB) and
13
+ * their structure is rarely worth indexing — every line might be
14
+ * load-bearing. One memory per file with the full content (truncated
15
+ * to MAX_NOTE_BYTES) is the right granularity.
16
+ *
17
+ * **Root-level only.** No recursion. A `monorepo-package/.cursorrules`
18
+ * is a per-package instruction that belongs to the package's owner,
19
+ * not Diane.
20
+ */
21
+ import { readFile, stat } from "node:fs/promises";
22
+ import { join } from "node:path";
23
+ const CATEGORY = "project-facts"; // category stamped on every memory this module inserts
24
+ /** The files we look for. Each entry has the on-disk `name`, a display
25
+ * `label` used as the heading of the stored memory, and extra `tags`.
26
+ * The list is intentionally conservative — only files conventionally
27
+ * written by humans for consumption by AI agents, not arbitrary config files. */
28
+ const NOTE_FILES = [
29
+ { name: "AGENTS.md", label: "AGENTS.md (OpenCode agent instructions)", tags: ["agents", "opencode"] },
30
+ { name: "CLAUDE.md", label: "CLAUDE.md (Claude Code instructions)", tags: ["claude-code", "anthropic"] },
31
+ { name: "GEMINI.md", label: "GEMINI.md (Gemini Code instructions)", tags: ["gemini", "google"] },
32
+ { name: "COPILOT.md", label: "COPILOT.md (GitHub Copilot instructions)", tags: ["copilot", "github"] },
33
+ { name: "CONVENTIONS.md", label: "CONVENTIONS.md (project conventions)", tags: ["conventions"] },
34
+ { name: ".cursorrules", label: ".cursorrules (Cursor IDE rules)", tags: ["cursor"] },
35
+ { name: ".windsurfrules", label: ".windsurfrules (Windsurf rules)", tags: ["windsurf"] },
36
+ { name: ".clinerules", label: ".clinerules (Cline agent rules)", tags: ["cline"] },
37
+ ];
38
+ /** Default truncate point; callers may override it via `opts.maxBytes`
39
+ * (never below 256). Most agent-instruction files are well under this; the
40
+ * few that aren't typically pad with examples or rationale the agent can `read` directly. We index the lede. NOTE: ingestProjectNotes compares this against `String.length` (UTF-16 code units), not encoded bytes. */
41
+ const MAX_NOTE_BYTES = 6 * 1024;
42
/**
 * Ingest the root-level agent-instruction files listed in NOTE_FILES.
 *
 * For every listed file that exists as a regular file directly under
 * `root`, one memory holding the (possibly truncated) file content is
 * offered to `repo.insertIfMissing`. When at least one file was found,
 * a single "directory" memory naming all of them is offered as well.
 *
 * @param repo  Store exposing `insertIfMissing(record)`.
 * @param root  Repository root directory; no recursion is performed.
 * @param opts  Optional settings. `opts.maxBytes` overrides
 *              MAX_NOTE_BYTES; it is rounded and floored at 256, and a
 *              non-numeric value falls back to the default.
 * @returns `{ filesFound }` — the number of instruction files ingested.
 */
export async function ingestProjectNotes(repo, root, opts = {}) {
    // NaN would make every `length > maxBytes` test false and silently
    // disable truncation, so it falls back to the default instead.
    const requested = Number(opts?.maxBytes ?? MAX_NOTE_BYTES);
    const maxBytes = Math.max(256, Number.isNaN(requested) ? MAX_NOTE_BYTES : Math.round(requested));
    // Names are collected while ingesting, so the summary below needs no
    // second round of stat() calls and cannot disagree with what was read.
    const present = [];
    const allTags = new Set(["agent-instructions", "house-rules"]);
    for (const { name, label, tags } of NOTE_FILES) {
        let raw;
        try {
            const abs = join(root, name);
            if (!(await stat(abs)).isFile())
                continue;
            raw = await readFile(abs, "utf-8");
        }
        catch {
            // Missing or unreadable file: simply not part of this repo's notes.
            continue;
        }
        let content = raw;
        if (raw.length > maxBytes) {
            let head = raw.slice(0, maxBytes - 1);
            // Never end the excerpt on half of a surrogate pair.
            const last = head.charCodeAt(head.length - 1);
            if (last >= 0xd800 && last <= 0xdbff)
                head = head.slice(0, -1);
            content = head.trimEnd() + "\n…[truncated; read the file directly for the rest]";
        }
        present.push(name);
        for (const t of tags)
            allTags.add(t);
        repo.insertIfMissing({
            category: CATEGORY,
            // `agent-instructions:<name>` is a stable subject the agent
            // can also match on directly with `memory_recall { query:
            // "agent instructions" }`.
            subject: `agent-instructions:${name}`,
            content: `${label}\n${"─".repeat(label.length)}\n${content}`,
            tags: ["agent-instructions", "house-rules", ...tags],
            source: "project-notes-ingest",
        });
    }
    // One summary memory listing the instruction files, so a categorical
    // recall sees them at a glance even when no individual note matched
    // the query keywords.
    if (present.length > 0) {
        repo.insertIfMissing({
            category: CATEGORY,
            subject: "agent-instructions:directory",
            content: `This repository has the following agent-instruction files in its root: ` +
                `${present.join(", ")}. These typically contain conventions, rules, and ` +
                `house style the agent should follow. Read them before making large changes.`,
            tags: Array.from(allTags),
            source: "project-notes-ingest",
        });
    }
    return { filesFound: present.length };
}
@@ -0,0 +1,35 @@
1
+ /**
2
+ * Project-structure ingestion — fully language-agnostic.
3
+ *
4
+ * The earlier version was a Python parser: it understood
5
+ * pyproject.toml's dependency model, pytest config sections, Flask /
6
+ * FastAPI sentinels. None of that transfers to a Rust, Go, Elixir, or
7
+ * C++ repo.
8
+ *
9
+ * This version commits to a strict rule: recognise files by NAME
10
+ * (knowing that `Cargo.toml` is Rust's manifest is a *fact*, like
11
+ * knowing a file extension — not a "convention" in the cultural
12
+ * sense), but summarise them only by FORMAT — JSON → top-level keys,
13
+ * TOML → section headers, YAML → top-level keys, etc. We never reach
14
+ * into a manifest's language-specific semantics.
15
+ *
16
+ * What the agent gets: an orientation map — the repo's shape, which
17
+ * recognised project/build/CI files exist, and the structural
18
+ * skeleton of each. If it needs the actual contents it can `read`
19
+ * the file; the memory's job is to point, not to parse.
20
+ *
21
+ * Everything here works identically on any repository regardless of
22
+ * language or tooling.
23
+ */
24
+ import type { MemoryRepository } from "../store/repository.js";
25
+ export interface ProjectIngestResult { // value resolved by ingestProjectFacts
26
+ facts: number; // number of facts offered to the repository in this run; counted once per insertIfMissing call, whether or not the record was new
27
+ }
28
+ export declare function ingestProjectFacts(repo: MemoryRepository, root: string): Promise<ProjectIngestResult>; // records the top-level layout, a file-type census, recognised manifest/CI files and the README opening for the repository at `root`
29
+ /**
30
+ * Summarise a config/manifest file by its on-disk FORMAT only. The
31
+ * extension selects the extractor (.json/.jsonc, .toml, .yaml/.yml, .xml);
32
+ * Makefile, justfile, Dockerfile, BUILD and WORKSPACE are matched by name.
33
+ * Anything else yields a line count plus its first non-empty line. No file's meaning is interpreted — only its skeleton is reported.
34
+ */
35
+ export declare function summariseByFormat(name: string, content: string): string;
@@ -0,0 +1,430 @@
1
+ /**
2
+ * Project-structure ingestion — fully language-agnostic.
3
+ *
4
+ * The earlier version was a Python parser: it understood
5
+ * pyproject.toml's dependency model, pytest config sections, Flask /
6
+ * FastAPI sentinels. None of that transfers to a Rust, Go, Elixir, or
7
+ * C++ repo.
8
+ *
9
+ * This version commits to a strict rule: recognise files by NAME
10
+ * (knowing that `Cargo.toml` is Rust's manifest is a *fact*, like
11
+ * knowing a file extension — not a "convention" in the cultural
12
+ * sense), but summarise them only by FORMAT — JSON → top-level keys,
13
+ * TOML → section headers, YAML → top-level keys, etc. We never reach
14
+ * into a manifest's language-specific semantics.
15
+ *
16
+ * What the agent gets: an orientation map — the repo's shape, which
17
+ * recognised project/build/CI files exist, and the structural
18
+ * skeleton of each. If it needs the actual contents it can `read`
19
+ * the file; the memory's job is to point, not to parse.
20
+ *
21
+ * Everything here works identically on any repository regardless of
22
+ * language or tooling.
23
+ */
24
+ import { readdir, readFile, stat } from "node:fs/promises";
25
+ import { extname, join } from "node:path";
26
+ const CATEGORY = "project-facts"; // category stamped on every fact this module records
27
+ // Directory names never worth walking into for an orientation summary (VCS data, dependency caches, build output, IDE state). Both users of this set also skip every dot-directory, so the dot-prefixed entries are belt-and-braces.
28
+ const SKIP_DIRS = new Set([
29
+ ".git",
30
+ "node_modules",
31
+ ".venv",
32
+ "venv",
33
+ "__pycache__",
34
+ ".tox",
35
+ ".mypy_cache",
36
+ ".pytest_cache",
37
+ ".ruff_cache",
38
+ "dist",
39
+ "build",
40
+ "target",
41
+ ".idea",
42
+ ".vscode",
43
+ "vendor",
44
+ ".gradle",
45
+ ".next",
46
+ ".svelte-kit",
47
+ "coverage",
48
+ ]);
49
/**
 * Files whose NAME identifies them as a project manifest / build
 * descriptor / CI config / tooling config. This is a flat,
 * language-neutral list — recognising the name is a fact, not a
 * cultural assumption. Nothing is assumed about their content beyond
 * the on-disk format.
 *
 * Names are matched exactly against entries in the repository root.
 * All four default Compose file names are listed (`compose.yml` was
 * previously missing).
 */
const RECOGNISED_FILES = [
    // package / build manifests across ecosystems
    "package.json",
    "deno.json",
    "deno.jsonc",
    "Cargo.toml",
    "go.mod",
    "pyproject.toml",
    "setup.py",
    "setup.cfg",
    "requirements.txt",
    "Pipfile",
    "pom.xml",
    "build.gradle",
    "build.gradle.kts",
    "settings.gradle",
    "Gemfile",
    "composer.json",
    "mix.exs",
    "Package.swift",
    "pubspec.yaml",
    "CMakeLists.txt",
    "Makefile",
    "makefile",
    "meson.build",
    "build.zig",
    "build.sbt",
    "project.clj",
    "rebar.config",
    "dune-project",
    "stack.yaml",
    "cabal.project",
    "BUILD",
    "BUILD.bazel",
    "WORKSPACE",
    "flake.nix",
    "default.nix",
    // CI / automation
    ".gitlab-ci.yml",
    "Jenkinsfile",
    ".travis.yml",
    "azure-pipelines.yml",
    "Taskfile.yml",
    "justfile",
    // containers / tooling
    "Dockerfile",
    "docker-compose.yml",
    "docker-compose.yaml",
    "compose.yaml",
    "compose.yml",
    ".editorconfig",
];
107
/**
 * Record an orientation map of the repository at `root`.
 *
 * Facts are offered to `repo.insertIfMissing` under the CATEGORY
 * category, in this order: the top-level layout, a file-extension
 * census, one structural summary per recognised manifest/CI file, one
 * per GitHub Actions workflow, and the README's opening line. Finally
 * `repo.setIngestedAt` is stamped with the current time.
 *
 * @param repo  Store exposing `insertIfMissing` and `setIngestedAt`.
 * @param root  Repository root directory.
 * @returns `{ facts }` — how many facts were offered to the store
 *          (counted per call, whether or not the record was new).
 */
export async function ingestProjectFacts(repo, root) {
    // tryRead() returns this marker followed by the byte size instead of
    // the content when a file is too large to load.
    const OVERSIZE_MARK = "__OVERSIZE__";
    let recorded = 0;
    const add = (subject, content, tags) => {
        repo.insertIfMissing({
            category: CATEGORY,
            subject,
            content,
            tags,
            source: "project-ingest",
        });
        recorded += 1;
    };
    // Summarise real content only; the oversize marker is reported as
    // such rather than being parsed as if it were the file's text.
    const describe = (name, content) => content.startsWith(OVERSIZE_MARK)
        ? `not summarised — file is too large (${content.slice(OVERSIZE_MARK.length)} bytes)`
        : summariseByFormat(name, content);
    // ── 1. Top-level layout ───────────────────────────────────────────
    const top = await safeReaddir(root);
    const dirs = [];
    const files = [];
    for (const e of top) {
        if (e.isDirectory()) {
            if (!SKIP_DIRS.has(e.name) && !e.name.startsWith("."))
                dirs.push(e.name);
        }
        else if (e.isFile()) {
            files.push(e.name);
        }
    }
    dirs.sort();
    files.sort();
    add("layout:top-level", `Repository root contains directories: ` +
        `${dirs.length ? dirs.join(", ") : "(none)"}. ` +
        `Notable root files: ${files.length ? files.slice(0, 25).join(", ") : "(none)"}.`, ["layout", "structure"]);
    // ── 1b. File-extension histogram across the whole tree ────────────
    // The language(s) emerge from the data itself, which also works for
    // polyglot repos and repos with no recognised manifest at all.
    const census = await treeCensus(root);
    if (census.totalFiles > 0) {
        const extRanked = Array.from(census.extCounts.entries())
            .sort((a, b) => b[1] - a[1])
            .slice(0, 20)
            .map(([ext, count]) => `${ext}×${count}`);
        add("layout:file-types", `File-type census of ${census.totalFiles} files across ${census.totalDirs} ` +
            `directories (extension × count, most common first): ${extRanked.join(", ")}` +
            (census.extCounts.size > 20 ? `, … (+${census.extCounts.size - 20} more types)` : "") +
            `. Largest directories by file count: ${census.topDirs.join(", ")}.`, ["layout", "file-types", "languages"]);
    }
    // ── 2. Recognised project/build/CI files — format-based summary ───
    // Match against the names actually present in the root listing. On a
    // case-insensitive filesystem "Makefile" and "makefile" would
    // otherwise both resolve to the same file and be recorded twice.
    // When the listing is unavailable (empty), probe every name as before.
    const rootNames = new Set(top.map((e) => e.name));
    const presentRecognised = [];
    for (const name of RECOGNISED_FILES) {
        if (rootNames.size > 0 && !rootNames.has(name))
            continue;
        const content = await tryRead(join(root, name));
        if (content === null)
            continue;
        presentRecognised.push(name);
        add(`file:${name}`, `${name} is present. Structural summary: ${describe(name, content)}`, ["project-file", name]);
    }
    if (presentRecognised.length > 0) {
        add("manifests:present", `Recognised project/build/CI files in this repo: ${presentRecognised.join(", ")}.`, ["manifests", "project-file"]);
    }
    // ── 3. CI workflow directory (GitHub Actions et al.) ──────────────
    const ghWorkflows = await safeReaddir(join(root, ".github", "workflows"));
    const wfFiles = ghWorkflows
        .filter((e) => e.isFile() && /\.ya?ml$/.test(e.name))
        .map((e) => e.name);
    for (const wf of wfFiles) {
        const content = await tryRead(join(root, ".github", "workflows", wf));
        if (content === null)
            continue;
        add(`ci-workflow:${wf}`, `.github/workflows/${wf} is present. Structural summary: ` +
            describe(wf, content), ["ci", "workflow", wf]);
    }
    if (wfFiles.length > 0) {
        add("ci:workflows-present", `CI workflow files under .github/workflows/: ${wfFiles.join(", ")}.`, ["ci", "workflow"]);
    }
    // ── 4. README — first meaningful line (any language) ──────────────
    const readme = await readReadmeHead(root);
    if (readme) {
        add("readme:headline", `README opening: ${readme}`, ["readme", "docs"]);
    }
    repo.setIngestedAt(CATEGORY, Date.now());
    return { facts: recorded };
}
188
+ /* ─── format-based summarisers (never language-specific) ────────────── */
189
/**
 * Pick a structural extractor for a config/manifest file from its
 * extension, or — for a handful of extensionless build files — from its
 * name, and return that extractor's one-line summary. The file's
 * meaning is never interpreted; only its skeleton is reported.
 */
export function summariseByFormat(name, content) {
    switch (extname(name).toLowerCase()) {
        case ".json":
        case ".jsonc":
            return summariseJson(content);
        case ".toml":
            return summariseToml(content);
        case ".yaml":
        case ".yml":
            return summariseYaml(content);
        case ".xml":
            return summariseXml(content);
    }
    // Make/just/Docker files are recognised case-insensitively; the Bazel
    // names only in their exact upper-case spelling.
    const folded = name.toLowerCase();
    const isLineOriented = ["makefile", "justfile", "dockerfile"].includes(folded) ||
        ["BUILD", "WORKSPACE"].includes(name);
    // Anything else is plain: report size and first non-empty line.
    return isLineOriented ? summariseLineOriented(name, content) : summarisePlain(content);
}
216
/**
 * Describe a JSON/JSONC document by shape: the top-level keys of an
 * object (arrays annotated `key[len]`, nested objects `key{count}`),
 * the length of a top-level array, or "scalar". Input that cannot be
 * parsed after comment stripping is reported with its line count.
 */
function summariseJson(content) {
    const KEY_LIMIT = 25;
    let parsed;
    try {
        parsed = JSON.parse(stripJsonComments(content));
    }
    catch {
        return `unparseable JSON (${countLines(content)} lines)`;
    }
    if (Array.isArray(parsed))
        return `JSON array of ${parsed.length} items`;
    if (parsed === null || typeof parsed !== "object")
        return "JSON scalar value";
    const names = Object.keys(parsed);
    const shown = [];
    for (const key of names.slice(0, KEY_LIMIT)) {
        const value = parsed[key];
        if (Array.isArray(value)) {
            shown.push(`${key}[${value.length}]`);
        }
        else if (value !== null && typeof value === "object") {
            shown.push(`${key}{${Object.keys(value).length}}`);
        }
        else {
            shown.push(key);
        }
    }
    const overflow = names.length > KEY_LIMIT ? `, … (+${names.length - KEY_LIMIT})` : "";
    return `JSON object, top-level keys: ${shown.join(", ")}${overflow}`;
}
241
/**
 * Describe a TOML document by its skeleton: bare keys that appear
 * before the first table header, and the table headers themselves
 * (`[table]` and `[[array.of.tables]]`).
 *
 * A header must occupy its whole line (an optional trailing comment is
 * allowed), have matching bracket pairs and contain no comma, so
 * elements of a multi-line array such as `["a", "b"],` are not
 * mistaken for sections. Both LF and CRLF line endings are accepted.
 */
function summariseToml(content) {
    const KEY_LIMIT = 12;
    const SECTION_LIMIT = 20;
    const header = /^\s*(\[\[[^\[\],]+\]\]|\[[^\[\],]+\])\s*(?:#.*)?$/;
    const bareKey = /^\s*([A-Za-z0-9_.-]+)\s*=/;
    const lines = content.split(/\r?\n/);
    const sections = [];
    const topKeys = [];
    for (const line of lines) {
        const h = header.exec(line);
        if (h) {
            sections.push(h[1]);
            continue;
        }
        // Keys only count as top-level until the first table header.
        if (sections.length === 0) {
            const k = bareKey.exec(line);
            if (k)
                topKeys.push(k[1]);
        }
    }
    const parts = [];
    if (topKeys.length) {
        parts.push(`top-level keys: ${topKeys.slice(0, KEY_LIMIT).join(", ")}` +
            (topKeys.length > KEY_LIMIT ? `, … (+${topKeys.length - KEY_LIMIT})` : ""));
    }
    if (sections.length) {
        parts.push(`sections: ${sections.slice(0, SECTION_LIMIT).join(", ")}` +
            (sections.length > SECTION_LIMIT ? `, … (+${sections.length - SECTION_LIMIT})` : ""));
    }
    return parts.length
        ? `TOML — ${parts.join("; ")}`
        : `TOML (${lines.length} lines, no sections detected)`;
}
269
/**
 * Describe a YAML document by its top-level mapping keys: lines that
 * start at column 0 with a plain or quoted key followed by `:` and
 * then whitespace or end of line.
 *
 * Quoted keys (e.g. `"on":` in workflow files) are recognised, a colon
 * glued to the following text (`http://…`) is not treated as a key,
 * and keys repeated across `---`-separated documents are listed once,
 * in first-seen order.
 */
function summariseYaml(content) {
    const KEY_LIMIT = 20;
    const topLevelKey = /^(?:"([^"]+)"|'([^']+)'|([A-Za-z0-9_.-]+))[ \t]*:(?:\s|$)/;
    const lines = content.split(/\r?\n/);
    const keys = new Set();
    for (const line of lines) {
        const m = topLevelKey.exec(line);
        if (m)
            keys.add(m[1] ?? m[2] ?? m[3]);
    }
    if (keys.size === 0) {
        return `YAML (${lines.length} lines, no top-level keys detected)`;
    }
    const ordered = Array.from(keys);
    return (`YAML — top-level keys: ${ordered.slice(0, KEY_LIMIT).join(", ")}` +
        (ordered.length > KEY_LIMIT ? `, … (+${ordered.length - KEY_LIMIT})` : ""));
}
283
/**
 * Describe an XML document by its root element and up to 15 distinct
 * other element names, in order of first appearance.
 *
 * Comments and CDATA sections are removed before scanning so that
 * commented-out markup is not reported. Namespace-prefixed names
 * (`xs:schema`) and a self-closing root (`<root/>`) are recognised.
 * Processing instructions and DOCTYPE declarations are ignored.
 */
function summariseXml(content) {
    const TAG_LIMIT = 15;
    const markup = content
        .replace(/<!--[\s\S]*?-->/g, "")
        .replace(/<!\[CDATA\[[\s\S]*?\]\]>/g, "");
    const openTag = /<([A-Za-z_][\w.:-]*)(?=[\s>/])/g;
    let root;
    const childTags = new Set();
    for (const m of markup.matchAll(openTag)) {
        // The first element encountered is the document root.
        if (root === undefined) {
            root = m[1];
            continue;
        }
        if (m[1] !== root)
            childTags.add(m[1]);
        if (childTags.size >= TAG_LIMIT)
            break;
    }
    if (root === undefined)
        return `XML (${content.split("\n").length} lines)`;
    return (`XML — root <${root}>, child tags seen: ${Array.from(childTags).join(", ") || "(none)"}`);
}
297
/**
 * Summarise a line-oriented build file.
 *
 * - Makefile / justfile: distinct rule names found at column 0, in
 *   first-seen order. Variable assignments (`NAME := value`,
 *   `NAME ::= value`, `NAME :::= value`) are not rules and are skipped;
 *   double-colon rules (`clean::`) are kept. Names starting with `.`
 *   (such as `.PHONY`) never match.
 * - Dockerfile: the distinct upper-case instruction keywords used.
 *   Lines continuing a previous instruction (after a trailing `\`) are
 *   ignored, so shell text inside a multi-line RUN is not reported.
 * - Anything else: the name and line count.
 */
function summariseLineOriented(name, content) {
    const lines = content.split("\n");
    const lower = name.toLowerCase();
    if (lower === "makefile" || lower === "justfile") {
        const TARGET_LIMIT = 20;
        const rule = /^([A-Za-z0-9_][\w./-]*)\s*:(?!:{0,2}=)/;
        const seen = new Set();
        for (const line of lines) {
            const m = rule.exec(line);
            if (m)
                seen.add(m[1]);
        }
        const targets = Array.from(seen);
        return targets.length
            ? `${name} — targets: ${targets.slice(0, TARGET_LIMIT).join(", ")}` +
                (targets.length > TARGET_LIMIT ? `, … (+${targets.length - TARGET_LIMIT})` : "")
            : `${name} (${lines.length} lines, no targets detected)`;
    }
    if (lower === "dockerfile") {
        const instr = new Set();
        let continued = false;
        for (const line of lines) {
            const trimmed = line.trim();
            // Blank and comment lines neither start an instruction nor end
            // a pending continuation.
            if (trimmed === "" || trimmed.startsWith("#"))
                continue;
            const isContinuation = continued;
            continued = /\\\s*$/.test(line);
            if (isContinuation)
                continue;
            const m = line.match(/^\s*([A-Z]+)\s/);
            if (m)
                instr.add(m[1]);
        }
        return `Dockerfile — instructions used: ${Array.from(instr).join(", ") || "(none)"}`;
    }
    return `${name} (${lines.length} lines)`;
}
325
/**
 * Fallback summary for files with no recognised structure: the line
 * count plus the first non-blank line, trimmed and capped at 100
 * characters.
 */
function summarisePlain(content) {
    const rows = content.split("\n");
    let headline = "";
    for (const row of rows) {
        const stripped = row.trim();
        if (stripped.length > 0) {
            headline = stripped;
            break;
        }
    }
    return `${rows.length} lines; first line: "${truncate(headline, 100)}"`;
}
331
/**
 * Walk the tree below `root` and tally files by lower-cased extension
 * ("(no-ext)" when there is none). Directories in SKIP_DIRS and any
 * dot-directory are pruned. The walk stops descending past depth 8 and
 * stops counting after 20,000 files.
 *
 * Resolves to `{ totalFiles, totalDirs, extCounts, topDirs }`, where
 * `extCounts` is a Map of extension → count and `topDirs` lists the six
 * directories holding the most files directly, as "path (count)".
 */
async function treeCensus(root) {
    const DEPTH_LIMIT = 8;
    const FILE_LIMIT = 20_000;
    const tally = { files: 0, dirs: 0 };
    const byExtension = new Map();
    const perDirectory = new Map();
    const visit = async (absPath, relPath, level) => {
        if (level > DEPTH_LIMIT || tally.files >= FILE_LIMIT)
            return;
        let localFiles = 0;
        for (const entry of await safeReaddir(absPath)) {
            if (entry.isDirectory()) {
                const pruned = SKIP_DIRS.has(entry.name) || entry.name.startsWith(".");
                if (!pruned) {
                    tally.dirs += 1;
                    const childRel = relPath ? `${relPath}/${entry.name}` : entry.name;
                    await visit(join(absPath, entry.name), childRel, level + 1);
                }
                continue;
            }
            if (!entry.isFile())
                continue;
            if (tally.files >= FILE_LIMIT)
                break;
            tally.files += 1;
            localFiles += 1;
            const key = extname(entry.name).toLowerCase() || "(no-ext)";
            byExtension.set(key, (byExtension.get(key) ?? 0) + 1);
        }
        // Recorded after the children, so nested directories are inserted first.
        if (localFiles > 0)
            perDirectory.set(relPath || ".", localFiles);
    };
    await visit(root, "", 0);
    const ranked = [...perDirectory.entries()].sort((a, b) => b[1] - a[1]);
    const topDirs = ranked.slice(0, 6).map(([dir, count]) => `${dir} (${count})`);
    return {
        totalFiles: tally.files,
        totalDirs: tally.dirs,
        extCounts: byExtension,
        topDirs,
    };
}
375
/**
 * List a directory as Dirent objects; a directory that is missing or
 * cannot be read is reported as empty instead of raising.
 */
async function safeReaddir(dir) {
    let entries = [];
    try {
        entries = await readdir(dir, { withFileTypes: true });
    }
    catch {
        // Leave `entries` empty.
    }
    return entries;
}
383
/**
 * Read a regular file as UTF-8 text.
 *
 * Resolves to `null` when the path is not a regular file or any
 * filesystem call fails. Files larger than 512 KiB are not loaded;
 * the marker string `__OVERSIZE__<size in bytes>` is returned instead,
 * which callers must check for.
 */
async function tryRead(path) {
    const SIZE_CAP = 512 * 1024;
    try {
        const info = await stat(path);
        if (info.isFile()) {
            // Don't pull huge files into memory for a structural summary.
            return info.size > SIZE_CAP
                ? `__OVERSIZE__${info.size}`
                : await readFile(path, "utf-8");
        }
    }
    catch {
        // Fall through to the null result.
    }
    return null;
}
398
/**
 * Return the first meaningful line of the repository's README, capped
 * at 200 characters, or `null` when no README yields one.
 *
 * Candidates are tried in a fixed order; oversize or unreadable files
 * are skipped. HTML comments, inline HTML tags and Markdown
 * images/badges are removed before a line is considered, so a README
 * that opens with a centred logo or a badge row yields its first line
 * of actual text rather than markup. Leading heading/quote/list
 * markers (`#`, `=`, `-`, `*`, `>`) are stripped as well.
 */
async function readReadmeHead(root) {
    const CANDIDATES = ["README.md", "README.rst", "README.txt", "README", "readme.md"];
    const HTML_COMMENT = /<!--[\s\S]*?-->/g;
    const HTML_TAG = /<\/?[A-Za-z][^>]*>/g;
    // `[![alt](img)](link)` badges and plain `![alt](img)` images.
    const IMAGE = /\[!\[[^\]]*\]\([^)]*\)\]\([^)]*\)|!\[[^\]]*\]\([^)]*\)/g;
    for (const name of CANDIDATES) {
        const text = await tryRead(join(root, name));
        if (!text || text.startsWith("__OVERSIZE__"))
            continue;
        for (const line of text.replace(HTML_COMMENT, "").split("\n")) {
            const cleaned = line
                .replace(HTML_TAG, "")
                .replace(IMAGE, "")
                .replace(/^[#=\-*\s>]+/, "")
                .trim();
            if (cleaned.length > 0) {
                return cleaned.length > 200 ? cleaned.slice(0, 197) + "…" : cleaned;
            }
        }
    }
    return null;
}
418
/**
 * Remove `//` line comments and block comments from JSONC text so it
 * can be handed to `JSON.parse`.
 *
 * The text is scanned once and string literals are skipped verbatim,
 * so comment-like sequences inside values (`"//cdn.example.com"`,
 * `"src/**\/*.ts"`) are left untouched. A line comment ends before its
 * line terminator; an unterminated block comment runs to end of input.
 */
function stripJsonComments(s) {
    const kept = [];
    const end = s.length;
    let copyFrom = 0;
    let i = 0;
    while (i < end) {
        const ch = s[i];
        if (ch === '"') {
            // Skip the whole string literal, honouring backslash escapes.
            i += 1;
            while (i < end && s[i] !== '"')
                i += s[i] === "\\" ? 2 : 1;
            i += 1;
            continue;
        }
        if (ch === "/" && (s[i + 1] === "/" || s[i + 1] === "*")) {
            kept.push(s.slice(copyFrom, i));
            if (s[i + 1] === "/") {
                i += 2;
                while (i < end && s[i] !== "\n" && s[i] !== "\r")
                    i += 1;
            }
            else {
                const close = s.indexOf("*/", i + 2);
                i = close === -1 ? end : close + 2;
            }
            copyFrom = i;
            continue;
        }
        i += 1;
    }
    kept.push(s.slice(copyFrom));
    return kept.join("");
}
425
/** Number of "\n"-separated lines in `s`; an empty string counts as one line. */
function countLines(s) {
    const breaks = s.match(/\n/g);
    return breaks === null ? 1 : breaks.length + 1;
}
428
/**
 * Shorten `s` to at most `n` UTF-16 code units, ending with "…" when
 * anything was cut. Strings that already fit are returned unchanged.
 *
 * A limit below 1 (or NaN) yields an empty string rather than a result
 * longer than requested, and the cut never lands in the middle of a
 * surrogate pair.
 */
function truncate(s, n) {
    if (s.length <= n)
        return s;
    const limit = Math.floor(n);
    if (!(limit >= 1))
        return "";
    let head = s.slice(0, limit - 1);
    const last = head.charCodeAt(head.length - 1);
    // Drop a dangling high surrogate left behind by the slice.
    if (last >= 0xd800 && last <= 0xdbff)
        head = head.slice(0, -1);
    return head + "…";
}
@@ -0,0 +1,63 @@
1
+ /**
2
+ * Session snapshots — branchable, versioned "understanding" carried
3
+ * across sessions.
4
+ *
5
+ * The `session-trace` category already records what a *past* session
6
+ * physically did (files edited, commands run). A snapshot records
7
+ * something different and harder-won: the *understanding* a session
8
+ * built up — the mental model, the decisions made, the conventions
9
+ * learned — the stuff that is normally lost when a context window
10
+ * fills and compacts.
11
+ *
12
+ * This is the harness-side, no-model translation of the
13
+ * "contextual memory virtualisation" idea: instead of a DAG data
14
+ * structure, each snapshot is one pinned memory, and the parent link
15
+ * is just a `parent:<id>` tag. The set of snapshots and their parent
16
+ * tags *is* the DAG — readable, hand-editable, no new storage shape.
17
+ *
18
+ * - A later session resumes from the most recent snapshot.
19
+ * - A parallel session reads the same shared store, so it forks
20
+ * from the same point automatically.
21
+ * - Recording a new snapshot that tags an older one as `parent`
22
+ * is a branch.
23
+ *
24
+ * Snapshots are pinned, so the LFU disk-budget eviction never drops
25
+ * them — accumulated understanding outlives transient facts.
26
+ */
27
+ import type { Memory } from "../types.js";
28
+ import type { MemoryRepository } from "../store/repository.js";
29
+ /** Structured payload the agent supplies when taking a snapshot; only `summary` is required. */
30
+ export interface SnapshotInput {
31
+ /** Required. One short paragraph: the working mental model of the codebase/task. */
32
+ summary: string;
33
+ /** Optional. Decisions made and why — one short line per entry. */
34
+ decisions?: string[];
35
+ /** Optional. Conventions/constraints learned that aren't obvious from the code — one per entry. */
36
+ conventions?: string[];
37
+ }
38
+ export interface SnapshotWriteResult { // returned by writeSnapshot
39
+ id: string; // presumably the id of the snapshot memory that was written — confirm against the implementation
40
+ parentId: string | null; // id of the snapshot recorded as `parent`; presumably null when no other session has a snapshot — TODO confirm
41
+ }
42
+ /**
43
+ * Record a session snapshot keyed by `sessionId`. If a snapshot for
44
+ * the same session already exists it is replaced (a session's
45
+ * understanding is updated in place, not duplicated). The most recent
46
+ * *other* session's snapshot is recorded as the `parent` — that link
47
+ * is what makes the snapshot set a branchable history. The result carries the snapshot's `id` and its `parentId` (nullable).
48
+ */
49
+ export declare function writeSnapshot(repo: MemoryRepository, sessionId: string, input: SnapshotInput): SnapshotWriteResult;
50
+ /**
51
+ * The most recent snapshot to resume from — the newest snapshot that
52
+ * does NOT belong to the optional `excludeSessionId` (so a session never
53
+ * resumes from itself). Returns null when there are no prior snapshots.
54
+ */
55
+ export declare function latestSnapshot(repo: MemoryRepository, excludeSessionId?: string): Memory | null;
56
+ /**
57
+ * A compact, human-readable lineage for `memory_status` / logs:
58
+ * how many snapshots exist (`count`) and when the most recent was taken (`latestAt`).
59
+ */
60
+ export declare function snapshotSummary(repo: MemoryRepository): {
61
+ count: number;
62
+ latestAt: number | null; // presumably an epoch timestamp, null when no snapshot exists — TODO confirm units against the implementation
63
+ };
+ };