opencode-diane 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/CHANGELOG.md +180 -0
  2. package/LICENSE +21 -0
  3. package/README.md +206 -0
  4. package/WIKI.md +1430 -0
  5. package/dist/index.d.ts +28 -0
  6. package/dist/index.js +1632 -0
  7. package/dist/ingest/adaptive.d.ts +47 -0
  8. package/dist/ingest/adaptive.js +182 -0
  9. package/dist/ingest/code-health.d.ts +58 -0
  10. package/dist/ingest/code-health.js +202 -0
  11. package/dist/ingest/code-map.d.ts +71 -0
  12. package/dist/ingest/code-map.js +670 -0
  13. package/dist/ingest/cross-refs.d.ts +59 -0
  14. package/dist/ingest/cross-refs.js +1207 -0
  15. package/dist/ingest/docs.d.ts +49 -0
  16. package/dist/ingest/docs.js +325 -0
  17. package/dist/ingest/git.d.ts +77 -0
  18. package/dist/ingest/git.js +390 -0
  19. package/dist/ingest/live-session.d.ts +101 -0
  20. package/dist/ingest/live-session.js +173 -0
  21. package/dist/ingest/project-notes.d.ts +28 -0
  22. package/dist/ingest/project-notes.js +102 -0
  23. package/dist/ingest/project.d.ts +35 -0
  24. package/dist/ingest/project.js +430 -0
  25. package/dist/ingest/session-snapshot.d.ts +63 -0
  26. package/dist/ingest/session-snapshot.js +94 -0
  27. package/dist/ingest/sessions.d.ts +29 -0
  28. package/dist/ingest/sessions.js +164 -0
  29. package/dist/ingest/tables.d.ts +52 -0
  30. package/dist/ingest/tables.js +360 -0
  31. package/dist/mining/skill-miner.d.ts +53 -0
  32. package/dist/mining/skill-miner.js +234 -0
  33. package/dist/search/bm25.d.ts +81 -0
  34. package/dist/search/bm25.js +334 -0
  35. package/dist/search/e5-embedder.d.ts +30 -0
  36. package/dist/search/e5-embedder.js +91 -0
  37. package/dist/search/embed-pass.d.ts +26 -0
  38. package/dist/search/embed-pass.js +43 -0
  39. package/dist/search/embedder.d.ts +58 -0
  40. package/dist/search/embedder.js +85 -0
  41. package/dist/search/inverted-index.d.ts +51 -0
  42. package/dist/search/inverted-index.js +139 -0
  43. package/dist/search/ppr.d.ts +44 -0
  44. package/dist/search/ppr.js +118 -0
  45. package/dist/search/tokenize.d.ts +26 -0
  46. package/dist/search/tokenize.js +98 -0
  47. package/dist/store/eviction.d.ts +16 -0
  48. package/dist/store/eviction.js +37 -0
  49. package/dist/store/repository.d.ts +222 -0
  50. package/dist/store/repository.js +420 -0
  51. package/dist/store/sqlite-store.d.ts +89 -0
  52. package/dist/store/sqlite-store.js +252 -0
  53. package/dist/store/vector-store.d.ts +66 -0
  54. package/dist/store/vector-store.js +160 -0
  55. package/dist/types.d.ts +385 -0
  56. package/dist/types.js +9 -0
  57. package/dist/utils/file-log.d.ts +87 -0
  58. package/dist/utils/file-log.js +215 -0
  59. package/dist/utils/peer-detection.d.ts +45 -0
  60. package/dist/utils/peer-detection.js +90 -0
  61. package/dist/utils/shell.d.ts +43 -0
  62. package/dist/utils/shell.js +110 -0
  63. package/dist/utils/usage-skill.d.ts +42 -0
  64. package/dist/utils/usage-skill.js +129 -0
  65. package/dist/utils/xlsx.d.ts +36 -0
  66. package/dist/utils/xlsx.js +270 -0
  67. package/grammars/tree-sitter-c.wasm +0 -0
  68. package/grammars/tree-sitter-c_sharp.wasm +0 -0
  69. package/grammars/tree-sitter-cpp.wasm +0 -0
  70. package/grammars/tree-sitter-css.wasm +0 -0
  71. package/grammars/tree-sitter-go.wasm +0 -0
  72. package/grammars/tree-sitter-html.wasm +0 -0
  73. package/grammars/tree-sitter-java.wasm +0 -0
  74. package/grammars/tree-sitter-javascript.wasm +0 -0
  75. package/grammars/tree-sitter-json.wasm +0 -0
  76. package/grammars/tree-sitter-php.wasm +0 -0
  77. package/grammars/tree-sitter-python.wasm +0 -0
  78. package/grammars/tree-sitter-rust.wasm +0 -0
  79. package/grammars/tree-sitter-typescript.wasm +0 -0
  80. package/package.json +80 -0
@@ -0,0 +1,390 @@
1
+ /**
2
+ * Git history ingestion — fully convention-agnostic.
3
+ *
4
+ * The earlier version classified commits by parsing the *subject
5
+ * line* (conventional commits, bracket tags, gitmoji, English
6
+ * keywords). That is unreliable on real repositories, many of which
7
+ * have no commit-message culture at all ("wip", "fix", ".", "update",
8
+ * non-English text, empty subjects). Message-derived "flavor" was
9
+ * noise dressed up as signal.
10
+ *
11
+ * This version derives everything from STRUCTURE — facts about what
12
+ * the commit physically did, which are true regardless of how (or
13
+ * whether) the author described it:
14
+ *
15
+ * - diff shape : files touched, lines +/-, files created/deleted,
16
+ * net direction. From `git log --numstat --summary`.
17
+ * - co-change : pairs of files modified in the same commit,
18
+ * counted across history (mechanical/huge commits
19
+ * skipped).
20
+ * - churn : how often each file changes — a stability signal.
21
+ * - recency : which files were touched in the most recent
22
+ * commits.
23
+ *
24
+ * The commit subject is still STORED, verbatim, inside the memory
25
+ * content — it is text the agent may legitimately search for — but it
26
+ * never drives tags or categorisation. It is data, not signal.
27
+ *
28
+ * Output is hard-capped by `gitHistoryDepth`. Re-running ingest is
29
+ * idempotent thanks to insertIfMissing on the repository.
30
+ */
31
+ import { isGitRepo, runGit } from "../utils/shell.js";
32
+ const CATEGORY = "git-history"; // category of every memory written by this module; also the key passed to setIngestedAt
33
+ // Co-change: pairs touched together at least this many times are "coupled" (default for `coChangeMinOccurrences`).
34
+ const COCHANGE_MIN_TIMES = 3;
35
+ // Commits touching more files than this are mechanical (mass reformat,
36
+ // vendoring, lockfile churn) — their file pairs are noise, so the co-change pass skips them.
37
+ const COCHANGE_MAX_FILES = 8;
38
+ const PAIR_LIMIT = 200; // at most this many co-change memories are written (most frequent pairs first)
39
+ // Churn: a file modified in at least this fraction of scanned non-merge commits,
40
+ // AND at least this many times absolutely, is flagged as high-churn.
41
+ const CHURN_MIN_FRACTION = 0.15;
42
+ const CHURN_MIN_ABSOLUTE = 4;
43
+ const CHURN_LIMIT = 60; // at most this many churn memories are written (highest counts first)
44
+ // Recency: how many of the most-recent non-merge commits feed the "recently
45
+ // changed files" memory.
46
+ const RECENCY_WINDOW = 12;
47
+ const MAX_FILES_PER_COMMIT_IN_MEMORY = 8; // paths spelled out in a per-commit memory; the rest become ", … (+N)"
/**
 * Detect a "balanced churn" commit: one whose total added and deleted
 * line counts are both substantial and nearly equal. That shape is what
 * moved or reformatted content looks like in a diff stat (with
 * `--no-renames`, a rename is +N on the new path and -N on the old one),
 * so callers use it to skip commits that carry no logic signal.
 *
 * Only arithmetic on the diff stat is used; the commit message is never
 * consulted.
 *
 * @param files Per-file stats for one commit. Each entry exposes numeric
 *   `added` and `deleted` counts; a negative count marks a binary file.
 * @returns `true` when there is at least one file, no binary file, both
 *   totals are at least 25 lines, and the smaller total is at least 92 %
 *   of the larger one. `false` otherwise.
 */
export function isBalancedChurnCommit(files) {
    // An empty commit has no shape; a binary entry makes the shape unknown.
    const shapeUnknown = files.length === 0 ||
        files.some((entry) => entry.added < 0 || entry.deleted < 0);
    if (shapeUnknown) {
        return false;
    }
    const linesAdded = files.reduce((sum, entry) => sum + entry.added, 0);
    const linesDeleted = files.reduce((sum, entry) => sum + entry.deleted, 0);
    // Floor on both sides so small commits are never classified as churn.
    if (linesAdded < 25 || linesDeleted < 25) {
        return false;
    }
    const larger = Math.max(linesAdded, linesDeleted);
    const smaller = Math.min(linesAdded, linesDeleted);
    return smaller / larger >= 0.92;
}
/**
 * Read recent git history under `root` and store structural memories
 * derived from it in `repo`. Four passes run over the parsed commits:
 *
 *  1. per-commit  — one memory per non-merge commit, except commits
 *                   classified as balanced churn;
 *  2. co-change   — file pairs modified together, counted over non-merge
 *                   commits touching at most COCHANGE_MAX_FILES files;
 *  3. churn       — files changed in a large share of non-merge commits;
 *  4. recency     — files touched by the newest non-merge commits.
 *
 * @param repo Store exposing `insertIfMissing(memory)` and
 *   `setIngestedAt(category, epochMs)`.
 * @param root Directory passed to `isGitRepo` and used as the working
 *   directory for `git log`.
 * @param depth Maximum number of commits, passed to git as `-<depth>`.
 * @param coChangeMaxCommits When more commits than this were parsed, the
 *   co-change pass (quadratic in files per commit) is skipped entirely.
 * @param coChangeMinOccurrences Minimum number of shared commits for a
 *   file pair to be reported as coupled.
 * @returns Counters describing what was scanned and written. When `root`
 *   is not a git repository or git produced no output, the zeroed result
 *   is returned and `setIngestedAt` is not called.
 */
export async function ingestGitHistory(repo, root, depth, coChangeMaxCommits = Infinity, coChangeMinOccurrences = COCHANGE_MIN_TIMES) {
    const result = {
        scanned: 0,
        commitMemories: 0,
        coChangeMemories: 0,
        churnMemories: 0,
        recencyMemories: 0,
        balancedChurnSkipped: 0,
        shapeTagCounts: {},
    };
    const insideRepo = await isGitRepo(root);
    if (!insideRepo) {
        return result;
    }
    // `--numstat` yields per-file added/deleted counts; `--summary` adds
    // "create mode" / "delete mode" lines that distinguish new and removed
    // files. The header fields are framed by a separator character.
    const SEP = "\u241F";
    const FMT = SEP + ["%H", "%P", "%at", "%s"].join(SEP) + SEP;
    const gitArgs = [
        "log",
        `-${depth}`,
        "--no-color",
        "--numstat",
        "--summary",
        "--no-renames",
        `--pretty=format:${FMT}`,
    ];
    const stdout = await runGit(gitArgs, root);
    if (!stdout) {
        return result;
    }
    const commits = parseGitLog(stdout, SEP);
    result.scanned = commits.length;
    // Merge commits are excluded from every pass below.
    const nonMergeCommits = commits.filter((commit) => !commit.isMerge);

    // ── Pass 1: per-commit memories ───────────────────────────────────
    // Balanced-churn commits get no memory of their own but still feed
    // the co-change and churn passes.
    for (const commit of nonMergeCommits) {
        if (isBalancedChurnCommit(commit.files)) {
            result.balancedChurnSkipped += 1;
            continue;
        }
        const shapeTags = deriveShapeTags(commit.files);
        for (const tag of shapeTags) {
            const seenSoFar = result.shapeTagCounts[tag] ?? 0;
            result.shapeTagCounts[tag] = seenSoFar + 1;
        }
        ingestCommitMemory(repo, commit, shapeTags);
        result.commitMemories += 1;
    }

    // ── Pass 2: file co-modification ──────────────────────────────────
    // Pair counting is O(commits × files-per-commit²); the caller can cap
    // it through `coChangeMaxCommits`.
    if (commits.length <= coChangeMaxCommits) {
        const pairCounts = new Map();
        for (const commit of nonMergeCommits) {
            if (commit.files.length > COCHANGE_MAX_FILES) {
                continue;
            }
            // Sorting makes the key for a pair independent of listing order.
            const sortedPaths = commit.files.map((file) => file.path).sort();
            sortedPaths.forEach((left, index) => {
                for (const right of sortedPaths.slice(index + 1)) {
                    const key = `${left}\u0000${right}`;
                    pairCounts.set(key, (pairCounts.get(key) ?? 0) + 1);
                }
            });
        }
        const coupled = [];
        pairCounts.forEach((times, key) => {
            if (times < coChangeMinOccurrences) {
                return;
            }
            const cut = key.indexOf("\u0000");
            coupled.push({ a: key.slice(0, cut), b: key.slice(cut + 1), n: times });
        });
        coupled.sort((x, y) => y.n - x.n);
        for (const pair of coupled.slice(0, PAIR_LIMIT)) {
            repo.insertIfMissing({
                category: CATEGORY,
                subject: `co-change:${pair.a}`,
                content: `${pair.a} and ${pair.b} were modified together in ${pair.n} of the last ${commits.length} commits — they are likely coupled.`,
                tags: ["co-change", pair.a, pair.b],
                source: "git:co-occurrence",
            });
            result.coChangeMemories += 1;
        }
    }

    // ── Pass 3: churn — how often each file changes ───────────────────
    const touchCounts = new Map();
    for (const commit of nonMergeCommits) {
        for (const file of commit.files) {
            touchCounts.set(file.path, (touchCounts.get(file.path) ?? 0) + 1);
        }
    }
    // `|| 1` keeps the fraction finite when every commit is a merge.
    const nonMerge = nonMergeCommits.length || 1;
    const hotFiles = [...touchCounts.entries()]
        .filter(([, times]) => times >= CHURN_MIN_ABSOLUTE && times / nonMerge >= CHURN_MIN_FRACTION)
        .sort((left, right) => right[1] - left[1])
        .slice(0, CHURN_LIMIT);
    for (const [path, times] of hotFiles) {
        const pct = Math.round((times / nonMerge) * 100);
        repo.insertIfMissing({
            category: CATEGORY,
            subject: `churn:${path}`,
            content: `${path} is high-churn: changed in ${times} of the last ${nonMerge} non-merge commits (${pct}%). Treat it as a hot, frequently-edited file.`,
            tags: ["churn", "hot-file", path],
            source: "git:churn",
        });
        result.churnMemories += 1;
    }

    // ── Pass 4: recency — what was touched most recently ──────────────
    const recentCommits = nonMergeCommits.slice(0, RECENCY_WINDOW);
    if (recentCommits.length > 0) {
        // A Set keeps first-seen order, so the list stays most-recent-first.
        const uniquePaths = new Set();
        for (const commit of recentCommits) {
            for (const file of commit.files) {
                uniquePaths.add(file.path);
            }
        }
        const recentFiles = [...uniquePaths];
        const overflowNote = recentFiles.length > 25 ? `, … (+${recentFiles.length - 25})` : "";
        const newestHash = recentCommits[0].hash.slice(0, 8);
        repo.insertIfMissing({
            category: CATEGORY,
            subject: "recency:recently-changed",
            content: `Files changed in the last ${recentCommits.length} non-merge commits (most recent first): ${recentFiles.slice(0, 25).join(", ")}${overflowNote}. Most recent commit: ${newestHash}.`,
            tags: ["recency", "recently-changed"],
            source: "git:recency",
        });
        result.recencyMemories += 1;
    }

    repo.setIngestedAt(CATEGORY, Date.now());
    return result;
}
223
+ /* ─── structural shape derivation (no message parsing) ──────────────── */
/**
 * Turn a commit's per-file diff stats into descriptive tags. Each tag
 * states a measurable fact about the diff; nothing is inferred from the
 * commit message.
 *
 * Tags, in emission order:
 *  - "single-file" (exactly 1 file) or "many-files" (10 or more);
 *  - "large-diff" (500+ changed lines) or "tiny-diff" (1 to 10);
 *  - "adds-files" / "removes-files" when created / deleted files make up
 *    at least half of the files touched;
 *  - "net-removal" / "net-addition" when one side is more than twice the
 *    other and at least 30 lines;
 *  - "touches-binary" when any entry has a negative line count.
 *
 * @param files Entries with `status` ("created" | "deleted" | other) and
 *   numeric `added` / `deleted` counts; negative counts mean binary and
 *   are left out of the line totals.
 * @returns The tag list, or `["empty"]` for a commit with no files.
 */
export function deriveShapeTags(files) {
    const fileCount = files.length;
    if (fileCount === 0) {
        return ["empty"];
    }
    let createdCount = 0;
    let removedCount = 0;
    let linesAdded = 0;
    let linesDeleted = 0;
    let sawBinary = false;
    for (const entry of files) {
        if (entry.status === "created") {
            createdCount += 1;
        }
        if (entry.status === "deleted") {
            removedCount += 1;
        }
        if (entry.added < 0 || entry.deleted < 0) {
            sawBinary = true;
            continue;
        }
        linesAdded += entry.added;
        linesDeleted += entry.deleted;
    }
    const tags = [];
    // Size by file count.
    if (fileCount === 1) {
        tags.push("single-file");
    }
    else if (fileCount >= 10) {
        tags.push("many-files");
    }
    // Size by line volume.
    const changedLines = linesAdded + linesDeleted;
    if (changedLines >= 500) {
        tags.push("large-diff");
    }
    else if (changedLines > 0 && changedLines <= 10) {
        tags.push("tiny-diff");
    }
    // Direction at file level.
    const half = fileCount / 2;
    if (createdCount > 0 && createdCount >= half) {
        tags.push("adds-files");
    }
    if (removedCount > 0 && removedCount >= half) {
        tags.push("removes-files");
    }
    // Direction at line level; the two outcomes are mutually exclusive.
    if (linesDeleted > linesAdded * 2 && linesDeleted >= 30) {
        tags.push("net-removal");
    }
    else if (linesAdded > linesDeleted * 2 && linesAdded >= 30) {
        tags.push("net-addition");
    }
    if (sawBinary) {
        tags.push("touches-binary");
    }
    return tags;
}
/**
 * Store one memory describing a single commit.
 *
 * The memory subject is the first touched path, or `tree:<short hash>`
 * when the commit lists no files. The content carries the short hash,
 * the commit date, the commit message quoted verbatim (cut to 140
 * characters), file and line totals, and up to
 * MAX_FILES_PER_COMMIT_IN_MEMORY paths. Tags are the supplied shape tags
 * plus the first four paths; the message never contributes tags.
 *
 * @param repo Store exposing `insertIfMissing(memory)`.
 * @param c Parsed commit with `hash`, `subject`, `unixTime`, `files`.
 * @param shapeTags Structural tags already derived for this commit.
 */
function ingestCommitMemory(repo, c, shapeTags) {
    const shortHash = c.hash.slice(0, 8);
    const touched = c.files.map((file) => file.path);
    // Spell out the first few paths and summarise the remainder as "(+N)".
    const overflow = touched.length - MAX_FILES_PER_COMMIT_IN_MEMORY;
    let fileList = touched.slice(0, MAX_FILES_PER_COMMIT_IN_MEMORY).join(", ");
    if (overflow > 0) {
        fileList += `, … (+${overflow})`;
    }
    // Binary entries carry negative counts; only positive counts are summed.
    const linesAdded = c.files.reduce((sum, file) => (file.added > 0 ? sum + file.added : sum), 0);
    const linesDeleted = c.files.reduce((sum, file) => (file.deleted > 0 ? sum + file.deleted : sum), 0);
    let dateStr = "unknown-date";
    if (c.unixTime) {
        dateStr = new Date(c.unixTime * 1000).toISOString().slice(0, 10);
    }
    // The message is stored as plain searchable text and is never parsed.
    const message = c.subject.trim();
    const subjectText = message
        ? `Message: "${truncate(message, 140)}". `
        : "Message: (empty). ";
    repo.insertIfMissing({
        category: CATEGORY,
        subject: touched[0] ?? `tree:${shortHash}`,
        content: `Commit ${shortHash} (${dateStr}). ${subjectText}Changed ${c.files.length} file(s), +${linesAdded}/-${linesDeleted} lines. Files: ${fileList || "(none)"}.`,
        tags: [...shapeTags, ...touched.slice(0, 4)],
        source: `git:${c.hash}`,
    });
}
309
+ /* ─── parsing ───────────────────────────────────────────────────────── */
/**
 * Shorten `s` to at most `n` UTF-16 code units, ending with "…" when
 * anything was cut.
 *
 * Two edge cases are handled explicitly:
 *  - `n <= 0` returns "" (a negative `slice` end would otherwise count
 *    from the end of the string and return nearly all of it);
 *  - the cut never falls between the two halves of a surrogate pair, so
 *    a message containing emoji cannot end in a lone surrogate.
 *
 * @param s Text to shorten.
 * @param n Maximum length of the result, in UTF-16 code units.
 * @returns `s` unchanged when it already fits, otherwise the shortened
 *   text followed by "…".
 */
function truncate(s, n) {
    if (s.length <= n) {
        return s;
    }
    if (n <= 0) {
        return "";
    }
    let end = n - 1; // leave room for the ellipsis
    if (end > 0) {
        const lastKept = s.charCodeAt(end - 1);
        // A high surrogate at the cut point would be separated from its
        // low half, so drop it too.
        if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
            end -= 1;
        }
    }
    return s.slice(0, end) + "…";
}
/**
 * Parse `git log --numstat --summary` output framed by a separator-based
 * pretty format. Each commit looks like:
 *
 *   SEP HASH SEP PARENTS SEP UNIXTIME SEP SUBJECT SEP
 *   <added>\t<deleted>\t<path>          (numstat lines; "-" = binary)
 *   ...
 *    create mode 100644 <path>          (summary lines)
 *    delete mode 100644 <path>
 *   ...
 *
 * Splitting on the separator yields one leading chunk (text before the
 * first separator) followed by five chunks per commit: hash, parents,
 * unix time, subject, and the numstat/summary body.
 *
 * @param stdout Raw output of the git command.
 * @param sep The separator string used in the pretty format.
 * @returns One object per commit: `{ hash, subject, unixTime, files,
 *   isMerge }`. Each file is `{ path, added, deleted, status }` with
 *   status "created", "deleted" or "modified"; binary files have
 *   `added`/`deleted` of -1. Parsing stops at the first empty hash.
 */
function parseGitLog(stdout, sep) {
    // Compiled once, not once per line. None uses the `g` flag, so they
    // hold no state between calls.
    const CREATE_RE = /^\s+create mode \d+ (.+)$/;
    const DELETE_RE = /^\s+delete mode \d+ (.+)$/;
    const IGNORED_SUMMARY_RE = /^\s+(mode change|rename) /;
    const NUMSTAT_RE = /^(-|\d+)\t(-|\d+)\t(.+)$/;
    const commits = [];
    const chunks = stdout.split(sep);
    // chunks[0] is the text before the first separator; commits start at 1.
    for (let i = 1; i < chunks.length; i += 5) {
        const hash = (chunks[i] ?? "").trim();
        if (!hash) {
            break;
        }
        const parents = (chunks[i + 1] ?? "").trim();
        const unixTime = parseInt((chunks[i + 2] ?? "").trim(), 10) || 0;
        const subject = (chunks[i + 3] ?? "").trim();
        const body = chunks[i + 4] ?? "";
        const created = new Set();
        const removed = new Set();
        const files = [];
        for (const rawLine of body.split("\n")) {
            const line = rawLine.replace(/\r$/, "");
            if (!line.trim()) {
                continue;
            }
            const createMatch = line.match(CREATE_RE);
            if (createMatch) {
                created.add(createMatch[1].trim());
                continue;
            }
            const deleteMatch = line.match(DELETE_RE);
            if (deleteMatch) {
                removed.add(deleteMatch[1].trim());
                continue;
            }
            // Other summary lines carry nothing needed here.
            if (IGNORED_SUMMARY_RE.test(line)) {
                continue;
            }
            const numstatMatch = line.match(NUMSTAT_RE);
            if (numstatMatch) {
                files.push({
                    path: numstatMatch[3].trim(),
                    added: numstatMatch[1] === "-" ? -1 : parseInt(numstatMatch[1], 10),
                    deleted: numstatMatch[2] === "-" ? -1 : parseInt(numstatMatch[2], 10),
                    status: "modified",
                });
            }
            // Any other line is skipped.
        }
        // Summary lines follow the numstat lines, so statuses are resolved
        // only after the whole body has been read.
        for (const file of files) {
            if (created.has(file.path)) {
                file.status = "created";
            }
            else if (removed.has(file.path)) {
                file.status = "deleted";
            }
        }
        // Fold in deletions that appear only in the summary block. A Set
        // of known paths keeps this linear in the number of files.
        const listedPaths = new Set(files.map((file) => file.path));
        for (const path of removed) {
            if (!listedPaths.has(path)) {
                files.push({ path, added: 0, deleted: 0, status: "deleted" });
            }
        }
        // More than one parent hash means a merge commit.
        const isMerge = parents.split(/\s+/).filter(Boolean).length > 1;
        commits.push({ hash, subject, unixTime, files, isMerge });
    }
    return commits;
}
@@ -0,0 +1,101 @@
1
+ /**
2
+ * Live-session activity recorder.
3
+ *
4
+ * The pre-existing `session-trace` category records what *past* sessions
5
+ * physically did (files edited, commands run), pulled from the OpenCode
6
+ * SDK by `ingestSessions`. By design, that ingester explicitly skips
7
+ * the *current* session — past sessions are stable, the current one is
8
+ * still being lived.
9
+ *
10
+ * This module fills the gap: it records the **current** session's
11
+ * activity in-place, in the same `session-trace` category, so:
12
+ *
13
+ * - Recall within the current session can surface "what have I
14
+ * touched so far" without scanning the OpenCode SDK at all.
15
+ * - When this session later becomes a *past* session, its data is
16
+ * already in the store and ready for resume by parallel/successor
17
+ * sessions.
18
+ *
19
+ * Design choices:
20
+ *
21
+ * 1. ONE memory per session, keyed by `live:${sessionId}`, updated
22
+ * in place via `upsertBySubject`. The content is a compact
23
+ * rolling summary (files edited + bash commands run + counts),
24
+ * not a full transcript — this is a recall surface, not an audit
25
+ * log. The JSONL file logger is the audit log.
26
+ *
27
+ * 2. We do not record every tool call (read, grep, glob would flood
28
+ * the store with noise). We record:
29
+ * - File-modifying tool calls (write, edit, patch)
30
+ * - Bash commands (rich signal: build/test/install/checkout/…)
31
+ * - We deliberately skip pure-discovery calls.
32
+ *
33
+ * 3. Write debouncing — the recorder accumulates events in memory
34
+ * and only persists when (a) `flush()` is called, or (b) an
35
+ * idle timer expires. This keeps the write-behind buffer from
36
+ * flushing on every keystroke-of-an-edit.
37
+ *
38
+ * 4. Best-effort — every recording path is wrapped at the call
39
+ * site. A failure inside this module must never block a tool call
40
+ * or surface to the agent.
41
+ *
42
+ * 5. Bounded — content is capped at MAX_CONTENT_BYTES, with the
43
+ * oldest events dropped first when the cap is reached. The
44
+ * summary line and total counts stay accurate; only the
45
+ * timeline-detail is truncated.
46
+ */
47
+ import type { MemoryRepository } from "../store/repository.js";
48
+ export interface LiveSessionEvent {
49
+ /** Event kind used for tagging: "write" | "edit" | "patch" | "bash" | "code-map-refresh". Typed as plain `string`, so these values are not compiler-enforced. */
50
+ kind: string;
51
+ /** A short, single-line description (file path, truncated command, …). */
52
+ detail: string;
53
+ /** Timestamp of the event, in epoch milliseconds. */
54
+ at: number;
55
+ }
56
+ /**
57
+ * Per-plugin-instance state. One LiveSessionRecorder is created at
58
+ * plugin load and lives for the lifetime of the OpenCode session.
59
+ */
60
+ export declare class LiveSessionRecorder {
61
+ private readonly repo;
62
+ private readonly sessionId;
63
+ private readonly editedFiles;
64
+ private readonly bashLines;
65
+ private editCount;
66
+ private bashCount;
67
+ private readonly startedAt;
68
+ constructor(repo: MemoryRepository, sessionId: string);
69
+ /**
70
+ * Record a file modification (write / edit / patch). Idempotent on
71
+ * file path — recording the same file twice keeps the path in the
72
+ * edited-files set exactly once but increments the edit counter.
73
+ */
74
+ recordFileEdit(filePath: string, _tool: string): void; // `_tool` is accepted but not read by the implementation
75
+ /**
76
+ * Record a bash command. Text longer than MAX_BASH_LINE characters is cut
77
+ * there and given a trailing "…"; the buffer keeps the newest MAX_BASH_DETAIL entries.
78
+ */
79
+ recordBash(command: string): void;
80
+ /**
81
+ * Render the current state as memory content. Format is stable so
82
+ * BM25 tokenisation behaves predictably.
83
+ */
84
+ private renderContent;
85
+ /**
86
+ * Persist the rolling state as a single memory (upsert by subject,
87
+ * subject `live:${sessionId}`). Nothing is written while no edit and
88
+ * no bash command has been recorded.
89
+ *
90
+ * Tags include all tracked files (as `file:<path>`) plus
91
+ * `live-session` and a `session:${sessionId}` marker so a query can
92
+ * target the current session's trace explicitly.
93
+ */
94
+ flush(): void;
95
+ /** Test-only inspection of internal counters. */
96
+ stats(): {
97
+ editCount: number;
98
+ bashCount: number;
99
+ uniqueFiles: number;
100
+ };
101
+ }
@@ -0,0 +1,173 @@
1
+ /**
2
+ * Live-session activity recorder.
3
+ *
4
+ * The pre-existing `session-trace` category records what *past* sessions
5
+ * physically did (files edited, commands run), pulled from the OpenCode
6
+ * SDK by `ingestSessions`. By design, that ingester explicitly skips
7
+ * the *current* session — past sessions are stable, the current one is
8
+ * still being lived.
9
+ *
10
+ * This module fills the gap: it records the **current** session's
11
+ * activity in-place, in the same `session-trace` category, so:
12
+ *
13
+ * - Recall within the current session can surface "what have I
14
+ * touched so far" without scanning the OpenCode SDK at all.
15
+ * - When this session later becomes a *past* session, its data is
16
+ * already in the store and ready for resume by parallel/successor
17
+ * sessions.
18
+ *
19
+ * Design choices:
20
+ *
21
+ * 1. ONE memory per session, keyed by `live:${sessionId}`, updated
22
+ * in place via `upsertBySubject`. The content is a compact
23
+ * rolling summary (files edited + bash commands run + counts),
24
+ * not a full transcript — this is a recall surface, not an audit
25
+ * log. The JSONL file logger is the audit log.
26
+ *
27
+ * 2. We do not record every tool call (read, grep, glob would flood
28
+ * the store with noise). We record:
29
+ * - File-modifying tool calls (write, edit, patch)
30
+ * - Bash commands (rich signal: build/test/install/checkout/…)
31
+ * - We deliberately skip pure-discovery calls.
32
+ *
33
+ * 3. Write debouncing — the recorder accumulates events in memory
34
+ * and only persists when (a) `flush()` is called, or (b) an
35
+ * idle timer expires. This keeps the write-behind buffer from
36
+ * flushing on every keystroke-of-an-edit.
37
+ *
38
+ * 4. Best-effort — every recording path is wrapped at the call
39
+ * site. A failure inside this module must never block a tool call
40
+ * or surface to the agent.
41
+ *
42
+ * 5. Bounded — content is capped at MAX_CONTENT_BYTES, with the
43
+ * oldest events dropped first when the cap is reached. The
44
+ * summary line and total counts stay accurate; only the
45
+ * timeline-detail is truncated.
46
+ */
47
+ const CATEGORY = "session-trace"; // category of the live-session memory written by flush()
48
+ /** Hard cap on the rolling memory content size. Compared against `String.length` (UTF-16 code units), not an encoded byte count. */
49
+ const MAX_CONTENT_BYTES = 4096;
50
+ /** Hard cap on the list of bash commands kept in detail (oldest dropped). */
51
+ const MAX_BASH_DETAIL = 30;
52
+ /** Hard cap on the list of edited files kept (the earliest-inserted path is dropped first). */
53
+ const MAX_EDITED_FILES = 60;
54
+ /** Truncation length for any individual bash command stored; a "…" is appended after the cut. */
55
+ const MAX_BASH_LINE = 160;
/**
 * Accumulates the current session's file edits and bash commands in
 * memory and, on `flush()`, writes them to the repository as one rolling
 * memory whose subject is `live:<sessionId>`.
 */
export class LiveSessionRecorder {
    repo;
    sessionId;
    editedFiles = new Set();
    bashLines = [];
    editCount = 0;
    bashCount = 0;
    startedAt = Date.now();
    /**
     * @param repo Store exposing `upsertBySubject(memory)`.
     * @param sessionId Identifier used in the memory subject, tags and source.
     */
    constructor(repo, sessionId) {
        this.repo = repo;
        this.sessionId = sessionId;
    }
    /**
     * Note that a file was modified. The edit counter always goes up; the
     * path is tracked once no matter how often it is recorded. When more
     * than MAX_EDITED_FILES paths are tracked, the earliest-inserted one
     * is dropped. `_tool` is accepted but not used.
     */
    recordFileEdit(filePath, _tool) {
        this.editCount += 1;
        this.editedFiles.add(filePath);
        if (this.editedFiles.size <= MAX_EDITED_FILES) {
            return;
        }
        // Sets iterate in insertion order, so the first element is the oldest.
        const [oldest] = this.editedFiles;
        if (oldest !== undefined) {
            this.editedFiles.delete(oldest);
        }
    }
    /**
     * Note that a bash command ran. Commands longer than MAX_BASH_LINE
     * characters are cut there and given a trailing "…". Only the newest
     * MAX_BASH_DETAIL commands are kept; the counter covers all of them.
     */
    recordBash(command) {
        this.bashCount += 1;
        let entry = command;
        if (command.length > MAX_BASH_LINE) {
            entry = command.slice(0, MAX_BASH_LINE) + "…";
        }
        this.bashLines.push(entry);
        if (this.bashLines.length > MAX_BASH_DETAIL) {
            this.bashLines.shift();
        }
    }
    /**
     * Build the memory text: a summary line with the totals, then the
     * tracked files, then the retained bash commands. If the text is
     * longer than MAX_CONTENT_BYTES (measured as string length), the
     * oldest bash commands are removed — from the recorder's buffer as
     * well as from the text — until it fits, and a final hard cut is
     * applied if it still does not.
     */
    renderContent() {
        const plural = (count) => (count === 1 ? "" : "s");
        const ageMin = Math.round((Date.now() - this.startedAt) / 60000);
        const parts = [
            `Live session ${this.sessionId} (started ${ageMin}m ago): ${this.editCount} file edit${plural(this.editCount)}, ${this.bashCount} bash command${plural(this.bashCount)}.`,
        ];
        if (this.editedFiles.size > 0) {
            parts.push("Files edited: " + Array.from(this.editedFiles).join(", "));
        }
        if (this.bashLines.length > 0) {
            parts.push("Recent bash commands:", ...this.bashLines.map((cmd) => " $ " + cmd));
        }
        let content = parts.join("\n");
        // Shed the oldest command line until the text fits.
        while (content.length > MAX_CONTENT_BYTES && this.bashLines.length > 0) {
            this.bashLines.shift();
            const firstCommand = parts.findIndex((line) => line.startsWith(" $ "));
            if (firstCommand < 0) {
                break;
            }
            parts.splice(firstCommand, 1);
            content = parts.join("\n");
        }
        // Safety net for text that is too long even without command lines.
        if (content.length > MAX_CONTENT_BYTES) {
            content = content.slice(0, MAX_CONTENT_BYTES - 1) + "…";
        }
        return content;
    }
    /**
     * Write the rolling state to the repository via `upsertBySubject`,
     * under the subject `live:<sessionId>`. Does nothing while no edit
     * and no bash command has been recorded.
     *
     * Tags are `live-session`, `session:<sessionId>`, and one
     * `file:<path>` per tracked file. The memory is written unpinned.
     */
    flush() {
        const nothingRecorded = this.editCount === 0 && this.bashCount === 0;
        if (nothingRecorded) {
            return;
        }
        const sessionRef = `session:${this.sessionId}`;
        const fileTags = Array.from(this.editedFiles, (file) => `file:${file}`);
        this.repo.upsertBySubject({
            category: CATEGORY,
            subject: `live:${this.sessionId}`,
            content: this.renderContent(),
            tags: ["live-session", sessionRef, ...fileTags],
            source: sessionRef,
            // Unpinned: a live trace is transient state.
            pinned: false,
        });
    }
    /** Test-only view of the counters and the number of tracked files. */
    stats() {
        const { editCount, bashCount } = this;
        return { editCount, bashCount, uniqueFiles: this.editedFiles.size };
    }
}
@@ -0,0 +1,28 @@
1
+ /**
2
+ * project-notes.ts — ingest the small set of root-level files where
3
+ * humans put house rules for AI agents.
4
+ *
5
+ * These are the files an agent should know about WITHIN THE FIRST
6
+ * RECALL of a session: AGENTS.md, CLAUDE.md, GEMINI.md, .cursorrules,
7
+ * .windsurfrules, COPILOT.md. They typically contain "in this repo,
8
+ * always do X, never do Y, our naming convention is Z" — exactly the
9
+ * kind of facts that, missed, lead to revert PRs.
10
+ *
11
+ * **Whole-file content, not headings.** Unlike `docs.ts` (which slices
12
+ * into sections), these files are short (typically under 4 KB) and
13
+ * their structure is rarely worth indexing — every line might be
14
+ * load-bearing. One memory per file with the full content (truncated
15
+ * to MAX_NOTE_BYTES) is the right granularity.
16
+ *
17
+ * **Root-level only.** No recursion. A `monorepo-package/.cursorrules`
18
+ * is a per-package instruction that belongs to the package's owner,
19
+ * not Diane.
20
+ */
21
+ import type { MemoryRepository } from "../store/repository.js";
22
+ export interface ProjectNotesIngestOptions {
23
+ maxBytes?: number; // optional; presumably overrides the MAX_NOTE_BYTES truncation limit named in the module header — confirm in project-notes.js
24
+ }
25
+ export interface ProjectNotesIngestResult {
26
+ filesFound: number; // presumably the number of root-level notes files found — confirm in project-notes.js
27
+ }
28
+ export declare function ingestProjectNotes(repo: MemoryRepository, root: string, opts?: ProjectNotesIngestOptions): Promise<ProjectNotesIngestResult>; // async; `opts` may be omitted