opencode-diane 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +180 -0
- package/LICENSE +21 -0
- package/README.md +206 -0
- package/WIKI.md +1430 -0
- package/dist/index.d.ts +28 -0
- package/dist/index.js +1632 -0
- package/dist/ingest/adaptive.d.ts +47 -0
- package/dist/ingest/adaptive.js +182 -0
- package/dist/ingest/code-health.d.ts +58 -0
- package/dist/ingest/code-health.js +202 -0
- package/dist/ingest/code-map.d.ts +71 -0
- package/dist/ingest/code-map.js +670 -0
- package/dist/ingest/cross-refs.d.ts +59 -0
- package/dist/ingest/cross-refs.js +1207 -0
- package/dist/ingest/docs.d.ts +49 -0
- package/dist/ingest/docs.js +325 -0
- package/dist/ingest/git.d.ts +77 -0
- package/dist/ingest/git.js +390 -0
- package/dist/ingest/live-session.d.ts +101 -0
- package/dist/ingest/live-session.js +173 -0
- package/dist/ingest/project-notes.d.ts +28 -0
- package/dist/ingest/project-notes.js +102 -0
- package/dist/ingest/project.d.ts +35 -0
- package/dist/ingest/project.js +430 -0
- package/dist/ingest/session-snapshot.d.ts +63 -0
- package/dist/ingest/session-snapshot.js +94 -0
- package/dist/ingest/sessions.d.ts +29 -0
- package/dist/ingest/sessions.js +164 -0
- package/dist/ingest/tables.d.ts +52 -0
- package/dist/ingest/tables.js +360 -0
- package/dist/mining/skill-miner.d.ts +53 -0
- package/dist/mining/skill-miner.js +234 -0
- package/dist/search/bm25.d.ts +81 -0
- package/dist/search/bm25.js +334 -0
- package/dist/search/e5-embedder.d.ts +30 -0
- package/dist/search/e5-embedder.js +91 -0
- package/dist/search/embed-pass.d.ts +26 -0
- package/dist/search/embed-pass.js +43 -0
- package/dist/search/embedder.d.ts +58 -0
- package/dist/search/embedder.js +85 -0
- package/dist/search/inverted-index.d.ts +51 -0
- package/dist/search/inverted-index.js +139 -0
- package/dist/search/ppr.d.ts +44 -0
- package/dist/search/ppr.js +118 -0
- package/dist/search/tokenize.d.ts +26 -0
- package/dist/search/tokenize.js +98 -0
- package/dist/store/eviction.d.ts +16 -0
- package/dist/store/eviction.js +37 -0
- package/dist/store/repository.d.ts +222 -0
- package/dist/store/repository.js +420 -0
- package/dist/store/sqlite-store.d.ts +89 -0
- package/dist/store/sqlite-store.js +252 -0
- package/dist/store/vector-store.d.ts +66 -0
- package/dist/store/vector-store.js +160 -0
- package/dist/types.d.ts +385 -0
- package/dist/types.js +9 -0
- package/dist/utils/file-log.d.ts +87 -0
- package/dist/utils/file-log.js +215 -0
- package/dist/utils/peer-detection.d.ts +45 -0
- package/dist/utils/peer-detection.js +90 -0
- package/dist/utils/shell.d.ts +43 -0
- package/dist/utils/shell.js +110 -0
- package/dist/utils/usage-skill.d.ts +42 -0
- package/dist/utils/usage-skill.js +129 -0
- package/dist/utils/xlsx.d.ts +36 -0
- package/dist/utils/xlsx.js +270 -0
- package/grammars/tree-sitter-c.wasm +0 -0
- package/grammars/tree-sitter-c_sharp.wasm +0 -0
- package/grammars/tree-sitter-cpp.wasm +0 -0
- package/grammars/tree-sitter-css.wasm +0 -0
- package/grammars/tree-sitter-go.wasm +0 -0
- package/grammars/tree-sitter-html.wasm +0 -0
- package/grammars/tree-sitter-java.wasm +0 -0
- package/grammars/tree-sitter-javascript.wasm +0 -0
- package/grammars/tree-sitter-json.wasm +0 -0
- package/grammars/tree-sitter-php.wasm +0 -0
- package/grammars/tree-sitter-python.wasm +0 -0
- package/grammars/tree-sitter-rust.wasm +0 -0
- package/grammars/tree-sitter-typescript.wasm +0 -0
- package/package.json +80 -0
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Git history ingestion — fully convention-agnostic.
|
|
3
|
+
*
|
|
4
|
+
* The earlier version classified commits by parsing the *subject
|
|
5
|
+
* line* (conventional commits, bracket tags, gitmoji, English
|
|
6
|
+
* keywords). That is unreliable on real repositories, many of which
|
|
7
|
+
* have no commit-message culture at all ("wip", "fix", ".", "update",
|
|
8
|
+
* non-English text, empty subjects). Message-derived "flavor" was
|
|
9
|
+
* noise dressed up as signal.
|
|
10
|
+
*
|
|
11
|
+
* This version derives everything from STRUCTURE — facts about what
|
|
12
|
+
* the commit physically did, which are true regardless of how (or
|
|
13
|
+
* whether) the author described it:
|
|
14
|
+
*
|
|
15
|
+
* - diff shape : files touched, lines +/-, files created/deleted,
|
|
16
|
+
* net direction. From `git log --numstat --summary`.
|
|
17
|
+
* - co-change : pairs of files modified in the same commit,
|
|
18
|
+
* counted across history (mechanical/huge commits
|
|
19
|
+
* skipped).
|
|
20
|
+
* - churn : how often each file changes — a stability signal.
|
|
21
|
+
* - recency : which files were touched in the most recent
|
|
22
|
+
* commits.
|
|
23
|
+
*
|
|
24
|
+
* The commit subject is still STORED, verbatim, inside the memory
|
|
25
|
+
* content — it is text the agent may legitimately search for — but it
|
|
26
|
+
* never drives tags or categorisation. It is data, not signal.
|
|
27
|
+
*
|
|
28
|
+
* Output is hard-capped by `gitHistoryDepth`. Re-running ingest is
|
|
29
|
+
* idempotent thanks to insertIfMissing on the repository.
|
|
30
|
+
*/
|
|
31
|
+
import { isGitRepo, runGit } from "../utils/shell.js";
|
|
32
|
+
// Category under which every memory written by this module is stored,
// and the key passed to `repo.setIngestedAt` once an ingest completes.
const CATEGORY = "git-history";
// Co-change: pairs touched together this many times are "coupled".
// This is the default for ingestGitHistory's `coChangeMinOccurrences`.
const COCHANGE_MIN_TIMES = 3;
// Commits touching more files than this are mechanical (mass reformat,
// vendoring, lockfile churn) — their file pairs are noise, skip them.
const COCHANGE_MAX_FILES = 8;
// Upper bound on how many co-change pair memories one ingest writes;
// pairs are ranked by co-occurrence count before the cut.
const PAIR_LIMIT = 200;
// Churn: a file modified in at least this fraction of scanned commits,
// AND at least this many times absolutely, is flagged as high-churn.
const CHURN_MIN_FRACTION = 0.15;
const CHURN_MIN_ABSOLUTE = 4;
// Upper bound on how many high-churn file memories one ingest writes;
// files are ranked by change count before the cut.
const CHURN_LIMIT = 60;
// Recency: how many of the most-recent commits feed the "recently
// changed files" memory.
const RECENCY_WINDOW = 12;
// How many file paths a per-commit memory lists in its content before
// the remainder is collapsed into a "… (+N)" suffix.
const MAX_FILES_PER_COMMIT_IN_MEMORY = 8;
|
|
48
|
+
/**
 * Decide whether a commit is "balanced churn": a sizeable diff whose
 * added and deleted line totals are nearly equal.
 *
 * Near-equality is the signature of content being moved or reformatted
 * rather than written: with `--no-renames` a rename shows up as +N on
 * the new path and -N on the old one, and doc migrations or reformats
 * look much the same. The caller uses this to skip the per-commit
 * memory for such commits.
 *
 * Rules, all pure arithmetic on the diff stat:
 *   - no files                          → false
 *   - any file with a negative count    → false (binary, shape unknown)
 *   - fewer than 25 added or 25 deleted → false (small commits are spared)
 *   - otherwise true when smaller total / larger total is at least 0.92
 *
 * @param files Per-file stats, each carrying numeric `added` and `deleted`.
 * @returns true when the commit matches the balanced-churn fingerprint.
 */
export function isBalancedChurnCommit(files) {
    if (files.length === 0)
        return false;
    // A binary entry (reported as negative counts) makes the totals meaningless.
    if (files.some((entry) => entry.added < 0 || entry.deleted < 0))
        return false;
    const plus = files.reduce((sum, entry) => sum + entry.added, 0);
    const minus = files.reduce((sum, entry) => sum + entry.deleted, 0);
    const smaller = Math.min(plus, minus);
    const larger = Math.max(plus, minus);
    // Both sides must reach the 25-line floor, i.e. the smaller one must.
    if (smaller < 25)
        return false;
    return smaller / larger >= 0.92;
}
|
|
81
|
+
/**
 * Ingest structural git-history memories for the repository at `root`.
 *
 * Runs a single `git log` limited to `depth` commits, parses it, and
 * writes four kinds of memory through `repo.insertIfMissing`, in this
 * order: per-commit memories, co-change pairs, high-churn files, and
 * one recency summary. It then stamps the category with
 * `repo.setIngestedAt`.
 *
 * @param repo  Memory repository; this function calls its
 *              `insertIfMissing` and `setIngestedAt` methods.
 * @param root  Directory handed to `isGitRepo` and `runGit`.
 * @param depth Commit limit, passed to git as `-<depth>`.
 * @param coChangeMaxCommits  When more commits than this were scanned,
 *              the co-change pass is skipped entirely (the other
 *              passes still run). Defaults to no limit.
 * @param coChangeMinOccurrences  Minimum number of shared commits for a
 *              file pair to be stored as coupled.
 * @returns Counters describing what was scanned and written. When
 *          `root` is not a git repository, or git produced no output,
 *          the zeroed counters are returned early and `setIngestedAt`
 *          is not called.
 */
export async function ingestGitHistory(repo, root, depth, coChangeMaxCommits = Infinity, coChangeMinOccurrences = COCHANGE_MIN_TIMES) {
    const result = {
        scanned: 0,
        commitMemories: 0,
        coChangeMemories: 0,
        churnMemories: 0,
        recencyMemories: 0,
        balancedChurnSkipped: 0,
        shapeTagCounts: {},
    };
    if (!(await isGitRepo(root)))
        return result;
    // `--numstat` gives per-file added/deleted line counts; `--summary`
    // adds "create mode" / "delete mode" lines so new and removed files
    // can be told apart. Both are structural, language-neutral.
    const SEP = "\u241F";
    const FMT = `${SEP}%H${SEP}%P${SEP}%at${SEP}%s${SEP}`;
    const stdout = await runGit([
        "log",
        `-${depth}`,
        "--no-color",
        "--numstat",
        "--summary",
        "--no-renames",
        `--pretty=format:${FMT}`,
    ], root);
    if (!stdout)
        return result;
    const commits = parseGitLog(stdout, SEP);
    result.scanned = commits.length;
    // Merge commits rarely carry their own diff, so the per-commit, churn
    // and recency passes all work on this filtered list.
    const nonMergeCommits = commits.filter((c) => !c.isMerge);
    // ── Per-commit memories — every non-merge commit gets one, EXCEPT
    // balanced-churn commits (renames / reformats / doc migrations):
    // they flood keyword recall with no logic signal. They still feed
    // the co-change and churn passes below.
    for (const c of nonMergeCommits) {
        if (isBalancedChurnCommit(c.files)) {
            result.balancedChurnSkipped += 1;
            continue;
        }
        const shape = deriveShapeTags(c.files);
        for (const t of shape) {
            result.shapeTagCounts[t] = (result.shapeTagCounts[t] ?? 0) + 1;
        }
        ingestCommitMemory(repo, c, shape);
        result.commitMemories += 1;
    }
    // ── File co-modification — the one super-linear pass, so the caller
    // can cap it: above the cutoff it is skipped entirely.
    if (commits.length <= coChangeMaxCommits) {
        result.coChangeMemories = writeCoChangeMemories(repo, commits, coChangeMinOccurrences);
    }
    result.churnMemories = writeChurnMemories(repo, nonMergeCommits);
    result.recencyMemories = writeRecencyMemory(repo, nonMergeCommits);
    repo.setIngestedAt(CATEGORY, Date.now());
    return result;
}
/**
 * Count how often each pair of files is modified in the same commit and
 * store the most frequent pairs. Merge commits and commits touching more
 * than COCHANGE_MAX_FILES files are ignored. Cost is
 * O(commits × files-per-commit²).
 *
 * @returns the number of `insertIfMissing` calls made (at most PAIR_LIMIT).
 */
function writeCoChangeMemories(repo, commits, minOccurrences) {
    const pairCounts = new Map();
    for (const c of commits) {
        if (c.isMerge || c.files.length > COCHANGE_MAX_FILES)
            continue;
        // Sorting makes the key order-independent: (a, b) and (b, a) collapse.
        const paths = c.files.map((f) => f.path).sort();
        for (let i = 0; i < paths.length; i++) {
            for (let j = i + 1; j < paths.length; j++) {
                const key = `${paths[i]}\u0000${paths[j]}`;
                pairCounts.set(key, (pairCounts.get(key) ?? 0) + 1);
            }
        }
    }
    const pairs = [];
    for (const [k, n] of pairCounts) {
        if (n < minOccurrences)
            continue;
        const sep = k.indexOf("\u0000");
        pairs.push({ a: k.slice(0, sep), b: k.slice(sep + 1), n });
    }
    pairs.sort((x, y) => y.n - x.n);
    let written = 0;
    for (const p of pairs.slice(0, PAIR_LIMIT)) {
        repo.insertIfMissing({
            category: CATEGORY,
            subject: `co-change:${p.a}`,
            content: `${p.a} and ${p.b} were modified together in ${p.n} of the last ` +
                `${commits.length} commits — they are likely coupled.`,
            tags: ["co-change", p.a, p.b],
            source: "git:co-occurrence",
        });
        written += 1;
    }
    return written;
}
/**
 * Flag files that change often. A file qualifies when it was touched in
 * at least CHURN_MIN_ABSOLUTE non-merge commits AND in at least
 * CHURN_MIN_FRACTION of them; the top CHURN_LIMIT by count are stored.
 *
 * @returns the number of `insertIfMissing` calls made.
 */
function writeChurnMemories(repo, nonMergeCommits) {
    const churn = new Map();
    for (const c of nonMergeCommits) {
        for (const f of c.files) {
            churn.set(f.path, (churn.get(f.path) ?? 0) + 1);
        }
    }
    // `|| 1` keeps the fraction below from dividing by zero.
    const nonMerge = nonMergeCommits.length || 1;
    const churnRanked = Array.from(churn.entries())
        .filter(([, n]) => n >= CHURN_MIN_ABSOLUTE && n / nonMerge >= CHURN_MIN_FRACTION)
        .sort((a, b) => b[1] - a[1])
        .slice(0, CHURN_LIMIT);
    let written = 0;
    for (const [path, n] of churnRanked) {
        const pct = Math.round((n / nonMerge) * 100);
        repo.insertIfMissing({
            category: CATEGORY,
            subject: `churn:${path}`,
            content: `${path} is high-churn: changed in ${n} of the last ${nonMerge} ` +
                `non-merge commits (${pct}%). Treat it as a hot, frequently-edited file.`,
            tags: ["churn", "hot-file", path],
            source: "git:churn",
        });
        written += 1;
    }
    return written;
}
/**
 * Store one summary of the files touched by the RECENCY_WINDOW most
 * recent non-merge commits, listed in first-seen order (the first 25 are
 * spelled out, the rest collapsed into a count).
 *
 * @returns 1 when the memory was submitted, 0 when there were no
 *          non-merge commits.
 */
function writeRecencyMemory(repo, nonMergeCommits) {
    const recentNonMerge = nonMergeCommits.slice(0, RECENCY_WINDOW);
    if (recentNonMerge.length === 0)
        return 0;
    // A Set keeps first-insertion order, which here is most-recent-first.
    const recentFiles = [...new Set(recentNonMerge.flatMap((c) => c.files.map((f) => f.path)))];
    const newest = recentNonMerge[0];
    repo.insertIfMissing({
        category: CATEGORY,
        subject: "recency:recently-changed",
        content: `Files changed in the last ${recentNonMerge.length} non-merge commits ` +
            `(most recent first): ${recentFiles.slice(0, 25).join(", ")}` +
            (recentFiles.length > 25 ? `, … (+${recentFiles.length - 25})` : "") +
            `. Most recent commit: ${newest.hash.slice(0, 8)}.`,
        tags: ["recency", "recently-changed"],
        source: "git:recency",
    });
    return 1;
}
|
|
223
|
+
/* ─── structural shape derivation (no message parsing) ──────────────── */
|
|
224
|
+
/**
 * Turn a commit's file list into structural tags. Every tag states a
 * fact about the diff itself; nothing is inferred from the message.
 *
 * Tags, in the order they are emitted:
 *   - "empty"          no files at all (returned alone)
 *   - "single-file"    exactly one file
 *   - "many-files"     ten or more files
 *   - "large-diff"     added + deleted lines is at least 500
 *   - "tiny-diff"      added + deleted lines is between 1 and 10
 *   - "adds-files"     at least one created file, and created files are at least half
 *   - "removes-files"  at least one deleted file, and deleted files are at least half
 *   - "net-removal"    deleted lines exceed twice the added lines and are at least 30
 *   - "net-addition"   added lines exceed twice the deleted lines and are at least 30
 *   - "touches-binary" some file has a negative line count
 *
 * Files with a negative count are excluded from the line totals.
 *
 * @param files Entries carrying `status`, `added` and `deleted`.
 * @returns the applicable tags.
 */
export function deriveShapeTags(files) {
    const fileCount = files.length;
    if (fileCount === 0)
        return ["empty"];
    const countWithStatus = (wanted) => files.filter((f) => f.status === wanted).length;
    const createdCount = countWithStatus("created");
    const deletedCount = countWithStatus("deleted");
    let linesAdded = 0;
    let linesDeleted = 0;
    let sawBinary = false;
    for (const { added, deleted } of files) {
        if (added < 0 || deleted < 0) {
            sawBinary = true;
            continue;
        }
        linesAdded += added;
        linesDeleted += deleted;
    }
    const volume = linesAdded + linesDeleted;
    const netRemoval = linesDeleted > linesAdded * 2 && linesDeleted >= 30;
    // Net-addition is only considered when net-removal did not apply.
    const netAddition = !netRemoval && linesAdded > linesDeleted * 2 && linesAdded >= 30;
    // Ordered rule table: the output order follows the table order.
    const rules = [
        [fileCount === 1, "single-file"],
        [fileCount >= 10, "many-files"],
        [volume >= 500, "large-diff"],
        [volume > 0 && volume <= 10, "tiny-diff"],
        [createdCount > 0 && createdCount >= fileCount / 2, "adds-files"],
        [deletedCount > 0 && deletedCount >= fileCount / 2, "removes-files"],
        [netRemoval, "net-removal"],
        [netAddition, "net-addition"],
        [sawBinary, "touches-binary"],
    ];
    const tags = [];
    for (const [applies, tag] of rules) {
        if (applies)
            tags.push(tag);
    }
    return tags;
}
|
|
274
|
+
/**
 * Write the per-commit memory for one parsed commit.
 *
 * The memory's subject is the first touched path (or `tree:<short-hash>`
 * when the commit touched no files). Its content states the short hash,
 * the commit date, the verbatim (length-capped) message, the file and
 * line counts, and up to MAX_FILES_PER_COMMIT_IN_MEMORY paths. Tags are
 * the supplied shape tags plus the first four paths — nothing is derived
 * from the message.
 *
 * @param repo      Memory repository; `insertIfMissing` is called once.
 * @param c         Parsed commit (`hash`, `subject`, `unixTime`, `files`).
 * @param shapeTags Structural tags already derived for this commit.
 */
function ingestCommitMemory(repo, c, shapeTags) {
    const shortHash = c.hash.slice(0, 8);
    const touched = c.files.map((f) => f.path);
    // Spell out the first few paths; collapse the remainder into a count.
    const hiddenCount = touched.length - MAX_FILES_PER_COMMIT_IN_MEMORY;
    let fileList = touched.slice(0, MAX_FILES_PER_COMMIT_IN_MEMORY).join(", ");
    if (hiddenCount > 0) {
        fileList += `, … (+${hiddenCount})`;
    }
    // Only positive counts contribute, which leaves out the negative
    // placeholders used for binary files.
    const sumPositive = (pick) => c.files.reduce((acc, f) => {
        const value = pick(f);
        return value > 0 ? acc + value : acc;
    }, 0);
    const linesAdded = sumPositive((f) => f.added);
    const linesDeleted = sumPositive((f) => f.deleted);
    let when = "unknown-date";
    if (c.unixTime) {
        when = new Date(c.unixTime * 1000).toISOString().slice(0, 10);
    }
    // The message is stored verbatim and quoted as plain searchable text;
    // it is never parsed.
    const message = c.subject.trim();
    const messagePart = message
        ? `Message: "${truncate(message, 140)}". `
        : "Message: (empty). ";
    repo.insertIfMissing({
        category: CATEGORY,
        subject: touched[0] ?? `tree:${shortHash}`,
        content: `Commit ${shortHash} (${when}). ${messagePart}` +
            `Changed ${c.files.length} file(s), +${linesAdded}/-${linesDeleted} lines. ` +
            `Files: ${fileList || "(none)"}.`,
        tags: [...shapeTags, ...touched.slice(0, 4)],
        source: `git:${c.hash}`,
    });
}
|
|
309
|
+
/* ─── parsing ───────────────────────────────────────────────────────── */
|
|
310
|
+
/**
 * Shorten `s` to at most `n` UTF-16 code units, ending with "…" when
 * anything was cut.
 *
 * - Strings already within the limit are returned unchanged.
 * - A non-positive `n` yields "" for any non-empty input (there is no
 *   room even for the ellipsis).
 * - The cut never lands between the two halves of a surrogate pair, so
 *   the result never contains a lone surrogate; in that case the result
 *   is one unit shorter than `n`.
 *
 * @param s Text to shorten.
 * @param n Maximum length of the result, ellipsis included.
 * @returns the original or shortened string.
 */
function truncate(s, n) {
    if (s.length <= n)
        return s;
    if (n <= 0)
        return "";
    let cut = n - 1;
    if (cut > 0) {
        const lastKept = s.charCodeAt(cut - 1);
        // A high surrogate at the end would be orphaned from its low half.
        if (lastKept >= 0xd800 && lastKept <= 0xdbff)
            cut -= 1;
    }
    return s.slice(0, cut) + "…";
}
|
|
313
|
+
/**
 * Parse `git log --numstat --summary` output framed by the SEP-based
 * pretty format. After splitting on `sep`, the first chunk is the text
 * before the first separator and every commit then occupies five chunks:
 *
 *   HASH, PARENTS, UNIXTIME, SUBJECT, BODY
 *
 * where BODY holds the per-file lines:
 *
 *   <added>\t<deleted>\t<path>      (numstat; binary files show "-\t-")
 *   create mode 100644 <path>       (summary)
 *   delete mode 100644 <path>       (summary)
 *
 * Binary files are reported with `added` and `deleted` of -1. A path
 * that appears only as a "delete mode" summary line is appended as a
 * zero-line "deleted" file. "mode change" / "rename" summary lines and
 * anything unrecognised are ignored. Parsing stops at the first chunk
 * group whose hash is empty.
 *
 * @param stdout Raw output of the git command.
 * @param sep    The separator string used in the pretty format.
 * @returns one `{ hash, subject, unixTime, files, isMerge }` per commit,
 *          in output order; `isMerge` is true when more than one parent
 *          hash was listed.
 */
function parseGitLog(stdout, sep) {
    // Compiled once rather than once per line.
    const createLine = /^\s+create mode \d+ (.+)$/;
    const deleteLine = /^\s+delete mode \d+ (.+)$/;
    const ignoredSummaryLine = /^\s+(mode change|rename) /;
    const numstatLine = /^(-|\d+)\t(-|\d+)\t(.+)$/;
    const commits = [];
    const chunks = stdout.split(sep);
    for (let i = 1; i < chunks.length; i += 5) {
        const hash = (chunks[i] ?? "").trim();
        if (!hash)
            break;
        const parents = (chunks[i + 1] ?? "").trim();
        const unixTime = parseInt((chunks[i + 2] ?? "").trim(), 10) || 0;
        const subject = (chunks[i + 3] ?? "").trim();
        const body = chunks[i + 4] ?? "";
        const created = new Set();
        const removed = new Set();
        const numstat = [];
        for (const rawLine of body.split("\n")) {
            const line = rawLine.replace(/\r$/, "");
            if (!line.trim())
                continue;
            const create = line.match(createLine);
            if (create) {
                created.add(create[1].trim());
                continue;
            }
            const del = line.match(deleteLine);
            if (del) {
                removed.add(del[1].trim());
                continue;
            }
            if (ignoredSummaryLine.test(line))
                continue;
            const ns = line.match(numstatLine);
            if (ns) {
                const added = ns[1] === "-" ? -1 : parseInt(ns[1], 10);
                const deleted = ns[2] === "-" ? -1 : parseInt(ns[2], 10);
                numstat.push({ path: ns[3].trim(), added, deleted });
            }
            // anything else is skipped
        }
        // Status can only be assigned once the whole body has been read,
        // because summary lines follow the numstat lines.
        const files = [];
        const knownPaths = new Set();
        for (const entry of numstat) {
            knownPaths.add(entry.path);
            files.push({
                path: entry.path,
                added: entry.added,
                deleted: entry.deleted,
                status: created.has(entry.path)
                    ? "created"
                    : removed.has(entry.path)
                        ? "deleted"
                        : "modified",
            });
        }
        // Fold in deletes that appear only in the summary block. The Set
        // lookup keeps this linear even for commits deleting many files.
        for (const path of removed) {
            if (!knownPaths.has(path)) {
                files.push({ path, added: 0, deleted: 0, status: "deleted" });
            }
        }
        const isMerge = parents.split(/\s+/).filter(Boolean).length > 1;
        commits.push({ hash, subject, unixTime, files, isMerge });
    }
    return commits;
}
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Live-session activity recorder.
|
|
3
|
+
*
|
|
4
|
+
* The pre-existing `session-trace` category records what *past* sessions
|
|
5
|
+
* physically did (files edited, commands run), pulled from the OpenCode
|
|
6
|
+
* SDK by `ingestSessions`. By design, that ingester explicitly skips
|
|
7
|
+
* the *current* session — past sessions are stable, the current one is
|
|
8
|
+
* still being lived.
|
|
9
|
+
*
|
|
10
|
+
* This module fills the gap: it records the **current** session's
|
|
11
|
+
* activity in-place, in the same `session-trace` category, so:
|
|
12
|
+
*
|
|
13
|
+
* - Recall within the current session can surface "what have I
|
|
14
|
+
* touched so far" without scanning the OpenCode SDK at all.
|
|
15
|
+
* - When this session later becomes a *past* session, its data is
|
|
16
|
+
* already in the store and ready for resume by parallel/successor
|
|
17
|
+
* sessions.
|
|
18
|
+
*
|
|
19
|
+
* Design choices:
|
|
20
|
+
*
|
|
21
|
+
* 1. ONE memory per session, keyed by `live:${sessionId}`, updated
|
|
22
|
+
* in place via `upsertBySubject`. The content is a compact
|
|
23
|
+
* rolling summary (files edited + bash commands run + counts),
|
|
24
|
+
* not a full transcript — this is a recall surface, not an audit
|
|
25
|
+
* log. The JSONL file logger is the audit log.
|
|
26
|
+
*
|
|
27
|
+
* 2. We do not record every tool call (read, grep, glob would flood
|
|
28
|
+
* the store with noise). We record:
|
|
29
|
+
* - File-modifying tool calls (write, edit, patch)
|
|
30
|
+
* - Bash commands (rich signal: build/test/install/checkout/…)
|
|
31
|
+
* - We deliberately skip pure-discovery calls.
|
|
32
|
+
*
|
|
33
|
+
* 3. Write debouncing — the recorder accumulates events in memory
|
|
34
|
+
* and only persists when (a) `flush()` is called, or (b) an
|
|
35
|
+
* idle timer expires. This keeps the write-behind buffer from
|
|
36
|
+
* flushing on every keystroke-of-an-edit.
|
|
37
|
+
*
|
|
38
|
+
* 4. Best-effort — every recording path is wrapped at the call
|
|
39
|
+
* site. A failure inside this module must never block a tool call
|
|
40
|
+
* or surface to the agent.
|
|
41
|
+
*
|
|
42
|
+
* 5. Bounded — content is capped at MAX_CONTENT_BYTES, with the
|
|
43
|
+
* oldest events dropped first when the cap is reached. The
|
|
44
|
+
* summary line and total counts stay accurate; only the
|
|
45
|
+
* timeline-detail is truncated.
|
|
46
|
+
*/
|
|
47
|
+
import type { MemoryRepository } from "../store/repository.js";
|
|
48
|
+
/**
 * Shape of a single live-session activity event: what kind of action
 * it was, a one-line description, and when it happened.
 *
 * NOTE(review): none of the `LiveSessionRecorder` methods declared in
 * this file accept or return this type — confirm where it is consumed.
 */
export interface LiveSessionEvent {
    /** "write" | "edit" | "patch" | "bash" | "code-map-refresh" — for tagging. */
    kind: string;
    /** A short, single-line description (file path, truncated command, …). */
    detail: string;
    /** Epoch ms. */
    at: number;
}
|
|
56
|
+
/**
 * Per-plugin-instance state. One LiveSessionRecorder is created at
 * plugin load and lives for the lifetime of the OpenCode session.
 *
 * It accumulates file edits and bash commands in memory and writes them
 * out as a single rolling memory when `flush()` is called.
 */
export declare class LiveSessionRecorder {
    /** Repository the rolling memory is upserted into. */
    private readonly repo;
    /** Id of the session being recorded; used in the subject, tags and source. */
    private readonly sessionId;
    /** Distinct edited file paths, in insertion order (bounded; oldest dropped). */
    private readonly editedFiles;
    /** Most recent bash commands, already truncated (bounded; oldest dropped). */
    private readonly bashLines;
    /** Total number of file edits recorded, including repeats of the same path. */
    private editCount;
    /** Total number of bash commands recorded, including ones dropped from the buffer. */
    private bashCount;
    /** Epoch ms at which the recorder was constructed. */
    private readonly startedAt;
    /**
     * @param repo      Repository that receives the live-session memory.
     * @param sessionId Id of the session this recorder belongs to.
     */
    constructor(repo: MemoryRepository, sessionId: string);
    /**
     * Record a file modification (write / edit / patch). Idempotent on
     * file path — recording the same file twice keeps the path in the
     * edited-files set exactly once but increments the edit counter.
     * When the set outgrows its cap, the oldest tracked path is dropped.
     *
     * @param filePath Path of the modified file.
     * @param _tool    Name of the tool that made the edit; the
     *                 implementation currently does not use it.
     */
    recordFileEdit(filePath: string, _tool: string): void;
    /**
     * Record a bash command. Command text longer than MAX_BASH_LINE
     * characters is cut to that length and suffixed with "…"; the buffer
     * is capped at MAX_BASH_DETAIL entries, oldest dropped first. The
     * bash counter always increments.
     *
     * @param command The command line that was run.
     */
    recordBash(command: string): void;
    /**
     * Render the current state as memory content: a summary line with the
     * counts, the edited files, then the recent bash commands. Format is
     * stable so BM25 tokenisation behaves predictably. When the text
     * exceeds the content cap, the oldest bash commands are removed from
     * the buffer until it fits, with a hard truncation as a last resort.
     */
    private renderContent;
    /**
     * Persist the rolling state as a single memory (upsert by subject
     * `live:${sessionId}`). Idempotent: calling it twice with no new
     * events between writes the same memory twice — no duplicates, the
     * existing one is replaced. Does nothing when no edit and no bash
     * command has been recorded yet.
     *
     * Tags are `live-session`, `session:${sessionId}` (so a query can
     * target this session's trace explicitly) and one `file:${path}` per
     * tracked edited file (for recall by file path).
     */
    flush(): void;
    /** Test-only inspection of internal counters. */
    stats(): {
        editCount: number;
        bashCount: number;
        uniqueFiles: number;
    };
}
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Live-session activity recorder.
|
|
3
|
+
*
|
|
4
|
+
* The pre-existing `session-trace` category records what *past* sessions
|
|
5
|
+
* physically did (files edited, commands run), pulled from the OpenCode
|
|
6
|
+
* SDK by `ingestSessions`. By design, that ingester explicitly skips
|
|
7
|
+
* the *current* session — past sessions are stable, the current one is
|
|
8
|
+
* still being lived.
|
|
9
|
+
*
|
|
10
|
+
* This module fills the gap: it records the **current** session's
|
|
11
|
+
* activity in-place, in the same `session-trace` category, so:
|
|
12
|
+
*
|
|
13
|
+
* - Recall within the current session can surface "what have I
|
|
14
|
+
* touched so far" without scanning the OpenCode SDK at all.
|
|
15
|
+
* - When this session later becomes a *past* session, its data is
|
|
16
|
+
* already in the store and ready for resume by parallel/successor
|
|
17
|
+
* sessions.
|
|
18
|
+
*
|
|
19
|
+
* Design choices:
|
|
20
|
+
*
|
|
21
|
+
* 1. ONE memory per session, keyed by `live:${sessionId}`, updated
|
|
22
|
+
* in place via `upsertBySubject`. The content is a compact
|
|
23
|
+
* rolling summary (files edited + bash commands run + counts),
|
|
24
|
+
* not a full transcript — this is a recall surface, not an audit
|
|
25
|
+
* log. The JSONL file logger is the audit log.
|
|
26
|
+
*
|
|
27
|
+
* 2. We do not record every tool call (read, grep, glob would flood
|
|
28
|
+
* the store with noise). We record:
|
|
29
|
+
* - File-modifying tool calls (write, edit, patch)
|
|
30
|
+
* - Bash commands (rich signal: build/test/install/checkout/…)
|
|
31
|
+
* - We deliberately skip pure-discovery calls.
|
|
32
|
+
*
|
|
33
|
+
* 3. Write debouncing — the recorder accumulates events in memory
|
|
34
|
+
* and only persists when (a) `flush()` is called, or (b) an
|
|
35
|
+
* idle timer expires. This keeps the write-behind buffer from
|
|
36
|
+
* flushing on every keystroke-of-an-edit.
|
|
37
|
+
*
|
|
38
|
+
* 4. Best-effort — every recording path is wrapped at the call
|
|
39
|
+
* site. A failure inside this module must never block a tool call
|
|
40
|
+
* or surface to the agent.
|
|
41
|
+
*
|
|
42
|
+
* 5. Bounded — content is capped at MAX_CONTENT_BYTES, with the
|
|
43
|
+
* oldest events dropped first when the cap is reached. The
|
|
44
|
+
* summary line and total counts stay accurate; only the
|
|
45
|
+
* timeline-detail is truncated.
|
|
46
|
+
*/
|
|
47
|
+
/** Category the live trace is stored under — the same one used for past-session traces. */
const CATEGORY = "session-trace";
/**
 * Hard cap on the rolling memory content size. It is compared against
 * the rendered string's `length`, i.e. UTF-16 code units rather than
 * encoded bytes.
 */
const MAX_CONTENT_BYTES = 4096;
/** Hard cap on the list of bash commands kept in detail (oldest dropped). */
const MAX_BASH_DETAIL = 30;
/** Hard cap on the list of edited files kept (oldest dropped). */
const MAX_EDITED_FILES = 60;
/**
 * Truncation length for any individual bash command stored. A "…" is
 * appended after the cut, so a stored line can be one character longer.
 */
const MAX_BASH_LINE = 160;
|
|
56
|
+
/**
|
|
57
|
+
* Per-plugin-instance state. One LiveSessionRecorder is created at
|
|
58
|
+
* plugin load and lives for the lifetime of the OpenCode session.
|
|
59
|
+
*/
|
|
60
|
+
export class LiveSessionRecorder {
|
|
61
|
+
repo;
|
|
62
|
+
sessionId;
|
|
63
|
+
editedFiles = new Set();
|
|
64
|
+
bashLines = [];
|
|
65
|
+
editCount = 0;
|
|
66
|
+
bashCount = 0;
|
|
67
|
+
startedAt = Date.now();
|
|
68
|
+
constructor(repo, sessionId) {
|
|
69
|
+
this.repo = repo;
|
|
70
|
+
this.sessionId = sessionId;
|
|
71
|
+
}
|
|
72
|
+
/**
|
|
73
|
+
* Record a file modification (write / edit / patch). Idempotent on
|
|
74
|
+
* file path — recording the same file twice keeps the path in the
|
|
75
|
+
* edited-files set exactly once but increments the edit counter.
|
|
76
|
+
*/
|
|
77
|
+
recordFileEdit(filePath, _tool) {
|
|
78
|
+
this.editCount += 1;
|
|
79
|
+
this.editedFiles.add(filePath);
|
|
80
|
+
if (this.editedFiles.size > MAX_EDITED_FILES) {
|
|
81
|
+
// drop the oldest tracked file — Sets preserve insertion order in JS.
|
|
82
|
+
const first = this.editedFiles.values().next().value;
|
|
83
|
+
if (first !== undefined)
|
|
84
|
+
this.editedFiles.delete(first);
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
/**
|
|
88
|
+
* Record a bash command. The command text is truncated to MAX_BASH_LINE
|
|
89
|
+
* characters and the buffer is capped at MAX_BASH_DETAIL entries.
|
|
90
|
+
*/
|
|
91
|
+
recordBash(command) {
|
|
92
|
+
this.bashCount += 1;
|
|
93
|
+
const truncated = command.length > MAX_BASH_LINE
|
|
94
|
+
? command.slice(0, MAX_BASH_LINE) + "…"
|
|
95
|
+
: command;
|
|
96
|
+
this.bashLines.push(truncated);
|
|
97
|
+
if (this.bashLines.length > MAX_BASH_DETAIL)
|
|
98
|
+
this.bashLines.shift();
|
|
99
|
+
}
|
|
100
|
+
/**
|
|
101
|
+
* Render the current state as memory content. Format is stable so
|
|
102
|
+
* BM25 tokenisation behaves predictably.
|
|
103
|
+
*/
|
|
104
|
+
renderContent() {
|
|
105
|
+
const ageMin = Math.round((Date.now() - this.startedAt) / 60000);
|
|
106
|
+
const lines = [];
|
|
107
|
+
lines.push(`Live session ${this.sessionId} (started ${ageMin}m ago): ` +
|
|
108
|
+
`${this.editCount} file edit${this.editCount === 1 ? "" : "s"}, ` +
|
|
109
|
+
`${this.bashCount} bash command${this.bashCount === 1 ? "" : "s"}.`);
|
|
110
|
+
if (this.editedFiles.size > 0) {
|
|
111
|
+
lines.push("Files edited: " + [...this.editedFiles].join(", "));
|
|
112
|
+
}
|
|
113
|
+
if (this.bashLines.length > 0) {
|
|
114
|
+
lines.push("Recent bash commands:");
|
|
115
|
+
for (const cmd of this.bashLines)
|
|
116
|
+
lines.push(" $ " + cmd);
|
|
117
|
+
}
|
|
118
|
+
let content = lines.join("\n");
|
|
119
|
+
// Hard cap on size — drop oldest bash lines until under cap.
|
|
120
|
+
while (content.length > MAX_CONTENT_BYTES && this.bashLines.length > 0) {
|
|
121
|
+
this.bashLines.shift();
|
|
122
|
+
const idx = lines.findIndex((l) => l.startsWith(" $ "));
|
|
123
|
+
if (idx >= 0)
|
|
124
|
+
lines.splice(idx, 1);
|
|
125
|
+
else
|
|
126
|
+
break;
|
|
127
|
+
content = lines.join("\n");
|
|
128
|
+
}
|
|
129
|
+
// Final hard truncate as a safety net.
|
|
130
|
+
if (content.length > MAX_CONTENT_BYTES) {
|
|
131
|
+
content = content.slice(0, MAX_CONTENT_BYTES - 1) + "…";
|
|
132
|
+
}
|
|
133
|
+
return content;
|
|
134
|
+
}
|
|
135
|
+
/**
|
|
136
|
+
* Persist the rolling state as a single memory (upsert by subject).
|
|
137
|
+
* Idempotent: calling it twice with no new events between writes the
|
|
138
|
+
* same memory twice — no duplicates, the existing one is replaced.
|
|
139
|
+
*
|
|
140
|
+
* Tags include all touched files (for recall by file path) plus a
|
|
141
|
+
* `session:${sessionId}` marker so a query can target the current
|
|
142
|
+
* session's trace explicitly.
|
|
143
|
+
*/
|
|
144
|
+
flush() {
|
|
145
|
+
// Nothing to record? Skip the write — an empty live trace memory
|
|
146
|
+
// would just dilute recall results without adding signal.
|
|
147
|
+
if (this.editCount === 0 && this.bashCount === 0)
|
|
148
|
+
return;
|
|
149
|
+
const tags = ["live-session", `session:${this.sessionId}`];
|
|
150
|
+
for (const f of this.editedFiles)
|
|
151
|
+
tags.push(`file:${f}`);
|
|
152
|
+
this.repo.upsertBySubject({
|
|
153
|
+
category: CATEGORY,
|
|
154
|
+
subject: `live:${this.sessionId}`,
|
|
155
|
+
content: this.renderContent(),
|
|
156
|
+
tags,
|
|
157
|
+
source: `session:${this.sessionId}`,
|
|
158
|
+
// NOT pinned — a live trace is transient state. Once this session
|
|
159
|
+
// becomes a past session, ingestSessions may add a more compact
|
|
160
|
+
// trace memory; the LFU eviction can drop the live one as it
|
|
161
|
+
// ages, which is the desired behaviour.
|
|
162
|
+
pinned: false,
|
|
163
|
+
});
|
|
164
|
+
}
|
|
165
|
+
/** Test-only inspection of internal counters. */
|
|
166
|
+
stats() {
|
|
167
|
+
return {
|
|
168
|
+
editCount: this.editCount,
|
|
169
|
+
bashCount: this.bashCount,
|
|
170
|
+
uniqueFiles: this.editedFiles.size,
|
|
171
|
+
};
|
|
172
|
+
}
|
|
173
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* project-notes.ts — ingest the small set of root-level files where
|
|
3
|
+
* humans put house rules for AI agents.
|
|
4
|
+
*
|
|
5
|
+
* These are the files an agent should know about WITHIN THE FIRST
|
|
6
|
+
* RECALL of a session: AGENTS.md, CLAUDE.md, GEMINI.md, .cursorrules,
|
|
7
|
+
* .windsurfrules, COPILOT.md. They typically contain "in this repo,
|
|
8
|
+
* always do X, never do Y, our naming convention is Z" — exactly the
|
|
9
|
+
* kind of facts that, missed, lead to revert PRs.
|
|
10
|
+
*
|
|
11
|
+
* **Whole-file content, not headings.** Unlike `docs.ts` (which slices
|
|
12
|
+
* into sections), these files are short (typically under 4 KB) and
|
|
13
|
+
* their structure is rarely worth indexing — every line might be
|
|
14
|
+
* load-bearing. One memory per file with the full content (truncated
|
|
15
|
+
* to MAX_NOTE_BYTES) is the right granularity.
|
|
16
|
+
*
|
|
17
|
+
* **Root-level only.** No recursion. A `monorepo-package/.cursorrules`
|
|
18
|
+
* is a per-package instruction that belongs to the package's owner,
|
|
19
|
+
* not Diane.
|
|
20
|
+
*/
|
|
21
|
+
import type { MemoryRepository } from "../store/repository.js";
|
|
22
|
+
/** Optional settings accepted by `ingestProjectNotes` through its `opts` parameter. */
export interface ProjectNotesIngestOptions {
|
|
23
|
+
/** NOTE(review): presumably the per-file size cap (the file header mentions truncation to MAX_NOTE_BYTES) — confirm against the implementation. */
maxBytes?: number;
|
|
24
|
+
}
|
|
25
|
+
/** Value the promise returned by `ingestProjectNotes` resolves to. */
export interface ProjectNotesIngestResult {
|
|
26
|
+
/** NOTE(review): presumably the number of root-level notes files that were found — confirm against the implementation. */
filesFound: number;
|
|
27
|
+
}
|
|
28
|
+
/**
 * Ingest the root-level agent-instruction files described in this file's
 * header (AGENTS.md, CLAUDE.md, GEMINI.md, .cursorrules, .windsurfrules,
 * COPILOT.md). Per the header, each file becomes one memory with its whole
 * (possibly truncated) content, and subdirectories are not searched.
 *
 * @param repo - Memory repository the notes are ingested into.
 * @param root - NOTE(review): presumably the project root directory to look in — confirm against the implementation.
 * @param opts - Optional ingest settings; see `ProjectNotesIngestOptions`.
 * @returns A promise resolving to a `ProjectNotesIngestResult`.
 */
export declare function ingestProjectNotes(repo: MemoryRepository, root: string, opts?: ProjectNotesIngestOptions): Promise<ProjectNotesIngestResult>;
|