memarium 0.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +146 -0
- package/assets/scripts/merge-books.mjs +921 -0
- package/assets/workflows/memarium-aggregate.yml +66 -0
- package/dist/bin/memarium.js +6 -0
- package/dist/src/aggregated-store.js +95 -0
- package/dist/src/cli.js +175 -0
- package/dist/src/commands/cat.js +20 -0
- package/dist/src/commands/doctor.js +383 -0
- package/dist/src/commands/init-wizard.js +201 -0
- package/dist/src/commands/init.js +45 -0
- package/dist/src/commands/list.js +19 -0
- package/dist/src/commands/prune.js +108 -0
- package/dist/src/commands/resume/config-pathmap.js +38 -0
- package/dist/src/commands/resume/fuzzy-match.js +13 -0
- package/dist/src/commands/resume/list-sessions.js +54 -0
- package/dist/src/commands/resume/render-prompt.js +121 -0
- package/dist/src/commands/resume/resume.js +121 -0
- package/dist/src/commands/show.js +21 -0
- package/dist/src/commands/sync.js +279 -0
- package/dist/src/commands/upgrade.js +47 -0
- package/dist/src/commands/workflow.js +126 -0
- package/dist/src/config.js +98 -0
- package/dist/src/content-project-inference.js +185 -0
- package/dist/src/device.js +47 -0
- package/dist/src/digest/manifest.js +121 -0
- package/dist/src/digest/project-filter.js +32 -0
- package/dist/src/digest/session-signal.js +106 -0
- package/dist/src/digest/toc.js +127 -0
- package/dist/src/git-ops.js +359 -0
- package/dist/src/index-store.js +35 -0
- package/dist/src/migrate.js +72 -0
- package/dist/src/project-identity.js +139 -0
- package/dist/src/project-resolve.js +42 -0
- package/dist/src/prompts.js +87 -0
- package/dist/src/repo-data-dir.js +25 -0
- package/dist/src/slug.js +28 -0
- package/dist/src/sources/base.js +1 -0
- package/dist/src/sources/claude-code.js +294 -0
- package/dist/src/sources/vscode-copilot.js +400 -0
- package/dist/src/types.js +1 -0
- package/dist/src/writer.js +240 -0
- package/package.json +60 -0
|
@@ -0,0 +1,400 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import { readdirSync, readFileSync, statSync, existsSync } from "node:fs";
|
|
3
|
+
import { homedir } from "node:os";
|
|
4
|
+
import { join, basename } from "node:path";
|
|
5
|
+
import { deriveSlug } from "../slug.js";
|
|
6
|
+
import { cachedProjectSlug } from "../project-identity.js";
|
|
7
|
+
import { sanitizeMessageText } from "./claude-code.js";
|
|
8
|
+
/**
 * Resolve the default VS Code `workspaceStorage` directory for this OS.
 *
 * macOS and Windows use their platform-specific application-data folders;
 * every other platform falls back to `~/.config`.
 *
 * @returns {string} Path below the current user's home directory.
 */
function defaultStorageRoot() {
  const codeUserStorage = ["Code", "User", "workspaceStorage"];
  switch (process.platform) {
    case "darwin":
      return join(homedir(), "Library", "Application Support", ...codeUserStorage);
    case "win32":
      return join(homedir(), "AppData", "Roaming", ...codeUserStorage);
    default:
      return join(homedir(), ".config", ...codeUserStorage);
  }
}
|
|
15
|
+
/**
 * List a directory's entries, treating a missing or unreadable directory as
 * empty (discovery is best-effort).
 *
 * @param {string} dir Directory to list.
 * @returns {import("node:fs").Dirent[]} Entries, or `[]` on any error.
 */
function listCopilotDirEntries(dir) {
  try {
    return readdirSync(dir, { withFileTypes: true });
  } catch {
    return [];
  }
}

/**
 * Stat, read and hash one candidate session file.
 *
 * VS Code rewrites and deletes these files while it is running, so a file
 * seen by `readdirSync` may be gone (or locked) by the time it is read. Such
 * files are skipped instead of aborting the whole discovery pass.
 *
 * @param {string} path Absolute path of the session file.
 * @returns {{ mtimeMs: number, buf: Buffer, sha: string } | null}
 *   Snapshot of the file, or `null` when it is empty or can no longer be read.
 */
function snapshotCopilotSourceFile(path) {
  try {
    const st = statSync(path);
    if (st.size === 0)
      return null;
    const buf = readFileSync(path);
    const sha = createHash("sha256").update(buf).digest("hex");
    return { mtimeMs: st.mtimeMs, buf, sha };
  } catch {
    return null;
  }
}

/**
 * Source adapter that discovers GitHub Copilot chat sessions stored in VS
 * Code's per-workspace storage.
 */
export class VSCodeCopilotAdapter {
  root;
  name = "copilot";

  /**
   * @param {string} [root] `workspaceStorage` directory to scan; defaults to
   *   the platform-specific VS Code location.
   */
  constructor(root = defaultStorageRoot()) {
    this.root = root;
  }

  /**
   * Yield one candidate per session file found under `root`.
   *
   * Each candidate carries the file path, mtime, SHA-256 of the bytes read,
   * and a lazy `load()` that parses those same bytes. Files that are empty,
   * or that disappear / become unreadable mid-scan, are skipped.
   *
   * Within one workspace the same conversation can exist both as
   * `chatSessions/<id>.json[l]` and as
   * `GitHub.copilot-chat/transcripts/<id>.jsonl`. Emitting both produces twin
   * output files, so `chatSessions/` is authoritative and a transcript is only
   * emitted when no chat-session file with the same id was emitted.
   */
  async *discover() {
    for (const w of listCopilotDirEntries(this.root)) {
      if (!w.isDirectory())
        continue;
      const wsDir = join(this.root, w.name);
      const wsPath = readWorkspacePath(join(wsDir, "workspace.json"));
      const chatSessionIds = new Set();

      // chatSessions/<id>.json (legacy) and chatSessions/<id>.jsonl (append log).
      const chatDir = join(wsDir, "chatSessions");
      for (const f of listCopilotDirEntries(chatDir)) {
        if (!f.isFile())
          continue;
        const isJsonl = f.name.endsWith(".jsonl");
        if (!isJsonl && !f.name.endsWith(".json"))
          continue;
        const p = join(chatDir, f.name);
        const snap = snapshotCopilotSourceFile(p);
        if (!snap)
          continue;
        // Only ids we actually emit block the transcript fallback below.
        chatSessionIds.add(basename(f.name, isJsonl ? ".jsonl" : ".json"));
        yield {
          sourcePath: p,
          sourceMtimeMs: snap.mtimeMs,
          sourceSha256: snap.sha,
          load: async () => isJsonl
            ? parseCopilotChatSessionsJsonl(p, snap.buf.toString("utf8"), wsPath)
            : parseCopilotJson(p, snap.buf.toString("utf8"), wsPath),
        };
      }

      // GitHub.copilot-chat/transcripts/<id>.jsonl (event-stream format).
      const transcriptsDir = join(wsDir, "GitHub.copilot-chat", "transcripts");
      for (const f of listCopilotDirEntries(transcriptsDir)) {
        if (!f.isFile() || !f.name.endsWith(".jsonl"))
          continue;
        if (chatSessionIds.has(basename(f.name, ".jsonl")))
          continue; // chatSessions/ wins for same workspace+sessionId
        const p = join(transcriptsDir, f.name);
        const snap = snapshotCopilotSourceFile(p);
        if (!snap)
          continue;
        yield {
          sourcePath: p,
          sourceMtimeMs: snap.mtimeMs,
          sourceSha256: snap.sha,
          load: async () => parseCopilotTranscript(p, snap.buf.toString("utf8"), wsPath),
        };
      }
    }
  }
}
|
|
115
|
+
/**
 * Read the workspace folder recorded in a VS Code `workspace.json`.
 *
 * The file stores the location as a URI under `folder` (or `workspace`).
 * `file://` URIs are percent-decoded into a filesystem path; any other value
 * is returned as-is.
 *
 * On Windows, VS Code writes `file:///c%3A/Users/...`, which decodes to
 * `/c:/Users/...`. That leading slash makes the result unusable as a Windows
 * path, so it is removed when `platform` is `"win32"`.
 *
 * @param {string} workspaceJsonPath Path to the `workspace.json` file.
 * @param {string} [platform=process.platform] Platform whose path rules apply.
 * @returns {string} Workspace path, or `""` when the file is missing,
 *   unparseable, or does not name a folder/workspace.
 */
function readWorkspacePath(workspaceJsonPath, platform = process.platform) {
  if (!existsSync(workspaceJsonPath))
    return "";
  try {
    const obj = JSON.parse(readFileSync(workspaceJsonPath, "utf8"));
    const u = obj.folder ?? obj.workspace ?? "";
    if (!u)
      return "";
    if (!u.startsWith("file://"))
      return u;
    const decoded = decodeURIComponent(u.slice("file://".length));
    // "/c:/Users/me" -> "c:/Users/me" (drive-letter URIs on Windows only).
    if (platform === "win32" && /^\/[A-Za-z]:/.test(decoded))
      return decoded.slice(1);
    return decoded;
  }
  catch {
    // Best-effort: malformed JSON, non-string values or bad percent-escapes
    // all mean "workspace unknown".
    return "";
  }
}
|
|
129
|
+
/**
 * Parse a legacy `chatSessions/<id>.json` file.
 *
 * The session id is the file name without `.json`; the turns are the
 * document's `requests` array (treated as empty when it is not an array).
 *
 * @param {string} sourcePath Path of the `.json` file.
 * @param {string} content Raw file text; must be valid JSON.
 * @param {string} workspacePath Workspace folder the session belongs to.
 * @returns {object} Session built by `buildSessionFromRequests`.
 * @throws {SyntaxError} When `content` is not valid JSON.
 */
function parseCopilotJson(sourcePath, content, workspacePath) {
  const doc = JSON.parse(content);
  const turns = Array.isArray(doc.requests) ? doc.requests : [];
  return buildSessionFromRequests(sourcePath, basename(sourcePath, ".json"), turns, workspacePath);
}
|
|
136
|
+
/**
 * Apply one deep patch (`k = ["requests", idx, ...path]`) to a turn.
 *
 * Missing or non-object intermediates (including `null` and primitives left
 * by an earlier patch) are replaced with a fresh container: an array when the
 * next path segment is a number, otherwise an object. Paths containing
 * `__proto__` are ignored so a crafted log cannot reach `Object.prototype`.
 *
 * @param {Array} turns Turn list being rebuilt (mutated in place).
 * @param {number} idx Index of the turn to patch; must already exist.
 * @param {Array<string|number>} path Path below the turn; at least one segment.
 * @param {*} value Value to store at the end of the path.
 */
function applyCopilotTurnPatch(turns, idx, path, value) {
  if (path.some((seg) => seg === "__proto__"))
    return;
  let cur = turns[idx];
  if (cur === null || typeof cur !== "object") {
    cur = {};
    turns[idx] = cur;
  }
  for (let i = 0; i < path.length - 1; i++) {
    const seg = path[i];
    const next = cur[seg];
    if (next === null || typeof next !== "object")
      cur[seg] = typeof path[i + 1] === "number" ? [] : {};
    cur = cur[seg];
  }
  cur[path[path.length - 1]] = value;
}

/**
 * Rebuild the full turn list from a `chatSessions/<id>.jsonl` state log.
 *
 * The file is a live state log with a rolling-window snapshot, NOT a complete
 * transcript, so the turns have to be replayed from its events:
 *
 *   - kind=0 (first line): initial state. `v.sessionId` overrides the id
 *     taken from the file name and `v.requests` seeds the turn list.
 *   - kind=1: replaces other top-level state; irrelevant here and ignored.
 *   - kind=2, k=["requests"]: snapshot holding only the newest turn(s). The
 *     conceptual `requests` array grows monotonically (later patches address
 *     turns by chronological index), so the elements are APPENDED.
 *   - kind=2, k=["requests", N]: replace turn N entirely.
 *   - kind=2, k=["requests", N, ...path]: deep-set inside turn N. Each patch
 *     is a full replacement of the addressed value, so the last one wins.
 *
 * Lines that are blank or not valid JSON are skipped, as are patches whose
 * turn index is not a non-negative integer.
 *
 * @param {string} sourcePath Path of the `.jsonl` file.
 * @param {string} content Raw file text.
 * @param {string} workspacePath Workspace folder the session belongs to.
 * @returns {object} Session built by `buildSessionFromRequests`.
 */
function parseCopilotChatSessionsJsonl(sourcePath, content, workspacePath) {
  let sessionId = basename(sourcePath, ".jsonl");
  const turns = [];
  for (const line of content.split("\n")) {
    const s = line.trim();
    if (!s)
      continue;
    let obj;
    try {
      obj = JSON.parse(s);
    }
    catch {
      continue;
    }
    if (obj?.kind === 0 && obj?.v) {
      if (typeof obj.v.sessionId === "string" && obj.v.sessionId)
        sessionId = obj.v.sessionId;
      if (Array.isArray(obj.v.requests)) {
        // Initial state: seed turns from whatever was already recorded.
        for (const r of obj.v.requests)
          turns.push(r);
      }
      continue;
    }
    if (obj?.kind !== 2 || !Array.isArray(obj.k) || obj.k[0] !== "requests")
      continue;
    if (obj.k.length === 1 && Array.isArray(obj.v)) {
      // Rolling-window snapshot: append to the chronological turn list.
      for (const r of obj.v)
        turns.push(r);
    }
    else if (obj.k.length >= 2 && typeof obj.k[1] === "number") {
      const idx = obj.k[1];
      // A negative or fractional index would create stray properties on the
      // array instead of addressing a turn.
      if (!Number.isInteger(idx) || idx < 0)
        continue;
      // Grow if the patch references a turn we have not seen yet (defensive:
      // in well-formed logs the snapshot precedes the patch).
      while (turns.length <= idx)
        turns.push({});
      if (obj.k.length === 2)
        turns[idx] = obj.v;
      else
        applyCopilotTurnPatch(turns, idx, obj.k.slice(2), obj.v);
    }
  }
  return buildSessionFromRequests(sourcePath, sessionId, turns, workspacePath);
}
|
|
218
|
+
/**
 * Convert an epoch-milliseconds value to an ISO-8601 string.
 *
 * @param {*} value Candidate timestamp.
 * @returns {string|undefined} ISO string, or `undefined` when `value` is not a
 *   number or lies outside the range a `Date` can represent (where
 *   `toISOString()` would throw a `RangeError`).
 */
function copilotEpochMsToIso(value) {
  if (typeof value !== "number")
    return undefined;
  const date = new Date(value);
  return Number.isNaN(date.getTime()) ? undefined : date.toISOString();
}

/**
 * Build a session record from a chronological list of Copilot request turns.
 *
 * Each turn contributes up to two messages: the user prompt
 * (`turn.message.text`) and the assistant reply extracted from
 * `turn.response`. Falsy turns are skipped. `startedAt` / `endedAt` are the
 * first and last usable turn timestamps, falling back to the Unix epoch when
 * no turn has one.
 *
 * @param {string} sourcePath File the turns were read from.
 * @param {string} sessionId Full session id; its first 8 characters become `shortId`.
 * @param {Array} requests Turn objects in chronological order.
 * @param {string} workspacePath Workspace folder, used for project identity.
 * @returns {object} Session with `tool`, ids, project, timestamps, slug,
 *   display name, `messages` and `sourcePath`.
 */
function buildSessionFromRequests(sourcePath, sessionId, requests, workspacePath) {
  const messages = [];
  let startedAt = "";
  let endedAt = "";
  for (const r of requests) {
    if (!r)
      continue;
    // An unusable timestamp is treated as absent rather than failing the
    // whole session.
    const ts = copilotEpochMsToIso(r.timestamp);
    if (ts) {
      if (!startedAt)
        startedAt = ts;
      endedAt = ts;
    }
    const userTextRaw = r?.message?.text;
    if (typeof userTextRaw === "string" && userTextRaw) {
      const userText = sanitizeMessageText(userTextRaw);
      if (userText)
        messages.push({ role: "user", text: userText, timestamp: ts, raw: r.message });
    }
    const respParts = Array.isArray(r.response) ? r.response : [];
    const { text: rawText, reasoning: rawReasoning, contentBlocks } = extractCopilotResponseParts(respParts);
    const text = sanitizeMessageText(rawText);
    const reasoning = sanitizeMessageText(rawReasoning);
    // NOTE(review): contentBlocks carry the part text as extracted, while
    // `text` / `reasoning` are sanitized — confirm whether consumers that
    // render contentBlocks are meant to see unsanitized text.
    if (text || reasoning || contentBlocks.length > 0) {
      const msg = { role: "assistant", text, timestamp: ts, raw: respParts };
      if (reasoning)
        msg.reasoning = reasoning;
      if (contentBlocks.length > 0)
        msg.contentBlocks = contentBlocks;
      messages.push(msg);
    }
  }
  // The first user prompt names the session.
  const firstUser = messages.find((m) => m.role === "user")?.text ?? "";
  const { slug, display } = deriveSlug(firstUser);
  const shortId = sessionId.slice(0, 8);
  return {
    tool: "copilot",
    sessionId,
    shortId,
    project: cachedProjectSlug(workspacePath),
    projectRaw: workspacePath,
    startedAt: startedAt || new Date(0).toISOString(),
    endedAt: endedAt || new Date(0).toISOString(),
    nameSlug: slug,
    displayName: display,
    messages,
    sourcePath,
  };
}
|
|
267
|
+
/**
 * Read the text of a tool-invocation message.
 *
 * The message may be serialized either as a plain string or as an object
 * whose `value` holds the text; both shapes are accepted.
 *
 * @param {*} message `pastTenseMessage` or `invocationMessage` of a part.
 * @returns {string} Message text, or `""` when none is present.
 */
function copilotToolMessageText(message) {
  if (typeof message === "string")
    return message;
  return typeof message?.value === "string" ? message.value : "";
}

/**
 * Pull text, reasoning and tool calls out of a Copilot response parts array.
 *
 * Part kinds handled (chatSessions format):
 *   - markdownContent          → visible assistant text (`content.value`)
 *   - thinking                 → reasoning (`value`)
 *   - toolInvocationSerialized → a `tool_use` block (`toolId`,
 *     `toolSpecificData`, optional `toolCallId`) followed by a `tool_result`
 *     block holding the invocation label, when there is one
 * Every other kind (textEditGroup, inlineReference, mcpServersStarting,
 * progressTaskSerialized, …) and every non-object entry is dropped as
 * UI / streaming-state noise.
 *
 * The source records only the invocation marker, not the tool's output, so
 * the `tool_result` content is the past-tense message (preferred) or the
 * invocation message — a placeholder, not the real result.
 *
 * @param {Array} parts Response parts of one turn.
 * @returns {{ text: string, reasoning: string, contentBlocks: object[] }}
 *   Newline-joined text and reasoning, plus the ordered content blocks.
 */
function extractCopilotResponseParts(parts) {
  const texts = [];
  const reasonings = [];
  const blocks = [];
  for (const p of parts) {
    if (!p || typeof p !== "object")
      continue;
    const k = p.kind;
    if (k === "markdownContent") {
      const v = typeof p?.content?.value === "string" ? p.content.value : "";
      if (v) {
        texts.push(v);
        blocks.push({ type: "text", text: v });
      }
    }
    else if (k === "thinking") {
      const v = typeof p?.value === "string" ? p.value : "";
      if (v) {
        reasonings.push(v);
        blocks.push({ type: "thinking", thinking: v });
      }
    }
    else if (k === "toolInvocationSerialized") {
      const toolId = typeof p?.toolId === "string" ? p.toolId : "tool";
      const label = copilotToolMessageText(p.pastTenseMessage) || copilotToolMessageText(p.invocationMessage);
      const input = p?.toolSpecificData ?? {};
      const block = { type: "tool_use", name: toolId, input };
      if (typeof p?.toolCallId === "string")
        block.id = p.toolCallId;
      blocks.push(block);
      if (label)
        blocks.push({ type: "tool_result", content: label });
    }
  }
  return {
    text: texts.join("\n"),
    reasoning: reasonings.join("\n"),
    contentBlocks: blocks,
  };
}
|
|
325
|
+
/**
 * Parse a `GitHub.copilot-chat/transcripts/<id>.jsonl` event stream.
 *
 * One JSON event per line; blank and unparseable lines are skipped. Every
 * event with a non-empty string `timestamp` extends the session's
 * start/end window. Event types handled:
 *   - `session.start`     → `data.sessionId` overrides the file-name id
 *   - `user.message`      → user message from `data.content`
 *   - `assistant.message` → assistant message from `data.content` and
 *     `data.reasoningText`, kept when either is non-empty after sanitizing
 * Everything else, including tool execution events, is dropped: tool traces
 * are intentionally not part of the output.
 *
 * @param {string} sourcePath Path of the transcript file.
 * @param {string} content Raw file text.
 * @param {string} workspacePath Workspace folder, used for project identity.
 * @returns {object} Session with `tool`, ids, project, timestamps, slug,
 *   display name, `messages` and `sourcePath`.
 */
function parseCopilotTranscript(sourcePath, content, workspacePath) {
  let sessionId = basename(sourcePath, ".jsonl");
  const messages = [];
  let firstStamp = "";
  let lastStamp = "";

  for (const rawLine of content.split("\n")) {
    const trimmed = rawLine.trim();
    if (!trimmed)
      continue;
    let event;
    try {
      event = JSON.parse(trimmed);
    }
    catch {
      continue;
    }

    const stamp = typeof event?.timestamp === "string" ? event.timestamp : undefined;
    if (stamp) {
      if (!firstStamp)
        firstStamp = stamp;
      lastStamp = stamp;
    }

    switch (event?.type) {
      case "session.start": {
        const declaredId = event?.data?.sessionId;
        if (typeof declaredId === "string" && declaredId)
          sessionId = declaredId;
        break;
      }
      case "user.message": {
        const prompt = sanitizeMessageText(typeof event?.data?.content === "string" ? event.data.content : "");
        if (prompt)
          messages.push({ role: "user", text: prompt, timestamp: stamp, raw: event });
        break;
      }
      case "assistant.message": {
        const reply = sanitizeMessageText(typeof event?.data?.content === "string" ? event.data.content : "");
        const thoughts = sanitizeMessageText(typeof event?.data?.reasoningText === "string" ? event.data.reasoningText : "");
        // Drop the message only when both text AND reasoning are empty.
        if (reply || thoughts) {
          const entry = { role: "assistant", text: reply, timestamp: stamp, raw: event };
          if (thoughts)
            entry.reasoning = thoughts;
          messages.push(entry);
        }
        break;
      }
      default:
        // tool.execution_* and unknown events are intentionally ignored.
        break;
    }
  }

  const openingPrompt = messages.find((m) => m.role === "user")?.text ?? "";
  const { slug, display } = deriveSlug(openingPrompt);
  const epoch = new Date(0).toISOString();
  return {
    tool: "copilot",
    sessionId,
    shortId: sessionId.slice(0, 8),
    project: cachedProjectSlug(workspacePath),
    projectRaw: workspacePath,
    startedAt: firstStamp || epoch,
    endedAt: lastStamp || epoch,
    nameSlug: slug,
    displayName: display,
    messages,
    sourcePath,
  };
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// No runtime exports: `export {}` only marks this file as an ES module.
export {};
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
import { mkdirSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import { extractManifest } from "./digest/manifest.js";
|
|
4
|
+
import { buildTocEntries, renderTocMarkdown } from "./digest/toc.js";
|
|
5
|
+
/** Threshold above which tool_result.content / tool_use.input gets truncated.
|
|
6
|
+
* Empirical: a 20 KB code-fence in markdown is already large; tool outputs
|
|
7
|
+
* bigger than this usually mean Claude Read a long file or Bash dumped a
|
|
8
|
+
* build log — neither is high-value context for resume. The truncation
|
|
9
|
+
* preserves first 30 + last 10 lines + a footer noting the original size. */
|
|
10
|
+
// 20 * 1024 = 20480 bytes (20 KiB).
export const TRUNCATE_THRESHOLD_BYTES = 20 * 1024;
|
|
11
|
+
/**
 * Render a session to Markdown and write it below `repoRoot`.
 *
 * The file lands at
 * `raw_sessions/<tool>/<project>/<YYYY-MM-DD>/<nameSlug>__<shortId>.md`,
 * where the date is the first 10 characters of `s.startedAt`. Missing
 * directories are created.
 *
 * @param {string} repoRoot Root directory of the target repository.
 * @param {object} s Session to write.
 * @param {object} [opts]
 * @param {boolean} [opts.includeReasoning=true] Include reasoning blocks.
 * @param {boolean} [opts.fullToolResults] Disable tool input/output
 *   truncation; when unset, enabled only if the environment variable
 *   `MEMARIUM_FULL_TOOL_RESULTS` is `"1"`.
 * @returns {{ md: string }} Path of the written file, relative to `repoRoot`.
 */
export function writeSession(repoRoot, s, opts = {}) {
  const day = s.startedAt.slice(0, 10); // YYYY-MM-DD
  const relDir = join("raw_sessions", s.tool, s.project, day);
  mkdirSync(join(repoRoot, relDir), { recursive: true });

  const relFile = join(relDir, `${s.nameSlug}__${s.shortId}.md`);
  const renderCtx = {
    includeReasoning: opts.includeReasoning ?? true,
    fullToolResults: opts.fullToolResults ?? process.env.MEMARIUM_FULL_TOOL_RESULTS === "1",
  };
  writeFileSync(join(repoRoot, relFile), renderMarkdown(s, renderCtx));
  return { md: relFile };
}
|
|
23
|
+
/**
 * Render a session as `<frontmatter>` → `<TOC>` → `<body>`.
 *
 * The manifest (inside the frontmatter) and the TOC both point at body lines,
 * but where the body starts depends on how long the frontmatter and TOC are.
 * So the work happens in two passes:
 *   1. Render every message, record each one's 1-based starting line relative
 *      to the body, and build manifest + TOC from those relative lines.
 *   2. Render frontmatter + TOC once to measure how many lines precede the
 *      body, shift every `line` by that amount, and render them again.
 *
 * @param {object} s Session to render; `s.messages` supplies the body.
 * @param {object} ctx Rendering options passed through to message rendering.
 * @returns {string} The complete Markdown document.
 */
function renderMarkdown(s, ctx) {
  // Messages that render to nothing are dropped up front so the offsets below
  // describe exactly what ends up in the file.
  const kept = s.messages
    .map((message) => ({ md: renderMessageBlock(message, ctx), src: message }))
    .filter((entry) => entry.md);

  // Blocks are joined with a blank line, so a block of N lines that starts on
  // line L is followed by the next block on line L + N + 1.
  const relativeStarts = [];
  let nextStart = 1;
  for (const { md } of kept) {
    relativeStarts.push(nextStart);
    nextStart += md.split("\n").length + 1;
  }
  const body = kept.map((entry) => entry.md).join("\n\n");
  const sources = kept.map((entry) => entry.src);

  // Pass 1: manifest and TOC with body-relative line numbers.
  const relativeManifest = extractManifest(sources, relativeStarts);
  const relativeToc = buildTocEntries(sources, relativeStarts);

  // Measure the prefix exactly as it will be emitted: frontmatter, optional
  // TOC, each followed by a blank separator line. The number of newlines in
  // that prefix is the number of file lines before body line 1.
  const relativeTocMd = renderTocMarkdown(relativeToc);
  const relativeFrontmatter = renderFrontmatter(s, relativeManifest);
  const measuredPrefix = `${relativeFrontmatter}${relativeTocMd ? `\n\n${relativeTocMd}` : ""}\n\n`;
  const linesBeforeBody = measuredPrefix.split("\n").length - 1;

  // Pass 2: shift to file-absolute line numbers and render for real.
  const absoluteManifest = patchManifestLines(relativeManifest, linesBeforeBody);
  const absoluteToc = relativeToc.map((entry) => ({ ...entry, line: entry.line + linesBeforeBody }));
  const sections = [renderFrontmatter(s, absoluteManifest), renderTocMarkdown(absoluteToc), body];
  return sections.filter(Boolean).join("\n\n");
}
|
|
86
|
+
/**
 * Return a copy of a manifest whose `line` references are shifted by `offset`.
 *
 * Only `commits` and `candidate_decisions` carry line numbers; every other
 * field is copied through unchanged. The input is not mutated.
 *
 * @param {object} m Manifest with `commits` and `candidate_decisions` arrays.
 * @param {number} offset Amount added to each `line`.
 * @returns {object} New manifest with shifted line numbers.
 */
function patchManifestLines(m, offset) {
  const shifted = (entry) => ({ ...entry, line: entry.line + offset });
  const commits = m.commits.map((commit) => shifted(commit));
  const candidate_decisions = m.candidate_decisions.map((decision) => shifted(decision));
  return { ...m, commits, candidate_decisions };
}
|
|
93
|
+
/**
 * Render the YAML frontmatter block for a session.
 *
 * Session identity fields come first, then the manifest (turn counts, tools
 * used, commits, files touched, candidate decisions), all wrapped in `---`
 * delimiters. Only `displayName` is passed through `yamlSafeString`; the
 * other scalar fields are emitted verbatim.
 *
 * @param {object} s Session supplying the identity fields.
 * @param {object} m Manifest supplying counts and lists.
 * @returns {string} Frontmatter text without a trailing newline.
 */
function renderFrontmatter(s, m) {
  const identity = [
    `sessionId: ${s.sessionId}`,
    `tool: ${s.tool}`,
    `project: ${s.project}`,
    `projectRaw: ${s.projectRaw}`,
    `startedAt: ${s.startedAt}`,
    `endedAt: ${s.endedAt}`,
    `displayName: ${yamlSafeString(s.displayName)}`,
  ];
  const counts = [
    `manifest_version: 1`,
    `user_turns: ${m.user_turns}`,
    `assistant_turns: ${m.assistant_turns}`,
  ];
  const lists = [
    renderToolsUsed(m.tools_used),
    renderCommits(m.commits),
    renderFilesTouched(m.files_touched),
    renderCandidateDecisions(m.candidate_decisions),
  ].flat();
  return ["---", ...identity, ...counts, ...lists, "---"].join("\n");
}
|
|
114
|
+
/**
 * Render the `tools_used` mapping as YAML lines, most-used tool first.
 *
 * @param {Object<string, number>} t Tool name → invocation count.
 * @returns {string[]} `["tools_used: {}"]` when empty, otherwise a header line
 *   followed by one indented `name: count` line per tool.
 */
function renderToolsUsed(t) {
  const byCountDesc = Object.entries(t).sort(([, countA], [, countB]) => countB - countA);
  if (byCountDesc.length === 0)
    return ["tools_used: {}"];
  const out = ["tools_used:"];
  for (const [tool, count] of byCountDesc)
    out.push(` ${yamlSafeKey(tool)}: ${count}`);
  return out;
}
|
|
120
|
+
/**
 * Render the manifest's commits as a YAML list of flow mappings.
 *
 * @param {Array<{ sha: string, msg: string, line: number }>} commits
 * @returns {string[]} `["commits: []"]` when empty, otherwise a header line
 *   followed by one `- { sha, msg, line }` entry per commit.
 */
function renderCommits(commits) {
  if (commits.length === 0)
    return ["commits: []"];
  const out = ["commits:"];
  for (const commit of commits) {
    const sha = yamlSafeString(commit.sha);
    const msg = yamlSafeString(commit.msg);
    out.push(` - { sha: ${sha}, msg: ${msg}, line: ${commit.line} }`);
  }
  return out;
}
|
|
128
|
+
/**
 * Render the manifest's touched files as a YAML list.
 *
 * @param {string[]} files File paths.
 * @returns {string[]} `["files_touched: []"]` when empty, otherwise a header
 *   line followed by one `- <path>` entry per file.
 */
function renderFilesTouched(files) {
  if (files.length === 0)
    return ["files_touched: []"];
  const out = ["files_touched:"];
  for (const file of files)
    out.push(` - ${yamlSafeString(file)}`);
  return out;
}
|
|
136
|
+
/**
 * Render the manifest's candidate decisions as a YAML list of flow mappings.
 *
 * @param {Array<{ line: number, preview: string }>} decisions
 * @returns {string[]} `["candidate_decisions: []"]` when empty, otherwise a
 *   header line followed by one `- { line, preview }` entry per decision.
 */
function renderCandidateDecisions(decisions) {
  if (decisions.length === 0)
    return ["candidate_decisions: []"];
  const out = ["candidate_decisions:"];
  for (const decision of decisions) {
    const preview = yamlSafeString(decision.preview);
    out.push(` - { line: ${decision.line}, preview: ${preview} }`);
  }
  return out;
}
|
|
144
|
+
/** Characters allowed in an unquoted value: ASCII letters/digits, `_`, `-`,
 *  space, CJK ideographs (U+4E00–U+9FFF) and CJK punctuation (U+3000–U+303F).
 *  Written with escapes so the ranges stay visible and encoding-proof. */
const YAML_PLAIN_SAFE_CHARS = /^[A-Za-z0-9_\u4e00-\u9fff\u3000-\u303f -]+$/;
/** Words a YAML parser may read as a boolean or null instead of a string. */
const YAML_RESERVED_WORDS = /^(?:true|false|yes|no|on|off|null)$/i;

/**
 * YAML-safe one-line string.
 *
 * Returned unquoted only when the value cannot be misread: it consists of
 * safe characters, has no leading/trailing whitespace, does not start with a
 * digit or `-` (which a parser may read as a number, a date or a list
 * marker — e.g. an all-digit commit sha), and is not a boolean/null word.
 *
 * Anything else is single-quoted with internal single quotes doubled
 * (YAML 1.2). Values containing control characters such as newlines are
 * emitted as a double-quoted, JSON-escaped scalar instead, because a raw line
 * break inside single quotes would break the one-line layout.
 *
 * @param {*} s Value to encode; `null`/`undefined` become an empty string and
 *   other non-strings are converted with `String()`.
 * @returns {string} YAML scalar that parses back to the original string.
 */
function yamlSafeString(s) {
  const str = s == null ? "" : String(s);
  if (/[\u0000-\u001f]/.test(str))
    return JSON.stringify(str);
  const isPlain = YAML_PLAIN_SAFE_CHARS.test(str)
    && str === str.trim()
    && !/^[-0-9]/.test(str)
    && !YAML_RESERVED_WORDS.test(str);
  if (isPlain)
    return str;
  return `'${str.replace(/'/g, "''")}'`;
}
|
|
154
|
+
/**
 * YAML-safe mapping key.
 *
 * Keys made only of ASCII letters, digits, `_` and `-` are returned as-is;
 * anything else is single-quoted with internal single quotes doubled.
 *
 * @param {string} s Key to encode.
 * @returns {string} The key, quoted when necessary.
 */
function yamlSafeKey(s) {
  const needsQuoting = !/^[A-Za-z0-9_-]+$/.test(s);
  if (needsQuoting) {
    const doubled = s.split("'").join("''");
    return `'${doubled}'`;
  }
  return s;
}
|
|
161
|
+
/**
 * Render one message as a Markdown section.
 *
 * The heading is `## User`, `## Assistant`, or `## <role>` for any other
 * role, followed by the timestamp in italics when the message has one.
 *
 * @param {object} m Message with `role`, optional `timestamp`, and content
 *   (`contentBlocks`, `text`, `reasoning`).
 * @param {object} ctx Rendering options.
 * @returns {string} The section, or `""` when the message has no visible content.
 */
function renderMessageBlock(m, ctx) {
  let heading;
  if (m.role === "user")
    heading = "## User";
  else if (m.role === "assistant")
    heading = "## Assistant";
  else
    heading = `## ${m.role}`;

  const stamp = m.timestamp ? ` _(${m.timestamp})_` : "";
  const content = renderMessageContent(m.contentBlocks, m.text, m.reasoning, ctx);
  if (content.trim())
    return `${heading}${stamp}\n\n${content}`;
  return "";
}
|
|
171
|
+
/**
 * Render the body of one message.
 *
 * When the message has content blocks, they are rendered in order
 * (`thinking`, `text`, `tool_use`, `tool_result`; unknown types are skipped)
 * and the fallback arguments are ignored. Otherwise the message is rendered
 * from its plain text and reasoning. Reasoning is omitted in both paths when
 * `ctx.includeReasoning` is false.
 *
 * @param {object[]|undefined} blocks Structured content blocks, if any.
 * @param {string} fallbackText Plain text used when there are no blocks.
 * @param {string} fallbackReasoning Reasoning used when there are no blocks.
 * @param {object} ctx Rendering options (`includeReasoning`, `fullToolResults`).
 * @returns {string} Rendered parts joined by blank lines; may be `""`.
 */
function renderMessageContent(blocks, fallbackText, fallbackReasoning, ctx) {
  const parts = [];

  // Text-only message: no structured blocks to walk.
  if (!blocks || !(blocks.length > 0)) {
    if (ctx.includeReasoning && fallbackReasoning)
      parts.push(renderThinking(fallbackReasoning));
    if (fallbackText)
      parts.push(fallbackText);
    return parts.join("\n\n");
  }

  for (const block of blocks) {
    switch (block.type) {
      case "thinking":
        if (ctx.includeReasoning)
          parts.push(renderThinking(block.thinking));
        break;
      case "text":
        // Whitespace-only text blocks add nothing visible.
        if (block.text.trim())
          parts.push(block.text);
        break;
      case "tool_use":
        parts.push(renderToolUse(block, ctx));
        break;
      case "tool_result":
        parts.push(renderToolResult(block, ctx));
        break;
      default:
        break;
    }
  }
  return parts.join("\n\n");
}
|
|
203
|
+
/**
 * Render reasoning text as a Markdown blockquote with a "thinking" header.
 *
 * @param {string} text Reasoning text; may span several lines.
 * @returns {string} Header line followed by every input line prefixed with `> `.
 */
function renderThinking(text) {
  const out = ["> 💭 _thinking_"];
  for (const line of text.split("\n"))
    out.push(`> ${line}`);
  return out.join("\n");
}
|
|
207
|
+
/**
 * Render a `tool_use` block as a heading plus the tool input in a JSON fence.
 *
 * The input is pretty-printed and, unless `ctx.fullToolResults` is set,
 * truncated when large. An input that JSON cannot represent (for example
 * `undefined`) is rendered as `{}`: `JSON.stringify` returns `undefined` for
 * it, which would otherwise print the word "undefined" or fail during
 * truncation.
 *
 * @param {{ name: string, input: * }} b Tool-use block.
 * @param {{ fullToolResults: boolean }} ctx Rendering options.
 * @returns {string} Markdown for the tool call.
 */
function renderToolUse(b, ctx) {
  const inputStr = JSON.stringify(b.input, null, 2) ?? "{}";
  const shown = ctx.fullToolResults
    ? inputStr
    : maybeTruncate(inputStr, "input");
  return `### 🔧 tool_use: ${b.name}\n\n\`\`\`json\n${shown}\n\`\`\``;
}
|
|
214
|
+
/**
 * Render a `tool_result` block as a heading plus the output in a code fence.
 *
 * The output is truncated when large unless `ctx.fullToolResults` is set.
 * Non-string content is pretty-printed as JSON first, since the truncation
 * helper only accepts strings.
 *
 * The fence is three backticks unless the output itself contains a run of
 * three or more; then it is made one backtick longer than the longest run, so
 * a code fence inside the tool output cannot close the block early and spill
 * the rest of the output into the document as Markdown.
 *
 * @param {{ content: * }} b Tool-result block.
 * @param {{ fullToolResults: boolean }} ctx Rendering options.
 * @returns {string} Markdown for the tool result.
 */
function renderToolResult(b, ctx) {
  const raw = typeof b.content === "string"
    ? b.content
    : JSON.stringify(b.content, null, 2) ?? "";
  const shown = ctx.fullToolResults
    ? raw
    : maybeTruncate(raw, "output");
  let longestRun = 0;
  for (const run of shown.matchAll(/`{3,}/g))
    longestRun = Math.max(longestRun, run[0].length);
  const fence = "`".repeat(Math.max(3, longestRun + 1));
  return `### ✅ tool_result\n\n${fence}\n${shown}\n${fence}`;
}
|
|
220
|
+
/**
 * Truncate strings larger than TRUNCATE_THRESHOLD_BYTES (UTF-8 bytes).
 *
 * Strings at or under the threshold are returned unchanged. Larger ones are
 * shortened in one of two ways, each with a marker noting the original size:
 *   - more than 50 lines: keep the first 30 and last 10 lines;
 *   - otherwise (few but very long lines): keep the first 4000 and last 1000
 *     characters.
 * If the line-based result is itself still over the threshold — the kept
 * lines are huge, e.g. minified output with a few line breaks — the
 * character-based form is used instead, so the result is always bounded.
 *
 * @param {string} s Text to shorten.
 * @param {string} kind Label used in the line-based marker ("input" / "output").
 * @returns {string} `s` itself, or a shortened copy with a truncation marker.
 */
function maybeTruncate(s, kind) {
  const totalBytes = Buffer.byteLength(s, "utf8");
  if (totalBytes <= TRUNCATE_THRESHOLD_BYTES)
    return s;
  const sizeKb = (totalBytes / 1024).toFixed(1);
  const lines = s.split("\n");
  if (lines.length > 50) {
    const head = lines.slice(0, 30).join("\n");
    const tail = lines.slice(-10).join("\n");
    const omitted = lines.length - 40;
    const byLines = `${head}\n\n[... truncated: ${sizeKb} KB ${kind}, omitting ${omitted} middle lines ...]\n\n${tail}`;
    if (Buffer.byteLength(byLines, "utf8") <= TRUNCATE_THRESHOLD_BYTES)
      return byLines;
    // Kept lines are too long for line-based truncation to help; fall
    // through to the character-based form.
  }
  const head = s.slice(0, 4000);
  const tail = s.slice(-1000);
  return `${head}\n\n[... truncated: ${sizeKb} KB total, showing first 4000 + last 1000 chars ...]\n\n${tail}`;
}
|