@rubytech/taskmaster 1.0.62 → 1.0.63
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/pi-embedded-runner/compact.js +7 -3
- package/dist/agents/pi-embedded-runner/history.js +82 -0
- package/dist/agents/pi-embedded-runner/run/attempt.js +9 -3
- package/dist/agents/pi-embedded-runner.js +1 -1
- package/dist/auto-reply/commands-registry.data.js +6 -0
- package/dist/auto-reply/reply/commands-core.js +2 -0
- package/dist/auto-reply/reply/commands-restore.js +64 -0
- package/dist/auto-reply/reply/session.js +6 -1
- package/dist/build-info.json +3 -3
- package/dist/config/sessions/reset.js +4 -1
- package/dist/config/zod-schema.session.js +1 -1
- package/dist/control-ui/assets/{index-CV7xcGIS.js → index-BPvR6pln.js} +4 -4
- package/dist/control-ui/assets/{index-CV7xcGIS.js.map → index-BPvR6pln.js.map} +1 -1
- package/dist/control-ui/index.html +1 -1
- package/dist/gateway/chat-sanitize.js +75 -0
- package/dist/gateway/protocol/schema/logs-chat.js +1 -1
- package/dist/gateway/server-chat.js +0 -9
- package/dist/gateway/server-methods/chat.js +31 -34
- package/dist/gateway/server-startup.js +7 -0
- package/dist/infra/session-recovery.js +381 -0
- package/package.json +1 -1
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
<title>Taskmaster Control</title>
|
|
7
7
|
<meta name="color-scheme" content="dark light" />
|
|
8
8
|
<link rel="icon" type="image/png" href="./favicon.png" />
|
|
9
|
-
<script type="module" crossorigin src="./assets/index-
|
|
9
|
+
<script type="module" crossorigin src="./assets/index-BPvR6pln.js"></script>
|
|
10
10
|
<link rel="stylesheet" crossorigin href="./assets/index-mweBpmCT.css">
|
|
11
11
|
</head>
|
|
12
12
|
<body>
|
|
@@ -118,3 +118,78 @@ export function stripEnvelopeFromMessages(messages) {
|
|
|
118
118
|
});
|
|
119
119
|
return changed ? next : messages;
|
|
120
120
|
}
|
|
121
|
+
// ---------------------------------------------------------------------------
|
|
122
|
+
// Base64 image stripping
|
|
123
|
+
// ---------------------------------------------------------------------------
|
|
124
|
+
// Images must be stored as physical files on disk and referenced by path —
|
|
125
|
+
// never as inline base64 in transcripts or chat history responses.
|
|
126
|
+
// These functions remove base64 data from image content blocks wherever
|
|
127
|
+
// they appear (user, assistant, tool messages).
|
|
128
|
+
// ---------------------------------------------------------------------------
|
|
129
|
+
/**
 * Decide whether a content block carries inline (base64 / data-URL) image data.
 *
 * Recognised shapes:
 *   - { type: "image", data: "<base64>" }                       (data longer than 256 chars)
 *   - { type: "image", source: { type: "base64", data: "..." } } (data longer than 256 chars)
 *   - { type: "image_url", image_url: { url: "data:..." } }
 *
 * @param {unknown} block - A single content block from a message.
 * @returns {boolean} true when the block embeds image bytes inline.
 */
function isBase64ImageBlock(block) {
    if (!block || typeof block !== "object") {
        return false;
    }
    // Short strings are treated as references/placeholders rather than payloads.
    const isLongString = (value) => typeof value === "string" && value.length > 256;
    switch (block.type) {
        case "image": {
            if (isLongString(block.data)) {
                return true;
            }
            const { source } = block;
            return source?.type === "base64" && isLongString(source.data);
        }
        case "image_url": {
            // OpenAI-style block: only inline data URLs count, remote URLs do not.
            const url = block.image_url?.url;
            return typeof url === "string" && url.startsWith("data:");
        }
        default:
            return false;
    }
}
|
|
151
|
+
/**
 * Swap every inline-image block in `content` for a text placeholder.
 *
 * The placeholder text is the block's mime type in square brackets, taken from
 * `mimeType`, `media_type` or `source.media_type` (first one present), falling
 * back to the literal "image".
 *
 * @param {Array<unknown>} content - Content blocks of a single message.
 * @returns {{ content: Array<unknown>, changed: boolean }} The mapped blocks and
 *   whether at least one block was replaced.
 */
function stripBase64FromContentBlocks(content) {
    let replacedAny = false;
    const mapped = content.map((item) => {
        if (isBase64ImageBlock(item)) {
            replacedAny = true;
            const label = item.mimeType ?? item.media_type ?? item.source?.media_type ?? "image";
            return { type: "text", text: `[${label}]` };
        }
        return item;
    });
    return { content: mapped, changed: replacedAny };
}
|
|
166
|
+
/**
 * Return a copy of `message` whose inline base64 image blocks have been
 * replaced with text placeholders.
 *
 * The input is returned untouched (same reference) when it is not an object,
 * when its `content` is not an array, or when no block needed replacing.
 *
 * @param {unknown} message - A chat/transcript message.
 * @returns {unknown} The original message or a shallow copy with new `content`.
 */
export function stripBase64ImagesFromMessage(message) {
    const isObject = Boolean(message) && typeof message === "object";
    if (!isObject || !Array.isArray(message.content)) {
        return message;
    }
    const outcome = stripBase64FromContentBlocks(message.content);
    return outcome.changed ? { ...message, content: outcome.content } : message;
}
|
|
181
|
+
/**
 * Apply `stripBase64ImagesFromMessage` to each message in a list.
 *
 * When nothing was replaced the original array is returned (same reference),
 * so callers can cheaply detect "no change".
 *
 * @param {Array<unknown>} messages - Messages to sanitize.
 * @returns {Array<unknown>} The original array, or a new array of sanitized messages.
 */
export function stripBase64ImagesFromMessages(messages) {
    if (messages.length === 0) {
        return messages;
    }
    const sanitized = messages.map((original) => stripBase64ImagesFromMessage(original));
    // A message counts as changed when the helper handed back a different value.
    const anyChanged = sanitized.some((candidate, index) => candidate !== messages[index]);
    return anyChanged ? sanitized : messages;
}
|
|
@@ -16,7 +16,7 @@ export const LogsTailResultSchema = Type.Object({
|
|
|
16
16
|
// WebChat/WebSocket-native chat methods
|
|
17
17
|
export const ChatHistoryParamsSchema = Type.Object({
|
|
18
18
|
sessionKey: NonEmptyString,
|
|
19
|
-
limit: Type.Optional(Type.Integer({ minimum: 1, maximum:
|
|
19
|
+
limit: Type.Optional(Type.Integer({ minimum: 1, maximum: 10_000 })),
|
|
20
20
|
/** When set, read from this specific session transcript instead of the current one. */
|
|
21
21
|
sessionId: Type.Optional(NonEmptyString),
|
|
22
22
|
}, { additionalProperties: false });
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import { normalizeVerboseLevel } from "../auto-reply/thinking.js";
|
|
2
2
|
import { isSilentReplyText } from "../auto-reply/tokens.js";
|
|
3
3
|
import { getAgentRunContext } from "../infra/agent-events.js";
|
|
4
|
-
import { fireSuggestion } from "../suggestions/broadcast.js";
|
|
5
4
|
import { loadSessionEntry } from "./session-utils.js";
|
|
6
5
|
import { formatForLog } from "./ws-log.js";
|
|
7
6
|
export function createChatRunRegistry() {
|
|
@@ -108,14 +107,6 @@ export function createAgentEventHandler({ broadcast, nodeSendToSession, agentRun
|
|
|
108
107
|
};
|
|
109
108
|
broadcast("chat", payload);
|
|
110
109
|
nodeSendToSession(sessionKey, "chat", payload);
|
|
111
|
-
// Fire follow-up suggestion for successful responses with text
|
|
112
|
-
if (text) {
|
|
113
|
-
fireSuggestion({
|
|
114
|
-
sessionKey,
|
|
115
|
-
broadcast,
|
|
116
|
-
lastAssistantReply: text,
|
|
117
|
-
});
|
|
118
|
-
}
|
|
119
110
|
return;
|
|
120
111
|
}
|
|
121
112
|
const payload = {
|
|
@@ -14,11 +14,9 @@ import { createInternalHookEvent, triggerInternalHook } from "../../hooks/intern
|
|
|
14
14
|
import { INTERNAL_MESSAGE_CHANNEL } from "../../utils/message-channel.js";
|
|
15
15
|
import { abortChatRunById, abortChatRunsForSessionKey, isChatStopCommandText, resolveChatRunExpiresAtMs, } from "../chat-abort.js";
|
|
16
16
|
import { ErrorCodes, errorShape, formatValidationErrors, validateChatAbortParams, validateChatHistoryParams, validateChatInjectParams, validateChatSendParams, } from "../protocol/index.js";
|
|
17
|
-
import {
|
|
18
|
-
import {
|
|
19
|
-
import { stripEnvelopeFromMessages } from "../chat-sanitize.js";
|
|
17
|
+
import { loadSessionEntry, readSessionMessages, resolveSessionModelRef } from "../session-utils.js";
|
|
18
|
+
import { stripBase64ImagesFromMessages, stripEnvelopeFromMessages } from "../chat-sanitize.js";
|
|
20
19
|
import { formatForLog } from "../ws-log.js";
|
|
21
|
-
import { fireSuggestion } from "../../suggestions/broadcast.js";
|
|
22
20
|
function resolveTranscriptPath(params) {
|
|
23
21
|
const { sessionId, storePath, sessionFile } = params;
|
|
24
22
|
if (sessionFile)
|
|
@@ -128,11 +126,13 @@ export const chatHandlers = {
|
|
|
128
126
|
}
|
|
129
127
|
const { sessionKey, limit, sessionId: requestedSessionId, } = params;
|
|
130
128
|
const { cfg, storePath, entry } = loadSessionEntry(sessionKey);
|
|
131
|
-
// When a specific sessionId is requested, resolve
|
|
132
|
-
//
|
|
129
|
+
// When a specific sessionId is requested, resolve only that transcript.
|
|
130
|
+
// Otherwise, stitch all previous sessions + current into one continuous history.
|
|
133
131
|
let sessionId = entry?.sessionId;
|
|
134
132
|
let sessionFile = entry?.sessionFile;
|
|
133
|
+
let rawMessages = [];
|
|
135
134
|
if (requestedSessionId && entry) {
|
|
135
|
+
// Single-session mode: resolve the requested transcript.
|
|
136
136
|
if (requestedSessionId === entry.sessionId) {
|
|
137
137
|
// Already pointing at current — no change needed.
|
|
138
138
|
}
|
|
@@ -143,22 +143,37 @@ export const chatHandlers = {
|
|
|
143
143
|
sessionFile = prev.sessionFile;
|
|
144
144
|
}
|
|
145
145
|
else {
|
|
146
|
-
// Unknown sessionId — fall through with empty result rather than erroring,
|
|
147
|
-
// so the caller gets an empty messages array instead of a hard failure.
|
|
148
146
|
sessionId = undefined;
|
|
149
147
|
}
|
|
150
148
|
}
|
|
149
|
+
rawMessages =
|
|
150
|
+
sessionId && storePath ? readSessionMessages(sessionId, storePath, sessionFile) : [];
|
|
151
151
|
}
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
152
|
+
else if (entry && storePath) {
|
|
153
|
+
// Stitched mode: read previous sessions (oldest first) + current session.
|
|
154
|
+
const previous = entry.previousSessions ?? [];
|
|
155
|
+
for (const prev of previous) {
|
|
156
|
+
if (!prev.sessionId)
|
|
157
|
+
continue;
|
|
158
|
+
const msgs = readSessionMessages(prev.sessionId, storePath, prev.sessionFile);
|
|
159
|
+
if (msgs.length > 0)
|
|
160
|
+
rawMessages.push(...msgs);
|
|
161
|
+
}
|
|
162
|
+
// Append current session messages.
|
|
163
|
+
if (sessionId) {
|
|
164
|
+
const current = readSessionMessages(sessionId, storePath, sessionFile);
|
|
165
|
+
rawMessages.push(...current);
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
const hardMax = 10_000;
|
|
169
|
+
const defaultLimit = 5000;
|
|
155
170
|
const requested = typeof limit === "number" ? limit : defaultLimit;
|
|
156
171
|
const max = Math.min(hardMax, requested);
|
|
157
|
-
const
|
|
158
|
-
const sanitized = stripEnvelopeFromMessages(
|
|
159
|
-
const { items: capped, bytes: cappedBytes } = capArrayByJsonBytes(sanitized, getMaxChatHistoryMessagesBytes());
|
|
172
|
+
const messages = rawMessages.length > max ? rawMessages.slice(-max) : rawMessages;
|
|
173
|
+
const sanitized = stripBase64ImagesFromMessages(stripEnvelopeFromMessages(messages));
|
|
160
174
|
// Diagnostic: log resolution details so we can trace "lost history" reports.
|
|
161
|
-
|
|
175
|
+
const prevCount = entry?.previousSessions?.length ?? 0;
|
|
176
|
+
context.logGateway.info(`chat.history: sessionKey=${sessionKey} resolvedSessionId=${sessionId ?? "none"} storePath=${storePath ?? "none"} entryExists=${!!entry} previousSessions=${prevCount} rawMessages=${rawMessages.length} sent=${sanitized.length}`);
|
|
162
177
|
if (!entry) {
|
|
163
178
|
context.logGateway.warn(`chat.history: no session entry found for sessionKey=${sessionKey}`);
|
|
164
179
|
}
|
|
@@ -183,21 +198,13 @@ export const chatHandlers = {
|
|
|
183
198
|
respond(true, {
|
|
184
199
|
sessionKey,
|
|
185
200
|
sessionId,
|
|
186
|
-
messages:
|
|
201
|
+
messages: sanitized,
|
|
187
202
|
thinkingLevel,
|
|
188
203
|
modelProvider,
|
|
189
204
|
model: modelId,
|
|
190
205
|
verboseLevel: entry?.verboseLevel ?? null,
|
|
191
206
|
fillerEnabled: entry?.fillerEnabled ?? null,
|
|
192
207
|
});
|
|
193
|
-
// Fire suggestion for empty sessions (onboarding opener)
|
|
194
|
-
if (capped.length === 0) {
|
|
195
|
-
fireSuggestion({
|
|
196
|
-
sessionKey,
|
|
197
|
-
broadcast: context.broadcast,
|
|
198
|
-
cfg,
|
|
199
|
-
});
|
|
200
|
-
}
|
|
201
208
|
},
|
|
202
209
|
"chat.abort": ({ params, respond, context }) => {
|
|
203
210
|
if (!validateChatAbortParams(params)) {
|
|
@@ -559,16 +566,6 @@ export const chatHandlers = {
|
|
|
559
566
|
sessionKey: p.sessionKey,
|
|
560
567
|
message,
|
|
561
568
|
});
|
|
562
|
-
// Fire follow-up suggestion after successful response
|
|
563
|
-
if (combinedReply) {
|
|
564
|
-
fireSuggestion({
|
|
565
|
-
sessionKey: p.sessionKey,
|
|
566
|
-
broadcast: context.broadcast,
|
|
567
|
-
cfg,
|
|
568
|
-
lastUserMessage: p.message,
|
|
569
|
-
lastAssistantReply: combinedReply,
|
|
570
|
-
});
|
|
571
|
-
}
|
|
572
569
|
}
|
|
573
570
|
// Fire message:outbound hook for conversation archiving
|
|
574
571
|
const outboundText = finalReplyParts.join("\n\n").trim();
|
|
@@ -6,6 +6,7 @@ import { startGmailWatcher } from "../hooks/gmail-watcher.js";
|
|
|
6
6
|
import { clearInternalHooks, createInternalHookEvent, triggerInternalHook, } from "../hooks/internal-hooks.js";
|
|
7
7
|
import { loadInternalHooks } from "../hooks/loader.js";
|
|
8
8
|
import { startPluginServices } from "../plugins/services.js";
|
|
9
|
+
import { recoverOrphanedSessions, stripBase64FromTranscripts } from "../infra/session-recovery.js";
|
|
9
10
|
import { startBrowserControlServerIfEnabled } from "./server-browser.js";
|
|
10
11
|
import { scheduleRestartSentinelWake, shouldWakeFromRestartSentinel, } from "./server-restart-sentinel.js";
|
|
11
12
|
import { cacheLastUpdateSentinel } from "./server-methods/update.js";
|
|
@@ -18,6 +19,12 @@ export async function startGatewaySidecars(params) {
|
|
|
18
19
|
catch (err) {
|
|
19
20
|
params.logBrowser.error(`server failed to start: ${String(err)}`);
|
|
20
21
|
}
|
|
22
|
+
// Recover orphaned session JSONL files left by old daily/idle resets,
|
|
23
|
+
// then strip any inline base64 image data from transcripts.
|
|
24
|
+
// Fire-and-forget — never blocks startup.
|
|
25
|
+
void recoverOrphanedSessions()
|
|
26
|
+
.then(() => stripBase64FromTranscripts())
|
|
27
|
+
.catch(() => { });
|
|
21
28
|
// Start Gmail watcher if configured (hooks.gmail.account).
|
|
22
29
|
if (!isTruthyEnvValue(process.env.TASKMASTER_SKIP_GMAIL_WATCHER)) {
|
|
23
30
|
try {
|
|
@@ -0,0 +1,381 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Recovers orphaned session JSONL files that exist on disk but are not
|
|
3
|
+
* referenced by any session store entry (neither as a current session nor
|
|
4
|
+
* in any `previousSessions` array).
|
|
5
|
+
*
|
|
6
|
+
* This situation arises when daily/idle resets silently replaced session
|
|
7
|
+
* entries without archiving the previous session. The JSONL files were
|
|
8
|
+
* preserved on disk but the store lost track of them.
|
|
9
|
+
*
|
|
10
|
+
* The recovery runs once at gateway startup, is idempotent, and classifies
|
|
11
|
+
* each orphan by reading its first user message to determine which session
|
|
12
|
+
* key it belongs to (cron, WhatsApp DM, or webchat main).
|
|
13
|
+
*/
|
|
14
|
+
import fs from "node:fs";
|
|
15
|
+
import path from "node:path";
|
|
16
|
+
import { resolveStateDir } from "../config/paths.js";
|
|
17
|
+
import { updateSessionStore } from "../config/sessions.js";
|
|
18
|
+
import { createSubsystemLogger } from "../logging/subsystem.js";
|
|
19
|
+
const log = createSubsystemLogger("session-recovery");
|
|
20
|
+
// ---------- Classification helpers ----------
|
|
21
|
+
const CRON_PATTERN = /^\[cron:([0-9a-f-]{36})\b/i;
|
|
22
|
+
const WHATSAPP_PATTERN = /^\[WhatsApp\s+(\+\d+)\b/i;
|
|
23
|
+
/**
 * Inspect an unreferenced transcript file and decide which session key it
 * should be attached to.
 *
 * Only the first 16 KiB / first 20 lines are examined. From those lines the
 * function picks up:
 *   - the `type: "session"` header timestamp (number, or anything `Date` can parse);
 *   - the text of the first `role: "user"` message.
 *
 * If no header timestamp is found the file's mtime is used (0 if stat fails).
 * The user text is matched against CRON_PATTERN, then WHATSAPP_PATTERN; a match
 * only wins when the resulting key is present in `knownSessionKeys`. Otherwise
 * the agent's `main` key is used when known.
 *
 * @param {string} filePath - Path of the `.jsonl` transcript.
 * @param {string} agentId - Agent directory name owning the transcript.
 * @param {Set<string>} knownSessionKeys - Session keys present in the agent's store.
 * @returns {{ uuid: string, filePath: string, headerTimestamp: number, sessionKey: string | null } | null}
 *   null when the file is empty or cannot be opened/read.
 */
function classifyOrphan(filePath, agentId, knownSessionKeys) {
    const uuid = path.basename(filePath, ".jsonl");
    let headerTimestamp = 0;
    let firstUserMessage = null;
    // Pull the first text out of a user message's content (string or block array).
    const extractText = (content) => {
        if (typeof content === "string") {
            return content;
        }
        if (Array.isArray(content)) {
            for (const part of content) {
                if (part?.type === "text" && typeof part.text === "string") {
                    return part.text;
                }
            }
        }
        return null;
    };
    let fd = null;
    try {
        fd = fs.openSync(filePath, "r");
        // Read enough to cover the header plus the first few messages.
        const head = Buffer.alloc(16384);
        const bytesRead = fs.readSync(fd, head, 0, head.length, 0);
        if (bytesRead === 0) {
            return null;
        }
        const candidateLines = head.toString("utf-8", 0, bytesRead).split(/\r?\n/).slice(0, 20);
        for (const rawLine of candidateLines) {
            if (!rawLine.trim()) {
                continue;
            }
            try {
                const record = JSON.parse(rawLine);
                if (record?.type === "session" && record?.timestamp) {
                    const stamp = typeof record.timestamp === "number"
                        ? record.timestamp
                        : new Date(record.timestamp).getTime();
                    if (Number.isFinite(stamp)) {
                        headerTimestamp = stamp;
                    }
                }
                if (record?.type === "message" && !firstUserMessage && record?.message?.role === "user") {
                    const text = extractText(record.message.content);
                    if (text !== null) {
                        firstUserMessage = text;
                    }
                }
                if (headerTimestamp && firstUserMessage) {
                    break;
                }
            }
            catch {
                // Malformed (or truncated) JSON line: ignore and keep scanning.
            }
        }
    }
    catch {
        return null;
    }
    finally {
        if (fd !== null) {
            fs.closeSync(fd);
        }
    }
    // No usable header timestamp: fall back to the file's modification time.
    if (!headerTimestamp) {
        try {
            headerTimestamp = fs.statSync(filePath).mtimeMs;
        }
        catch {
            headerTimestamp = 0;
        }
    }
    // A candidate key is only accepted when the store already knows it.
    const ifKnown = (candidate) => (knownSessionKeys.has(candidate) ? candidate : null);
    let sessionKey = null;
    if (firstUserMessage) {
        const cronHit = firstUserMessage.match(CRON_PATTERN);
        if (cronHit) {
            sessionKey = ifKnown(`agent:${agentId}:cron:${cronHit[1]}`);
        }
        if (!sessionKey) {
            const whatsappHit = firstUserMessage.match(WHATSAPP_PATTERN);
            if (whatsappHit) {
                sessionKey = ifKnown(`agent:${agentId}:dm:${whatsappHit[1]}`);
            }
        }
    }
    // Default bucket: the agent's main webchat session, when it exists.
    if (!sessionKey) {
        sessionKey = ifKnown(`agent:${agentId}:main`);
    }
    return { uuid, filePath, headerTimestamp, sessionKey };
}
|
|
124
|
+
// ---------- Main recovery ----------
|
|
125
|
+
/**
 * Add every session id referenced by a single store entry (its current
 * `sessionId` plus each `previousSessions[].sessionId`) to `target`.
 * Null/non-object entries and malformed `previousSessions` items are ignored.
 */
function collectReferencedSessionIds(entry, target) {
    if (!entry || typeof entry !== "object") {
        return;
    }
    if (entry.sessionId) {
        target.add(entry.sessionId);
    }
    if (Array.isArray(entry.previousSessions)) {
        for (const prev of entry.previousSessions) {
            if (prev?.sessionId) {
                target.add(prev.sessionId);
            }
        }
    }
}
/**
 * Re-attach transcript files that exist on disk but are not referenced by any
 * entry of their agent's `sessions.json` store.
 *
 * For each `<stateDir>/agents/<agentId>/sessions` directory the function:
 *   1. loads `sessions.json` and gathers all referenced session ids;
 *   2. classifies every unreferenced `*.jsonl` file via `classifyOrphan`;
 *   3. prepends the classified orphans (oldest first) to the owning entry's
 *      `previousSessions` through `updateSessionStore`.
 *
 * Unreadable directories/stores and malformed store entries are skipped rather
 * than aborting the whole run. A failed store update is logged and does not
 * count towards `recovered`.
 *
 * @param {{ stateDir?: string }} [params] - Optional override of the state directory.
 * @returns {Promise<{ recovered: number, agents: number }>} Number of sessions
 *   re-attached and number of agents for which classified orphans were found.
 */
export async function recoverOrphanedSessions(params) {
    const stateDir = params?.stateDir ?? resolveStateDir();
    const agentsDir = path.join(stateDir, "agents");
    if (!fs.existsSync(agentsDir)) {
        return { recovered: 0, agents: 0 };
    }
    let totalRecovered = 0;
    let agentsProcessed = 0;
    let agentEntries;
    try {
        agentEntries = fs.readdirSync(agentsDir, { withFileTypes: true });
    }
    catch {
        return { recovered: 0, agents: 0 };
    }
    for (const agentEntry of agentEntries) {
        if (!agentEntry.isDirectory()) {
            continue;
        }
        const agentId = agentEntry.name;
        const sessionsDir = path.join(agentsDir, agentId, "sessions");
        const storePath = path.join(sessionsDir, "sessions.json");
        if (!fs.existsSync(storePath)) {
            continue;
        }
        // Load the store; anything that is not a plain key → entry object is skipped.
        let store;
        try {
            store = JSON.parse(fs.readFileSync(storePath, "utf-8"));
        }
        catch {
            continue;
        }
        if (!store || typeof store !== "object" || Array.isArray(store)) {
            continue;
        }
        // Collect all referenced session ids and the keys orphans may be assigned to.
        const referencedIds = new Set();
        const knownSessionKeys = new Set();
        for (const [key, entry] of Object.entries(store)) {
            // A null/garbage entry cannot receive orphans, so it is not a known key.
            if (!entry || typeof entry !== "object") {
                continue;
            }
            knownSessionKeys.add(key);
            collectReferencedSessionIds(entry, referencedIds);
        }
        // List JSONL files on disk.
        let files;
        try {
            files = fs.readdirSync(sessionsDir).filter((f) => f.endsWith(".jsonl"));
        }
        catch {
            continue;
        }
        // Find orphans.
        const orphans = [];
        for (const file of files) {
            // Strip only the trailing extension, matching how classifyOrphan derives the id.
            const uuid = path.basename(file, ".jsonl");
            if (referencedIds.has(uuid)) {
                continue;
            }
            const info = classifyOrphan(path.join(sessionsDir, file), agentId, knownSessionKeys);
            if (info?.sessionKey) {
                orphans.push(info);
            }
        }
        if (orphans.length === 0) {
            continue;
        }
        agentsProcessed++;
        // Group orphans by session key.
        const byKey = new Map();
        for (const orphan of orphans) {
            const group = byKey.get(orphan.sessionKey) ?? [];
            group.push(orphan);
            byKey.set(orphan.sessionKey, group);
        }
        // Update the store: prepend orphans to each entry's previousSessions.
        let recoveredForAgent = 0;
        try {
            await updateSessionStore(storePath, (currentStore) => {
                // Reset in case the mutator is invoked more than once.
                recoveredForAgent = 0;
                for (const [sessionKey, orphanGroup] of byKey.entries()) {
                    const entry = currentStore[sessionKey];
                    if (!entry || typeof entry !== "object") {
                        continue;
                    }
                    // Re-check: skip any that are now referenced (in case of concurrent update).
                    const nowReferenced = new Set();
                    collectReferencedSessionIds(entry, nowReferenced);
                    const toAdd = orphanGroup
                        .filter((o) => !nowReferenced.has(o.uuid))
                        .sort((a, b) => a.headerTimestamp - b.headerTimestamp);
                    if (toAdd.length === 0) {
                        continue;
                    }
                    const existingPrev = Array.isArray(entry.previousSessions)
                        ? entry.previousSessions
                        : [];
                    entry.previousSessions = [
                        ...toAdd.map((o) => ({
                            sessionId: o.uuid,
                            sessionFile: o.filePath,
                            endedAt: o.headerTimestamp,
                        })),
                        ...existingPrev,
                    ];
                    recoveredForAgent += toAdd.length;
                }
            });
            // Only count sessions once the store update has completed without error.
            totalRecovered += recoveredForAgent;
        }
        catch (err) {
            log.warn(`failed to update store for agent ${agentId}: ${String(err)}`);
        }
    }
    if (totalRecovered > 0) {
        log.info(`recovered ${totalRecovered} orphaned session(s) across ${agentsProcessed} agent(s)`);
    }
    return { recovered: totalRecovered, agents: agentsProcessed };
}
|
|
249
|
+
// ---------- Base64 image stripping from JSONL transcripts ----------
|
|
250
|
+
/**
 * Tell whether a content block embeds image bytes inline instead of pointing
 * at a file.
 *
 * Matches `type: "image"` blocks whose `data` (or base64 `source.data`) string
 * is longer than 256 characters, and `type: "image_url"` blocks whose URL is a
 * `data:` URL.
 *
 * @param {object} block - A non-null content block.
 * @returns {boolean} true when inline image data is present.
 */
function hasInlineBase64(block) {
    const kind = block.type;
    if (kind === "image") {
        const direct = block.data;
        if (typeof direct === "string" && direct.length > 256) {
            return true;
        }
        const nested = block.source;
        const nestedIsBase64 = nested?.type === "base64";
        if (nestedIsBase64 && typeof nested.data === "string" && nested.data.length > 256) {
            return true;
        }
    }
    if (kind === "image_url") {
        const target = block.image_url?.url;
        if (typeof target === "string" && target.startsWith("data:")) {
            return true;
        }
    }
    return false;
}
|
|
270
|
+
/**
 * Produce a copy of a content array in which every inline-image block is
 * replaced by a `{ type: "text", text: "[<mime>]" }` placeholder.
 *
 * The mime label comes from `mimeType`, `media_type` or `source.media_type`
 * (first one present), defaulting to "image". Non-object items pass through.
 *
 * @param {Array<unknown>} content - Content blocks of one transcript message.
 * @returns {Array<unknown> | null} The rewritten array, or null when nothing changed.
 */
function stripBase64FromContent(content) {
    let replacements = 0;
    const output = content.map((item) => {
        const isObject = Boolean(item) && typeof item === "object";
        if (!isObject || !hasInlineBase64(item)) {
            return item;
        }
        replacements += 1;
        const label = item.mimeType ?? item.media_type ?? item.source?.media_type ?? "image";
        return { type: "text", text: `[${label}]` };
    });
    return replacements > 0 ? output : null;
}
|
|
291
|
+
/**
 * Walk every `<stateDir>/agents/<agent>/sessions/*.jsonl` transcript and
 * replace inline base64 image blocks with text placeholders.
 *
 * Each transcript is parsed line by line; lines that are blank, fail to parse,
 * or contain no inline image are written back verbatim. A file is only
 * rewritten (via a `.tmp` sibling followed by a rename) when at least one line
 * changed, so transcripts without base64 data are left untouched. Read and
 * write failures for one file are skipped/logged and do not stop the scan.
 *
 * @param {{ stateDir?: string }} [params] - Optional override of the state directory.
 * @returns {Promise<{ cleaned: number, bytesReclaimed: number }>} Number of
 *   files rewritten and the total size reduction in bytes.
 */
export async function stripBase64FromTranscripts(params) {
    const stateDir = params?.stateDir ?? resolveStateDir();
    const agentsDir = path.join(stateDir, "agents");
    if (!fs.existsSync(agentsDir)) {
        return { cleaned: 0, bytesReclaimed: 0 };
    }
    let agentDirs;
    try {
        agentDirs = fs.readdirSync(agentsDir, { withFileTypes: true });
    }
    catch {
        return { cleaned: 0, bytesReclaimed: 0 };
    }
    let totalCleaned = 0;
    let totalBytesReclaimed = 0;
    for (const dirent of agentDirs) {
        if (!dirent.isDirectory()) {
            continue;
        }
        const sessionsDir = path.join(agentsDir, dirent.name, "sessions");
        let transcriptNames;
        try {
            transcriptNames = fs.readdirSync(sessionsDir).filter((name) => name.endsWith(".jsonl"));
        }
        catch {
            continue;
        }
        for (const file of transcriptNames) {
            const filePath = path.join(sessionsDir, file);
            let raw;
            try {
                raw = fs.readFileSync(filePath, "utf-8");
            }
            catch {
                continue;
            }
            let fileChanged = false;
            const rewritten = raw.split(/\r?\n/).map((line) => {
                if (!line.trim()) {
                    return line;
                }
                try {
                    const record = JSON.parse(line);
                    const message = record?.message;
                    if (message && Array.isArray(message.content)) {
                        const cleanedContent = stripBase64FromContent(message.content);
                        if (cleanedContent) {
                            record.message = { ...message, content: cleanedContent };
                            const serialized = JSON.stringify(record);
                            fileChanged = true;
                            return serialized;
                        }
                    }
                }
                catch {
                    // Unparseable line: keep it exactly as it was.
                }
                return line;
            });
            if (!fileChanged) {
                continue;
            }
            const newContent = rewritten.join("\n");
            const oldSize = Buffer.byteLength(raw, "utf-8");
            const newSize = Buffer.byteLength(newContent, "utf-8");
            try {
                // Write to a temporary sibling first, then rename over the original.
                const tmpPath = `${filePath}.tmp`;
                fs.writeFileSync(tmpPath, newContent, "utf-8");
                fs.renameSync(tmpPath, filePath);
                totalCleaned++;
                totalBytesReclaimed += oldSize - newSize;
            }
            catch (err) {
                log.warn(`failed to clean ${file}: ${String(err)}`);
            }
        }
    }
    if (totalCleaned > 0) {
        const mb = (totalBytesReclaimed / (1024 * 1024)).toFixed(1);
        log.info(`stripped base64 images from ${totalCleaned} transcript(s), reclaimed ${mb} MB`);
    }
    return { cleaned: totalCleaned, bytesReclaimed: totalBytesReclaimed };
}
|