@lightcone-ai/daemon 0.15.77 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/chat-bridge.js +80 -31
- package/src/submit-to-library-tool.js +11 -10
- package/src/upload-job-manager.js +542 -0
- package/src/upload-server-api.js +80 -0
package/package.json
CHANGED
package/src/chat-bridge.js
CHANGED
|
@@ -2,11 +2,14 @@
|
|
|
2
2
|
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
3
3
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
4
4
|
import { z } from 'zod';
|
|
5
|
-
import { createReadStream, existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
|
|
5
|
+
import { createReadStream, existsSync, mkdirSync, readFileSync, writeFileSync, statSync } from 'fs';
|
|
6
6
|
import { createHash, randomUUID } from 'crypto';
|
|
7
7
|
import path, { extname } from 'path';
|
|
8
|
+
import os from 'os';
|
|
8
9
|
import { recordUrlNarration } from './_vendor/video/recorder/index.js';
|
|
9
|
-
import { writeLocalFileToWorkspace } from './workspace-file-upload.js';
|
|
10
|
+
import { writeLocalFileToWorkspace, resolveWorkspaceFileUploadPlan } from './workspace-file-upload.js';
|
|
11
|
+
import { UploadJobManager } from './upload-job-manager.js';
|
|
12
|
+
import { createUploadServerApi } from './upload-server-api.js';
|
|
10
13
|
import { runRecordUrlNarrationTool } from './record-url-narration-tool.js';
|
|
11
14
|
import { runSubmitToLibraryTool } from './submit-to-library-tool.js';
|
|
12
15
|
import { runRenderTextToImageTool } from './tools/render-text-to-image.js';
|
|
@@ -843,6 +846,23 @@ const IS_HOST_AGENT = await resolveHostAgentFlag();
|
|
|
843
846
|
|
|
844
847
|
const server = new McpServer({ name: 'chat', version: '0.1.0' });
|
|
845
848
|
|
|
849
|
+
// ── Upload job manager (async + multipart workspace-file uploads) ────────────
|
|
850
|
+
// Per-agent jobDir so each chat-bridge sees only its own pending jobs. When the
|
|
851
|
+
// chat-bridge process restarts (skill bind, manual restart) it picks up any
|
|
852
|
+
// in-flight uploads from disk and resumes them; see docs/upload-pipeline-design.md.
|
|
853
|
+
const _uploadJobDir = path.join(
|
|
854
|
+
os.homedir(),
|
|
855
|
+
'.lightcone',
|
|
856
|
+
'upload-jobs',
|
|
857
|
+
AGENT_ID || 'default'
|
|
858
|
+
);
|
|
859
|
+
const uploadJobManager = new UploadJobManager({
|
|
860
|
+
jobDir: _uploadJobDir,
|
|
861
|
+
serverApi: createUploadServerApi({ api: (...args) => api(...args) }),
|
|
862
|
+
log: (msg) => console.error(`[chat-bridge:UploadJobManager] ${msg}`),
|
|
863
|
+
});
|
|
864
|
+
uploadJobManager.start();
|
|
865
|
+
|
|
846
866
|
// ── check_messages ────────────────────────────────────────────────────────────
|
|
847
867
|
server.tool('check_messages', 'Check for new messages in your inbox', {}, async () => {
|
|
848
868
|
const data = await api('GET', '/receive');
|
|
@@ -1099,38 +1119,67 @@ server.tool('write_workspace', 'Write a file to the shared workspace. Use this t
|
|
|
1099
1119
|
return { content: [{ type: 'text', text: `Saved to workspace: ${path}` }] };
|
|
1100
1120
|
});
|
|
1101
1121
|
|
|
1102
|
-
server.tool('write_workspace_file',
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1122
|
+
server.tool('write_workspace_file',
|
|
1123
|
+
'Write a local file directly to the shared workspace. Prefer this over write_workspace for images/PDFs/binary files so large base64 content never enters the model context.\n\n'
|
|
1124
|
+
+ 'Binary uploads are ASYNCHRONOUS: the tool queues an upload job and returns immediately with a jobId. The actual PUT runs in the background with retry + COS multipart (no codex 120s timeout, no single-PUT failures). When you next call submit_to_library on the same workspace path, the server waits up to ~90s for the upload to finish before indexing; if it returns "upload_still_in_progress" just retry submit_to_library a few seconds later — DO NOT re-record/re-compose the video, the source file is fine.\n\n'
|
|
1125
|
+
+ 'Text files (.md/.txt/.json/etc.) still upload synchronously since they are tiny.',
|
|
1126
|
+
{
|
|
1127
|
+
file_path: z.string().describe('Local file path. Relative paths resolve from the current agent workspace. Absolute paths must stay inside the agent/workspace.'),
|
|
1128
|
+
path: z.string().describe('Destination path relative to workspace root, e.g. "artifacts/cover.png"'),
|
|
1129
|
+
},
|
|
1130
|
+
async ({ file_path, path: workspacePath }) => {
|
|
1106
1131
|
if (!currentWorkspaceId) return { content: [{ type: 'text', text: 'No workspace context.' }] };
|
|
1107
1132
|
const localPath = resolveLocalWorkspaceFile(file_path);
|
|
1108
|
-
const result = await writeLocalFileToWorkspace({
|
|
1109
|
-
localPath,
|
|
1110
|
-
workspacePath: path,
|
|
1111
|
-
workspaceId: currentWorkspaceId,
|
|
1112
|
-
readFileSyncFn: readFileSync,
|
|
1113
|
-
uploadWorkspaceMemory: async ({ workspacePath, workspaceId, content }) => {
|
|
1114
|
-
await api('PUT', `/workspace-memory?path=${encodeURIComponent(workspacePath)}&workspaceId=${encodeURIComponent(workspaceId)}`, { content });
|
|
1115
|
-
},
|
|
1116
|
-
presign: async ({ workspaceId, path: filePath, size, mime, sha256 }) => {
|
|
1117
|
-
return api('POST', '/storage/presign', { workspaceId, path: filePath, size, mime, sha256 });
|
|
1118
|
-
},
|
|
1119
|
-
confirmUpload: async ({ workspaceId, path: filePath, objectKey }) => {
|
|
1120
|
-
await api('POST', '/storage/confirm', { workspaceId, path: filePath, objectKey });
|
|
1121
|
-
},
|
|
1122
|
-
});
|
|
1123
1133
|
|
|
1124
|
-
const
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
+
const plan = resolveWorkspaceFileUploadPlan({ localPath, workspacePath });
|
|
1135
|
+
|
|
1136
|
+
// ── Text path: tiny, sync, unchanged ────────────────────────────────────
|
|
1137
|
+
if (plan.isText) {
|
|
1138
|
+
const result = await writeLocalFileToWorkspace({
|
|
1139
|
+
localPath,
|
|
1140
|
+
workspacePath,
|
|
1141
|
+
workspaceId: currentWorkspaceId,
|
|
1142
|
+
readFileSyncFn: readFileSync,
|
|
1143
|
+
uploadWorkspaceMemory: async ({ workspacePath: wp, workspaceId, content }) => {
|
|
1144
|
+
await api('PUT', `/workspace-memory?path=${encodeURIComponent(wp)}&workspaceId=${encodeURIComponent(workspaceId)}`, { content });
|
|
1145
|
+
},
|
|
1146
|
+
// Binary path below replaces these; keep these stubs in case the plan
|
|
1147
|
+
// misclassifies, to avoid TypeError mid-tool-call.
|
|
1148
|
+
presign: async () => { throw new Error('binary upload should not reach legacy presign'); },
|
|
1149
|
+
confirmUpload: async () => { throw new Error('binary upload should not reach legacy confirmUpload'); },
|
|
1150
|
+
});
|
|
1151
|
+
const bytes = Number.isFinite(result?.bytes) ? result.bytes : 0;
|
|
1152
|
+
return {
|
|
1153
|
+
content: [{
|
|
1154
|
+
type: 'text',
|
|
1155
|
+
text: `Saved local text file to workspace: ${workspacePath} (${plan.mime}, ${formatBytes(bytes)})`,
|
|
1156
|
+
}],
|
|
1157
|
+
};
|
|
1158
|
+
}
|
|
1159
|
+
|
|
1160
|
+
// ── Binary path: async enqueue ──────────────────────────────────────────
|
|
1161
|
+
try {
|
|
1162
|
+
const enq = await uploadJobManager.enqueue({
|
|
1163
|
+
workspaceId: currentWorkspaceId,
|
|
1164
|
+
agentId: AGENT_ID || null,
|
|
1165
|
+
localPath,
|
|
1166
|
+
workspacePath,
|
|
1167
|
+
mime: plan.mime,
|
|
1168
|
+
});
|
|
1169
|
+
const lines = [
|
|
1170
|
+
`Queued upload to workspace: ${workspacePath} (${plan.mime}, ${formatBytes(enq.size)})`,
|
|
1171
|
+
`mode=${enq.mode}${enq.mode === 'multipart' ? ` chunks=${enq.totalChunks}` : ''}`,
|
|
1172
|
+
`job_id=${enq.jobId}`,
|
|
1173
|
+
`status=pending`,
|
|
1174
|
+
`note=Upload runs in background. Call submit_to_library when ready — it waits up to ~90s for the upload to finish. If it returns "upload_still_in_progress", just retry submit_to_library a few seconds later. DO NOT re-record or re-compose.`,
|
|
1175
|
+
];
|
|
1176
|
+
return { content: [{ type: 'text', text: lines.join('\n') }] };
|
|
1177
|
+
} catch (err) {
|
|
1178
|
+
return {
|
|
1179
|
+
isError: true,
|
|
1180
|
+
content: [{ type: 'text', text: `write_workspace_file failed: ${err?.message ?? err}` }],
|
|
1181
|
+
};
|
|
1182
|
+
}
|
|
1134
1183
|
});
|
|
1135
1184
|
|
|
1136
1185
|
// ── skill_list ───────────────────────────────────────────────────────────────
|
|
@@ -30,17 +30,18 @@ export async function runSubmitToLibraryTool({
|
|
|
30
30
|
'/content-library/submit',
|
|
31
31
|
buildSubmitToLibraryBody(args, currentWorkspaceId)
|
|
32
32
|
);
|
|
33
|
-
// Server returns 2xx + body.error for transient "still processing" cases
|
|
34
|
-
//
|
|
35
|
-
//
|
|
36
|
-
// treats any 2xx as success and returns the
|
|
37
|
-
// surface body.error here — otherwise the agent
|
|
38
|
-
// undefined and (because the literal "undefined"
|
|
39
|
-
// submit silently failed) tends to redo the entire
|
|
40
|
-
// retrying submit_to_library.
|
|
41
|
-
//
|
|
33
|
+
// Server returns 2xx + body.error for transient "still processing" cases:
|
|
34
|
+
// HTTP 202 + {error: 'upload_still_in_progress', message: '...'} when the
|
|
35
|
+
// async upload pipeline (see docs/upload-pipeline-design.md) hasn't
|
|
36
|
+
// finished. The HTTP client treats any 2xx as success and returns the
|
|
37
|
+
// body verbatim, so we must surface body.error here — otherwise the agent
|
|
38
|
+
// reads `data.itemId` as undefined and (because the literal "undefined"
|
|
39
|
+
// looks like proof the submit silently failed) tends to redo the entire
|
|
40
|
+
// video instead of just retrying submit_to_library. Prefer body.message
|
|
41
|
+
// when present (rich human-readable retry hint); fall back to body.error.
|
|
42
42
|
if (data && typeof data === 'object' && typeof data.error === 'string' && data.error.trim()) {
|
|
43
|
-
|
|
43
|
+
const detail = (typeof data.message === 'string' && data.message.trim()) ? data.message : data.error;
|
|
44
|
+
return toolError(`submit_to_library not ready: ${detail}. Retry submit_to_library with the same video_path in a few seconds — do NOT re-record or re-compose the video.`);
|
|
44
45
|
}
|
|
45
46
|
if (!data || typeof data.itemId !== 'string' || !data.itemId) {
|
|
46
47
|
return toolError(`submit_to_library returned no itemId. Server response: ${JSON.stringify(data).slice(0, 300)}. Retry submit_to_library with the same video_path — do NOT re-record or re-compose.`);
|
|
@@ -0,0 +1,542 @@
|
|
|
1
|
+
// Daemon-owned upload job manager.
|
|
2
|
+
//
|
|
3
|
+
// Decouples the agent-facing `write_workspace_file` tool from the actual
|
|
4
|
+
// PUT-to-COS round-trip so:
|
|
5
|
+
// - the tool returns in <50ms (just enqueues a job) rather than holding the
|
|
6
|
+
// codex/claude tool slot for minutes
|
|
7
|
+
// - PUT failures are retried with exponential backoff
|
|
8
|
+
// - large files use COS multipart so a single dropped chunk doesn't restart
|
|
9
|
+
// the whole upload
|
|
10
|
+
// - daemon restarts pick up in-flight jobs from disk and resume
|
|
11
|
+
//
|
|
12
|
+
// Out of scope for this module: the actual COS API calls. Those live behind
|
|
13
|
+
// the injected `serverApi` interface (M3 implements the server endpoints, M4
|
|
14
|
+
// wires this manager to it).
|
|
15
|
+
//
|
|
16
|
+
// Schema and state machine: see docs/upload-pipeline-design.md.
|
|
17
|
+
|
|
18
|
+
import {
|
|
19
|
+
readFileSync, writeFileSync, readdirSync, statSync,
|
|
20
|
+
promises as fsPromises,
|
|
21
|
+
createReadStream,
|
|
22
|
+
mkdirSync, renameSync,
|
|
23
|
+
} from 'node:fs';
|
|
24
|
+
import { open as fsOpen } from 'node:fs/promises';
|
|
25
|
+
import path from 'node:path';
|
|
26
|
+
import crypto, { randomUUID } from 'node:crypto';
|
|
27
|
+
import os from 'node:os';
|
|
28
|
+
|
|
29
|
+
// Version stamped into every persisted job file; files with another version are ignored on load.
export const SCHEMA_VERSION = 1;
// Default size of one multipart chunk: 8 MiB.
export const DEFAULT_CHUNK_SIZE = 8 * 2 ** 20;
// Files at or above this size (5 MiB) are uploaded in multipart mode.
// Original author's note: chosen to match the COS minimum part size.
export const MULTIPART_THRESHOLD = 5 * 2 ** 20;
// A job is dead-lettered once it has failed this many times.
export const MAX_JOB_ATTEMPTS = 5;
// Default period of the scheduler tick, in milliseconds.
export const DEFAULT_TICK_INTERVAL_MS = 2_000;
// Attempts made for a single part before the whole job attempt fails.
export const PART_RETRY_ATTEMPTS = 3;
// Base delay for part-level backoff; tripled per attempt (1s, 3s, 9s).
export const PART_RETRY_BASE_MS = 1_000;
// Terminal (done / dead_letter) job files older than 7 days are swept.
export const TERMINAL_JOB_TTL_MS = 7 * 24 * 60 * 60 * 1_000;
// Housekeeping runs every 6 hours.
export const HOUSEKEEPING_INTERVAL_MS = 6 * 60 * 60 * 1_000;
|
|
38
|
+
|
|
39
|
+
/** Current wall-clock time as an ISO-8601 string, e.g. "2024-01-01T00:00:00.000Z". */
function nowIso() {
  const now = new Date();
  return now.toISOString();
}
|
|
40
|
+
|
|
41
|
+
/**
 * Stream a file through SHA-256 without loading it into memory.
 *
 * @param {string} localPath - File to hash.
 * @returns {Promise<string>} Hex-encoded digest of the file contents.
 *   The promise rejects with the read-stream error if the file cannot be read.
 */
async function sha256OfFile(localPath) {
  const hasher = crypto.createHash('sha256');
  // Readable streams are async-iterable; a stream error rejects the iteration.
  for await (const piece of createReadStream(localPath)) {
    hasher.update(piece);
  }
  return hasher.digest('hex');
}
|
|
50
|
+
|
|
51
|
+
/**
 * Job-level retry delay: attempts² × 30 s.
 * 1 → 30 s, 2 → 120 s, 3 → 270 s, 4 → 480 s.
 *
 * @param {number} attempts - Number of failed attempts so far.
 * @returns {number} Delay before the next attempt, in milliseconds.
 */
export function jobBackoffMs(attempts) {
  const squared = attempts * attempts;
  return squared * 30_000;
}
|
|
56
|
+
|
|
57
|
+
// Chunk-level retry delay: the base delay tripled for every earlier attempt,
// i.e. attempt 1 → 1×, attempt 2 → 3×, attempt 3 → 9× PART_RETRY_BASE_MS.
function partBackoffMs(attempt) {
  const multiplier = 3 ** (attempt - 1);
  return PART_RETRY_BASE_MS * multiplier;
}
|
|
61
|
+
|
|
62
|
+
/** Promise that resolves (with no value) once a `setTimeout` of `ms` milliseconds fires. */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
63
|
+
|
|
64
|
+
/**
|
|
65
|
+
* UploadJobManager — singleton in daemon process.
|
|
66
|
+
*
|
|
67
|
+
* const mgr = new UploadJobManager({ serverApi });
|
|
68
|
+
* mgr.start();
|
|
69
|
+
* const { jobId } = await mgr.enqueue({ workspaceId, agentId, localPath, workspacePath, mime });
|
|
70
|
+
* // job runs in background; later:
|
|
71
|
+
* mgr.getStatus(jobId);
|
|
72
|
+
* // or shutting down:
|
|
73
|
+
* mgr.stop();
|
|
74
|
+
*
|
|
75
|
+
* serverApi shape (M3 implements):
|
|
76
|
+
* presignSingle({ workspaceId, path, size, mime, sha256 }) → { objectKey, uploadUrl, method?, headers? }
|
|
77
|
+
* confirmSingle({ workspaceId, path, objectKey }) → { ok: true }
|
|
78
|
+
* presignMultipart({ workspaceId, path, size, mime, sha256 }) → { objectKey, cosUploadId }
|
|
79
|
+
* presignPart({ workspaceId, objectKey, cosUploadId, partNumber }) → { url, method?, headers? }
|
|
80
|
+
* completeMultipart({ workspaceId, path, objectKey, cosUploadId, parts, size, sha256 }) → { ok: true }
|
|
81
|
+
* abortMultipart({ workspaceId, path, objectKey, cosUploadId }) → { ok: true }
|
|
82
|
+
*/
|
|
83
|
+
export class UploadJobManager {
|
|
84
|
+
constructor({
|
|
85
|
+
jobDir = path.join(os.homedir(), '.lightcone', 'upload-jobs'),
|
|
86
|
+
serverApi,
|
|
87
|
+
fetchFn = globalThis.fetch,
|
|
88
|
+
nowFn = () => Date.now(),
|
|
89
|
+
log = (msg) => console.error(`[UploadJobManager] ${msg}`),
|
|
90
|
+
tickIntervalMs = DEFAULT_TICK_INTERVAL_MS,
|
|
91
|
+
chunkSize = DEFAULT_CHUNK_SIZE,
|
|
92
|
+
multipartThreshold = MULTIPART_THRESHOLD,
|
|
93
|
+
} = {}) {
|
|
94
|
+
if (!serverApi) throw new Error('UploadJobManager: serverApi is required');
|
|
95
|
+
this.jobDir = jobDir;
|
|
96
|
+
this.serverApi = serverApi;
|
|
97
|
+
this.fetchFn = fetchFn;
|
|
98
|
+
this.nowFn = nowFn;
|
|
99
|
+
this.log = log;
|
|
100
|
+
this.tickIntervalMs = tickIntervalMs;
|
|
101
|
+
this.chunkSize = chunkSize;
|
|
102
|
+
this.multipartThreshold = multipartThreshold;
|
|
103
|
+
|
|
104
|
+
this._tickInterval = null;
|
|
105
|
+
this._housekeepingInterval = null;
|
|
106
|
+
this._stopping = false;
|
|
107
|
+
this._activeJobs = new Set(); // jobIds currently advancing
|
|
108
|
+
this._pathLocks = new Map(); // `${workspaceId}|${workspacePath}` → jobId
|
|
109
|
+
|
|
110
|
+
mkdirSync(this.jobDir, { recursive: true });
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
// ─── public API ──────────────────────────────────────────────────────────
|
|
114
|
+
|
|
115
|
+
/**
|
|
116
|
+
* Stat + sha256 the file, choose mode (single vs multipart), persist a
|
|
117
|
+
* pending job, and schedule a tick. Throws synchronously on path-lock
|
|
118
|
+
* conflict or missing/unreadable file.
|
|
119
|
+
*/
|
|
120
|
+
async enqueue({ workspaceId, agentId = null, localPath, workspacePath, mime = 'application/octet-stream' }) {
|
|
121
|
+
if (!workspaceId) throw new Error('enqueue: workspaceId required');
|
|
122
|
+
if (!localPath) throw new Error('enqueue: localPath required');
|
|
123
|
+
if (!workspacePath) throw new Error('enqueue: workspacePath required');
|
|
124
|
+
|
|
125
|
+
const pathKey = `${workspaceId}|${workspacePath}`;
|
|
126
|
+
const existing = this._pathLocks.get(pathKey);
|
|
127
|
+
if (existing) {
|
|
128
|
+
throw new Error(`upload_path_locked: another upload to ${workspacePath} is in progress (jobId=${existing})`);
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
let st;
|
|
132
|
+
try { st = statSync(localPath); }
|
|
133
|
+
catch (err) { throw new Error(`localPath not readable: ${err.message}`); }
|
|
134
|
+
const size = st.size;
|
|
135
|
+
if (!Number.isFinite(size) || size <= 0) {
|
|
136
|
+
throw new Error(`localPath has invalid size: ${size}`);
|
|
137
|
+
}
|
|
138
|
+
const mtimeMs = st.mtimeMs;
|
|
139
|
+
const sha256 = await sha256OfFile(localPath);
|
|
140
|
+
|
|
141
|
+
const uploadMode = size >= this.multipartThreshold ? 'multipart' : 'single';
|
|
142
|
+
const totalChunks = uploadMode === 'multipart' ? Math.ceil(size / this.chunkSize) : 1;
|
|
143
|
+
|
|
144
|
+
const job = {
|
|
145
|
+
schemaVersion: SCHEMA_VERSION,
|
|
146
|
+
jobId: randomUUID(),
|
|
147
|
+
createdAt: nowIso(),
|
|
148
|
+
updatedAt: nowIso(),
|
|
149
|
+
agentId,
|
|
150
|
+
workspaceId,
|
|
151
|
+
localPath,
|
|
152
|
+
workspacePath,
|
|
153
|
+
mime,
|
|
154
|
+
objectKey: null,
|
|
155
|
+
size,
|
|
156
|
+
mtimeMs,
|
|
157
|
+
sha256,
|
|
158
|
+
uploadMode,
|
|
159
|
+
chunkSize: this.chunkSize,
|
|
160
|
+
totalChunks,
|
|
161
|
+
cosUploadId: null,
|
|
162
|
+
doneParts: [],
|
|
163
|
+
status: 'pending',
|
|
164
|
+
attempts: 0,
|
|
165
|
+
lastError: null,
|
|
166
|
+
lastErrorAt: null,
|
|
167
|
+
nextAttemptAt: this.nowFn(),
|
|
168
|
+
};
|
|
169
|
+
|
|
170
|
+
this._persist(job);
|
|
171
|
+
this._pathLocks.set(pathKey, job.jobId);
|
|
172
|
+
|
|
173
|
+
// Best-effort immediate tick so the very first job doesn't wait for the interval.
|
|
174
|
+
setImmediate(() => this._tick().catch(err => this.log(`tick (post-enqueue) failed: ${err.message}`)));
|
|
175
|
+
|
|
176
|
+
return {
|
|
177
|
+
jobId: job.jobId,
|
|
178
|
+
mode: uploadMode,
|
|
179
|
+
totalChunks,
|
|
180
|
+
status: 'pending',
|
|
181
|
+
size,
|
|
182
|
+
};
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
getStatus(jobId) {
|
|
186
|
+
const job = this._loadById(jobId);
|
|
187
|
+
return job ? this._publicState(job) : null;
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
start() {
|
|
191
|
+
if (this._tickInterval) return;
|
|
192
|
+
this._stopping = false;
|
|
193
|
+
// Rebuild path locks from on-disk jobs so concurrent enqueue still respects them.
|
|
194
|
+
for (const job of this._listJobs()) {
|
|
195
|
+
if (job.status === 'pending' || job.status === 'uploading') {
|
|
196
|
+
const pathKey = `${job.workspaceId}|${job.workspacePath}`;
|
|
197
|
+
this._pathLocks.set(pathKey, job.jobId);
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
this._tickInterval = setInterval(() => {
|
|
201
|
+
this._tick().catch(err => this.log(`tick failed: ${err.message}`));
|
|
202
|
+
}, this.tickIntervalMs);
|
|
203
|
+
// Run housekeeping immediately + on a slow schedule.
|
|
204
|
+
this._housekeep();
|
|
205
|
+
this._housekeepingInterval = setInterval(() => {
|
|
206
|
+
this._housekeep();
|
|
207
|
+
}, HOUSEKEEPING_INTERVAL_MS);
|
|
208
|
+
this.log(`started (jobDir=${this.jobDir}, tick=${this.tickIntervalMs}ms)`);
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
/**
|
|
212
|
+
* Signal stop. In-flight `_advance` calls notice via `_stopping` flag and
|
|
213
|
+
* exit between chunks; partial state is persisted so the next start() can
|
|
214
|
+
* resume. Does NOT abort the underlying fetch in progress — a single chunk
|
|
215
|
+
* may still complete after stop returns.
|
|
216
|
+
*/
|
|
217
|
+
stop() {
|
|
218
|
+
this._stopping = true;
|
|
219
|
+
if (this._tickInterval) clearInterval(this._tickInterval);
|
|
220
|
+
this._tickInterval = null;
|
|
221
|
+
if (this._housekeepingInterval) clearInterval(this._housekeepingInterval);
|
|
222
|
+
this._housekeepingInterval = null;
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
/**
|
|
226
|
+
* Drop terminal (done / dead_letter) job files older than TERMINAL_JOB_TTL_MS.
|
|
227
|
+
* Keeps recent ones around for observability / debugging.
|
|
228
|
+
*/
|
|
229
|
+
_housekeep() {
|
|
230
|
+
try {
|
|
231
|
+
const now = this.nowFn();
|
|
232
|
+
let pruned = 0;
|
|
233
|
+
for (const job of this._listJobs()) {
|
|
234
|
+
if (job.status !== 'done' && job.status !== 'dead_letter') continue;
|
|
235
|
+
const updatedAt = job.updatedAt ? Date.parse(job.updatedAt) : 0;
|
|
236
|
+
if (!Number.isFinite(updatedAt)) continue;
|
|
237
|
+
if (now - updatedAt < TERMINAL_JOB_TTL_MS) continue;
|
|
238
|
+
try {
|
|
239
|
+
// unlink via fsPromises is fine to do sync-ish since housekeeping is rare
|
|
240
|
+
// and the file is tiny.
|
|
241
|
+
fsPromises.unlink(this._jobPath(job.jobId)).catch(() => {});
|
|
242
|
+
pruned += 1;
|
|
243
|
+
} catch { /* ignore */ }
|
|
244
|
+
}
|
|
245
|
+
if (pruned > 0) this.log(`housekeeping: pruned ${pruned} terminal job(s) older than ${TERMINAL_JOB_TTL_MS / 86_400_000}d`);
|
|
246
|
+
} catch (err) {
|
|
247
|
+
this.log(`housekeeping failed: ${err?.message ?? err}`);
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
/**
|
|
252
|
+
* Test helper: wait until no jobs are pending/uploading or until timeout.
|
|
253
|
+
*/
|
|
254
|
+
async waitAllSettled({ timeoutMs = 30_000 } = {}) {
|
|
255
|
+
const start = this.nowFn();
|
|
256
|
+
while (this.nowFn() - start < timeoutMs) {
|
|
257
|
+
const jobs = this._listJobs();
|
|
258
|
+
const inFlight = jobs.some(j => j.status === 'pending' || j.status === 'uploading');
|
|
259
|
+
if (!inFlight && this._activeJobs.size === 0) return;
|
|
260
|
+
await sleep(50);
|
|
261
|
+
}
|
|
262
|
+
throw new Error('waitAllSettled: timeout');
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
// ─── internal ────────────────────────────────────────────────────────────
|
|
266
|
+
|
|
267
|
+
async _tick() {
|
|
268
|
+
if (this._stopping) return;
|
|
269
|
+
const jobs = this._listJobs();
|
|
270
|
+
const now = this.nowFn();
|
|
271
|
+
for (const job of jobs) {
|
|
272
|
+
if (this._stopping) break;
|
|
273
|
+
if (job.status === 'done' || job.status === 'dead_letter') continue;
|
|
274
|
+
if (this._activeJobs.has(job.jobId)) continue;
|
|
275
|
+
if (job.nextAttemptAt && job.nextAttemptAt > now) continue;
|
|
276
|
+
this._activeJobs.add(job.jobId);
|
|
277
|
+
this._advance(job)
|
|
278
|
+
.catch(err => this.log(`advance(${job.jobId}) crashed: ${err.message}`))
|
|
279
|
+
.finally(() => this._activeJobs.delete(job.jobId));
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
async _advance(job) {
|
|
284
|
+
try {
|
|
285
|
+
// Local-file integrity check before each attempt.
|
|
286
|
+
let st;
|
|
287
|
+
try { st = statSync(job.localPath); }
|
|
288
|
+
catch (err) {
|
|
289
|
+
return this._markDeadLetter(job, `local_file_gone:${err.code ?? err.message}`);
|
|
290
|
+
}
|
|
291
|
+
if (st.size !== job.size) {
|
|
292
|
+
return this._markDeadLetter(job, `local_file_changed: declared=${job.size}, now=${st.size}`);
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
if (job.status !== 'uploading') {
|
|
296
|
+
job.status = 'uploading';
|
|
297
|
+
this._persist(job);
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
if (job.uploadMode === 'single') {
|
|
301
|
+
await this._advanceSingle(job);
|
|
302
|
+
} else {
|
|
303
|
+
await this._advanceMultipart(job);
|
|
304
|
+
}
|
|
305
|
+
} catch (err) {
|
|
306
|
+
this._recordFailure(job, err);
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
async _advanceSingle(job) {
|
|
311
|
+
const presign = await this.serverApi.presignSingle({
|
|
312
|
+
workspaceId: job.workspaceId,
|
|
313
|
+
path: job.workspacePath,
|
|
314
|
+
size: job.size,
|
|
315
|
+
mime: job.mime,
|
|
316
|
+
sha256: job.sha256,
|
|
317
|
+
});
|
|
318
|
+
job.objectKey = presign.objectKey;
|
|
319
|
+
this._persist(job);
|
|
320
|
+
|
|
321
|
+
const fileBuf = await fsPromises.readFile(job.localPath);
|
|
322
|
+
const resp = await this.fetchFn(presign.uploadUrl, {
|
|
323
|
+
method: presign.method ?? 'PUT',
|
|
324
|
+
headers: {
|
|
325
|
+
'Content-Type': job.mime,
|
|
326
|
+
'Content-Length': String(job.size),
|
|
327
|
+
...(presign.headers ?? {}),
|
|
328
|
+
},
|
|
329
|
+
body: fileBuf,
|
|
330
|
+
});
|
|
331
|
+
if (!resp.ok) {
|
|
332
|
+
const text = await resp.text().catch(() => '');
|
|
333
|
+
throw new Error(`single PUT failed: HTTP ${resp.status} ${text.slice(0, 200)}`);
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
await this.serverApi.confirmSingle({
|
|
337
|
+
workspaceId: job.workspaceId,
|
|
338
|
+
path: job.workspacePath,
|
|
339
|
+
objectKey: job.objectKey,
|
|
340
|
+
});
|
|
341
|
+
|
|
342
|
+
this._markDone(job);
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
async _advanceMultipart(job) {
|
|
346
|
+
if (!job.cosUploadId) {
|
|
347
|
+
const init = await this.serverApi.presignMultipart({
|
|
348
|
+
workspaceId: job.workspaceId,
|
|
349
|
+
path: job.workspacePath,
|
|
350
|
+
size: job.size,
|
|
351
|
+
mime: job.mime,
|
|
352
|
+
sha256: job.sha256,
|
|
353
|
+
});
|
|
354
|
+
job.objectKey = init.objectKey;
|
|
355
|
+
job.cosUploadId = init.cosUploadId;
|
|
356
|
+
this._persist(job);
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
const doneSet = new Set(job.doneParts.map(p => p.partNumber));
|
|
360
|
+
const fh = await fsOpen(job.localPath, 'r');
|
|
361
|
+
try {
|
|
362
|
+
for (let i = 1; i <= job.totalChunks; i++) {
|
|
363
|
+
if (this._stopping) return; // graceful shutdown mid-job
|
|
364
|
+
if (doneSet.has(i)) continue;
|
|
365
|
+
|
|
366
|
+
const offset = (i - 1) * job.chunkSize;
|
|
367
|
+
const remaining = job.size - offset;
|
|
368
|
+
const partLen = Math.min(job.chunkSize, remaining);
|
|
369
|
+
const buf = Buffer.alloc(partLen);
|
|
370
|
+
await fh.read(buf, 0, partLen, offset);
|
|
371
|
+
|
|
372
|
+
const etag = await this._uploadPartWithRetry(job, i, buf);
|
|
373
|
+
job.doneParts.push({ partNumber: i, etag });
|
|
374
|
+
this._persist(job);
|
|
375
|
+
}
|
|
376
|
+
} finally {
|
|
377
|
+
await fh.close().catch(() => {});
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
if (this._stopping) return;
|
|
381
|
+
|
|
382
|
+
await this.serverApi.completeMultipart({
|
|
383
|
+
workspaceId: job.workspaceId,
|
|
384
|
+
path: job.workspacePath,
|
|
385
|
+
objectKey: job.objectKey,
|
|
386
|
+
cosUploadId: job.cosUploadId,
|
|
387
|
+
parts: job.doneParts,
|
|
388
|
+
size: job.size,
|
|
389
|
+
sha256: job.sha256,
|
|
390
|
+
});
|
|
391
|
+
|
|
392
|
+
this._markDone(job);
|
|
393
|
+
}
|
|
394
|
+
|
|
395
|
+
async _uploadPartWithRetry(job, partNumber, buf) {
|
|
396
|
+
let lastErr;
|
|
397
|
+
for (let attempt = 1; attempt <= PART_RETRY_ATTEMPTS; attempt++) {
|
|
398
|
+
if (this._stopping) throw new Error('stopping');
|
|
399
|
+
try {
|
|
400
|
+
const presign = await this.serverApi.presignPart({
|
|
401
|
+
workspaceId: job.workspaceId,
|
|
402
|
+
objectKey: job.objectKey,
|
|
403
|
+
cosUploadId: job.cosUploadId,
|
|
404
|
+
partNumber,
|
|
405
|
+
});
|
|
406
|
+
const resp = await this.fetchFn(presign.url, {
|
|
407
|
+
method: presign.method ?? 'PUT',
|
|
408
|
+
headers: {
|
|
409
|
+
'Content-Length': String(buf.length),
|
|
410
|
+
...(presign.headers ?? {}),
|
|
411
|
+
},
|
|
412
|
+
body: buf,
|
|
413
|
+
});
|
|
414
|
+
if (!resp.ok) {
|
|
415
|
+
const text = await resp.text().catch(() => '');
|
|
416
|
+
throw new Error(`HTTP ${resp.status} ${text.slice(0, 200)}`);
|
|
417
|
+
}
|
|
418
|
+
const etag = resp.headers.get?.('etag') ?? resp.headers.get?.('ETag') ?? presign.etag ?? '';
|
|
419
|
+
if (!etag) throw new Error(`PUT part ${partNumber} missing etag`);
|
|
420
|
+
return etag;
|
|
421
|
+
} catch (err) {
|
|
422
|
+
lastErr = err;
|
|
423
|
+
this.log(`part ${partNumber} attempt ${attempt}/${PART_RETRY_ATTEMPTS} failed: ${err.message}`);
|
|
424
|
+
if (attempt < PART_RETRY_ATTEMPTS) {
|
|
425
|
+
await sleep(partBackoffMs(attempt));
|
|
426
|
+
}
|
|
427
|
+
}
|
|
428
|
+
}
|
|
429
|
+
throw new Error(`part ${partNumber} exhausted ${PART_RETRY_ATTEMPTS} retries: ${lastErr?.message ?? lastErr}`);
|
|
430
|
+
}
|
|
431
|
+
|
|
432
|
+
_markDone(job) {
|
|
433
|
+
job.status = 'done';
|
|
434
|
+
job.updatedAt = nowIso();
|
|
435
|
+
job.lastError = null;
|
|
436
|
+
job.lastErrorAt = null;
|
|
437
|
+
job.nextAttemptAt = null;
|
|
438
|
+
this._persist(job);
|
|
439
|
+
this._releaseLock(job);
|
|
440
|
+
this.log(`job ${job.jobId} done (${job.workspacePath}, ${job.size} bytes, ${job.uploadMode})`);
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
_markDeadLetter(job, reason) {
|
|
444
|
+
// Best-effort COS cleanup so we don't leak storage cost on aborted multipart.
|
|
445
|
+
if (job.uploadMode === 'multipart' && job.cosUploadId && job.objectKey) {
|
|
446
|
+
Promise.resolve(this.serverApi.abortMultipart({
|
|
447
|
+
workspaceId: job.workspaceId,
|
|
448
|
+
path: job.workspacePath,
|
|
449
|
+
objectKey: job.objectKey,
|
|
450
|
+
cosUploadId: job.cosUploadId,
|
|
451
|
+
})).catch(err => this.log(`abort_multipart for ${job.jobId} failed: ${err.message}`));
|
|
452
|
+
}
|
|
453
|
+
job.status = 'dead_letter';
|
|
454
|
+
job.lastError = String(reason);
|
|
455
|
+
job.lastErrorAt = nowIso();
|
|
456
|
+
job.updatedAt = nowIso();
|
|
457
|
+
job.nextAttemptAt = null;
|
|
458
|
+
this._persist(job);
|
|
459
|
+
this._releaseLock(job);
|
|
460
|
+
this.log(`job ${job.jobId} dead_letter: ${reason}`);
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
_recordFailure(job, err) {
|
|
464
|
+
job.attempts = (job.attempts ?? 0) + 1;
|
|
465
|
+
job.lastError = String(err?.message ?? err);
|
|
466
|
+
job.lastErrorAt = nowIso();
|
|
467
|
+
job.updatedAt = nowIso();
|
|
468
|
+
if (job.attempts >= MAX_JOB_ATTEMPTS) {
|
|
469
|
+
this._markDeadLetter(job, `max_attempts_exhausted: ${job.lastError}`);
|
|
470
|
+
return;
|
|
471
|
+
}
|
|
472
|
+
job.status = 'pending';
|
|
473
|
+
const backoff = jobBackoffMs(job.attempts);
|
|
474
|
+
job.nextAttemptAt = this.nowFn() + backoff;
|
|
475
|
+
this._persist(job);
|
|
476
|
+
this.log(`job ${job.jobId} attempt ${job.attempts} failed: ${job.lastError}; next retry in ${Math.round(backoff / 1000)}s`);
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
_releaseLock(job) {
|
|
480
|
+
const pathKey = `${job.workspaceId}|${job.workspacePath}`;
|
|
481
|
+
if (this._pathLocks.get(pathKey) === job.jobId) {
|
|
482
|
+
this._pathLocks.delete(pathKey);
|
|
483
|
+
}
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
// ─── persistence ─────────────────────────────────────────────────────────
|
|
487
|
+
|
|
488
|
+
_jobPath(jobId) {
|
|
489
|
+
return path.join(this.jobDir, `${jobId}.json`);
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
_persist(job) {
|
|
493
|
+
job.updatedAt = nowIso();
|
|
494
|
+
const dest = this._jobPath(job.jobId);
|
|
495
|
+
const tmp = `${dest}.tmp`;
|
|
496
|
+
writeFileSync(tmp, JSON.stringify(job, null, 2));
|
|
497
|
+
renameSync(tmp, dest); // atomic on POSIX
|
|
498
|
+
}
|
|
499
|
+
|
|
500
|
+
_loadById(jobId) {
|
|
501
|
+
try {
|
|
502
|
+
const text = readFileSync(this._jobPath(jobId), 'utf8');
|
|
503
|
+
const job = JSON.parse(text);
|
|
504
|
+
if (job.schemaVersion !== SCHEMA_VERSION) {
|
|
505
|
+
this.log(`job ${jobId}: unsupported schemaVersion ${job.schemaVersion}, ignored`);
|
|
506
|
+
return null;
|
|
507
|
+
}
|
|
508
|
+
return job;
|
|
509
|
+
} catch { return null; }
|
|
510
|
+
}
|
|
511
|
+
|
|
512
|
+
_listJobs() {
|
|
513
|
+
let names;
|
|
514
|
+
try { names = readdirSync(this.jobDir); }
|
|
515
|
+
catch { return []; }
|
|
516
|
+
const out = [];
|
|
517
|
+
for (const name of names) {
|
|
518
|
+
if (!name.endsWith('.json') || name.endsWith('.tmp.json')) continue;
|
|
519
|
+
const jobId = name.slice(0, -5);
|
|
520
|
+
const job = this._loadById(jobId);
|
|
521
|
+
if (job) out.push(job);
|
|
522
|
+
}
|
|
523
|
+
return out;
|
|
524
|
+
}
|
|
525
|
+
|
|
526
|
+
_publicState(job) {
|
|
527
|
+
return {
|
|
528
|
+
jobId: job.jobId,
|
|
529
|
+
status: job.status,
|
|
530
|
+
mode: job.uploadMode,
|
|
531
|
+
size: job.size,
|
|
532
|
+
progress: job.uploadMode === 'multipart'
|
|
533
|
+
? { donePartCount: job.doneParts.length, totalChunks: job.totalChunks }
|
|
534
|
+
: { donePartCount: job.status === 'done' ? 1 : 0, totalChunks: 1 },
|
|
535
|
+
attempts: job.attempts,
|
|
536
|
+
lastError: job.lastError,
|
|
537
|
+
lastErrorAt: job.lastErrorAt,
|
|
538
|
+
nextAttemptAt: job.nextAttemptAt,
|
|
539
|
+
objectKey: job.objectKey,
|
|
540
|
+
};
|
|
541
|
+
}
|
|
542
|
+
}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
// Thin daemon-side wrapper around the server's /storage/* endpoints.
|
|
2
|
+
//
|
|
3
|
+
// Implements the `serverApi` interface consumed by UploadJobManager. Single
|
|
4
|
+
// place that translates the abstract { presignSingle / confirmSingle /
|
|
5
|
+
// presignMultipart / presignPart / completeMultipart / abortMultipart } calls
|
|
6
|
+
// into HTTP requests against the lightcone server's internal/agent/.../storage/*
|
|
7
|
+
// endpoints. The actual HTTP plumbing — auth headers, governance, retries —
|
|
8
|
+
// is delegated to the `api` function passed in (typically chat-bridge's `api`).
|
|
9
|
+
//
|
|
10
|
+
// Why split this out: keeps UploadJobManager pure (no HTTP knowledge) and lets
|
|
11
|
+
// us write the manager's tests entirely against an injected mock.
|
|
12
|
+
|
|
13
|
+
/**
 * Build the `serverApi` object consumed by UploadJobManager. Every method
 * issues one POST to a `/storage/*` path through the injected `api` helper.
 *
 * @param {Object} deps
 * @param {(method: string, path: string, body?: unknown) => Promise<unknown>} deps.api
 *   HTTP helper that POSTs to `/internal/agent/<agentId>/<path>` and returns
 *   the JSON body. Throws on non-2xx.
 * @returns {Object} object with presignSingle, confirmSingle,
 *   presignMultipart, presignPart, completeMultipart and abortMultipart.
 * @throws {Error} when `deps.api` is not a function (including when `deps`
 *   is omitted entirely).
 */
export function createUploadServerApi({ api } = {}) {
  if (typeof api !== 'function') {
    throw new Error('createUploadServerApi: api function is required');
  }

  // POST `body` to `endpoint` for calls whose response fields are read below.
  // A 2xx reply without a JSON object would otherwise surface as an opaque
  // "Cannot read properties of undefined"; name the endpoint instead.
  const postForObject = async (endpoint, body) => {
    const data = await api('POST', endpoint, body);
    if (data === null || typeof data !== 'object') {
      throw new Error(`createUploadServerApi: ${endpoint} returned no JSON object`);
    }
    return data;
  };

  return {
    /**
     * Request a presigned single-request upload.
     * @returns {Promise<{objectKey, uploadUrl, method, headers, alreadyExists}>}
     *   `method` defaults to 'PUT', `headers` to `{}`, and `alreadyExists`
     *   is coerced to a boolean.
     */
    async presignSingle({ workspaceId, path, size, mime, sha256 }) {
      // Existing endpoint, reused as-is; its response is normalized into the
      // shape the manager expects.
      const data = await postForObject('/storage/presign', {
        workspaceId, path, size, mime, sha256,
      });
      return {
        objectKey: data.objectKey,
        uploadUrl: data.uploadUrl,
        method: data.method ?? 'PUT',
        headers: data.headers ?? {},
        alreadyExists: !!data.alreadyExists,
      };
    },

    /** Confirm a finished single-request upload; resolves with the raw response. */
    async confirmSingle({ workspaceId, path, objectKey }) {
      return api('POST', '/storage/confirm', { workspaceId, path, objectKey });
    },

    /**
     * Open a multipart upload.
     * @returns {Promise<{objectKey, cosUploadId, alreadyExists}>}
     */
    async presignMultipart({ workspaceId, path, size, mime, sha256 }) {
      const data = await postForObject('/storage/presign-multipart', {
        workspaceId, path, size, mime, sha256,
      });
      return {
        objectKey: data.objectKey,
        cosUploadId: data.cosUploadId,
        alreadyExists: !!data.alreadyExists,
      };
    },

    /**
     * Request a presigned URL for one part of a multipart upload.
     * @returns {Promise<{url, method, headers}>} `method` defaults to 'PUT',
     *   `headers` to `{}`.
     */
    async presignPart({ workspaceId, objectKey, cosUploadId, partNumber }) {
      const data = await postForObject('/storage/presign-part', {
        workspaceId, objectKey, cosUploadId, partNumber,
      });
      return {
        url: data.url,
        method: data.method ?? 'PUT',
        headers: data.headers ?? {},
      };
    },

    /** Complete a multipart upload; resolves with the raw response. */
    async completeMultipart({ workspaceId, path, objectKey, cosUploadId, parts, size, sha256 }) {
      return api('POST', '/storage/complete-multipart', {
        workspaceId, path, objectKey, cosUploadId, parts, size, sha256,
      });
    },

    /** Abort a multipart upload; resolves with the raw response. */
    async abortMultipart({ workspaceId, path, objectKey, cosUploadId }) {
      return api('POST', '/storage/abort-multipart', {
        workspaceId, path, objectKey, cosUploadId,
      });
    },
  };
}
|