@lightcone-ai/daemon 0.15.77 → 0.16.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/chat-bridge.js +101 -33
- package/src/record-url-narration-tool.js +80 -0
- package/src/submit-to-library-tool.js +11 -10
- package/src/upload-job-manager.js +565 -0
- package/src/upload-server-api.js +80 -0
package/package.json
CHANGED
package/src/chat-bridge.js
CHANGED
|
@@ -2,11 +2,14 @@
|
|
|
2
2
|
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
3
3
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
4
4
|
import { z } from 'zod';
|
|
5
|
-
import { createReadStream, existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
|
|
5
|
+
import { createReadStream, existsSync, mkdirSync, readFileSync, writeFileSync, statSync } from 'fs';
|
|
6
6
|
import { createHash, randomUUID } from 'crypto';
|
|
7
7
|
import path, { extname } from 'path';
|
|
8
|
+
import os from 'os';
|
|
8
9
|
import { recordUrlNarration } from './_vendor/video/recorder/index.js';
|
|
9
|
-
import { writeLocalFileToWorkspace } from './workspace-file-upload.js';
|
|
10
|
+
import { writeLocalFileToWorkspace, resolveWorkspaceFileUploadPlan } from './workspace-file-upload.js';
|
|
11
|
+
import { UploadJobManager } from './upload-job-manager.js';
|
|
12
|
+
import { createUploadServerApi } from './upload-server-api.js';
|
|
10
13
|
import { runRecordUrlNarrationTool } from './record-url-narration-tool.js';
|
|
11
14
|
import { runSubmitToLibraryTool } from './submit-to-library-tool.js';
|
|
12
15
|
import { runRenderTextToImageTool } from './tools/render-text-to-image.js';
|
|
@@ -843,6 +846,23 @@ const IS_HOST_AGENT = await resolveHostAgentFlag();
|
|
|
843
846
|
|
|
844
847
|
const server = new McpServer({ name: 'chat', version: '0.1.0' });
|
|
845
848
|
|
|
849
|
+
// ── Upload job manager (async + multipart workspace-file uploads) ────────────
|
|
850
|
+
// Per-agent jobDir so each chat-bridge sees only its own pending jobs. When the
|
|
851
|
+
// chat-bridge process restarts (skill bind, manual restart) it picks up any
|
|
852
|
+
// in-flight uploads from disk and resumes them; see docs/upload-pipeline-design.md.
|
|
853
|
+
const _uploadJobDir = path.join(
|
|
854
|
+
os.homedir(),
|
|
855
|
+
'.lightcone',
|
|
856
|
+
'upload-jobs',
|
|
857
|
+
AGENT_ID || 'default'
|
|
858
|
+
);
|
|
859
|
+
const uploadJobManager = new UploadJobManager({
|
|
860
|
+
jobDir: _uploadJobDir,
|
|
861
|
+
serverApi: createUploadServerApi({ api: (...args) => api(...args) }),
|
|
862
|
+
log: (msg) => console.error(`[chat-bridge:UploadJobManager] ${msg}`),
|
|
863
|
+
});
|
|
864
|
+
uploadJobManager.start();
|
|
865
|
+
|
|
846
866
|
// ── check_messages ────────────────────────────────────────────────────────────
|
|
847
867
|
server.tool('check_messages', 'Check for new messages in your inbox', {}, async () => {
|
|
848
868
|
const data = await api('GET', '/receive');
|
|
@@ -884,11 +904,16 @@ server.tool('search_messages', 'Search messages within a specific workspace. You
|
|
|
884
904
|
const data = await api('GET', `/search?${params}`);
|
|
885
905
|
if (!data.results || data.results.length === 0)
|
|
886
906
|
return { content: [{ type: 'text', text: 'No search results.' }] };
|
|
907
|
+
// Use full content rather than snippet — the snippet truncates at ~200 chars,
|
|
908
|
+
// which routinely cuts URLs / structured data in half (e.g. a forwarded list
|
|
909
|
+
// of mp.weixin.qq.com article URLs becomes useless if only the second one
|
|
910
|
+
// shrinks below its query-string). For search the agent's intent is usually
|
|
911
|
+
// "give me the original message verbatim so I can act on it", not a teaser.
|
|
887
912
|
const formatted = data.results.map((r, i) => [
|
|
888
913
|
`[${i + 1}] msg=${r.id} seq=${r.seq} time=${r.createdAt}`,
|
|
889
914
|
`workspace: #${r.workspaceName}`,
|
|
890
915
|
`sender: @${r.senderName}${r.senderType === 'agent' ? ' (agent)' : ''}`,
|
|
891
|
-
`content: ${r.snippet}`,
|
|
916
|
+
`content: ${r.content ?? r.snippet}`,
|
|
892
917
|
].join('\n')).join('\n\n');
|
|
893
918
|
return { content: [{ type: 'text', text: `## Search Results for "${trimmed}" (${data.results.length} results)\n\n${formatted}` }] };
|
|
894
919
|
} catch (err) {
|
|
@@ -1099,38 +1124,67 @@ server.tool('write_workspace', 'Write a file to the shared workspace. Use this t
|
|
|
1099
1124
|
return { content: [{ type: 'text', text: `Saved to workspace: ${path}` }] };
|
|
1100
1125
|
});
|
|
1101
1126
|
|
|
1102
|
-
server.tool('write_workspace_file',
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1127
|
+
server.tool('write_workspace_file',
|
|
1128
|
+
'Write a local file directly to the shared workspace. Prefer this over write_workspace for images/PDFs/binary files so large base64 content never enters the model context.\n\n'
|
|
1129
|
+
+ 'Binary uploads are ASYNCHRONOUS: the tool queues an upload job and returns immediately with a jobId. The actual PUT runs in the background with retry + COS multipart (no codex 120s timeout, no single-PUT failures). When you next call submit_to_library on the same workspace path, the server waits up to ~90s for the upload to finish before indexing; if it returns "upload_still_in_progress" just retry submit_to_library a few seconds later — DO NOT re-record/re-compose the video, the source file is fine.\n\n'
|
|
1130
|
+
+ 'Text files (.md/.txt/.json/etc.) still upload synchronously since they are tiny.',
|
|
1131
|
+
{
|
|
1132
|
+
file_path: z.string().describe('Local file path. Relative paths resolve from the current agent workspace. Absolute paths must stay inside the agent/workspace.'),
|
|
1133
|
+
path: z.string().describe('Destination path relative to workspace root, e.g. "artifacts/cover.png"'),
|
|
1134
|
+
},
|
|
1135
|
+
async ({ file_path, path: workspacePath }) => {
|
|
1106
1136
|
if (!currentWorkspaceId) return { content: [{ type: 'text', text: 'No workspace context.' }] };
|
|
1107
1137
|
const localPath = resolveLocalWorkspaceFile(file_path);
|
|
1108
|
-
const result = await writeLocalFileToWorkspace({
|
|
1109
|
-
localPath,
|
|
1110
|
-
workspacePath: path,
|
|
1111
|
-
workspaceId: currentWorkspaceId,
|
|
1112
|
-
readFileSyncFn: readFileSync,
|
|
1113
|
-
uploadWorkspaceMemory: async ({ workspacePath, workspaceId, content }) => {
|
|
1114
|
-
await api('PUT', `/workspace-memory?path=${encodeURIComponent(workspacePath)}&workspaceId=${encodeURIComponent(workspaceId)}`, { content });
|
|
1115
|
-
},
|
|
1116
|
-
presign: async ({ workspaceId, path: filePath, size, mime, sha256 }) => {
|
|
1117
|
-
return api('POST', '/storage/presign', { workspaceId, path: filePath, size, mime, sha256 });
|
|
1118
|
-
},
|
|
1119
|
-
confirmUpload: async ({ workspaceId, path: filePath, objectKey }) => {
|
|
1120
|
-
await api('POST', '/storage/confirm', { workspaceId, path: filePath, objectKey });
|
|
1121
|
-
},
|
|
1122
|
-
});
|
|
1123
1138
|
|
|
1124
|
-
const
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1139
|
+
const plan = resolveWorkspaceFileUploadPlan({ localPath, workspacePath });
|
|
1140
|
+
|
|
1141
|
+
// ── Text path: tiny, sync, unchanged ────────────────────────────────────
|
|
1142
|
+
if (plan.isText) {
|
|
1143
|
+
const result = await writeLocalFileToWorkspace({
|
|
1144
|
+
localPath,
|
|
1145
|
+
workspacePath,
|
|
1146
|
+
workspaceId: currentWorkspaceId,
|
|
1147
|
+
readFileSyncFn: readFileSync,
|
|
1148
|
+
uploadWorkspaceMemory: async ({ workspacePath: wp, workspaceId, content }) => {
|
|
1149
|
+
await api('PUT', `/workspace-memory?path=${encodeURIComponent(wp)}&workspaceId=${encodeURIComponent(workspaceId)}`, { content });
|
|
1150
|
+
},
|
|
1151
|
+
// Binary path below replaces these; keep these stubs in case the plan
|
|
1152
|
+
// misclassifies, to avoid TypeError mid-tool-call.
|
|
1153
|
+
presign: async () => { throw new Error('binary upload should not reach legacy presign'); },
|
|
1154
|
+
confirmUpload: async () => { throw new Error('binary upload should not reach legacy confirmUpload'); },
|
|
1155
|
+
});
|
|
1156
|
+
const bytes = Number.isFinite(result?.bytes) ? result.bytes : 0;
|
|
1157
|
+
return {
|
|
1158
|
+
content: [{
|
|
1159
|
+
type: 'text',
|
|
1160
|
+
text: `Saved local text file to workspace: ${workspacePath} (${plan.mime}, ${formatBytes(bytes)})`,
|
|
1161
|
+
}],
|
|
1162
|
+
};
|
|
1163
|
+
}
|
|
1164
|
+
|
|
1165
|
+
// ── Binary path: async enqueue ──────────────────────────────────────────
|
|
1166
|
+
try {
|
|
1167
|
+
const enq = await uploadJobManager.enqueue({
|
|
1168
|
+
workspaceId: currentWorkspaceId,
|
|
1169
|
+
agentId: AGENT_ID || null,
|
|
1170
|
+
localPath,
|
|
1171
|
+
workspacePath,
|
|
1172
|
+
mime: plan.mime,
|
|
1173
|
+
});
|
|
1174
|
+
const lines = [
|
|
1175
|
+
`Queued upload to workspace: ${workspacePath} (${plan.mime}, ${formatBytes(enq.size)})`,
|
|
1176
|
+
`mode=${enq.mode}${enq.mode === 'multipart' ? ` chunks=${enq.totalChunks}` : ''}`,
|
|
1177
|
+
`job_id=${enq.jobId}`,
|
|
1178
|
+
`status=pending`,
|
|
1179
|
+
`note=Upload runs in background. Call submit_to_library when ready — it waits up to ~90s for the upload to finish. If it returns "upload_still_in_progress", just retry submit_to_library a few seconds later. DO NOT re-record or re-compose.`,
|
|
1180
|
+
];
|
|
1181
|
+
return { content: [{ type: 'text', text: lines.join('\n') }] };
|
|
1182
|
+
} catch (err) {
|
|
1183
|
+
return {
|
|
1184
|
+
isError: true,
|
|
1185
|
+
content: [{ type: 'text', text: `write_workspace_file failed: ${err?.message ?? err}` }],
|
|
1186
|
+
};
|
|
1187
|
+
}
|
|
1134
1188
|
});
|
|
1135
1189
|
|
|
1136
1190
|
// ── skill_list ───────────────────────────────────────────────────────────────
|
|
@@ -1433,7 +1487,21 @@ server.tool('record_url_narration',
|
|
|
1433
1487
|
'Record a silent video of a URL by driving Chromium on an Xvfb display and capturing it with Playwright recordVideo, driven by a video plan; ffmpeg then transcodes the recording to mp4. Outputs a silent mp4 that can be passed to compose_video_v2 as a video-kind segment with an audio_path for narration.\n\nUse this as the canonical recording step for URL-narration videos. Falls back: if the page needs interactions outside the visual_action vocabulary (clicks, waits, OCR loops), use Monitor (Bash) with custom Playwright instead.\n\nRuntime requirements: this tool only works on a Linux daemon machine with Xvfb + Chromium + ffmpeg installed (ffmpeg is used to transcode the recording to mp4; no x11grab device support needed). macOS / Windows daemons will fail at startup.',
|
|
1434
1488
|
{
|
|
1435
1489
|
url: z.string().describe('Page URL to record'),
|
|
1436
|
-
plan: z.record(z.any()).describe(
|
|
1490
|
+
plan: z.record(z.any()).describe(
|
|
1491
|
+
'A video plan: an object with `phases` (or `sections`), each a "visual beat" with '
|
|
1492
|
+
+ '`action` (scroll_to_dwell / linear_scroll_during / scroll_back / hold / ...), a '
|
|
1493
|
+
+ 'target (`target_y` or `focus_region:[y1,y2]`) for scroll-type actions, and '
|
|
1494
|
+
+ '`dwell_ms` (how long to hold that beat — should match the segment\'s TTS duration).\n\n'
|
|
1495
|
+
+ 'For RECRUITMENT URLs (mp.weixin.qq.com / 校招 / 实习 / 岗位 content), each section MUST '
|
|
1496
|
+
+ 'also declare `target_y_content_label` — a short Chinese label describing what content '
|
|
1497
|
+
+ 'sits at that pixel y position on the page (e.g. "标题区" / "岗位信息卡片" / "公司介绍" / '
|
|
1498
|
+
+ '"届别说明"). Look at the take_page_screenshot output, find the y-pixel, and label it. '
|
|
1499
|
+
+ 'Labels matching forbidden regions ("二维码" / "扫码" / "投递入口" / "投递方式" / "联系方式" / '
|
|
1500
|
+
+ '"微信号" / "QR" / "阅读原文" / "外链") will cause the tool to refuse the recording — '
|
|
1501
|
+
+ 'recruitment content must NOT dwell on these areas (see fragments.md '
|
|
1502
|
+
+ 'frag.short.recruitment_url_mode_policy). Pick a different target_y in the 标题/岗位 '
|
|
1503
|
+
+ 'information area and rewrite that section.'
|
|
1504
|
+
),
|
|
1437
1505
|
output_path: z.string().optional().describe('Workspace-relative output mp4 path. Default tmp/wx3_video/recorded-{ts}.mp4'),
|
|
1438
1506
|
events_path: z.string().optional().describe('Workspace-relative events.json path. Default ${output_path}.events.json'),
|
|
1439
1507
|
viewport: z.object({
|
|
@@ -69,6 +69,74 @@ function assertPipelineCompliance(plan) {
|
|
|
69
69
|
}
|
|
70
70
|
}
|
|
71
71
|
|
|
72
|
+
// Forbidden region keywords for recruitment content. If a section's
|
|
73
|
+
// target_y_content_label matches, we refuse to record — the resulting video
|
|
74
|
+
// would show 投递入口 / 二维码 / contact info, which violates the recruitment
|
|
75
|
+
// content policy (see fragments.md frag.short.recruitment_url_mode_policy).
|
|
76
|
+
//
|
|
77
|
+
// Discovered after Task #25 v1 ended up dwelling on FunPlus's QR/投递 area:
|
|
78
|
+
// the agent's plan declared target_y=2180 with dwell_ms=8500 without checking
|
|
79
|
+
// what content lived at that pixel position. This is a prompt-level rule
|
|
80
|
+
// that's been ignored often enough that we enforce it at the tool layer.
|
|
81
|
+
const FORBIDDEN_REGION_PATTERNS = [
|
|
82
|
+
/二维码/, /扫码/, /扫一扫/,
|
|
83
|
+
/投递入口/, /投递方式/, /投递通道/, /投递渠道/, /报名入口/, /报名方式/,
|
|
84
|
+
/联系方式/, /联系人/, /微信号/, /\bWeChat\b/i, /\bQQ群\b/,
|
|
85
|
+
/阅读原文/, /外链/, /\bQR\b/i,
|
|
86
|
+
];
|
|
87
|
+
|
|
88
|
+
function isRecruitmentLikeUrl(url) {
|
|
89
|
+
// Conservative URL-based heuristic: mp.weixin.qq.com pages forwarding 招聘 /
|
|
90
|
+
// 校招 / 实习 / job content. Until we have content classification, treat
|
|
91
|
+
// mp.weixin.qq.com URLs as recruitment-class for safety — the cost of a
|
|
92
|
+
// mis-flag is "agent must add a label", not "recording fails permanently".
|
|
93
|
+
if (typeof url !== 'string') return false;
|
|
94
|
+
return /mp\.weixin\.qq\.com/.test(url);
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
function describeForbiddenMatch(label) {
|
|
98
|
+
for (const pattern of FORBIDDEN_REGION_PATTERNS) {
|
|
99
|
+
if (pattern.test(label)) return pattern.source;
|
|
100
|
+
}
|
|
101
|
+
return null;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
/**
|
|
105
|
+
* For recruitment-class URLs, every plan section must declare what content
|
|
106
|
+
* sits at its target_y, and the label must NOT match the forbidden-region
|
|
107
|
+
* patterns. Returns null on pass, error message string on fail.
|
|
108
|
+
*/
|
|
109
|
+
function checkSafeRegionLabels({ url, plan }) {
|
|
110
|
+
if (!isRecruitmentLikeUrl(url)) return null;
|
|
111
|
+
const segments = planSegments(plan);
|
|
112
|
+
if (!segments) return null;
|
|
113
|
+
for (let i = 0; i < segments.length; i += 1) {
|
|
114
|
+
const seg = segments[i] ?? {};
|
|
115
|
+
const label = normalizeText(seg.target_y_content_label ?? seg.targetYContentLabel ?? '');
|
|
116
|
+
if (!label) {
|
|
117
|
+
return (
|
|
118
|
+
`record_url_narration: section[${i}] is missing required field `
|
|
119
|
+
+ `\`target_y_content_label\`. For recruitment URLs (mp.weixin.qq.com / `
|
|
120
|
+
+ `校招 / 实习等) you MUST label what content lives at target_y so the `
|
|
121
|
+
+ `tool can verify it is not 二维码/投递入口/联系方式. Look at the page `
|
|
122
|
+
+ `screenshot, find what is at target_y=${seg.target_y ?? '<unset>'}, `
|
|
123
|
+
+ `and add a short label like "标题区" / "岗位信息卡片" / "公司介绍".`
|
|
124
|
+
);
|
|
125
|
+
}
|
|
126
|
+
const match = describeForbiddenMatch(label);
|
|
127
|
+
if (match) {
|
|
128
|
+
return (
|
|
129
|
+
`record_url_narration: section[${i}] target_y=${seg.target_y ?? '?'} `
|
|
130
|
+
+ `is labeled "${label}", which matches a forbidden region pattern `
|
|
131
|
+
+ `/${match}/. Recruitment content must NOT dwell on 投递入口 / 二维码 / `
|
|
132
|
+
+ `联系方式 areas. Pick a different target_y inside the 标题区 / 岗位 `
|
|
133
|
+
+ `信息卡片 / 公司介绍 area and rewrite this section.`
|
|
134
|
+
);
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
return null;
|
|
138
|
+
}
|
|
139
|
+
|
|
72
140
|
export function validateRecordUrlNarrationArgs(args = {}) {
|
|
73
141
|
const normalizedUrl = normalizeText(args.url);
|
|
74
142
|
if (!normalizedUrl) {
|
|
@@ -140,6 +208,18 @@ export async function runRecordUrlNarrationTool({
|
|
|
140
208
|
return toolError(`Error: ${error.message}`);
|
|
141
209
|
}
|
|
142
210
|
|
|
211
|
+
// Safe-region check for recruitment URLs — refuse plans that dwell on
|
|
212
|
+
// forbidden regions (二维码 / 投递入口 / 联系方式) before we even start
|
|
213
|
+
// Chromium. The agent must label each target_y with the content that lives
|
|
214
|
+
// there, and the labels are pattern-matched against a forbidden list.
|
|
215
|
+
const safeRegionError = checkSafeRegionLabels({
|
|
216
|
+
url: validatedInput.url,
|
|
217
|
+
plan: validatedInput.plan,
|
|
218
|
+
});
|
|
219
|
+
if (safeRegionError) {
|
|
220
|
+
return toolError(`Error: ${safeRegionError}`);
|
|
221
|
+
}
|
|
222
|
+
|
|
143
223
|
try {
|
|
144
224
|
const result = await runMandatoryLocalToolFn({
|
|
145
225
|
toolName: 'record_url_narration',
|
|
@@ -30,17 +30,18 @@ export async function runSubmitToLibraryTool({
|
|
|
30
30
|
'/content-library/submit',
|
|
31
31
|
buildSubmitToLibraryBody(args, currentWorkspaceId)
|
|
32
32
|
);
|
|
33
|
-
// Server returns 2xx + body.error for transient "still processing" cases
|
|
34
|
-
//
|
|
35
|
-
//
|
|
36
|
-
// treats any 2xx as success and returns the
|
|
37
|
-
// surface body.error here — otherwise the agent
|
|
38
|
-
// undefined and (because the literal "undefined"
|
|
39
|
-
// submit silently failed) tends to redo the entire
|
|
40
|
-
// retrying submit_to_library.
|
|
41
|
-
//
|
|
33
|
+
// Server returns 2xx + body.error for transient "still processing" cases:
|
|
34
|
+
// HTTP 202 + {error: 'upload_still_in_progress', message: '...'} when the
|
|
35
|
+
// async upload pipeline (see docs/upload-pipeline-design.md) hasn't
|
|
36
|
+
// finished. The HTTP client treats any 2xx as success and returns the
|
|
37
|
+
// body verbatim, so we must surface body.error here — otherwise the agent
|
|
38
|
+
// reads `data.itemId` as undefined and (because the literal "undefined"
|
|
39
|
+
// looks like proof the submit silently failed) tends to redo the entire
|
|
40
|
+
// video instead of just retrying submit_to_library. Prefer body.message
|
|
41
|
+
// when present (rich human-readable retry hint); fall back to body.error.
|
|
42
42
|
if (data && typeof data === 'object' && typeof data.error === 'string' && data.error.trim()) {
|
|
43
|
-
|
|
43
|
+
const detail = (typeof data.message === 'string' && data.message.trim()) ? data.message : data.error;
|
|
44
|
+
return toolError(`submit_to_library not ready: ${detail}. Retry submit_to_library with the same video_path in a few seconds — do NOT re-record or re-compose the video.`);
|
|
44
45
|
}
|
|
45
46
|
if (!data || typeof data.itemId !== 'string' || !data.itemId) {
|
|
46
47
|
return toolError(`submit_to_library returned no itemId. Server response: ${JSON.stringify(data).slice(0, 300)}. Retry submit_to_library with the same video_path — do NOT re-record or re-compose.`);
|
|
@@ -0,0 +1,565 @@
|
|
|
1
|
+
// Daemon-owned upload job manager.
|
|
2
|
+
//
|
|
3
|
+
// Decouples the agent-facing `write_workspace_file` tool from the actual
|
|
4
|
+
// PUT-to-COS round-trip so:
|
|
5
|
+
// - the tool returns in <50ms (just enqueues a job) rather than holding the
|
|
6
|
+
// codex/claude tool slot for minutes
|
|
7
|
+
// - PUT failures are retried with exponential backoff
|
|
8
|
+
// - large files use COS multipart so a single dropped chunk doesn't restart
|
|
9
|
+
// the whole upload
|
|
10
|
+
// - daemon restarts pick up in-flight jobs from disk and resume
|
|
11
|
+
//
|
|
12
|
+
// Out of scope for this module: the actual COS API calls. Those live behind
|
|
13
|
+
// the injected `serverApi` interface (M3 implements the server endpoints, M4
|
|
14
|
+
// wires this manager to it).
|
|
15
|
+
//
|
|
16
|
+
// Schema and state machine: see docs/upload-pipeline-design.md.
|
|
17
|
+
|
|
18
|
+
import {
|
|
19
|
+
readFileSync, writeFileSync, readdirSync, statSync,
|
|
20
|
+
promises as fsPromises,
|
|
21
|
+
createReadStream,
|
|
22
|
+
mkdirSync, renameSync,
|
|
23
|
+
} from 'node:fs';
|
|
24
|
+
import { open as fsOpen } from 'node:fs/promises';
|
|
25
|
+
import path from 'node:path';
|
|
26
|
+
import crypto, { randomUUID } from 'node:crypto';
|
|
27
|
+
import os from 'node:os';
|
|
28
|
+
|
|
29
|
+
export const SCHEMA_VERSION = 1;
|
|
30
|
+
export const DEFAULT_CHUNK_SIZE = 8 * 1024 * 1024; // 8 MB
|
|
31
|
+
export const MULTIPART_THRESHOLD = 5 * 1024 * 1024; // COS minimum part size
|
|
32
|
+
export const MAX_JOB_ATTEMPTS = 5;
|
|
33
|
+
export const DEFAULT_TICK_INTERVAL_MS = 2_000;
|
|
34
|
+
export const PART_RETRY_ATTEMPTS = 3;
|
|
35
|
+
export const PART_RETRY_BASE_MS = 1_000; // 1s, 3s, 9s
|
|
36
|
+
export const TERMINAL_JOB_TTL_MS = 7 * 24 * 3600 * 1000; // sweep done/dead_letter after 7 days
|
|
37
|
+
export const HOUSEKEEPING_INTERVAL_MS = 6 * 3600 * 1000; // run housekeeping every 6h
|
|
38
|
+
// Per-PUT timeout — Node's fetch has no overall request timeout. Without this
|
|
39
|
+
// a stalled COS connection wedges the chunk loop forever (observed during the
|
|
40
|
+
// first Task #25 upload: chunk 1 PUT hung 7+ minutes with no progress, no
|
|
41
|
+
// error). 5 minutes covers slow networks for an 8MB chunk (~25kB/s floor)
|
|
42
|
+
// while still letting failures surface to the chunk-level retry loop.
|
|
43
|
+
export const PUT_REQUEST_TIMEOUT_MS = 5 * 60 * 1000;
|
|
44
|
+
|
|
45
|
+
function nowIso() { return new Date().toISOString(); }
|
|
46
|
+
|
|
47
|
+
async function sha256OfFile(localPath) {
|
|
48
|
+
return new Promise((resolve, reject) => {
|
|
49
|
+
const h = crypto.createHash('sha256');
|
|
50
|
+
const stream = createReadStream(localPath);
|
|
51
|
+
stream.on('data', chunk => h.update(chunk));
|
|
52
|
+
stream.on('end', () => resolve(h.digest('hex')));
|
|
53
|
+
stream.on('error', reject);
|
|
54
|
+
});
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
// Job-level backoff: attempts²×30s.
|
|
58
|
+
// 1st retry: +30s, 2nd: +120s, 3rd: +270s, 4th: +480s.
|
|
59
|
+
export function jobBackoffMs(attempts) {
|
|
60
|
+
return attempts * attempts * 30_000;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// Chunk-level backoff: 1s, 3s, 9s.
|
|
64
|
+
function partBackoffMs(attempt) {
|
|
65
|
+
return PART_RETRY_BASE_MS * (3 ** (attempt - 1));
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
function sleep(ms) { return new Promise(r => setTimeout(r, ms)); }
|
|
69
|
+
|
|
70
|
+
async function fetchWithTimeout(fetchFn, url, init, timeoutMs) {
|
|
71
|
+
const controller = new AbortController();
|
|
72
|
+
const timer = setTimeout(() => controller.abort(), timeoutMs);
|
|
73
|
+
try {
|
|
74
|
+
return await fetchFn(url, { ...init, signal: controller.signal });
|
|
75
|
+
} catch (err) {
|
|
76
|
+
if (err?.name === 'AbortError' || controller.signal.aborted) {
|
|
77
|
+
const wrapped = new Error(`PUT timed out after ${Math.round(timeoutMs / 1000)}s (COS unresponsive)`);
|
|
78
|
+
wrapped.cause = err;
|
|
79
|
+
throw wrapped;
|
|
80
|
+
}
|
|
81
|
+
throw err;
|
|
82
|
+
} finally {
|
|
83
|
+
clearTimeout(timer);
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
/**
|
|
88
|
+
* UploadJobManager — singleton in daemon process.
|
|
89
|
+
*
|
|
90
|
+
* const mgr = new UploadJobManager({ serverApi });
|
|
91
|
+
* mgr.start();
|
|
92
|
+
* const { jobId } = await mgr.enqueue({ workspaceId, agentId, localPath, workspacePath, mime });
|
|
93
|
+
* // job runs in background; later:
|
|
94
|
+
* mgr.getStatus(jobId);
|
|
95
|
+
* // or shutting down:
|
|
96
|
+
* mgr.stop();
|
|
97
|
+
*
|
|
98
|
+
* serverApi shape (M3 implements):
|
|
99
|
+
* presignSingle({ workspaceId, path, size, mime, sha256 }) → { objectKey, uploadUrl, method?, headers? }
|
|
100
|
+
* confirmSingle({ workspaceId, path, objectKey }) → { ok: true }
|
|
101
|
+
* presignMultipart({ workspaceId, path, size, mime, sha256 }) → { objectKey, cosUploadId }
|
|
102
|
+
* presignPart({ workspaceId, objectKey, cosUploadId, partNumber }) → { url, method?, headers? }
|
|
103
|
+
* completeMultipart({ workspaceId, path, objectKey, cosUploadId, parts, size, sha256 }) → { ok: true }
|
|
104
|
+
* abortMultipart({ workspaceId, path, objectKey, cosUploadId }) → { ok: true }
|
|
105
|
+
*/
|
|
106
|
+
export class UploadJobManager {
|
|
107
|
+
constructor({
|
|
108
|
+
jobDir = path.join(os.homedir(), '.lightcone', 'upload-jobs'),
|
|
109
|
+
serverApi,
|
|
110
|
+
fetchFn = globalThis.fetch,
|
|
111
|
+
nowFn = () => Date.now(),
|
|
112
|
+
log = (msg) => console.error(`[UploadJobManager] ${msg}`),
|
|
113
|
+
tickIntervalMs = DEFAULT_TICK_INTERVAL_MS,
|
|
114
|
+
chunkSize = DEFAULT_CHUNK_SIZE,
|
|
115
|
+
multipartThreshold = MULTIPART_THRESHOLD,
|
|
116
|
+
} = {}) {
|
|
117
|
+
if (!serverApi) throw new Error('UploadJobManager: serverApi is required');
|
|
118
|
+
this.jobDir = jobDir;
|
|
119
|
+
this.serverApi = serverApi;
|
|
120
|
+
this.fetchFn = fetchFn;
|
|
121
|
+
this.nowFn = nowFn;
|
|
122
|
+
this.log = log;
|
|
123
|
+
this.tickIntervalMs = tickIntervalMs;
|
|
124
|
+
this.chunkSize = chunkSize;
|
|
125
|
+
this.multipartThreshold = multipartThreshold;
|
|
126
|
+
|
|
127
|
+
this._tickInterval = null;
|
|
128
|
+
this._housekeepingInterval = null;
|
|
129
|
+
this._stopping = false;
|
|
130
|
+
this._activeJobs = new Set(); // jobIds currently advancing
|
|
131
|
+
this._pathLocks = new Map(); // `${workspaceId}|${workspacePath}` → jobId
|
|
132
|
+
|
|
133
|
+
mkdirSync(this.jobDir, { recursive: true });
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
// ─── public API ──────────────────────────────────────────────────────────
|
|
137
|
+
|
|
138
|
+
/**
|
|
139
|
+
* Stat + sha256 the file, choose mode (single vs multipart), persist a
|
|
140
|
+
* pending job, and schedule a tick. Throws synchronously on path-lock
|
|
141
|
+
* conflict or missing/unreadable file.
|
|
142
|
+
*/
|
|
143
|
+
async enqueue({ workspaceId, agentId = null, localPath, workspacePath, mime = 'application/octet-stream' }) {
|
|
144
|
+
if (!workspaceId) throw new Error('enqueue: workspaceId required');
|
|
145
|
+
if (!localPath) throw new Error('enqueue: localPath required');
|
|
146
|
+
if (!workspacePath) throw new Error('enqueue: workspacePath required');
|
|
147
|
+
|
|
148
|
+
const pathKey = `${workspaceId}|${workspacePath}`;
|
|
149
|
+
const existing = this._pathLocks.get(pathKey);
|
|
150
|
+
if (existing) {
|
|
151
|
+
throw new Error(`upload_path_locked: another upload to ${workspacePath} is in progress (jobId=${existing})`);
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
let st;
|
|
155
|
+
try { st = statSync(localPath); }
|
|
156
|
+
catch (err) { throw new Error(`localPath not readable: ${err.message}`); }
|
|
157
|
+
const size = st.size;
|
|
158
|
+
if (!Number.isFinite(size) || size <= 0) {
|
|
159
|
+
throw new Error(`localPath has invalid size: ${size}`);
|
|
160
|
+
}
|
|
161
|
+
const mtimeMs = st.mtimeMs;
|
|
162
|
+
const sha256 = await sha256OfFile(localPath);
|
|
163
|
+
|
|
164
|
+
const uploadMode = size >= this.multipartThreshold ? 'multipart' : 'single';
|
|
165
|
+
const totalChunks = uploadMode === 'multipart' ? Math.ceil(size / this.chunkSize) : 1;
|
|
166
|
+
|
|
167
|
+
const job = {
|
|
168
|
+
schemaVersion: SCHEMA_VERSION,
|
|
169
|
+
jobId: randomUUID(),
|
|
170
|
+
createdAt: nowIso(),
|
|
171
|
+
updatedAt: nowIso(),
|
|
172
|
+
agentId,
|
|
173
|
+
workspaceId,
|
|
174
|
+
localPath,
|
|
175
|
+
workspacePath,
|
|
176
|
+
mime,
|
|
177
|
+
objectKey: null,
|
|
178
|
+
size,
|
|
179
|
+
mtimeMs,
|
|
180
|
+
sha256,
|
|
181
|
+
uploadMode,
|
|
182
|
+
chunkSize: this.chunkSize,
|
|
183
|
+
totalChunks,
|
|
184
|
+
cosUploadId: null,
|
|
185
|
+
doneParts: [],
|
|
186
|
+
status: 'pending',
|
|
187
|
+
attempts: 0,
|
|
188
|
+
lastError: null,
|
|
189
|
+
lastErrorAt: null,
|
|
190
|
+
nextAttemptAt: this.nowFn(),
|
|
191
|
+
};
|
|
192
|
+
|
|
193
|
+
this._persist(job);
|
|
194
|
+
this._pathLocks.set(pathKey, job.jobId);
|
|
195
|
+
|
|
196
|
+
// Best-effort immediate tick so the very first job doesn't wait for the interval.
|
|
197
|
+
setImmediate(() => this._tick().catch(err => this.log(`tick (post-enqueue) failed: ${err.message}`)));
|
|
198
|
+
|
|
199
|
+
return {
|
|
200
|
+
jobId: job.jobId,
|
|
201
|
+
mode: uploadMode,
|
|
202
|
+
totalChunks,
|
|
203
|
+
status: 'pending',
|
|
204
|
+
size,
|
|
205
|
+
};
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
getStatus(jobId) {
|
|
209
|
+
const job = this._loadById(jobId);
|
|
210
|
+
return job ? this._publicState(job) : null;
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
start() {
|
|
214
|
+
if (this._tickInterval) return;
|
|
215
|
+
this._stopping = false;
|
|
216
|
+
// Rebuild path locks from on-disk jobs so concurrent enqueue still respects them.
|
|
217
|
+
for (const job of this._listJobs()) {
|
|
218
|
+
if (job.status === 'pending' || job.status === 'uploading') {
|
|
219
|
+
const pathKey = `${job.workspaceId}|${job.workspacePath}`;
|
|
220
|
+
this._pathLocks.set(pathKey, job.jobId);
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
this._tickInterval = setInterval(() => {
|
|
224
|
+
this._tick().catch(err => this.log(`tick failed: ${err.message}`));
|
|
225
|
+
}, this.tickIntervalMs);
|
|
226
|
+
// Run housekeeping immediately + on a slow schedule.
|
|
227
|
+
this._housekeep();
|
|
228
|
+
this._housekeepingInterval = setInterval(() => {
|
|
229
|
+
this._housekeep();
|
|
230
|
+
}, HOUSEKEEPING_INTERVAL_MS);
|
|
231
|
+
this.log(`started (jobDir=${this.jobDir}, tick=${this.tickIntervalMs}ms)`);
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
/**
|
|
235
|
+
* Signal stop. In-flight `_advance` calls notice via `_stopping` flag and
|
|
236
|
+
* exit between chunks; partial state is persisted so the next start() can
|
|
237
|
+
* resume. Does NOT abort the underlying fetch in progress — a single chunk
|
|
238
|
+
* may still complete after stop returns.
|
|
239
|
+
*/
|
|
240
|
+
stop() {
|
|
241
|
+
this._stopping = true;
|
|
242
|
+
if (this._tickInterval) clearInterval(this._tickInterval);
|
|
243
|
+
this._tickInterval = null;
|
|
244
|
+
if (this._housekeepingInterval) clearInterval(this._housekeepingInterval);
|
|
245
|
+
this._housekeepingInterval = null;
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
/**
|
|
249
|
+
* Drop terminal (done / dead_letter) job files older than TERMINAL_JOB_TTL_MS.
|
|
250
|
+
* Keeps recent ones around for observability / debugging.
|
|
251
|
+
*/
|
|
252
|
+
_housekeep() {
|
|
253
|
+
try {
|
|
254
|
+
const now = this.nowFn();
|
|
255
|
+
let pruned = 0;
|
|
256
|
+
for (const job of this._listJobs()) {
|
|
257
|
+
if (job.status !== 'done' && job.status !== 'dead_letter') continue;
|
|
258
|
+
const updatedAt = job.updatedAt ? Date.parse(job.updatedAt) : 0;
|
|
259
|
+
if (!Number.isFinite(updatedAt)) continue;
|
|
260
|
+
if (now - updatedAt < TERMINAL_JOB_TTL_MS) continue;
|
|
261
|
+
try {
|
|
262
|
+
// unlink via fsPromises is fine to do sync-ish since housekeeping is rare
|
|
263
|
+
// and the file is tiny.
|
|
264
|
+
fsPromises.unlink(this._jobPath(job.jobId)).catch(() => {});
|
|
265
|
+
pruned += 1;
|
|
266
|
+
} catch { /* ignore */ }
|
|
267
|
+
}
|
|
268
|
+
if (pruned > 0) this.log(`housekeeping: pruned ${pruned} terminal job(s) older than ${TERMINAL_JOB_TTL_MS / 86_400_000}d`);
|
|
269
|
+
} catch (err) {
|
|
270
|
+
this.log(`housekeeping failed: ${err?.message ?? err}`);
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
/**
|
|
275
|
+
* Test helper: wait until no jobs are pending/uploading or until timeout.
|
|
276
|
+
*/
|
|
277
|
+
async waitAllSettled({ timeoutMs = 30_000 } = {}) {
|
|
278
|
+
const start = this.nowFn();
|
|
279
|
+
while (this.nowFn() - start < timeoutMs) {
|
|
280
|
+
const jobs = this._listJobs();
|
|
281
|
+
const inFlight = jobs.some(j => j.status === 'pending' || j.status === 'uploading');
|
|
282
|
+
if (!inFlight && this._activeJobs.size === 0) return;
|
|
283
|
+
await sleep(50);
|
|
284
|
+
}
|
|
285
|
+
throw new Error('waitAllSettled: timeout');
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
// ─── internal ────────────────────────────────────────────────────────────
|
|
289
|
+
|
|
290
|
+
async _tick() {
|
|
291
|
+
if (this._stopping) return;
|
|
292
|
+
const jobs = this._listJobs();
|
|
293
|
+
const now = this.nowFn();
|
|
294
|
+
for (const job of jobs) {
|
|
295
|
+
if (this._stopping) break;
|
|
296
|
+
if (job.status === 'done' || job.status === 'dead_letter') continue;
|
|
297
|
+
if (this._activeJobs.has(job.jobId)) continue;
|
|
298
|
+
if (job.nextAttemptAt && job.nextAttemptAt > now) continue;
|
|
299
|
+
this._activeJobs.add(job.jobId);
|
|
300
|
+
this._advance(job)
|
|
301
|
+
.catch(err => this.log(`advance(${job.jobId}) crashed: ${err.message}`))
|
|
302
|
+
.finally(() => this._activeJobs.delete(job.jobId));
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
async _advance(job) {
|
|
307
|
+
try {
|
|
308
|
+
// Local-file integrity check before each attempt.
|
|
309
|
+
let st;
|
|
310
|
+
try { st = statSync(job.localPath); }
|
|
311
|
+
catch (err) {
|
|
312
|
+
return this._markDeadLetter(job, `local_file_gone:${err.code ?? err.message}`);
|
|
313
|
+
}
|
|
314
|
+
if (st.size !== job.size) {
|
|
315
|
+
return this._markDeadLetter(job, `local_file_changed: declared=${job.size}, now=${st.size}`);
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
if (job.status !== 'uploading') {
|
|
319
|
+
job.status = 'uploading';
|
|
320
|
+
this._persist(job);
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
if (job.uploadMode === 'single') {
|
|
324
|
+
await this._advanceSingle(job);
|
|
325
|
+
} else {
|
|
326
|
+
await this._advanceMultipart(job);
|
|
327
|
+
}
|
|
328
|
+
} catch (err) {
|
|
329
|
+
this._recordFailure(job, err);
|
|
330
|
+
}
|
|
331
|
+
}
|
|
332
|
+
|
|
  /**
   * Single-PUT path: presign, upload the whole file in one request, confirm
   * with the server, then mark the job done. Intended for files small enough
   * that buffering them fully in memory (readFile below) is acceptable.
   *
   * NOTE(review): `presign.alreadyExists` (surfaced by the server API wrapper)
   * is ignored here — confirm whether an existing object should skip the PUT.
   */
  async _advanceSingle(job) {
    const presign = await this.serverApi.presignSingle({
      workspaceId: job.workspaceId,
      path: job.workspacePath,
      size: job.size,
      mime: job.mime,
      sha256: job.sha256,
    });
    // Persist the objectKey immediately so a crash after the PUT can still
    // confirm/resume against the same key.
    job.objectKey = presign.objectKey;
    this._persist(job);

    // Whole file in memory — single mode is only chosen for small files.
    const fileBuf = await fsPromises.readFile(job.localPath);
    const resp = await fetchWithTimeout(this.fetchFn, presign.uploadUrl, {
      method: presign.method ?? 'PUT',
      headers: {
        'Content-Type': job.mime,
        'Content-Length': String(job.size),
        // Presign headers last so the server-dictated ones (e.g. auth) win.
        ...(presign.headers ?? {}),
      },
      body: fileBuf,
    }, PUT_REQUEST_TIMEOUT_MS);
    if (!resp.ok) {
      // Best-effort body snippet for diagnostics; never let .text() mask the HTTP failure.
      const text = await resp.text().catch(() => '');
      throw new Error(`single PUT failed: HTTP ${resp.status} ${text.slice(0, 200)}`);
    }

    // Tell the server the object landed so it can finalize workspace metadata.
    await this.serverApi.confirmSingle({
      workspaceId: job.workspaceId,
      path: job.workspacePath,
      objectKey: job.objectKey,
    });

    this._markDone(job);
  }
368
|
+
async _advanceMultipart(job) {
|
|
369
|
+
if (!job.cosUploadId) {
|
|
370
|
+
const init = await this.serverApi.presignMultipart({
|
|
371
|
+
workspaceId: job.workspaceId,
|
|
372
|
+
path: job.workspacePath,
|
|
373
|
+
size: job.size,
|
|
374
|
+
mime: job.mime,
|
|
375
|
+
sha256: job.sha256,
|
|
376
|
+
});
|
|
377
|
+
job.objectKey = init.objectKey;
|
|
378
|
+
job.cosUploadId = init.cosUploadId;
|
|
379
|
+
this._persist(job);
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
const doneSet = new Set(job.doneParts.map(p => p.partNumber));
|
|
383
|
+
const fh = await fsOpen(job.localPath, 'r');
|
|
384
|
+
try {
|
|
385
|
+
for (let i = 1; i <= job.totalChunks; i++) {
|
|
386
|
+
if (this._stopping) return; // graceful shutdown mid-job
|
|
387
|
+
if (doneSet.has(i)) continue;
|
|
388
|
+
|
|
389
|
+
const offset = (i - 1) * job.chunkSize;
|
|
390
|
+
const remaining = job.size - offset;
|
|
391
|
+
const partLen = Math.min(job.chunkSize, remaining);
|
|
392
|
+
const buf = Buffer.alloc(partLen);
|
|
393
|
+
await fh.read(buf, 0, partLen, offset);
|
|
394
|
+
|
|
395
|
+
const etag = await this._uploadPartWithRetry(job, i, buf);
|
|
396
|
+
job.doneParts.push({ partNumber: i, etag });
|
|
397
|
+
this._persist(job);
|
|
398
|
+
}
|
|
399
|
+
} finally {
|
|
400
|
+
await fh.close().catch(() => {});
|
|
401
|
+
}
|
|
402
|
+
|
|
403
|
+
if (this._stopping) return;
|
|
404
|
+
|
|
405
|
+
await this.serverApi.completeMultipart({
|
|
406
|
+
workspaceId: job.workspaceId,
|
|
407
|
+
path: job.workspacePath,
|
|
408
|
+
objectKey: job.objectKey,
|
|
409
|
+
cosUploadId: job.cosUploadId,
|
|
410
|
+
parts: job.doneParts,
|
|
411
|
+
size: job.size,
|
|
412
|
+
sha256: job.sha256,
|
|
413
|
+
});
|
|
414
|
+
|
|
415
|
+
this._markDone(job);
|
|
416
|
+
}
|
|
417
|
+
|
|
  /**
   * Upload one multipart chunk, retrying up to PART_RETRY_ATTEMPTS times with
   * partBackoffMs(attempt) delays. The part URL is re-presigned on every
   * attempt, so an expired signature cannot wedge the loop.
   *
   * @param {object} job - job record (objectKey / cosUploadId already set)
   * @param {number} partNumber - 1-based COS part index
   * @param {Buffer} buf - exact bytes of this part
   * @returns {Promise<string>} the part's ETag (required by complete-multipart)
   * @throws Error('stopping') on shutdown, or after all retries are exhausted
   */
  async _uploadPartWithRetry(job, partNumber, buf) {
    let lastErr;
    for (let attempt = 1; attempt <= PART_RETRY_ATTEMPTS; attempt++) {
      if (this._stopping) throw new Error('stopping');
      try {
        const presign = await this.serverApi.presignPart({
          workspaceId: job.workspaceId,
          objectKey: job.objectKey,
          cosUploadId: job.cosUploadId,
          partNumber,
        });
        const resp = await fetchWithTimeout(this.fetchFn, presign.url, {
          method: presign.method ?? 'PUT',
          headers: {
            'Content-Length': String(buf.length),
            // Presign-dictated headers win over ours.
            ...(presign.headers ?? {}),
          },
          body: buf,
        }, PUT_REQUEST_TIMEOUT_MS);
        if (!resp.ok) {
          const text = await resp.text().catch(() => '');
          throw new Error(`HTTP ${resp.status} ${text.slice(0, 200)}`);
        }
        // `get?.` guards injected fetch stubs whose headers lack .get; the
        // 'ETag' fallback presumably targets plain-object mocks — a real
        // Headers.get is already case-insensitive. Final fallback lets the
        // presign response carry the etag for stores that omit the header.
        const etag = resp.headers.get?.('etag') ?? resp.headers.get?.('ETag') ?? presign.etag ?? '';
        if (!etag) throw new Error(`PUT part ${partNumber} missing etag`);
        return etag;
      } catch (err) {
        lastErr = err;
        this.log(`part ${partNumber} attempt ${attempt}/${PART_RETRY_ATTEMPTS} failed: ${err.message}`);
        if (attempt < PART_RETRY_ATTEMPTS) {
          await sleep(partBackoffMs(attempt));
        }
      }
    }
    throw new Error(`part ${partNumber} exhausted ${PART_RETRY_ATTEMPTS} retries: ${lastErr?.message ?? lastErr}`);
  }
455
|
+
_markDone(job) {
|
|
456
|
+
job.status = 'done';
|
|
457
|
+
job.updatedAt = nowIso();
|
|
458
|
+
job.lastError = null;
|
|
459
|
+
job.lastErrorAt = null;
|
|
460
|
+
job.nextAttemptAt = null;
|
|
461
|
+
this._persist(job);
|
|
462
|
+
this._releaseLock(job);
|
|
463
|
+
this.log(`job ${job.jobId} done (${job.workspacePath}, ${job.size} bytes, ${job.uploadMode})`);
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
_markDeadLetter(job, reason) {
|
|
467
|
+
// Best-effort COS cleanup so we don't leak storage cost on aborted multipart.
|
|
468
|
+
if (job.uploadMode === 'multipart' && job.cosUploadId && job.objectKey) {
|
|
469
|
+
Promise.resolve(this.serverApi.abortMultipart({
|
|
470
|
+
workspaceId: job.workspaceId,
|
|
471
|
+
path: job.workspacePath,
|
|
472
|
+
objectKey: job.objectKey,
|
|
473
|
+
cosUploadId: job.cosUploadId,
|
|
474
|
+
})).catch(err => this.log(`abort_multipart for ${job.jobId} failed: ${err.message}`));
|
|
475
|
+
}
|
|
476
|
+
job.status = 'dead_letter';
|
|
477
|
+
job.lastError = String(reason);
|
|
478
|
+
job.lastErrorAt = nowIso();
|
|
479
|
+
job.updatedAt = nowIso();
|
|
480
|
+
job.nextAttemptAt = null;
|
|
481
|
+
this._persist(job);
|
|
482
|
+
this._releaseLock(job);
|
|
483
|
+
this.log(`job ${job.jobId} dead_letter: ${reason}`);
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
_recordFailure(job, err) {
|
|
487
|
+
job.attempts = (job.attempts ?? 0) + 1;
|
|
488
|
+
job.lastError = String(err?.message ?? err);
|
|
489
|
+
job.lastErrorAt = nowIso();
|
|
490
|
+
job.updatedAt = nowIso();
|
|
491
|
+
if (job.attempts >= MAX_JOB_ATTEMPTS) {
|
|
492
|
+
this._markDeadLetter(job, `max_attempts_exhausted: ${job.lastError}`);
|
|
493
|
+
return;
|
|
494
|
+
}
|
|
495
|
+
job.status = 'pending';
|
|
496
|
+
const backoff = jobBackoffMs(job.attempts);
|
|
497
|
+
job.nextAttemptAt = this.nowFn() + backoff;
|
|
498
|
+
this._persist(job);
|
|
499
|
+
this.log(`job ${job.jobId} attempt ${job.attempts} failed: ${job.lastError}; next retry in ${Math.round(backoff / 1000)}s`);
|
|
500
|
+
}
|
|
501
|
+
|
|
502
|
+
_releaseLock(job) {
|
|
503
|
+
const pathKey = `${job.workspaceId}|${job.workspacePath}`;
|
|
504
|
+
if (this._pathLocks.get(pathKey) === job.jobId) {
|
|
505
|
+
this._pathLocks.delete(pathKey);
|
|
506
|
+
}
|
|
507
|
+
}
|
|
508
|
+
|
|
509
|
+
// ─── persistence ─────────────────────────────────────────────────────────
|
|
510
|
+
|
|
511
|
+
_jobPath(jobId) {
|
|
512
|
+
return path.join(this.jobDir, `${jobId}.json`);
|
|
513
|
+
}
|
|
514
|
+
|
|
515
|
+
_persist(job) {
|
|
516
|
+
job.updatedAt = nowIso();
|
|
517
|
+
const dest = this._jobPath(job.jobId);
|
|
518
|
+
const tmp = `${dest}.tmp`;
|
|
519
|
+
writeFileSync(tmp, JSON.stringify(job, null, 2));
|
|
520
|
+
renameSync(tmp, dest); // atomic on POSIX
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
_loadById(jobId) {
|
|
524
|
+
try {
|
|
525
|
+
const text = readFileSync(this._jobPath(jobId), 'utf8');
|
|
526
|
+
const job = JSON.parse(text);
|
|
527
|
+
if (job.schemaVersion !== SCHEMA_VERSION) {
|
|
528
|
+
this.log(`job ${jobId}: unsupported schemaVersion ${job.schemaVersion}, ignored`);
|
|
529
|
+
return null;
|
|
530
|
+
}
|
|
531
|
+
return job;
|
|
532
|
+
} catch { return null; }
|
|
533
|
+
}
|
|
534
|
+
|
|
535
|
+
_listJobs() {
|
|
536
|
+
let names;
|
|
537
|
+
try { names = readdirSync(this.jobDir); }
|
|
538
|
+
catch { return []; }
|
|
539
|
+
const out = [];
|
|
540
|
+
for (const name of names) {
|
|
541
|
+
if (!name.endsWith('.json') || name.endsWith('.tmp.json')) continue;
|
|
542
|
+
const jobId = name.slice(0, -5);
|
|
543
|
+
const job = this._loadById(jobId);
|
|
544
|
+
if (job) out.push(job);
|
|
545
|
+
}
|
|
546
|
+
return out;
|
|
547
|
+
}
|
|
548
|
+
|
|
549
|
+
_publicState(job) {
|
|
550
|
+
return {
|
|
551
|
+
jobId: job.jobId,
|
|
552
|
+
status: job.status,
|
|
553
|
+
mode: job.uploadMode,
|
|
554
|
+
size: job.size,
|
|
555
|
+
progress: job.uploadMode === 'multipart'
|
|
556
|
+
? { donePartCount: job.doneParts.length, totalChunks: job.totalChunks }
|
|
557
|
+
: { donePartCount: job.status === 'done' ? 1 : 0, totalChunks: 1 },
|
|
558
|
+
attempts: job.attempts,
|
|
559
|
+
lastError: job.lastError,
|
|
560
|
+
lastErrorAt: job.lastErrorAt,
|
|
561
|
+
nextAttemptAt: job.nextAttemptAt,
|
|
562
|
+
objectKey: job.objectKey,
|
|
563
|
+
};
|
|
564
|
+
}
|
|
565
|
+
}
|
|
@@ -0,0 +1,80 @@
|
|
|
// Thin daemon-side wrapper around the server's /storage/* endpoints.
//
// Implements the `serverApi` interface consumed by UploadJobManager: the
// abstract { presignSingle / confirmSingle / presignMultipart / presignPart /
// completeMultipart / abortMultipart } calls are translated here — and only
// here — into HTTP requests against the lightcone server's
// internal/agent/.../storage/* endpoints. Auth headers, governance, and
// retries live in the injected `api` function (typically chat-bridge's `api`).
//
// Why split this out: UploadJobManager stays free of HTTP knowledge, so its
// tests can run entirely against an injected mock of this interface.

/**
 * @param {Object} deps
 * @param {(method: string, path: string, body?: unknown) => Promise<unknown>} deps.api
 *   HTTP helper that POSTs to `/internal/agent/<agentId>/<path>` and returns
 *   the JSON body. Throws on non-2xx.
 */
export function createUploadServerApi({ api }) {
  if (typeof api !== 'function') {
    throw new Error('createUploadServerApi: api function is required');
  }

  // Every storage endpoint is a POST; centralize the verb.
  const post = (endpoint, body) => api('POST', endpoint, body);

  return {
    async presignSingle({ workspaceId, path, size, mime, sha256 }) {
      // Existing endpoint, reused as-is. The server responds with
      // { uploadUrl, method, headers, objectKey, alreadyExists }; normalize
      // that into the shape UploadJobManager expects.
      const data = await post('/storage/presign', { workspaceId, path, size, mime, sha256 });
      return {
        objectKey: data.objectKey,
        uploadUrl: data.uploadUrl,
        method: data.method ?? 'PUT',
        headers: data.headers ?? {},
        alreadyExists: Boolean(data.alreadyExists),
      };
    },

    async confirmSingle({ workspaceId, path, objectKey }) {
      return post('/storage/confirm', { workspaceId, path, objectKey });
    },

    async presignMultipart({ workspaceId, path, size, mime, sha256 }) {
      const data = await post('/storage/presign-multipart', { workspaceId, path, size, mime, sha256 });
      return {
        objectKey: data.objectKey,
        cosUploadId: data.cosUploadId,
        alreadyExists: Boolean(data.alreadyExists),
      };
    },

    async presignPart({ workspaceId, objectKey, cosUploadId, partNumber }) {
      const data = await post('/storage/presign-part', { workspaceId, objectKey, cosUploadId, partNumber });
      return {
        url: data.url,
        method: data.method ?? 'PUT',
        headers: data.headers ?? {},
      };
    },

    async completeMultipart({ workspaceId, path, objectKey, cosUploadId, parts, size, sha256 }) {
      return post('/storage/complete-multipart', {
        workspaceId, path, objectKey, cosUploadId, parts, size, sha256,
      });
    },

    async abortMultipart({ workspaceId, path, objectKey, cosUploadId }) {
      return post('/storage/abort-multipart', { workspaceId, path, objectKey, cosUploadId });
    },
  };
}