@lightcone-ai/daemon 0.15.77 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lightcone-ai/daemon",
3
- "version": "0.15.77",
3
+ "version": "0.16.1",
4
4
  "type": "module",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -2,11 +2,14 @@
2
2
  import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
3
3
  import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
4
4
  import { z } from 'zod';
5
- import { createReadStream, existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
5
+ import { createReadStream, existsSync, mkdirSync, readFileSync, writeFileSync, statSync } from 'fs';
6
6
  import { createHash, randomUUID } from 'crypto';
7
7
  import path, { extname } from 'path';
8
+ import os from 'os';
8
9
  import { recordUrlNarration } from './_vendor/video/recorder/index.js';
9
- import { writeLocalFileToWorkspace } from './workspace-file-upload.js';
10
+ import { writeLocalFileToWorkspace, resolveWorkspaceFileUploadPlan } from './workspace-file-upload.js';
11
+ import { UploadJobManager } from './upload-job-manager.js';
12
+ import { createUploadServerApi } from './upload-server-api.js';
10
13
  import { runRecordUrlNarrationTool } from './record-url-narration-tool.js';
11
14
  import { runSubmitToLibraryTool } from './submit-to-library-tool.js';
12
15
  import { runRenderTextToImageTool } from './tools/render-text-to-image.js';
@@ -843,6 +846,23 @@ const IS_HOST_AGENT = await resolveHostAgentFlag();
843
846
 
844
847
  const server = new McpServer({ name: 'chat', version: '0.1.0' });
845
848
 
849
// ── Upload job manager (async + multipart workspace-file uploads) ────────────
// Per-agent jobDir so each chat-bridge sees only its own pending jobs. When the
// chat-bridge process restarts (skill bind, manual restart) it picks up any
// in-flight uploads from disk and resumes them; see docs/upload-pipeline-design.md.
const _uploadJobDir = path.join(
  os.homedir(),
  '.lightcone',
  'upload-jobs',
  AGENT_ID || 'default'
);
const uploadJobManager = new UploadJobManager({
  jobDir: _uploadJobDir,
  // Wrap `api` in a lambda so the manager always calls the current binding
  // rather than capturing a reference at construction time.
  serverApi: createUploadServerApi({ api: (...args) => api(...args) }),
  log: (msg) => console.error(`[chat-bridge:UploadJobManager] ${msg}`),
});
// Starts the background tick loop and resumes any jobs persisted by a
// previous process run.
uploadJobManager.start();
865
+
846
866
  // ── check_messages ────────────────────────────────────────────────────────────
847
867
  server.tool('check_messages', 'Check for new messages in your inbox', {}, async () => {
848
868
  const data = await api('GET', '/receive');
@@ -884,11 +904,16 @@ server.tool('search_messages', 'Search messages within a specific workspace. You
884
904
  const data = await api('GET', `/search?${params}`);
885
905
  if (!data.results || data.results.length === 0)
886
906
  return { content: [{ type: 'text', text: 'No search results.' }] };
907
+ // Use full content rather than snippet — the snippet truncates at ~200 chars,
908
+ // which routinely cuts URLs / structured data in half (e.g. a forwarded list
909
+ // of mp.weixin.qq.com article URLs becomes useless if only the second one
910
+ // shrinks below its query-string). For search the agent's intent is usually
911
+ // "give me the original message verbatim so I can act on it", not a teaser.
887
912
  const formatted = data.results.map((r, i) => [
888
913
  `[${i + 1}] msg=${r.id} seq=${r.seq} time=${r.createdAt}`,
889
914
  `workspace: #${r.workspaceName}`,
890
915
  `sender: @${r.senderName}${r.senderType === 'agent' ? ' (agent)' : ''}`,
891
- `content: ${r.snippet}`,
916
+ `content: ${r.content ?? r.snippet}`,
892
917
  ].join('\n')).join('\n\n');
893
918
  return { content: [{ type: 'text', text: `## Search Results for "${trimmed}" (${data.results.length} results)\n\n${formatted}` }] };
894
919
  } catch (err) {
@@ -1099,38 +1124,67 @@ server.tool('write_workspace', 'Write a file to the shared workspace. Use this t
1099
1124
  return { content: [{ type: 'text', text: `Saved to workspace: ${path}` }] };
1100
1125
  });
1101
1126
 
1102
- server.tool('write_workspace_file', 'Write a local file directly to the shared workspace. Prefer this over write_workspace for images/PDFs/binary files so large base64 content never enters the model context. The source file may be a relative path under the current agent workspace, or an absolute path inside the agent workspace/workspace shared artifacts/notes/tmp directories.', {
1103
- file_path: z.string().describe('Local file path. Relative paths resolve from the current agent workspace. Absolute paths must stay inside the agent/workspace.'),
1104
- path: z.string().describe('Destination path relative to workspace root, e.g. "artifacts/cover.png"'),
1105
- }, async ({ file_path, path }) => {
1127
+ server.tool('write_workspace_file',
1128
+ 'Write a local file directly to the shared workspace. Prefer this over write_workspace for images/PDFs/binary files so large base64 content never enters the model context.\n\n'
1129
+ + 'Binary uploads are ASYNCHRONOUS: the tool queues an upload job and returns immediately with a jobId. The actual PUT runs in the background with retry + COS multipart (no codex 120s timeout, no single-PUT failures). When you next call submit_to_library on the same workspace path, the server waits up to ~90s for the upload to finish before indexing; if it returns "upload_still_in_progress" just retry submit_to_library a few seconds later — DO NOT re-record/re-compose the video, the source file is fine.\n\n'
1130
+ + 'Text files (.md/.txt/.json/etc.) still upload synchronously since they are tiny.',
1131
+ {
1132
+ file_path: z.string().describe('Local file path. Relative paths resolve from the current agent workspace. Absolute paths must stay inside the agent/workspace.'),
1133
+ path: z.string().describe('Destination path relative to workspace root, e.g. "artifacts/cover.png"'),
1134
+ },
1135
+ async ({ file_path, path: workspacePath }) => {
1106
1136
  if (!currentWorkspaceId) return { content: [{ type: 'text', text: 'No workspace context.' }] };
1107
1137
  const localPath = resolveLocalWorkspaceFile(file_path);
1108
- const result = await writeLocalFileToWorkspace({
1109
- localPath,
1110
- workspacePath: path,
1111
- workspaceId: currentWorkspaceId,
1112
- readFileSyncFn: readFileSync,
1113
- uploadWorkspaceMemory: async ({ workspacePath, workspaceId, content }) => {
1114
- await api('PUT', `/workspace-memory?path=${encodeURIComponent(workspacePath)}&workspaceId=${encodeURIComponent(workspaceId)}`, { content });
1115
- },
1116
- presign: async ({ workspaceId, path: filePath, size, mime, sha256 }) => {
1117
- return api('POST', '/storage/presign', { workspaceId, path: filePath, size, mime, sha256 });
1118
- },
1119
- confirmUpload: async ({ workspaceId, path: filePath, objectKey }) => {
1120
- await api('POST', '/storage/confirm', { workspaceId, path: filePath, objectKey });
1121
- },
1122
- });
1123
1138
 
1124
- const finalMime = result?.mime ?? 'application/octet-stream';
1125
- const finalBytes = Number.isFinite(result?.bytes) ? result.bytes : 0;
1126
- const sizeText = finalBytes > 0 ? formatBytes(finalBytes) : 'unknown size';
1127
- const asyncNote = result?.async ? ' (uploading in background)' : '';
1128
- return {
1129
- content: [{
1130
- type: 'text',
1131
- text: `Saved local file to workspace: ${path} (${finalMime}, ${sizeText})${asyncNote}`,
1132
- }],
1133
- };
1139
+ const plan = resolveWorkspaceFileUploadPlan({ localPath, workspacePath });
1140
+
1141
+ // ── Text path: tiny, sync, unchanged ────────────────────────────────────
1142
+ if (plan.isText) {
1143
+ const result = await writeLocalFileToWorkspace({
1144
+ localPath,
1145
+ workspacePath,
1146
+ workspaceId: currentWorkspaceId,
1147
+ readFileSyncFn: readFileSync,
1148
+ uploadWorkspaceMemory: async ({ workspacePath: wp, workspaceId, content }) => {
1149
+ await api('PUT', `/workspace-memory?path=${encodeURIComponent(wp)}&workspaceId=${encodeURIComponent(workspaceId)}`, { content });
1150
+ },
1151
+ // Binary path below replaces these; keep these stubs in case the plan
1152
+ // misclassifies, to avoid TypeError mid-tool-call.
1153
+ presign: async () => { throw new Error('binary upload should not reach legacy presign'); },
1154
+ confirmUpload: async () => { throw new Error('binary upload should not reach legacy confirmUpload'); },
1155
+ });
1156
+ const bytes = Number.isFinite(result?.bytes) ? result.bytes : 0;
1157
+ return {
1158
+ content: [{
1159
+ type: 'text',
1160
+ text: `Saved local text file to workspace: ${workspacePath} (${plan.mime}, ${formatBytes(bytes)})`,
1161
+ }],
1162
+ };
1163
+ }
1164
+
1165
+ // ── Binary path: async enqueue ──────────────────────────────────────────
1166
+ try {
1167
+ const enq = await uploadJobManager.enqueue({
1168
+ workspaceId: currentWorkspaceId,
1169
+ agentId: AGENT_ID || null,
1170
+ localPath,
1171
+ workspacePath,
1172
+ mime: plan.mime,
1173
+ });
1174
+ const lines = [
1175
+ `Queued upload to workspace: ${workspacePath} (${plan.mime}, ${formatBytes(enq.size)})`,
1176
+ `mode=${enq.mode}${enq.mode === 'multipart' ? ` chunks=${enq.totalChunks}` : ''}`,
1177
+ `job_id=${enq.jobId}`,
1178
+ `status=pending`,
1179
+ `note=Upload runs in background. Call submit_to_library when ready — it waits up to ~90s for the upload to finish. If it returns "upload_still_in_progress", just retry submit_to_library a few seconds later. DO NOT re-record or re-compose.`,
1180
+ ];
1181
+ return { content: [{ type: 'text', text: lines.join('\n') }] };
1182
+ } catch (err) {
1183
+ return {
1184
+ isError: true,
1185
+ content: [{ type: 'text', text: `write_workspace_file failed: ${err?.message ?? err}` }],
1186
+ };
1187
+ }
1134
1188
  });
1135
1189
 
1136
1190
  // ── skill_list ───────────────────────────────────────────────────────────────
@@ -1433,7 +1487,21 @@ server.tool('record_url_narration',
1433
1487
  'Record a silent video of a URL by driving Chromium on an Xvfb display and capturing it with Playwright recordVideo, driven by a video plan; ffmpeg then transcodes the recording to mp4. Outputs a silent mp4 that can be passed to compose_video_v2 as a video-kind segment with an audio_path for narration.\n\nUse this as the canonical recording step for URL-narration videos. Falls back: if the page needs interactions outside the visual_action vocabulary (clicks, waits, OCR loops), use Monitor (Bash) with custom Playwright instead.\n\nRuntime requirements: this tool only works on a Linux daemon machine with Xvfb + Chromium + ffmpeg installed (ffmpeg is used to transcode the recording to mp4; no x11grab device support needed). macOS / Windows daemons will fail at startup.',
1434
1488
  {
1435
1489
  url: z.string().describe('Page URL to record'),
1436
- plan: z.record(z.any()).describe('A video plan: an object with `phases` (or `sections`), each a "visual beat" with `action` (scroll_to_dwell / linear_scroll_during / scroll_back / hold / ...), a target (`target_y` or `focus_region:[y1,y2]`) for scroll-type actions, and `dwell_ms` (how long to hold that beat — should match the segment\'s TTS duration). It can be hand-written or the output of plan_video_segments (whose returned segments array doubles as a valid plan).'),
1490
+ plan: z.record(z.any()).describe(
1491
+ 'A video plan: an object with `phases` (or `sections`), each a "visual beat" with '
1492
+ + '`action` (scroll_to_dwell / linear_scroll_during / scroll_back / hold / ...), a '
1493
+ + 'target (`target_y` or `focus_region:[y1,y2]`) for scroll-type actions, and '
1494
+ + '`dwell_ms` (how long to hold that beat — should match the segment\'s TTS duration).\n\n'
1495
+ + 'For RECRUITMENT URLs (mp.weixin.qq.com / 校招 / 实习 / 岗位 content), each section MUST '
1496
+ + 'also declare `target_y_content_label` — a short Chinese label describing what content '
1497
+ + 'sits at that pixel y position on the page (e.g. "标题区" / "岗位信息卡片" / "公司介绍" / '
1498
+ + '"届别说明"). Look at the take_page_screenshot output, find the y-pixel, and label it. '
1499
+ + 'Labels matching forbidden regions ("二维码" / "扫码" / "投递入口" / "投递方式" / "联系方式" / '
1500
+ + '"微信号" / "QR" / "阅读原文" / "外链") will cause the tool to refuse the recording — '
1501
+ + 'recruitment content must NOT dwell on these areas (see fragments.md '
1502
+ + 'frag.short.recruitment_url_mode_policy). Pick a different target_y in the 标题/岗位 '
1503
+ + 'information area and rewrite that section.'
1504
+ ),
1437
1505
  output_path: z.string().optional().describe('Workspace-relative output mp4 path. Default tmp/wx3_video/recorded-{ts}.mp4'),
1438
1506
  events_path: z.string().optional().describe('Workspace-relative events.json path. Default ${output_path}.events.json'),
1439
1507
  viewport: z.object({
@@ -69,6 +69,74 @@ function assertPipelineCompliance(plan) {
69
69
  }
70
70
  }
71
71
 
72
// Forbidden region keywords for recruitment content. If a section's
// target_y_content_label matches, we refuse to record — the resulting video
// would show 投递入口 / 二维码 / contact info, which violates the recruitment
// content policy (see fragments.md frag.short.recruitment_url_mode_policy).
//
// Discovered after Task #25 v1 ended up dwelling on FunPlus's QR/投递 area:
// the agent's plan declared target_y=2180 with dwell_ms=8500 without checking
// what content lived at that pixel position. This is a prompt-level rule
// that's been ignored often enough that we enforce it at the tool layer.
//
// NOTE: `\b` in JS regexes is ASCII-based (defined via `\w`), so it never
// matches at a CJK boundary. The old /\bQQ群\b/ silently failed on typical
// labels like "加入QQ群" — 群 is a non-word char, so the trailing \b cannot
// match at end-of-string or before another CJK char. Use a plain /QQ群/
// literal; ASCII-only patterns (WeChat, QR) keep their word boundaries.
const FORBIDDEN_REGION_PATTERNS = [
  /二维码/, /扫码/, /扫一扫/,
  /投递入口/, /投递方式/, /投递通道/, /投递渠道/, /报名入口/, /报名方式/,
  /联系方式/, /联系人/, /微信号/, /\bWeChat\b/i, /QQ群/,
  /阅读原文/, /外链/, /\bQR\b/i,
];
87
+
88
/**
 * URL-based heuristic for "recruitment-class" pages.
 *
 * Conservative by design: until real content classification exists, every
 * mp.weixin.qq.com URL (the usual carrier of forwarded 招聘 / 校招 / 实习
 * content) is treated as recruitment-class. The cost of a mis-flag is only
 * "agent must add a label", not "recording fails permanently".
 *
 * @param {*} url - candidate URL; anything that is not a string is rejected.
 * @returns {boolean} true when the URL looks like a WeChat MP article.
 */
function isRecruitmentLikeUrl(url) {
  return typeof url === 'string' && url.includes('mp.weixin.qq.com');
}
96
+
97
/**
 * Return the `source` of the first forbidden-region pattern that matches
 * `label`, or null when the label is clean. Match order follows the
 * declaration order of FORBIDDEN_REGION_PATTERNS.
 */
function describeForbiddenMatch(label) {
  const hit = FORBIDDEN_REGION_PATTERNS.find((pattern) => pattern.test(label));
  return hit ? hit.source : null;
}
103
+
104
/**
 * Safe-region gate for recruitment-class URLs (see isRecruitmentLikeUrl).
 *
 * Every plan section must declare `target_y_content_label` — a short label
 * describing the page content at its target_y — and that label must NOT
 * match FORBIDDEN_REGION_PATTERNS (二维码 / 投递入口 / 联系方式 …).
 *
 * @param {{url: *, plan: *}} args - url and plan as validated upstream.
 * @returns {string|null} null on pass; on the FIRST failing section, a
 *   human-readable error message that is surfaced to the agent verbatim.
 */
function checkSafeRegionLabels({ url, plan }) {
  // Non-recruitment URLs are exempt — no labels required.
  if (!isRecruitmentLikeUrl(url)) return null;
  const segments = planSegments(plan);
  // NOTE(review): a plan with no recognizable segments passes this gate —
  // presumably segment-shape validation happens earlier; confirm upstream.
  if (!segments) return null;
  for (let i = 0; i < segments.length; i += 1) {
    const seg = segments[i] ?? {};
    // Accept both snake_case and camelCase spellings of the label field.
    const label = normalizeText(seg.target_y_content_label ?? seg.targetYContentLabel ?? '');
    if (!label) {
      return (
        `record_url_narration: section[${i}] is missing required field `
        + `\`target_y_content_label\`. For recruitment URLs (mp.weixin.qq.com / `
        + `校招 / 实习等) you MUST label what content lives at target_y so the `
        + `tool can verify it is not 二维码/投递入口/联系方式. Look at the page `
        + `screenshot, find what is at target_y=${seg.target_y ?? '<unset>'}, `
        + `and add a short label like "标题区" / "岗位信息卡片" / "公司介绍".`
      );
    }
    const match = describeForbiddenMatch(label);
    if (match) {
      return (
        `record_url_narration: section[${i}] target_y=${seg.target_y ?? '?'} `
        + `is labeled "${label}", which matches a forbidden region pattern `
        + `/${match}/. Recruitment content must NOT dwell on 投递入口 / 二维码 / `
        + `联系方式 areas. Pick a different target_y inside the 标题区 / 岗位 `
        + `信息卡片 / 公司介绍 area and rewrite this section.`
      );
    }
  }
  return null;
}
139
+
72
140
  export function validateRecordUrlNarrationArgs(args = {}) {
73
141
  const normalizedUrl = normalizeText(args.url);
74
142
  if (!normalizedUrl) {
@@ -140,6 +208,18 @@ export async function runRecordUrlNarrationTool({
140
208
  return toolError(`Error: ${error.message}`);
141
209
  }
142
210
 
211
+ // Safe-region check for recruitment URLs — refuse plans that dwell on
212
+ // forbidden regions (二维码 / 投递入口 / 联系方式) before we even start
213
+ // Chromium. The agent must label each target_y with the content that lives
214
+ // there, and the labels are pattern-matched against a forbidden list.
215
+ const safeRegionError = checkSafeRegionLabels({
216
+ url: validatedInput.url,
217
+ plan: validatedInput.plan,
218
+ });
219
+ if (safeRegionError) {
220
+ return toolError(`Error: ${safeRegionError}`);
221
+ }
222
+
143
223
  try {
144
224
  const result = await runMandatoryLocalToolFn({
145
225
  toolName: 'record_url_narration',
@@ -30,17 +30,18 @@ export async function runSubmitToLibraryTool({
30
30
  '/content-library/submit',
31
31
  buildSubmitToLibraryBody(args, currentWorkspaceId)
32
32
  );
33
- // Server returns 2xx + body.error for transient "still processing" cases
34
- // (e.g. HTTP 202 + {error: "video is still uploading, retry shortly"} when
35
- // the workspace file's object_status is still 'uploading'). The HTTP client
36
- // treats any 2xx as success and returns the body verbatim, so we must
37
- // surface body.error here — otherwise the agent reads `data.itemId` as
38
- // undefined and (because the literal "undefined" looks like proof the
39
- // submit silently failed) tends to redo the entire video instead of just
40
- // retrying submit_to_library. Fail loudly with the server's message so the
41
- // agent retries the right step.
33
+ // Server returns 2xx + body.error for transient "still processing" cases:
34
+ // HTTP 202 + {error: 'upload_still_in_progress', message: '...'} when the
35
+ // async upload pipeline (see docs/upload-pipeline-design.md) hasn't
36
+ // finished. The HTTP client treats any 2xx as success and returns the
37
+ // body verbatim, so we must surface body.error here — otherwise the agent
38
+ // reads `data.itemId` as undefined and (because the literal "undefined"
39
+ // looks like proof the submit silently failed) tends to redo the entire
40
+ // video instead of just retrying submit_to_library. Prefer body.message
41
+ // when present (rich human-readable retry hint); fall back to body.error.
42
42
  if (data && typeof data === 'object' && typeof data.error === 'string' && data.error.trim()) {
43
- return toolError(`submit_to_library not ready: ${data.error}. Retry submit_to_library with the same video_path in a few seconds — do NOT re-record or re-compose the video.`);
43
+ const detail = (typeof data.message === 'string' && data.message.trim()) ? data.message : data.error;
44
+ return toolError(`submit_to_library not ready: ${detail}. Retry submit_to_library with the same video_path in a few seconds — do NOT re-record or re-compose the video.`);
44
45
  }
45
46
  if (!data || typeof data.itemId !== 'string' || !data.itemId) {
46
47
  return toolError(`submit_to_library returned no itemId. Server response: ${JSON.stringify(data).slice(0, 300)}. Retry submit_to_library with the same video_path — do NOT re-record or re-compose.`);
@@ -0,0 +1,565 @@
1
+ // Daemon-owned upload job manager.
2
+ //
3
+ // Decouples the agent-facing `write_workspace_file` tool from the actual
4
+ // PUT-to-COS round-trip so:
5
+ // - the tool returns in <50ms (just enqueues a job) rather than holding the
6
+ // codex/claude tool slot for minutes
7
+ // - PUT failures are retried with exponential backoff
8
+ // - large files use COS multipart so a single dropped chunk doesn't restart
9
+ // the whole upload
10
+ // - daemon restarts pick up in-flight jobs from disk and resume
11
+ //
12
+ // Out of scope for this module: the actual COS API calls. Those live behind
13
+ // the injected `serverApi` interface (M3 implements the server endpoints, M4
14
+ // wires this manager to it).
15
+ //
16
+ // Schema and state machine: see docs/upload-pipeline-design.md.
17
+
18
+ import {
19
+ readFileSync, writeFileSync, readdirSync, statSync,
20
+ promises as fsPromises,
21
+ createReadStream,
22
+ mkdirSync, renameSync,
23
+ } from 'node:fs';
24
+ import { open as fsOpen } from 'node:fs/promises';
25
+ import path from 'node:path';
26
+ import crypto, { randomUUID } from 'node:crypto';
27
+ import os from 'node:os';
28
+
29
// Version stamp written into each persisted job; bump when the on-disk job
// JSON shape changes so older files can be migrated or rejected.
export const SCHEMA_VERSION = 1;
// Multipart part size used when slicing large files.
export const DEFAULT_CHUNK_SIZE = 8 * 1024 * 1024; // 8 MB
// Files at or above this size go through multipart upload.
export const MULTIPART_THRESHOLD = 5 * 1024 * 1024; // COS minimum part size
// Job-level attempt budget — presumably enforced by the failure-recording
// path (not visible in this chunk); confirm against _recordFailure.
export const MAX_JOB_ATTEMPTS = 5;
// Scheduler poll period for the tick loop.
export const DEFAULT_TICK_INTERVAL_MS = 2_000;
// Per-chunk retry budget within a single job attempt.
export const PART_RETRY_ATTEMPTS = 3;
export const PART_RETRY_BASE_MS = 1_000; // 1s, 3s, 9s
export const TERMINAL_JOB_TTL_MS = 7 * 24 * 3600 * 1000; // sweep done/dead_letter after 7 days
export const HOUSEKEEPING_INTERVAL_MS = 6 * 3600 * 1000; // run housekeeping every 6h
// Per-PUT timeout — Node's fetch has no overall request timeout. Without this
// a stalled COS connection wedges the chunk loop forever (observed during the
// first Task #25 upload: chunk 1 PUT hung 7+ minutes with no progress, no
// error). 5 minutes covers slow networks for an 8MB chunk (~25kB/s floor)
// while still letting failures surface to the chunk-level retry loop.
export const PUT_REQUEST_TIMEOUT_MS = 5 * 60 * 1000;
44
+
45
/** Current wall-clock time as an ISO-8601 UTC string (used for job timestamps). */
function nowIso() {
  return new Date().toISOString();
}
46
+
47
/**
 * Stream a file through SHA-256 and resolve with the lowercase hex digest.
 * Rejects on any read error (missing file, permissions, truncation) —
 * async iteration over the read stream propagates stream errors as throws.
 */
async function sha256OfFile(localPath) {
  const hasher = crypto.createHash('sha256');
  for await (const chunk of createReadStream(localPath)) {
    hasher.update(chunk);
  }
  return hasher.digest('hex');
}
56
+
57
/**
 * Job-level retry backoff: attempts² × 30s.
 * 1st retry: +30s, 2nd: +120s, 3rd: +270s, 4th: +480s.
 */
export function jobBackoffMs(attempts) {
  return 30_000 * attempts ** 2;
}
62
+
63
/** Chunk-level retry backoff, geometric from PART_RETRY_BASE_MS: 1s, 3s, 9s. */
function partBackoffMs(attempt) {
  const growth = 3 ** (attempt - 1);
  return PART_RETRY_BASE_MS * growth;
}
67
+
68
/** Promise-based delay, used between polls and chunk retries. */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
69
+
70
/**
 * Run `fetchFn` with a hard wall-clock timeout.
 *
 * Node's fetch has no built-in overall request timeout, so an AbortController
 * fires after `timeoutMs`. An abort (or any AbortError from `fetchFn`)
 * surfaces as a fresh Error naming the timeout, with the original error kept
 * on `cause`; every other failure is rethrown untouched. The timer is always
 * cleared, success or failure.
 */
async function fetchWithTimeout(fetchFn, url, init, timeoutMs) {
  const controller = new AbortController();
  const abortTimer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    return await fetchFn(url, { ...init, signal: controller.signal });
  } catch (err) {
    const timedOut = err?.name === 'AbortError' || controller.signal.aborted;
    if (!timedOut) throw err;
    const wrapped = new Error(`PUT timed out after ${Math.round(timeoutMs / 1000)}s (COS unresponsive)`);
    wrapped.cause = err;
    throw wrapped;
  } finally {
    clearTimeout(abortTimer);
  }
}
86
+
87
+ /**
88
+ * UploadJobManager — singleton in daemon process.
89
+ *
90
+ * const mgr = new UploadJobManager({ serverApi });
91
+ * mgr.start();
92
+ * const { jobId } = await mgr.enqueue({ workspaceId, agentId, localPath, workspacePath, mime });
93
+ * // job runs in background; later:
94
+ * mgr.getStatus(jobId);
95
+ * // or shutting down:
96
+ * mgr.stop();
97
+ *
98
+ * serverApi shape (M3 implements):
99
+ * presignSingle({ workspaceId, path, size, mime, sha256 }) → { objectKey, uploadUrl, method?, headers? }
100
+ * confirmSingle({ workspaceId, path, objectKey }) → { ok: true }
101
+ * presignMultipart({ workspaceId, path, size, mime, sha256 }) → { objectKey, cosUploadId }
102
+ * presignPart({ workspaceId, objectKey, cosUploadId, partNumber }) → { url, method?, headers? }
103
+ * completeMultipart({ workspaceId, path, objectKey, cosUploadId, parts, size, sha256 }) → { ok: true }
104
+ * abortMultipart({ workspaceId, path, objectKey, cosUploadId }) → { ok: true }
105
+ */
106
+ export class UploadJobManager {
107
  /**
   * @param {object} [opts]
   * @param {string} [opts.jobDir] - directory for persisted job JSON files;
   *   created recursively here so enqueue/_persist can write immediately.
   * @param {object} opts.serverApi - REQUIRED; endpoint adapter (see class
   *   JSDoc for the expected shape).
   * @param {Function} [opts.fetchFn] - injectable fetch (tests stub this).
   * @param {Function} [opts.nowFn] - injectable clock, ms since epoch.
   * @param {Function} [opts.log] - line logger; defaults to stderr.
   * @param {number} [opts.tickIntervalMs] - scheduler poll period.
   * @param {number} [opts.chunkSize] - multipart part size in bytes.
   * @param {number} [opts.multipartThreshold] - min file size for multipart.
   * @throws {Error} when serverApi is omitted.
   */
  constructor({
    jobDir = path.join(os.homedir(), '.lightcone', 'upload-jobs'),
    serverApi,
    fetchFn = globalThis.fetch,
    nowFn = () => Date.now(),
    log = (msg) => console.error(`[UploadJobManager] ${msg}`),
    tickIntervalMs = DEFAULT_TICK_INTERVAL_MS,
    chunkSize = DEFAULT_CHUNK_SIZE,
    multipartThreshold = MULTIPART_THRESHOLD,
  } = {}) {
    if (!serverApi) throw new Error('UploadJobManager: serverApi is required');
    this.jobDir = jobDir;
    this.serverApi = serverApi;
    this.fetchFn = fetchFn;
    this.nowFn = nowFn;
    this.log = log;
    this.tickIntervalMs = tickIntervalMs;
    this.chunkSize = chunkSize;
    this.multipartThreshold = multipartThreshold;

    // Scheduler state — null/false while stopped; populated by start().
    this._tickInterval = null;
    this._housekeepingInterval = null;
    this._stopping = false;
    this._activeJobs = new Set(); // jobIds currently advancing
    this._pathLocks = new Map(); // `${workspaceId}|${workspacePath}` → jobId

    mkdirSync(this.jobDir, { recursive: true });
  }
135
+
136
+ // ─── public API ──────────────────────────────────────────────────────────
137
+
138
+ /**
139
+ * Stat + sha256 the file, choose mode (single vs multipart), persist a
140
+ * pending job, and schedule a tick. Throws synchronously on path-lock
141
+ * conflict or missing/unreadable file.
142
+ */
143
+ async enqueue({ workspaceId, agentId = null, localPath, workspacePath, mime = 'application/octet-stream' }) {
144
+ if (!workspaceId) throw new Error('enqueue: workspaceId required');
145
+ if (!localPath) throw new Error('enqueue: localPath required');
146
+ if (!workspacePath) throw new Error('enqueue: workspacePath required');
147
+
148
+ const pathKey = `${workspaceId}|${workspacePath}`;
149
+ const existing = this._pathLocks.get(pathKey);
150
+ if (existing) {
151
+ throw new Error(`upload_path_locked: another upload to ${workspacePath} is in progress (jobId=${existing})`);
152
+ }
153
+
154
+ let st;
155
+ try { st = statSync(localPath); }
156
+ catch (err) { throw new Error(`localPath not readable: ${err.message}`); }
157
+ const size = st.size;
158
+ if (!Number.isFinite(size) || size <= 0) {
159
+ throw new Error(`localPath has invalid size: ${size}`);
160
+ }
161
+ const mtimeMs = st.mtimeMs;
162
+ const sha256 = await sha256OfFile(localPath);
163
+
164
+ const uploadMode = size >= this.multipartThreshold ? 'multipart' : 'single';
165
+ const totalChunks = uploadMode === 'multipart' ? Math.ceil(size / this.chunkSize) : 1;
166
+
167
+ const job = {
168
+ schemaVersion: SCHEMA_VERSION,
169
+ jobId: randomUUID(),
170
+ createdAt: nowIso(),
171
+ updatedAt: nowIso(),
172
+ agentId,
173
+ workspaceId,
174
+ localPath,
175
+ workspacePath,
176
+ mime,
177
+ objectKey: null,
178
+ size,
179
+ mtimeMs,
180
+ sha256,
181
+ uploadMode,
182
+ chunkSize: this.chunkSize,
183
+ totalChunks,
184
+ cosUploadId: null,
185
+ doneParts: [],
186
+ status: 'pending',
187
+ attempts: 0,
188
+ lastError: null,
189
+ lastErrorAt: null,
190
+ nextAttemptAt: this.nowFn(),
191
+ };
192
+
193
+ this._persist(job);
194
+ this._pathLocks.set(pathKey, job.jobId);
195
+
196
+ // Best-effort immediate tick so the very first job doesn't wait for the interval.
197
+ setImmediate(() => this._tick().catch(err => this.log(`tick (post-enqueue) failed: ${err.message}`)));
198
+
199
+ return {
200
+ jobId: job.jobId,
201
+ mode: uploadMode,
202
+ totalChunks,
203
+ status: 'pending',
204
+ size,
205
+ };
206
+ }
207
+
208
+ getStatus(jobId) {
209
+ const job = this._loadById(jobId);
210
+ return job ? this._publicState(job) : null;
211
+ }
212
+
213
  /**
   * Start the background scheduler. Idempotent: calling start() while
   * already running is a no-op. Rebuilds the in-memory path locks from the
   * on-disk jobs (so an enqueue racing a restart still sees in-flight
   * uploads), then begins the tick loop plus a much slower housekeeping
   * sweep. NOTE(review): the intervals are not unref()'d, so a running
   * manager keeps the process alive — confirm that is intended.
   */
  start() {
    if (this._tickInterval) return;
    this._stopping = false;
    // Rebuild path locks from on-disk jobs so concurrent enqueue still respects them.
    for (const job of this._listJobs()) {
      if (job.status === 'pending' || job.status === 'uploading') {
        const pathKey = `${job.workspaceId}|${job.workspacePath}`;
        this._pathLocks.set(pathKey, job.jobId);
      }
    }
    this._tickInterval = setInterval(() => {
      this._tick().catch(err => this.log(`tick failed: ${err.message}`));
    }, this.tickIntervalMs);
    // Run housekeeping immediately + on a slow schedule.
    this._housekeep();
    this._housekeepingInterval = setInterval(() => {
      this._housekeep();
    }, HOUSEKEEPING_INTERVAL_MS);
    this.log(`started (jobDir=${this.jobDir}, tick=${this.tickIntervalMs}ms)`);
  }
233
+
234
+ /**
235
+ * Signal stop. In-flight `_advance` calls notice via `_stopping` flag and
236
+ * exit between chunks; partial state is persisted so the next start() can
237
+ * resume. Does NOT abort the underlying fetch in progress — a single chunk
238
+ * may still complete after stop returns.
239
+ */
240
+ stop() {
241
+ this._stopping = true;
242
+ if (this._tickInterval) clearInterval(this._tickInterval);
243
+ this._tickInterval = null;
244
+ if (this._housekeepingInterval) clearInterval(this._housekeepingInterval);
245
+ this._housekeepingInterval = null;
246
+ }
247
+
248
  /**
   * Drop terminal (done / dead_letter) job files older than TERMINAL_JOB_TTL_MS.
   * Keeps recent ones around for observability / debugging. Never throws:
   * every failure is logged and swallowed so one bad job file cannot kill
   * the scheduler.
   */
  _housekeep() {
    try {
      const now = this.nowFn();
      let pruned = 0;
      for (const job of this._listJobs()) {
        if (job.status !== 'done' && job.status !== 'dead_letter') continue;
        // Date.parse of a garbled timestamp yields NaN → skip rather than prune.
        const updatedAt = job.updatedAt ? Date.parse(job.updatedAt) : 0;
        if (!Number.isFinite(updatedAt)) continue;
        if (now - updatedAt < TERMINAL_JOB_TTL_MS) continue;
        try {
          // Fire-and-forget async unlink; `pruned` counts deletion ATTEMPTS,
          // not confirmed deletions (the .catch swallows unlink errors).
          // Acceptable for a rare sweep over tiny files.
          fsPromises.unlink(this._jobPath(job.jobId)).catch(() => {});
          pruned += 1;
        } catch { /* ignore */ }
      }
      if (pruned > 0) this.log(`housekeeping: pruned ${pruned} terminal job(s) older than ${TERMINAL_JOB_TTL_MS / 86_400_000}d`);
    } catch (err) {
      this.log(`housekeeping failed: ${err?.message ?? err}`);
    }
  }
273
+
274
+ /**
275
+ * Test helper: wait until no jobs are pending/uploading or until timeout.
276
+ */
277
+ async waitAllSettled({ timeoutMs = 30_000 } = {}) {
278
+ const start = this.nowFn();
279
+ while (this.nowFn() - start < timeoutMs) {
280
+ const jobs = this._listJobs();
281
+ const inFlight = jobs.some(j => j.status === 'pending' || j.status === 'uploading');
282
+ if (!inFlight && this._activeJobs.size === 0) return;
283
+ await sleep(50);
284
+ }
285
+ throw new Error('waitAllSettled: timeout');
286
+ }
287
+
288
+ // ─── internal ────────────────────────────────────────────────────────────
289
+
290
  /**
   * One scheduler pass: fire `_advance` for every runnable job.
   *
   * A job is runnable when it is non-terminal, not already being advanced
   * (tracked via `_activeJobs`), and its `nextAttemptAt` backoff deadline
   * has passed. `_advance` is intentionally NOT awaited — jobs progress
   * concurrently; each removes itself from `_activeJobs` when it settles,
   * and errors are logged rather than propagated to the interval.
   */
  async _tick() {
    if (this._stopping) return;
    const jobs = this._listJobs();
    const now = this.nowFn();
    for (const job of jobs) {
      if (this._stopping) break;
      if (job.status === 'done' || job.status === 'dead_letter') continue;
      if (this._activeJobs.has(job.jobId)) continue;
      // Respect the job-level retry backoff deadline.
      if (job.nextAttemptAt && job.nextAttemptAt > now) continue;
      this._activeJobs.add(job.jobId);
      this._advance(job)
        .catch(err => this.log(`advance(${job.jobId}) crashed: ${err.message}`))
        .finally(() => this._activeJobs.delete(job.jobId));
    }
  }
305
+
306
  /**
   * Advance one job by a single attempt: verify the local file still matches
   * what was enqueued, mark the job 'uploading', then run the mode-specific
   * upload. Errors thrown by the upload path are routed to `_recordFailure`;
   * a vanished or size-changed source file dead-letters immediately, since
   * no retry can ever succeed against a different file.
   */
  async _advance(job) {
    try {
      // Local-file integrity check before each attempt.
      let st;
      try { st = statSync(job.localPath); }
      catch (err) {
        return this._markDeadLetter(job, `local_file_gone:${err.code ?? err.message}`);
      }
      if (st.size !== job.size) {
        return this._markDeadLetter(job, `local_file_changed: declared=${job.size}, now=${st.size}`);
      }

      // Persist the state flip only on the first attempt that gets here.
      if (job.status !== 'uploading') {
        job.status = 'uploading';
        this._persist(job);
      }

      if (job.uploadMode === 'single') {
        await this._advanceSingle(job);
      } else {
        await this._advanceMultipart(job);
      }
    } catch (err) {
      this._recordFailure(job, err);
    }
  }
332
+
333
+ async _advanceSingle(job) {
334
+ const presign = await this.serverApi.presignSingle({
335
+ workspaceId: job.workspaceId,
336
+ path: job.workspacePath,
337
+ size: job.size,
338
+ mime: job.mime,
339
+ sha256: job.sha256,
340
+ });
341
+ job.objectKey = presign.objectKey;
342
+ this._persist(job);
343
+
344
+ const fileBuf = await fsPromises.readFile(job.localPath);
345
+ const resp = await fetchWithTimeout(this.fetchFn, presign.uploadUrl, {
346
+ method: presign.method ?? 'PUT',
347
+ headers: {
348
+ 'Content-Type': job.mime,
349
+ 'Content-Length': String(job.size),
350
+ ...(presign.headers ?? {}),
351
+ },
352
+ body: fileBuf,
353
+ }, PUT_REQUEST_TIMEOUT_MS);
354
+ if (!resp.ok) {
355
+ const text = await resp.text().catch(() => '');
356
+ throw new Error(`single PUT failed: HTTP ${resp.status} ${text.slice(0, 200)}`);
357
+ }
358
+
359
+ await this.serverApi.confirmSingle({
360
+ workspaceId: job.workspaceId,
361
+ path: job.workspacePath,
362
+ objectKey: job.objectKey,
363
+ });
364
+
365
+ this._markDone(job);
366
+ }
367
+
368
+ async _advanceMultipart(job) {
369
+ if (!job.cosUploadId) {
370
+ const init = await this.serverApi.presignMultipart({
371
+ workspaceId: job.workspaceId,
372
+ path: job.workspacePath,
373
+ size: job.size,
374
+ mime: job.mime,
375
+ sha256: job.sha256,
376
+ });
377
+ job.objectKey = init.objectKey;
378
+ job.cosUploadId = init.cosUploadId;
379
+ this._persist(job);
380
+ }
381
+
382
+ const doneSet = new Set(job.doneParts.map(p => p.partNumber));
383
+ const fh = await fsOpen(job.localPath, 'r');
384
+ try {
385
+ for (let i = 1; i <= job.totalChunks; i++) {
386
+ if (this._stopping) return; // graceful shutdown mid-job
387
+ if (doneSet.has(i)) continue;
388
+
389
+ const offset = (i - 1) * job.chunkSize;
390
+ const remaining = job.size - offset;
391
+ const partLen = Math.min(job.chunkSize, remaining);
392
+ const buf = Buffer.alloc(partLen);
393
+ await fh.read(buf, 0, partLen, offset);
394
+
395
+ const etag = await this._uploadPartWithRetry(job, i, buf);
396
+ job.doneParts.push({ partNumber: i, etag });
397
+ this._persist(job);
398
+ }
399
+ } finally {
400
+ await fh.close().catch(() => {});
401
+ }
402
+
403
+ if (this._stopping) return;
404
+
405
+ await this.serverApi.completeMultipart({
406
+ workspaceId: job.workspaceId,
407
+ path: job.workspacePath,
408
+ objectKey: job.objectKey,
409
+ cosUploadId: job.cosUploadId,
410
+ parts: job.doneParts,
411
+ size: job.size,
412
+ sha256: job.sha256,
413
+ });
414
+
415
+ this._markDone(job);
416
+ }
417
+
418
+ async _uploadPartWithRetry(job, partNumber, buf) {
419
+ let lastErr;
420
+ for (let attempt = 1; attempt <= PART_RETRY_ATTEMPTS; attempt++) {
421
+ if (this._stopping) throw new Error('stopping');
422
+ try {
423
+ const presign = await this.serverApi.presignPart({
424
+ workspaceId: job.workspaceId,
425
+ objectKey: job.objectKey,
426
+ cosUploadId: job.cosUploadId,
427
+ partNumber,
428
+ });
429
+ const resp = await fetchWithTimeout(this.fetchFn, presign.url, {
430
+ method: presign.method ?? 'PUT',
431
+ headers: {
432
+ 'Content-Length': String(buf.length),
433
+ ...(presign.headers ?? {}),
434
+ },
435
+ body: buf,
436
+ }, PUT_REQUEST_TIMEOUT_MS);
437
+ if (!resp.ok) {
438
+ const text = await resp.text().catch(() => '');
439
+ throw new Error(`HTTP ${resp.status} ${text.slice(0, 200)}`);
440
+ }
441
+ const etag = resp.headers.get?.('etag') ?? resp.headers.get?.('ETag') ?? presign.etag ?? '';
442
+ if (!etag) throw new Error(`PUT part ${partNumber} missing etag`);
443
+ return etag;
444
+ } catch (err) {
445
+ lastErr = err;
446
+ this.log(`part ${partNumber} attempt ${attempt}/${PART_RETRY_ATTEMPTS} failed: ${err.message}`);
447
+ if (attempt < PART_RETRY_ATTEMPTS) {
448
+ await sleep(partBackoffMs(attempt));
449
+ }
450
+ }
451
+ }
452
+ throw new Error(`part ${partNumber} exhausted ${PART_RETRY_ATTEMPTS} retries: ${lastErr?.message ?? lastErr}`);
453
+ }
454
+
455
+ _markDone(job) {
456
+ job.status = 'done';
457
+ job.updatedAt = nowIso();
458
+ job.lastError = null;
459
+ job.lastErrorAt = null;
460
+ job.nextAttemptAt = null;
461
+ this._persist(job);
462
+ this._releaseLock(job);
463
+ this.log(`job ${job.jobId} done (${job.workspacePath}, ${job.size} bytes, ${job.uploadMode})`);
464
+ }
465
+
466
+ _markDeadLetter(job, reason) {
467
+ // Best-effort COS cleanup so we don't leak storage cost on aborted multipart.
468
+ if (job.uploadMode === 'multipart' && job.cosUploadId && job.objectKey) {
469
+ Promise.resolve(this.serverApi.abortMultipart({
470
+ workspaceId: job.workspaceId,
471
+ path: job.workspacePath,
472
+ objectKey: job.objectKey,
473
+ cosUploadId: job.cosUploadId,
474
+ })).catch(err => this.log(`abort_multipart for ${job.jobId} failed: ${err.message}`));
475
+ }
476
+ job.status = 'dead_letter';
477
+ job.lastError = String(reason);
478
+ job.lastErrorAt = nowIso();
479
+ job.updatedAt = nowIso();
480
+ job.nextAttemptAt = null;
481
+ this._persist(job);
482
+ this._releaseLock(job);
483
+ this.log(`job ${job.jobId} dead_letter: ${reason}`);
484
+ }
485
+
486
+ _recordFailure(job, err) {
487
+ job.attempts = (job.attempts ?? 0) + 1;
488
+ job.lastError = String(err?.message ?? err);
489
+ job.lastErrorAt = nowIso();
490
+ job.updatedAt = nowIso();
491
+ if (job.attempts >= MAX_JOB_ATTEMPTS) {
492
+ this._markDeadLetter(job, `max_attempts_exhausted: ${job.lastError}`);
493
+ return;
494
+ }
495
+ job.status = 'pending';
496
+ const backoff = jobBackoffMs(job.attempts);
497
+ job.nextAttemptAt = this.nowFn() + backoff;
498
+ this._persist(job);
499
+ this.log(`job ${job.jobId} attempt ${job.attempts} failed: ${job.lastError}; next retry in ${Math.round(backoff / 1000)}s`);
500
+ }
501
+
502
+ _releaseLock(job) {
503
+ const pathKey = `${job.workspaceId}|${job.workspacePath}`;
504
+ if (this._pathLocks.get(pathKey) === job.jobId) {
505
+ this._pathLocks.delete(pathKey);
506
+ }
507
+ }
508
+
509
+ // ─── persistence ─────────────────────────────────────────────────────────
510
+
511
+ _jobPath(jobId) {
512
+ return path.join(this.jobDir, `${jobId}.json`);
513
+ }
514
+
515
+ _persist(job) {
516
+ job.updatedAt = nowIso();
517
+ const dest = this._jobPath(job.jobId);
518
+ const tmp = `${dest}.tmp`;
519
+ writeFileSync(tmp, JSON.stringify(job, null, 2));
520
+ renameSync(tmp, dest); // atomic on POSIX
521
+ }
522
+
523
+ _loadById(jobId) {
524
+ try {
525
+ const text = readFileSync(this._jobPath(jobId), 'utf8');
526
+ const job = JSON.parse(text);
527
+ if (job.schemaVersion !== SCHEMA_VERSION) {
528
+ this.log(`job ${jobId}: unsupported schemaVersion ${job.schemaVersion}, ignored`);
529
+ return null;
530
+ }
531
+ return job;
532
+ } catch { return null; }
533
+ }
534
+
535
+ _listJobs() {
536
+ let names;
537
+ try { names = readdirSync(this.jobDir); }
538
+ catch { return []; }
539
+ const out = [];
540
+ for (const name of names) {
541
+ if (!name.endsWith('.json') || name.endsWith('.tmp.json')) continue;
542
+ const jobId = name.slice(0, -5);
543
+ const job = this._loadById(jobId);
544
+ if (job) out.push(job);
545
+ }
546
+ return out;
547
+ }
548
+
549
+ _publicState(job) {
550
+ return {
551
+ jobId: job.jobId,
552
+ status: job.status,
553
+ mode: job.uploadMode,
554
+ size: job.size,
555
+ progress: job.uploadMode === 'multipart'
556
+ ? { donePartCount: job.doneParts.length, totalChunks: job.totalChunks }
557
+ : { donePartCount: job.status === 'done' ? 1 : 0, totalChunks: 1 },
558
+ attempts: job.attempts,
559
+ lastError: job.lastError,
560
+ lastErrorAt: job.lastErrorAt,
561
+ nextAttemptAt: job.nextAttemptAt,
562
+ objectKey: job.objectKey,
563
+ };
564
+ }
565
+ }
@@ -0,0 +1,80 @@
1
+ // Thin daemon-side wrapper around the server's /storage/* endpoints.
2
+ //
3
+ // Implements the `serverApi` interface consumed by UploadJobManager. Single
4
+ // place that translates the abstract { presignSingle / confirmSingle /
5
+ // presignMultipart / presignPart / completeMultipart / abortMultipart } calls
6
+ // into HTTP requests against the lightcone server's internal/agent/.../storage/*
7
+ // endpoints. The actual HTTP plumbing — auth headers, governance, retries —
8
+ // is delegated to the `api` function passed in (typically chat-bridge's `api`).
9
+ //
10
+ // Why split this out: keeps UploadJobManager pure (no HTTP knowledge) and lets
11
+ // us write the manager's tests entirely against an injected mock.
12
+
13
/**
 * @param {Object} deps
 * @param {(method: string, path: string, body?: unknown) => Promise<unknown>} deps.api
 *   HTTP helper that POSTs to `/internal/agent/<agentId>/<path>` and returns
 *   the JSON body. Throws on non-2xx.
 */
export function createUploadServerApi({ api }) {
  if (typeof api !== 'function') {
    throw new Error('createUploadServerApi: api function is required');
  }

  // Every storage endpoint is a POST with a JSON body.
  const post = (endpoint, body) => api('POST', endpoint, body);

  return {
    // Existing endpoint, reused as-is. The server answers with
    // { uploadUrl, method, headers, objectKey, alreadyExists }; normalize
    // into the shape UploadJobManager expects.
    async presignSingle({ workspaceId, path, size, mime, sha256 }) {
      const data = await post('/storage/presign', { workspaceId, path, size, mime, sha256 });
      return {
        objectKey: data.objectKey,
        uploadUrl: data.uploadUrl,
        method: data.method ?? 'PUT',
        headers: data.headers ?? {},
        alreadyExists: Boolean(data.alreadyExists),
      };
    },

    async confirmSingle({ workspaceId, path, objectKey }) {
      return post('/storage/confirm', { workspaceId, path, objectKey });
    },

    async presignMultipart({ workspaceId, path, size, mime, sha256 }) {
      const data = await post('/storage/presign-multipart', { workspaceId, path, size, mime, sha256 });
      return {
        objectKey: data.objectKey,
        cosUploadId: data.cosUploadId,
        alreadyExists: Boolean(data.alreadyExists),
      };
    },

    async presignPart({ workspaceId, objectKey, cosUploadId, partNumber }) {
      const data = await post('/storage/presign-part', { workspaceId, objectKey, cosUploadId, partNumber });
      return {
        url: data.url,
        method: data.method ?? 'PUT',
        headers: data.headers ?? {},
      };
    },

    async completeMultipart({ workspaceId, path, objectKey, cosUploadId, parts, size, sha256 }) {
      return post('/storage/complete-multipart', { workspaceId, path, objectKey, cosUploadId, parts, size, sha256 });
    },

    async abortMultipart({ workspaceId, path, objectKey, cosUploadId }) {
      return post('/storage/abort-multipart', { workspaceId, path, objectKey, cosUploadId });
    },
  };
}