@worca/ui 0.20.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,14 +3,56 @@
3
3
  *
4
4
  * GET /worktrees — list worktree entries enriched with disk/age/group data
5
5
  * DELETE /worktrees/:run_id — remove a worktree (409 if running, 412 if resumable/grouped without ?force=1)
6
+ * POST /worktrees/cleanup — async batch remove (returns 202 with `{ok, accepted, rejected}`; removal runs in the background)
6
7
  *
7
8
  * Expects req.project.worcaDir to be set by projectResolver middleware.
9
+ *
10
+ * NOTE on disk semantics: `disk_bytes` reflects project files only — vendored
11
+ * and derived directories listed in WALK_SKIP_DIRS (node_modules, .git, .venv,
12
+ * dist, build, .next, etc.) are skipped during the walk. This answers "how
13
+ * much project disk would I free?" rather than raw on-disk bytes, and makes
14
+ * cold first loads ~10× faster on node_modules-heavy worktrees. The route
15
+ * surfaces `disk_walk_skip_dirs` in the GET response so clients can document
16
+ * the discrepancy with `du -sh`.
8
17
  */
9
18
 
10
- import { existsSync, readdirSync, readFileSync, statSync } from 'node:fs';
19
+ import { existsSync, readdirSync, readFileSync, writeFileSync } from 'node:fs';
20
+ import * as fsp from 'node:fs/promises';
11
21
  import { join } from 'node:path';
12
22
  import { Router } from 'express';
13
- import { removeWorktree } from './worktree-ops.js';
23
+ import { pruneWorktrees, removeWorktree } from './worktree-ops.js';
24
+
25
+ const CLEANUP_CONCURRENCY = 4;
26
+
27
+ /**
28
+ * Run an array of `{run_id, fn}` tasks with bounded concurrency.
29
+ * Tasks are expected to return a result object — but if one throws,
30
+ * the limiter converts the throw into an attributable failure result
31
+ * so a single bad task can't halt the rest of the batch.
32
+ */
33
+ async function runWithConcurrencyLimit(tasks, limit) {
34
+ const results = new Array(tasks.length);
35
+ let nextIdx = 0;
36
+ async function worker() {
37
+ while (nextIdx < tasks.length) {
38
+ const idx = nextIdx++;
39
+ const { run_id, fn } = tasks[idx];
40
+ try {
41
+ results[idx] = await fn();
42
+ } catch (err) {
43
+ results[idx] = {
44
+ run_id,
45
+ ok: false,
46
+ error: err?.message || String(err),
47
+ };
48
+ }
49
+ }
50
+ }
51
+ await Promise.all(
52
+ Array.from({ length: Math.min(limit, tasks.length) }, worker),
53
+ );
54
+ return results;
55
+ }
14
56
 
15
57
  const RESUMABLE_STATUSES = new Set(['failed', 'paused', 'cancelled']);
16
58
 
@@ -18,61 +60,103 @@ const RESUMABLE_STATUSES = new Set(['failed', 'paused', 'cancelled']);
18
60
  const _diskCache = new Map();
19
61
  const DISK_CACHE_TTL_MS = 30_000;
20
62
 
63
+ /**
64
+ * Directory names skipped during the disk walk. These are vendored or derived
65
+ * trees that dominate file count without changing the user's mental model of
66
+ * "project disk". Excluding them drops the walked file count by ~10–20× on
67
+ * typical worktrees and keeps `disk_bytes` focused on the project's own
68
+ * source files — closing the gap between "raw on-disk bytes" and "bytes I
69
+ * would actually free by cleaning up this worktree".
70
+ */
71
+ export const WALK_SKIP_DIRS = new Set([
72
+ 'node_modules',
73
+ '.git',
74
+ '.venv',
75
+ 'venv',
76
+ '__pycache__',
77
+ '.pytest_cache',
78
+ '.mypy_cache',
79
+ '.ruff_cache',
80
+ 'dist',
81
+ 'build',
82
+ '.next',
83
+ '.turbo',
84
+ '.cache',
85
+ ]);
86
+
21
87
  /**
22
88
  * Sum file sizes under a directory tree. Cross-platform: prior `du -sb`
23
89
  * relied on GNU coreutils and silently returned 0 on macOS / BSD du,
24
90
  * which is why the Worktrees view always showed "0 B".
25
91
  *
26
- * Skips symlinks (don't follow into other trees) and is bounded by
92
+ * Skips symlinks (don't follow into other trees), skips directory names in
93
+ * WALK_SKIP_DIRS (node_modules, .git, build/cache dirs), and is bounded by
27
94
  * MAX_WALK_FILES so a runaway directory can't hang the request.
95
+ * Override the cap with WORCA_DISK_WALK_MAX (positive integer); the
96
+ * raised default of 1M handles node_modules-heavy worktrees, but very
97
+ * large monorepos may still want a higher ceiling.
28
98
  * Errors on individual entries are swallowed so a transiently-locked
29
99
  * file doesn't poison the whole sum.
30
100
  */
31
- const MAX_WALK_FILES = 100_000;
32
- function _walkDirSize(rootPath) {
33
- let total = 0;
101
+ function _resolveWalkCap() {
102
+ const raw = process.env.WORCA_DISK_WALK_MAX;
103
+ if (raw) {
104
+ const n = Number.parseInt(raw, 10);
105
+ if (Number.isFinite(n) && n > 0) return n;
106
+ }
107
+ return 1_000_000;
108
+ }
109
+ const MAX_WALK_FILES = _resolveWalkCap();
110
+ export async function walkDirSize(rootPath, maxFiles = MAX_WALK_FILES) {
111
+ let bytes = 0;
34
112
  let count = 0;
35
113
  const stack = [rootPath];
36
- while (stack.length > 0 && count < MAX_WALK_FILES) {
114
+ while (stack.length > 0 && count < maxFiles) {
37
115
  const cur = stack.pop();
38
- let entries;
116
+ let dir;
39
117
  try {
40
- entries = readdirSync(cur, { withFileTypes: true });
118
+ dir = await fsp.opendir(cur);
41
119
  } catch {
42
120
  continue;
43
121
  }
44
- for (const e of entries) {
122
+ for await (const e of dir) {
45
123
  count++;
46
- if (count >= MAX_WALK_FILES) break;
124
+ if (count >= maxFiles) break;
47
125
  const child = join(cur, e.name);
48
126
  if (e.isSymbolicLink()) continue;
49
127
  if (e.isDirectory()) {
50
- stack.push(child);
128
+ if (!WALK_SKIP_DIRS.has(e.name)) stack.push(child);
51
129
  } else if (e.isFile()) {
52
130
  try {
53
- total += statSync(child).size;
131
+ const st = await fsp.stat(child);
132
+ bytes += st.size;
54
133
  } catch {
55
134
  /* ignore — file vanished mid-walk */
56
135
  }
57
136
  }
58
137
  }
59
138
  }
60
- return total;
139
+ return { bytes, truncated: count >= maxFiles };
61
140
  }
62
141
 
63
- function _getDiskBytes(worktreePath) {
142
+ async function _getDiskBytes(worktreePath) {
64
143
  const now = Date.now();
65
144
  const hit = _diskCache.get(worktreePath);
66
- if (hit && hit.expiry > now) return hit.bytes;
145
+ if (hit && hit.expiry > now)
146
+ return { bytes: hit.bytes, truncated: hit.truncated };
67
147
 
68
- let bytes = 0;
148
+ let result = { bytes: 0, truncated: false };
69
149
  try {
70
- bytes = _walkDirSize(worktreePath);
150
+ result = await walkDirSize(worktreePath);
71
151
  } catch {
72
- bytes = 0;
152
+ result = { bytes: 0, truncated: false };
73
153
  }
74
- _diskCache.set(worktreePath, { bytes, expiry: now + DISK_CACHE_TTL_MS });
75
- return bytes;
154
+ _diskCache.set(worktreePath, {
155
+ bytes: result.bytes,
156
+ truncated: result.truncated,
157
+ expiry: now + DISK_CACHE_TTL_MS,
158
+ });
159
+ return result;
76
160
  }
77
161
 
78
162
  /**
@@ -108,11 +192,84 @@ function _readWorktreeStatus(worktreePath) {
108
192
  return null;
109
193
  }
110
194
 
111
- function _listWorktrees(worcaDir) {
195
+ /**
196
+ * Run a pre-validated cleanup batch in the background. Each task stamps
197
+ * `cleanup_state: 'cleaning'`, calls `removeWorktree` (which deletes the
198
+ * registry entry on success), and on failure stamps `cleanup_error` while
199
+ * clearing `cleanup_state` so the UI can render the error and let the user
200
+ * retry. Concurrency is bounded by `CLEANUP_CONCURRENCY`.
201
+ */
202
+ async function _runCleanupBatch(worcaDir, accepted) {
203
+ const tasks = accepted.map(({ run_id, reg }) => ({
204
+ run_id,
205
+ fn: async () => {
206
+ _patchRegistry(worcaDir, run_id, { cleanup_state: 'cleaning' });
207
+ try {
208
+ await removeWorktree(worcaDir, run_id, { skipPrune: true });
209
+ if (reg.worktree_path) _diskCache.delete(reg.worktree_path);
210
+ return { run_id, ok: true };
211
+ } catch (err) {
212
+ _patchRegistry(worcaDir, run_id, {
213
+ cleanup_state: undefined,
214
+ cleanup_error: err?.message || String(err),
215
+ });
216
+ return { run_id, ok: false, error: err?.message || String(err) };
217
+ }
218
+ },
219
+ }));
220
+
221
+ try {
222
+ await runWithConcurrencyLimit(tasks, CLEANUP_CONCURRENCY);
223
+ } catch {
224
+ /* per-task failures already persisted into the registry */
225
+ }
226
+
227
+ try {
228
+ await pruneWorktrees(worcaDir);
229
+ } catch {
230
+ /* non-fatal */
231
+ }
232
+ }
233
+
234
+ /**
235
+ * Patch fields on a pipelines.d/<run>.json entry (plain read-modify-write).
236
+ * Set a field to `undefined` to delete it. Returns `false` if the file is
237
+ * gone (the worktree was already cleaned up) or unreadable.
238
+ *
239
+ * Note: write is not strictly atomic — for a single-writer-per-id model
240
+ * (the cleanup background task owns its registry entry for the lifetime
241
+ * of the cleanup), read-modify-write is fine. A multi-writer scenario
242
+ * would need rename-into-place; we don't have that here.
243
+ */
244
+ function _patchRegistry(worcaDir, runId, patch) {
245
+ const regFile = join(worcaDir, 'multi', 'pipelines.d', `${runId}.json`);
246
+ if (!existsSync(regFile)) return false;
247
+ let reg;
248
+ try {
249
+ reg = JSON.parse(readFileSync(regFile, 'utf8'));
250
+ } catch {
251
+ return false;
252
+ }
253
+ for (const [k, v] of Object.entries(patch)) {
254
+ if (v === undefined) delete reg[k];
255
+ else reg[k] = v;
256
+ }
257
+ try {
258
+ writeFileSync(regFile, JSON.stringify(reg, null, 2), 'utf8');
259
+ return true;
260
+ } catch {
261
+ return false;
262
+ }
263
+ }
264
+
265
+ async function _listWorktrees(worcaDir) {
112
266
  const pipelinesDir = join(worcaDir, 'multi', 'pipelines.d');
113
267
  if (!existsSync(pipelinesDir)) return [];
114
268
 
115
- const entries = [];
269
+ // Phase 1: cheap synchronous metadata (registry parse, status read).
270
+ // Phase 2: disk walks in parallel — without this, 13 worktrees serialize
271
+ // ~3s of awaits even when most results would have been disk-cache hits.
272
+ const metas = [];
116
273
  for (const file of readdirSync(pipelinesDir)) {
117
274
  if (!file.endsWith('.json')) continue;
118
275
 
@@ -127,7 +284,6 @@ function _listWorktrees(worcaDir) {
127
284
  const worktreePath = reg.worktree_path;
128
285
  const worktreeExists = existsSync(worktreePath);
129
286
 
130
- // Prefer actual status.json; fall back to registry field
131
287
  let status = reg.status || 'unknown';
132
288
  if (worktreeExists) {
133
289
  const actual = _readWorktreeStatus(worktreePath);
@@ -142,26 +298,44 @@ function _listWorktrees(worcaDir) {
142
298
  }
143
299
  }
144
300
 
145
- entries.push({
146
- run_id: reg.run_id || '',
147
- title: reg.title || '',
148
- branch: reg.branch || '',
149
- worktree_path: worktreePath,
150
- disk_bytes: worktreeExists ? _getDiskBytes(worktreePath) : 0,
151
- age_seconds: ageSeconds,
152
- // started_at lets the client sort with the same sortByStartDesc helper
153
- // used by run-list, keeping ordering consistent across views.
154
- started_at: reg.started_at || null,
301
+ metas.push({
302
+ reg,
303
+ worktreePath,
304
+ worktreeExists,
155
305
  status,
156
- removable: status !== 'running',
157
- fleet_id: reg.fleet_id || null,
158
- workspace_id: reg.workspace_id || null,
159
- group_type: reg.group_type || null,
160
- group_status: null, // populated by W-040 / W-047
161
- resumable: RESUMABLE_STATUSES.has(status),
306
+ ageSeconds,
307
+ cleanup_state: reg.cleanup_state || null,
308
+ cleanup_error: reg.cleanup_error || null,
162
309
  });
163
310
  }
164
- return entries;
311
+
312
+ const disks = await Promise.all(
313
+ metas.map((m) =>
314
+ m.worktreeExists
315
+ ? _getDiskBytes(m.worktreePath)
316
+ : Promise.resolve({ bytes: 0, truncated: false }),
317
+ ),
318
+ );
319
+
320
+ return metas.map((m, i) => ({
321
+ run_id: m.reg.run_id || '',
322
+ title: m.reg.title || '',
323
+ branch: m.reg.branch || '',
324
+ worktree_path: m.worktreePath,
325
+ disk_bytes: disks[i].bytes,
326
+ truncated: disks[i].truncated,
327
+ age_seconds: m.ageSeconds,
328
+ started_at: m.reg.started_at || null,
329
+ status: m.status,
330
+ removable: m.status !== 'running',
331
+ fleet_id: m.reg.fleet_id || null,
332
+ workspace_id: m.reg.workspace_id || null,
333
+ group_type: m.reg.group_type || null,
334
+ group_status: null,
335
+ resumable: RESUMABLE_STATUSES.has(m.status),
336
+ cleanup_state: m.cleanup_state,
337
+ cleanup_error: m.cleanup_error,
338
+ }));
165
339
  }
166
340
 
167
341
  const RUN_ID_RE = /^[a-zA-Z0-9_-]+$/;
@@ -182,7 +356,7 @@ export function createWorktreesRouter() {
182
356
  const router = Router({ mergeParams: true });
183
357
 
184
358
  // GET /worktrees
185
- router.get('/', (req, res) => {
359
+ router.get('/', async (req, res) => {
186
360
  const worcaDir = req.project?.worcaDir;
187
361
  if (!worcaDir) {
188
362
  return res
@@ -190,15 +364,21 @@ export function createWorktreesRouter() {
190
364
  .json({ ok: false, error: 'worcaDir not configured' });
191
365
  }
192
366
  try {
193
- const worktrees = _listWorktrees(worcaDir);
194
- res.json({ ok: true, worktrees });
367
+ const worktrees = await _listWorktrees(worcaDir);
368
+ res.json({
369
+ ok: true,
370
+ worktrees,
371
+ // Documents the semantics shift in `disk_bytes` (project files only).
372
+ // Clients can render this as a caveat next to disk totals.
373
+ disk_walk_skip_dirs: [...WALK_SKIP_DIRS],
374
+ });
195
375
  } catch (err) {
196
376
  res.status(500).json({ ok: false, error: err.message });
197
377
  }
198
378
  });
199
379
 
200
380
  // DELETE /worktrees/:run_id
201
- router.delete('/:run_id', (req, res) => {
381
+ router.delete('/:run_id', async (req, res) => {
202
382
  const worcaDir = req.project?.worcaDir;
203
383
  if (!worcaDir) {
204
384
  return res
@@ -261,12 +441,127 @@ export function createWorktreesRouter() {
261
441
  });
262
442
  }
263
443
 
264
- removeWorktree(worcaDir, run_id);
444
+ await removeWorktree(worcaDir, run_id);
445
+ if (reg.worktree_path) _diskCache.delete(reg.worktree_path);
265
446
  res.json({ ok: true, run_id });
266
447
  } catch (err) {
267
448
  res.status(500).json({ ok: false, error: err.message });
268
449
  }
269
450
  });
270
451
 
452
+ // POST /worktrees/cleanup
453
+ //
454
+ // Batch worktree removal — async. Synchronously validates each id and
455
+ // stamps `cleanup_state: 'pending'` on the registry entries that pass
456
+ // pre-flight checks, then returns 202. The actual removal happens in
457
+ // the background with bounded concurrency. Clients poll GET /worktrees
458
+ // and observe `cleanup_state` per entry; on success the entry vanishes,
459
+ // on failure `cleanup_error` is set and `cleanup_state` is cleared.
460
+ //
461
+ // Response shape `{ ok, accepted, rejected }` where `rejected[]` carries
462
+ // entries that failed pre-flight (running, resumable without force, etc).
463
+ // A single bad id never blocks the rest of the batch; this stays
464
+ // compatible with the legacy synchronous shape's promise that partial
465
+ // failures are not signalled via HTTP status.
466
+ router.post('/cleanup', (req, res) => {
467
+ const worcaDir = req.project?.worcaDir;
468
+ if (!worcaDir) {
469
+ return res
470
+ .status(501)
471
+ .json({ ok: false, error: 'worcaDir not configured' });
472
+ }
473
+
474
+ const { run_ids, force = false } = req.body || {};
475
+ if (!Array.isArray(run_ids) || run_ids.length === 0) {
476
+ return res
477
+ .status(400)
478
+ .json({ ok: false, error: 'run_ids must be a non-empty array' });
479
+ }
480
+ for (const id of run_ids) {
481
+ if (!_validateRunId(id)) {
482
+ return res
483
+ .status(400)
484
+ .json({ ok: false, error: `Invalid run ID: ${id}` });
485
+ }
486
+ }
487
+
488
+ // Pre-flight: read each registry entry, decide pending vs reject. We do
489
+ // this synchronously so the HTTP response can carry the rejection list
490
+ // — clients shouldn't have to poll to learn that a 'running' worktree
491
+ // was refused.
492
+ const accepted = [];
493
+ const rejected = [];
494
+ for (const run_id of run_ids) {
495
+ const regFile = join(worcaDir, 'multi', 'pipelines.d', `${run_id}.json`);
496
+ if (!existsSync(regFile)) {
497
+ rejected.push({
498
+ run_id,
499
+ ok: false,
500
+ error: `Worktree "${run_id}" not found`,
501
+ });
502
+ continue;
503
+ }
504
+ let reg;
505
+ try {
506
+ reg = JSON.parse(readFileSync(regFile, 'utf8'));
507
+ } catch {
508
+ rejected.push({
509
+ run_id,
510
+ ok: false,
511
+ error: 'Failed to read registry entry',
512
+ });
513
+ continue;
514
+ }
515
+
516
+ let status = reg.status || 'unknown';
517
+ if (reg.worktree_path && existsSync(reg.worktree_path)) {
518
+ const actual = _readWorktreeStatus(reg.worktree_path);
519
+ if (actual) status = actual;
520
+ }
521
+
522
+ if (status === 'running') {
523
+ rejected.push({
524
+ run_id,
525
+ ok: false,
526
+ error: 'Cannot remove a running worktree',
527
+ code: 'running',
528
+ });
529
+ continue;
530
+ }
531
+
532
+ const isResumable = RESUMABLE_STATUSES.has(status);
533
+ const isGrouped = !!(reg.fleet_id || reg.workspace_id);
534
+ if (!force && (isResumable || isGrouped)) {
535
+ rejected.push({
536
+ run_id,
537
+ ok: false,
538
+ error:
539
+ 'Removing this worktree prevents resuming the run. Pass force=true to confirm.',
540
+ code: 'resumable_or_grouped',
541
+ });
542
+ continue;
543
+ }
544
+
545
+ // Stamp pending so a reload mid-cleanup shows the same state.
546
+ _patchRegistry(worcaDir, run_id, {
547
+ cleanup_state: 'pending',
548
+ cleanup_error: undefined,
549
+ });
550
+ accepted.push({ run_id, reg });
551
+ }
552
+
553
+ // Respond immediately — the client polls GET /worktrees to observe progress.
554
+ res.status(202).json({
555
+ ok: rejected.length === 0,
556
+ accepted: accepted.map((a) => a.run_id),
557
+ rejected,
558
+ });
559
+
560
+ // Fire-and-forget background removal. Errors are persisted into the
561
+ // registry so the client can render them; nothing here is awaited by
562
+ // the HTTP request.
563
+ void _runCleanupBatch(worcaDir, accepted);
564
+ });
565
+
271
566
  return router;
272
567
  }