@worca/ui 0.41.0 → 0.43.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,553 @@
1
+ /**
2
+ * File-access aggregator — reads pipeline.iteration.access events from a
3
+ * run's events.jsonl and folds payloads into the row/column model used by
4
+ * the Access Map view.
5
+ *
6
+ * Output shape:
7
+ * { enabled: false } — no access events (pre-W-064 run)
8
+ * { enabled: true, columns, tree, searches, graphQueries, summary }
9
+ *
10
+ * Columns: stage-ordered (STAGE_ORDER), then ascending iteration, then
11
+ * bead_id (nulls first, then lexicographic).
12
+ *
13
+ * Tree: union of reads∪writes paths, hierarchical dir/file nodes. Dir rows
14
+ * carry server-side rollups so the browser never recomputes aggregates.
15
+ *
16
+ * Searches: flat list of per-event search records with broad/zero_hit flags.
17
+ *
18
+ * GraphQueries: flat list of per-event knowledge-graph queries (graphify / CRG)
19
+ * with engine, op, and the verbatim query. Empty unless the run used a
20
+ * graph engine.
21
+ *
22
+ * Summary: global aggregates. oracle:"degraded" if ANY event was degraded.
23
+ *
24
+ * Pattern: mirrors dispatch-events-aggregator.js.
25
+ */
26
+
27
+ import { existsSync, readdirSync, readFileSync } from 'node:fs';
28
+ import { join, resolve } from 'node:path';
29
+ import { STAGE_ORDER } from '../app/utils/stage-order.js';
30
+
31
// `event_type` value of the completion-time access records in events.jsonl;
// lines carrying any other event type are ignored by this aggregator.
const ACCESS_EVENT_TYPE = 'pipeline.iteration.access';

// Access fragment filename: `<stage>-<iter>.jsonl` or `<stage>-<iter>-<bead>.jsonl`.
// Stage keys never contain a hyphen (plan, coordinate, implement, plan_review…);
// iteration is digits; bead ids may contain hyphens, so they soak up the rest.
// Capture groups: 1 = stage, 2 = iteration, 3 = bead id (optional).
const FRAGMENT_NAME_RE = /^([a-z_]+)-(\d+)(?:-(.+))?\.jsonl$/;
37
+
38
/**
 * Build the Access Map model for one run.
 *
 * Finished iterations come from `pipeline.iteration.access` events in
 * events.jsonl. When `runDir` is given, the on-disk access fragments of
 * columns that have no completion event yet are folded in as live columns, so
 * the map, searches and graph queries populate while a stage is still running.
 * A column is never counted twice: fragments whose column key already has a
 * completion event are skipped. Live columns carry an empty `capture`, so they
 * contribute nothing to `leakage_pct_max` / `oracle`.
 *
 * @param {string} eventsPath — absolute path to events.jsonl
 * @param {string|null} runDir — run directory (enables live fragment folding)
 * @returns {{ enabled: false } | { enabled: true, columns, tree, searches, graphQueries, summary }}
 *   `{ enabled: false }` when neither completion events nor live fragments
 *   yielded any access payload.
 */
export function buildFileAccessModel(eventsPath, runDir = null) {
  // Completion-time access events (authoritative for finished iterations).
  const accessPayloads = readCompletedAccessPayloads(eventsPath);

  // Fold the still-running iteration's fragment(s) in live — skipping any
  // column that already has an authoritative completion event.
  const completedCols = new Set(
    accessPayloads.map((p) => colKey(p.stage, p.iteration, p.bead_id)),
  );
  if (runDir) {
    accessPayloads.push(...readLiveFragmentPayloads(runDir, completedCols));
  }

  if (accessPayloads.length === 0) return { enabled: false };

  // ------------------------------------------------------------------
  // 1. Build columns (deduplicated, sorted)
  // ------------------------------------------------------------------
  const colMap = new Map();
  for (const p of accessPayloads) {
    const key = colKey(p.stage, p.iteration, p.bead_id);
    if (!colMap.has(key)) {
      colMap.set(key, {
        key,
        stage: p.stage,
        iteration: p.iteration,
        bead_id: p.bead_id ?? null,
        agent: p.agent,
        live: !!p._live,
      });
    }
  }

  const columns = [...colMap.values()].sort(compareColumns);

  // ------------------------------------------------------------------
  // 2. Fold payloads into per-file data and searches
  // ------------------------------------------------------------------
  // fileData: path → { cells: { colKey: { read?, write? } }, tracked }
  const fileData = new Map();

  const searches = [];
  const graphQueries = [];
  let oracleDegraded = false;

  const summary = {
    files_touched: 0,
    distinct_read: 0,
    total_read: 0,
    distinct_write: 0,
    total_write: 0,
    searches: 0,
    grep: 0,
    glob: 0,
    zero_result: 0,
    root_scoped: 0,
    graph_queries: 0,
    graphify: 0,
    crg: 0,
    leakage_pct_max: 0,
    oracle: 'ok',
  };

  for (const p of accessPayloads) {
    const ck = colKey(p.stage, p.iteration, p.bead_id);
    const fa = p.file_access || {};

    for (const [path, count] of Object.entries(fa.reads || {})) {
      const fd = ensureFile(fileData, path);
      if (!fd.cells[ck]) fd.cells[ck] = {};
      fd.cells[ck].read = (fd.cells[ck].read || 0) + count;
    }

    for (const [path, count] of Object.entries(fa.writes || {})) {
      const fd = ensureFile(fileData, path);
      if (!fd.cells[ck]) fd.cells[ck] = {};
      fd.cells[ck].write = (fd.cells[ck].write || 0) + count;
    }

    // One pass per search: emit the flat record and bump the summary counters
    // from the same flags, so the two can never disagree.
    for (const s of fa.searches || []) {
      const isBroad = s.scope === '.' || s.scope === '';
      const isZeroHit = s.result_count === 0;
      searches.push({
        colKey: ck,
        stage: p.stage,
        iteration: p.iteration,
        tool: s.tool,
        pattern: s.pattern,
        scope: s.scope,
        result_count: s.result_count,
        broad: isBroad,
        zero_hit: isZeroHit,
        filter: s.filter ?? null,
      });
      summary.searches++;
      if (s.tool === 'Grep') summary.grep++;
      if (s.tool === 'Glob') summary.glob++;
      if (isZeroHit) summary.zero_result++;
      if (isBroad) summary.root_scoped++;
    }

    const cap = fa.capture || {};
    if (cap.oracle === 'degraded') oracleDegraded = true;
    if (cap.leakage_pct != null && cap.leakage_pct > summary.leakage_pct_max) {
      summary.leakage_pct_max = cap.leakage_pct;
    }

    // Knowledge-graph queries (graphify / CRG) — structural/semantic lookups
    // recorded alongside the lexical searches above. Only the engine, the op
    // (graphify subcommand / CRG MCP tool name) and the verbatim query/args
    // are surfaced; result counts and a separate "target" are op-dependent
    // and intentionally not collected.
    for (const g of fa.graph_queries || []) {
      graphQueries.push({
        colKey: ck,
        stage: p.stage,
        iteration: p.iteration,
        engine: g.engine,
        op: g.op,
        query: g.query,
      });
      summary.graph_queries++;
      if (g.engine === 'graphify') summary.graphify++;
      if (g.engine === 'crg') summary.crg++;
    }
  }

  // Compute global file-level aggregates from the folded fileData.
  for (const fd of fileData.values()) {
    let fileRead = 0;
    let fileWrite = 0;
    for (const cell of Object.values(fd.cells)) {
      fileRead += cell.read || 0;
      fileWrite += cell.write || 0;
    }
    if (fileRead > 0) {
      summary.distinct_read++;
      summary.total_read += fileRead;
    }
    if (fileWrite > 0) {
      summary.distinct_write++;
      summary.total_write += fileWrite;
    }
  }

  summary.files_touched = fileData.size;
  if (oracleDegraded) summary.oracle = 'degraded';

  // ------------------------------------------------------------------
  // 3. Build hierarchical tree with server-side dir rollups
  // ------------------------------------------------------------------
  const tree = buildTree(fileData);

  return { enabled: true, columns, tree, searches, graphQueries, summary };
}

/**
 * Read events.jsonl and return the payloads of its access events.
 *
 * Best effort by design: a missing or unreadable file yields an empty list,
 * and lines that are blank, unparseable, of another event type, or without an
 * object payload are skipped.
 *
 * @param {string} eventsPath — absolute path to events.jsonl (may be falsy)
 * @returns {Array<object>} access payloads in file order
 */
function readCompletedAccessPayloads(eventsPath) {
  const payloads = [];
  if (!eventsPath || !existsSync(eventsPath)) return payloads;

  let content;
  try {
    content = readFileSync(eventsPath, 'utf8');
  } catch {
    // An unreadable log is treated the same as one with no access events.
    return payloads;
  }

  for (const line of content.split('\n')) {
    if (!line.trim()) continue;
    let event;
    try {
      event = JSON.parse(line);
    } catch {
      continue;
    }
    // JSON.parse legitimately returns null or a primitive for lines such as
    // `null` or `42`; optional chaining keeps those from throwing.
    if (event?.event_type !== ACCESS_EVENT_TYPE) continue;
    const { payload } = event;
    if (!payload || typeof payload !== 'object') continue;
    payloads.push(payload);
  }
  return payloads;
}
233
+
234
+ // ---------------------------------------------------------------------------
235
+ // Helpers
236
+ // ---------------------------------------------------------------------------
237
+
238
/**
 * Compose the column key for a (stage, iteration, bead) triple.
 *
 * The bead segment is appended only when `beadId` is truthy, so null,
 * undefined and the empty string all produce the two-part key.
 */
function colKey(stage, iteration, beadId) {
  const base = `${stage}:${iteration}`;
  if (!beadId) return base;
  return `${base}:${beadId}`;
}
241
+
242
/**
 * Synthesise access payloads for iterations that are still running.
 *
 * Scans `runDir/access/` for fragment files, skips every fragment whose column
 * key is already in `completedCols`, and turns the remaining fragments' JSONL
 * records into payloads shaped like a `pipeline.iteration.access` event
 * payload. Paths are only relativised against the repo root here — a live
 * approximation of the authoritative respelling done at completion time.
 *
 * Best effort: a missing/unlistable directory yields `[]`; unreadable files,
 * unparseable lines and fragments with no usable records are skipped.
 *
 * @param {string} runDir
 * @param {Set<string>} completedCols — colKeys that already have a completion event
 * @returns {Array<object>} synthetic access payloads with `_live: true`
 */
function readLiveFragmentPayloads(runDir, completedCols) {
  const accessDir = join(runDir, 'access');
  if (!existsSync(accessDir)) return [];
  // runDir is `<repoRoot>/.worca/runs/<id>` → repoRoot is three levels up.
  const repoRoot = resolve(runDir, '..', '..', '..');

  let entries;
  try {
    entries = readdirSync(accessDir);
  } catch {
    return [];
  }

  const live = [];
  for (const fileName of entries) {
    const match = FRAGMENT_NAME_RE.exec(fileName);
    if (!match) continue;

    const [, stage, iterationText, beadText] = match;
    const iteration = Number(iterationText);
    const bead_id = beadText || null;
    if (completedCols.has(colKey(stage, iteration, bead_id))) continue;

    let text;
    try {
      text = readFileSync(join(accessDir, fileName), 'utf8');
    } catch {
      continue;
    }

    // Keep every line that parses to a truthy JSON value; drop the rest.
    const records = [];
    for (const rawLine of text.split('\n')) {
      if (!rawLine.trim()) continue;
      let parsed = null;
      try {
        parsed = JSON.parse(rawLine);
      } catch {
        parsed = null;
      }
      if (parsed) records.push(parsed);
    }
    if (records.length === 0) continue;

    const file_access = fragmentRecordsToFileAccess(records, repoRoot);
    live.push({
      stage,
      iteration,
      bead_id,
      agent: null,
      _live: true,
      file_access,
    });
  }
  return live;
}
305
+
306
/**
 * Fold raw access-fragment records into the `file_access` shape the model
 * builder consumes: `{ reads, writes, searches, graph_queries, capture }`.
 *
 * - `read` / `write` records are counted per canonicalised path.
 * - `search` records keep tool, pattern (clipped to 200 chars), scope
 *   (empty scope is normalised to `.`), result_count (default 0) and, when
 *   present on the record, filter.
 * - `graph_query` records are kept only for the `graphify` and `crg` engines.
 * - Any other op is ignored.
 *
 * Capture is left empty — leakage/oracle are only computable from the
 * finished iteration, so they stay pending for a live column.
 *
 * @param {Array<object>} records — parsed fragment lines
 * @param {string} repoRoot — root used to relativise absolute paths
 * @returns {{ reads: object, writes: object, searches: Array, graph_queries: Array, capture: object }}
 */
function fragmentRecordsToFileAccess(records, repoRoot) {
  // Count in Maps, not plain objects: paths are external data, and a path
  // such as `constructor` or `toString` would otherwise read an inherited
  // Object.prototype member as its "previous count" (and `__proto__` would be
  // dropped entirely). Object.fromEntries defines plain own properties, so
  // the returned shape is unchanged.
  const readCounts = new Map();
  const writeCounts = new Map();
  const searches = [];
  const graph_queries = [];

  const bump = (counts, rawPath) => {
    const p = canonicalizePath(rawPath, repoRoot);
    if (p) counts.set(p, (counts.get(p) || 0) + 1);
  };

  // Clip free-text fields to 200 chars. Non-string values (numbers, objects)
  // are rendered as JSON instead of throwing on a missing `.slice`.
  const clip = (value) => {
    if (!value) return '';
    const text = typeof value === 'string' ? value : JSON.stringify(value);
    return text.slice(0, 200);
  };

  for (const r of records) {
    switch (r?.op) {
      case 'read':
        bump(readCounts, r.path);
        break;
      case 'write':
        bump(writeCounts, r.path);
        break;
      case 'search': {
        const entry = {
          tool: r.tool,
          pattern: clip(r.pattern),
          // A missing/empty scope means the search ran from the root.
          scope: r.scope || '.',
          result_count: r.result_count ?? 0,
        };
        if ('filter' in r) entry.filter = r.filter;
        searches.push(entry);
        break;
      }
      case 'graph_query': {
        if (r.engine === 'graphify' || r.engine === 'crg') {
          graph_queries.push({
            engine: r.engine,
            op: r.graph_op || '',
            query: clip(r.query),
          });
        }
        break;
      }
      default:
        break;
    }
  }

  return {
    reads: Object.fromEntries(readCounts),
    writes: Object.fromEntries(writeCounts),
    searches,
    graph_queries,
    capture: {},
  };
}
359
+
360
/**
 * Turn a raw fragment path into the spelling used by the Access Map.
 *
 * Resolution order:
 *   1. falsy path                       → null
 *   2. path strictly under `repoRoot`   → the part after `<repoRoot>/`
 *   3. path equal to `repoRoot`         → null
 *   4. otherwise                        → whatever recoverWorktreeBasenameTail
 *      recovers, falling back to the raw path unchanged when it yields
 *      null/undefined.
 *
 * Step 4 exists so that, when `repoRoot` is a worktree under
 * `<project>/.worktrees/<id>`, a path pointing at a sibling clone of the same
 * project groups under the project tree instead of rendering as a full
 * absolute path.
 *
 * @param {string} rawPath
 * @param {string} repoRoot
 * @returns {string|null}
 */
function canonicalizePath(rawPath, repoRoot) {
  if (!rawPath) return null;

  if (repoRoot) {
    const insideRepo = rawPath.startsWith(`${repoRoot}/`);
    if (insideRepo) return rawPath.slice(repoRoot.length + 1);
  }

  if (rawPath === repoRoot) return null;

  const recovered = recoverWorktreeBasenameTail(rawPath, repoRoot);
  return recovered ?? rawPath;
}
381
+
382
/**
 * Recover a repo-relative tail from a raw absolute path that fell outside a
 * worktree root.
 *
 * When `repoRoot` contains a `.worktrees` segment, the segment just before it
 * is taken as the project basename; the tail of `rawPath` after the
 * right-most occurrence of that basename is returned.
 *
 * Returns null when the heuristic doesn't apply, so the caller can fall back
 * to the raw path unchanged:
 *   - either argument is missing or not a string,
 *   - `rawPath` is not absolute (an already-relative path must not be
 *     truncated just because one of its segments matches the project name),
 *   - `repoRoot` has no `.worktrees` segment or no basename before it,
 *   - the basename does not occur in `rawPath`, or nothing follows it.
 *
 * @param {string} rawPath
 * @param {string} repoRoot
 * @returns {string|null}
 */
function recoverWorktreeBasenameTail(rawPath, repoRoot) {
  if (typeof rawPath !== 'string' || typeof repoRoot !== 'string') return null;
  if (!rawPath || !repoRoot) return null;
  // The heuristic targets absolute paths into a sibling clone; relative paths
  // are already in their final form.
  if (!rawPath.startsWith('/')) return null;

  const rootParts = repoRoot.split('/');
  const wtIdx = rootParts.indexOf('.worktrees');
  if (wtIdx <= 0) return null;
  const projectBasename = rootParts[wtIdx - 1];
  if (!projectBasename) return null;

  // Right-most match — the tail closest to the leaf is the most likely
  // intended target when the basename appears more than once.
  const rawParts = rawPath.split('/');
  const hit = rawParts.lastIndexOf(projectBasename);
  if (hit === -1) return null;
  return rawParts.slice(hit + 1).join('/') || null;
}
405
+
406
/**
 * Get-or-create the per-file record for `path` in `fileData`.
 *
 * A new record starts with no cells and `tracked: true`; an existing record
 * is returned as-is (same object), so callers can mutate it in place.
 *
 * @param {Map<string, {cells: object, tracked: boolean}>} fileData
 * @param {string} path
 * @returns {{cells: object, tracked: boolean}}
 */
function ensureFile(fileData, path) {
  if (fileData.has(path)) return fileData.get(path);

  const fresh = { cells: {}, tracked: true };
  fileData.set(path, fresh);
  return fresh;
}
412
+
413
/**
 * Sort comparator for Access Map columns.
 *
 * Order: position of the stage in STAGE_ORDER (unknown stages rank 999, i.e.
 * after all known ones), then ascending iteration, then bead_id with null
 * first and the rest in ascending `<` order.
 */
function compareColumns(a, b) {
  const stageRank = (stage) => {
    const idx = STAGE_ORDER.indexOf(stage);
    return idx === -1 ? 999 : idx;
  };

  const rankA = stageRank(a.stage);
  const rankB = stageRank(b.stage);
  if (rankA !== rankB) return rankA - rankB;

  if (a.iteration !== b.iteration) return a.iteration - b.iteration;

  const beadA = a.bead_id;
  const beadB = b.bead_id;
  if (beadA === beadB) return 0;
  // From here on the two ids differ, so at most one of them is null.
  if (beadA === null) return -1;
  if (beadB === null) return 1;
  return beadA < beadB ? -1 : 1;
}
426
+
427
/**
 * Build a hierarchical dir/file tree from the flat fileData map.
 * Dir nodes carry rolled-up totals and cells aggregated from children.
 *
 * File leaves get a `category`: `read` when the file was never written,
 * otherwise `write` for tracked files and `leaked` for untracked ones.
 *
 * @param {Map<string, {cells, tracked}>} fileData
 * @returns {Array<TreeNode>} top-level nodes, with single-child directory
 *   chains collapsed
 */
function buildTree(fileData) {
  // Root sentinel — not emitted, just holds top-level children.
  const root = {
    children: new Map(),
    cells: {},
    totals: { read: 0, write: 0 },
  };

  for (const [path, fd] of fileData) {
    const parts = path.split('/');
    let node = root;

    // Walk/create intermediate dir nodes. Directories are keyed as `<name>/`
    // and files as `<name>`: a segment can never contain `/`, so a file and a
    // directory that share a name under one parent can no longer clobber each
    // other (previously the walk could overwrite a subtree or descend into a
    // file node that has no `children`). Keys are internal to this function;
    // rollup and serialisation only iterate the Map's values.
    for (let i = 0; i < parts.length - 1; i++) {
      const name = parts[i];
      const dirKey = `${name}/`;
      let dir = node.children.get(dirKey);
      if (!dir) {
        dir = {
          type: 'dir',
          path: parts.slice(0, i + 1).join('/'),
          name,
          children: new Map(),
          cells: {},
          totals: { read: 0, write: 0 },
        };
        node.children.set(dirKey, dir);
      }
      node = dir;
    }

    // Place the file leaf.
    const fileName = parts[parts.length - 1];
    const fileTotals = { read: 0, write: 0 };
    for (const cell of Object.values(fd.cells)) {
      fileTotals.read += cell.read || 0;
      fileTotals.write += cell.write || 0;
    }
    const category =
      fileTotals.write > 0 ? (fd.tracked ? 'write' : 'leaked') : 'read';

    node.children.set(fileName, {
      type: 'file',
      path,
      name: fileName,
      tracked: fd.tracked,
      category,
      cells: fd.cells,
      totals: fileTotals,
    });
  }

  // Rollup dir totals and cells bottom-up, then serialise to arrays.
  rollupDir(root);
  return collapseSingleChildDirs(
    [...root.children.values()].map(serializeNode),
  );
}
490
+
491
/**
 * Collapse chains of single-child directories in a list of serialised nodes.
 *
 * Each node is passed through collapseNode, which merges a directory with its
 * only child when that child is itself a directory. This keeps a stray deep
 * path (e.g. an absolute path) from rendering as a long ladder of one-row
 * directories that buries the filename.
 *
 * @param {Array<TreeNode>} nodes — serialised nodes (children are arrays)
 * @returns {Array<TreeNode>} a new array of collapsed nodes
 */
function collapseSingleChildDirs(nodes) {
  return nodes.map((node) => collapseNode(node));
}
507
+
508
/**
 * Collapse one serialised node.
 *
 * File nodes are returned untouched. For a directory, children are collapsed
 * first (bottom-up); then, for as long as the directory's only child is a
 * directory, the two are merged: names are joined with `/`, and the child's
 * path, children, cells and totals are adopted. The input node is not
 * mutated — a shallow copy is built instead.
 *
 * @param {TreeNode} node — serialised node (dir children are arrays)
 * @returns {TreeNode}
 */
function collapseNode(node) {
  if (node.type !== 'dir') return node;

  let merged = {
    ...node,
    children: node.children.map((child) => collapseNode(child)),
  };

  let kids = merged.children;
  while (kids.length === 1 && kids[0].type === 'dir') {
    const [only] = kids;
    // The collapsed row keeps the deeper child's path. A parent whose single
    // child is a dir has the same rolled-up cells/totals as that child, so
    // adopting the child's leaves the rendered values unchanged.
    merged = {
      ...merged,
      path: only.path,
      name: `${merged.name}/${only.name}`,
      children: only.children,
      cells: only.cells,
      totals: only.totals,
    };
    kids = merged.children;
  }
  return merged;
}
528
+
529
/**
 * Accumulate totals and per-column cells of every descendant into `node`,
 * depth-first, mutating the directory nodes in place.
 *
 * File nodes are left untouched. For directories (and the type-less root
 * sentinel) each child is rolled up first, then its totals and cells are
 * added onto the parent. Parent cells always end up with both `read` and
 * `write` set, using 0 where a child cell lacks one.
 *
 * @param {object} node — tree node whose `children` is a Map
 * @returns {void}
 */
function rollupDir(node) {
  if (node.type === 'file') return;

  node.children.forEach((child) => {
    rollupDir(child);

    node.totals.read += child.totals.read;
    node.totals.write += child.totals.write;

    Object.keys(child.cells).forEach((ck) => {
      const source = child.cells[ck];
      const target = node.cells[ck] || (node.cells[ck] = {});
      target.read = (target.read || 0) + (source.read || 0);
      target.write = (target.write || 0) + (source.write || 0);
    });
  });
}
542
+
543
/**
 * Convert a Map-based tree node into its serialisable form.
 *
 * File nodes are returned as-is. Directory nodes are rebuilt with their
 * `children` Map turned into an array of serialised children (Map insertion
 * order); only type, path, name, children, cells and totals are carried over.
 *
 * @param {object} node
 * @returns {object}
 */
function serializeNode(node) {
  if (node.type === 'file') return node;

  const { path, name, cells, totals } = node;
  const children = Array.from(node.children.values(), (child) =>
    serializeNode(child),
  );
  return { type: 'dir', path, name, children, cells, totals };
}