@smithers-orchestrator/time-travel 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. package/LICENSE +21 -0
  2. package/package.json +68 -0
  3. package/src/BranchInfo.ts +11 -0
  4. package/src/ForkParams.ts +11 -0
  5. package/src/JUMP_MAX_FRAME_NO.js +1 -0
  6. package/src/JUMP_RUN_ID_PATTERN.js +1 -0
  7. package/src/JumpResult.ts +9 -0
  8. package/src/JumpStepName.ts +10 -0
  9. package/src/JumpToFrameError.js +23 -0
  10. package/src/JumpToFrameInput.ts +36 -0
  11. package/src/NodeChange.ts +7 -0
  12. package/src/NodeSnapshot.ts +8 -0
  13. package/src/OutputChange.ts +5 -0
  14. package/src/ParsedSnapshot.ts +18 -0
  15. package/src/REWIND_RATE_LIMIT_MAX.js +1 -0
  16. package/src/REWIND_RATE_LIMIT_WINDOW_MS.js +1 -0
  17. package/src/RalphChange.ts +7 -0
  18. package/src/RalphSnapshot.ts +5 -0
  19. package/src/ReplayParams.ts +12 -0
  20. package/src/ReplayResult.ts +11 -0
  21. package/src/RetryTaskOptions.ts +10 -0
  22. package/src/RetryTaskResult.ts +5 -0
  23. package/src/RevertOptions.ts +9 -0
  24. package/src/RevertResult.ts +5 -0
  25. package/src/RewindAuditResult.ts +5 -0
  26. package/src/RewindLockHandle.ts +4 -0
  27. package/src/RunTimeline.ts +11 -0
  28. package/src/SnapshotDiff.ts +18 -0
  29. package/src/TimeTravelOptions.ts +11 -0
  30. package/src/TimeTravelResult.ts +7 -0
  31. package/src/TimelineFrame.ts +11 -0
  32. package/src/TimelineTree.ts +9 -0
  33. package/src/acquireRewindLock.js +32 -0
  34. package/src/countRecentRewindAuditRows.js +27 -0
  35. package/src/diff.js +189 -0
  36. package/src/evaluateRewindRateLimit.js +41 -0
  37. package/src/fork/_helpers.js +28 -0
  38. package/src/fork/forkRunEffect.js +147 -0
  39. package/src/fork/getBranchInfoEffect.js +26 -0
  40. package/src/fork/index.js +41 -0
  41. package/src/fork/listBranchesEffect.js +25 -0
  42. package/src/hasRewindLock.js +11 -0
  43. package/src/index.d.ts +1170 -0
  44. package/src/index.js +43 -0
  45. package/src/jumpToFrame.js +1077 -0
  46. package/src/listRewindAuditRows.js +83 -0
  47. package/src/metrics.js +4 -0
  48. package/src/recoverInProgressRewindAudits.js +72 -0
  49. package/src/replay.js +22 -0
  50. package/src/replayFromCheckpointEffect.js +59 -0
  51. package/src/replaysStarted.js +2 -0
  52. package/src/resetRewindLocksForTests.js +8 -0
  53. package/src/resolveRewindAuditClient.js +38 -0
  54. package/src/retry-task.js +215 -0
  55. package/src/revert.js +68 -0
  56. package/src/rewindAudit.js +9 -0
  57. package/src/rewindLock.js +7 -0
  58. package/src/rewindLockStore.js +8 -0
  59. package/src/rewindRateLimit.js +3 -0
  60. package/src/runForksCreated.js +2 -0
  61. package/src/schema.js +46 -0
  62. package/src/snapshot/Snapshot.ts +15 -0
  63. package/src/snapshot/SnapshotData.ts +19 -0
  64. package/src/snapshot/captureSnapshotEffect.js +70 -0
  65. package/src/snapshot/index.js +57 -0
  66. package/src/snapshot/listSnapshotsEffect.js +32 -0
  67. package/src/snapshot/loadSnapshotEffect.js +46 -0
  68. package/src/snapshot/parseSnapshot.js +31 -0
  69. package/src/snapshotDuration.js +7 -0
  70. package/src/snapshotsCaptured.js +2 -0
  71. package/src/timeline/_helpers.js +7 -0
  72. package/src/timeline/buildTimelineEffect.js +38 -0
  73. package/src/timeline/buildTimelineTreeEffect.js +30 -0
  74. package/src/timeline/formatTimelineAsJson.js +23 -0
  75. package/src/timeline/formatTimelineForTui.js +31 -0
  76. package/src/timeline/index.js +31 -0
  77. package/src/timetravel.js +247 -0
  78. package/src/types.ts +15 -0
  79. package/src/updateRewindAuditRow.js +35 -0
  80. package/src/validateJumpFrameNo.js +23 -0
  81. package/src/validateJumpRunId.js +18 -0
  82. package/src/vcs-version/VcsTag.ts +9 -0
  83. package/src/vcs-version/index.js +61 -0
  84. package/src/vcs-version/loadVcsTagEffect.js +27 -0
  85. package/src/vcs-version/rerunAtRevisionEffect.js +25 -0
  86. package/src/vcs-version/resolveWorkflowAtRevisionEffect.js +32 -0
  87. package/src/vcs-version/tagSnapshotVcsEffect.js +56 -0
  88. package/src/writeRewindAuditRow.js +46 -0
@@ -0,0 +1,1077 @@
1
+ import { Effect, Metric } from "effect";
2
+ import * as BunContext from "@effect/platform-bun/BunContext";
3
+ import { getJjPointer, revertToJjPointer } from "@smithers-orchestrator/vcs/jj";
4
+ import {
5
+ rewindTotal,
6
+ rewindRollbackTotal,
7
+ rewindDurationMs,
8
+ rewindFramesDeleted,
9
+ rewindSandboxesReverted,
10
+ } from "@smithers-orchestrator/observability/metrics";
11
+ import { JUMP_RUN_ID_PATTERN } from "./JUMP_RUN_ID_PATTERN.js";
12
+ import { JUMP_MAX_FRAME_NO } from "./JUMP_MAX_FRAME_NO.js";
13
+ import { JumpToFrameError } from "./JumpToFrameError.js";
14
+ import { validateJumpRunId } from "./validateJumpRunId.js";
15
+ import { validateJumpFrameNo } from "./validateJumpFrameNo.js";
16
+ import { acquireRewindLock } from "./acquireRewindLock.js";
17
+ import { evaluateRewindRateLimit } from "./evaluateRewindRateLimit.js";
18
+ import { writeRewindAuditRow } from "./writeRewindAuditRow.js";
19
+ import { updateRewindAuditRow } from "./updateRewindAuditRow.js";
20
+
21
+ export { JUMP_RUN_ID_PATTERN };
22
+ export { JUMP_MAX_FRAME_NO };
23
+ export { JumpToFrameError };
24
+ export { validateJumpRunId };
25
+ export { validateJumpFrameNo };
26
+
27
+ /** @typedef {import("@smithers-orchestrator/db/adapter").SmithersDb} SmithersDb */
28
+ /** @typedef {import("@smithers-orchestrator/observability/SmithersEvent").SmithersEvent} SmithersEvent */
29
+ /** @typedef {import("./JumpResult.ts").JumpResult} JumpResult */
30
+ /** @typedef {import("./JumpToFrameInput.ts").JumpToFrameInput} JumpToFrameInput */
31
+ /** @typedef {import("./JumpStepName.ts").JumpStepName} JumpStepName */
32
+
/**
 * Allow-list for output table names before they are spliced into SQL as
 * identifiers: one ASCII letter or underscore, followed by any number of
 * ASCII letters, digits, or underscores.
 */
const OUTPUT_TABLE_PATTERN = /^[A-Za-z_][A-Za-z0-9_]*$/;
/**
 * Narrow an arbitrary value to a string.
 *
 * @param {unknown} value
 * @returns {string | null} `value` itself when it is a string primitive,
 *   otherwise `null` (no coercion is attempted).
 */
function asString(value) {
  return typeof value === "string" ? value : null;
}
/**
 * Structural type for the SQLite client this module talks to directly:
 * `query(sql)` yields a statement exposing `run`, `get`, and `all`.
 *
 * @typedef {{
 *   query: (sql: string) => {
 *     run: (...args: unknown[]) => unknown;
 *     get: (...args: unknown[]) => Record<string, unknown> | null | undefined;
 *     all: (...args: unknown[]) => Array<Record<string, unknown>>;
 *   };
 * }} JumpSqliteClient
 */

/**
 * Dig the raw SQLite client out of a database adapter.
 *
 * Looks at `adapter.db.session.client` first and falls back to
 * `adapter.db.$client`. The candidate is accepted only if it exposes a
 * `query` function.
 *
 * @param {SmithersDb} adapter
 * @returns {JumpSqliteClient}
 * @throws {TypeError} When neither location holds an object with a
 *   callable `query`.
 */
function resolveSqliteClient(adapter) {
  const db = /** @type {{ session?: { client?: unknown }; $client?: unknown } | null | undefined} */ (
    /** @type {unknown} */ (adapter?.db)
  );
  const candidate = /** @type {unknown} */ (db?.session?.client ?? db?.$client);
  if (
    !candidate ||
    typeof (/** @type {{ query?: unknown }} */ (candidate).query) !== "function"
  ) {
    throw new TypeError("Could not resolve Bun SQLite client from adapter.");
  }
  return /** @type {JumpSqliteClient} */ (candidate);
}
/**
 * Wrap an SQL identifier in double quotes, doubling any embedded double
 * quote so the result is a single quoted identifier token.
 *
 * @param {string} identifier
 * @returns {string}
 */
function quoteIdentifier(identifier) {
  return `"${identifier.replaceAll('"', '""')}"`;
}
/**
 * Turn any thrown value into a human-readable message.
 *
 * @param {unknown} error
 * @returns {string} `error.message` for `Error` instances, otherwise the
 *   result of `String(error)`.
 */
function formatError(error) {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
/**
 * Forward a log record to the optional caller-supplied logger.
 *
 * Does nothing when no logger is given. Anything the logger throws
 * (synchronously or via a rejected promise) is deliberately discarded so
 * that a broken logger can never fail the surrounding operation.
 *
 * @param {JumpToFrameInput["onLog"]} logger
 * @param {"info" | "warn" | "error"} level
 * @param {string} message
 * @param {Record<string, unknown>} [fields] Defaults to an empty object.
 * @returns {Promise<void>}
 */
async function emitLog(logger, level, message, fields = {}) {
  if (!logger) {
    return;
  }
  try {
    await logger(level, message, fields);
  } catch {
    // logging failures must never derail the RPC
  }
}
/**
 * Run a segment of work inside a tracing span. We deliberately attach the
 * span annotation via {@link Effect.withSpan} while preserving native JS
 * error identity: if the inner promise rejects we re-throw the original
 * error object so callers can match on `.code`, `.details`, etc. This
 * mirrors the pattern used by `getNodeOutputRoute`/`streamDevToolsRoute`.
 *
 * The outcome of `run` is recorded in local variables rather than flowing
 * through the Effect channel, which is what keeps the thrown object
 * untouched. If the Effect runtime itself fails before `run` has settled,
 * that runtime error is re-thrown instead of silently resolving to
 * `undefined`.
 *
 * @template T
 * @param {string} spanName
 * @param {Record<string, unknown>} attrs Span attributes.
 * @param {() => Promise<T>} run
 * @returns {Promise<T>}
 */
async function withSpan(spanName, attrs, run) {
  /** @type {T | undefined} */
  let result;
  /** @type {unknown} */
  let captured = undefined;
  let failed = false;
  let completed = false;
  const effect = Effect.tryPromise({
    try: async () => {
      try {
        result = await run();
        completed = true;
      } catch (error) {
        captured = error;
        failed = true;
      }
    },
    catch: (error) => error,
  }).pipe(Effect.withSpan(spanName, { attributes: attrs }));
  try {
    await Effect.runPromise(effect);
  } catch (runtimeError) {
    // When `run` settled, its own outcome is surfaced below and any
    // runtime/span error is ignored. When it never settled there is no
    // outcome to surface, so hiding the runtime error would hand the
    // caller a bogus `undefined` result.
    if (!completed && !failed) {
      throw runtimeError;
    }
  }
  if (failed) {
    throw captured;
  }
  return /** @type {T} */ (result);
}
/**
 * Normalise the caller label supplied with a request.
 *
 * @param {unknown} value
 * @returns {string} The trimmed value cut to at most 256 characters, or
 *   `"unknown"` when the input is not a string or is blank after trimming.
 */
function normalizeCaller(value) {
  if (typeof value !== "string") {
    return "unknown";
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed.slice(0, 256) : "unknown";
}
/**
 * Fetch the most recent frame of a run through `adapter.getLastFrame` and
 * coerce its fields to plain primitives.
 *
 * @param {SmithersDb} adapter
 * @param {string} runId
 * @returns {Promise<{ frameNo: number; createdAtMs: number; xmlJson: string } | null>}
 *   `null` when the adapter reports no frame. A nullish `xmlJson` is
 *   replaced by the string `"{}"`.
 */
async function readLatestFrame(adapter, runId) {
  const latest = await adapter.getLastFrame(runId);
  if (!latest) {
    return null;
  }
  return {
    frameNo: Number(latest.frameNo),
    createdAtMs: Number(latest.createdAtMs),
    xmlJson: String(latest.xmlJson ?? "{}"),
  };
}
/**
 * Read one frame row from `_smithers_frames` by run id and frame number,
 * using the raw SQLite client resolved from the adapter.
 *
 * @param {SmithersDb} adapter
 * @param {string} runId
 * @param {number} frameNo
 * @returns {Promise<{ frameNo: number; createdAtMs: number; xmlJson: string } | null>}
 *   `null` when no matching row exists. A nullish `xml_json` column is
 *   replaced by the string `"{}"`.
 */
async function readFrameByNo(adapter, runId, frameNo) {
  const client = resolveSqliteClient(adapter);
  const row = client
    .query(
      `SELECT frame_no AS frameNo, created_at_ms AS createdAtMs, xml_json AS xmlJson
       FROM _smithers_frames
       WHERE run_id = ? AND frame_no = ?
       LIMIT 1`,
    )
    .get(runId, frameNo);
  if (!row) {
    return null;
  }
  return {
    frameNo: Number(row.frameNo),
    createdAtMs: Number(row.createdAtMs),
    xmlJson: String(row.xmlJson ?? "{}"),
  };
}
/**
 * Count the rows in `_smithers_frames` for a run whose `frame_no` is
 * strictly greater than the target frame.
 *
 * @param {SmithersDb} adapter
 * @param {string} runId
 * @param {number} targetFrameNo
 * @returns {Promise<number>} The count, or 0 when the query yields no row.
 */
async function countFramesAfter(adapter, runId, targetFrameNo) {
  const client = resolveSqliteClient(adapter);
  const row = client
    .query(
      `SELECT COUNT(*) AS count
       FROM _smithers_frames
       WHERE run_id = ? AND frame_no > ?`,
    )
    .get(runId, targetFrameNo);
  return Number(row?.count ?? 0);
}
/**
 * Delete every row in `_smithers_attempts` for a run whose
 * `started_at_ms` is strictly greater than the cutoff.
 *
 * @param {SmithersDb} adapter
 * @param {string} runId
 * @param {number} cutoffMs
 * @returns {Promise<void>}
 */
async function deleteAttemptsStartedAfter(adapter, runId, cutoffMs) {
  const client = resolveSqliteClient(adapter);
  client
    .query(
      `DELETE FROM _smithers_attempts
       WHERE run_id = ?
         AND started_at_ms > ?`,
    )
    .run(runId, cutoffMs);
}
/**
 * Put the given (node, iteration) pairs of a run back into the `pending`
 * state in `_smithers_nodes`, clearing `last_attempt` and stamping
 * `updated_at_ms` with `nowMs`.
 *
 * Returns immediately, without touching the adapter, when there is
 * nothing to reset.
 *
 * @param {SmithersDb} adapter
 * @param {string} runId
 * @param {Array<{ nodeId: string; iteration: number }>} nodeKeys
 * @param {number} nowMs
 * @returns {Promise<void>}
 */
async function resetNodesToPending(adapter, runId, nodeKeys, nowMs) {
  if (nodeKeys.length === 0) {
    return;
  }
  const client = resolveSqliteClient(adapter);
  // The statement is created once and executed once per key.
  const statement = client.query(
    `UPDATE _smithers_nodes
     SET state = ?,
         last_attempt = NULL,
         updated_at_ms = ?
     WHERE run_id = ?
       AND node_id = ?
       AND iteration = ?`,
  );
  for (const key of nodeKeys) {
    statement.run("pending", nowMs, runId, key.nodeId, key.iteration);
  }
}
/**
 * Build a lookup from `"<nodeId>::<iteration>"` to the node's output table
 * name, based on `adapter.listNodes(runId)`.
 *
 * Rows without a string `nodeId`, or without a non-empty string
 * `outputTable`, are left out. A missing `iteration` is treated as 0.
 *
 * @param {SmithersDb} adapter
 * @param {string} runId
 * @returns {Promise<Map<string, string>>}
 */
async function readNodeOutputTableMap(adapter, runId) {
  const rows = await adapter.listNodes(runId);
  /** @type {Map<string, string>} */
  const map = new Map();
  for (const row of rows) {
    if (typeof row?.nodeId !== "string") {
      continue;
    }
    const iteration = Number(row?.iteration ?? 0);
    const outputTable = asString(row?.outputTable);
    // `asString` yields null for non-strings; the empty string is falsy too.
    if (!outputTable) {
      continue;
    }
    map.set(`${row.nodeId}::${iteration}`, outputTable);
  }
  return map;
}
/**
 * Delete output rows for the given (table, node, iteration) targets of a
 * run and report how many rows matched.
 *
 * Table names that fail `OUTPUT_TABLE_PATTERN` are skipped, and the ones
 * that pass are additionally quoted before being placed in the SQL text.
 * Row values are always bound as parameters. A "no such table" error for
 * a target is ignored; every other error is re-thrown.
 *
 * @param {SmithersDb} adapter
 * @param {Array<{ tableName: string; nodeId: string; iteration: number }>} targets
 * @param {string} runId
 * @returns {Promise<number>} Sum of the per-target row counts taken just
 *   before each delete.
 */
async function deleteOutputTargets(adapter, targets, runId) {
  if (targets.length === 0) {
    return 0;
  }
  const client = resolveSqliteClient(adapter);
  let deleted = 0;
  for (const target of targets) {
    if (!OUTPUT_TABLE_PATTERN.test(target.tableName)) {
      continue;
    }
    const tableSql = quoteIdentifier(target.tableName);
    try {
      // Count first: the statement type declares `run` as returning
      // `unknown`, so the deleted-row count is taken from a SELECT.
      const countRow = client
        .query(
          `SELECT COUNT(*) AS count
           FROM ${tableSql}
           WHERE run_id = ? AND node_id = ? AND iteration = ?`,
        )
        .get(runId, target.nodeId, target.iteration);
      deleted += Number(countRow?.count ?? 0);
      client
        .query(
          `DELETE FROM ${tableSql}
           WHERE run_id = ? AND node_id = ? AND iteration = ?`,
        )
        .run(runId, target.nodeId, target.iteration);
    } catch (error) {
      const message = formatError(error);
      if (/no such table/i.test(message)) {
        continue;
      }
      throw error;
    }
  }
  return deleted;
}
/**
 * Flag a run as needing operator attention after a failed rewind.
 *
 * First tries to set the run status to `needs_attention`. If that update
 * throws for any reason, the run is marked `failed` instead. Both updates
 * carry the same `errorJson` payload (`code: "RewindFailed"`,
 * `needsAttention: true`, the reason, and the timestamp), so the intent
 * survives the fallback. An error from the fallback update propagates.
 *
 * @param {SmithersDb} adapter
 * @param {string} runId
 * @param {number} nowMs Used for `finishedAtMs` and the payload timestamp.
 * @param {string} reason Stored as the payload `message`.
 * @returns {Promise<void>}
 */
async function markRunNeedsAttention(adapter, runId, nowMs, reason) {
  const payload = JSON.stringify({
    code: "RewindFailed",
    needsAttention: true,
    message: reason,
    timestampMs: nowMs,
  });
  // Everything except `status` is identical between the two attempts.
  const sharedFields = {
    finishedAtMs: nowMs,
    heartbeatAtMs: null,
    runtimeOwnerId: null,
    errorJson: payload,
  };
  try {
    await adapter.updateRun(runId, { status: "needs_attention", ...sharedFields });
    return;
  } catch {
    // Older status enums may not accept `needs_attention`; fall back while preserving intent in errorJson.
  }
  await adapter.updateRun(runId, { status: "failed", ...sharedFields });
}
/**
 * Default sandbox revert: runs `revertToJjPointer(pointer, cwd)` to
 * completion with the Bun platform layer provided.
 *
 * @param {string} pointer
 * @param {string | undefined} cwd
 * @returns {Promise<unknown>} Whatever the `revertToJjPointer` effect
 *   produces; callers in this file read `success` and `error` from it.
 */
async function defaultRevertToPointer(pointer, cwd) {
  return await Effect.runPromise(
    revertToJjPointer(pointer, cwd).pipe(Effect.provide(BunContext.layer)),
  );
}
/**
 * Default pointer lookup: runs `getJjPointer(cwd)` to completion with the
 * Bun platform layer provided.
 *
 * @param {string | undefined} cwd
 * @returns {Promise<unknown>} Whatever the `getJjPointer` effect produces;
 *   callers in this file treat it as `string | null`.
 */
async function defaultGetCurrentPointer(cwd) {
  return await Effect.runPromise(
    getJjPointer(cwd).pipe(Effect.provide(BunContext.layer)),
  );
}
/**
 * Invoke the optional per-step hook for the given stage.
 *
 * `"before"` calls `hooks.beforeStep(step)` and `"after"` calls
 * `hooks.afterStep(step)`, each only when present. Hook errors are not
 * caught here; they propagate to the caller.
 *
 * @param {JumpToFrameInput["hooks"]} hooks
 * @param {"before" | "after"} stage
 * @param {JumpStepName} step
 * @returns {Promise<void>}
 */
async function runStepHook(hooks, stage, step) {
  if (!hooks) {
    return;
  }
  if (stage === "before" && hooks.beforeStep) {
    await hooks.beforeStep(step);
  }
  if (stage === "after" && hooks.afterStep) {
    await hooks.afterStep(step);
  }
}
/**
 * Best-effort restore of every already-reverted sandbox to the pointer it
 * had before the jump, walking the list in reverse order.
 *
 * Every sandbox is attempted even if an earlier one fails. A sandbox
 * counts as failed when it has no usable `previousPointer`, when the
 * revert implementation reports `success: false` (or returns nothing), or
 * when the revert implementation throws or rejects. This function itself
 * never throws because of a revert failure, so the caller's remaining
 * cleanup can still run.
 *
 * @param {Array<{ cwd: string; targetPointer: string; previousPointer: string | null }>} revertedSandboxes
 * @param {(pointer: string, cwd?: string) => Promise<{ success: boolean; error?: string }>} revertToPointerImpl
 * @returns {Promise<Array<{ cwd: string; error: string }>>} One entry per
 *   sandbox that could not be restored, in the order they were attempted.
 */
async function rollbackSandboxPointers(revertedSandboxes, revertToPointerImpl) {
  /** @type {Array<{ cwd: string; error: string }>} */
  const failures = [];
  for (let index = revertedSandboxes.length - 1; index >= 0; index -= 1) {
    const sandbox = revertedSandboxes[index];
    if (typeof sandbox.previousPointer !== "string" || sandbox.previousPointer.length === 0) {
      failures.push({ cwd: sandbox.cwd, error: "Missing pre-jump pointer." });
      continue;
    }
    try {
      const restored = await revertToPointerImpl(sandbox.previousPointer, sandbox.cwd);
      if (!restored?.success) {
        failures.push({
          cwd: sandbox.cwd,
          error: restored?.error ?? "Failed to restore sandbox pointer.",
        });
      }
    } catch (error) {
      // A throwing revert must not stop the remaining sandboxes from being
      // restored; record it like any other failure.
      failures.push({
        cwd: sandbox.cwd,
        error: error instanceof Error ? error.message : String(error),
      });
    }
  }
  return failures;
}
/** @typedef {import("@smithers-orchestrator/db").AttemptRow} AttemptRow */

/**
 * Work out, for each sandbox directory touched by an attempt that is about
 * to be deleted, which pointer it should be reverted to and which pointer
 * it currently sits on.
 *
 * The affected directories are the distinct non-empty `jjCwd` values of
 * `attemptsToDelete`, in first-seen order. For each one the target is the
 * `jjPointer` of the attempt in `attemptsForRun` with the greatest
 * `startedAtMs` that is still `<= cutoffMs` (a missing `startedAtMs`
 * counts as -1). The choice is made by timestamp rather than by array
 * position, so it does not rely on `attemptsForRun` being sorted. Among
 * equal timestamps the later array element wins, which matches a
 * "take the last one" reading of an ascending list.
 *
 * @param {ReadonlyArray<AttemptRow>} attemptsForRun
 * @param {ReadonlyArray<AttemptRow>} attemptsToDelete
 * @param {number} cutoffMs
 * @param {(cwd?: string) => Promise<string | null>} getCurrentPointerImpl
 * @returns {Promise<Array<{ cwd: string; targetPointer: string; previousPointer: string | null }>>}
 * @throws {JumpToFrameError} Code `UnsupportedSandbox` when an affected
 *   directory has no attempt with a pointer at or before the cutoff.
 */
async function planSandboxReverts(
  attemptsForRun,
  attemptsToDelete,
  cutoffMs,
  getCurrentPointerImpl,
) {
  /** @type {Set<string>} */
  const affectedCwds = new Set();
  for (const attempt of attemptsToDelete) {
    if (typeof attempt?.jjCwd === "string" && attempt.jjCwd.length > 0) {
      affectedCwds.add(attempt.jjCwd);
    }
  }

  // One pass over the run's attempts: remember, per affected cwd, the
  // pointer of the latest attempt that started at or before the cutoff.
  /** @type {Map<string, { pointer: string; startedAtMs: number }>} */
  const latestBeforeCutoff = new Map();
  for (const attempt of attemptsForRun) {
    const cwd = attempt?.jjCwd;
    if (typeof cwd !== "string" || !affectedCwds.has(cwd)) {
      continue;
    }
    if (typeof attempt.jjPointer !== "string" || attempt.jjPointer.length === 0) {
      continue;
    }
    const startedAtMs = Number(attempt.startedAtMs ?? -1);
    // Written as a negated `<=` so a NaN timestamp is excluded.
    if (!(startedAtMs <= cutoffMs)) {
      continue;
    }
    const best = latestBeforeCutoff.get(cwd);
    if (!best || startedAtMs >= best.startedAtMs) {
      latestBeforeCutoff.set(cwd, { pointer: attempt.jjPointer, startedAtMs });
    }
  }

  /** @type {Array<{ cwd: string; targetPointer: string; previousPointer: string | null }>} */
  const plan = [];
  for (const cwd of affectedCwds) {
    const target = latestBeforeCutoff.get(cwd);
    if (!target) {
      throw new JumpToFrameError(
        "UnsupportedSandbox",
        `Could not resolve a rewind pointer for sandbox cwd ${cwd}.`,
      );
    }
    // Captured so the caller can undo the revert if a later step fails.
    const previousPointer = await getCurrentPointerImpl(cwd);
    plan.push({ cwd, targetPointer: target.pointer, previousPointer });
  }
  return plan;
}
469
+ /**
470
+ * Rewind a run to a previous frame and make it resumable from that point.
471
+ *
472
+ * @param {JumpToFrameInput} input
473
+ * @returns {Promise<JumpResult>}
474
+ */
475
+ export async function jumpToFrame(input) {
476
+ const nowMs = input.nowMs ?? (() => Date.now());
477
+ const startedAtMs = nowMs();
478
+ const caller = normalizeCaller(input.caller);
479
+
480
+ let runIdForAudit = typeof input.runId === "string" ? input.runId : "invalid-run-id";
481
+ let fromFrameNoForAudit = -1;
482
+ let toFrameNoForAudit = Number.isInteger(input.frameNo) ? Number(input.frameNo) : -1;
483
+ /** @type {"success" | "failed" | "partial"} */
484
+ let auditResult = "failed";
485
+
486
+ /** @type {JumpResult | null} */
487
+ let successResult = null;
488
+ /** @type {JumpToFrameError | null} */
489
+ let finalError = null;
490
+
491
+ let lock = null;
492
+ /** @type {number | null} */
493
+ let auditRowId = null;
494
+
495
+ try {
496
+ return await withSpan(
497
+ "timetravel.jumpToFrame",
498
+ {
499
+ runId: typeof input.runId === "string" ? input.runId : "",
500
+ caller,
501
+ toFrameNo: typeof input.frameNo === "number" ? input.frameNo : -1,
502
+ },
503
+ async () => {
504
+ const runId = validateJumpRunId(input.runId);
505
+ const targetFrameNo = validateJumpFrameNo(input.frameNo);
506
+ runIdForAudit = runId;
507
+ toFrameNoForAudit = targetFrameNo;
508
+
509
+ if (input.confirm !== true) {
510
+ throw new JumpToFrameError(
511
+ "ConfirmationRequired",
512
+ "jumpToFrame is destructive; pass confirm: true to proceed.",
513
+ );
514
+ }
515
+
516
+ lock = await withSpan(
517
+ "timetravel.lock.acquire",
518
+ { runId },
519
+ async () => {
520
+ const handle = acquireRewindLock(runId);
521
+ if (!handle) {
522
+ throw new JumpToFrameError(
523
+ "Busy",
524
+ `Another jumpToFrame is already running for ${runId}.`,
525
+ );
526
+ }
527
+ return handle;
528
+ },
529
+ );
530
+
531
+ const rateLimit = await evaluateRewindRateLimit({
532
+ adapter: input.adapter,
533
+ runId,
534
+ caller,
535
+ nowMs,
536
+ maxPerWindow: input.rateLimit?.maxPerWindow,
537
+ windowMs: input.rateLimit?.windowMs,
538
+ });
539
+ if (rateLimit.limited) {
540
+ throw new JumpToFrameError(
541
+ "RateLimited",
542
+ `Rewind quota exceeded for ${runId}; max ${rateLimit.max} per ${Math.floor(
543
+ rateLimit.windowMs / 60_000,
544
+ )}m.`,
545
+ );
546
+ }
547
+
548
+ // Durable in_progress audit row is written BEFORE any mutation so a
549
+ // process kill leaves a marker for startup recovery.
550
+ auditRowId = await withSpan(
551
+ "timetravel.db.audit.insert",
552
+ { runId, caller, state: "in_progress" },
553
+ async () =>
554
+ await writeRewindAuditRow(input.adapter, {
555
+ runId,
556
+ fromFrameNo: fromFrameNoForAudit,
557
+ toFrameNo: targetFrameNo,
558
+ caller,
559
+ timestampMs: startedAtMs,
560
+ result: "in_progress",
561
+ durationMs: null,
562
+ }),
563
+ );
564
+
565
+ const run = await input.adapter.getRun(runId);
566
+ if (!run) {
567
+ throw new JumpToFrameError("RunNotFound", `Run not found: ${runId}`);
568
+ }
569
+
570
+ const latestFrame = await readLatestFrame(input.adapter, runId);
571
+ if (!latestFrame) {
572
+ throw new JumpToFrameError("FrameOutOfRange", `Run ${runId} has no frames.`);
573
+ }
574
+ fromFrameNoForAudit = latestFrame.frameNo;
575
+
576
+ if (targetFrameNo > latestFrame.frameNo) {
577
+ throw new JumpToFrameError(
578
+ "FrameOutOfRange",
579
+ `frameNo must be between 0 and ${latestFrame.frameNo}.`,
580
+ );
581
+ }
582
+
583
+ const targetFrame = await readFrameByNo(input.adapter, runId, targetFrameNo);
584
+ if (!targetFrame) {
585
+ throw new JumpToFrameError(
586
+ "FrameOutOfRange",
587
+ `Frame ${targetFrameNo} does not exist for run ${runId}.`,
588
+ );
589
+ }
590
+
591
+ await emitLog(input.onLog, "info", "jumpToFrame started", {
592
+ runId,
593
+ fromFrameNo: latestFrame.frameNo,
594
+ toFrameNo: targetFrameNo,
595
+ caller,
596
+ });
597
+
598
+ if (targetFrameNo === latestFrame.frameNo) {
599
+ auditResult = "success";
600
+ successResult = {
601
+ ok: true,
602
+ newFrameNo: targetFrameNo,
603
+ revertedSandboxes: 0,
604
+ deletedFrames: 0,
605
+ deletedAttempts: 0,
606
+ invalidatedDiffs: 0,
607
+ durationMs: Math.max(0, nowMs() - startedAtMs),
608
+ };
609
+ return successResult;
610
+ }
611
+
612
+ await runStepHook(input.hooks, "before", "snapshot-pre-jump");
613
+ const attemptsForRun = await input.adapter.listAttemptsForRun(runId);
614
+ const attemptsToDelete = attemptsForRun.filter(
615
+ (attempt) => Number(attempt?.startedAtMs ?? -1) > targetFrame.createdAtMs,
616
+ );
617
+ const getCurrentPointerImpl = input.getCurrentPointerImpl ?? defaultGetCurrentPointer;
618
+ const revertToPointerImpl = input.revertToPointerImpl ?? defaultRevertToPointer;
619
+ const sandboxPlan = await planSandboxReverts(
620
+ attemptsForRun,
621
+ attemptsToDelete,
622
+ targetFrame.createdAtMs,
623
+ getCurrentPointerImpl,
624
+ );
625
+
626
+ const reconcilerSnapshot = await withSpan(
627
+ "timetravel.snapshot.preJump",
628
+ { runId, sandboxes: sandboxPlan.length },
629
+ async () =>
630
+ input.captureReconcilerState ? await input.captureReconcilerState() : null,
631
+ );
632
+ await runStepHook(input.hooks, "after", "snapshot-pre-jump");
633
+
634
+ /** @type {Array<{ cwd: string; targetPointer: string; previousPointer: string | null }>} */
635
+ const revertedSandboxes = [];
636
+ let paused = false;
637
+
638
+ try {
639
+ await runStepHook(input.hooks, "before", "pause-event-loop");
640
+ if (input.pauseRunLoop) {
641
+ await input.pauseRunLoop();
642
+ }
643
+ paused = true;
644
+ await runStepHook(input.hooks, "after", "pause-event-loop");
645
+
646
+ await runStepHook(input.hooks, "before", "revert-sandboxes");
647
+ for (const sandbox of sandboxPlan) {
648
+ const reverted = await withSpan(
649
+ "timetravel.vcs.revert.target",
650
+ { cwd: sandbox.cwd, pointer: sandbox.targetPointer },
651
+ async () => revertToPointerImpl(sandbox.targetPointer, sandbox.cwd),
652
+ );
653
+ if (!reverted.success) {
654
+ throw new JumpToFrameError(
655
+ "VcsError",
656
+ reverted.error ?? `Failed to revert sandbox cwd ${sandbox.cwd}.`,
657
+ {
658
+ details: {
659
+ cwd: sandbox.cwd,
660
+ pointer: sandbox.targetPointer,
661
+ },
662
+ },
663
+ );
664
+ }
665
+ revertedSandboxes.push(sandbox);
666
+ }
667
+ await runStepHook(input.hooks, "after", "revert-sandboxes");
668
+
669
+ const deletedFrames = await countFramesAfter(input.adapter, runId, targetFrameNo);
670
+ const deletedAttempts = attemptsToDelete.length;
671
+
672
+ const outputTableMap = await readNodeOutputTableMap(input.adapter, runId);
673
+ /** @type {Map<string, { tableName: string; nodeId: string; iteration: number }>} */
674
+ const outputTargetsMap = new Map();
675
+ /** @type {Map<string, { nodeId: string; iteration: number }>} */
676
+ const nodeResetMap = new Map();
677
+ for (const attempt of attemptsToDelete) {
678
+ const nodeId = asString(attempt?.nodeId);
679
+ if (!nodeId) {
680
+ continue;
681
+ }
682
+ const iteration = Number(attempt?.iteration ?? 0);
683
+ const key = `${nodeId}::${iteration}`;
684
+ nodeResetMap.set(key, { nodeId, iteration });
685
+ const tableName = outputTableMap.get(key);
686
+ if (!tableName) {
687
+ continue;
688
+ }
689
+ outputTargetsMap.set(`${tableName}:${key}`, {
690
+ tableName,
691
+ nodeId,
692
+ iteration,
693
+ });
694
+ }
695
+
696
+ // Durable mutation: frames/attempts/outputs/diffs/reconciler/run-status/event
697
+ // all commit together or roll back together. If the event insert throws
698
+ // the frames truncation is reverted too, so DB is never left mutated
699
+ // without an audit/event record.
700
+ const dbStats = await (async () =>
701
+ await input.adapter.withTransaction(
702
+ `jump to frame ${runId}:${targetFrameNo}`,
703
+ Effect.gen(function* () {
704
+ // Invalidate node-diff cache BEFORE we truncate frames /
705
+ // attempts: the adapter hook computes which diffs are beyond
706
+ // the target by looking at the frame/attempt join, and that
707
+ // only works while frames/attempts are still intact.
708
+ yield* Effect.promise(() =>
709
+ runStepHook(input.hooks, "before", "invalidate-diffs"),
710
+ );
711
+ const invalidatedDiffs = yield* input.adapter
712
+ .invalidateNodeDiffsAfterFrame(runId, targetFrameNo);
713
+ yield* Effect.promise(() =>
714
+ runStepHook(input.hooks, "after", "invalidate-diffs"),
715
+ );
716
+
717
+ yield* Effect.promise(() =>
718
+ runStepHook(input.hooks, "before", "truncate-frames"),
719
+ );
720
+ yield* input.adapter.deleteFramesAfter(runId, targetFrameNo);
721
+ yield* Effect.promise(() =>
722
+ runStepHook(input.hooks, "after", "truncate-frames"),
723
+ );
724
+
725
+ yield* Effect.promise(() =>
726
+ runStepHook(input.hooks, "before", "truncate-attempts"),
727
+ );
728
+ yield* Effect.promise(() =>
729
+ deleteAttemptsStartedAfter(
730
+ input.adapter,
731
+ runId,
732
+ targetFrame.createdAtMs,
733
+ ),
734
+ );
735
+ yield* Effect.promise(() =>
736
+ runStepHook(input.hooks, "after", "truncate-attempts"),
737
+ );
738
+
739
+ yield* Effect.promise(() =>
740
+ runStepHook(input.hooks, "before", "truncate-outputs"),
741
+ );
742
+ const deletedOutputs = yield* Effect.promise(() =>
743
+ deleteOutputTargets(
744
+ input.adapter,
745
+ [...outputTargetsMap.values()],
746
+ runId,
747
+ ),
748
+ );
749
+ yield* Effect.promise(() =>
750
+ runStepHook(input.hooks, "after", "truncate-outputs"),
751
+ );
752
+
753
+ yield* Effect.promise(() =>
754
+ runStepHook(input.hooks, "before", "rebuild-reconciler"),
755
+ );
756
+ if (input.rebuildReconcilerState) {
757
+ yield* Effect.promise(() =>
758
+ input.rebuildReconcilerState?.(targetFrame.xmlJson),
759
+ );
760
+ }
761
+ yield* Effect.promise(() =>
762
+ runStepHook(input.hooks, "after", "rebuild-reconciler"),
763
+ );
764
+
765
+ yield* Effect.promise(() =>
766
+ resetNodesToPending(
767
+ input.adapter,
768
+ runId,
769
+ [...nodeResetMap.values()],
770
+ nowMs(),
771
+ ),
772
+ );
773
+
774
+ yield* input.adapter.updateRun(runId, {
775
+ status: "running",
776
+ finishedAtMs: null,
777
+ heartbeatAtMs: null,
778
+ runtimeOwnerId: null,
779
+ cancelRequestedAtMs: null,
780
+ hijackRequestedAtMs: null,
781
+ hijackTarget: null,
782
+ errorJson: null,
783
+ });
784
+
785
+ // Persist the TimeTravelJumped event inside the same
786
+ // transaction so frames/attempts truncation and audit/event
787
+ // rows commit atomically — there is no partial durable state.
788
+ const event = {
789
+ type: "TimeTravelJumped",
790
+ runId,
791
+ fromFrameNo: latestFrame.frameNo,
792
+ toFrameNo: targetFrameNo,
793
+ timestampMs: nowMs(),
794
+ caller,
795
+ };
796
+ // Insert the event row via raw SQL inside the enclosing
797
+ // transaction. We deliberately avoid `insertEventWithNextSeq`
798
+ // here because it opens its own BEGIN IMMEDIATE and would
799
+ // error out under a nested transaction.
800
+ yield* Effect.promise(() => {
801
+ const txnClient = resolveSqliteClient(input.adapter);
802
+ const seqRow = txnClient
803
+ .query(
804
+ `SELECT COALESCE(MAX(seq), -1) + 1 AS seq
805
+ FROM _smithers_events
806
+ WHERE run_id = ?`,
807
+ )
808
+ .get(runId);
809
+ const seq = Number(seqRow?.seq ?? 0);
810
+ txnClient
811
+ .query(
812
+ `INSERT INTO _smithers_events (run_id, seq, timestamp_ms, type, payload_json)
813
+ VALUES (?, ?, ?, ?, ?)`,
814
+ )
815
+ .run(
816
+ runId,
817
+ seq,
818
+ event.timestampMs,
819
+ event.type,
820
+ JSON.stringify(event),
821
+ );
822
+ return Promise.resolve(seq);
823
+ });
824
+
825
+ return {
826
+ deletedFrames,
827
+ deletedAttempts,
828
+ deletedOutputs,
829
+ invalidatedDiffs,
830
+ event,
831
+ };
832
+ }),
833
+ ))();
834
+
835
+ // In-memory broadcast is non-fatal: the durable event row is already
836
+ // committed, so subscribers can reconcile from seq on reconnect.
837
+ if (input.emitEvent) {
838
+ try {
839
+ await withSpan(
840
+ "timetravel.eventbus.emit",
841
+ { runId, type: "TimeTravelJumped" },
842
+ async () =>
843
+ input.emitEvent?.(/** @type {SmithersEvent} */ (dbStats.event)),
844
+ );
845
+ } catch (emitError) {
846
+ await emitLog(input.onLog, "warn", "jumpToFrame emit broadcast failed", {
847
+ runId,
848
+ caller,
849
+ error: formatError(emitError),
850
+ });
851
+ }
852
+ }
853
+
854
+ await runStepHook(input.hooks, "before", "resume-event-loop");
855
+ if (input.resumeRunLoop) {
856
+ await input.resumeRunLoop();
857
+ }
858
+ paused = false;
859
+ await runStepHook(input.hooks, "after", "resume-event-loop");
860
+
861
+ auditResult = "success";
862
+ successResult = {
863
+ ok: true,
864
+ newFrameNo: targetFrameNo,
865
+ revertedSandboxes: sandboxPlan.length,
866
+ deletedFrames: dbStats.deletedFrames,
867
+ deletedAttempts: dbStats.deletedAttempts,
868
+ invalidatedDiffs: dbStats.invalidatedDiffs,
869
+ durationMs: Math.max(0, nowMs() - startedAtMs),
870
+ };
871
+
872
+ await emitLog(input.onLog, "info", "jumpToFrame succeeded", {
873
+ runId,
874
+ caller,
875
+ fromFrameNo: latestFrame.frameNo,
876
+ toFrameNo: targetFrameNo,
877
+ revertedSandboxes: sandboxPlan.length,
878
+ deletedFrames: dbStats.deletedFrames,
879
+ deletedAttempts: dbStats.deletedAttempts,
880
+ deletedOutputs: dbStats.deletedOutputs,
881
+ invalidatedDiffs: dbStats.invalidatedDiffs,
882
+ durationMs: successResult.durationMs,
883
+ });
884
+
885
+ return successResult;
886
+ } catch (error) {
887
+ const rollbackSandboxErrors = await rollbackSandboxPointers(
888
+ revertedSandboxes,
889
+ revertToPointerImpl,
890
+ );
891
+ let rollbackReconcilerError = null;
892
+ if (input.restoreReconcilerState) {
893
+ try {
894
+ await input.restoreReconcilerState(reconcilerSnapshot);
895
+ } catch (restoreError) {
896
+ rollbackReconcilerError = formatError(restoreError);
897
+ }
898
+ }
899
+
900
+ if (paused) {
901
+ try {
902
+ await input.resumeRunLoop?.();
903
+ } catch (resumeError) {
904
+ rollbackSandboxErrors.push({
905
+ cwd: "<event-loop>",
906
+ error: formatError(resumeError),
907
+ });
908
+ }
909
+ }
910
+
911
+ if (rollbackSandboxErrors.length > 0 || rollbackReconcilerError) {
912
+ auditResult = "partial";
913
+ const now = nowMs();
914
+ const reason = [
915
+ `rollback sandbox failures=${rollbackSandboxErrors.length}`,
916
+ rollbackReconcilerError ? `reconciler=${rollbackReconcilerError}` : null,
917
+ ]
918
+ .filter(Boolean)
919
+ .join("; ");
920
+ await markRunNeedsAttention(
921
+ input.adapter,
922
+ runId,
923
+ now,
924
+ reason || "Rewind rollback was partial.",
925
+ );
926
+ finalError = new JumpToFrameError(
927
+ "RewindFailed",
928
+ "Rewind failed and rollback was only partial; run needs attention.",
929
+ {
930
+ details: {
931
+ cause: formatError(error),
932
+ rollbackSandboxErrors,
933
+ rollbackReconcilerError,
934
+ },
935
+ },
936
+ );
937
+ await emitLog(input.onLog, "warn", "jumpToFrame rollback partial", {
938
+ runId,
939
+ caller,
940
+ rollbackSandboxErrors,
941
+ rollbackReconcilerError,
942
+ });
943
+ } else {
944
+ finalError =
945
+ error instanceof JumpToFrameError
946
+ ? error
947
+ : new JumpToFrameError("RewindFailed", formatError(error));
948
+ }
949
+
950
+ throw finalError;
951
+ }
952
+ },
953
+ );
954
+ } catch (error) {
955
+ if (!finalError) {
956
+ finalError =
957
+ error instanceof JumpToFrameError
958
+ ? error
959
+ : new JumpToFrameError("RewindFailed", formatError(error));
960
+ }
961
+ } finally {
962
+ const durationMs = Math.max(0, nowMs() - startedAtMs);
963
+
964
+ // Persist the terminal audit state BEFORE releasing the lock so a second
965
+ // caller cannot beat us to the rate-limit count.
966
+ try {
967
+ if (auditRowId !== null) {
968
+ await updateRewindAuditRow(input.adapter, {
969
+ id: auditRowId,
970
+ result: auditResult,
971
+ durationMs,
972
+ fromFrameNo: fromFrameNoForAudit,
973
+ });
974
+ } else {
975
+ // We threw before reaching the in_progress write (usually validation /
976
+ // lock-busy / rate-limit). Still record the attempt for auditability.
977
+ await writeRewindAuditRow(input.adapter, {
978
+ runId: runIdForAudit,
979
+ fromFrameNo: fromFrameNoForAudit,
980
+ toFrameNo: toFrameNoForAudit,
981
+ caller,
982
+ timestampMs: startedAtMs,
983
+ result: auditResult,
984
+ durationMs,
985
+ });
986
+ }
987
+ await emitLog(input.onLog, "info", "jumpToFrame audit row written", {
988
+ runId: runIdForAudit,
989
+ fromFrameNo: fromFrameNoForAudit,
990
+ toFrameNo: toFrameNoForAudit,
991
+ caller,
992
+ result: auditResult,
993
+ });
994
+ } catch (auditError) {
995
+ await emitLog(input.onLog, "error", "jumpToFrame audit write failed", {
996
+ runId: runIdForAudit,
997
+ fromFrameNo: fromFrameNoForAudit,
998
+ toFrameNo: toFrameNoForAudit,
999
+ caller,
1000
+ result: auditResult,
1001
+ error: formatError(auditError),
1002
+ });
1003
+ if (!finalError) {
1004
+ finalError = new JumpToFrameError(
1005
+ "RewindFailed",
1006
+ "Failed to persist rewind audit row.",
1007
+ );
1008
+ }
1009
+ }
1010
+
1011
+ if (lock) {
1012
+ lock.release();
1013
+ }
1014
+
1015
+ let metricResultTag = "failed";
1016
+ if (auditResult === "success") {
1017
+ metricResultTag = "success";
1018
+ } else if (auditResult === "partial") {
1019
+ metricResultTag = "partial";
1020
+ } else if (finalError?.code === "Busy") {
1021
+ metricResultTag = "busy";
1022
+ } else if (finalError?.code === "RateLimited") {
1023
+ metricResultTag = "rate_limited";
1024
+ }
1025
+ try {
1026
+ await Effect.runPromise(
1027
+ Effect.all([
1028
+ Metric.increment(Metric.tagged(rewindTotal, "result", metricResultTag)),
1029
+ Metric.update(rewindDurationMs, durationMs),
1030
+ ]),
1031
+ );
1032
+ if (successResult) {
1033
+ await Effect.runPromise(
1034
+ Effect.all([
1035
+ Metric.update(rewindFramesDeleted, successResult.deletedFrames),
1036
+ Metric.update(rewindSandboxesReverted, successResult.revertedSandboxes),
1037
+ ]),
1038
+ );
1039
+ }
1040
+ if (auditResult === "partial") {
1041
+ await Effect.runPromise(Metric.increment(rewindRollbackTotal));
1042
+ }
1043
+ } catch {
1044
+ // metrics failures must never fail the RPC
1045
+ }
1046
+
1047
+ // Emit a final `error` log for VcsError/RewindFailed/UnsupportedSandbox failures so operators
1048
+ // always see a crash in the log stream (complementing the partial-rollback
1049
+ // and audit-write logs emitted above).
1050
+ if (finalError && finalError.code !== "Busy" && finalError.code !== "RateLimited") {
1051
+ if (
1052
+ finalError.code === "VcsError" ||
1053
+ finalError.code === "RewindFailed" ||
1054
+ finalError.code === "UnsupportedSandbox"
1055
+ ) {
1056
+ await emitLog(input.onLog, "error", "jumpToFrame failed", {
1057
+ runId: runIdForAudit,
1058
+ fromFrameNo: fromFrameNoForAudit,
1059
+ toFrameNo: toFrameNoForAudit,
1060
+ caller,
1061
+ code: finalError.code,
1062
+ message: finalError.message,
1063
+ });
1064
+ }
1065
+ }
1066
+ }
1067
+
1068
+ if (finalError) {
1069
+ throw finalError;
1070
+ }
1071
+
1072
+ if (!successResult) {
1073
+ throw new JumpToFrameError("RewindFailed", "jumpToFrame completed without a result.");
1074
+ }
1075
+
1076
+ return successResult;
1077
+ }