la-machina-engine 0.19.3 → 0.19.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -10980,6 +10980,38 @@ var RunStateManager = class {
10980
10980
  await this.write(next);
10981
10981
  return next;
10982
10982
  }
10983
+ /**
10984
+ * Merge async lifecycle timing fields into the durable state.
10985
+ * Existing timestamps are preserved unless explicitly overwritten
10986
+ * by the caller.
10987
+ */
10988
+ async patchAsyncTiming(runId, nodeId, patch) {
10989
+ const current = await this.read(runId, nodeId);
10990
+ if (current === null) {
10991
+ throw new Error(`RunStateManager.patchAsyncTiming: no state found for ${runId}/${nodeId}`);
10992
+ }
10993
+ const next = {
10994
+ ...current,
10995
+ asyncTiming: { ...current.asyncTiming ?? {}, ...patch }
10996
+ };
10997
+ await this.write(next);
10998
+ return next;
10999
+ }
11000
+ async appendManualWebhookRetry(runId, nodeId, row) {
11001
+ const current = await this.read(runId, nodeId);
11002
+ if (current === null) {
11003
+ throw new Error(
11004
+ `RunStateManager.appendManualWebhookRetry: no state found for ${runId}/${nodeId}`
11005
+ );
11006
+ }
11007
+ const retries = current.manualWebhookRetries ?? [];
11008
+ const next = {
11009
+ ...current,
11010
+ manualWebhookRetries: [...retries, row]
11011
+ };
11012
+ await this.write(next);
11013
+ return next;
11014
+ }
10983
11015
  /**
10984
11016
  * Update just the heartbeat + progress (cheap, called every turn).
10985
11017
  */
@@ -11716,6 +11748,7 @@ ${inputJson}
11716
11748
  * uses fire-and-forget Promises which won't survive Worker request exit.
11717
11749
  */
11718
11750
  async start(options) {
11751
+ const startCalledAt = Date.now();
11719
11752
  const runId = options.runId ?? `run_${randomUUID()}`;
11720
11753
  const storage = await this.buildStorage();
11721
11754
  const stateManager = new RunStateManager(storage.workspace);
@@ -11727,19 +11760,62 @@ ${inputJson}
11727
11760
  deliveries: []
11728
11761
  } : void 0;
11729
11762
  const initial = RunStateManager.initial(runId, options.nodeId, webhook);
11730
- await stateManager.write(initial);
11763
+ await stateManager.write({
11764
+ ...initial,
11765
+ asyncTiming: { startCalledAt }
11766
+ });
11767
+ await this.recordAsyncTiming(stateManager, runId, options.nodeId, {
11768
+ initialStateWrittenAt: Date.now()
11769
+ });
11731
11770
  const handoffEnabled = this.config.runner !== void 0;
11771
+ await this.recordAsyncTiming(stateManager, runId, options.nodeId, {
11772
+ backgroundScheduledAt: Date.now()
11773
+ });
11732
11774
  this.backgroundExecutor.schedule(runId, async (signal) => {
11775
+ await this.recordAsyncTiming(stateManager, runId, options.nodeId, {
11776
+ backgroundStartedAt: Date.now()
11777
+ });
11778
+ if (signal.aborted) return;
11733
11779
  await stateManager.update(runId, options.nodeId, { status: "running" });
11734
11780
  try {
11781
+ await this.recordAsyncTiming(stateManager, runId, options.nodeId, {
11782
+ runCallStartedAt: Date.now()
11783
+ });
11735
11784
  const response = await this.run({ ...options, runId }, { handoffToRunner: handoffEnabled });
11785
+ await this.recordAsyncTiming(stateManager, runId, options.nodeId, {
11786
+ runCallCompletedAt: Date.now()
11787
+ });
11736
11788
  if (signal.aborted) return;
11789
+ await this.recordAsyncTiming(stateManager, runId, options.nodeId, {
11790
+ handoffStartedAt: Date.now()
11791
+ });
11737
11792
  const postHandoff = await this.maybeHandoffToRunner(runId, options.nodeId, response);
11793
+ await this.recordAsyncTiming(stateManager, runId, options.nodeId, {
11794
+ handoffCompletedAt: Date.now(),
11795
+ finalizeStartedAt: Date.now()
11796
+ });
11797
+ if (await this.runnerAlreadyWroteTerminal(stateManager, runId, options.nodeId, postHandoff)) {
11798
+ await this.recordAsyncTiming(stateManager, runId, options.nodeId, {
11799
+ finalizeCompletedAt: Date.now(),
11800
+ backgroundCompletedAt: Date.now()
11801
+ });
11802
+ return;
11803
+ }
11738
11804
  await stateManager.finalize(runId, options.nodeId, postHandoff);
11805
+ await this.recordAsyncTiming(stateManager, runId, options.nodeId, {
11806
+ finalizeCompletedAt: Date.now()
11807
+ });
11739
11808
  await this.maybeFireWebhook(stateManager, runId, options.nodeId, postHandoff);
11809
+ await this.recordAsyncTiming(stateManager, runId, options.nodeId, {
11810
+ backgroundCompletedAt: Date.now()
11811
+ });
11740
11812
  } catch (err) {
11741
11813
  if (signal.aborted) return;
11742
11814
  const errorMsg = err instanceof Error ? err.message : String(err);
11815
+ await this.recordAsyncTiming(stateManager, runId, options.nodeId, {
11816
+ backgroundFailedAt: Date.now(),
11817
+ errorMessage: errorMsg
11818
+ });
11743
11819
  const failResponse = {
11744
11820
  runId,
11745
11821
  status: "failed",
@@ -11748,8 +11824,17 @@ ${inputJson}
11748
11824
  errors: [{ code: "RUN_FAILED", message: errorMsg }],
11749
11825
  timestamp: Date.now()
11750
11826
  };
11827
+ await this.recordAsyncTiming(stateManager, runId, options.nodeId, {
11828
+ finalizeStartedAt: Date.now()
11829
+ });
11751
11830
  await stateManager.finalize(runId, options.nodeId, failResponse);
11831
+ await this.recordAsyncTiming(stateManager, runId, options.nodeId, {
11832
+ finalizeCompletedAt: Date.now()
11833
+ });
11752
11834
  await this.maybeFireWebhook(stateManager, runId, options.nodeId, failResponse);
11835
+ await this.recordAsyncTiming(stateManager, runId, options.nodeId, {
11836
+ backgroundCompletedAt: Date.now()
11837
+ });
11753
11838
  }
11754
11839
  });
11755
11840
  return { runId, nodeId: options.nodeId, status: "queued" };
@@ -11759,6 +11844,7 @@ ${inputJson}
11759
11844
  * dispatched via the background executor. Returns immediately.
11760
11845
  */
11761
11846
  async resumeAsync(options) {
11847
+ const startCalledAt = Date.now();
11762
11848
  const storage = await this.buildStorage();
11763
11849
  const stateManager = new RunStateManager(storage.workspace);
11764
11850
  let nodeId = options.nodeId;
@@ -11780,21 +11866,74 @@ ${inputJson}
11780
11866
  lastHeartbeat: Date.now(),
11781
11867
  response: null,
11782
11868
  // clear stale paused response so getStatus returns provisional
11869
+ asyncTiming: {
11870
+ ...existing.asyncTiming ?? {},
11871
+ startCalledAt
11872
+ },
11783
11873
  ...webhook !== void 0 ? { webhook } : {}
11784
- } : { ...RunStateManager.initial(options.runId, nodeId, webhook), status: "running" };
11874
+ } : {
11875
+ ...RunStateManager.initial(options.runId, nodeId, webhook),
11876
+ status: "running",
11877
+ asyncTiming: { startCalledAt }
11878
+ };
11785
11879
  await stateManager.write(next);
11880
+ await this.recordAsyncTiming(stateManager, options.runId, nodeId, {
11881
+ initialStateWrittenAt: Date.now()
11882
+ });
11786
11883
  const resumeNodeId = nodeId;
11787
11884
  const handoffEnabled = this.config.runner !== void 0;
11885
+ await this.recordAsyncTiming(stateManager, options.runId, resumeNodeId, {
11886
+ backgroundScheduledAt: Date.now()
11887
+ });
11788
11888
  this.backgroundExecutor.schedule(options.runId, async (signal) => {
11889
+ await this.recordAsyncTiming(stateManager, options.runId, resumeNodeId, {
11890
+ backgroundStartedAt: Date.now()
11891
+ });
11892
+ if (signal.aborted) return;
11789
11893
  try {
11894
+ await this.recordAsyncTiming(stateManager, options.runId, resumeNodeId, {
11895
+ runCallStartedAt: Date.now()
11896
+ });
11790
11897
  const response = await this.resume(options, { handoffToRunner: handoffEnabled });
11898
+ await this.recordAsyncTiming(stateManager, options.runId, resumeNodeId, {
11899
+ runCallCompletedAt: Date.now()
11900
+ });
11791
11901
  if (signal.aborted) return;
11902
+ await this.recordAsyncTiming(stateManager, options.runId, resumeNodeId, {
11903
+ handoffStartedAt: Date.now()
11904
+ });
11792
11905
  const postHandoff = await this.maybeHandoffToRunner(options.runId, resumeNodeId, response);
11906
+ await this.recordAsyncTiming(stateManager, options.runId, resumeNodeId, {
11907
+ handoffCompletedAt: Date.now(),
11908
+ finalizeStartedAt: Date.now()
11909
+ });
11910
+ if (await this.runnerAlreadyWroteTerminal(
11911
+ stateManager,
11912
+ options.runId,
11913
+ resumeNodeId,
11914
+ postHandoff
11915
+ )) {
11916
+ await this.recordAsyncTiming(stateManager, options.runId, resumeNodeId, {
11917
+ finalizeCompletedAt: Date.now(),
11918
+ backgroundCompletedAt: Date.now()
11919
+ });
11920
+ return;
11921
+ }
11793
11922
  await stateManager.finalize(options.runId, resumeNodeId, postHandoff);
11923
+ await this.recordAsyncTiming(stateManager, options.runId, resumeNodeId, {
11924
+ finalizeCompletedAt: Date.now()
11925
+ });
11794
11926
  await this.maybeFireWebhook(stateManager, options.runId, resumeNodeId, postHandoff);
11927
+ await this.recordAsyncTiming(stateManager, options.runId, resumeNodeId, {
11928
+ backgroundCompletedAt: Date.now()
11929
+ });
11795
11930
  } catch (err) {
11796
11931
  if (signal.aborted) return;
11797
11932
  const errorMsg = err instanceof Error ? err.message : String(err);
11933
+ await this.recordAsyncTiming(stateManager, options.runId, resumeNodeId, {
11934
+ backgroundFailedAt: Date.now(),
11935
+ errorMessage: errorMsg
11936
+ });
11798
11937
  const failResponse = {
11799
11938
  runId: options.runId,
11800
11939
  status: "failed",
@@ -11803,8 +11942,17 @@ ${inputJson}
11803
11942
  errors: [{ code: "RESUME_FAILED", message: errorMsg }],
11804
11943
  timestamp: Date.now()
11805
11944
  };
11945
+ await this.recordAsyncTiming(stateManager, options.runId, resumeNodeId, {
11946
+ finalizeStartedAt: Date.now()
11947
+ });
11806
11948
  await stateManager.finalize(options.runId, resumeNodeId, failResponse);
11949
+ await this.recordAsyncTiming(stateManager, options.runId, resumeNodeId, {
11950
+ finalizeCompletedAt: Date.now()
11951
+ });
11807
11952
  await this.maybeFireWebhook(stateManager, options.runId, resumeNodeId, failResponse);
11953
+ await this.recordAsyncTiming(stateManager, options.runId, resumeNodeId, {
11954
+ backgroundCompletedAt: Date.now()
11955
+ });
11808
11956
  }
11809
11957
  });
11810
11958
  return { runId: options.runId, nodeId, status: "running" };
@@ -11837,7 +11985,15 @@ ${inputJson}
11837
11985
  timestamp: Date.now()
11838
11986
  };
11839
11987
  }
11840
- if (state.response !== null) return state.response;
11988
+ if (state.response !== null) {
11989
+ return state.asyncTiming === void 0 ? state.response : {
11990
+ ...state.response,
11991
+ meta: {
11992
+ ...state.response.meta,
11993
+ asyncTiming: state.asyncTiming
11994
+ }
11995
+ };
11996
+ }
11841
11997
  return {
11842
11998
  runId: state.runId,
11843
11999
  status: state.status,
@@ -11847,7 +12003,8 @@ ${inputJson}
11847
12003
  turns: state.progress.turns,
11848
12004
  tokensUsed: state.progress.tokensUsed,
11849
12005
  activity: state.progress.currentActivity,
11850
- ...state.progress.lastTool !== void 0 ? { lastTool: state.progress.lastTool } : {}
12006
+ ...state.progress.lastTool !== void 0 ? { lastTool: state.progress.lastTool } : {},
12007
+ ...state.asyncTiming !== void 0 ? { asyncTiming: state.asyncTiming } : {}
11851
12008
  },
11852
12009
  errors: [],
11853
12010
  timestamp: state.lastHeartbeat
@@ -11856,6 +12013,11 @@ ${inputJson}
11856
12013
  /**
11857
12014
  * Poll until the run reaches a terminal state (done | failed | paused |
11858
12015
  * cancelled) or the timeout expires. Returns the final EngineResponse.
12016
+ *
12017
+ * For async paused runs, wait until the background wrapper has completed
12018
+ * its post-finalize bookkeeping before returning. Otherwise a caller can
12019
+ * immediately resume while the previous wrapper is still writing timing
12020
+ * diagnostics, allowing a stale paused state to overwrite the resumed run.
11859
12021
  */
11860
12022
  async waitFor(runId, opts = {}) {
11861
12023
  const pollInterval = opts.pollIntervalMs ?? 1e3;
@@ -11863,9 +12025,10 @@ ${inputJson}
11863
12025
  const deadline = timeoutMs > 0 ? Date.now() + timeoutMs : Infinity;
11864
12026
  for (; ; ) {
11865
12027
  const resp = await this.getStatus(runId, opts.nodeId);
11866
- if (resp.status === "done" || resp.status === "failed" || resp.status === "paused") {
12028
+ if (resp.status === "done" || resp.status === "failed") {
11867
12029
  return resp;
11868
12030
  }
12031
+ if (resp.status === "paused" && this.pausedRunIsQuiesced(resp, opts)) return resp;
11869
12032
  if (Date.now() >= deadline) {
11870
12033
  return {
11871
12034
  runId,
@@ -11881,6 +12044,12 @@ ${inputJson}
11881
12044
  await new Promise((r) => setTimeout(r, pollInterval));
11882
12045
  }
11883
12046
  }
12047
+ pausedRunIsQuiesced(resp, opts) {
12048
+ if (resp.meta.pauseReason === "handoff_to_runner") return opts.waitForRunnerHandoff !== true;
12049
+ const timing = resp.meta.asyncTiming;
12050
+ if (timing === void 0) return true;
12051
+ return timing.backgroundCompletedAt !== void 0 || timing.backgroundFailedAt !== void 0;
12052
+ }
11884
12053
  /**
11885
12054
  * Cancel an async run. Aborts the background executor and marks the
11886
12055
  * state as cancelled. Idempotent — safe to call on already-terminal runs.
@@ -11937,14 +12106,31 @@ ${inputJson}
11937
12106
  if (original === void 0) {
11938
12107
  throw new Error(`retryWebhook: delivery ${deliveryId} not found`);
11939
12108
  }
11940
- await this.dispatchWebhookWithRetries(
11941
- stateManager,
11942
- runId,
11943
- targetNodeId,
11944
- original.event,
11945
- state.response,
11946
- 1
11947
- );
12109
+ const retryStartedAt = Date.now();
12110
+ try {
12111
+ await this.dispatchWebhookWithRetries(
12112
+ stateManager,
12113
+ runId,
12114
+ targetNodeId,
12115
+ original.event,
12116
+ state.response,
12117
+ 1
12118
+ );
12119
+ await this.recordManualWebhookRetry(stateManager, runId, targetNodeId, {
12120
+ deliveryId,
12121
+ startedAt: retryStartedAt,
12122
+ completedAt: Date.now()
12123
+ });
12124
+ } catch (err) {
12125
+ const errorMessage = err instanceof Error ? err.message : String(err);
12126
+ await this.recordManualWebhookRetry(stateManager, runId, targetNodeId, {
12127
+ deliveryId,
12128
+ startedAt: retryStartedAt,
12129
+ completedAt: Date.now(),
12130
+ errorMessage
12131
+ });
12132
+ throw err;
12133
+ }
11948
12134
  }
11949
12135
  /**
11950
12136
  * Scan all runs for stale heartbeats and mark them as failed. Clients
@@ -11981,6 +12167,12 @@ ${inputJson}
11981
12167
  return orphaned;
11982
12168
  }
11983
12169
  // ---------- runner handoff (Plan 019) ----------
12170
+ async runnerAlreadyWroteTerminal(stateManager, runId, nodeId, response) {
12171
+ if (response.status !== "paused") return false;
12172
+ if (response.meta.pauseReason !== "handoff_to_runner") return false;
12173
+ const latest = await stateManager.read(runId, nodeId);
12174
+ return latest?.status === "done" || latest?.status === "failed" || latest?.status === "cancelled";
12175
+ }
11984
12176
  /**
11985
12177
  * When the response indicates the run paused for runner handoff, POST
11986
12178
  * `{ runId }` to the configured runner URL. On success, return the
@@ -12046,7 +12238,22 @@ ${inputJson}
12046
12238
  }
12047
12239
  }
12048
12240
  // ---------- webhook helpers ----------
12241
+ async recordAsyncTiming(stateManager, runId, nodeId, patch) {
12242
+ try {
12243
+ await stateManager.patchAsyncTiming(runId, nodeId, patch);
12244
+ } catch {
12245
+ }
12246
+ }
12247
+ async recordManualWebhookRetry(stateManager, runId, nodeId, row) {
12248
+ try {
12249
+ await stateManager.appendManualWebhookRetry(runId, nodeId, row);
12250
+ } catch {
12251
+ }
12252
+ }
12049
12253
  async maybeFireWebhook(stateManager, runId, nodeId, response) {
12254
+ await this.recordAsyncTiming(stateManager, runId, nodeId, {
12255
+ webhookCheckStartedAt: Date.now()
12256
+ });
12050
12257
  const state = await stateManager.read(runId, nodeId);
12051
12258
  if (state === null || state.webhook === void 0) return;
12052
12259
  const event = response.status === "done" ? "done" : response.status === "paused" ? "paused" : "failed";
@@ -12057,12 +12264,18 @@ ${inputJson}
12057
12264
  const state = await stateManager.read(runId, nodeId);
12058
12265
  if (state === null || state.webhook === void 0) return;
12059
12266
  const hook = state.webhook;
12267
+ await this.recordAsyncTiming(stateManager, runId, nodeId, {
12268
+ webhookDispatchStartedAt: Date.now()
12269
+ });
12060
12270
  let attempt = startAttempt;
12061
12271
  while (attempt <= MAX_ATTEMPTS) {
12062
12272
  const delay = RETRY_DELAYS_MS[attempt - 1] ?? 0;
12063
12273
  if (delay > 0) {
12064
12274
  await new Promise((r) => setTimeout(r, delay));
12065
12275
  }
12276
+ await this.recordAsyncTiming(stateManager, runId, nodeId, {
12277
+ webhookHttpStartedAt: Date.now()
12278
+ });
12066
12279
  const result = await this.webhookDispatcher.deliver({
12067
12280
  url: hook.url,
12068
12281
  event,
@@ -12071,6 +12284,9 @@ ${inputJson}
12071
12284
  ...hook.headers !== void 0 ? { headers: hook.headers } : {},
12072
12285
  attempt
12073
12286
  });
12287
+ await this.recordAsyncTiming(stateManager, runId, nodeId, {
12288
+ webhookHttpCompletedAt: Date.now()
12289
+ });
12074
12290
  const latest = await stateManager.read(runId, nodeId);
12075
12291
  if (latest !== null && latest.webhook !== void 0) {
12076
12292
  const updated = {
@@ -12078,10 +12294,21 @@ ${inputJson}
12078
12294
  deliveries: [...latest.webhook.deliveries, result.delivery]
12079
12295
  };
12080
12296
  await stateManager.update(runId, nodeId, { webhook: updated });
12297
+ await this.recordAsyncTiming(stateManager, runId, nodeId, {
12298
+ webhookStatePersistedAt: Date.now()
12299
+ });
12300
+ }
12301
+ if (!result.shouldRetry) {
12302
+ await this.recordAsyncTiming(stateManager, runId, nodeId, {
12303
+ webhookDispatchCompletedAt: Date.now()
12304
+ });
12305
+ return;
12081
12306
  }
12082
- if (!result.shouldRetry) return;
12083
12307
  attempt += 1;
12084
12308
  }
12309
+ await this.recordAsyncTiming(stateManager, runId, nodeId, {
12310
+ webhookDispatchCompletedAt: Date.now()
12311
+ });
12085
12312
  }
12086
12313
  /**
12087
12314
  * Shut down engine-owned background resources — currently just the