deepline 0.1.90 → 0.1.93
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +1356 -225
- package/dist/cli/index.mjs +1356 -225
- package/dist/index.d.mts +74 -5
- package/dist/index.d.ts +74 -5
- package/dist/index.js +1018 -62
- package/dist/index.mjs +1007 -62
- package/dist/repo/apps/play-runner-workers/src/coordinator-entry.ts +87 -20
- package/dist/repo/apps/play-runner-workers/src/entry.ts +52 -14
- package/dist/repo/sdk/src/client.ts +289 -40
- package/dist/repo/sdk/src/index.ts +1 -0
- package/dist/repo/sdk/src/release.ts +2 -2
- package/dist/repo/sdk/src/runs/observe-transport.ts +481 -0
- package/dist/repo/sdk/src/stream-reconnect.ts +44 -0
- package/dist/repo/sdk/src/types.ts +10 -3
- package/dist/repo/shared_libs/play-runtime/live-events.ts +217 -0
- package/dist/repo/shared_libs/play-runtime/run-ledger.ts +1074 -0
- package/dist/repo/shared_libs/play-runtime/run-snapshot-stream.ts +581 -0
- package/package.json +5 -2
|
@@ -3211,8 +3211,22 @@ function formatTailLogPart(value: unknown): string {
|
|
|
3211
3211
|
}
|
|
3212
3212
|
}
|
|
3213
3213
|
|
|
3214
|
+
/**
 * Regexes matching operator-diagnostic console lines.
 *
 * These lines carry the [deepline-run:] prefix but are not user-facing run
 * output. The console scrape fans run-prefixed lines back into the run's
 * durable Run Log Stream ('system' channel), so harness/coordinator plumbing
 * noise is dropped at ingestion time rather than filtered at read time.
 * User play log lines (runner-event echoes) match none of these patterns and
 * therefore pass through untouched.
 */
const OPERATOR_NOISE_LOG_PATTERNS: readonly RegExp[] = [
  // Perf / probe instrumentation.
  /\[perf-trace\]/,
  /\[harness-probe\]/,
  // Workflow lifecycle diagnostics.
  /TenantWorkflow\.run entered/,
  /TenantWorkflow\.run threw/,
  // Failures while forwarding the diagnostics above.
  /failed to forward runner perf trace/,
  /failed to forward TenantWorkflow\.run error/,
];
|
|
3227
|
+
|
|
3214
3228
|
function parseRunLogLine(line: string): { runId: string; line: string } | null {
|
|
3215
|
-
if (
|
|
3229
|
+
if (OPERATOR_NOISE_LOG_PATTERNS.some((pattern) => pattern.test(line))) {
|
|
3216
3230
|
return null;
|
|
3217
3231
|
}
|
|
3218
3232
|
const prefixed = line.match(RUN_LOG_PREFIX_RE);
|
|
@@ -3661,26 +3675,79 @@ async function handleWorkflowRoute(input: {
|
|
|
3661
3675
|
}
|
|
3662
3676
|
try {
|
|
3663
3677
|
if (action === 'cancel') {
|
|
3664
|
-
if (
|
|
3665
|
-
|
|
3678
|
+
if (instance) {
|
|
3679
|
+
try {
|
|
3680
|
+
await instance.terminate();
|
|
3681
|
+
} catch (error) {
|
|
3682
|
+
const message =
|
|
3683
|
+
error instanceof Error ? error.message : String(error);
|
|
3684
|
+
// Tolerate four classes of error here:
|
|
3685
|
+
// - already-terminal (complete / errored / terminated)
|
|
3686
|
+
// - "Cannot terminate instance since its on a finite state"
|
|
3687
|
+
// (the runtime's wording for "already finished")
|
|
3688
|
+
// - "not implemented" (wrangler dev local mode doesn't support
|
|
3689
|
+
// instance.terminate() yet — silently no-op there)
|
|
3690
|
+
// - "not found" (instance never existed)
|
|
3691
|
+
if (
|
|
3692
|
+
!/complete|terminated|errored|finite state|cannot[ _]terminate|not[ _]implemented|not[ _]found|404/i.test(
|
|
3693
|
+
message,
|
|
3694
|
+
)
|
|
3695
|
+
) {
|
|
3696
|
+
throw error;
|
|
3697
|
+
}
|
|
3698
|
+
}
|
|
3666
3699
|
}
|
|
3667
|
-
|
|
3668
|
-
|
|
3669
|
-
|
|
3670
|
-
|
|
3671
|
-
|
|
3672
|
-
|
|
3673
|
-
|
|
3674
|
-
|
|
3675
|
-
|
|
3676
|
-
|
|
3677
|
-
|
|
3678
|
-
|
|
3679
|
-
|
|
3680
|
-
|
|
3681
|
-
|
|
3682
|
-
|
|
3683
|
-
|
|
3700
|
+
// terminate() kills the dynamic worker before its run() wrapper can
|
|
3701
|
+
// write terminal state (the only place completed/failed land), so
|
|
3702
|
+
// without this write /tail reports 'running' forever and any
|
|
3703
|
+
// start-stream watcher hangs after a cancel. Land the cancelled
|
|
3704
|
+
// terminal state here — terminal-set appends a 'terminal' run event
|
|
3705
|
+
// and wakes the dedup DO's long-poll waiters, which unblocks tails.
|
|
3706
|
+
//
|
|
3707
|
+
// Idempotency: first-wins from this side — if the run already went
|
|
3708
|
+
// terminal (completed/failed/cancelled) we keep that state. The DO
|
|
3709
|
+
// stores the cached terminal state under a single storage key
|
|
3710
|
+
// (last-wins on raw writes), but the run-event log is append-only
|
|
3711
|
+
// and /tail truncates at the FIRST terminal event, so a racing
|
|
3712
|
+
// completed/failed write from a dying worker can at worst replace
|
|
3713
|
+
// the cached key with another terminal status — it can never
|
|
3714
|
+
// resurrect 'running'.
|
|
3715
|
+
const existingTerminal = await readCoordinatorTerminalState(
|
|
3716
|
+
env,
|
|
3717
|
+
runId,
|
|
3718
|
+
).catch((error: unknown) => {
|
|
3719
|
+
// Tolerated: better to risk a harmless terminal-over-terminal
|
|
3720
|
+
// overwrite than to skip the cancelled write and hang watchers.
|
|
3721
|
+
console.warn('[coordinator] terminal state read before cancel failed', {
|
|
3722
|
+
runId,
|
|
3723
|
+
error: error instanceof Error ? error.message : String(error),
|
|
3724
|
+
});
|
|
3725
|
+
return null;
|
|
3726
|
+
});
|
|
3727
|
+
if (!existingTerminal) {
|
|
3728
|
+
try {
|
|
3729
|
+
await writeCoordinatorTerminalState(env, {
|
|
3730
|
+
runId,
|
|
3731
|
+
status: 'cancelled',
|
|
3732
|
+
error: 'Run cancelled',
|
|
3733
|
+
});
|
|
3734
|
+
} catch (error) {
|
|
3735
|
+
// Fail loudly: the workflow was terminated but watchers would
|
|
3736
|
+
// hang on 'running' forever without the terminal event.
|
|
3737
|
+
const message =
|
|
3738
|
+
error instanceof Error ? error.message : String(error);
|
|
3739
|
+
console.error('[coordinator] cancel terminal state write failed', {
|
|
3740
|
+
runId,
|
|
3741
|
+
error: message,
|
|
3742
|
+
});
|
|
3743
|
+
return Response.json(
|
|
3744
|
+
{
|
|
3745
|
+
runId,
|
|
3746
|
+
status: 'error',
|
|
3747
|
+
error: `workflow terminated but cancelled terminal state write failed: ${message}`,
|
|
3748
|
+
},
|
|
3749
|
+
{ status: 500 },
|
|
3750
|
+
);
|
|
3684
3751
|
}
|
|
3685
3752
|
}
|
|
3686
3753
|
return Response.json({ runId, status: 'cancelled' });
|
|
@@ -1206,7 +1206,10 @@ async function waitForSyntheticIntegrationEvent(
|
|
|
1206
1206
|
{
|
|
1207
1207
|
type: 'log.appended',
|
|
1208
1208
|
runId: req.runId,
|
|
1209
|
-
|
|
1209
|
+
// 'system' (windowed text-dedupe channel), NOT 'worker': this line is
|
|
1210
|
+
// emitted outside the harness log buffer, so it has no positional
|
|
1211
|
+
// channelOffset and must not pollute the worker channel cursor.
|
|
1212
|
+
source: 'system',
|
|
1210
1213
|
occurredAt: nowMs(),
|
|
1211
1214
|
lines: [
|
|
1212
1215
|
`Waiting for integration_event:${eventKey} for up to ${timeoutMs}ms.`,
|
|
@@ -5402,6 +5405,14 @@ async function executeRunRequest(
|
|
|
5402
5405
|
const abortSignal = abortController.signal;
|
|
5403
5406
|
let runLogBuffer: string[] = [];
|
|
5404
5407
|
let pendingRunLogLines: string[] = [];
|
|
5408
|
+
// Monotonic count of every line ever appended to this run's worker log
|
|
5409
|
+
// channel. runLogBuffer/pendingRunLogLines are rotating tails of those
|
|
5410
|
+
// lines (RUN_LOG_BUFFER_LIMIT is the coordinator transport cache only), so
|
|
5411
|
+
// each log.appended batch can carry the absolute channelOffset of its first
|
|
5412
|
+
// line: totalEmittedLogLines - pendingRunLogLines.length. Run Log Stream
|
|
5413
|
+
// ingestion skips re-sent prefixes positionally (exactly-once, repeated
|
|
5414
|
+
// identical lines preserved) instead of text-deduping.
|
|
5415
|
+
let totalEmittedLogLines = 0;
|
|
5405
5416
|
let stepProgressByNodeId: LiveNodeProgressMap = {};
|
|
5406
5417
|
let dirtyProgressNodeIds = new Set<string>();
|
|
5407
5418
|
let pendingLedgerEvents: PlayRunLedgerEvent[] = [
|
|
@@ -5424,6 +5435,7 @@ async function executeRunRequest(
|
|
|
5424
5435
|
const appendRunLogLine = (line: string) => {
|
|
5425
5436
|
const trimmed = redactSecretsFromLogString(line.trim());
|
|
5426
5437
|
if (!trimmed) return;
|
|
5438
|
+
totalEmittedLogLines += 1;
|
|
5427
5439
|
runLogBuffer = [...runLogBuffer, trimmed].slice(-RUN_LOG_BUFFER_LIMIT);
|
|
5428
5440
|
pendingRunLogLines = [...pendingRunLogLines, trimmed].slice(
|
|
5429
5441
|
-RUN_LOG_BUFFER_LIMIT,
|
|
@@ -5614,6 +5626,12 @@ async function executeRunRequest(
|
|
|
5614
5626
|
source: 'worker',
|
|
5615
5627
|
occurredAt,
|
|
5616
5628
|
lines: pendingRunLogLines,
|
|
5629
|
+
// Positional cursor: pendingRunLogLines always holds the LAST
|
|
5630
|
+
// pending lines emitted on this channel, so the offset of its first
|
|
5631
|
+
// line is total-emitted minus pending length. This also covers the
|
|
5632
|
+
// terminal full-buffer re-send (pending = runLogBuffer), which
|
|
5633
|
+
// ingestion then skips positionally instead of via text dedupe.
|
|
5634
|
+
channelOffset: totalEmittedLogLines - pendingRunLogLines.length,
|
|
5617
5635
|
});
|
|
5618
5636
|
pendingRunLogLines = [];
|
|
5619
5637
|
}
|
|
@@ -5709,6 +5727,9 @@ async function executeRunRequest(
|
|
|
5709
5727
|
): Promise<void> => {
|
|
5710
5728
|
if (!options?.persistResultDatasets) return;
|
|
5711
5729
|
const now = nowMs();
|
|
5730
|
+
// Terminal re-send of the full retained buffer. drainPendingLedgerEvents
|
|
5731
|
+
// stamps it with channelOffset = totalEmitted - buffer length, so Run Log
|
|
5732
|
+
// Stream ingestion drops the already-ingested prefix positionally.
|
|
5712
5733
|
pendingRunLogLines = runLogBuffer;
|
|
5713
5734
|
dirtyProgressNodeIds = new Set([
|
|
5714
5735
|
...dirtyProgressNodeIds,
|
|
@@ -5859,6 +5880,25 @@ async function executeRunRequest(
|
|
|
5859
5880
|
ms: nowMs() - resultDatasetStartedAt,
|
|
5860
5881
|
});
|
|
5861
5882
|
const parentSignal = startParentTerminalSignal();
|
|
5883
|
+
// Capped runs settle compute billing BEFORE declaring run.completed: a
|
|
5884
|
+
// per-run cap denial (422 billing_cap_exceeded) must fail the run as
|
|
5885
|
+
// its ONLY terminal. Flushing completed first opens a race — watchers
|
|
5886
|
+
// stream the ledger snapshot and exit on the transient completed
|
|
5887
|
+
// before the demoting run.failed lands.
|
|
5888
|
+
const capped = extractMaxCreditsPerRun(req.contractSnapshot) !== null;
|
|
5889
|
+
if (capped) {
|
|
5890
|
+
const billingStartedAt = nowMs();
|
|
5891
|
+
await finalizeWorkerComputeBilling({
|
|
5892
|
+
req,
|
|
5893
|
+
success: true,
|
|
5894
|
+
actionEstimate: 4,
|
|
5895
|
+
});
|
|
5896
|
+
recordRunnerPerfTrace({
|
|
5897
|
+
req,
|
|
5898
|
+
phase: 'runner.compute_billing_finalize',
|
|
5899
|
+
ms: nowMs() - billingStartedAt,
|
|
5900
|
+
});
|
|
5901
|
+
}
|
|
5862
5902
|
const terminalOccurredAt = nowMs();
|
|
5863
5903
|
const terminalUpdateStartedAt = nowMs();
|
|
5864
5904
|
await flushTerminalLedgerEvents({
|
|
@@ -5874,21 +5914,19 @@ async function executeRunRequest(
|
|
|
5874
5914
|
ms: nowMs() - terminalUpdateStartedAt,
|
|
5875
5915
|
});
|
|
5876
5916
|
|
|
5877
|
-
|
|
5878
|
-
|
|
5879
|
-
|
|
5880
|
-
success: true,
|
|
5881
|
-
actionEstimate: 4,
|
|
5882
|
-
}).then(() => {
|
|
5883
|
-
recordRunnerPerfTrace({
|
|
5917
|
+
if (!capped) {
|
|
5918
|
+
const billingStartedAt = nowMs();
|
|
5919
|
+
const billingPromise = finalizeWorkerComputeBilling({
|
|
5884
5920
|
req,
|
|
5885
|
-
|
|
5886
|
-
|
|
5921
|
+
success: true,
|
|
5922
|
+
actionEstimate: 4,
|
|
5923
|
+
}).then(() => {
|
|
5924
|
+
recordRunnerPerfTrace({
|
|
5925
|
+
req,
|
|
5926
|
+
phase: 'runner.compute_billing_finalize',
|
|
5927
|
+
ms: nowMs() - billingStartedAt,
|
|
5928
|
+
});
|
|
5887
5929
|
});
|
|
5888
|
-
});
|
|
5889
|
-
if (extractMaxCreditsPerRun(req.contractSnapshot) !== null) {
|
|
5890
|
-
await billingPromise;
|
|
5891
|
-
} else {
|
|
5892
5930
|
const nonBlockingBillingPromise = billingPromise.catch((error) => {
|
|
5893
5931
|
console.error(
|
|
5894
5932
|
`[play-harness] non-fatal compute billing finalize failed runId=${req.runId}: ${
|
|
@@ -36,6 +36,15 @@
|
|
|
36
36
|
import { resolveConfig } from './config.js';
|
|
37
37
|
import { DeeplineError } from './errors.js';
|
|
38
38
|
import { HttpClient } from './http.js';
|
|
39
|
+
import {
|
|
40
|
+
STREAM_HEALTHY_CONNECTION_MS,
|
|
41
|
+
isTransientPlayStreamError,
|
|
42
|
+
streamReconnectDelayMs,
|
|
43
|
+
} from './stream-reconnect.js';
|
|
44
|
+
import {
|
|
45
|
+
observeRunEvents,
|
|
46
|
+
RunObserveTransportUnavailableError,
|
|
47
|
+
} from './runs/observe-transport.js';
|
|
39
48
|
import type {
|
|
40
49
|
DeeplineClientOptions,
|
|
41
50
|
ResolvedConfig,
|
|
@@ -129,11 +138,29 @@ export type RunsListOptions = {
|
|
|
129
138
|
/** Streaming options for `client.runs.tail(...)`. */
|
|
130
139
|
export type RunsTailOptions = {
|
|
131
140
|
signal?: AbortSignal;
|
|
141
|
+
/**
|
|
142
|
+
* Called before each stream reconnect. Server stream windows are finite, so
|
|
143
|
+
* long runs reconnect with backoff until a terminal status is observed.
|
|
144
|
+
*/
|
|
145
|
+
onReconnect?: (info: {
|
|
146
|
+
attempt: number;
|
|
147
|
+
delayMs: number;
|
|
148
|
+
reason: string;
|
|
149
|
+
}) => void;
|
|
150
|
+
/**
|
|
151
|
+
* Display-only transport notices: subscription-transport reconnects,
|
|
152
|
+
* staleness warnings, and the one-time fallback notice when the server
|
|
153
|
+
* cannot serve the Convex subscription transport (ADR-0008).
|
|
154
|
+
*/
|
|
155
|
+
onNotice?: (message: string) => void;
|
|
132
156
|
};
|
|
133
157
|
|
|
134
158
|
/** Log fetch options for `client.runs.logs(...)`. */
|
|
135
159
|
export type RunsLogsOptions = {
|
|
160
|
+
/** Return the LAST `limit` stored log lines (default 200). */
|
|
136
161
|
limit?: number;
|
|
162
|
+
/** Fetch every stored log line, paginating to the full totalCount. */
|
|
163
|
+
all?: boolean;
|
|
137
164
|
};
|
|
138
165
|
|
|
139
166
|
/** Persisted log response for one play run. */
|
|
@@ -146,6 +173,28 @@ export type RunsLogsResult = {
|
|
|
146
173
|
truncated: boolean;
|
|
147
174
|
hasMore: boolean;
|
|
148
175
|
entries: string[];
|
|
176
|
+
/**
|
|
177
|
+
* True when the run crossed the Run Log Stream retention cap: `totalCount`
|
|
178
|
+
* keeps counting, but stored line bodies end at a loud truncation marker.
|
|
179
|
+
*/
|
|
180
|
+
logsTruncated?: boolean;
|
|
181
|
+
};
|
|
182
|
+
|
|
183
|
+
/** Server page cap for GET /api/v2/runs/:runId/logs (ADR-0009). */
|
|
184
|
+
const RUN_LOGS_PAGE_LIMIT = 1_000;
|
|
185
|
+
|
|
186
|
+
/** Wire shape of one GET /api/v2/runs/:runId/logs page. */
|
|
187
|
+
type RunLogsPageResponse = {
|
|
188
|
+
runId: string;
|
|
189
|
+
totalLogCount: number;
|
|
190
|
+
logsTruncated: boolean;
|
|
191
|
+
lastStoredSeq: number;
|
|
192
|
+
afterSeq: number;
|
|
193
|
+
entries: Array<{ seq: number; line: string }>;
|
|
194
|
+
firstSeq: number | null;
|
|
195
|
+
lastSeq: number | null;
|
|
196
|
+
hasMore: boolean;
|
|
197
|
+
nextAfterSeq: number | null;
|
|
149
198
|
};
|
|
150
199
|
|
|
151
200
|
/** One persisted runtime-sheet row returned by `client.runs.exportDatasetRows(...)`. */
|
|
@@ -328,6 +377,13 @@ type PlayLiveStatusState = {
|
|
|
328
377
|
runId: string;
|
|
329
378
|
status: PlayStatus['status'];
|
|
330
379
|
logs: string[];
|
|
380
|
+
/**
|
|
381
|
+
* Absolute (1-based) sequence number of the last log line appended to
|
|
382
|
+
* `logs`. play.run.log payloads carry `firstSeq` (ADR-0009), so overlapping
|
|
383
|
+
* re-deliveries are skipped positionally — repeated identical lines are
|
|
384
|
+
* preserved and snapshots never replace the accumulated log list.
|
|
385
|
+
*/
|
|
386
|
+
lastLogSeq: number;
|
|
331
387
|
result?: unknown;
|
|
332
388
|
error?: string;
|
|
333
389
|
latest: PlayStatus | null;
|
|
@@ -355,13 +411,52 @@ function normalizeLiveStatus(value: unknown): PlayStatus['status'] | null {
|
|
|
355
411
|
return null;
|
|
356
412
|
}
|
|
357
413
|
|
|
414
|
+
/**
 * Fold the `lines` of one `play.run.log` payload into `state.logs`.
 *
 * Payloads stamped with a valid `firstSeq` (finite number >= 1) are appended
 * positionally: any prefix whose sequence numbers are already covered by
 * `state.lastLogSeq` is skipped, so overlapping re-deliveries are not
 * duplicated while repeated identical lines are kept. Payloads without a
 * usable `firstSeq` are appended verbatim, and the cursor is advanced to the
 * payload's `totalLogCount` when that field is a finite number.
 *
 * @param state   Accumulated live status; `logs` and `lastLogSeq` are mutated.
 * @param payload Event payload; reads `lines`, `firstSeq`, `totalLogCount`.
 */
function appendPlayLiveLogLines(
  state: PlayLiveStatusState,
  payload: Record<string, unknown>,
): void {
  const incoming = readStringArray(payload.lines);
  if (incoming.length === 0) {
    return;
  }

  const rawFirstSeq = payload.firstSeq;
  if (
    typeof rawFirstSeq !== 'number' ||
    !Number.isFinite(rawFirstSeq) ||
    rawFirstSeq < 1
  ) {
    // No usable sequence stamp (marker payloads such as gap/unavailable
    // notices, or servers that predate ADR-0009): keep every line, then move
    // the cursor to the cumulative count if one was sent so that later
    // seq-stamped lines line up.
    state.logs.push(...incoming);
    const rawTotal = payload.totalLogCount;
    if (typeof rawTotal === 'number' && Number.isFinite(rawTotal)) {
      state.lastLogSeq = Math.max(state.lastLogSeq, Math.trunc(rawTotal));
    }
    return;
  }

  // Sequence-stamped payload: drop the part we have already accumulated.
  const firstSeq = Math.trunc(rawFirstSeq);
  const alreadySeen = Math.max(0, state.lastLogSeq + 1 - firstSeq);
  if (alreadySeen < incoming.length) {
    state.logs.push(...incoming.slice(alreadySeen));
    state.lastLogSeq = Math.max(
      state.lastLogSeq,
      firstSeq + incoming.length - 1,
    );
  }
}
|
|
452
|
+
|
|
358
453
|
function updatePlayLiveStatusState(
|
|
359
454
|
state: PlayLiveStatusState,
|
|
360
455
|
event: PlayLiveEvent,
|
|
361
456
|
): PlayStatus | null {
|
|
362
457
|
const payload = getPlayLiveEventPayload(event);
|
|
363
458
|
if (event.type === 'play.run.log') {
|
|
364
|
-
state
|
|
459
|
+
appendPlayLiveLogLines(state, payload);
|
|
365
460
|
return null;
|
|
366
461
|
}
|
|
367
462
|
if (
|
|
@@ -385,15 +480,23 @@ function updatePlayLiveStatusState(
|
|
|
385
480
|
: null) ??
|
|
386
481
|
state.status;
|
|
387
482
|
const progressPayload = isRecord(payload.progress) ? payload.progress : {};
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
483
|
+
// Snapshots no longer REPLACE accumulated logs (ADR-0009): the snapshot
|
|
484
|
+
// only retains a bounded tail, so replacing would clobber the seq-keyed
|
|
485
|
+
// log list built from play.run.log events (the stream differ always emits
|
|
486
|
+
// log lines through play.run.log, snapshot ticks included). A terminal
|
|
487
|
+
// final_status payload may still seed an EMPTY state — that is the only
|
|
488
|
+
// event some non-stream flows ever see.
|
|
391
489
|
if (
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
490
|
+
event.type === 'play.run.final_status' &&
|
|
491
|
+
state.logs.length === 0 &&
|
|
492
|
+
state.lastLogSeq === 0
|
|
395
493
|
) {
|
|
396
|
-
|
|
494
|
+
const payloadLogs = readStringArray(payload.logs);
|
|
495
|
+
const progressLogs = readStringArray(progressPayload.logs);
|
|
496
|
+
const seedLogs = payloadLogs.length > 0 ? payloadLogs : progressLogs;
|
|
497
|
+
if (seedLogs.length > 0) {
|
|
498
|
+
state.logs = seedLogs;
|
|
499
|
+
}
|
|
397
500
|
}
|
|
398
501
|
if ('result' in payload) {
|
|
399
502
|
state.result = payload.result;
|
|
@@ -1560,44 +1663,161 @@ export class DeeplineClient {
|
|
|
1560
1663
|
return response.runs ?? [];
|
|
1561
1664
|
}
|
|
1562
1665
|
|
|
1563
|
-
/**
|
|
1564
|
-
|
|
1666
|
+
/**
 * Observe a single run's live events over the Convex Run Snapshot
 * subscription transport (ADR-0008).
 *
 * The generator yields the same `play.*` event envelopes as
 * {@link streamPlayRunEvents} and finishes after the terminal snapshot.
 *
 * @param runId   Run to observe.
 * @param options Optional abort `signal` and display-only `onNotice` callback,
 *                both forwarded unchanged to the transport.
 * @throws {RunObserveTransportUnavailableError} when this server cannot serve
 *   the transport (older server, unconfigured grants, or unreachable Convex);
 *   callers are expected to fall back to the SSE stream with a notice.
 */
observeRunEvents(
  runId: string,
  options?: { signal?: AbortSignal; onNotice?: (message: string) => void },
): AsyncGenerator<PlayLiveEvent> {
  const events = observeRunEvents({
    http: this.http,
    runId,
    signal: options?.signal,
    onNotice: options?.onNotice,
  });
  return events as AsyncGenerator<PlayLiveEvent>;
}
|
|
1685
|
+
|
|
1686
|
+
/**
|
|
1687
|
+
* Tail one run through the subscription transport until terminal, then
|
|
1688
|
+
* return one durable REST status read (the final Run Response Package).
|
|
1689
|
+
*/
|
|
1690
|
+
private async tailRunViaObserveTransport(
|
|
1691
|
+
runId: string,
|
|
1692
|
+
options?: RunsTailOptions,
|
|
1693
|
+
): Promise<PlayStatus> {
|
|
1565
1694
|
const state: PlayLiveStatusState = {
|
|
1566
1695
|
runId,
|
|
1567
1696
|
status: 'running',
|
|
1568
1697
|
logs: [],
|
|
1698
|
+
lastLogSeq: 0,
|
|
1569
1699
|
latest: null,
|
|
1570
1700
|
};
|
|
1571
|
-
|
|
1572
|
-
for await (const event of this.streamPlayRunEvents(runId, {
|
|
1573
|
-
mode: 'cli',
|
|
1701
|
+
for await (const event of this.observeRunEvents(runId, {
|
|
1574
1702
|
signal: options?.signal,
|
|
1703
|
+
onNotice: options?.onNotice,
|
|
1575
1704
|
})) {
|
|
1576
1705
|
const status = updatePlayLiveStatusState(state, event);
|
|
1577
|
-
if (!status) {
|
|
1706
|
+
if (!status || !TERMINAL_PLAY_STATUSES.has(status.status)) {
|
|
1578
1707
|
continue;
|
|
1579
1708
|
}
|
|
1580
|
-
|
|
1581
|
-
if (terminal) {
|
|
1582
|
-
break;
|
|
1583
|
-
}
|
|
1584
|
-
}
|
|
1585
|
-
if (terminal && state.latest) {
|
|
1586
|
-
return await this.getRunStatus(state.latest.runId || runId).catch(
|
|
1709
|
+
return await this.getRunStatus(status.runId || runId).catch(
|
|
1587
1710
|
() => state.latest ?? playRunStatusFromState(state),
|
|
1588
1711
|
);
|
|
1589
1712
|
}
|
|
1590
|
-
if (
|
|
1591
|
-
|
|
1713
|
+
if (options?.signal?.aborted) {
|
|
1714
|
+
throw new DeeplineError('Run observation aborted.', undefined, 'ABORTED');
|
|
1715
|
+
}
|
|
1716
|
+
// The transport ends only after a terminal snapshot; the differ always
|
|
1717
|
+
// emits a terminal `play.run.status` first, so reaching here means the
|
|
1718
|
+
// terminal package read raced — re-check durable status once, loudly.
|
|
1719
|
+
const refreshed = await this.getRunStatus(runId);
|
|
1720
|
+
if (TERMINAL_PLAY_STATUSES.has(refreshed.status)) {
|
|
1721
|
+
return refreshed;
|
|
1592
1722
|
}
|
|
1593
1723
|
throw new DeeplineError(
|
|
1594
|
-
`Run
|
|
1724
|
+
`Run observation for ${runId} ended before a terminal status.`,
|
|
1595
1725
|
undefined,
|
|
1596
|
-
'
|
|
1597
|
-
{ runId },
|
|
1726
|
+
'PLAY_LIVE_STREAM_ENDED',
|
|
1598
1727
|
);
|
|
1599
1728
|
}
|
|
1600
1729
|
|
|
1730
|
+
/**
 * Read the canonical run stream until a terminal run status is observed.
 *
 * Tries the Convex Run Snapshot subscription transport first (ADR-0008);
 * when the server cannot serve it (grant endpoint missing/unconfigured or
 * Convex unreachable) it falls back — with one `onNotice` message — to the
 * support-window SSE stream below.
 *
 * Server stream windows are finite: they end cleanly at the function
 * ceiling even while the run keeps executing. A window that ends (cleanly
 * or via transient network error) without a terminal event triggers one
 * durable-status re-check followed by a backed-off reconnect, so long runs
 * tail to completion.
 *
 * Abort via `options.signal` to stop waiting: the signal is checked when a
 * stream window ends and again after each backoff sleep, so an aborted tail
 * rejects instead of polling and reconnecting.
 *
 * @param runId   Run to tail.
 * @param options Abort signal plus `onReconnect` / `onNotice` callbacks.
 * @returns The durable terminal status, or the last streamed status when the
 *   final durable read fails.
 * @throws {DeeplineError} with code `ABORTED` when `options.signal` is aborted
 *   between stream windows; non-transient stream or status errors are
 *   rethrown unchanged.
 */
async tailRun(runId: string, options?: RunsTailOptions): Promise<PlayStatus> {
  try {
    return await this.tailRunViaObserveTransport(runId, options);
  } catch (error) {
    // Only "transport unavailable" falls back to SSE; everything else is real.
    if (!(error instanceof RunObserveTransportUnavailableError)) {
      throw error;
    }
    options?.onNotice?.(
      `[observe] live subscription unavailable (${error.reason}); falling back to SSE tail (support window, ADR-0008)`,
    );
  }

  // Same guard the subscription-transport tail applies once its stream ends.
  const throwIfAborted = (): void => {
    if (options?.signal?.aborted) {
      throw new DeeplineError('Run observation aborted.', undefined, 'ABORTED');
    }
  };

  const state: PlayLiveStatusState = {
    runId,
    status: 'running',
    logs: [],
    lastLogSeq: 0,
    latest: null,
  };
  let reconnectAttempt = 0;

  for (;;) {
    const connectedAt = Date.now();
    let sawEvent = false;
    let endedReason = 'stream window ended before a terminal event';
    try {
      for await (const event of this.streamPlayRunEvents(runId, {
        mode: 'cli',
        signal: options?.signal,
      })) {
        sawEvent = true;
        const status = updatePlayLiveStatusState(state, event);
        if (!status || !TERMINAL_PLAY_STATUSES.has(status.status)) {
          continue;
        }
        // Terminal event seen: prefer the durable status, fall back to the
        // streamed one if that read fails.
        return await this.getRunStatus(status.runId || runId).catch(
          () => state.latest ?? playRunStatusFromState(state),
        );
      }
    } catch (error) {
      if (options?.signal?.aborted || !isTransientPlayStreamError(error)) {
        throw error;
      }
      endedReason = error instanceof Error ? error.message : String(error);
    }

    // A window that ended cleanly after an abort must not trigger another
    // status read or reconnect.
    throwIfAborted();

    // Window ended without a terminal event. The run may have finished
    // during the gap — re-check durable status once before reconnecting.
    // Non-transient status failures (e.g. 404 = run gone) fail loudly.
    let refreshed: PlayStatus | null = null;
    try {
      refreshed = await this.getRunStatus(runId);
    } catch (error) {
      if (!isTransientPlayStreamError(error)) {
        throw error;
      }
    }
    if (refreshed && TERMINAL_PLAY_STATUSES.has(refreshed.status)) {
      return refreshed;
    }

    // A connection that delivered events or stayed up long enough counts as
    // healthy, so the backoff restarts from the first step.
    if (
      sawEvent ||
      Date.now() - connectedAt >= STREAM_HEALTHY_CONNECTION_MS
    ) {
      reconnectAttempt = 0;
    }
    const delayMs = streamReconnectDelayMs(reconnectAttempt);
    reconnectAttempt += 1;
    options?.onReconnect?.({
      attempt: reconnectAttempt,
      delayMs,
      reason: endedReason,
    });
    await sleep(delayMs);

    // The sleep itself is not abortable; honor an abort that arrived during
    // the backoff before opening another stream window.
    throwIfAborted();
  }
}
|
|
1820
|
+
|
|
1601
1821
|
/**
|
|
1602
1822
|
* Fetch persisted logs for a run using the public runs resource model.
|
|
1603
1823
|
*
|
|
@@ -1611,23 +1831,51 @@ export class DeeplineClient {
|
|
|
1611
1831
|
runId: string,
|
|
1612
1832
|
options?: RunsLogsOptions,
|
|
1613
1833
|
): Promise<RunsLogsResult> {
|
|
1614
|
-
const
|
|
1615
|
-
|
|
1616
|
-
|
|
1617
|
-
|
|
1618
|
-
|
|
1834
|
+
const limit = options?.all
|
|
1835
|
+
? Number.MAX_SAFE_INTEGER
|
|
1836
|
+
: typeof options?.limit === 'number' &&
|
|
1837
|
+
Number.isFinite(options.limit) &&
|
|
1838
|
+
options.limit > 0
|
|
1839
|
+
? Math.trunc(options.limit)
|
|
1619
1840
|
: 200;
|
|
1620
|
-
const
|
|
1841
|
+
const fetchPage = (afterSeq: number, pageLimit: number) =>
|
|
1842
|
+
this.http.get<RunLogsPageResponse>(
|
|
1843
|
+
`/api/v2/runs/${encodeURIComponent(runId)}/logs?afterSeq=${afterSeq}&limit=${pageLimit}`,
|
|
1844
|
+
);
|
|
1845
|
+
// Probe for the run's stored extent, then read the LAST `limit` stored
|
|
1846
|
+
// lines (matching the historical tail-slice semantics), paginating in
|
|
1847
|
+
// server-capped pages until the window is exhausted.
|
|
1848
|
+
const probe = await fetchPage(0, 1);
|
|
1849
|
+
const lastStoredSeq = probe.lastStoredSeq;
|
|
1850
|
+
let afterSeq = options?.all ? 0 : Math.max(0, lastStoredSeq - limit);
|
|
1851
|
+
const entries: Array<{ seq: number; line: string }> = [];
|
|
1852
|
+
while (entries.length < limit) {
|
|
1853
|
+
const page = await fetchPage(
|
|
1854
|
+
afterSeq,
|
|
1855
|
+
Math.min(RUN_LOGS_PAGE_LIMIT, limit - entries.length),
|
|
1856
|
+
);
|
|
1857
|
+
if (page.entries.length === 0) {
|
|
1858
|
+
break;
|
|
1859
|
+
}
|
|
1860
|
+
entries.push(...page.entries);
|
|
1861
|
+
afterSeq = page.entries[page.entries.length - 1]!.seq;
|
|
1862
|
+
if (!page.hasMore) {
|
|
1863
|
+
break;
|
|
1864
|
+
}
|
|
1865
|
+
}
|
|
1866
|
+
const firstSequence = entries.length > 0 ? entries[0]!.seq : null;
|
|
1867
|
+
const lastSequence =
|
|
1868
|
+
entries.length > 0 ? entries[entries.length - 1]!.seq : null;
|
|
1621
1869
|
return {
|
|
1622
|
-
runId:
|
|
1623
|
-
totalCount:
|
|
1870
|
+
runId: probe.runId,
|
|
1871
|
+
totalCount: probe.totalLogCount,
|
|
1624
1872
|
returnedCount: entries.length,
|
|
1625
|
-
firstSequence
|
|
1626
|
-
|
|
1627
|
-
|
|
1628
|
-
|
|
1629
|
-
|
|
1630
|
-
|
|
1873
|
+
firstSequence,
|
|
1874
|
+
lastSequence,
|
|
1875
|
+
truncated: entries.length < probe.totalLogCount,
|
|
1876
|
+
hasMore: lastSequence !== null && lastSequence < lastStoredSeq,
|
|
1877
|
+
entries: entries.map((entry) => entry.line),
|
|
1878
|
+
...(probe.logsTruncated ? { logsTruncated: true } : {}),
|
|
1631
1879
|
};
|
|
1632
1880
|
}
|
|
1633
1881
|
|
|
@@ -1993,6 +2241,7 @@ export class DeeplineClient {
|
|
|
1993
2241
|
runId: workflowId,
|
|
1994
2242
|
status: 'running',
|
|
1995
2243
|
logs: [],
|
|
2244
|
+
lastLogSeq: 0,
|
|
1996
2245
|
latest: null,
|
|
1997
2246
|
};
|
|
1998
2247
|
|