deepline 0.1.139 → 0.1.141
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bundling-sources/apps/play-runner-workers/src/coordinator-entry.ts +58 -18
- package/dist/bundling-sources/apps/play-runner-workers/src/entry.ts +46 -22
- package/dist/bundling-sources/apps/play-runner-workers/src/runtime/live-progress.ts +4 -0
- package/dist/bundling-sources/apps/play-runner-workers/src/runtime/tool-http-errors.ts +7 -262
- package/dist/bundling-sources/apps/play-runner-workers/src/workflow-retry.ts +15 -1
- package/dist/bundling-sources/sdk/src/client.ts +24 -0
- package/dist/bundling-sources/sdk/src/release.ts +2 -2
- package/dist/bundling-sources/sdk/src/types.ts +32 -0
- package/dist/bundling-sources/shared_libs/play-runtime/context.ts +54 -34
- package/dist/bundling-sources/shared_libs/play-runtime/coordinator-headers.ts +17 -0
- package/dist/bundling-sources/shared_libs/play-runtime/live-events.ts +4 -0
- package/dist/bundling-sources/shared_libs/play-runtime/live-state-contract.ts +4 -0
- package/dist/bundling-sources/shared_libs/play-runtime/run-failure.ts +1 -1
- package/dist/bundling-sources/shared_libs/play-runtime/run-ledger.ts +59 -2
- package/dist/bundling-sources/shared_libs/play-runtime/run-snapshot-stream.ts +12 -0
- package/dist/bundling-sources/shared_libs/play-runtime/scheduler-backend.ts +3 -0
- package/dist/bundling-sources/shared_libs/play-runtime/tool-execute-retry-policy.ts +55 -0
- package/dist/bundling-sources/shared_libs/play-runtime/tool-http-errors.ts +248 -0
- package/dist/bundling-sources/shared_libs/play-runtime/worker-api-types.ts +4 -0
- package/dist/cli/index.js +165 -42
- package/dist/cli/index.mjs +165 -42
- package/dist/index.d.mts +44 -0
- package/dist/index.d.ts +44 -0
- package/dist/index.js +36 -2
- package/dist/index.mjs +36 -2
- package/package.json +1 -1
|
@@ -53,6 +53,14 @@ import {
|
|
|
53
53
|
type ToolExecuteResult,
|
|
54
54
|
type ToolResultMetadataInput,
|
|
55
55
|
} from './tool-result';
|
|
56
|
+
import {
|
|
57
|
+
TOOL_EXECUTE_TRANSIENT_HTTP_MAX_ATTEMPTS,
|
|
58
|
+
decideToolExecuteHttpRetry,
|
|
59
|
+
} from './tool-execute-retry-policy';
|
|
60
|
+
import {
|
|
61
|
+
isHardBillingToolHttpError,
|
|
62
|
+
normalizeToolHttpErrorMessage,
|
|
63
|
+
} from './tool-http-errors';
|
|
56
64
|
import { sqlSafePlayColumnName } from '@shared_libs/plays/static-pipeline';
|
|
57
65
|
import { createRuntimeDatasetId } from './dataset-id';
|
|
58
66
|
import { dedupeExplicitMapKeyRows } from './map-row-identity';
|
|
@@ -179,7 +187,6 @@ const MAP_INCREMENTAL_PERSIST_INTERVAL_MS = 100;
|
|
|
179
187
|
const MAP_FRAME_FLUSH_INTERVAL_MS = 250;
|
|
180
188
|
const TOOL_RETRY_AFTER_FALLBACK_MS = 1_000;
|
|
181
189
|
const TOOL_RETRY_HEARTBEAT_INTERVAL_MS = 30_000;
|
|
182
|
-
const TOOL_TRANSIENT_HTTP_MAX_ATTEMPTS = 3;
|
|
183
190
|
const FETCH_TRANSPORT_MAX_ATTEMPTS = 3;
|
|
184
191
|
const FETCH_TRANSPORT_RETRY_DELAY_MS = 100;
|
|
185
192
|
type SafeFetchModule = typeof import('@shared_libs/security/safe-fetch');
|
|
@@ -5561,7 +5568,7 @@ export class PlayContextImpl {
|
|
|
5561
5568
|
transportAttempt += 1;
|
|
5562
5569
|
const message =
|
|
5563
5570
|
error instanceof Error ? error.message : String(error);
|
|
5564
|
-
if (transportAttempt <
|
|
5571
|
+
if (transportAttempt < TOOL_EXECUTE_TRANSIENT_HTTP_MAX_ATTEMPTS) {
|
|
5565
5572
|
this.governor.chargeBudget('retry');
|
|
5566
5573
|
const retryAfterMs =
|
|
5567
5574
|
TOOL_RETRY_AFTER_FALLBACK_MS * transportAttempt;
|
|
@@ -5570,13 +5577,13 @@ export class PlayContextImpl {
|
|
|
5570
5577
|
transportAttempt,
|
|
5571
5578
|
);
|
|
5572
5579
|
this.log(
|
|
5573
|
-
`Tool ${toolId} transport failed calling ${url} on attempt ${transportAttempt}/${
|
|
5580
|
+
`Tool ${toolId} transport failed calling ${url} on attempt ${transportAttempt}/${TOOL_EXECUTE_TRANSIENT_HTTP_MAX_ATTEMPTS}; retrying after ${retryAfterMs}ms: ${message}`,
|
|
5574
5581
|
);
|
|
5575
5582
|
await this.sleepWithCheckpointHeartbeat(retryAfterMs);
|
|
5576
5583
|
continue;
|
|
5577
5584
|
}
|
|
5578
5585
|
throw new Error(
|
|
5579
|
-
`Tool ${toolId} transport failed calling ${url} after ${transportAttempt}/${
|
|
5586
|
+
`Tool ${toolId} transport failed calling ${url} after ${transportAttempt}/${TOOL_EXECUTE_TRANSIENT_HTTP_MAX_ATTEMPTS} attempts: ${message}`,
|
|
5580
5587
|
);
|
|
5581
5588
|
}
|
|
5582
5589
|
|
|
@@ -5584,6 +5591,29 @@ export class PlayContextImpl {
|
|
|
5584
5591
|
|
|
5585
5592
|
if (response.status === 429) {
|
|
5586
5593
|
rateLimitAttempt += 1;
|
|
5594
|
+
const text = await response.text();
|
|
5595
|
+
const initialRetryDecision = decideToolExecuteHttpRetry({
|
|
5596
|
+
toolId,
|
|
5597
|
+
status: response.status,
|
|
5598
|
+
});
|
|
5599
|
+
const error = normalizeToolHttpErrorMessage({
|
|
5600
|
+
toolId,
|
|
5601
|
+
status: response.status,
|
|
5602
|
+
attempt: rateLimitAttempt,
|
|
5603
|
+
maxAttempts: initialRetryDecision.attemptCap,
|
|
5604
|
+
bodyText: text,
|
|
5605
|
+
});
|
|
5606
|
+
const retryDecision = decideToolExecuteHttpRetry({
|
|
5607
|
+
toolId,
|
|
5608
|
+
status: response.status,
|
|
5609
|
+
hardBillingFailure: isHardBillingToolHttpError(error),
|
|
5610
|
+
});
|
|
5611
|
+
if (
|
|
5612
|
+
!retryDecision.retryable ||
|
|
5613
|
+
rateLimitAttempt >= retryDecision.attemptCap
|
|
5614
|
+
) {
|
|
5615
|
+
throw error;
|
|
5616
|
+
}
|
|
5587
5617
|
this.governor.chargeBudget('retry');
|
|
5588
5618
|
const retryAfterMs = parseRetryAfterMs(
|
|
5589
5619
|
response.headers.get('retry-after'),
|
|
@@ -5605,10 +5635,25 @@ export class PlayContextImpl {
|
|
|
5605
5635
|
|
|
5606
5636
|
if (!response.ok) {
|
|
5607
5637
|
const text = await response.text();
|
|
5638
|
+
const initialRetryDecision = decideToolExecuteHttpRetry({
|
|
5639
|
+
toolId,
|
|
5640
|
+
status: response.status,
|
|
5641
|
+
});
|
|
5642
|
+
const error = normalizeToolHttpErrorMessage({
|
|
5643
|
+
toolId,
|
|
5644
|
+
status: response.status,
|
|
5645
|
+
attempt: rateLimitAttempt + 1,
|
|
5646
|
+
maxAttempts: initialRetryDecision.attemptCap,
|
|
5647
|
+
bodyText: text,
|
|
5648
|
+
});
|
|
5649
|
+
const retryDecision = decideToolExecuteHttpRetry({
|
|
5650
|
+
toolId,
|
|
5651
|
+
status: response.status,
|
|
5652
|
+
hardBillingFailure: isHardBillingToolHttpError(error),
|
|
5653
|
+
});
|
|
5608
5654
|
if (
|
|
5609
|
-
|
|
5610
|
-
|
|
5611
|
-
rateLimitAttempt + 1 < TOOL_TRANSIENT_HTTP_MAX_ATTEMPTS
|
|
5655
|
+
retryDecision.retryable &&
|
|
5656
|
+
rateLimitAttempt + 1 < retryDecision.attemptCap
|
|
5612
5657
|
) {
|
|
5613
5658
|
rateLimitAttempt += 1;
|
|
5614
5659
|
this.governor.chargeBudget('retry');
|
|
@@ -5629,33 +5674,8 @@ export class PlayContextImpl {
|
|
|
5629
5674
|
await this.sleepWithCheckpointHeartbeat(retryAfterMs);
|
|
5630
5675
|
continue;
|
|
5631
5676
|
}
|
|
5632
|
-
|
|
5633
|
-
|
|
5634
|
-
const parsed = JSON.parse(text) as Record<string, unknown>;
|
|
5635
|
-
const detail =
|
|
5636
|
-
typeof parsed.message === 'string'
|
|
5637
|
-
? parsed.message
|
|
5638
|
-
: typeof parsed.error === 'string'
|
|
5639
|
-
? parsed.error
|
|
5640
|
-
: text;
|
|
5641
|
-
const code =
|
|
5642
|
-
typeof parsed.code === 'string'
|
|
5643
|
-
? ` code=${parsed.code}`
|
|
5644
|
-
: '';
|
|
5645
|
-
const requestId =
|
|
5646
|
-
typeof parsed.request_id === 'string'
|
|
5647
|
-
? ` requestId=${parsed.request_id}`
|
|
5648
|
-
: typeof parsed.requestId === 'string'
|
|
5649
|
-
? ` requestId=${parsed.requestId}`
|
|
5650
|
-
: '';
|
|
5651
|
-
return `${detail}${code}${requestId}`;
|
|
5652
|
-
} catch {
|
|
5653
|
-
return text;
|
|
5654
|
-
}
|
|
5655
|
-
})();
|
|
5656
|
-
const message = `Tool ${toolId} failed (${response.status}): ${failureDetail}`;
|
|
5657
|
-
this.log(message);
|
|
5658
|
-
throw new Error(message);
|
|
5677
|
+
this.log(error.message);
|
|
5678
|
+
throw error;
|
|
5659
5679
|
}
|
|
5660
5680
|
|
|
5661
5681
|
const data = (await response.json()) as Record<string, unknown>;
|
|
@@ -13,6 +13,8 @@
|
|
|
13
13
|
* The header is harmless when there is only one active version (the default).
|
|
14
14
|
*/
|
|
15
15
|
export const COORDINATOR_VERSION_KEY_HEADER = 'Cloudflare-Workers-Version-Key';
|
|
16
|
+
export const COORDINATOR_VERSION_OVERRIDES_HEADER =
|
|
17
|
+
'Cloudflare-Workers-Version-Overrides';
|
|
16
18
|
/**
|
|
17
19
|
* Shared secret the coordinator uses to authenticate dispatcher (Vercel app)
|
|
18
20
|
* traffic. The coordinator also sanity-checks `x-deepline-run-scope` matches
|
|
@@ -65,6 +67,8 @@ export function coordinatorRequestHeaders(input: {
|
|
|
65
67
|
runId: string;
|
|
66
68
|
contentType?: string | null;
|
|
67
69
|
internalToken?: string | null;
|
|
70
|
+
runtimeDeployVersion?: string | null;
|
|
71
|
+
coordinatorWorkerName?: string | null;
|
|
68
72
|
/**
|
|
69
73
|
* When set, the coordinator validates this matches the runId in the URL.
|
|
70
74
|
* Pass the runId on `/cancel` / `/signal` calls so a leaked dispatcher
|
|
@@ -80,6 +84,19 @@ export function coordinatorRequestHeaders(input: {
|
|
|
80
84
|
if (trimmed) {
|
|
81
85
|
headers[COORDINATOR_VERSION_KEY_HEADER] = trimmed;
|
|
82
86
|
}
|
|
87
|
+
const runtimeDeployVersion = input.runtimeDeployVersion?.trim();
|
|
88
|
+
const coordinatorWorkerName = input.coordinatorWorkerName?.trim();
|
|
89
|
+
if (
|
|
90
|
+
runtimeDeployVersion &&
|
|
91
|
+
coordinatorWorkerName &&
|
|
92
|
+
/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(
|
|
93
|
+
runtimeDeployVersion,
|
|
94
|
+
) &&
|
|
95
|
+
/^[A-Za-z0-9_-]+$/.test(coordinatorWorkerName)
|
|
96
|
+
) {
|
|
97
|
+
headers[COORDINATOR_VERSION_OVERRIDES_HEADER] =
|
|
98
|
+
`${coordinatorWorkerName}="${runtimeDeployVersion}"`;
|
|
99
|
+
}
|
|
83
100
|
const internalToken =
|
|
84
101
|
input.internalToken?.trim() || resolveInternalCoordinatorToken();
|
|
85
102
|
if (internalToken) {
|
|
@@ -79,6 +79,10 @@ export type PlayStepProgressEventPayload = {
|
|
|
79
79
|
completed?: number;
|
|
80
80
|
total?: number;
|
|
81
81
|
failed?: number;
|
|
82
|
+
startedRows?: number;
|
|
83
|
+
activeRows?: number;
|
|
84
|
+
waitingRows?: number;
|
|
85
|
+
completedRows?: number;
|
|
82
86
|
message?: string;
|
|
83
87
|
artifactTableNamespace?: string | null;
|
|
84
88
|
} & PlayRuntimeTimingWindow;
|
|
@@ -21,6 +21,10 @@ export type PlayVisualNodeProgressSnapshot = {
|
|
|
21
21
|
completed?: number;
|
|
22
22
|
total?: number;
|
|
23
23
|
failed?: number;
|
|
24
|
+
startedRows?: number;
|
|
25
|
+
activeRows?: number;
|
|
26
|
+
waitingRows?: number;
|
|
27
|
+
completedRows?: number;
|
|
24
28
|
message?: string;
|
|
25
29
|
updatedAt?: number | null;
|
|
26
30
|
startedAt?: number | null;
|
|
@@ -2,7 +2,7 @@ const CLOUDFLARE_DURABLE_OBJECT_RESET_RE =
|
|
|
2
2
|
/Durable Object.*(?:code was updated|storage caused object)/;
|
|
3
3
|
|
|
4
4
|
export const PLATFORM_DEPLOY_INTERRUPTED_MESSAGE =
|
|
5
|
-
'Run interrupted by a platform deploy
|
|
5
|
+
'Run interrupted by a platform deploy. Deepline retries this automatically when possible; if this error is still visible, re-run the same command.';
|
|
6
6
|
|
|
7
7
|
export const INTERNAL_RUNTIME_STORAGE_ERROR_MESSAGE =
|
|
8
8
|
'Internal play runtime storage failed. Please retry the run; if this keeps happening, contact Deepline support with the run ID.';
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import { normalizePlayRunFailure } from './run-failure';
|
|
2
|
+
|
|
1
3
|
export type PlayRunLedgerStatus =
|
|
2
4
|
| 'queued'
|
|
3
5
|
| 'running'
|
|
@@ -26,6 +28,10 @@ export type PlayRunLedgerStepProgress = {
|
|
|
26
28
|
completed?: number;
|
|
27
29
|
total?: number;
|
|
28
30
|
failed?: number;
|
|
31
|
+
startedRows?: number;
|
|
32
|
+
activeRows?: number;
|
|
33
|
+
waitingRows?: number;
|
|
34
|
+
completedRows?: number;
|
|
29
35
|
message?: string;
|
|
30
36
|
artifactTableNamespace?: string | null;
|
|
31
37
|
startedAt?: number | null;
|
|
@@ -428,6 +434,18 @@ function normalizeStepProgress(
|
|
|
428
434
|
...(optionalFiniteNumber(value.failed) !== undefined
|
|
429
435
|
? { failed: optionalFiniteNumber(value.failed) }
|
|
430
436
|
: {}),
|
|
437
|
+
...(optionalFiniteNumber(value.startedRows) !== undefined
|
|
438
|
+
? { startedRows: optionalFiniteNumber(value.startedRows) }
|
|
439
|
+
: {}),
|
|
440
|
+
...(optionalFiniteNumber(value.activeRows) !== undefined
|
|
441
|
+
? { activeRows: optionalFiniteNumber(value.activeRows) }
|
|
442
|
+
: {}),
|
|
443
|
+
...(optionalFiniteNumber(value.waitingRows) !== undefined
|
|
444
|
+
? { waitingRows: optionalFiniteNumber(value.waitingRows) }
|
|
445
|
+
: {}),
|
|
446
|
+
...(optionalFiniteNumber(value.completedRows) !== undefined
|
|
447
|
+
? { completedRows: optionalFiniteNumber(value.completedRows) }
|
|
448
|
+
: {}),
|
|
431
449
|
...(optionalString(value.message)
|
|
432
450
|
? { message: optionalString(value.message) }
|
|
433
451
|
: {}),
|
|
@@ -560,6 +578,7 @@ function conflictingTerminalSnapshot(
|
|
|
560
578
|
base: PlayRunLedgerSnapshot,
|
|
561
579
|
eventType: keyof typeof TERMINAL_STATUS_BY_EVENT_TYPE,
|
|
562
580
|
occurredAt: number,
|
|
581
|
+
eventError?: string | null,
|
|
563
582
|
): PlayRunLedgerSnapshot | null {
|
|
564
583
|
if (!isTerminalPlayRunLedgerStatus(base.status)) {
|
|
565
584
|
return null;
|
|
@@ -567,6 +586,18 @@ function conflictingTerminalSnapshot(
|
|
|
567
586
|
if (TERMINAL_STATUS_BY_EVENT_TYPE[eventType] === base.status) {
|
|
568
587
|
return null;
|
|
569
588
|
}
|
|
589
|
+
if (
|
|
590
|
+
base.status === 'completed' &&
|
|
591
|
+
eventType === 'run.failed' &&
|
|
592
|
+
eventError &&
|
|
593
|
+
normalizePlayRunFailure(eventError).code === 'PLATFORM_DEPLOY_INTERRUPTED'
|
|
594
|
+
) {
|
|
595
|
+
return withTiming(
|
|
596
|
+
appendLogLines(base, [
|
|
597
|
+
`[ledger] platform deploy terminal event ${eventType} ignored; status already ${base.status}`,
|
|
598
|
+
]),
|
|
599
|
+
);
|
|
600
|
+
}
|
|
570
601
|
const terminalAt = base.finishedAt ?? base.updatedAt ?? 0;
|
|
571
602
|
if (occurredAt > terminalAt) {
|
|
572
603
|
// Newer terminal evidence reconciles the run. This covers replay/receipt
|
|
@@ -683,7 +714,12 @@ export function reducePlayRunLedgerEvent(
|
|
|
683
714
|
);
|
|
684
715
|
case 'run.failed':
|
|
685
716
|
return (
|
|
686
|
-
conflictingTerminalSnapshot(
|
|
717
|
+
conflictingTerminalSnapshot(
|
|
718
|
+
base,
|
|
719
|
+
event.type,
|
|
720
|
+
occurredAt,
|
|
721
|
+
event.error,
|
|
722
|
+
) ??
|
|
687
723
|
withTiming({
|
|
688
724
|
...settleRunningStepsOnTerminal(base, 'failed', occurredAt),
|
|
689
725
|
status: 'failed',
|
|
@@ -697,7 +733,12 @@ export function reducePlayRunLedgerEvent(
|
|
|
697
733
|
);
|
|
698
734
|
case 'run.cancelled':
|
|
699
735
|
return (
|
|
700
|
-
conflictingTerminalSnapshot(
|
|
736
|
+
conflictingTerminalSnapshot(
|
|
737
|
+
base,
|
|
738
|
+
event.type,
|
|
739
|
+
occurredAt,
|
|
740
|
+
event.error,
|
|
741
|
+
) ??
|
|
701
742
|
withTiming({
|
|
702
743
|
...settleRunningStepsOnTerminal(base, 'failed', occurredAt),
|
|
703
744
|
status: 'cancelled',
|
|
@@ -908,6 +949,10 @@ function progressSignature(
|
|
|
908
949
|
completed: progress?.completed ?? null,
|
|
909
950
|
total: progress?.total ?? null,
|
|
910
951
|
failed: progress?.failed ?? null,
|
|
952
|
+
startedRows: progress?.startedRows ?? null,
|
|
953
|
+
activeRows: progress?.activeRows ?? null,
|
|
954
|
+
waitingRows: progress?.waitingRows ?? null,
|
|
955
|
+
completedRows: progress?.completedRows ?? null,
|
|
911
956
|
message: progress?.message ?? null,
|
|
912
957
|
artifactTableNamespace: progress?.artifactTableNamespace ?? null,
|
|
913
958
|
startedAt: progress?.startedAt ?? null,
|
|
@@ -1127,6 +1172,18 @@ export function buildPlayRunLedgerEventsFromStatusPatch(input: {
|
|
|
1127
1172
|
: {}),
|
|
1128
1173
|
...(progress.total !== undefined ? { total: progress.total } : {}),
|
|
1129
1174
|
...(progress.failed !== undefined ? { failed: progress.failed } : {}),
|
|
1175
|
+
...(progress.startedRows !== undefined
|
|
1176
|
+
? { startedRows: progress.startedRows }
|
|
1177
|
+
: {}),
|
|
1178
|
+
...(progress.activeRows !== undefined
|
|
1179
|
+
? { activeRows: progress.activeRows }
|
|
1180
|
+
: {}),
|
|
1181
|
+
...(progress.waitingRows !== undefined
|
|
1182
|
+
? { waitingRows: progress.waitingRows }
|
|
1183
|
+
: {}),
|
|
1184
|
+
...(progress.completedRows !== undefined
|
|
1185
|
+
? { completedRows: progress.completedRows }
|
|
1186
|
+
: {}),
|
|
1130
1187
|
...(progress.message !== undefined ? { message: progress.message } : {}),
|
|
1131
1188
|
...(progress.artifactTableNamespace !== undefined
|
|
1132
1189
|
? { artifactTableNamespace: progress.artifactTableNamespace }
|
|
@@ -31,6 +31,10 @@ export type PlayRunStreamNodeProgress = {
|
|
|
31
31
|
completed?: number;
|
|
32
32
|
total?: number;
|
|
33
33
|
failed?: number;
|
|
34
|
+
startedRows?: number;
|
|
35
|
+
activeRows?: number;
|
|
36
|
+
waitingRows?: number;
|
|
37
|
+
completedRows?: number;
|
|
34
38
|
message?: string;
|
|
35
39
|
updatedAt?: number | null;
|
|
36
40
|
startedAt?: number | null;
|
|
@@ -192,6 +196,10 @@ function buildSnapshotFromLedger(
|
|
|
192
196
|
completed: step.progress.completed,
|
|
193
197
|
total: step.progress.total,
|
|
194
198
|
failed: step.progress.failed,
|
|
199
|
+
startedRows: step.progress.startedRows,
|
|
200
|
+
activeRows: step.progress.activeRows,
|
|
201
|
+
waitingRows: step.progress.waitingRows,
|
|
202
|
+
completedRows: step.progress.completedRows,
|
|
195
203
|
message: step.progress.message,
|
|
196
204
|
artifactTableNamespace:
|
|
197
205
|
step.progress.artifactTableNamespace ??
|
|
@@ -544,6 +552,10 @@ export function diffPlayRunStreamEvents(input: {
|
|
|
544
552
|
completed: state.progress.completed,
|
|
545
553
|
total: state.progress.total,
|
|
546
554
|
failed: state.progress.failed,
|
|
555
|
+
startedRows: state.progress.startedRows,
|
|
556
|
+
activeRows: state.progress.activeRows,
|
|
557
|
+
waitingRows: state.progress.waitingRows,
|
|
558
|
+
completedRows: state.progress.completedRows,
|
|
547
559
|
message: state.progress.message,
|
|
548
560
|
artifactTableNamespace:
|
|
549
561
|
state.progress.artifactTableNamespace ??
|
|
@@ -141,6 +141,8 @@ export type PlaySchedulerSubmitInput = {
|
|
|
141
141
|
coordinatorUrl?: string | null;
|
|
142
142
|
/** Request-scoped coordinator auth token for non-production preview/dev runs. */
|
|
143
143
|
coordinatorInternalToken?: string | null;
|
|
144
|
+
/** Runtime deploy generation/version that owns this run, when known. */
|
|
145
|
+
runtimeDeployVersion?: string | null;
|
|
144
146
|
/** Request-scoped Vercel Deployment Protection bypass for preview runtime callbacks. */
|
|
145
147
|
vercelProtectionBypassToken?: string | null;
|
|
146
148
|
/** Millisecond epoch timestamp captured immediately before scheduler submit. */
|
|
@@ -213,6 +215,7 @@ export interface PlaySchedulerBackend {
|
|
|
213
215
|
options?: {
|
|
214
216
|
coordinatorUrl?: string | null;
|
|
215
217
|
coordinatorInternalToken?: string | null;
|
|
218
|
+
runtimeDeployVersion?: string | null;
|
|
216
219
|
initialState?: Record<string, unknown> | null;
|
|
217
220
|
orgId?: string | null;
|
|
218
221
|
},
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/** Attempt cap applied to tools whose transient 5xx responses are retry-safe. */
export const TOOL_EXECUTE_TRANSIENT_HTTP_MAX_ATTEMPTS = 3;
/** Attempt cap applied to HTTP 429 responses. */
export const TOOL_EXECUTE_RATE_LIMIT_MAX_ATTEMPTS = 8;
// NOTE(review): the two transport constants are not referenced inside this
// module; presumably consumed by the fetch/transport retry loop — confirm.
export const TOOL_EXECUTE_TRANSPORT_MAX_ATTEMPTS = 3;
export const TOOL_EXECUTE_TRANSPORT_RETRY_DELAY_MS = 1_000;

/**
 * Outcome of classifying a failed tool-execute HTTP response.
 *
 * - `retryable`: whether the caller may issue another attempt.
 * - `attemptCap`: total number of attempts allowed for this class of failure.
 * - `reason`: machine-readable classification of the decision.
 */
export type ToolExecuteHttpRetryDecision = {
  retryable: boolean;
  attemptCap: number;
  reason:
    | 'rate_limit'
    | 'retry_safe_transient_5xx'
    | 'hard_billing_error'
    | 'unsafe_transient_5xx'
    | 'non_retryable_status';
};

/**
 * Tool ids whose 5xx responses may be retried. Every other tool is treated as
 * unsafe to retry on 5xx and gets a single attempt.
 */
const RETRY_SAFE_TRANSIENT_5XX_TOOL_IDS: ReadonlySet<string> = new Set([
  'test_transient_500',
]);

/**
 * Classifies a non-OK tool-execute HTTP status into a retry decision.
 *
 * @param input.toolId Tool whose call failed; only allowlisted ids may retry on 5xx.
 * @param input.status HTTP status of the failed response.
 * @param input.hardBillingFailure When true, a 429 is reported as a
 *   non-retryable billing failure instead of a rate limit.
 * @returns The retry decision; statuses other than 429 and 5xx are never retryable.
 */
export function decideToolExecuteHttpRetry(input: {
  toolId: string;
  status: number;
  hardBillingFailure?: boolean;
}): ToolExecuteHttpRetryDecision {
  if (input.status === 429) {
    // A 429 that carries a billing failure will not clear by waiting.
    return input.hardBillingFailure
      ? {
          retryable: false,
          attemptCap: TOOL_EXECUTE_RATE_LIMIT_MAX_ATTEMPTS,
          reason: 'hard_billing_error',
        }
      : {
          retryable: true,
          attemptCap: TOOL_EXECUTE_RATE_LIMIT_MAX_ATTEMPTS,
          reason: 'rate_limit',
        };
  }
  if (input.status >= 500 && input.status < 600) {
    if (RETRY_SAFE_TRANSIENT_5XX_TOOL_IDS.has(input.toolId)) {
      return {
        retryable: true,
        attemptCap: TOOL_EXECUTE_TRANSIENT_HTTP_MAX_ATTEMPTS,
        reason: 'retry_safe_transient_5xx',
      };
    }
    return {
      retryable: false,
      attemptCap: 1,
      reason: 'unsafe_transient_5xx',
    };
  }
  return {
    retryable: false,
    attemptCap: 1,
    reason: 'non_retryable_status',
  };
}
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
/**
 * Error raised for a failed tool-execute HTTP response. Carries the HTTP
 * status and, when one was extracted from the response, a billing payload.
 */
export class ToolHttpError extends Error {
  /** Billing payload associated with the failure, or `null` when there is none. */
  readonly billing: Record<string, unknown> | null;
  /** HTTP status of the failed tool-execute response (e.g. 429, 502). */
  readonly status: number;

  /**
   * @param message Human-readable failure message.
   * @param billing Billing payload to attach, or `null`.
   * @param status HTTP status of the failed response.
   */
  constructor(
    message: string,
    billing: Record<string, unknown> | null,
    status: number,
  ) {
    super(message);
    this.name = 'ToolHttpError';
    this.billing = billing;
    this.status = status;
  }
}
|
|
17
|
+
|
|
18
|
+
/**
 * Renders a credit amount for display in a user-facing message.
 *
 * Finite numbers are rounded to at most 8 decimal places with trailing zeros
 * removed. Non-blank strings are passed through unchanged. Anything else
 * (missing values, NaN/Infinity, objects, booleans) renders as the `-`
 * placeholder rather than leaking `NaN` or `[object Object]` into the message.
 *
 * @param value Raw amount taken from a billing payload.
 * @returns The display string, or `-` when no usable amount is present.
 */
function formatCreditAmount(value: unknown): string {
  if (typeof value === 'number') {
    // Round-trip through Number to drop the zero padding toFixed adds.
    return Number.isFinite(value) ? Number(value.toFixed(8)).toString() : '-';
  }
  if (typeof value === 'string' && value.trim()) {
    return value;
  }
  return '-';
}
|
|
24
|
+
|
|
25
|
+
/**
 * Type guard for plain object-like values.
 *
 * @param value Any value.
 * @returns True when `value` is a non-null object that is not an array.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  return value !== null && typeof value === 'object' && !Array.isArray(value);
}
|
|
28
|
+
|
|
29
|
+
/**
 * Reads a string property from an unknown value.
 *
 * @param value Candidate object; non-records yield `null`.
 * @param key Property name to read.
 * @returns The property value (untrimmed) when it is a string containing
 *   non-whitespace characters, otherwise `null`.
 */
function getStringField(value: unknown, key: string): string | null {
  if (!isRecord(value)) return null;
  const field = value[key];
  return typeof field === 'string' && field.trim() ? field : null;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Reads a nested object property from an unknown value.
 *
 * @param value Candidate object; non-records yield `null`.
 * @param key Property name to read.
 * @returns The property value when it is itself a non-array object,
 *   otherwise `null`.
 */
function getObjectField(
  value: unknown,
  key: string,
): Record<string, unknown> | null {
  if (!isRecord(value)) return null;
  const field = value[key];
  return isRecord(field) ? field : null;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Type guard for a billing payload tagged `kind: 'insufficient_credits'`.
 *
 * @param billing Billing payload, or `null` when the response had none.
 * @returns True only when the payload exists and carries that exact `kind`.
 */
function isInsufficientCreditsBilling(
  billing: Record<string, unknown> | null,
): billing is Record<string, unknown> {
  return billing?.kind === 'insufficient_credits';
}
|
|
49
|
+
|
|
50
|
+
/**
 * Detects a payload that describes a hard billing failure (billing category,
 * a known billing error code, or a billing-cap / insufficient-credits message
 * that does not also mention a rate limit).
 *
 * For each of category, code and message the first candidate field that is a
 * non-blank string is used. Selecting with `??` alone would stop at an empty
 * string or a non-string value and hide a usable later field.
 *
 * @param payload Parsed error payload or nested billing object; `null` yields false.
 * @returns True when the payload is classified as a hard billing failure.
 */
function isHardBillingFailurePayload(
  payload: Record<string, unknown> | null,
): payload is Record<string, unknown> {
  if (!payload) return false;

  // First candidate that is a string with non-whitespace content, else ''.
  const firstText = (...candidates: unknown[]): string => {
    for (const candidate of candidates) {
      if (typeof candidate === 'string' && candidate.trim()) return candidate;
    }
    return '';
  };

  const category = firstText(
    payload.error_category,
    payload.errorCategory,
  ).toLowerCase();
  if (category === 'billing') return true;

  const code = firstText(payload.code, payload.error_code).toUpperCase();
  if (
    code === 'INSUFFICIENT_CREDITS' ||
    code === 'BILLING_CAP_EXCEEDED' ||
    code === 'MONTHLY_BILLING_LIMIT_EXCEEDED'
  ) {
    return true;
  }

  const message = firstText(
    payload.error,
    payload.message,
    payload.failure_description,
  ).toLowerCase();
  const mentionsBilling =
    message.includes('billing cap') ||
    message.includes('monthly billing limit') ||
    message.includes('rolling 30-day organization billing cap') ||
    message.includes('insufficient credits');
  // A message that also mentions a rate limit is not treated as hard billing.
  return mentionsBilling && !message.includes('rate limit');
}
|
|
77
|
+
|
|
78
|
+
/**
 * Builds the billing object attached to a hard-billing `ToolHttpError`.
 *
 * The listed properties are defaults: `payload` is spread last, so any
 * property the payload itself defines (including `kind`, `code`,
 * `error_category`, `failure_origin`, `message`) replaces the default.
 *
 * @param payload Payload already classified as a hard billing failure.
 * @returns A new object with the defaults filled in and the payload's own
 *   properties layered on top.
 */
function normalizeHardBillingPayload(
  payload: Record<string, unknown>,
): Record<string, unknown> {
  const defaults: Record<string, unknown> = {
    kind: 'billing_cap_exceeded',
    code: getStringField(payload, 'code') ?? 'MONTHLY_BILLING_LIMIT_EXCEEDED',
    error_category: 'billing',
    failure_origin:
      getStringField(payload, 'failure_origin') ?? 'deepline_billing',
    message:
      getStringField(payload, 'error') ??
      getStringField(payload, 'message') ??
      'Deepline billing cap exceeded.',
  };
  return { ...defaults, ...payload };
}
|
|
94
|
+
|
|
95
|
+
/**
 * Formats the error message for a hard billing failure.
 *
 * @param input.billing Normalized billing payload; `code` and
 *   `message`/`error` are read from it when they are non-blank strings.
 * @param input.toolId Tool whose call failed.
 * @param input.status HTTP status of the failed response.
 * @param input.attempt Attempt number to report.
 * @param input.maxAttempts Attempt cap to report.
 * @returns The message, prefixed with tool id, status and attempt counter.
 */
function formatHardBillingFailureMessage(input: {
  billing: Record<string, unknown>;
  toolId: string;
  status: number;
  attempt: number;
  maxAttempts: number;
}): string {
  const code = getStringField(input.billing, 'code');
  const message =
    getStringField(input.billing, 'message') ??
    getStringField(input.billing, 'error') ??
    'Deepline billing cap exceeded.';
  return `tool ${input.toolId} ${input.status} attempt ${input.attempt}/${input.maxAttempts}: Deepline billing cap exceeded. Run halted before marking remaining rows processed. ${code ? `code=${code}. ` : ''}${message}`;
}
|
|
109
|
+
|
|
110
|
+
/**
 * Formats the detail text for an insufficient-credits billing payload.
 *
 * @param input.billing Billing payload; reads `operation_id`/`operation`,
 *   `balance_credits`, `required_credits`,
 *   `recommended_add_credits`/`needed_credits` and `billing_url`.
 * @param input.toolId Used as the operation label when the payload names none.
 * @returns A sentence stating balance versus required credits, followed by a
 *   top-up hint when a billing URL is present.
 */
function formatInsufficientCreditsMessage(input: {
  billing: Record<string, unknown>;
  toolId: string;
}): string {
  const operation =
    getStringField(input.billing, 'operation_id') ??
    getStringField(input.billing, 'operation') ??
    input.toolId;
  const balance = formatCreditAmount(input.billing.balance_credits);
  const required = formatCreditAmount(input.billing.required_credits);
  const recommended = formatCreditAmount(
    input.billing.recommended_add_credits ?? input.billing.needed_credits,
  );
  const billingUrl = getStringField(input.billing, 'billing_url');

  // '-' is the placeholder formatCreditAmount returns for a missing amount.
  let addSuffix = '';
  if (billingUrl && recommended !== '-') {
    addSuffix = ` Add >=${recommended} at ${billingUrl}.`;
  } else if (billingUrl) {
    addSuffix = ` Add credits at ${billingUrl}.`;
  }
  return `Workspace balance ${balance} < required ${required} for ${operation}.${addSuffix}`;
}
|
|
132
|
+
|
|
133
|
+
/**
 * Payload keys copied into a public tool error message, in output order.
 * Hoisted to module scope so the list is not rebuilt on every call.
 */
const PUBLIC_TOOL_ERROR_KEYS: readonly string[] = [
  'error',
  'message',
  'code',
  'failure_origin',
  'error_category',
  'failure_description',
  'operator_hint',
  'failure_hint',
  'details',
  'provider',
  'operation',
  'request_id',
  'requestId',
  'credential_source',
  'credential_owner',
];

/**
 * Produces the detail text for a generic (non-billing) tool HTTP error.
 *
 * @param input.parsed Response body parsed as a JSON object, or `null` when
 *   the body was not a JSON object.
 * @param input.bodyText Raw response body.
 * @returns When `parsed` is `null`, the first 500 characters of the raw body.
 *   Otherwise a JSON string (capped at 1,500 characters) of the allowlisted
 *   keys whose values are non-blank strings, or of the whole parsed object
 *   when none of those keys qualify.
 */
function formatPublicToolErrorPayload(input: {
  parsed: Record<string, unknown> | null;
  bodyText: string;
}): string {
  const { parsed } = input;
  if (!parsed) {
    return input.bodyText.slice(0, 500);
  }

  const selected: Record<string, unknown> = {};
  for (const key of PUBLIC_TOOL_ERROR_KEYS) {
    const value = parsed[key];
    if (typeof value === 'string' && value.trim()) {
      selected[key] = value;
    }
  }

  const publicPayload = Object.keys(selected).length > 0 ? selected : parsed;
  return JSON.stringify(publicPayload).slice(0, 1_500);
}
|
|
169
|
+
|
|
170
|
+
/**
 * Converts a failed tool-execute HTTP response into a `ToolHttpError`.
 *
 * Classification order:
 * 1. A nested `billing` object tagged as insufficient credits.
 * 2. A hard billing failure detected on the nested `billing` object, then on
 *    the top-level payload.
 * 3. Anything else: a generic message built from the public payload fields
 *    (or the raw body when it is not a JSON object).
 *
 * @param input.toolId Tool whose call failed.
 * @param input.status HTTP status of the failed response.
 * @param input.attempt Attempt number to report in the message.
 * @param input.maxAttempts Attempt cap to report in the message.
 * @param input.bodyText Raw response body; need not be valid JSON.
 * @returns The error object; this function returns it rather than throwing.
 */
export function normalizeToolHttpErrorMessage(input: {
  toolId: string;
  status: number;
  attempt: number;
  maxAttempts: number;
  bodyText: string;
}): ToolHttpError {
  let parsed: Record<string, unknown> | null = null;
  try {
    const candidate: unknown = JSON.parse(input.bodyText);
    parsed = isRecord(candidate) ? candidate : null;
  } catch {
    // Non-JSON bodies are reported verbatim by the generic branch below.
    parsed = null;
  }

  const billing = getObjectField(parsed, 'billing');
  if (isInsufficientCreditsBilling(billing)) {
    return new ToolHttpError(
      `tool ${input.toolId} ${input.status} attempt ${input.attempt}/${input.maxAttempts}: ${formatInsufficientCreditsMessage(
        {
          billing,
          toolId: input.toolId,
        },
      )}`,
      billing,
      input.status,
    );
  }

  // Prefer the nested billing object; fall back to the top-level payload.
  const hardBillingPayload = isHardBillingFailurePayload(billing)
    ? normalizeHardBillingPayload(billing)
    : isHardBillingFailurePayload(parsed)
      ? normalizeHardBillingPayload(parsed)
      : null;
  if (hardBillingPayload) {
    return new ToolHttpError(
      formatHardBillingFailureMessage({
        billing: hardBillingPayload,
        toolId: input.toolId,
        status: input.status,
        attempt: input.attempt,
        maxAttempts: input.maxAttempts,
      }),
      hardBillingPayload,
      input.status,
    );
  }

  return new ToolHttpError(
    `tool ${input.toolId} ${input.status} attempt ${input.attempt}/${input.maxAttempts}: ${formatPublicToolErrorPayload(
      {
        parsed,
        bodyText: input.bodyText,
      },
    )}`,
    billing,
    input.status,
  );
}
|
|
226
|
+
|
|
227
|
+
/**
 * Returns the billing payload carried by a `ToolHttpError`.
 *
 * @param error Any thrown value.
 * @returns `error.billing` when `error` is a `ToolHttpError`, otherwise `null`.
 */
export function extractErrorBilling(
  error: unknown,
): Record<string, unknown> | null {
  return error instanceof ToolHttpError ? error.billing : null;
}
|
|
232
|
+
|
|
233
|
+
/**
 * Reports whether a thrown value is a `ToolHttpError` whose billing payload
 * is either an insufficient-credits payload or a hard billing failure.
 *
 * @param error Any thrown value.
 * @returns False for anything that is not a `ToolHttpError`.
 */
export function isHardBillingToolHttpError(error: unknown): boolean {
  if (!(error instanceof ToolHttpError)) return false;
  return (
    isInsufficientCreditsBilling(error.billing) ||
    isHardBillingFailurePayload(error.billing)
  );
}
|
|
240
|
+
|
|
241
|
+
/**
 * A tool call that ultimately failed with HTTP 429 — provider or
 * Deepline-internal rate-limit pushback that survived the in-process retry
 * budget. This is run-level throughput pressure, never a row-specific defect.
 *
 * @param error Any thrown value.
 * @returns True when `error` is a `ToolHttpError` with `status === 429`.
 */
export function isRateLimitToolHttpError(error: unknown): boolean {
  return error instanceof ToolHttpError && error.status === 429;
}
|