deepline 0.1.79 → 0.1.80
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +68 -31
- package/dist/cli/index.mjs +68 -31
- package/dist/index.d.mts +9 -1
- package/dist/index.d.ts +9 -1
- package/dist/index.js +7 -4
- package/dist/index.mjs +7 -4
- package/dist/repo/apps/play-runner-workers/src/child-play-await.ts +192 -0
- package/dist/repo/apps/play-runner-workers/src/coordinator-entry.ts +1102 -1616
- package/dist/repo/apps/play-runner-workers/src/dedup-do.ts +506 -654
- package/dist/repo/apps/play-runner-workers/src/entry.ts +896 -354
- package/dist/repo/apps/play-runner-workers/src/workflow-retry-state.ts +8 -2
- package/dist/repo/sdk/src/client.ts +9 -2
- package/dist/repo/sdk/src/release.ts +2 -2
- package/dist/repo/sdk/src/types.ts +5 -0
- package/dist/repo/shared_libs/play-runtime/governor/coordinator-rate-state-backend.ts +231 -0
- package/dist/repo/shared_libs/play-runtime/governor/governor.ts +376 -0
- package/dist/repo/shared_libs/play-runtime/governor/policy.ts +179 -0
- package/dist/repo/shared_libs/play-runtime/governor/rate-state-backend.ts +87 -0
- package/dist/repo/shared_libs/play-runtime/run-failure.ts +12 -0
- package/dist/repo/shared_libs/play-runtime/scheduler-backend.ts +24 -0
- package/dist/repo/shared_libs/play-runtime/submit-limits.ts +35 -0
- package/dist/repo/shared_libs/plays/bundling/index.ts +4 -12
- package/dist/repo/shared_libs/plays/bundling/limits.ts +29 -0
- package/dist/repo/shared_libs/plays/static-pipeline.ts +56 -3
- package/dist/repo/shared_libs/temporal/constants.ts +38 -0
- package/package.json +1 -1
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Play Execution Governor — the single policy table.
|
|
3
|
+
*
|
|
4
|
+
* This is the ONE source of truth for every concurrency, budget, and pacing
|
|
5
|
+
* number in the play runtime. Both runner Adapters (`cjs_node20` in-process and
|
|
6
|
+
* `esm_workers` cloud) resolve their limits from here, so the two substrates
|
|
7
|
+
* cannot drift. See ADR 0007 and the "Play Execution Governor" entry in
|
|
8
|
+
* CONTEXT.md.
|
|
9
|
+
*
|
|
10
|
+
* Tuning philosophy (product decision):
|
|
11
|
+
* - Caps are GENEROUS and tuned for fast execution. They are runaway /
|
|
12
|
+
* anti-starvation guards, NOT per-workload throttles. A legitimate large run
|
|
13
|
+
* should never hit them.
|
|
14
|
+
* - The real-time bounds on a run are (a) the wall-clock runtime cap and
|
|
15
|
+
* (b) per-provider rate pacing. The counters below only stop pathological
|
|
16
|
+
* recursion / fan-out.
|
|
17
|
+
* - Every value is an EXPLICIT, finite cap. Nothing here is unbounded — there
|
|
18
|
+
* is no `null`, no "off", no implicit infinity. Unaccounted resource use
|
|
19
|
+
* must be impossible.
|
|
20
|
+
*
|
|
21
|
+
* The docs catalog (`src/lib/plays/limits-catalog.ts`) and its generated public
|
|
22
|
+
* + internal pages source their numbers from this table.
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
/** Concurrency ceilings — how much may run at once. */
|
|
26
|
+
export interface ExecutionConcurrencyPolicy {
|
|
27
|
+
/** Map rows resolving their fields concurrently when no `concurrency` is given. */
|
|
28
|
+
readonly rowDefault: number;
|
|
29
|
+
/** Hard ceiling for an explicit map `concurrency` value; larger is clamped. */
|
|
30
|
+
readonly rowMax: number;
|
|
31
|
+
/** Concurrently in-flight child plays (`ctx.runPlay`). Excess launches block. */
|
|
32
|
+
readonly childPlay: number;
|
|
33
|
+
/** Global backstop on concurrently in-flight tool calls across all providers. */
|
|
34
|
+
readonly toolCalls: number;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* Per-run budgets — total attempts allowed before a run is treated as runaway.
|
|
39
|
+
* Accumulated down the dispatch lineage (a child seeds from the parent), so the
|
|
40
|
+
* budget is global across the nested-play tree, not per-worker.
|
|
41
|
+
*/
|
|
42
|
+
export interface ExecutionBudgetPolicy {
|
|
43
|
+
/** Max nesting depth of `ctx.runPlay` chains. Deeper is almost certainly a cycle. */
|
|
44
|
+
readonly maxPlayCallDepth: number;
|
|
45
|
+
/** Max total `ctx.runPlay` calls along a lineage. */
|
|
46
|
+
readonly maxPlayCallCount: number;
|
|
47
|
+
/** Max direct child plays one play may launch. */
|
|
48
|
+
readonly maxChildPlayCallsPerParent: number;
|
|
49
|
+
/** Max nested-play descendants created during a run. */
|
|
50
|
+
readonly maxDescendants: number;
|
|
51
|
+
/** Max total tool calls in a run. */
|
|
52
|
+
readonly maxToolCallCount: number;
|
|
53
|
+
/** Max total retries across all steps/tools. */
|
|
54
|
+
readonly maxRetryCount: number;
|
|
55
|
+
/** Max total waterfall-step executions in a run. */
|
|
56
|
+
readonly maxWaterfallStepExecutions: number;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* Per-provider rate pacing — the real outbound throughput governor.
|
|
61
|
+
*
|
|
62
|
+
* SUBSTRATE NOTE: the per-provider request RATE (`requestsPerWindow`/`windowMs`
|
|
63
|
+
* from a provider's `PacingRule`s) is enforced on BOTH substrates. A rule's
|
|
64
|
+
* optional `maxConcurrency` (simultaneous-in-flight cap) is enforced only on
|
|
65
|
+
* `cjs_node20`; on `esm_workers` it is intentionally excluded from the pacing
|
|
66
|
+
* contract because a fanned-out run cannot guarantee the per-isolate release
|
|
67
|
+
* signal an in-flight count needs, so only the org-wide tool-concurrency
|
|
68
|
+
* backstop applies there. See PacingRule.maxConcurrency in rate-state-backend.ts.
|
|
69
|
+
*/
|
|
70
|
+
export interface ExecutionPacingPolicy {
|
|
71
|
+
/** RPS applied to a provider that declares no explicit rate limit. */
|
|
72
|
+
readonly defaultProviderRequestsPerSecond: number;
|
|
73
|
+
/** Parallelism the scheduler suggests before a provider's own hints tighten it. */
|
|
74
|
+
readonly suggestedMaxParallelism: number;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
export interface ResolvedExecutionPolicy {
|
|
78
|
+
readonly concurrency: ExecutionConcurrencyPolicy;
|
|
79
|
+
readonly budgets: ExecutionBudgetPolicy;
|
|
80
|
+
readonly pacing: ExecutionPacingPolicy;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
/**
|
|
84
|
+
* The shared default policy. Both substrates use this verbatim unless an entry
|
|
85
|
+
* in {@link ADAPTER_POLICY_OVERRIDES} forces a documented difference.
|
|
86
|
+
*/
|
|
87
|
+
export const SHARED_EXECUTION_POLICY: ResolvedExecutionPolicy = {
|
|
88
|
+
concurrency: {
|
|
89
|
+
// Map row concurrency is platform-controlled — customers cannot set it — so
|
|
90
|
+
// every map runs at the ceiling: high enough that per-provider pacing (not
|
|
91
|
+
// the row pool) is the bottleneck even for multi-step waterfall rows. Pure-JS
|
|
92
|
+
// maps use a separate fast path and are not bound by this. Kept == rowMax so
|
|
93
|
+
// the default and the cap are the same single platform value.
|
|
94
|
+
rowDefault: 2_000,
|
|
95
|
+
// Hard cap. Above this, more in-flight rows only park memory — outbound is
|
|
96
|
+
// already bounded by tool concurrency + per-provider pacing.
|
|
97
|
+
rowMax: 2_000,
|
|
98
|
+
// Concurrent child-play launches. Generous; each child is a real launch, so
|
|
99
|
+
// this is the one value most likely to need a documented esm_workers
|
|
100
|
+
// override if isolate pressure shows up in E2E.
|
|
101
|
+
childPlay: 32,
|
|
102
|
+
// Global all-provider backstop. Per-provider pacing is the real limit; this
|
|
103
|
+
// just stops a single run from opening an absurd number of sockets at once.
|
|
104
|
+
toolCalls: 256,
|
|
105
|
+
},
|
|
106
|
+
budgets: {
|
|
107
|
+
// Runaway guards, not workload limits. A 5,000-row map calling several tools
|
|
108
|
+
// per row is normal and must fit comfortably under these.
|
|
109
|
+
maxPlayCallDepth: 8,
|
|
110
|
+
maxPlayCallCount: 100_000,
|
|
111
|
+
maxChildPlayCallsPerParent: 100_000,
|
|
112
|
+
maxDescendants: 100_000,
|
|
113
|
+
maxToolCallCount: 5_000_000,
|
|
114
|
+
maxRetryCount: 100_000,
|
|
115
|
+
maxWaterfallStepExecutions: 5_000_000,
|
|
116
|
+
},
|
|
117
|
+
pacing: {
|
|
118
|
+
// Undeclared providers; declared providers (rate-limit-definitions.ts) win.
|
|
119
|
+
defaultProviderRequestsPerSecond: 30,
|
|
120
|
+
suggestedMaxParallelism: 50,
|
|
121
|
+
},
|
|
122
|
+
};
|
|
123
|
+
|
|
124
|
+
export type AdapterId = 'cjs_node20' | 'esm_workers';
|
|
125
|
+
|
|
126
|
+
/**
|
|
127
|
+
* One level deep on purpose: each policy section is a flat record of numbers, so
|
|
128
|
+
* an override is `{ section: { key: value } }`. The merge in
|
|
129
|
+
* {@link resolveExecutionPolicy} is one level; this type matches it exactly so
|
|
130
|
+
* it can never advertise nested-override support the merge doesn't implement.
|
|
131
|
+
*/
|
|
132
|
+
type PolicyOverride = {
|
|
133
|
+
[S in keyof ResolvedExecutionPolicy]?: Partial<ResolvedExecutionPolicy[S]>;
|
|
134
|
+
};
|
|
135
|
+
|
|
136
|
+
/**
|
|
137
|
+
* The ONLY sanctioned per-substrate divergence. Empty by design — both
|
|
138
|
+
* substrates run the shared policy. Every entry added here MUST carry a
|
|
139
|
+
* one-line comment citing the substrate constraint that forces it (e.g. isolate
|
|
140
|
+
* CPU/memory). CI may assert this map stays small. This is the anti-drift seam:
|
|
141
|
+
* differences are explicit and justified, never accidental.
|
|
142
|
+
*/
|
|
143
|
+
export const ADAPTER_POLICY_OVERRIDES: Record<AdapterId, PolicyOverride> = {
|
|
144
|
+
cjs_node20: {},
|
|
145
|
+
esm_workers: {},
|
|
146
|
+
};
|
|
147
|
+
|
|
148
|
+
/**
 * Build the effective execution policy for one Adapter.
 *
 * Starts from `SHARED_EXECUTION_POLICY` and layers that Adapter's entry in
 * `ADAPTER_POLICY_OVERRIDES` on top, section by section. The merge is one
 * level deep: inside each section an override key replaces the shared value,
 * and a section the override omits is copied from the shared policy as-is.
 *
 * @param adapter - Adapter whose documented overrides are applied.
 * @returns A newly built policy object with fresh section objects.
 */
export function resolveExecutionPolicy(
  adapter: AdapterId,
): ResolvedExecutionPolicy {
  const shared = SHARED_EXECUTION_POLICY;
  // Each override section may be absent; spreading `undefined` adds nothing.
  const { concurrency, budgets, pacing } = ADAPTER_POLICY_OVERRIDES[adapter];
  return {
    concurrency: { ...shared.concurrency, ...concurrency },
    budgets: { ...shared.budgets, ...budgets },
    pacing: { ...shared.pacing, ...pacing },
  };
}
|
|
162
|
+
|
|
163
|
+
/**
 * Effective row concurrency for a map. Single helper, used by both substrates.
 *
 * An explicit, finite, positive `requested` value is floored to a whole number
 * and clamped to `[1, policy.concurrency.rowMax]`. Any other input (omitted,
 * `NaN`, `±Infinity`, zero, or negative) yields `policy.concurrency.rowDefault`.
 *
 * @param policy - Resolved execution policy supplying `rowDefault` and `rowMax`.
 * @param requested - Optional caller-requested row concurrency.
 * @returns The row concurrency to use for the map.
 */
export function resolveRowConcurrency(
  policy: ResolvedExecutionPolicy,
  requested?: number,
): number {
  const isUsableRequest =
    typeof requested === 'number' &&
    Number.isFinite(requested) &&
    requested > 0;
  if (!isUsableRequest) {
    return policy.concurrency.rowDefault;
  }
  // A fractional request in (0, 1) floors to 0, which is below the documented
  // lower bound; lift it to 1 before applying the upper cap.
  const wholeRows = Math.max(1, Math.floor(requested));
  return Math.min(wholeRows, policy.concurrency.rowMax);
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Rate State Backend — the one swappable seam of the Play Execution Governor.
|
|
3
|
+
*
|
|
4
|
+
* Per-`(org, provider)` rate windows are the only execution state that cannot be
|
|
5
|
+
* process-local: on `esm_workers` a run fans child plays across isolates, so the
|
|
6
|
+
* window must be shared. Everything else the Governor owns (budgets, concurrency
|
|
7
|
+
* slots) stays Governor-local and threads down the lineage via the snapshot.
|
|
8
|
+
*
|
|
9
|
+
* Adapters: an in-memory backend for the single-process `cjs_node20` runner, and
|
|
10
|
+
* a shared backend (Run Coordination Cache Durable Object, or server-side Redis)
|
|
11
|
+
* for `esm_workers`. See ADR 0007 and CONTEXT.md (Rate State Backend).
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
/** A single resolved rate rule for a bucket (from rate-limit-definitions). */
|
|
15
|
+
export interface PacingRule {
|
|
16
|
+
readonly ruleId: string;
|
|
17
|
+
readonly requestsPerWindow: number;
|
|
18
|
+
readonly windowMs: number;
|
|
19
|
+
/**
|
|
20
|
+
* Optional simultaneous-in-flight cap for this rule.
|
|
21
|
+
*
|
|
22
|
+
* SUBSTRATE NOTE: `maxConcurrency` is enforced ONLY on `cjs_node20` (the
|
|
23
|
+
* single-process {@link RateStateBackend} can hold a reliable in-flight count
|
|
24
|
+
* and release it on permit.release). On `esm_workers` it is intentionally NOT
|
|
25
|
+
* enforced: a run fans across V8 isolates and a dying isolate cannot guarantee
|
|
26
|
+
* the release signal a leased in-flight count would require, so the
|
|
27
|
+
* coordinator DO debits `requestsPerWindow` only. The Governor's global
|
|
28
|
+
* tool-concurrency semaphore is the cross-isolate in-flight backstop there.
|
|
29
|
+
* `requestsPerWindow`/`windowMs` (the request-RATE governor) ARE enforced on
|
|
30
|
+
* both substrates. See coordinator-rate-state-backend.ts and dedup-do.ts
|
|
31
|
+
* (computeRateAcquire) for the workers side.
|
|
32
|
+
*/
|
|
33
|
+
readonly maxConcurrency: number | null;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
/**
|
|
37
|
+
* Per-tool queue-hint metadata produced by the runtime tool catalog
|
|
38
|
+
* (`src/lib/plays/runtime-tool-metadata.ts`) and surfaced to the runtime via
|
|
39
|
+
* `ContextOptions.getToolQueueHints`. It is the raw provider rate-limit metadata
|
|
40
|
+
* that the runtime maps into one {@link PacingRule} per hint before handing it
|
|
41
|
+
* to the Governor's pacing resolver. The `provider` field identifies the pacing
|
|
42
|
+
* bucket; `bucketId`/`operation` are descriptive and used for logging/grouping.
|
|
43
|
+
*/
|
|
44
|
+
export interface PlayQueueHint {
|
|
45
|
+
bucketId: string;
|
|
46
|
+
provider: string;
|
|
47
|
+
operation: string;
|
|
48
|
+
ruleId: string;
|
|
49
|
+
requestsPerWindow: number;
|
|
50
|
+
windowMs: number;
|
|
51
|
+
maxConcurrency: number | null;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
/** Handle returned by acquire(); release frees any concurrency held by the rules. */
|
|
55
|
+
export interface PacingPermit {
|
|
56
|
+
release(): void;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
export interface RateStateBackend {
|
|
60
|
+
/**
|
|
61
|
+
* Block until one outbound call is permitted for `bucketId` under all `rules`
|
|
62
|
+
* (request windows always; per-rule `maxConcurrency` only on the in-memory
|
|
63
|
+
* `cjs_node20` backend — see {@link PacingRule.maxConcurrency}), then debit and
|
|
64
|
+
* return a permit. `bucketId` is `${orgId}:${provider}` so the window is global
|
|
65
|
+
* per (org, provider). Resolves immediately when `rules` is empty (provider has
|
|
66
|
+
* no configured limit — pacing is a no-op, the global tool-concurrency backstop
|
|
67
|
+
* still applies).
|
|
68
|
+
*/
|
|
69
|
+
acquire(input: {
|
|
70
|
+
bucketId: string;
|
|
71
|
+
rules: readonly PacingRule[];
|
|
72
|
+
signal?: AbortSignal;
|
|
73
|
+
}): Promise<PacingPermit>;
|
|
74
|
+
|
|
75
|
+
/**
|
|
76
|
+
* Feed a server-observed Retry-After back so future acquires for this bucket
|
|
77
|
+
* back off. Advisory and idempotent; never un-charges an in-flight call.
|
|
78
|
+
*/
|
|
79
|
+
penalize(input: { bucketId: string; cooldownMs: number }): void;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
/** Shared do-nothing permit: its `release` has no state to free. */
const NOOP_PERMIT: PacingPermit = {
  release: (): void => {},
};

/**
 * Permit used when a bucket has no rules, so nothing was debited and there is
 * nothing to release. Every call returns the same shared instance.
 */
export function noopPacingPermit(): PacingPermit {
  return NOOP_PERMIT;
}
|
|
@@ -38,6 +38,18 @@ export function normalizePlayRunFailure(error: unknown): PlayRunFailureDetails {
|
|
|
38
38
|
cause: CLOUDFLARE_DURABLE_OBJECT_CODE_UPDATED_ERROR,
|
|
39
39
|
};
|
|
40
40
|
}
|
|
41
|
+
const playDepthBudgetMatch = cause.match(
|
|
42
|
+
/Play execution playDepth budget exceeded \((\d+)\/(\d+)\)\./,
|
|
43
|
+
);
|
|
44
|
+
if (playDepthBudgetMatch) {
|
|
45
|
+
return {
|
|
46
|
+
code: 'PLAY_CALL_DEPTH_EXCEEDED',
|
|
47
|
+
phase: 'runtime',
|
|
48
|
+
message: `Play-call depth exceeded (${playDepthBudgetMatch[1]}/${playDepthBudgetMatch[2]}).`,
|
|
49
|
+
retryable: false,
|
|
50
|
+
cause,
|
|
51
|
+
};
|
|
52
|
+
}
|
|
41
53
|
return {
|
|
42
54
|
code: 'RUN_FAILED',
|
|
43
55
|
phase: 'runtime',
|
|
@@ -35,6 +35,30 @@ export type PlayCallGovernanceSnapshot = {
|
|
|
35
35
|
key: string;
|
|
36
36
|
ancestryPlayIds: string[];
|
|
37
37
|
callDepth: number;
|
|
38
|
+
/**
|
|
39
|
+
* Cumulative lineage-global budget counters consumed by ancestors at the
|
|
40
|
+
* moment this child was launched. The child seeds its own counters from these
|
|
41
|
+
* so the corresponding budgets (`maxPlayCallCount`, `maxToolCallCount`,
|
|
42
|
+
* `maxRetryCount`, `maxDescendants`, `maxWaterfallStepExecutions`) accumulate
|
|
43
|
+
* down the dispatch lineage instead of resetting to 0 in each worker isolate —
|
|
44
|
+
* matching the cjs forkChild path, which threads all of them. The Governor
|
|
45
|
+
* documents these budgets as global across the nested-play tree, not
|
|
46
|
+
* per-worker (see policy.ts / rate-state-backend.ts); threading them here is
|
|
47
|
+
* what makes that true on `esm_workers`.
|
|
48
|
+
*
|
|
49
|
+
* `descendantCount` is load-bearing for fan-out: forkChild charges
|
|
50
|
+
* `descendant` on every child launch, so without threading it a deep/wide tree
|
|
51
|
+
* would reset descendant accounting at each isolate and never converge on the
|
|
52
|
+
* lineage-global cap.
|
|
53
|
+
*
|
|
54
|
+
* All optional and fail-safe: if absent (older callers / dropped in transit)
|
|
55
|
+
* the child falls back to 0, i.e. prior behavior.
|
|
56
|
+
*/
|
|
57
|
+
playCallCount?: number;
|
|
58
|
+
toolCallCount?: number;
|
|
59
|
+
retryCount?: number;
|
|
60
|
+
descendantCount?: number;
|
|
61
|
+
waterfallStepExecutions?: number;
|
|
38
62
|
};
|
|
39
63
|
|
|
40
64
|
export type PlaySchedulerSubmitInput = {
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Single source of truth for play submit-path size/wait limits.
|
|
3
|
+
*
|
|
4
|
+
* Dependency-free leaf shared by the Next.js run route
|
|
5
|
+
* (`POST /api/v2/plays/run`), the Cloudflare coordinator's workflow retry-state
|
|
6
|
+
* codec, and the limits documentation generator. These bound what a single
|
|
7
|
+
* submission may carry inline, the hard ceiling on submitted input, and how
|
|
8
|
+
* long the API will block for a synchronous result.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* Max size of a CSV/file input that may be inlined into the submit payload
|
|
13
|
+
* (and the workflow event history). Larger inputs must be staged via
|
|
14
|
+
* `POST /api/v2/plays/files/stage` and referenced by handle.
|
|
15
|
+
*/
|
|
16
|
+
export const MAX_TEMPORAL_INLINE_INPUT_FILE_BYTES = 64 * 1024;
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* Max time the run route will block waiting for a play to finish before
|
|
20
|
+
* returning a pending handle the caller can poll/stream.
|
|
21
|
+
*/
|
|
22
|
+
export const MAX_WAIT_FOR_COMPLETION_MS = 15_000;
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* Submitted input/retry-state at or below this size stays inline in the
|
|
26
|
+
* coordinator Durable Object. Above it, the params are externalized to a
|
|
27
|
+
* short-lived play artifact (up to {@link PLAY_SUBMIT_INPUT_MAX_BYTES}).
|
|
28
|
+
*/
|
|
29
|
+
export const PLAY_SUBMIT_INPUT_INLINE_MAX_BYTES = 100_000;
|
|
30
|
+
|
|
31
|
+
/**
|
|
32
|
+
* Absolute ceiling for submitted input/retry-state. A submission larger than
|
|
33
|
+
* this is rejected with guidance to use staged files or `ctx.csv` inputs.
|
|
34
|
+
*/
|
|
35
|
+
export const PLAY_SUBMIT_INPUT_MAX_BYTES = 1024 * 1024;
|
|
@@ -26,20 +26,12 @@ import type {
|
|
|
26
26
|
} from '../artifact-types';
|
|
27
27
|
import { buildPlayContractCompatibility } from '../contracts';
|
|
28
28
|
import { validatePlaySourceFilesHaveNoInlineSecrets } from '../secret-guardrails';
|
|
29
|
+
import {
|
|
30
|
+
MAX_ESM_WORKERS_BUNDLE_BYTES,
|
|
31
|
+
MAX_PLAY_BUNDLE_BYTES,
|
|
32
|
+
} from './limits';
|
|
29
33
|
|
|
30
34
|
const PLAY_BUNDLE_CACHE_VERSION = 24;
|
|
31
|
-
const MAX_PLAY_BUNDLE_BYTES = 30 * 1024 * 1024;
|
|
32
|
-
// workerd local-mode (`wrangler dev` Worker Loader) silently fails to
|
|
33
|
-
// instantiate per-graphHash play Workers when the bundled code passes a
|
|
34
|
-
// threshold somewhere between 1.04 MiB (44-package-imports — works) and
|
|
35
|
-
// 1.18 MiB (the same play with date-fns added — hangs forever). The
|
|
36
|
-
// workflow body never runs, no error is logged anywhere, and the run
|
|
37
|
-
// hangs indefinitely. We surface this as a hard bundle failure so the
|
|
38
|
-
// user gets an actionable message at submit time instead of a 5-minute
|
|
39
|
-
// silent timeout. Real CF (workers.dev) accepts much larger bundles, but
|
|
40
|
-
// `dev:v2 cloudflare` is the regression entrypoint so the local limit is
|
|
41
|
-
// the binding one.
|
|
42
|
-
const MAX_ESM_WORKERS_BUNDLE_BYTES = 1_150_000;
|
|
43
35
|
const PLAY_ARTIFACT_CACHE_DIR = join(
|
|
44
36
|
tmpdir(),
|
|
45
37
|
`deepline-play-artifacts-v${PLAY_BUNDLE_CACHE_VERSION}`,
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Single source of truth for play bundle/compile size limits.
|
|
3
|
+
*
|
|
4
|
+
* Kept in a dependency-free leaf module (no esbuild, no Node-only imports) so
|
|
5
|
+
* both the bundler (`./index.ts`) and the limits documentation generator
|
|
6
|
+
* (`scripts/generate-limits-docs.ts`) can import the numbers without pulling in
|
|
7
|
+
* the compile toolchain. Do not restate these values in docs — the public page
|
|
8
|
+
* is generated from here.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* Absolute hard ceiling for a compiled play bundle, across every artifact kind.
|
|
13
|
+
* A bundle larger than this is rejected at submit time.
|
|
14
|
+
*/
|
|
15
|
+
export const MAX_PLAY_BUNDLE_BYTES = 30 * 1024 * 1024;
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* Tighter ceiling for the `esm_workers` artifact kind. workerd local-mode
|
|
19
|
+
* (`wrangler dev` Worker Loader) silently fails to instantiate per-graphHash
|
|
20
|
+
* play Workers when the bundled code passes a threshold somewhere between
|
|
21
|
+
* 1.04 MiB (44-package-imports — works) and 1.18 MiB (the same play with
|
|
22
|
+
* date-fns added — hangs forever). The workflow body never runs, no error is
|
|
23
|
+
* logged anywhere, and the run hangs indefinitely. We surface this as a hard
|
|
24
|
+
* bundle failure so the user gets an actionable message at submit time instead
|
|
25
|
+
* of a 5-minute silent timeout. Real CF (workers.dev) accepts much larger
|
|
26
|
+
* bundles, but `dev:v2 cloudflare` is the regression entrypoint so the local
|
|
27
|
+
* limit is the binding one.
|
|
28
|
+
*/
|
|
29
|
+
export const MAX_ESM_WORKERS_BUNDLE_BYTES = 1_150_000;
|
|
@@ -1,5 +1,16 @@
|
|
|
1
1
|
import { normalizeTableNamespace } from './row-identity';
|
|
2
2
|
|
|
3
|
+
/**
|
|
4
|
+
* A top-level key the play's function literally `return`s. Derived from the
|
|
5
|
+
* `return { ... }` object literal — NOT from dataset `.withColumn(...)` names —
|
|
6
|
+
* so the "Returns" graph node mirrors the function's real output shape.
|
|
7
|
+
* `isDataset` is true when the key's value is a `PlayDataset` handle (a table).
|
|
8
|
+
*/
|
|
9
|
+
export interface PlayStaticReturnField {
|
|
10
|
+
name: string;
|
|
11
|
+
isDataset: boolean;
|
|
12
|
+
}
|
|
13
|
+
|
|
3
14
|
export interface PlayStaticPipeline {
|
|
4
15
|
tableNamespace?: string;
|
|
5
16
|
inputFields?: string[];
|
|
@@ -9,6 +20,12 @@ export interface PlayStaticPipeline {
|
|
|
9
20
|
csvDescription?: string;
|
|
10
21
|
datasetDescription?: string;
|
|
11
22
|
fields: string[];
|
|
23
|
+
/**
|
|
24
|
+
* Top-level keys of the play's `return { ... }` object literal, in source
|
|
25
|
+
* order. Undefined when the terminal return isn't a statically-known object
|
|
26
|
+
* literal (bare value, dataset handle, conditional returns, etc.).
|
|
27
|
+
*/
|
|
28
|
+
returnFields?: PlayStaticReturnField[];
|
|
12
29
|
stages?: PlayStaticSubstep[];
|
|
13
30
|
substeps: PlayStaticSubstep[];
|
|
14
31
|
sheetContract?: PlaySheetContract | null;
|
|
@@ -54,6 +71,7 @@ export interface PlayStaticColumnProducer {
|
|
|
54
71
|
field: string;
|
|
55
72
|
toolId?: string;
|
|
56
73
|
playId?: string;
|
|
74
|
+
staleAfterSeconds?: number;
|
|
57
75
|
conditional?: boolean;
|
|
58
76
|
sourceRange?: PlayStaticSourceRange;
|
|
59
77
|
steps?: PlayStaticColumnProducer[];
|
|
@@ -64,6 +82,7 @@ export interface PlayStaticDatasetColumn {
|
|
|
64
82
|
id: string;
|
|
65
83
|
source: PlaySheetColumnSource;
|
|
66
84
|
sqlName?: string;
|
|
85
|
+
staleAfterSeconds?: number;
|
|
67
86
|
producers: PlayStaticColumnProducer[];
|
|
68
87
|
}
|
|
69
88
|
|
|
@@ -212,6 +231,9 @@ export function truncateStaticPipelineForStorage(
|
|
|
212
231
|
? [...pipeline.rowKeyFields]
|
|
213
232
|
: undefined,
|
|
214
233
|
fields: [...(pipeline.fields ?? [])],
|
|
234
|
+
returnFields: pipeline.returnFields
|
|
235
|
+
? pipeline.returnFields.map((field) => ({ ...field }))
|
|
236
|
+
: undefined,
|
|
215
237
|
stages: truncateStaticSubstepsForStorage(pipeline.stages, {
|
|
216
238
|
embeddedPlayCallPipelineDepth,
|
|
217
239
|
maxEmbeddedPlayCallPipelineDepth,
|
|
@@ -237,6 +259,7 @@ export interface PlayStaticSourceRange {
|
|
|
237
259
|
|
|
238
260
|
type PlayStaticSubstepMetadata = {
|
|
239
261
|
conditional?: boolean;
|
|
262
|
+
staleAfterSeconds?: number;
|
|
240
263
|
};
|
|
241
264
|
|
|
242
265
|
export type PlayStaticSubstep = PlayStaticSubstepMetadata &
|
|
@@ -433,7 +456,7 @@ export function compileStaticGraph(
|
|
|
433
456
|
if (substep.type !== 'dataset') {
|
|
434
457
|
return substep;
|
|
435
458
|
}
|
|
436
|
-
const columns = compileDatasetColumns(substep);
|
|
459
|
+
const columns = compileDatasetColumns(substep, pipeline?.substeps ?? []);
|
|
437
460
|
const tableNamespace = (substep.tableNamespace ?? substep.field).trim();
|
|
438
461
|
if (tableNamespace) {
|
|
439
462
|
datasets.push({ tableNamespace, columns });
|
|
@@ -448,6 +471,7 @@ export function compileStaticGraph(
|
|
|
448
471
|
|
|
449
472
|
function compileDatasetColumns(
|
|
450
473
|
dataset: Extract<PlayStaticSubstep, { type: 'dataset' }>,
|
|
474
|
+
pipelineSubsteps: PlayStaticSubstep[] = [],
|
|
451
475
|
): PlayStaticDatasetColumn[] {
|
|
452
476
|
const columnsById = new Map<string, PlayStaticDatasetColumn>();
|
|
453
477
|
const ensureColumn = (
|
|
@@ -482,7 +506,15 @@ function compileDatasetColumns(
|
|
|
482
506
|
ensureColumn(field, 'datasetColumn', sqlSafePlayColumnName(field));
|
|
483
507
|
}
|
|
484
508
|
|
|
485
|
-
|
|
509
|
+
const datasetProducerSteps =
|
|
510
|
+
dataset.steps && dataset.steps.length > 0
|
|
511
|
+
? dataset.steps
|
|
512
|
+
: pipelineSubsteps.filter((substep) => {
|
|
513
|
+
const field = fieldForColumnProducer(substep);
|
|
514
|
+
return field ? (dataset.outputFields ?? []).includes(field) : false;
|
|
515
|
+
});
|
|
516
|
+
|
|
517
|
+
for (const substep of datasetProducerSteps) {
|
|
486
518
|
const field = fieldForColumnProducer(substep);
|
|
487
519
|
if (!field) continue;
|
|
488
520
|
const column = ensureColumn(
|
|
@@ -491,7 +523,25 @@ function compileDatasetColumns(
|
|
|
491
523
|
sqlSafePlayColumnName(field),
|
|
492
524
|
);
|
|
493
525
|
if (!column) continue;
|
|
494
|
-
|
|
526
|
+
const pipelineSubstep =
|
|
527
|
+
substep.staleAfterSeconds === undefined
|
|
528
|
+
? pipelineSubsteps.find(
|
|
529
|
+
(candidate) => fieldForColumnProducer(candidate) === field,
|
|
530
|
+
)
|
|
531
|
+
: undefined;
|
|
532
|
+
const producer = columnProducerFromSubstep(
|
|
533
|
+
pipelineSubstep && pipelineSubstep.staleAfterSeconds !== undefined
|
|
534
|
+
? pipelineSubstep
|
|
535
|
+
: substep,
|
|
536
|
+
field,
|
|
537
|
+
);
|
|
538
|
+
column.producers.push(producer);
|
|
539
|
+
if (
|
|
540
|
+
column.staleAfterSeconds === undefined &&
|
|
541
|
+
producer.staleAfterSeconds !== undefined
|
|
542
|
+
) {
|
|
543
|
+
column.staleAfterSeconds = producer.staleAfterSeconds;
|
|
544
|
+
}
|
|
495
545
|
}
|
|
496
546
|
|
|
497
547
|
return [...columnsById.values()];
|
|
@@ -536,6 +586,9 @@ function columnProducerFromSubstep(
|
|
|
536
586
|
field,
|
|
537
587
|
...(substep.type === 'tool' ? { toolId: substep.toolId } : {}),
|
|
538
588
|
...(substep.type === 'play_call' ? { playId: substep.playId } : {}),
|
|
589
|
+
...(substep.staleAfterSeconds !== undefined
|
|
590
|
+
? { staleAfterSeconds: substep.staleAfterSeconds }
|
|
591
|
+
: {}),
|
|
539
592
|
...(substep.conditional ? { conditional: true } : {}),
|
|
540
593
|
...(substep.sourceRange ? { sourceRange: substep.sourceRange } : {}),
|
|
541
594
|
...(steps && steps.length > 0 ? { steps } : {}),
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared Temporal execution constants.
|
|
3
|
+
*
|
|
4
|
+
* Keep values that both the API/auth surface and the worker need here so the
|
|
5
|
+
* API never imports from worker-only modules.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Local Temporal dev defaults.
|
|
10
|
+
*
|
|
11
|
+
* These match the host ports exposed by docker-compose.yml and the env files
|
|
12
|
+
* used by the local dev flows (`.env.local`, `.env.worktree`).
|
|
13
|
+
*/
|
|
14
|
+
export const LOCAL_TEMPORAL_FRONTEND_PORT = 17233;
|
|
15
|
+
export const LOCAL_TEMPORAL_UI_PORT = 18233;
|
|
16
|
+
export const LOCAL_TEMPORAL_NAMESPACE = 'default';
|
|
17
|
+
export const LOCAL_TEMPORAL_ADDRESS = `127.0.0.1:${LOCAL_TEMPORAL_FRONTEND_PORT}`;
|
|
18
|
+
export const LOCAL_TEMPORAL_UI_URL = `http://127.0.0.1:${LOCAL_TEMPORAL_UI_PORT}`;
|
|
19
|
+
|
|
20
|
+
/** Maximum active user-code runtime for a standard play, in seconds. */
|
|
21
|
+
export const STANDARD_PLAY_RUNTIME_LIMIT_SECONDS = 10 * 60; // 10 minutes
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* Activity timeout includes cleanup/billing headroom after the 10 minute
|
|
25
|
+
* user-code runtime cap. Keep this higher than STANDARD_PLAY_RUNTIME_LIMIT_SECONDS.
|
|
26
|
+
*/
|
|
27
|
+
export const PLAY_ACTIVITY_TIMEOUT_SECONDS = 12 * 60; // 12 minutes
|
|
28
|
+
|
|
29
|
+
/** Heartbeat cadence for the long-running play execution activity. */
|
|
30
|
+
export const PLAY_EXECUTE_ACTIVITY_HEARTBEAT_INTERVAL_SECONDS = 15;
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* TTL for workflow executor tokens, in seconds.
|
|
34
|
+
* Matches the activity timeout so tokens expire when the activity would
|
|
35
|
+
* time out anyway.
|
|
36
|
+
*/
|
|
37
|
+
export const WORKFLOW_EXECUTOR_TOKEN_TTL_SECONDS =
|
|
38
|
+
PLAY_ACTIVITY_TIMEOUT_SECONDS;
|