deepline 0.1.79 → 0.1.81

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. package/README.md +2 -1
  2. package/dist/cli/index.js +76 -42
  3. package/dist/cli/index.mjs +76 -42
  4. package/dist/index.d.mts +9 -1
  5. package/dist/index.d.ts +9 -1
  6. package/dist/index.js +13 -10
  7. package/dist/index.mjs +13 -10
  8. package/dist/repo/apps/play-runner-workers/src/child-play-await.ts +192 -0
  9. package/dist/repo/apps/play-runner-workers/src/coordinator-entry.ts +1103 -1617
  10. package/dist/repo/apps/play-runner-workers/src/dedup-do.ts +506 -654
  11. package/dist/repo/apps/play-runner-workers/src/entry.ts +1148 -598
  12. package/dist/repo/apps/play-runner-workers/src/runtime/tool-http-errors.ts +43 -1
  13. package/dist/repo/apps/play-runner-workers/src/workflow-retry-state.ts +8 -2
  14. package/dist/repo/sdk/src/client.ts +15 -8
  15. package/dist/repo/sdk/src/release.ts +2 -2
  16. package/dist/repo/sdk/src/types.ts +5 -0
  17. package/dist/repo/shared_libs/play-runtime/governor/coordinator-rate-state-backend.ts +231 -0
  18. package/dist/repo/shared_libs/play-runtime/governor/governor.ts +376 -0
  19. package/dist/repo/shared_libs/play-runtime/governor/policy.ts +179 -0
  20. package/dist/repo/shared_libs/play-runtime/governor/rate-state-backend.ts +87 -0
  21. package/dist/repo/shared_libs/play-runtime/run-failure.ts +12 -0
  22. package/dist/repo/shared_libs/play-runtime/scheduler-backend.ts +24 -0
  23. package/dist/repo/shared_libs/play-runtime/submit-limits.ts +35 -0
  24. package/dist/repo/shared_libs/plays/bundling/index.ts +4 -12
  25. package/dist/repo/shared_libs/plays/bundling/limits.ts +29 -0
  26. package/dist/repo/shared_libs/plays/static-pipeline.ts +56 -3
  27. package/dist/repo/shared_libs/temporal/constants.ts +38 -0
  28. package/package.json +1 -1
  29. package/dist/repo/shared_libs/play-runtime/tool-batch-executor.ts +0 -149
package/dist/repo/shared_libs/play-runtime/submit-limits.ts ADDED
@@ -0,0 +1,35 @@
1
+ /**
2
+ * Single source of truth for play submit-path size/wait limits.
3
+ *
4
+ * Dependency-free leaf shared by the Next.js run route
5
+ * (`POST /api/v2/plays/run`), the Cloudflare coordinator's workflow retry-state
6
+ * codec, and the limits documentation generator. These bound what a single
7
+ * submission may carry inline, the hard ceiling on submitted input, and how
8
+ * long the API will block for a synchronous result.
9
+ */
10
+
11
+ /**
12
+ * Max size of a CSV/file input that may be inlined into the submit payload
13
+ * (and the workflow event history). Larger inputs must be staged via
14
+ * `POST /api/v2/plays/files/stage` and referenced by handle.
15
+ */
16
+ export const MAX_TEMPORAL_INLINE_INPUT_FILE_BYTES = 64 * 1024;
17
+
18
+ /**
19
+ * Max time the run route will block waiting for a play to finish before
20
+ * returning a pending handle the caller can poll/stream.
21
+ */
22
+ export const MAX_WAIT_FOR_COMPLETION_MS = 15_000;
23
+
24
+ /**
25
+ * Submitted input/retry-state at or below this size stays inline in the
26
+ * coordinator Durable Object. Above it, the params are externalized to a
27
+ * short-lived play artifact (up to {@link PLAY_SUBMIT_INPUT_MAX_BYTES}).
28
+ */
29
+ export const PLAY_SUBMIT_INPUT_INLINE_MAX_BYTES = 100_000;
30
+
31
+ /**
32
+ * Absolute ceiling for submitted input/retry-state. A submission larger than
33
+ * this is rejected with guidance to use staged files or `ctx.csv` inputs.
34
+ */
35
+ export const PLAY_SUBMIT_INPUT_MAX_BYTES = 1024 * 1024;
package/dist/repo/shared_libs/plays/bundling/index.ts CHANGED
@@ -26,20 +26,12 @@ import type {
26
26
  } from '../artifact-types';
27
27
  import { buildPlayContractCompatibility } from '../contracts';
28
28
  import { validatePlaySourceFilesHaveNoInlineSecrets } from '../secret-guardrails';
29
+ import {
30
+ MAX_ESM_WORKERS_BUNDLE_BYTES,
31
+ MAX_PLAY_BUNDLE_BYTES,
32
+ } from './limits';
29
33
 
30
34
  const PLAY_BUNDLE_CACHE_VERSION = 24;
31
- const MAX_PLAY_BUNDLE_BYTES = 30 * 1024 * 1024;
32
- // workerd local-mode (`wrangler dev` Worker Loader) silently fails to
33
- // instantiate per-graphHash play Workers when the bundled code passes a
34
- // threshold somewhere between 1.04 MiB (44-package-imports — works) and
35
- // 1.18 MiB (the same play with date-fns added — hangs forever). The
36
- // workflow body never runs, no error is logged anywhere, and the run
37
- // hangs indefinitely. We surface this as a hard bundle failure so the
38
- // user gets an actionable message at submit time instead of a 5-minute
39
- // silent timeout. Real CF (workers.dev) accepts much larger bundles, but
40
- // `dev:v2 cloudflare` is the regression entrypoint so the local limit is
41
- // the binding one.
42
- const MAX_ESM_WORKERS_BUNDLE_BYTES = 1_150_000;
43
35
  const PLAY_ARTIFACT_CACHE_DIR = join(
44
36
  tmpdir(),
45
37
  `deepline-play-artifacts-v${PLAY_BUNDLE_CACHE_VERSION}`,
package/dist/repo/shared_libs/plays/bundling/limits.ts ADDED
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Single source of truth for play bundle/compile size limits.
3
+ *
4
+ * Kept in a dependency-free leaf module (no esbuild, no Node-only imports) so
5
+ * both the bundler (`./index.ts`) and the limits documentation generator
6
+ * (`scripts/generate-limits-docs.ts`) can import the numbers without pulling in
7
+ * the compile toolchain. Do not restate these values in docs — the public page
8
+ * is generated from here.
9
+ */
10
+
11
+ /**
12
+ * Absolute hard ceiling for a compiled play bundle, across every artifact kind.
13
+ * A bundle larger than this is rejected at submit time.
14
+ */
15
+ export const MAX_PLAY_BUNDLE_BYTES = 30 * 1024 * 1024;
16
+
17
+ /**
18
+ * Tighter ceiling for the `esm_workers` artifact kind. workerd local-mode
19
+ * (`wrangler dev` Worker Loader) silently fails to instantiate per-graphHash
20
+ * play Workers when the bundled code passes a threshold somewhere between
21
+ * 1.04 MiB (44-package-imports — works) and 1.18 MiB (the same play with
22
+ * date-fns added — hangs forever). The workflow body never runs, no error is
23
+ * logged anywhere, and the run hangs indefinitely. We surface this as a hard
24
+ * bundle failure so the user gets an actionable message at submit time instead
25
+ * of a 5-minute silent timeout. Real CF (workers.dev) accepts much larger
26
+ * bundles, but `dev:v2 cloudflare` is the regression entrypoint so the local
27
+ * limit is the binding one.
28
+ */
29
+ export const MAX_ESM_WORKERS_BUNDLE_BYTES = 1_150_000;
package/dist/repo/shared_libs/plays/static-pipeline.ts CHANGED
@@ -1,5 +1,16 @@
1
1
  import { normalizeTableNamespace } from './row-identity';
2
2
 
3
+ /**
4
+ * A top-level key the play's function literally `return`s. Derived from the
5
+ * `return { ... }` object literal — NOT from dataset `.withColumn(...)` names —
6
+ * so the "Returns" graph node mirrors the function's real output shape.
7
+ * `isDataset` is true when the key's value is a `PlayDataset` handle (a table).
8
+ */
9
+ export interface PlayStaticReturnField {
10
+ name: string;
11
+ isDataset: boolean;
12
+ }
13
+
3
14
  export interface PlayStaticPipeline {
4
15
  tableNamespace?: string;
5
16
  inputFields?: string[];
@@ -9,6 +20,12 @@ export interface PlayStaticPipeline {
9
20
  csvDescription?: string;
10
21
  datasetDescription?: string;
11
22
  fields: string[];
23
+ /**
24
+ * Top-level keys of the play's `return { ... }` object literal, in source
25
+ * order. Undefined when the terminal return isn't a statically-known object
26
+ * literal (bare value, dataset handle, conditional returns, etc.).
27
+ */
28
+ returnFields?: PlayStaticReturnField[];
12
29
  stages?: PlayStaticSubstep[];
13
30
  substeps: PlayStaticSubstep[];
14
31
  sheetContract?: PlaySheetContract | null;
@@ -54,6 +71,7 @@ export interface PlayStaticColumnProducer {
54
71
  field: string;
55
72
  toolId?: string;
56
73
  playId?: string;
74
+ staleAfterSeconds?: number;
57
75
  conditional?: boolean;
58
76
  sourceRange?: PlayStaticSourceRange;
59
77
  steps?: PlayStaticColumnProducer[];
@@ -64,6 +82,7 @@ export interface PlayStaticDatasetColumn {
64
82
  id: string;
65
83
  source: PlaySheetColumnSource;
66
84
  sqlName?: string;
85
+ staleAfterSeconds?: number;
67
86
  producers: PlayStaticColumnProducer[];
68
87
  }
69
88
 
@@ -212,6 +231,9 @@ export function truncateStaticPipelineForStorage(
212
231
  ? [...pipeline.rowKeyFields]
213
232
  : undefined,
214
233
  fields: [...(pipeline.fields ?? [])],
234
+ returnFields: pipeline.returnFields
235
+ ? pipeline.returnFields.map((field) => ({ ...field }))
236
+ : undefined,
215
237
  stages: truncateStaticSubstepsForStorage(pipeline.stages, {
216
238
  embeddedPlayCallPipelineDepth,
217
239
  maxEmbeddedPlayCallPipelineDepth,
@@ -237,6 +259,7 @@ export interface PlayStaticSourceRange {
237
259
 
238
260
  type PlayStaticSubstepMetadata = {
239
261
  conditional?: boolean;
262
+ staleAfterSeconds?: number;
240
263
  };
241
264
 
242
265
  export type PlayStaticSubstep = PlayStaticSubstepMetadata &
@@ -433,7 +456,7 @@ export function compileStaticGraph(
433
456
  if (substep.type !== 'dataset') {
434
457
  return substep;
435
458
  }
436
- const columns = compileDatasetColumns(substep);
459
+ const columns = compileDatasetColumns(substep, pipeline?.substeps ?? []);
437
460
  const tableNamespace = (substep.tableNamespace ?? substep.field).trim();
438
461
  if (tableNamespace) {
439
462
  datasets.push({ tableNamespace, columns });
@@ -448,6 +471,7 @@ export function compileStaticGraph(
448
471
 
449
472
  function compileDatasetColumns(
450
473
  dataset: Extract<PlayStaticSubstep, { type: 'dataset' }>,
474
+ pipelineSubsteps: PlayStaticSubstep[] = [],
451
475
  ): PlayStaticDatasetColumn[] {
452
476
  const columnsById = new Map<string, PlayStaticDatasetColumn>();
453
477
  const ensureColumn = (
@@ -482,7 +506,15 @@ function compileDatasetColumns(
482
506
  ensureColumn(field, 'datasetColumn', sqlSafePlayColumnName(field));
483
507
  }
484
508
 
485
- for (const substep of dataset.steps ?? []) {
509
+ const datasetProducerSteps =
510
+ dataset.steps && dataset.steps.length > 0
511
+ ? dataset.steps
512
+ : pipelineSubsteps.filter((substep) => {
513
+ const field = fieldForColumnProducer(substep);
514
+ return field ? (dataset.outputFields ?? []).includes(field) : false;
515
+ });
516
+
517
+ for (const substep of datasetProducerSteps) {
486
518
  const field = fieldForColumnProducer(substep);
487
519
  if (!field) continue;
488
520
  const column = ensureColumn(
@@ -491,7 +523,25 @@ function compileDatasetColumns(
491
523
  sqlSafePlayColumnName(field),
492
524
  );
493
525
  if (!column) continue;
494
- column.producers.push(columnProducerFromSubstep(substep, field));
526
+ const pipelineSubstep =
527
+ substep.staleAfterSeconds === undefined
528
+ ? pipelineSubsteps.find(
529
+ (candidate) => fieldForColumnProducer(candidate) === field,
530
+ )
531
+ : undefined;
532
+ const producer = columnProducerFromSubstep(
533
+ pipelineSubstep && pipelineSubstep.staleAfterSeconds !== undefined
534
+ ? pipelineSubstep
535
+ : substep,
536
+ field,
537
+ );
538
+ column.producers.push(producer);
539
+ if (
540
+ column.staleAfterSeconds === undefined &&
541
+ producer.staleAfterSeconds !== undefined
542
+ ) {
543
+ column.staleAfterSeconds = producer.staleAfterSeconds;
544
+ }
495
545
  }
496
546
 
497
547
  return [...columnsById.values()];
@@ -536,6 +586,9 @@ function columnProducerFromSubstep(
536
586
  field,
537
587
  ...(substep.type === 'tool' ? { toolId: substep.toolId } : {}),
538
588
  ...(substep.type === 'play_call' ? { playId: substep.playId } : {}),
589
+ ...(substep.staleAfterSeconds !== undefined
590
+ ? { staleAfterSeconds: substep.staleAfterSeconds }
591
+ : {}),
539
592
  ...(substep.conditional ? { conditional: true } : {}),
540
593
  ...(substep.sourceRange ? { sourceRange: substep.sourceRange } : {}),
541
594
  ...(steps && steps.length > 0 ? { steps } : {}),
package/dist/repo/shared_libs/temporal/constants.ts ADDED
@@ -0,0 +1,38 @@
1
+ /**
2
+ * Shared Temporal execution constants.
3
+ *
4
+ * Keep values that both the API/auth surface and the worker need here so the
5
+ * API never imports from worker-only modules.
6
+ */
7
+
8
+ /**
9
+ * Local Temporal dev defaults.
10
+ *
11
+ * These match the host ports exposed by docker-compose.yml and the env files
12
+ * used by the local dev flows (`.env.local`, `.env.worktree`).
13
+ */
14
+ export const LOCAL_TEMPORAL_FRONTEND_PORT = 17233;
15
+ export const LOCAL_TEMPORAL_UI_PORT = 18233;
16
+ export const LOCAL_TEMPORAL_NAMESPACE = 'default';
17
+ export const LOCAL_TEMPORAL_ADDRESS = `127.0.0.1:${LOCAL_TEMPORAL_FRONTEND_PORT}`;
18
+ export const LOCAL_TEMPORAL_UI_URL = `http://127.0.0.1:${LOCAL_TEMPORAL_UI_PORT}`;
19
+
20
+ /** Maximum active user-code runtime for a standard play, in seconds. */
21
+ export const STANDARD_PLAY_RUNTIME_LIMIT_SECONDS = 10 * 60; // 10 minutes
22
+
23
+ /**
24
+ * Activity timeout includes cleanup/billing headroom after the 10 minute
25
+ * user-code runtime cap. Keep this higher than STANDARD_PLAY_RUNTIME_LIMIT_SECONDS.
26
+ */
27
+ export const PLAY_ACTIVITY_TIMEOUT_SECONDS = 12 * 60; // 12 minutes
28
+
29
+ /** Heartbeat cadence for the long-running play execution activity. */
30
+ export const PLAY_EXECUTE_ACTIVITY_HEARTBEAT_INTERVAL_SECONDS = 15;
31
+
32
+ /**
33
+ * TTL for workflow executor tokens, in seconds.
34
+ * Matches the activity timeout so tokens expire when the activity would
35
+ * time out anyway.
36
+ */
37
+ export const WORKFLOW_EXECUTOR_TOKEN_TTL_SECONDS =
38
+ PLAY_ACTIVITY_TIMEOUT_SECONDS;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "deepline",
3
- "version": "0.1.79",
3
+ "version": "0.1.81",
4
4
  "description": "Deepline SDK + CLI — B2B data enrichment powered by durable cloud execution",
5
5
  "license": "MIT",
6
6
  "repository": {
package/dist/repo/shared_libs/play-runtime/tool-batch-executor.ts DELETED
@@ -1,149 +0,0 @@
1
- export type ToolBatchItem = {
2
- itemKey: string;
3
- payload: Record<string, unknown>;
4
- inputHash?: string | null;
5
- };
6
-
7
- export type ToolBatchRequest = {
8
- runId: string;
9
- orgId: string;
10
- toolId: string;
11
- operation: string;
12
- provider: string;
13
- items: ToolBatchItem[];
14
- waterfallId?: string | null;
15
- stageId?: string | null;
16
- fieldName?: string | null;
17
- mapName?: string | null;
18
- chunkIndex?: number | null;
19
- userProvidedRateLimitKey?: string | null;
20
- providerBatchSize: number;
21
- };
22
-
23
- export type ToolBatchItemResult = {
24
- itemKey: string;
25
- result: unknown;
26
- cached?: boolean;
27
- };
28
-
29
- export type ToolBatchResult = {
30
- runId: string;
31
- toolId: string;
32
- operation: string;
33
- provider: string;
34
- batchCount: number;
35
- itemCount: number;
36
- results: ToolBatchItemResult[];
37
- };
38
-
39
- export type ToolBatchExecutorTransport = {
40
- executeProviderBatch(input: {
41
- request: ToolBatchRequest;
42
- batchIndex: number;
43
- idempotencyKeys: string[];
44
- rateLimitKey: string;
45
- items: ToolBatchItem[];
46
- }): Promise<ToolBatchItemResult[]>;
47
- };
48
-
49
- export type ToolBatchExecutor = {
50
- executeToolBatch(request: ToolBatchRequest): Promise<ToolBatchResult>;
51
- };
52
-
53
- export function createToolBatchExecutor(
54
- transport: ToolBatchExecutorTransport,
55
- ): ToolBatchExecutor {
56
- return {
57
- async executeToolBatch(request) {
58
- const providerBatchSize = Math.max(
59
- 1,
60
- Math.floor(request.providerBatchSize),
61
- );
62
- const batches = chunkToolBatchItems(request.items, providerBatchSize);
63
- const results: ToolBatchItemResult[] = [];
64
- for (let batchIndex = 0; batchIndex < batches.length; batchIndex += 1) {
65
- const items = batches[batchIndex]!;
66
- results.push(
67
- ...(await transport.executeProviderBatch({
68
- request,
69
- batchIndex,
70
- items,
71
- rateLimitKey: buildToolBatchRateLimitKey(request),
72
- idempotencyKeys: items.map((item) =>
73
- buildToolBatchIdempotencyKey(request, item),
74
- ),
75
- })),
76
- );
77
- }
78
- return {
79
- runId: request.runId,
80
- toolId: request.toolId,
81
- operation: request.operation,
82
- provider: request.provider,
83
- batchCount: batches.length,
84
- itemCount: request.items.length,
85
- results,
86
- };
87
- },
88
- };
89
- }
90
-
91
- export function buildToolBatchIdempotencyKey(
92
- request: ToolBatchRequest,
93
- item: ToolBatchItem,
94
- ): string {
95
- return [
96
- request.runId,
97
- request.mapName ?? '',
98
- request.chunkIndex ?? '',
99
- item.itemKey,
100
- request.fieldName ?? '',
101
- request.waterfallId ?? '',
102
- request.stageId ?? '',
103
- item.inputHash ?? stableToolBatchHash(item.payload),
104
- ].join(':');
105
- }
106
-
107
- export function buildToolBatchRateLimitKey(request: ToolBatchRequest): string {
108
- return [
109
- request.orgId,
110
- request.provider,
111
- request.operation,
112
- request.userProvidedRateLimitKey ?? '',
113
- ].join(':');
114
- }
115
-
116
- function chunkToolBatchItems(
117
- items: readonly ToolBatchItem[],
118
- size: number,
119
- ): ToolBatchItem[][] {
120
- const chunks: ToolBatchItem[][] = [];
121
- for (let index = 0; index < items.length; index += size) {
122
- chunks.push(items.slice(index, index + size));
123
- }
124
- return chunks;
125
- }
126
-
127
- function stableToolBatchHash(value: unknown): string {
128
- const text = stableStringify(value);
129
- let hash = 2166136261;
130
- for (let index = 0; index < text.length; index += 1) {
131
- hash ^= text.charCodeAt(index);
132
- hash = Math.imul(hash, 16777619);
133
- }
134
- return (hash >>> 0).toString(36);
135
- }
136
-
137
- function stableStringify(value: unknown): string {
138
- if (value === null || typeof value !== 'object') {
139
- return JSON.stringify(value);
140
- }
141
- if (Array.isArray(value)) {
142
- return `[${value.map(stableStringify).join(',')}]`;
143
- }
144
- const record = value as Record<string, unknown>;
145
- return `{${Object.keys(record)
146
- .sort()
147
- .map((key) => `${JSON.stringify(key)}:${stableStringify(record[key])}`)
148
- .join(',')}}`;
149
- }