deepline 0.1.146 → 0.1.148

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -120,6 +120,7 @@ import {
120
120
  createInlineDatasetHandle,
121
121
  createMaterializedDatasetHandle,
122
122
  createPersistedDatasetHandle,
123
+ datasetHandleCapabilities,
123
124
  datasetRowCountHint,
124
125
  isDatasetHandle,
125
126
  iterDatasetChunks,
@@ -147,6 +148,12 @@ import {
147
148
  setHarnessBinding,
148
149
  } from '../../../sdk/src/plays/harness-stub';
149
150
  import { createHarnessWorkerReceiptStore } from './runtime/harness-receipt-store';
151
+ import {
152
+ hydrateSerializedResultDatasets,
153
+ projectTerminalResultDatasets,
154
+ type ProjectedResultDatasetHandle,
155
+ } from './runtime/output-datasets';
156
+ import { collectStableResultDatasetPersistenceContract } from './runtime/result-dataset-persistence';
150
157
  import {
151
158
  runtimeCsvExecutionRow,
152
159
  publicCsvInputRow,
@@ -701,6 +708,39 @@ function buildOrderedNodeList(
701
708
  }));
702
709
  }
703
710
 
711
+ function collectReservedResultDatasetNamespaces(
712
+ contractSnapshot: unknown,
713
+ ): string[] {
714
+ const snapshot = isRecord(contractSnapshot) ? contractSnapshot : null;
715
+ const pipeline =
716
+ (snapshot?.staticPipeline as PlayStaticPipeline | null | undefined) ?? null;
717
+ if (!pipeline) return [];
718
+
719
+ const namespaces = new Set<string>();
720
+ const addNamespace = (value: unknown) => {
721
+ if (typeof value === 'string' && value.trim()) {
722
+ namespaces.add(value);
723
+ }
724
+ };
725
+ addNamespace(pipeline.tableNamespace);
726
+ addNamespace(pipeline.sheetContract?.tableNamespace);
727
+
728
+ for (const substep of flattenStaticPipeline(pipeline)) {
729
+ if (substep.type === 'dataset') {
730
+ addNamespace(substep.tableNamespace ?? substep.field);
731
+ addNamespace(substep.sheetContract?.tableNamespace);
732
+ continue;
733
+ }
734
+ const record = substep as Record<string, unknown>;
735
+ addNamespace(record.tableNamespace);
736
+ const sheetContract = record.sheetContract;
737
+ if (isRecord(sheetContract)) {
738
+ addNamespace(sheetContract.tableNamespace);
739
+ }
740
+ }
741
+ return [...namespaces];
742
+ }
743
+
704
744
  function recordRunnerPerfTrace(input: {
705
745
  req: RunRequest;
706
746
  phase: string;
@@ -959,6 +999,59 @@ function extractChildPlayOutput(status: Record<string, unknown>): unknown {
959
999
  return result ?? null;
960
1000
  }
961
1001
 
1002
+ function hydrateChildPlayOutputDatasets(input: {
1003
+ req: RunRequest;
1004
+ childRunId: string;
1005
+ childPlayName: string;
1006
+ value: unknown;
1007
+ }): unknown {
1008
+ return hydrateSerializedResultDatasets(input.value, (dataset) => {
1009
+ const tableNamespace = dataset.tableNamespace?.trim();
1010
+ if (!tableNamespace) {
1011
+ return dataset;
1012
+ }
1013
+ const count = Math.max(0, Math.floor(dataset.count));
1014
+ const previewRows = dataset.preview.filter(isRecord);
1015
+ return createPersistedDatasetHandle({
1016
+ playName: input.childPlayName,
1017
+ name: tableNamespace,
1018
+ count,
1019
+ datasetKind: dataset.datasetKind,
1020
+ previewRows,
1021
+ cachedRows: count <= previewRows.length ? previewRows : null,
1022
+ sourceLabel: dataset.sourceLabel ?? null,
1023
+ readRows: async ({ limit, offset }) => {
1024
+ const result = await harnessReadSheetDatasetRows({
1025
+ baseUrl: input.req.baseUrl,
1026
+ executorToken: input.req.executorToken,
1027
+ orgId: input.req.orgId,
1028
+ playName: input.childPlayName,
1029
+ tableNamespace,
1030
+ runId: input.childRunId,
1031
+ limit,
1032
+ offset,
1033
+ userEmail: input.req.userEmail,
1034
+ preloadedDbSessions: input.req.preloadedDbSessions ?? null,
1035
+ });
1036
+ return result.rows as Array<Record<string, unknown>>;
1037
+ },
1038
+ trace: (phase, ms, extra) =>
1039
+ recordRunnerPerfTrace({
1040
+ req: input.req,
1041
+ phase,
1042
+ ms,
1043
+ extra: {
1044
+ ...(extra ?? {}),
1045
+ childRunId: input.childRunId,
1046
+ childPlayName: input.childPlayName,
1047
+ },
1048
+ }),
1049
+ nowMs,
1050
+ workProgress: dataset._metadata?.workProgress,
1051
+ });
1052
+ });
1053
+ }
1054
+
962
1055
  async function hashChildPlayEventKey(input: unknown): Promise<string> {
963
1056
  return (await hashJson(input)).slice(0, 32);
964
1057
  }
@@ -983,7 +1076,7 @@ function workflowTimeoutFromMs(timeoutMs: number): string {
983
1076
  async function signalParentPlayTerminal(input: {
984
1077
  req: RunRequest;
985
1078
  status: 'completed' | 'failed' | 'cancelled';
986
- result?: Record<string, unknown> | null;
1079
+ result?: unknown;
987
1080
  error?: string | null;
988
1081
  }): Promise<void> {
989
1082
  const governance = input.req.playCallGovernance;
@@ -5402,7 +5495,12 @@ function createMinimalWorkerCtx(
5402
5495
  message: `Completed child play ${resolvedName} (${normalizedKey})`,
5403
5496
  ts: nowMs(),
5404
5497
  });
5405
- return started.output ?? extractChildPlayOutput(started);
5498
+ return hydrateChildPlayOutputDatasets({
5499
+ req,
5500
+ childRunId: workflowId,
5501
+ childPlayName: resolvedName,
5502
+ value: started.output ?? extractChildPlayOutput(started),
5503
+ });
5406
5504
  }
5407
5505
  if (startedStatus === 'failed') {
5408
5506
  const startedError = isRecord(started.error)
@@ -5523,7 +5621,12 @@ function createMinimalWorkerCtx(
5523
5621
  message: `Completed child play ${resolvedName} (${normalizedKey})`,
5524
5622
  ts: nowMs(),
5525
5623
  });
5526
- return waitResult.output;
5624
+ return hydrateChildPlayOutputDatasets({
5625
+ req,
5626
+ childRunId: workflowId,
5627
+ childPlayName: resolvedName,
5628
+ value: waitResult.output,
5629
+ });
5527
5630
  } finally {
5528
5631
  childPlaySlot?.release();
5529
5632
  }
@@ -6362,14 +6465,51 @@ async function executeRunRequest(
6362
6465
  ms: nowMs() - playStartedAt,
6363
6466
  });
6364
6467
  stepLifecycle?.markAllTerminal(nowMs());
6468
+ const promoted = projectTerminalResultDatasets(
6469
+ result,
6470
+ (dataset) =>
6471
+ createInlineDatasetHandle(dataset.rows, {
6472
+ name: dataset.tableNamespace,
6473
+ kind: 'csv',
6474
+ sourceLabel: dataset.path,
6475
+ }),
6476
+ {
6477
+ playName: req.playName,
6478
+ reservedTableNamespaces: collectReservedResultDatasetNamespaces(
6479
+ req.contractSnapshot,
6480
+ ),
6481
+ },
6482
+ );
6365
6483
  const serializeStartedAt = nowMs();
6366
- const serializedResult = serializePlayReturnValue(result);
6484
+ const serializedResult = serializePlayReturnValue(promoted.result);
6367
6485
  recordRunnerPerfTrace({
6368
6486
  req,
6369
6487
  phase: 'runner.serialize_result',
6370
6488
  ms: nowMs() - serializeStartedAt,
6489
+ extra: {
6490
+ promotedOutputDatasets: promoted.datasets.length,
6491
+ },
6371
6492
  });
6372
6493
  const terminalResult = trimResultForStatus(serializedResult);
6494
+ let resultDatasetsPersisted = false;
6495
+ const persistProjectedResultDatasets = async (): Promise<void> => {
6496
+ if (resultDatasetsPersisted) return;
6497
+ const ledgerFlushWaitStartedAt = nowMs();
6498
+ await ledgerFlushInFlight;
6499
+ recordRunnerPerfTrace({
6500
+ req,
6501
+ phase: 'runner.run_ledger_flush_wait',
6502
+ ms: nowMs() - ledgerFlushWaitStartedAt,
6503
+ });
6504
+ const resultDatasetStartedAt = nowMs();
6505
+ await persistResultDatasets(req, promoted.handles, serializedResult);
6506
+ resultDatasetsPersisted = true;
6507
+ recordRunnerPerfTrace({
6508
+ req,
6509
+ phase: 'runner.persist_result_datasets',
6510
+ ms: nowMs() - resultDatasetStartedAt,
6511
+ });
6512
+ };
6373
6513
  let parentSignalPromise: Promise<void> | null = null;
6374
6514
  const startParentTerminalSignal = (): Promise<void> => {
6375
6515
  if (!parentSignalPromise) {
@@ -6377,7 +6517,7 @@ async function executeRunRequest(
6377
6517
  parentSignalPromise = signalParentPlayTerminal({
6378
6518
  req,
6379
6519
  status: 'completed',
6380
- result: terminalResult as Record<string, unknown>,
6520
+ result: req.playCallGovernance ? serializedResult : terminalResult,
6381
6521
  })
6382
6522
  .catch((error) => {
6383
6523
  console.error(
@@ -6396,21 +6536,11 @@ async function executeRunRequest(
6396
6536
  }
6397
6537
  return parentSignalPromise;
6398
6538
  };
6539
+ if (req.playCallGovernance && !options?.persistResultDatasets) {
6540
+ await persistProjectedResultDatasets();
6541
+ }
6399
6542
  if (options?.persistResultDatasets) {
6400
- const ledgerFlushWaitStartedAt = nowMs();
6401
- await ledgerFlushInFlight;
6402
- recordRunnerPerfTrace({
6403
- req,
6404
- phase: 'runner.run_ledger_flush_wait',
6405
- ms: nowMs() - ledgerFlushWaitStartedAt,
6406
- });
6407
- const resultDatasetStartedAt = nowMs();
6408
- await persistResultDatasets(req, result, serializedResult);
6409
- recordRunnerPerfTrace({
6410
- req,
6411
- phase: 'runner.persist_result_datasets',
6412
- ms: nowMs() - resultDatasetStartedAt,
6413
- });
6543
+ await persistProjectedResultDatasets();
6414
6544
  const parentSignal = startParentTerminalSignal();
6415
6545
  // Capped runs settle compute billing BEFORE declaring run.completed: a
6416
6546
  // per-run cap denial (422 billing_cap_exceeded) must fail the run as
@@ -6767,17 +6897,30 @@ function isPlayCallGovernanceSnapshot(
6767
6897
 
6768
6898
  async function persistResultDatasets(
6769
6899
  req: RunRequest,
6770
- result: unknown,
6900
+ resultDatasets: ProjectedResultDatasetHandle[],
6771
6901
  serializedResult: unknown,
6772
6902
  ): Promise<void> {
6773
6903
  const persistedNamespaces = new Set<string>();
6774
- for (const dataset of collectDatasetHandles(result)) {
6904
+ for (const dataset of resultDatasets) {
6775
6905
  if (dataset.datasetKind === 'map') continue;
6906
+ const handle = dataset.handle as WorkerDatasetHandle<
6907
+ Record<string, unknown>
6908
+ >;
6909
+ const capabilities = datasetHandleCapabilities(handle);
6910
+ const baseContract = baseSheetContractForResultDataset(
6911
+ req,
6912
+ dataset.tableNamespace,
6913
+ );
6914
+ const scan = await collectStableResultDatasetPersistenceContract({
6915
+ contract: baseContract,
6916
+ chunks: iterDatasetChunks(handle, RESULT_DATASET_PERSIST_CHUNK_ROWS),
6917
+ cacheChunks: !capabilities.replayable,
6918
+ });
6776
6919
  let inputOffset = 0;
6777
- for await (const chunk of iterDatasetChunks(
6778
- dataset.handle,
6779
- RESULT_DATASET_PERSIST_CHUNK_ROWS,
6780
- )) {
6920
+ const chunks =
6921
+ scan.cachedChunks ??
6922
+ iterDatasetChunks(handle, RESULT_DATASET_PERSIST_CHUNK_ROWS);
6923
+ for await (const chunk of chunks) {
6781
6924
  if (chunk.length === 0) continue;
6782
6925
  await harnessStartSheetDataset({
6783
6926
  baseUrl: req.baseUrl,
@@ -6785,7 +6928,7 @@ async function persistResultDatasets(
6785
6928
  orgId: req.orgId,
6786
6929
  playName: req.playName,
6787
6930
  tableNamespace: dataset.tableNamespace,
6788
- sheetContract: requireSheetContract(req, dataset.tableNamespace),
6931
+ sheetContract: scan.sheetContract,
6789
6932
  rows: chunk.map((row) => ({ ...row })),
6790
6933
  runId: req.runId,
6791
6934
  inputOffset,
@@ -6800,8 +6943,20 @@ async function persistResultDatasets(
6800
6943
  const datasets = collectDatasetEnvelopes(serializedResult);
6801
6944
  for (const dataset of datasets) {
6802
6945
  if (dataset.datasetKind === 'map') continue;
6803
- if (dataset.rows.length === 0) continue;
6804
6946
  if (persistedNamespaces.has(dataset.tableNamespace)) continue;
6947
+ if (dataset.rows.length < dataset.count) {
6948
+ throw new Error(
6949
+ `Returned serialized dataset ${JSON.stringify(
6950
+ dataset.tableNamespace,
6951
+ )} only includes ${dataset.rows.length}/${dataset.count} preview rows. ` +
6952
+ 'Return the live Dataset Handle instead of dataset.toJSON() so Deepline can persist and stream the full result.',
6953
+ );
6954
+ }
6955
+ if (dataset.rows.length === 0) continue;
6956
+ const scan = await collectStableResultDatasetPersistenceContract({
6957
+ contract: baseSheetContractForResultDataset(req, dataset.tableNamespace),
6958
+ chunks: [dataset.rows],
6959
+ });
6805
6960
  await harnessStartSheetDataset({
6806
6961
  baseUrl: req.baseUrl,
6807
6962
  executorToken: req.executorToken,
@@ -6809,7 +6964,7 @@ async function persistResultDatasets(
6809
6964
  preloadedDbSessions: req.preloadedDbSessions ?? null,
6810
6965
  playName: req.playName,
6811
6966
  tableNamespace: dataset.tableNamespace,
6812
- sheetContract: requireSheetContract(req, dataset.tableNamespace),
6967
+ sheetContract: scan.sheetContract,
6813
6968
  rows: dataset.rows,
6814
6969
  runId: req.runId,
6815
6970
  inputOffset: 0,
@@ -6820,52 +6975,16 @@ async function persistResultDatasets(
6820
6975
 
6821
6976
  const RESULT_DATASET_PERSIST_CHUNK_ROWS = 5_000;
6822
6977
 
6823
- function collectDatasetHandles(value: unknown): Array<{
6824
- tableNamespace: string;
6825
- datasetKind: 'csv' | 'map' | null;
6826
- handle: WorkerDatasetHandle<Record<string, unknown>>;
6827
- }> {
6828
- const datasets: Array<{
6829
- tableNamespace: string;
6830
- datasetKind: 'csv' | 'map' | null;
6831
- handle: WorkerDatasetHandle<Record<string, unknown>>;
6832
- }> = [];
6833
- const seen = new WeakSet<object>();
6834
- const walk = (candidate: unknown, depth: number) => {
6835
- if (depth > 12 || candidate == null) return;
6836
- if (isDatasetHandle(candidate)) {
6837
- const metadata = candidate.toJSON() as Record<string, unknown>;
6838
- const tableNamespace =
6839
- typeof metadata.tableNamespace === 'string'
6840
- ? metadata.tableNamespace
6841
- : null;
6842
- const datasetKind =
6843
- metadata.datasetKind === 'csv' || metadata.datasetKind === 'map'
6844
- ? metadata.datasetKind
6845
- : null;
6846
- if (tableNamespace) {
6847
- datasets.push({
6848
- tableNamespace,
6849
- datasetKind,
6850
- handle: candidate as WorkerDatasetHandle<Record<string, unknown>>,
6851
- });
6852
- }
6853
- return;
6854
- }
6855
- if (Array.isArray(candidate)) {
6856
- for (const item of candidate) walk(item, depth + 1);
6857
- return;
6858
- }
6859
- if (typeof candidate !== 'object') return;
6860
- const object = candidate as Record<string, unknown>;
6861
- if (seen.has(object)) return;
6862
- seen.add(object);
6863
- for (const child of Object.values(object)) {
6864
- walk(child, depth + 1);
6978
+ function baseSheetContractForResultDataset(
6979
+ req: RunRequest,
6980
+ tableNamespace: string,
6981
+ ): PlaySheetContract {
6982
+ return (
6983
+ resolveSheetContractFromReq(req, tableNamespace) ?? {
6984
+ tableNamespace,
6985
+ columns: [],
6865
6986
  }
6866
- };
6867
- walk(value, 0);
6868
- return datasets;
6987
+ );
6869
6988
  }
6870
6989
 
6871
6990
  function serializePlayReturnValue(value: unknown): unknown {
@@ -6943,11 +7062,13 @@ function serializeValue(value: unknown, depth: number): unknown {
6943
7062
  function collectDatasetEnvelopes(value: unknown): Array<{
6944
7063
  tableNamespace: string;
6945
7064
  datasetKind: 'csv' | 'map' | null;
7065
+ count: number;
6946
7066
  rows: Record<string, unknown>[];
6947
7067
  }> {
6948
7068
  const datasets: Array<{
6949
7069
  tableNamespace: string;
6950
7070
  datasetKind: 'csv' | 'map' | null;
7071
+ count: number;
6951
7072
  rows: Record<string, unknown>[];
6952
7073
  }> = [];
6953
7074
  const seen = new Set<string>();
@@ -6964,13 +7085,22 @@ function collectDatasetEnvelopes(value: unknown): Array<{
6964
7085
  const preview = Array.isArray(record.preview)
6965
7086
  ? (record.preview as Record<string, unknown>[])
6966
7087
  : null;
7088
+ const count =
7089
+ typeof record.count === 'number' && Number.isFinite(record.count)
7090
+ ? Math.max(0, Math.floor(record.count))
7091
+ : null;
6967
7092
  const datasetKind =
6968
7093
  record.datasetKind === 'csv' || record.datasetKind === 'map'
6969
7094
  ? record.datasetKind
6970
7095
  : null;
6971
- if (tableNamespace && preview && !seen.has(tableNamespace)) {
7096
+ if (
7097
+ tableNamespace &&
7098
+ preview &&
7099
+ count !== null &&
7100
+ !seen.has(tableNamespace)
7101
+ ) {
6972
7102
  seen.add(tableNamespace);
6973
- datasets.push({ tableNamespace, datasetKind, rows: preview });
7103
+ datasets.push({ tableNamespace, datasetKind, count, rows: preview });
6974
7104
  }
6975
7105
  for (const [key, child] of Object.entries(record)) {
6976
7106
  if (key === 'preview') continue;
@@ -454,6 +454,14 @@ export function datasetHandleCapabilities<T extends DatasetRow>(
454
454
  if (capabilities) {
455
455
  return capabilities;
456
456
  }
457
+ return {
458
+ storage: 'iterable',
459
+ chunkReadable: false,
460
+ pageBacked: Boolean(input.backing),
461
+ replayable: true,
462
+ countHint: null,
463
+ materialization: 'bounded',
464
+ };
457
465
  }
458
466
  return {
459
467
  storage: 'iterable',