deepline 0.1.85 → 0.1.88

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
@@ -600,7 +600,7 @@ type WorkerCtxCallbacks = {
600
600
  nodeId: string;
601
601
  progress: LiveNodeProgressSnapshot;
602
602
  forceFlush?: boolean;
603
- }) => void;
603
+ }) => void | Promise<void>;
604
604
  onMapStarted?: (nodeId: string, at?: number) => void;
605
605
  onMapCompleted?: (nodeId: string, at?: number) => void;
606
606
  onToolCalled?: (toolId: string, at?: number) => void;
@@ -1592,6 +1592,7 @@ type WorkerToolBatchRequest = {
1592
1592
  };
1593
1593
 
1594
1594
  const WORKER_TOOL_BATCH_GRACE_MS = 250;
1595
+ const MAP_EXECUTION_HEARTBEAT_INTERVAL_MS = 5_000;
1595
1596
  // Fallback batch-chunk parallelism when a tool declares no provider rate hints.
1596
1597
  // Matches the prior hardcoded `Math.min(4, ...)` so undeclared providers keep
1597
1598
  // their previous batching behavior; declared providers tighten via the
@@ -1599,6 +1600,10 @@ const WORKER_TOOL_BATCH_GRACE_MS = 250;
1599
1600
  const WORKER_TOOL_BATCH_DEFAULT_PARALLELISM = 4;
1600
1601
  const WORKER_RETRY_SAFE_5XX_TOOLS = new Set(['test_transient_500']);
1601
1602
 
1603
+ function sleepWorkerMs(ms: number): Promise<void> {
1604
+ return new Promise((resolve) => setTimeout(resolve, ms));
1605
+ }
1606
+
1602
1607
  function stepProgramColumnName(parentField: string, stepId: string): string {
1603
1608
  return sqlSafePlayColumnName(`${parentField}.${stepId}`);
1604
1609
  }
@@ -3542,8 +3547,11 @@ function createMinimalWorkerCtx(
3542
3547
  softWorkflowStepBudget: plan?.chunkPlan.softWorkflowStepBudget,
3543
3548
  });
3544
3549
  const outputFields = fieldEntries.map(([field]) => field);
3545
- const updateMapProgress = (progress: LiveNodeProgressSnapshot) => {
3546
- callbacks?.onNodeProgress?.({
3550
+ const updateMapProgress = (
3551
+ progress: LiveNodeProgressSnapshot,
3552
+ options?: { forceFlush?: boolean },
3553
+ ): void | Promise<void> => {
3554
+ return callbacks?.onNodeProgress?.({
3547
3555
  nodeId: mapNodeId,
3548
3556
  progress: {
3549
3557
  artifactTableNamespace: name,
@@ -3551,19 +3559,62 @@ function createMinimalWorkerCtx(
3551
3559
  ...progress,
3552
3560
  updatedAt: progress.updatedAt ?? nowMs(),
3553
3561
  },
3554
- forceFlush: true,
3562
+ forceFlush: options?.forceFlush ?? true,
3555
3563
  });
3556
3564
  };
3557
3565
  const formatMapProgressMessage = (completed: number, total?: number) =>
3558
3566
  typeof total === 'number' && Number.isFinite(total) && total > 0
3559
3567
  ? `${completed.toLocaleString()} / ${total.toLocaleString()} rows processed`
3560
3568
  : `${completed.toLocaleString()} rows processed`;
3569
+ const formatMapPreparingMessage = (total?: number) =>
3570
+ typeof total === 'number' && Number.isFinite(total) && total > 0
3571
+ ? `Preparing ${total.toLocaleString()} rows`
3572
+ : 'Preparing rows';
3573
+ const formatMapQueuedMessage = (input: {
3574
+ completed: number;
3575
+ queued: number;
3576
+ total?: number;
3577
+ }) => {
3578
+ const completed = Math.max(0, input.completed);
3579
+ const queued = Math.max(0, input.queued);
3580
+ if (completed > 0 && queued > 0) {
3581
+ return `${completed.toLocaleString()} already satisfied, ${queued.toLocaleString()} queued`;
3582
+ }
3583
+ if (queued > 0) {
3584
+ return `${queued.toLocaleString()} rows queued`;
3585
+ }
3586
+ return formatMapProgressMessage(completed, input.total);
3587
+ };
3588
+ const formatMapProcessingMessage = (rowsToExecute: number) =>
3589
+ rowsToExecute > 0
3590
+ ? `Processing ${rowsToExecute.toLocaleString()} rows`
3591
+ : null;
3592
+ const formatMapExecutionHeartbeatMessage = (input: {
3593
+ rowsToExecute: number;
3594
+ startedRows: number;
3595
+ activeRows: number;
3596
+ completedRows: number;
3597
+ }) => {
3598
+ const rowsToExecute = Math.max(0, input.rowsToExecute);
3599
+ const startedRows = Math.max(0, input.startedRows);
3600
+ const activeRows = Math.max(0, input.activeRows);
3601
+ const completedRows = Math.max(0, input.completedRows);
3602
+ const waitingRows = Math.max(0, rowsToExecute - startedRows);
3603
+ const parts = [
3604
+ activeRows > 0 ? `${activeRows.toLocaleString()} active` : null,
3605
+ waitingRows > 0 ? `${waitingRows.toLocaleString()} waiting` : null,
3606
+ completedRows > 0 ? `${completedRows.toLocaleString()} done` : null,
3607
+ ].filter((part): part is string => Boolean(part));
3608
+ const base =
3609
+ formatMapProcessingMessage(rowsToExecute) ?? 'Processing rows';
3610
+ return parts.length > 0 ? `${base} (${parts.join(', ')})` : base;
3611
+ };
3561
3612
  callbacks?.onMapStarted?.(mapNodeId, mapStartedAt);
3562
- updateMapProgress({
3613
+ await updateMapProgress({
3563
3614
  completed: 0,
3564
3615
  total: rowCountHint ?? undefined,
3565
3616
  startedAt: mapStartedAt,
3566
- message: formatMapProgressMessage(0, rowCountHint ?? undefined),
3617
+ message: formatMapPreparingMessage(rowCountHint ?? undefined),
3567
3618
  });
3568
3619
  const explicitRowKeysSeen =
3569
3620
  opts?.key === undefined ? null : new Map<string, number>();
@@ -3637,6 +3688,8 @@ function createMinimalWorkerCtx(
3637
3688
  }
3638
3689
  };
3639
3690
 
3691
+ let totalRowsWritten = 0;
3692
+
3640
3693
  const processChunk = async (
3641
3694
  chunkRows: T[],
3642
3695
  chunkStart: number,
@@ -3681,17 +3734,20 @@ function createMinimalWorkerCtx(
3681
3734
  completedRows: prepared.completedRows.length,
3682
3735
  },
3683
3736
  });
3684
- updateMapProgress({
3685
- completed: prepared.completedRows.length,
3686
- total: chunkRows.length,
3737
+ const progressTotalRows = rowCountHint ?? chunkRows.length;
3738
+ const preparedCompletedRows = Math.min(
3739
+ progressTotalRows,
3740
+ totalRowsWritten + prepared.completedRows.length,
3741
+ );
3742
+ await updateMapProgress({
3743
+ completed: preparedCompletedRows,
3744
+ total: progressTotalRows,
3687
3745
  startedAt: mapStartedAt,
3688
- message:
3689
- prepared.pendingRows.length > 0
3690
- ? `${prepared.pendingRows.length.toLocaleString()} rows queued`
3691
- : formatMapProgressMessage(
3692
- prepared.completedRows.length,
3693
- chunkRows.length,
3694
- ),
3746
+ message: formatMapQueuedMessage({
3747
+ completed: preparedCompletedRows,
3748
+ queued: prepared.pendingRows.length,
3749
+ total: progressTotalRows,
3750
+ }),
3695
3751
  });
3696
3752
  const pendingKeys = new Set<string>();
3697
3753
  const pendingRowsByKey = new Map<string, Record<string, unknown>>();
@@ -3735,38 +3791,83 @@ function createMinimalWorkerCtx(
3735
3791
  new Set(chunkEntries.map((entry) => entry.rowKey)).size,
3736
3792
  );
3737
3793
  const rowsToExecute = uniqueRowsToExecuteEntries.map(({ row }) => row);
3794
+ const processingMessage = formatMapProcessingMessage(
3795
+ rowsToExecute.length,
3796
+ );
3797
+ if (processingMessage) {
3798
+ await updateMapProgress({
3799
+ completed: preparedCompletedRows,
3800
+ total: progressTotalRows,
3801
+ startedAt: mapStartedAt,
3802
+ message: processingMessage,
3803
+ });
3804
+ }
3738
3805
  const rowsInserted = prepared.inserted + missingPreparedRows.length;
3739
3806
  const rowsSkipped = Math.max(
3740
3807
  0,
3741
3808
  prepared.skipped - missingPreparedRows.length,
3742
3809
  );
3743
- let settledToolRequests = 0;
3744
- let lastToolProgressAt = 0;
3745
- const reportSettledToolRequests = (count: number) => {
3746
- if (count <= 0) return;
3747
- settledToolRequests += count;
3810
+ let completedExecutedRows = 0;
3811
+ let startedExecutedRows = 0;
3812
+ let activeExecutedRows = 0;
3813
+ let lastChunkProgressAt = 0;
3814
+ let lastExecutionHeartbeatAt = nowMs();
3815
+ const completedRowsForProgress = () =>
3816
+ Math.min(
3817
+ progressTotalRows,
3818
+ totalRowsWritten +
3819
+ prepared.completedRows.length +
3820
+ completedExecutedRows,
3821
+ );
3822
+ const reportExecutionHeartbeat = (force = false) => {
3748
3823
  const now = nowMs();
3749
- const estimatedCompleted = Math.min(
3750
- chunkRows.length,
3751
- prepared.completedRows.length + settledToolRequests,
3824
+ if (
3825
+ !force &&
3826
+ now - lastExecutionHeartbeatAt < MAP_EXECUTION_HEARTBEAT_INTERVAL_MS
3827
+ ) {
3828
+ return;
3829
+ }
3830
+ lastExecutionHeartbeatAt = now;
3831
+ void updateMapProgress(
3832
+ {
3833
+ completed: completedRowsForProgress(),
3834
+ total: progressTotalRows,
3835
+ startedAt: mapStartedAt,
3836
+ message: formatMapExecutionHeartbeatMessage({
3837
+ rowsToExecute: rowsToExecute.length,
3838
+ startedRows: startedExecutedRows,
3839
+ activeRows: activeExecutedRows,
3840
+ completedRows: completedExecutedRows,
3841
+ }),
3842
+ },
3843
+ { forceFlush: force },
3752
3844
  );
3753
- const isTerminalEstimate = estimatedCompleted >= chunkRows.length;
3845
+ };
3846
+ const reportChunkProgress = (force = false) => {
3847
+ const now = nowMs();
3848
+ const completed = completedRowsForProgress();
3849
+ const isTerminalEstimate = completed >= progressTotalRows;
3754
3850
  if (
3851
+ !force &&
3755
3852
  !isTerminalEstimate &&
3756
- now - lastToolProgressAt < RUN_LEDGER_FLUSH_INTERVAL_MS
3853
+ now - lastChunkProgressAt < RUN_LEDGER_FLUSH_INTERVAL_MS
3757
3854
  ) {
3758
3855
  return;
3759
3856
  }
3760
- lastToolProgressAt = now;
3761
- updateMapProgress({
3762
- completed: estimatedCompleted,
3763
- total: chunkRows.length,
3764
- startedAt: mapStartedAt,
3765
- message: formatMapProgressMessage(
3766
- estimatedCompleted,
3767
- chunkRows.length,
3768
- ),
3769
- });
3857
+ lastChunkProgressAt = now;
3858
+ void updateMapProgress(
3859
+ {
3860
+ completed,
3861
+ total: progressTotalRows,
3862
+ startedAt: mapStartedAt,
3863
+ message: formatMapProgressMessage(completed, progressTotalRows),
3864
+ },
3865
+ { forceFlush: force },
3866
+ );
3867
+ };
3868
+ const reportSettledToolRequests = (count: number) => {
3869
+ if (count <= 0) return;
3870
+ reportChunkProgress(false);
3770
3871
  };
3771
3872
  // Row concurrency comes from the Governor: an explicit map concurrency is
3772
3873
  // clamped to the policy row-max, otherwise the policy default. Each row
@@ -3812,7 +3913,12 @@ function createMinimalWorkerCtx(
3812
3913
  const rowSlot = await governor.acquireRowSlot({
3813
3914
  signal: abortSignal,
3814
3915
  });
3916
+ let rowMarkedActive = false;
3815
3917
  try {
3918
+ startedExecutedRows += 1;
3919
+ activeExecutedRows += 1;
3920
+ rowMarkedActive = true;
3921
+ reportExecutionHeartbeat(false);
3816
3922
  const entry = uniqueRowsToExecuteEntries[myIndex]!;
3817
3923
  const pendingRow = pendingRowsByKey.get(entry.rowKey);
3818
3924
  const row = pendingRow
@@ -3985,7 +4091,13 @@ function createMinimalWorkerCtx(
3985
4091
  ? cellMetaPatch
3986
4092
  : undefined;
3987
4093
  executedRows[myIndex] = enriched as T & Record<string, unknown>;
4094
+ completedExecutedRows += 1;
4095
+ reportChunkProgress(false);
3988
4096
  } finally {
4097
+ if (rowMarkedActive) {
4098
+ activeExecutedRows = Math.max(0, activeExecutedRows - 1);
4099
+ reportExecutionHeartbeat(false);
4100
+ }
3989
4101
  rowSlot.release();
3990
4102
  }
3991
4103
  }
@@ -4031,7 +4143,27 @@ function createMinimalWorkerCtx(
4031
4143
  });
4032
4144
  };
4033
4145
  const workersStartedAt = nowMs();
4034
- const workerResults = await Promise.allSettled(workers);
4146
+ // Track completion with a boolean flag rather than narrowing a
4147
+ // closure-assigned `| null` variable: TypeScript's control-flow analysis
4148
+ // does not see the assignment inside `.then(...)`, so a
4149
+ // `while (results === null)` loop would narrow it to `never` afterwards.
4150
+ let workerSettled = false;
4151
+ const workerResultsPromise = Promise.allSettled(workers).then(
4152
+ (results) => {
4153
+ workerSettled = true;
4154
+ return results;
4155
+ },
4156
+ );
4157
+ while (!workerSettled) {
4158
+ await Promise.race([
4159
+ workerResultsPromise,
4160
+ sleepWorkerMs(MAP_EXECUTION_HEARTBEAT_INTERVAL_MS),
4161
+ ]);
4162
+ if (!workerSettled) {
4163
+ reportExecutionHeartbeat(false);
4164
+ }
4165
+ }
4166
+ const workerResults = await workerResultsPromise;
4035
4167
  recordRunnerPerfTrace({
4036
4168
  req,
4037
4169
  phase: 'runner.map_chunk.execute_workers',
@@ -4198,7 +4330,7 @@ function createMinimalWorkerCtx(
4198
4330
  `inserted=${totalRowsInserted} skipped=${totalRowsSkipped}`;
4199
4331
  const completedAt = nowMs();
4200
4332
  callbacks?.onMapCompleted?.(mapNodeId, completedAt);
4201
- updateMapProgress({
4333
+ void updateMapProgress({
4202
4334
  completed: totalRowsWritten,
4203
4335
  total: totalRowsWritten,
4204
4336
  completedAt,
@@ -4249,7 +4381,6 @@ function createMinimalWorkerCtx(
4249
4381
  });
4250
4382
  };
4251
4383
 
4252
- let totalRowsWritten = 0;
4253
4384
  let chunkIndex = 0;
4254
4385
  let chunkStart = 0;
4255
4386
  for await (const chunkRows of iterDatasetChunks(inputRows, rowsPerChunk)) {
@@ -4263,7 +4394,7 @@ function createMinimalWorkerCtx(
4263
4394
  totalRowsDuplicateReused += chunkResult.rowsDuplicateReused;
4264
4395
  totalRowsInserted += chunkResult.rowsInserted;
4265
4396
  totalRowsSkipped += chunkResult.rowsSkipped;
4266
- updateMapProgress({
4397
+ await updateMapProgress({
4267
4398
  completed: totalRowsWritten,
4268
4399
  total: rowCountHint ?? undefined,
4269
4400
  message: formatMapProgressMessage(
@@ -5374,6 +5505,10 @@ async function executeRunRequest(
5374
5505
  ];
5375
5506
  let lastLedgerFlushAt = startedAt;
5376
5507
  let ledgerFlushInFlight: Promise<void> = Promise.resolve();
5508
+ let ledgerFlushQueueDepth = 0;
5509
+ let lastCoordinatorProgressPublishAt = 0;
5510
+ let coordinatorProgressPublishInFlight: Promise<void> = Promise.resolve();
5511
+ let coordinatorProgressPublishQueueDepth = 0;
5377
5512
 
5378
5513
  const appendRunLogLine = (line: string) => {
5379
5514
  const trimmed = redactSecretsFromLogString(line.trim());
@@ -5496,6 +5631,36 @@ async function executeRunRequest(
5496
5631
  });
5497
5632
  };
5498
5633
 
5634
+ const flushCoordinatorProgressEvent = (force: boolean): Promise<void> => {
5635
+ const now = nowMs();
5636
+ if (
5637
+ !force &&
5638
+ now - lastCoordinatorProgressPublishAt <
5639
+ MAP_EXECUTION_HEARTBEAT_INTERVAL_MS
5640
+ ) {
5641
+ return Promise.resolve();
5642
+ }
5643
+ if (!force && coordinatorProgressPublishQueueDepth > 0) {
5644
+ return Promise.resolve();
5645
+ }
5646
+ lastCoordinatorProgressPublishAt = now;
5647
+ coordinatorProgressPublishQueueDepth += 1;
5648
+ coordinatorProgressPublishInFlight = coordinatorProgressPublishInFlight
5649
+ .catch(() => undefined)
5650
+ .then(async () => {
5651
+ try {
5652
+ await publishCoordinatorProgressEvent(now);
5653
+ } finally {
5654
+ coordinatorProgressPublishQueueDepth = Math.max(
5655
+ 0,
5656
+ coordinatorProgressPublishQueueDepth - 1,
5657
+ );
5658
+ }
5659
+ })
5660
+ .catch(() => undefined);
5661
+ return force ? coordinatorProgressPublishInFlight : Promise.resolve();
5662
+ };
5663
+
5499
5664
  const appendStepLifecycleEvent = (event: PlayStepLifecycleEvent) => {
5500
5665
  updateStepProgress({
5501
5666
  nodeId: event.nodeId,
@@ -5595,15 +5760,19 @@ async function executeRunRequest(
5595
5760
  return events;
5596
5761
  };
5597
5762
 
5598
- const flushLedgerEvents = (force: boolean): void => {
5599
- if (!options?.persistResultDatasets) return;
5763
+ const flushLedgerEvents = (force: boolean): Promise<void> => {
5764
+ if (!options?.persistResultDatasets) return Promise.resolve();
5600
5765
  const now = nowMs();
5601
5766
  if (!force && now - lastLedgerFlushAt < RUN_LEDGER_FLUSH_INTERVAL_MS) {
5602
- return;
5767
+ return Promise.resolve();
5768
+ }
5769
+ if (!force && ledgerFlushQueueDepth > 0) {
5770
+ return Promise.resolve();
5603
5771
  }
5604
5772
  const events = drainPendingLedgerEvents(now);
5605
- if (events.length === 0) return;
5773
+ if (events.length === 0) return Promise.resolve();
5606
5774
  lastLedgerFlushAt = now;
5775
+ ledgerFlushQueueDepth += 1;
5607
5776
  ledgerFlushInFlight = ledgerFlushInFlight
5608
5777
  .catch(() => undefined)
5609
5778
  .then(async () => {
@@ -5616,10 +5785,12 @@ async function executeRunRequest(
5616
5785
  } catch {
5617
5786
  pendingLedgerEvents = [...events, ...pendingLedgerEvents];
5618
5787
  throw new Error('runtime run-ledger append failed');
5788
+ } finally {
5789
+ ledgerFlushQueueDepth = Math.max(0, ledgerFlushQueueDepth - 1);
5619
5790
  }
5620
- await publishCoordinatorProgressEvent(now).catch(() => undefined);
5621
5791
  })
5622
5792
  .catch(() => undefined);
5793
+ return force ? ledgerFlushInFlight : Promise.resolve();
5623
5794
  };
5624
5795
 
5625
5796
  const flushTerminalLedgerEvents = async (
@@ -5661,7 +5832,12 @@ async function executeRunRequest(
5661
5832
  const workerCallbacks: WorkerCtxCallbacks = {
5662
5833
  onNodeProgress: (input) => {
5663
5834
  updateStepProgress(input);
5664
- flushLedgerEvents(Boolean(input.forceFlush));
5835
+ const force = Boolean(input.forceFlush);
5836
+ const ledgerFlush = flushLedgerEvents(force);
5837
+ const progressFlush = flushCoordinatorProgressEvent(force);
5838
+ return force
5839
+ ? Promise.all([ledgerFlush, progressFlush]).then(() => undefined)
5840
+ : Promise.resolve();
5665
5841
  },
5666
5842
  onMapStarted: (nodeId, at) => stepLifecycle?.onMapStarted(nodeId, at),
5667
5843
  onMapCompleted: (nodeId, at) => stepLifecycle?.onMapCompleted(nodeId, at),