deepline 0.1.131 → 0.1.133

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1466,6 +1466,9 @@ type WorkerToolBatchRequest = {
1466
1466
 
1467
1467
  const WORKER_TOOL_BATCH_GRACE_MS = 250;
1468
1468
  const MAP_EXECUTION_HEARTBEAT_INTERVAL_MS = 5_000;
1469
+ const MAP_INCREMENTAL_PERSIST_CHUNK_ROWS = 100;
1470
+ const MAP_INCREMENTAL_PERSIST_CHUNK_BYTES = 1 * 1024 * 1024;
1471
+ const MAP_INCREMENTAL_PERSIST_INTERVAL_MS = 100;
1469
1472
  /**
1470
1473
  * Bounded number of per-row failure samples carried in chunk summaries and the
1471
1474
  * map's terminal partial-failure log. Every failed row is persisted with its
@@ -3850,6 +3853,153 @@ function createMinimalWorkerCtx(
3850
3853
  reportSettledToolRequests,
3851
3854
  );
3852
3855
  const generatedOutputFields = new Set<string>();
3856
+ const persistedExecutedIndexes = new Set<number>();
3857
+ const persistedFailedIndexes = new Set<number>();
3858
+ let pendingPersistRows = 0;
3859
+ let pendingPersistBytes = 0;
3860
+ let scheduledPersistTimer: ReturnType<typeof setTimeout> | null = null;
3861
+ let persistFlushChain: Promise<void> = Promise.resolve();
3862
+ let persistFailure: unknown = null;
3863
+
3864
+ const clearScheduledPersistTimer = () => {
3865
+ if (scheduledPersistTimer) {
3866
+ clearTimeout(scheduledPersistTimer);
3867
+ scheduledPersistTimer = null;
3868
+ }
3869
+ };
3870
+
3871
+ const persistExecutedRows = async () => {
3872
+ const rowsToPersist = executedRows
3873
+ .map((row, executedIndex) =>
3874
+ row && !persistedExecutedIndexes.has(executedIndex)
3875
+ ? {
3876
+ row,
3877
+ executedIndex,
3878
+ }
3879
+ : null,
3880
+ )
3881
+ .filter(
3882
+ (
3883
+ entry,
3884
+ ): entry is {
3885
+ row: T & Record<string, unknown>;
3886
+ executedIndex: number;
3887
+ } => entry !== null,
3888
+ );
3889
+ const allFailedRowsToPersist = failedRowEntries
3890
+ .map((failure, executedIndex) =>
3891
+ failure && !persistedFailedIndexes.has(executedIndex)
3892
+ ? {
3893
+ failure,
3894
+ executedIndex,
3895
+ }
3896
+ : null,
3897
+ )
3898
+ .filter(
3899
+ (
3900
+ entry,
3901
+ ): entry is {
3902
+ failure: { row: T & Record<string, unknown>; error: string };
3903
+ executedIndex: number;
3904
+ } => entry !== null,
3905
+ );
3906
+ // Under the default isolation, every failed row persists as a
3907
+ // recoverable `_status='failed'` row (it re-executes free next run).
3908
+ // Under `onRowError: 'fail'` the run dies, so a failed row's partial
3909
+ // data is persisted ONLY as a last-resort recovery: when this chunk has
3910
+ // no other recoverable rows (no successful executed rows and no
3911
+ // already-completed rows). That keeps a partial fail-fast run's export
3912
+ // to the rows that fully committed before the failure, while an
3913
+ // all-rows-failed fail-fast run still exposes the persisted partial
3914
+ // cells instead of advertising an empty, unrecoverable dataset.
3915
+ const failedRowsToPersist =
3916
+ failFastRowErrors &&
3917
+ (rowsToPersist.length > 0 ||
3918
+ persistedExecutedIndexes.size > 0 ||
3919
+ prepared.completedRows.length > 0)
3920
+ ? []
3921
+ : allFailedRowsToPersist;
3922
+ if (rowsToPersist.length === 0 && failedRowsToPersist.length === 0) {
3923
+ return;
3924
+ }
3925
+ await persistCompletedMapRows({
3926
+ req,
3927
+ tableNamespace: name,
3928
+ outputFields,
3929
+ extraOutputFields: Array.from(generatedOutputFields),
3930
+ rows: [
3931
+ ...rowsToPersist.map(({ row, executedIndex }) => ({
3932
+ ...row,
3933
+ ...(executedCellMetaPatches[executedIndex]
3934
+ ? {
3935
+ __deeplineCellMetaPatch:
3936
+ executedCellMetaPatches[executedIndex],
3937
+ }
3938
+ : {}),
3939
+ __deeplineRowKey:
3940
+ uniqueRowsToExecuteEntries[executedIndex]!.rowKey,
3941
+ })),
3942
+ // Failed rows persist as recoverable `_status='failed'` sheet
3943
+ // rows: partial data + per-cell failure meta + the row error.
3944
+ ...failedRowsToPersist.map(({ failure, executedIndex }) => ({
3945
+ ...failure.row,
3946
+ ...(executedCellMetaPatches[executedIndex]
3947
+ ? {
3948
+ __deeplineCellMetaPatch:
3949
+ executedCellMetaPatches[executedIndex],
3950
+ }
3951
+ : {}),
3952
+ __deeplineRowKey:
3953
+ uniqueRowsToExecuteEntries[executedIndex]!.rowKey,
3954
+ __deeplineRowStatus: 'failed',
3955
+ __deeplineRowError: failure.error,
3956
+ })),
3957
+ ],
3958
+ });
3959
+ for (const { executedIndex } of rowsToPersist) {
3960
+ persistedExecutedIndexes.add(executedIndex);
3961
+ }
3962
+ for (const { executedIndex } of failedRowsToPersist) {
3963
+ persistedFailedIndexes.add(executedIndex);
3964
+ }
3965
+ };
3966
+
3967
+ const enqueuePersistExecutedRows = (): Promise<void> => {
3968
+ clearScheduledPersistTimer();
3969
+ pendingPersistRows = 0;
3970
+ pendingPersistBytes = 0;
3971
+ const task = persistFlushChain.then(async () => {
3972
+ if (persistFailure) throw persistFailure;
3973
+ await persistExecutedRows();
3974
+ });
3975
+ persistFlushChain = task.catch((error) => {
3976
+ persistFailure ??= error;
3977
+ });
3978
+ return task;
3979
+ };
3980
+
3981
+ const schedulePersistExecutedRows = () => {
3982
+ if (persistFailure) return;
3983
+ if (
3984
+ pendingPersistRows >= MAP_INCREMENTAL_PERSIST_CHUNK_ROWS ||
3985
+ pendingPersistBytes >= MAP_INCREMENTAL_PERSIST_CHUNK_BYTES
3986
+ ) {
3987
+ void enqueuePersistExecutedRows().catch(() => undefined);
3988
+ return;
3989
+ }
3990
+ if (scheduledPersistTimer) return;
3991
+ scheduledPersistTimer = setTimeout(() => {
3992
+ scheduledPersistTimer = null;
3993
+ void enqueuePersistExecutedRows().catch(() => undefined);
3994
+ }, MAP_INCREMENTAL_PERSIST_INTERVAL_MS);
3995
+ };
3996
+
3997
+ const notePersistableRow = (row: Record<string, unknown>) => {
3998
+ pendingPersistRows += 1;
3999
+ pendingPersistBytes += JSON.stringify(row).length;
4000
+ schedulePersistExecutedRows();
4001
+ };
4002
+
3853
4003
  let idx = 0;
3854
4004
  const workers: Array<Promise<void>> = [];
3855
4005
  for (let w = 0; w < concurrency; w += 1) {
@@ -4025,6 +4175,7 @@ function createMinimalWorkerCtx(
4025
4175
  executedRows[myIndex] = enriched as T &
4026
4176
  Record<string, unknown>;
4027
4177
  completedExecutedRows += 1;
4178
+ notePersistableRow(enriched);
4028
4179
  reportChunkProgress(false);
4029
4180
  } catch (rowError) {
4030
4181
  // Abort/budget errors stay run-fatal and leave no partial
@@ -4045,19 +4196,19 @@ function createMinimalWorkerCtx(
4045
4196
  Object.keys(cellMetaPatch).length > 0
4046
4197
  ? cellMetaPatch
4047
4198
  : undefined;
4048
- // Keep the partially-enriched row so its already-succeeded
4049
- // sibling cells (e.g. a contact column that ran before the
4050
- // failing column) persist as a recoverable `_status='failed'`
4051
- // sheet row. This holds for BOTH the default isolation path
4052
- // (row re-executes free on the next run) AND `onRowError:
4053
- // 'fail'`: the chunk still persists every recorded row, so the
4054
- // failed run advertises a working recovered export even when
4055
- // every row fails (see the runMap-level fail-fast throw).
4199
+ // Keep the partially-enriched row. Default isolation persists
4200
+ // it as `_status='failed'` so the row can re-execute free on
4201
+ // the next run. Fail-fast persists failed rows only after the
4202
+ // chunk settles and only when every row failed; otherwise only
4203
+ // fully committed successful rows are recoverable.
4056
4204
  failedRowEntries[myIndex] = {
4057
4205
  row: enriched as T & Record<string, unknown>,
4058
4206
  error: message,
4059
4207
  };
4060
4208
  failedExecutedRows += 1;
4209
+ if (!failFastRowErrors) {
4210
+ notePersistableRow(enriched);
4211
+ }
4061
4212
  // Bounded per-chunk samples: every failure is persisted on
4062
4213
  // its row, but only the first few get a log line so a wide
4063
4214
  // outage cannot flood the Run Log Stream.
@@ -4069,7 +4220,7 @@ function createMinimalWorkerCtx(
4069
4220
  `Row ${absoluteIndex} of ctx.dataset("${name}") failed` +
4070
4221
  `${activeField ? ` at column "${activeField}"` : ''}: ${message} ` +
4071
4222
  (failFastRowErrors
4072
- ? '(row persisted as failed; onRowError:"fail" fails the run after committing it)'
4223
+ ? '(row recorded as failed; onRowError:"fail" persists it only if every row fails)'
4073
4224
  : '(row recorded as failed; sibling rows continue and the row re-executes on the next run)'),
4074
4225
  ts: nowMs(),
4075
4226
  });
@@ -4098,93 +4249,6 @@ function createMinimalWorkerCtx(
4098
4249
  })(),
4099
4250
  );
4100
4251
  }
4101
- const persistExecutedRows = async () => {
4102
- const rowsToPersist = executedRows
4103
- .map((row, executedIndex) =>
4104
- row
4105
- ? {
4106
- row,
4107
- executedIndex,
4108
- }
4109
- : null,
4110
- )
4111
- .filter(
4112
- (
4113
- entry,
4114
- ): entry is {
4115
- row: T & Record<string, unknown>;
4116
- executedIndex: number;
4117
- } => entry !== null,
4118
- );
4119
- const allFailedRowsToPersist = failedRowEntries
4120
- .map((failure, executedIndex) =>
4121
- failure
4122
- ? {
4123
- failure,
4124
- executedIndex,
4125
- }
4126
- : null,
4127
- )
4128
- .filter(
4129
- (
4130
- entry,
4131
- ): entry is {
4132
- failure: { row: T & Record<string, unknown>; error: string };
4133
- executedIndex: number;
4134
- } => entry !== null,
4135
- );
4136
- // Under the default isolation, every failed row persists as a
4137
- // recoverable `_status='failed'` row (it re-executes free next run).
4138
- // Under `onRowError: 'fail'` the run dies, so a failed row's partial
4139
- // data is persisted ONLY as a last-resort recovery: when this chunk has
4140
- // no other recoverable rows (no successful executed rows and no
4141
- // already-completed rows). That keeps a partial fail-fast run's export
4142
- // to the rows that fully committed before the failure, while an
4143
- // all-rows-failed fail-fast run still exposes the persisted partial
4144
- // cells instead of advertising an empty, unrecoverable dataset.
4145
- const failedRowsToPersist =
4146
- failFastRowErrors &&
4147
- (rowsToPersist.length > 0 || prepared.completedRows.length > 0)
4148
- ? []
4149
- : allFailedRowsToPersist;
4150
- if (rowsToPersist.length === 0 && failedRowsToPersist.length === 0) {
4151
- return;
4152
- }
4153
- await persistCompletedMapRows({
4154
- req,
4155
- tableNamespace: name,
4156
- outputFields,
4157
- extraOutputFields: Array.from(generatedOutputFields),
4158
- rows: [
4159
- ...rowsToPersist.map(({ row, executedIndex }) => ({
4160
- ...row,
4161
- ...(executedCellMetaPatches[executedIndex]
4162
- ? {
4163
- __deeplineCellMetaPatch:
4164
- executedCellMetaPatches[executedIndex],
4165
- }
4166
- : {}),
4167
- __deeplineRowKey:
4168
- uniqueRowsToExecuteEntries[executedIndex]!.rowKey,
4169
- })),
4170
- // Failed rows persist as recoverable `_status='failed'` sheet
4171
- // rows: partial data + per-cell failure meta + the row error.
4172
- ...failedRowsToPersist.map(({ failure, executedIndex }) => ({
4173
- ...failure.row,
4174
- ...(executedCellMetaPatches[executedIndex]
4175
- ? {
4176
- __deeplineCellMetaPatch:
4177
- executedCellMetaPatches[executedIndex],
4178
- }
4179
- : {}),
4180
- __deeplineRowKey:
4181
- uniqueRowsToExecuteEntries[executedIndex]!.rowKey,
4182
- __deeplineRowStatus: 'failed',
4183
- __deeplineRowError: failure.error,
4184
- })),
4185
- ],
4186
- });
4187
- };
4188
4252
  const workersStartedAt = nowMs();
4189
4253
  // Track completion with a boolean flag rather than narrowing a
4190
4254
  // closure-assigned `| null` variable: TypeScript's control-flow analysis
@@ -4230,7 +4294,9 @@ function createMinimalWorkerCtx(
4230
4294
  },
4231
4295
  });
4232
4296
  try {
4233
- await persistExecutedRows();
4297
+ await enqueuePersistExecutedRows();
4298
+ await persistFlushChain;
4299
+ if (persistFailure) throw persistFailure;
4234
4300
  recordRunnerPerfTrace({
4235
4301
  req,
4236
4302
  phase: 'runner.map_chunk.persist_rows',
@@ -30,19 +30,29 @@ export async function executeChunkedRequests<TRequest, TResult>(input: {
30
30
  const results: Array<ChunkExecutionResult<TRequest, TResult>> = [];
31
31
  for (let start = 0; start < input.requests.length; start += input.batchSize) {
32
32
  const chunk = input.requests.slice(start, start + input.batchSize);
33
- const settled = await Promise.allSettled(
34
- chunk.map((request) => input.execute(request)),
33
+ let notifyChain: Promise<void> = Promise.resolve();
34
+ const notify = async (
35
+ entry: ChunkExecutionResult<TRequest, TResult>,
36
+ ): Promise<void> => {
37
+ results.push(entry);
38
+ notifyChain = notifyChain.then(
39
+ async () => await input.onChunkComplete?.([entry]),
40
+ );
41
+ await notifyChain;
42
+ };
43
+
44
+ await Promise.all(
45
+ chunk.map(async (request) => {
46
+ let entry: ChunkExecutionResult<TRequest, TResult>;
47
+ try {
48
+ entry = { request, result: await input.execute(request) };
49
+ } catch (error) {
50
+ entry = { request, result: null, error };
51
+ }
52
+ await notify(entry);
53
+ }),
35
54
  );
36
- for (let index = 0; index < chunk.length; index += 1) {
37
- const request = chunk[index]!;
38
- const outcome = settled[index]!;
39
- if (outcome.status === 'rejected') {
40
- results.push({ request, result: null, error: outcome.reason });
41
- continue;
42
- }
43
- results.push({ request, result: outcome.value });
44
- }
45
- await input.onChunkComplete?.(results.slice(results.length - chunk.length));
55
+ await notifyChain;
46
56
  }
47
57
  return results;
48
58
  }
@@ -101,10 +101,10 @@ export const SDK_RELEASE = {
101
101
  // 0.1.108 ships explicit dataset column/tool recompute policy and removes
102
102
  // the SDK enrich generator's one-second stale policy.
103
103
  // 0.1.110 ships authored V2 prebuilts and required top-level play descriptions.
104
- version: '0.1.131',
104
+ version: '0.1.133',
105
105
  apiContract: '2026-06-dataset-column-cell-stale-hard-cutover',
106
106
  supportPolicy: {
107
- latest: '0.1.131',
107
+ latest: '0.1.133',
108
108
  minimumSupported: '0.1.53',
109
109
  deprecatedBelow: '0.1.53',
110
110
  commandMinimumSupported: [
@@ -45,30 +45,37 @@ export async function executeChunkedRequests<TRequest, TResult>(input: {
45
45
 
46
46
  for (let start = 0; start < input.requests.length; start += input.batchSize) {
47
47
  const chunk = input.requests.slice(start, start + input.batchSize);
48
- const settled = await Promise.allSettled(
49
- chunk.map((request) => input.execute(request)),
50
- );
51
-
52
- for (let index = 0; index < chunk.length; index += 1) {
53
- const request = chunk[index]!;
54
- const outcome = settled[index]!;
55
- if (outcome.status === 'rejected') {
56
- input.onRequestError?.(request, outcome.reason);
57
- results.push({
58
- request,
59
- result: null,
60
- error: formatChunkExecutionError(outcome.reason),
61
- });
62
- continue;
63
- }
64
- results.push({
65
- request,
66
- result: outcome.value,
67
- });
68
- }
48
+ let notifyChain: Promise<void> = Promise.resolve();
49
+ const notify = async (
50
+ entry: ChunkExecutionResult<TRequest, TResult>,
51
+ ): Promise<void> => {
52
+ results.push(entry);
53
+ notifyChain = notifyChain.then(
54
+ async () => await input.onChunkComplete?.([entry]),
55
+ );
56
+ await notifyChain;
57
+ };
69
58
 
70
- const completedChunk = results.slice(results.length - chunk.length);
71
- await input.onChunkComplete?.(completedChunk);
59
+ await Promise.all(
60
+ chunk.map(async (request) => {
61
+ let entry: ChunkExecutionResult<TRequest, TResult>;
62
+ try {
63
+ entry = {
64
+ request,
65
+ result: await input.execute(request),
66
+ };
67
+ } catch (error) {
68
+ input.onRequestError?.(request, error);
69
+ entry = {
70
+ request,
71
+ result: null,
72
+ error: formatChunkExecutionError(error),
73
+ };
74
+ }
75
+ await notify(entry);
76
+ }),
77
+ );
78
+ await notifyChain;
72
79
  }
73
80
 
74
81
  return results;