deepline 0.1.100 → 0.1.102

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -78,6 +78,7 @@ import {
78
78
  derivePlayRowIdentityFromKey,
79
79
  deriveToolRequestIdentity,
80
80
  } from '../../../shared_libs/plays/row-identity';
81
+ import { DEDUPE_DUPLICATE_KEY_SAMPLE_CAP } from '../../../shared_libs/play-runtime/map-row-identity';
81
82
  import {
82
83
  getTopLevelPipelineSubsteps,
83
84
  getCompiledPipelineSubsteps,
@@ -3667,24 +3668,52 @@ function createMinimalWorkerCtx(
3667
3668
  ? derivePlayRowIdentity(inputRow, name)
3668
3669
  : derivePlayRowIdentityFromKey(explicitKeyValue, name);
3669
3670
  };
3670
- const assertUniqueExplicitRowKeys = (
3671
- chunkRows: readonly Record<string, unknown>[],
3671
+ // Cross-chunk dedupe accumulators for the single end-of-map log line.
3672
+ // `dedupeExplicitMapKeyRows` (shared_libs/play-runtime/map-row-identity.ts)
3673
+ // can't be reused directly here because that helper is a single pass over a
3674
+ // fully materialized array, whereas the workers path streams rows in chunks
3675
+ // and must keep a persistent seen-set (`explicitRowKeysSeen`) across chunks.
3676
+ // We mirror its exact semantics (keep-first, order preserved, capped sample)
3677
+ // and share its sample cap constant so the two stay in lockstep.
3678
+ let totalDuplicateKeysDropped = 0;
3679
+ const droppedDuplicateKeySamples: string[] = [];
3680
+ const seenDroppedDuplicateKeys = new Set<string>();
3681
+ // Silent dedupe of duplicate explicit map keys: keep the first row per
3682
+ // canonical key and drop subsequent duplicates, preserving input order.
3683
+ // Deduping here — before rows reach `processChunk` — keeps every downstream
3684
+ // derivation (chunk counts, sheet writes, resumable keys, result mapping)
3685
+ // consistent: dropped rows never enter the chunk, so there is no index
3686
+ // mismatch. Replaces the prior throw-on-duplicate behavior per product
3687
+ // decision: duplicate keys must never fail the run.
3688
+ const dedupeExplicitRowKeys = (
3689
+ chunkRows: readonly T[],
3672
3690
  chunkStart: number,
3673
- ) => {
3674
- if (!explicitRowKeysSeen) return;
3691
+ ): T[] => {
3692
+ if (!explicitRowKeysSeen) return [...chunkRows];
3693
+ const kept: T[] = [];
3675
3694
  for (let localIndex = 0; localIndex < chunkRows.length; localIndex += 1) {
3676
3695
  const index = chunkStart + localIndex;
3677
- const keyValue = resolveExplicitKeyValue(chunkRows[localIndex]!, index);
3678
- if (keyValue == null) continue;
3679
- const previousIndex = explicitRowKeysSeen?.get(keyValue);
3680
- if (previousIndex !== undefined) {
3681
- throw new Error(
3682
- `ctx.dataset("${name}") key function produced duplicate value "${keyValue}" for rows ${previousIndex} and ${index}. ` +
3683
- 'Each row must produce a unique key. Combine columns (e.g. `${row.email}|${row.company}`) or pick a column that is unique per row.',
3684
- );
3696
+ const row = chunkRows[localIndex]!;
3697
+ const keyValue = resolveExplicitKeyValue(row, index);
3698
+ if (keyValue == null) {
3699
+ kept.push(row);
3700
+ continue;
3701
+ }
3702
+ if (explicitRowKeysSeen.has(keyValue)) {
3703
+ totalDuplicateKeysDropped += 1;
3704
+ if (
3705
+ !seenDroppedDuplicateKeys.has(keyValue) &&
3706
+ droppedDuplicateKeySamples.length < DEDUPE_DUPLICATE_KEY_SAMPLE_CAP
3707
+ ) {
3708
+ droppedDuplicateKeySamples.push(keyValue);
3709
+ }
3710
+ seenDroppedDuplicateKeys.add(keyValue);
3711
+ continue;
3685
3712
  }
3686
- explicitRowKeysSeen?.set(keyValue, index);
3713
+ explicitRowKeysSeen.set(keyValue, index);
3714
+ kept.push(row);
3687
3715
  }
3716
+ return kept;
3688
3717
  };
3689
3718
 
3690
3719
  let totalRowsWritten = 0;
@@ -4085,14 +4114,9 @@ function createMinimalWorkerCtx(
4085
4114
  completedExecutedRows += 1;
4086
4115
  reportChunkProgress(false);
4087
4116
  } catch (rowError) {
4088
- // Row failure isolation (the default): one row's
4089
- // tool/provider error is recorded on that row and its
4090
- // siblings continue. Abort/budget errors stay run-fatal,
4091
- // and `onRowError: 'fail'` opts the map into fail-fast.
4092
- if (
4093
- failFastRowErrors ||
4094
- isRowIsolationExemptError(rowError)
4095
- ) {
4117
+ // Abort/budget errors stay run-fatal and leave no partial
4118
+ // state: rethrow immediately without recording the row.
4119
+ if (isRowIsolationExemptError(rowError)) {
4096
4120
  throw rowError;
4097
4121
  }
4098
4122
  const message = formatWorkerRowFailureMessage(rowError);
@@ -4108,8 +4132,14 @@ function createMinimalWorkerCtx(
4108
4132
  Object.keys(cellMetaPatch).length > 0
4109
4133
  ? cellMetaPatch
4110
4134
  : undefined;
4111
- // Keep the partially-enriched row so completed sibling
4112
- // cells persist and replay free when the row re-executes.
4135
+ // Keep the partially-enriched row so its already-succeeded
4136
+ // sibling cells (e.g. a contact column that ran before the
4137
+ // failing column) can persist as a recoverable `_status='failed'`
4138
+ // sheet row. The default isolation path persists it (row
4139
+ // re-executes free on the next run); `onRowError: 'fail'`
4140
+ // persists it only when the chunk has no other recoverable
4141
+ // rows, so an all-rows-failed run still advertises a working
4142
+ // recovered export (see the runMap-level fail-fast throw).
4113
4143
  failedRowEntries[myIndex] = {
4114
4144
  row: enriched as T & Record<string, unknown>,
4115
4145
  error: message,
@@ -4125,11 +4155,24 @@ function createMinimalWorkerCtx(
4125
4155
  message:
4126
4156
  `Row ${absoluteIndex} of ctx.dataset("${name}") failed` +
4127
4157
  `${activeField ? ` at column "${activeField}"` : ''}: ${message} ` +
4128
- '(row recorded as failed; sibling rows continue and the row re-executes on the next run)',
4158
+ (failFastRowErrors
4159
+ ? '(row persisted as failed; onRowError:"fail" fails the run after committing it)'
4160
+ : '(row recorded as failed; sibling rows continue and the row re-executes on the next run)'),
4129
4161
  ts: nowMs(),
4130
4162
  });
4131
4163
  }
4132
4164
  reportChunkProgress(false);
4165
+ // onRowError:'fail' fails the run — but NOT by throwing from
4166
+ // inside this per-row worker. A throw here would propagate out
4167
+ // of `processChunk`, which runs inside the durable chunk step's
4168
+ // retry wrapper, so CF would re-run (and re-fail) the whole
4169
+ // chunk up to its retry limit before the run finally fails.
4170
+ // Instead the row is recorded above (so its already-succeeded
4171
+ // sibling cells persist as recoverable), the chunk completes
4172
+ // normally (its persist step still runs), and `runMap` fails the run
4173
+ // once after the chunk step returns — outside the retry
4174
+ // wrapper. This keeps both the recoverable export AND the
4175
+ // single clean run failure.
4133
4176
  }
4134
4177
  } finally {
4135
4178
  if (rowMarkedActive) {
@@ -4160,7 +4203,7 @@ function createMinimalWorkerCtx(
4160
4203
  executedIndex: number;
4161
4204
  } => entry !== null,
4162
4205
  );
4163
- const failedRowsToPersist = failedRowEntries
4206
+ const allFailedRowsToPersist = failedRowEntries
4164
4207
  .map((failure, executedIndex) =>
4165
4208
  failure
4166
4209
  ? {
@@ -4177,6 +4220,20 @@ function createMinimalWorkerCtx(
4177
4220
  executedIndex: number;
4178
4221
  } => entry !== null,
4179
4222
  );
4223
+ // Under the default isolation, every failed row persists as a
4224
+ // recoverable `_status='failed'` row (it re-executes free next run).
4225
+ // Under `onRowError: 'fail'` the run dies, so a failed row's partial
4226
+ // data is persisted ONLY as a last-resort recovery: when this chunk has
4227
+ // no other recoverable rows (no successful executed rows and no
4228
+ // already-completed rows). That keeps a partial fail-fast run's export
4229
+ // to the rows that fully committed before the failure, while an
4230
+ // all-rows-failed fail-fast run still exposes the persisted partial
4231
+ // cells instead of advertising an empty, unrecoverable dataset.
4232
+ const failedRowsToPersist =
4233
+ failFastRowErrors &&
4234
+ (rowsToPersist.length > 0 || prepared.completedRows.length > 0)
4235
+ ? []
4236
+ : allFailedRowsToPersist;
4180
4237
  if (rowsToPersist.length === 0 && failedRowsToPersist.length === 0) {
4181
4238
  return;
4182
4239
  }
@@ -4492,12 +4549,21 @@ function createMinimalWorkerCtx(
4492
4549
  });
4493
4550
  };
4494
4551
 
4552
+ const failFastRowErrors = opts?.onRowError === 'fail';
4495
4553
  let chunkIndex = 0;
4496
4554
  let chunkStart = 0;
4497
- for await (const chunkRows of iterDatasetChunks(inputRows, rowsPerChunk)) {
4555
+ for await (const rawChunkRows of iterDatasetChunks(inputRows, rowsPerChunk)) {
4498
4556
  assertNotAborted(abortSignal);
4499
- if (chunkRows.length === 0) continue;
4500
- assertUniqueExplicitRowKeys(chunkRows, chunkStart);
4557
+ if (rawChunkRows.length === 0) continue;
4558
+ // Drop duplicate explicit-key rows before anything downstream observes
4559
+ // them. `chunkStart` keeps advancing by the original (pre-dedupe) chunk
4560
+ // length so cross-chunk key indices and persisted input offsets stay
4561
+ // aligned to the original input stream.
4562
+ const chunkRows = dedupeExplicitRowKeys(rawChunkRows, chunkStart);
4563
+ if (chunkRows.length === 0) {
4564
+ chunkStart += rawChunkRows.length;
4565
+ continue;
4566
+ }
4501
4567
  const chunkResult = await runChunkStep(chunkRows, chunkStart, chunkIndex);
4502
4568
  totalRowsWritten += chunkResult.rowsWritten;
4503
4569
  totalRowsExecuted += chunkResult.rowsExecuted;
@@ -4541,14 +4607,56 @@ function createMinimalWorkerCtx(
4541
4607
  canCacheRows = false;
4542
4608
  }
4543
4609
  }
4544
- chunkStart += chunkRows.length;
4610
+ chunkStart += rawChunkRows.length;
4545
4611
  chunkIndex += 1;
4612
+ // onRowError:'fail' short-circuit: once a chunk reports a row failure,
4613
+ // skip the remaining chunks entirely. The failing chunk itself completed
4614
+ // normally (no chunk-step retry storm) and persisted its rows, but
4615
+ // executing later chunks would keep spending provider credits on a run
4616
+ // the caller asked to fail fast. The post-loop fail-fast throw below
4617
+ // reports what committed before the stop.
4618
+ if (failFastRowErrors && totalRowsFailed > 0) {
4619
+ break;
4620
+ }
4621
+ }
4622
+ if (totalDuplicateKeysDropped > 0) {
4623
+ const keySample = droppedDuplicateKeySamples.join(', ');
4624
+ emitEvent({
4625
+ type: 'log',
4626
+ level: 'info',
4627
+ message:
4628
+ `deduped ${totalDuplicateKeysDropped} duplicate dataset key(s) for ctx.dataset("${name}"); keeping first occurrence` +
4629
+ (keySample ? ` (e.g. ${keySample})` : ''),
4630
+ ts: nowMs(),
4631
+ });
4632
+ }
4633
+ if (failFastRowErrors && totalRowsFailed > 0 && totalRowsWritten > 0) {
4634
+ // onRowError:'fail', PARTIAL failure (some rows committed): fail the run
4635
+ // without finalizing the dataset. The committed rows already persisted
4636
+ // per chunk and are surfaced as a recovered dataset (the failed rows'
4637
+ // partial data was intentionally NOT persisted here — only the rows that
4638
+ // fully committed before the failure are recoverable). We reach this
4639
+ // AFTER the failing chunk completed normally (no per-row throw inside
4640
+ // the durable chunk step, so no chunk-step retry storm); later chunks
4641
+ // were skipped by the fail-fast short-circuit in the chunk loop.
4642
+ const firstError = totalRowFailureSamples[0]?.error ?? 'unknown error';
4643
+ throw new Error(
4644
+ `ctx.dataset("${name}") failed for ${totalRowsFailed} executed row(s) under onRowError:'fail'. ` +
4645
+ `First error: ${firstError} ` +
4646
+ `(${totalRowsWritten} row(s) committed before the failure are recoverable — ` +
4647
+ `export them, fix the cause, and re-run to resume)`,
4648
+ );
4546
4649
  }
4547
4650
  if (totalRowsFailed > 0 && totalRowsWritten === 0) {
4548
- // Every row failed: this is a systemic failure (provider outage, broken
4549
- // resolver, exhausted credits), not a partial one. Isolating it would
4550
- // silently complete the run with an empty dataset. Fail loudly — the
4551
- // failed rows are persisted with their errors and re-execute on re-run.
4651
+ // Every executed row failed under onRowError:'fail' (the run dies) OR
4652
+ // the default isolation (a systemic failure, not a partial one; isolating
4653
+ // it would silently complete the run with an empty dataset). Fail loudly,
4654
+ // but finalize first so the failed rows' persisted partial cells (the
4655
+ // always-succeeding sibling columns committed by each chunk's persist
4656
+ // step when no row otherwise succeeded) are summarized and registered as
4657
+ // a recovered dataset — the failed run then advertises a WORKING export
4658
+ // instead of a dead end (#15/#27). The run still fails (the throw below).
4659
+ finalize(totalRowsWritten);
4552
4660
  const firstError = totalRowFailureSamples[0]?.error ?? 'unknown error';
4553
4661
  throw new Error(
4554
4662
  `ctx.dataset("${name}") failed for all ${totalRowsFailed} executed rows. ` +
@@ -250,6 +250,29 @@ export class HttpClient {
250
250
  }
251
251
 
252
252
  if (!response.ok) {
253
+ // A 5xx that escaped a Worker can return Cloudflare's default HTML
254
+ // error page. Never surface raw HTML to CLI users: replace the
255
+ // message with a structured summary and store a short flag in
256
+ // details instead of the full HTML body. JSON-envelope handling
257
+ // below is unchanged.
258
+ const htmlError = detectHtmlErrorBody(
259
+ body,
260
+ response.headers.get('content-type'),
261
+ );
262
+ if (htmlError) {
263
+ throw new DeeplineError(
264
+ htmlError.message(response.status),
265
+ response.status,
266
+ 'API_ERROR',
267
+ {
268
+ htmlErrorPage: true,
269
+ ...(htmlError.title ? { title: htmlError.title } : {}),
270
+ ...(htmlError.workerThrewException
271
+ ? { workerThrewException: true }
272
+ : {}),
273
+ },
274
+ );
275
+ }
253
276
  const errorValue =
254
277
  typeof parsed === 'object' && parsed && 'error' in parsed
255
278
  ? (parsed as Record<string, unknown>).error
@@ -343,6 +366,24 @@ export class HttpClient {
343
366
  }
344
367
  if (!response.ok) {
345
368
  const body = await response.text();
369
+ const htmlError = detectHtmlErrorBody(
370
+ body,
371
+ response.headers.get('content-type'),
372
+ );
373
+ if (htmlError) {
374
+ throw new DeeplineError(
375
+ htmlError.message(response.status),
376
+ response.status,
377
+ 'API_ERROR',
378
+ {
379
+ htmlErrorPage: true,
380
+ ...(htmlError.title ? { title: htmlError.title } : {}),
381
+ ...(htmlError.workerThrewException
382
+ ? { workerThrewException: true }
383
+ : {}),
384
+ },
385
+ );
386
+ }
346
387
  const parsed = parseResponseBody(body);
347
388
  throw new DeeplineError(
348
389
  apiErrorMessage(parsed, response.status),
@@ -430,6 +471,54 @@ function parseResponseBody(body: string): unknown {
430
471
  }
431
472
  }
432
473
 
474
+ /**
475
+ * Detect a raw Cloudflare HTML error page in an error response body.
476
+ *
477
+ * A 5xx that escaped a Worker can return Cloudflare's default HTML error page
478
+ * ("Worker threw exception" / error code 1042). We must never surface raw HTML
479
+ * to CLI users — it lands verbatim in `DeeplineError.message`/`details`. When
480
+ * detected, we synthesize a structured message (status + extracted <title>) and
481
+ * preserve the literal token `Worker threw exception` when present, since SDK
482
+ * retry classifiers (`isTransientPlayStreamError`) regex-match on it.
483
+ */
484
+ function detectHtmlErrorBody(
485
+ body: string,
486
+ contentType?: string | null,
487
+ ): {
488
+ title?: string;
489
+ workerThrewException: boolean;
490
+ message: (status: number) => string;
491
+ } | null {
492
+ const trimmed = body.trim();
493
+ const lower = trimmed.toLowerCase();
494
+ const isHtml =
495
+ (contentType ?? '').toLowerCase().includes('text/html') ||
496
+ lower.startsWith('<!doctype') ||
497
+ lower.startsWith('<html');
498
+ if (!isHtml) {
499
+ return null;
500
+ }
501
+ const titleMatch = trimmed.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
502
+ const title = titleMatch?.[1]?.replace(/\s+/g, ' ').trim() || undefined;
503
+ const workerThrewException = /worker threw exception/i.test(trimmed);
504
+ return {
505
+ title,
506
+ workerThrewException,
507
+ message: (status: number): string => {
508
+ // Detail segments joined with ': ', then the suppression note appended
509
+ // in parens so it never reads as another colon-delimited segment.
510
+ const segments = [`HTTP ${status}`];
511
+ if (workerThrewException) {
512
+ segments.push('Worker threw exception');
513
+ }
514
+ if (title) {
515
+ segments.push(title);
516
+ }
517
+ return `${segments.join(': ')} (Cloudflare HTML error page suppressed)`;
518
+ },
519
+ };
520
+ }
521
+
433
522
  function apiErrorMessage(parsed: unknown, status: number): string {
434
523
  const errorValue =
435
524
  typeof parsed === 'object' && parsed && 'error' in parsed
@@ -53,10 +53,15 @@ export const SDK_RELEASE = {
53
53
  // 0.1.94 is claimed by PR #1527 — this watch-render fix ships as 0.1.95.
54
54
  // 0.1.98 ships the duplicate-browser-tab fix (default-browser detection).
55
55
  // 0.1.99 ships prebuilt job-change source-column preservation and validation fixes.
56
- version: '0.1.100',
56
+ // 0.1.101 ships retryable play artifact publish failures and CI retry hardening.
57
+ // 0.1.102 ships the job-change ledger fixes: recovered-dataset export on
58
+ // failed runs, persisted/succeeded/failed row counts, strict local CSV
59
+ // preflight (existence, data rows, quotes, duplicate headers), HTML error
60
+ // scrubbing, and word-boundary watch truncation.
61
+ version: '0.1.102',
57
62
  apiContract: '2026-06-dataset-column-cell-stale-hard-cutover',
58
63
  supportPolicy: {
59
- latest: '0.1.100',
64
+ latest: '0.1.102',
60
65
  minimumSupported: '0.1.53',
61
66
  deprecatedBelow: '0.1.53',
62
67
  },
@@ -38,7 +38,7 @@ export function isTransientPlayStreamError(error: unknown): boolean {
38
38
  return error.statusCode >= 500 && error.statusCode < 600;
39
39
  }
40
40
  const text = error instanceof Error ? error.message : String(error);
41
- return /auth validation backend timed out|fetch failed|eaddrnotavail|econnreset|etimedout|eai_again|socket hang up/i.test(
41
+ return /auth validation backend timed out|coordinator \/submit(?:\?[^ ]*)? 5\d\d|Worker threw exception|Internal Server Error|Service Unavailable|fetch failed|eaddrnotavail|econnreset|etimedout|eai_again|socket hang up/i.test(
42
42
  text,
43
43
  );
44
44
  }
@@ -0,0 +1,204 @@
1
+ import {
2
+ derivePlayRowIdentity,
3
+ derivePlayRowIdentityFromKey,
4
+ // Relative (not '@shared_libs/...') because this file ships inside the
5
+ // packed SDK's dist/repo source graph (reachable from the
6
+ // apps/play-runner-workers entry), where only relative imports resolve.
7
+ } from '../plays/row-identity';
8
+ import type { MapExecutionScope } from './ctx-types';
9
+
10
+ export type ExplicitMapKeyResolver = (
11
+ row: Record<string, unknown>,
12
+ index: number,
13
+ ) => string;
14
+
15
+ export type ExplicitMapKeyInput =
16
+ | string
17
+ | readonly string[]
18
+ | ((row: Record<string, unknown>, index: number) => unknown);
19
+
20
+ export function stripMapFieldOutputs(
21
+ row: Record<string, unknown>,
22
+ fieldNames: readonly string[],
23
+ ): Record<string, unknown> {
24
+ return Object.fromEntries(
25
+ Object.entries(row).filter(
26
+ ([fieldName]) => !fieldNames.includes(fieldName),
27
+ ),
28
+ );
29
+ }
30
+
31
+ export function createExplicitMapKeyResolver(input: {
32
+ mapNamespace: string;
33
+ fieldNames: readonly string[];
34
+ key: ExplicitMapKeyInput | null | undefined;
35
+ }): ExplicitMapKeyResolver | null {
36
+ if (!input.key) {
37
+ return null;
38
+ }
39
+ const key = input.key;
40
+ return (row, index) => {
41
+ const stableRow = stripMapFieldOutputs(row, input.fieldNames);
42
+ let raw: unknown;
43
+ if (typeof key === 'function') {
44
+ raw = key(stableRow, index);
45
+ } else if (typeof key === 'string') {
46
+ raw = stableRow[key];
47
+ } else {
48
+ raw = key.map((fieldName) => stableRow[fieldName]);
49
+ }
50
+ if (raw === null || raw === undefined) {
51
+ throw new Error(
52
+ `ctx.dataset("${input.mapNamespace}") key function returned ${raw === null ? 'null' : 'undefined'} for row ${index}. ` +
53
+ 'Use a non-empty stable input column (e.g. { key: "email" }) or return a non-empty string, number, or tuple.',
54
+ );
55
+ }
56
+ const asString = normalizeExplicitMapKey(raw);
57
+ if (!asString) {
58
+ throw new Error(
59
+ `ctx.dataset("${input.mapNamespace}") key function returned an empty value for row ${index}. ` +
60
+ 'Use non-empty stable input columns or return a non-empty string, number, or tuple.',
61
+ );
62
+ }
63
+ return asString;
64
+ };
65
+ }
66
+
67
+ function normalizeExplicitMapKey(value: unknown): string {
68
+ if (Array.isArray(value)) {
69
+ const parts = value.map((entry) => normalizeExplicitMapKeyPart(entry));
70
+ return parts.every(Boolean) ? JSON.stringify(parts) : '';
71
+ }
72
+ return normalizeExplicitMapKeyPart(value);
73
+ }
74
+
75
+ function normalizeExplicitMapKeyPart(value: unknown): string {
76
+ if (typeof value === 'number') {
77
+ return Number.isFinite(value) ? String(value) : '';
78
+ }
79
+ return String(value ?? '').trim();
80
+ }
81
+
82
+ /**
83
+ * Maximum number of distinct duplicate keys retained in dedupe metadata for
84
+ * observability. Keeps log lines bounded when many keys collide.
85
+ */
86
+ export const DEDUPE_DUPLICATE_KEY_SAMPLE_CAP = 5;
87
+
88
+ export interface DedupeExplicitMapKeyResult<TRow> {
89
+ /** Rows with duplicates removed; first occurrence per key kept, order preserved. */
90
+ rows: TRow[];
91
+ /** Count of rows dropped because an earlier row produced the same key. */
92
+ droppedCount: number;
93
+ /** Capped sample of the duplicate key values that were deduped. */
94
+ duplicateKeys: string[];
95
+ }
96
+
97
+ /**
98
+ * Resolver shape used by {@link dedupeExplicitMapKeyRows}. Receives each row in
99
+ * its original (pre-dedupe) position. Callers that hold output rows in a
100
+ * different form than `Record<string, unknown>` adapt to this signature with a
101
+ * thin wrapper.
102
+ */
103
+ export type DedupeKeyResolver<TRow> = (row: TRow, index: number) => string;
104
+
105
+ /**
106
+ * Silent dedupe for explicit `ctx.dataset(...)` map keys. Keeps the first row
107
+ * for each canonical key and drops subsequent duplicates, preserving original
108
+ * order. With no resolver this is a pure passthrough (a fresh array copy with
109
+ * empty metadata). Replaces the prior throw-on-duplicate behavior per product
110
+ * decision: duplicate keys should never fail the run.
111
+ */
112
+ export function dedupeExplicitMapKeyRows<TRow>(input: {
113
+ rows: readonly TRow[];
114
+ resolver: DedupeKeyResolver<TRow> | null;
115
+ }): DedupeExplicitMapKeyResult<TRow> {
116
+ if (!input.resolver) {
117
+ return { rows: [...input.rows], droppedCount: 0, duplicateKeys: [] };
118
+ }
119
+ const resolver = input.resolver;
120
+ const seenKeys = new Set<string>();
121
+ const duplicateKeys: string[] = [];
122
+ const seenDuplicateKeys = new Set<string>();
123
+ const rows: TRow[] = [];
124
+ for (let index = 0; index < input.rows.length; index += 1) {
125
+ const row = input.rows[index]!;
126
+ const keyValue = resolver(row, index);
127
+ if (seenKeys.has(keyValue)) {
128
+ if (
129
+ !seenDuplicateKeys.has(keyValue) &&
130
+ duplicateKeys.length < DEDUPE_DUPLICATE_KEY_SAMPLE_CAP
131
+ ) {
132
+ duplicateKeys.push(keyValue);
133
+ }
134
+ seenDuplicateKeys.add(keyValue);
135
+ continue;
136
+ }
137
+ seenKeys.add(keyValue);
138
+ rows.push(row);
139
+ }
140
+ return {
141
+ rows,
142
+ droppedCount: input.rows.length - rows.length,
143
+ duplicateKeys,
144
+ };
145
+ }
146
+
147
+ export function deriveMapRowIdentity(input: {
148
+ row: Record<string, unknown>;
149
+ index?: number;
150
+ artifactTableNamespace: string;
151
+ fieldNames?: readonly string[];
152
+ explicitKey?: ExplicitMapKeyResolver | null;
153
+ }): string {
154
+ const stableRow = stripMapFieldOutputs(input.row, input.fieldNames ?? []);
155
+ return input.explicitKey
156
+ ? derivePlayRowIdentityFromKey(
157
+ input.explicitKey(stableRow, input.index ?? 0),
158
+ input.artifactTableNamespace,
159
+ )
160
+ : derivePlayRowIdentity(stableRow, input.artifactTableNamespace);
161
+ }
162
+
163
+ export class MapRowIdentity {
164
+ private nextInvocationIndex: number;
165
+
166
+ constructor(startInvocationIndex = 0) {
167
+ this.nextInvocationIndex = startInvocationIndex;
168
+ }
169
+
170
+ get invocationIndex(): number {
171
+ return this.nextInvocationIndex;
172
+ }
173
+
174
+ set invocationIndex(value: number) {
175
+ this.nextInvocationIndex = value;
176
+ }
177
+
178
+ createScope(input: {
179
+ logicalNamespace: string;
180
+ artifactTableNamespace: string;
181
+ mapNodeId?: string | null;
182
+ fieldNames?: readonly string[];
183
+ explicitKey?: ExplicitMapKeyResolver | null;
184
+ }): MapExecutionScope {
185
+ const mapInvocationId = `${input.logicalNamespace}:${this.nextInvocationIndex}`;
186
+ this.nextInvocationIndex += 1;
187
+ const explicitKey = input.explicitKey ?? null;
188
+ const fieldNames = input.fieldNames ?? [];
189
+ return {
190
+ mapInvocationId,
191
+ mapNodeId: input.mapNodeId ?? null,
192
+ logicalNamespace: input.logicalNamespace,
193
+ artifactTableNamespace: input.artifactTableNamespace,
194
+ rowIdentity: (row, index) =>
195
+ deriveMapRowIdentity({
196
+ row,
197
+ index,
198
+ artifactTableNamespace: input.artifactTableNamespace,
199
+ fieldNames,
200
+ explicitKey,
201
+ }),
202
+ };
203
+ }
204
+ }
@@ -592,13 +592,18 @@ export function reducePlayRunLedgerEvent(
592
592
  };
593
593
 
594
594
  switch (event.type) {
595
- case 'run.created':
595
+ case 'run.created': {
596
+ const createdStatus = event.status ?? base.status;
596
597
  return withTiming({
597
598
  ...base,
598
599
  playName: event.playName ?? base.playName ?? null,
599
- status: event.status ?? base.status,
600
+ status: createdStatus,
600
601
  createdAt: base.createdAt ?? occurredAt,
602
+ startedAt:
603
+ base.startedAt ??
604
+ (createdStatus === 'running' ? occurredAt : base.startedAt),
601
605
  });
606
+ }
602
607
  case 'run.started':
603
608
  return withTiming({
604
609
  ...base,
@@ -50,6 +50,10 @@ export type PlayRunStreamNodeState = {
50
50
  export type PlayRunLiveSnapshot = {
51
51
  runId: string;
52
52
  status: PlayRunLiveStatus;
53
+ createdAt?: number | null;
54
+ startedAt?: number | null;
55
+ finishedAt?: number | null;
56
+ durationMs?: number | null;
53
57
  updatedAt: number | null;
54
58
  /**
55
59
  * Rotating log tail (the ledger snapshot's bounded `logTail`; the wire name
@@ -167,6 +171,10 @@ function buildSnapshotFromLedger(
167
171
  return {
168
172
  runId: snapshot.runId,
169
173
  status: normalizePlayRunLiveStatus(snapshot.status),
174
+ createdAt: snapshot.createdAt ?? null,
175
+ startedAt: snapshot.startedAt ?? null,
176
+ finishedAt: snapshot.finishedAt ?? null,
177
+ durationMs: snapshot.durationMs ?? null,
170
178
  updatedAt:
171
179
  snapshot.updatedAt ?? snapshot.finishedAt ?? snapshot.startedAt ?? null,
172
180
  logs: snapshot.logTail,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "deepline",
3
- "version": "0.1.100",
3
+ "version": "0.1.102",
4
4
  "description": "Deepline SDK + CLI — B2B data enrichment powered by durable cloud execution",
5
5
  "license": "MIT",
6
6
  "repository": {