deepline 0.1.100 → 0.1.102
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +434 -20
- package/dist/cli/index.mjs +437 -22
- package/dist/index.js +69 -3
- package/dist/index.mjs +69 -3
- package/dist/repo/apps/play-runner-workers/src/coordinator-entry.ts +209 -68
- package/dist/repo/apps/play-runner-workers/src/entry.ts +141 -33
- package/dist/repo/sdk/src/http.ts +89 -0
- package/dist/repo/sdk/src/release.ts +7 -2
- package/dist/repo/sdk/src/stream-reconnect.ts +1 -1
- package/dist/repo/shared_libs/play-runtime/map-row-identity.ts +204 -0
- package/dist/repo/shared_libs/play-runtime/run-ledger.ts +7 -2
- package/dist/repo/shared_libs/play-runtime/run-snapshot-stream.ts +8 -0
- package/package.json +1 -1
|
@@ -78,6 +78,7 @@ import {
|
|
|
78
78
|
derivePlayRowIdentityFromKey,
|
|
79
79
|
deriveToolRequestIdentity,
|
|
80
80
|
} from '../../../shared_libs/plays/row-identity';
|
|
81
|
+
import { DEDUPE_DUPLICATE_KEY_SAMPLE_CAP } from '../../../shared_libs/play-runtime/map-row-identity';
|
|
81
82
|
import {
|
|
82
83
|
getTopLevelPipelineSubsteps,
|
|
83
84
|
getCompiledPipelineSubsteps,
|
|
@@ -3667,24 +3668,52 @@ function createMinimalWorkerCtx(
|
|
|
3667
3668
|
? derivePlayRowIdentity(inputRow, name)
|
|
3668
3669
|
: derivePlayRowIdentityFromKey(explicitKeyValue, name);
|
|
3669
3670
|
};
|
|
3670
|
-
|
|
3671
|
-
|
|
3671
|
+
// Cross-chunk dedupe accumulators for the single end-of-map log line.
|
|
3672
|
+
// `dedupeExplicitMapKeyRows` (shared_libs/play-runtime/map-row-identity.ts)
|
|
3673
|
+
// can't be reused directly here because that helper is a single pass over a
|
|
3674
|
+
// fully materialized array, whereas the workers path streams rows in chunks
|
|
3675
|
+
// and must keep a persistent seen-set (`explicitRowKeysSeen`) across chunks.
|
|
3676
|
+
// We mirror its exact semantics (keep-first, order preserved, capped sample)
|
|
3677
|
+
// and share its sample cap constant so the two stay in lockstep.
|
|
3678
|
+
let totalDuplicateKeysDropped = 0;
|
|
3679
|
+
const droppedDuplicateKeySamples: string[] = [];
|
|
3680
|
+
const seenDroppedDuplicateKeys = new Set<string>();
|
|
3681
|
+
// Silent dedupe of duplicate explicit map keys: keep the first row per
|
|
3682
|
+
// canonical key and drop subsequent duplicates, preserving input order.
|
|
3683
|
+
// Deduping here — before rows reach `processChunk` — keeps every downstream
|
|
3684
|
+
// derivation (chunk counts, sheet writes, resumable keys, result mapping)
|
|
3685
|
+
// consistent: dropped rows never enter the chunk, so there is no index
|
|
3686
|
+
// mismatch. Replaces the prior throw-on-duplicate behavior per product
|
|
3687
|
+
// decision: duplicate keys must never fail the run.
|
|
3688
|
+
const dedupeExplicitRowKeys = (
|
|
3689
|
+
chunkRows: readonly T[],
|
|
3672
3690
|
chunkStart: number,
|
|
3673
|
-
) => {
|
|
3674
|
-
if (!explicitRowKeysSeen) return;
|
|
3691
|
+
): T[] => {
|
|
3692
|
+
if (!explicitRowKeysSeen) return [...chunkRows];
|
|
3693
|
+
const kept: T[] = [];
|
|
3675
3694
|
for (let localIndex = 0; localIndex < chunkRows.length; localIndex += 1) {
|
|
3676
3695
|
const index = chunkStart + localIndex;
|
|
3677
|
-
const
|
|
3678
|
-
|
|
3679
|
-
|
|
3680
|
-
|
|
3681
|
-
|
|
3682
|
-
|
|
3683
|
-
|
|
3684
|
-
|
|
3696
|
+
const row = chunkRows[localIndex]!;
|
|
3697
|
+
const keyValue = resolveExplicitKeyValue(row, index);
|
|
3698
|
+
if (keyValue == null) {
|
|
3699
|
+
kept.push(row);
|
|
3700
|
+
continue;
|
|
3701
|
+
}
|
|
3702
|
+
if (explicitRowKeysSeen.has(keyValue)) {
|
|
3703
|
+
totalDuplicateKeysDropped += 1;
|
|
3704
|
+
if (
|
|
3705
|
+
!seenDroppedDuplicateKeys.has(keyValue) &&
|
|
3706
|
+
droppedDuplicateKeySamples.length < DEDUPE_DUPLICATE_KEY_SAMPLE_CAP
|
|
3707
|
+
) {
|
|
3708
|
+
droppedDuplicateKeySamples.push(keyValue);
|
|
3709
|
+
}
|
|
3710
|
+
seenDroppedDuplicateKeys.add(keyValue);
|
|
3711
|
+
continue;
|
|
3685
3712
|
}
|
|
3686
|
-
explicitRowKeysSeen
|
|
3713
|
+
explicitRowKeysSeen.set(keyValue, index);
|
|
3714
|
+
kept.push(row);
|
|
3687
3715
|
}
|
|
3716
|
+
return kept;
|
|
3688
3717
|
};
|
|
3689
3718
|
|
|
3690
3719
|
let totalRowsWritten = 0;
|
|
@@ -4085,14 +4114,9 @@ function createMinimalWorkerCtx(
|
|
|
4085
4114
|
completedExecutedRows += 1;
|
|
4086
4115
|
reportChunkProgress(false);
|
|
4087
4116
|
} catch (rowError) {
|
|
4088
|
-
//
|
|
4089
|
-
//
|
|
4090
|
-
|
|
4091
|
-
// and `onRowError: 'fail'` opts the map into fail-fast.
|
|
4092
|
-
if (
|
|
4093
|
-
failFastRowErrors ||
|
|
4094
|
-
isRowIsolationExemptError(rowError)
|
|
4095
|
-
) {
|
|
4117
|
+
// Abort/budget errors stay run-fatal and leave no partial
|
|
4118
|
+
// state: rethrow immediately without recording the row.
|
|
4119
|
+
if (isRowIsolationExemptError(rowError)) {
|
|
4096
4120
|
throw rowError;
|
|
4097
4121
|
}
|
|
4098
4122
|
const message = formatWorkerRowFailureMessage(rowError);
|
|
@@ -4108,8 +4132,14 @@ function createMinimalWorkerCtx(
|
|
|
4108
4132
|
Object.keys(cellMetaPatch).length > 0
|
|
4109
4133
|
? cellMetaPatch
|
|
4110
4134
|
: undefined;
|
|
4111
|
-
// Keep the partially-enriched row so
|
|
4112
|
-
// cells
|
|
4135
|
+
// Keep the partially-enriched row so its already-succeeded
|
|
4136
|
+
// sibling cells (e.g. a contact column that ran before the
|
|
4137
|
+
// failing column) persist as a recoverable `_status='failed'`
|
|
4138
|
+
// sheet row. This holds for BOTH the default isolation path
|
|
4139
|
+
// (row re-executes free on the next run) AND `onRowError:
|
|
4140
|
+
// 'fail'`: the chunk still persists every recorded row, so the
|
|
4141
|
+
// failed run advertises a working recovered export even when
|
|
4142
|
+
// every row fails (see the runMap-level fail-fast throw).
|
|
4113
4143
|
failedRowEntries[myIndex] = {
|
|
4114
4144
|
row: enriched as T & Record<string, unknown>,
|
|
4115
4145
|
error: message,
|
|
@@ -4125,11 +4155,24 @@ function createMinimalWorkerCtx(
|
|
|
4125
4155
|
message:
|
|
4126
4156
|
`Row ${absoluteIndex} of ctx.dataset("${name}") failed` +
|
|
4127
4157
|
`${activeField ? ` at column "${activeField}"` : ''}: ${message} ` +
|
|
4128
|
-
|
|
4158
|
+
(failFastRowErrors
|
|
4159
|
+
? '(row persisted as failed; onRowError:"fail" fails the run after committing it)'
|
|
4160
|
+
: '(row recorded as failed; sibling rows continue and the row re-executes on the next run)'),
|
|
4129
4161
|
ts: nowMs(),
|
|
4130
4162
|
});
|
|
4131
4163
|
}
|
|
4132
4164
|
reportChunkProgress(false);
|
|
4165
|
+
// onRowError:'fail' fails the run — but NOT by throwing from
|
|
4166
|
+
// inside this per-row worker. A throw here would propagate out
|
|
4167
|
+
// of `processChunk`, which runs inside the durable chunk step's
|
|
4168
|
+
// retry wrapper, so CF would re-run (and re-fail) the whole
|
|
4169
|
+
// chunk up to its retry limit before the run finally fails.
|
|
4170
|
+
// Instead the row is recorded above (so its already-succeeded
|
|
4171
|
+
// sibling cells persist as recoverable), the chunk completes
|
|
4172
|
+
// normally (persisting all rows), and `runMap` fails the run
|
|
4173
|
+
// once after the chunk step returns — outside the retry
|
|
4174
|
+
// wrapper. This keeps both the recoverable export AND the
|
|
4175
|
+
// single clean run failure.
|
|
4133
4176
|
}
|
|
4134
4177
|
} finally {
|
|
4135
4178
|
if (rowMarkedActive) {
|
|
@@ -4160,7 +4203,7 @@ function createMinimalWorkerCtx(
|
|
|
4160
4203
|
executedIndex: number;
|
|
4161
4204
|
} => entry !== null,
|
|
4162
4205
|
);
|
|
4163
|
-
const
|
|
4206
|
+
const allFailedRowsToPersist = failedRowEntries
|
|
4164
4207
|
.map((failure, executedIndex) =>
|
|
4165
4208
|
failure
|
|
4166
4209
|
? {
|
|
@@ -4177,6 +4220,20 @@ function createMinimalWorkerCtx(
|
|
|
4177
4220
|
executedIndex: number;
|
|
4178
4221
|
} => entry !== null,
|
|
4179
4222
|
);
|
|
4223
|
+
// Under the default isolation, every failed row persists as a
|
|
4224
|
+
// recoverable `_status='failed'` row (it re-executes free next run).
|
|
4225
|
+
// Under `onRowError: 'fail'` the run dies, so a failed row's partial
|
|
4226
|
+
// data is persisted ONLY as a last-resort recovery: when this chunk has
|
|
4227
|
+
// no other recoverable rows (no successful executed rows and no
|
|
4228
|
+
// already-completed rows). That keeps a partial fail-fast run's export
|
|
4229
|
+
// to the rows that fully committed before the failure, while an
|
|
4230
|
+
// all-rows-failed fail-fast run still exposes the persisted partial
|
|
4231
|
+
// cells instead of advertising an empty, unrecoverable dataset.
|
|
4232
|
+
const failedRowsToPersist =
|
|
4233
|
+
failFastRowErrors &&
|
|
4234
|
+
(rowsToPersist.length > 0 || prepared.completedRows.length > 0)
|
|
4235
|
+
? []
|
|
4236
|
+
: allFailedRowsToPersist;
|
|
4180
4237
|
if (rowsToPersist.length === 0 && failedRowsToPersist.length === 0) {
|
|
4181
4238
|
return;
|
|
4182
4239
|
}
|
|
@@ -4492,12 +4549,21 @@ function createMinimalWorkerCtx(
|
|
|
4492
4549
|
});
|
|
4493
4550
|
};
|
|
4494
4551
|
|
|
4552
|
+
const failFastRowErrors = opts?.onRowError === 'fail';
|
|
4495
4553
|
let chunkIndex = 0;
|
|
4496
4554
|
let chunkStart = 0;
|
|
4497
|
-
for await (const
|
|
4555
|
+
for await (const rawChunkRows of iterDatasetChunks(inputRows, rowsPerChunk)) {
|
|
4498
4556
|
assertNotAborted(abortSignal);
|
|
4499
|
-
if (
|
|
4500
|
-
|
|
4557
|
+
if (rawChunkRows.length === 0) continue;
|
|
4558
|
+
// Drop duplicate explicit-key rows before anything downstream observes
|
|
4559
|
+
// them. `chunkStart` keeps advancing by the original (pre-dedupe) chunk
|
|
4560
|
+
// length so cross-chunk key indices and persisted input offsets stay
|
|
4561
|
+
// aligned to the original input stream.
|
|
4562
|
+
const chunkRows = dedupeExplicitRowKeys(rawChunkRows, chunkStart);
|
|
4563
|
+
if (chunkRows.length === 0) {
|
|
4564
|
+
chunkStart += rawChunkRows.length;
|
|
4565
|
+
continue;
|
|
4566
|
+
}
|
|
4501
4567
|
const chunkResult = await runChunkStep(chunkRows, chunkStart, chunkIndex);
|
|
4502
4568
|
totalRowsWritten += chunkResult.rowsWritten;
|
|
4503
4569
|
totalRowsExecuted += chunkResult.rowsExecuted;
|
|
@@ -4541,14 +4607,56 @@ function createMinimalWorkerCtx(
|
|
|
4541
4607
|
canCacheRows = false;
|
|
4542
4608
|
}
|
|
4543
4609
|
}
|
|
4544
|
-
chunkStart +=
|
|
4610
|
+
chunkStart += rawChunkRows.length;
|
|
4545
4611
|
chunkIndex += 1;
|
|
4612
|
+
// onRowError:'fail' short-circuit: once a chunk reports a row failure,
|
|
4613
|
+
// skip the remaining chunks entirely. The failing chunk itself completed
|
|
4614
|
+
// normally (no chunk-step retry storm) and persisted its rows, but
|
|
4615
|
+
// executing later chunks would keep spending provider credits on a run
|
|
4616
|
+
// the caller asked to fail fast. The post-loop fail-fast throw below
|
|
4617
|
+
// reports what committed before the stop.
|
|
4618
|
+
if (failFastRowErrors && totalRowsFailed > 0) {
|
|
4619
|
+
break;
|
|
4620
|
+
}
|
|
4621
|
+
}
|
|
4622
|
+
if (totalDuplicateKeysDropped > 0) {
|
|
4623
|
+
const keySample = droppedDuplicateKeySamples.join(', ');
|
|
4624
|
+
emitEvent({
|
|
4625
|
+
type: 'log',
|
|
4626
|
+
level: 'info',
|
|
4627
|
+
message:
|
|
4628
|
+
`deduped ${totalDuplicateKeysDropped} duplicate dataset key(s) for ctx.dataset("${name}"); keeping first occurrence` +
|
|
4629
|
+
(keySample ? ` (e.g. ${keySample})` : ''),
|
|
4630
|
+
ts: nowMs(),
|
|
4631
|
+
});
|
|
4632
|
+
}
|
|
4633
|
+
if (failFastRowErrors && totalRowsFailed > 0 && totalRowsWritten > 0) {
|
|
4634
|
+
// onRowError:'fail', PARTIAL failure (some rows committed): fail the run
|
|
4635
|
+
// without finalizing the dataset. The committed rows already persisted
|
|
4636
|
+
// per chunk and are surfaced as a recovered dataset (the failed rows'
|
|
4637
|
+
// partial data was intentionally NOT persisted here — only the rows that
|
|
4638
|
+
// fully committed before the failure are recoverable). We reach this
|
|
4639
|
+
// AFTER the failing chunk completed normally (no per-row throw inside
|
|
4640
|
+
// the durable chunk step, so no chunk-step retry storm); later chunks
|
|
4641
|
+
// were skipped by the fail-fast short-circuit in the chunk loop.
|
|
4642
|
+
const firstError = totalRowFailureSamples[0]?.error ?? 'unknown error';
|
|
4643
|
+
throw new Error(
|
|
4644
|
+
`ctx.dataset("${name}") failed for ${totalRowsFailed} executed row(s) under onRowError:'fail'. ` +
|
|
4645
|
+
`First error: ${firstError} ` +
|
|
4646
|
+
`(${totalRowsWritten} row(s) committed before the failure are recoverable — ` +
|
|
4647
|
+
`export them, fix the cause, and re-run to resume)`,
|
|
4648
|
+
);
|
|
4546
4649
|
}
|
|
4547
4650
|
if (totalRowsFailed > 0 && totalRowsWritten === 0) {
|
|
4548
|
-
// Every row failed
|
|
4549
|
-
//
|
|
4550
|
-
// silently complete the run with an empty dataset. Fail loudly
|
|
4551
|
-
//
|
|
4651
|
+
// Every executed row failed — under onRowError:'fail' (the run dies) OR
|
|
4652
|
+
// the default isolation (a systemic failure, not a partial one; isolating
|
|
4653
|
+
// it would silently complete the run with an empty dataset). Fail loudly,
|
|
4654
|
+
// but finalize first so the failed rows' persisted partial cells (the
|
|
4655
|
+
// always-succeeding sibling columns committed by each chunk's persist
|
|
4656
|
+
// step when no row otherwise succeeded) are summarized and registered as
|
|
4657
|
+
// a recovered dataset — the failed run then advertises a WORKING export
|
|
4658
|
+
// instead of a dead end (#15/#27). The run still fails (the throw below).
|
|
4659
|
+
finalize(totalRowsWritten);
|
|
4552
4660
|
const firstError = totalRowFailureSamples[0]?.error ?? 'unknown error';
|
|
4553
4661
|
throw new Error(
|
|
4554
4662
|
`ctx.dataset("${name}") failed for all ${totalRowsFailed} executed rows. ` +
|
|
@@ -250,6 +250,29 @@ export class HttpClient {
|
|
|
250
250
|
}
|
|
251
251
|
|
|
252
252
|
if (!response.ok) {
|
|
253
|
+
// A 5xx that escaped a Worker can return Cloudflare's default HTML
|
|
254
|
+
// error page. Never surface raw HTML to CLI users: replace the
|
|
255
|
+
// message with a structured summary and store a short flag in
|
|
256
|
+
// details instead of the full HTML body. JSON-envelope handling
|
|
257
|
+
// below is unchanged.
|
|
258
|
+
const htmlError = detectHtmlErrorBody(
|
|
259
|
+
body,
|
|
260
|
+
response.headers.get('content-type'),
|
|
261
|
+
);
|
|
262
|
+
if (htmlError) {
|
|
263
|
+
throw new DeeplineError(
|
|
264
|
+
htmlError.message(response.status),
|
|
265
|
+
response.status,
|
|
266
|
+
'API_ERROR',
|
|
267
|
+
{
|
|
268
|
+
htmlErrorPage: true,
|
|
269
|
+
...(htmlError.title ? { title: htmlError.title } : {}),
|
|
270
|
+
...(htmlError.workerThrewException
|
|
271
|
+
? { workerThrewException: true }
|
|
272
|
+
: {}),
|
|
273
|
+
},
|
|
274
|
+
);
|
|
275
|
+
}
|
|
253
276
|
const errorValue =
|
|
254
277
|
typeof parsed === 'object' && parsed && 'error' in parsed
|
|
255
278
|
? (parsed as Record<string, unknown>).error
|
|
@@ -343,6 +366,24 @@ export class HttpClient {
|
|
|
343
366
|
}
|
|
344
367
|
if (!response.ok) {
|
|
345
368
|
const body = await response.text();
|
|
369
|
+
const htmlError = detectHtmlErrorBody(
|
|
370
|
+
body,
|
|
371
|
+
response.headers.get('content-type'),
|
|
372
|
+
);
|
|
373
|
+
if (htmlError) {
|
|
374
|
+
throw new DeeplineError(
|
|
375
|
+
htmlError.message(response.status),
|
|
376
|
+
response.status,
|
|
377
|
+
'API_ERROR',
|
|
378
|
+
{
|
|
379
|
+
htmlErrorPage: true,
|
|
380
|
+
...(htmlError.title ? { title: htmlError.title } : {}),
|
|
381
|
+
...(htmlError.workerThrewException
|
|
382
|
+
? { workerThrewException: true }
|
|
383
|
+
: {}),
|
|
384
|
+
},
|
|
385
|
+
);
|
|
386
|
+
}
|
|
346
387
|
const parsed = parseResponseBody(body);
|
|
347
388
|
throw new DeeplineError(
|
|
348
389
|
apiErrorMessage(parsed, response.status),
|
|
@@ -430,6 +471,54 @@ function parseResponseBody(body: string): unknown {
|
|
|
430
471
|
}
|
|
431
472
|
}
|
|
432
473
|
|
|
474
|
+
/**
 * Detect a raw Cloudflare HTML error page in an error response body.
 *
 * A 5xx that escaped a Worker can return Cloudflare's default HTML error page
 * ("Worker threw exception" / error code 1042). We must never surface raw HTML
 * to CLI users — it lands verbatim in `DeeplineError.message`/`details`. When
 * detected, we synthesize a structured message (status + extracted <title>) and
 * preserve the literal token `Worker threw exception` when present, since SDK
 * retry classifiers (`isTransientPlayStreamError`) regex-match on it.
 *
 * A body is treated as an HTML page when it starts with `<!doctype` / `<html`,
 * or when the `content-type` header declares `text/html` AND the body is
 * non-empty and does not look like JSON. The header alone is not trusted: a
 * JSON error envelope served with a wrong `text/html` header, or an empty
 * body, returns `null` so the caller's regular JSON-envelope handling runs
 * instead of reporting a "suppressed HTML page" that never existed.
 *
 * @param body - Raw response body text.
 * @param contentType - Value of the `content-type` response header, if any.
 * @returns `null` when the body is not an HTML page; otherwise the whitespace-
 *   collapsed `<title>` (when one exists), whether the page contains
 *   "Worker threw exception", and a `message(status)` builder that produces
 *   the scrubbed error message.
 */
function detectHtmlErrorBody(
  body: string,
  contentType?: string | null,
): {
  title?: string;
  workerThrewException: boolean;
  message: (status: number) => string;
} | null {
  const trimmed = body.trim();
  const lower = trimmed.toLowerCase();
  const startsWithHtmlMarkup =
    lower.startsWith('<!doctype') || lower.startsWith('<html');
  const declaredHtml = (contentType ?? '').toLowerCase().includes('text/html');
  // A JSON envelope always opens with `{` or `[`; an HTML page never does.
  const looksLikeJson = trimmed.startsWith('{') || trimmed.startsWith('[');
  const isHtml =
    startsWithHtmlMarkup ||
    (declaredHtml && trimmed.length > 0 && !looksLikeJson);
  if (!isHtml) {
    return null;
  }
  const titleMatch = trimmed.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  // Collapse newlines/indentation inside <title>; an empty title becomes undefined.
  const title = titleMatch?.[1]?.replace(/\s+/g, ' ').trim() || undefined;
  const workerThrewException = /worker threw exception/i.test(trimmed);
  return {
    title,
    workerThrewException,
    message: (status: number): string => {
      // Detail segments joined with ': ', then the suppression note appended
      // in parens so it never reads as another colon-delimited segment.
      const segments = [`HTTP ${status}`];
      if (workerThrewException) {
        segments.push('Worker threw exception');
      }
      if (title) {
        segments.push(title);
      }
      return `${segments.join(': ')} (Cloudflare HTML error page suppressed)`;
    },
  };
}
|
|
521
|
+
|
|
433
522
|
function apiErrorMessage(parsed: unknown, status: number): string {
|
|
434
523
|
const errorValue =
|
|
435
524
|
typeof parsed === 'object' && parsed && 'error' in parsed
|
|
@@ -53,10 +53,15 @@ export const SDK_RELEASE = {
|
|
|
53
53
|
// 0.1.94 is claimed by PR #1527 — this watch-render fix ships as 0.1.95.
|
|
54
54
|
// 0.1.98 ships the duplicate-browser-tab fix (default-browser detection).
|
|
55
55
|
// 0.1.99 ships prebuilt job-change source-column preservation and validation fixes.
|
|
56
|
-
|
|
56
|
+
// 0.1.101 ships retryable play artifact publish failures and CI retry hardening.
|
|
57
|
+
// 0.1.102 ships the job-change ledger fixes: recovered-dataset export on
|
|
58
|
+
// failed runs, persisted/succeeded/failed row counts, strict local CSV
|
|
59
|
+
// preflight (existence, data rows, quotes, duplicate headers), HTML error
|
|
60
|
+
// scrubbing, and word-boundary watch truncation.
|
|
61
|
+
version: '0.1.102',
|
|
57
62
|
apiContract: '2026-06-dataset-column-cell-stale-hard-cutover',
|
|
58
63
|
supportPolicy: {
|
|
59
|
-
latest: '0.1.
|
|
64
|
+
latest: '0.1.102',
|
|
60
65
|
minimumSupported: '0.1.53',
|
|
61
66
|
deprecatedBelow: '0.1.53',
|
|
62
67
|
},
|
|
@@ -38,7 +38,7 @@ export function isTransientPlayStreamError(error: unknown): boolean {
|
|
|
38
38
|
return error.statusCode >= 500 && error.statusCode < 600;
|
|
39
39
|
}
|
|
40
40
|
const text = error instanceof Error ? error.message : String(error);
|
|
41
|
-
return /auth validation backend timed out|fetch failed|eaddrnotavail|econnreset|etimedout|eai_again|socket hang up/i.test(
|
|
41
|
+
return /auth validation backend timed out|coordinator \/submit(?:\?[^ ]*)? 5\d\d|Worker threw exception|Internal Server Error|Service Unavailable|fetch failed|eaddrnotavail|econnreset|etimedout|eai_again|socket hang up/i.test(
|
|
42
42
|
text,
|
|
43
43
|
);
|
|
44
44
|
}
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
import {
|
|
2
|
+
derivePlayRowIdentity,
|
|
3
|
+
derivePlayRowIdentityFromKey,
|
|
4
|
+
// Relative (not '@shared_libs/...') because this file ships inside the
|
|
5
|
+
// packed SDK's dist/repo source graph (reachable from the
|
|
6
|
+
// apps/play-runner-workers entry), where only relative imports resolve.
|
|
7
|
+
} from '../plays/row-identity';
|
|
8
|
+
import type { MapExecutionScope } from './ctx-types';
|
|
9
|
+
|
|
10
|
+
/**
 * Resolves the canonical explicit key string for a row at the given index.
 */
export type ExplicitMapKeyResolver = (
  row: Record<string, unknown>,
  index: number,
) => string;

/**
 * Accepted shapes for an explicit map key: a single column name, a list of
 * column names (tuple key), or a function computing the key from the row.
 */
export type ExplicitMapKeyInput =
  | string
  | readonly string[]
  | ((row: Record<string, unknown>, index: number) => unknown);

/**
 * Return a shallow copy of `row` without the properties named in `fieldNames`.
 *
 * The input row is not mutated and the remaining properties keep their
 * original enumeration order.
 *
 * @param row - Source row.
 * @param fieldNames - Property names to leave out (presumably the map's own
 *   output columns — confirm against callers).
 * @returns A new object holding every other own enumerable property.
 */
export function stripMapFieldOutputs(
  row: Record<string, unknown>,
  fieldNames: readonly string[],
): Record<string, unknown> {
  const retained: Array<[string, unknown]> = [];
  for (const entry of Object.entries(row)) {
    if (fieldNames.includes(entry[0])) {
      continue;
    }
    retained.push(entry);
  }
  return Object.fromEntries(retained);
}
|
|
30
|
+
|
|
31
|
+
/**
 * Build a resolver that maps a row to its canonical explicit key string.
 *
 * The raw key is read from a copy of the row with `fieldNames` stripped, then
 * normalized via `normalizeExplicitMapKey`:
 * - function key: called with the stripped row and the index;
 * - string key: the value of that column;
 * - array key: the tuple of the listed columns' values.
 *
 * @param input.mapNamespace - Dataset name, used only in error messages.
 * @param input.fieldNames - Properties removed from the row before the key is read.
 * @param input.key - Key specification; any falsy value means "no explicit key".
 * @returns `null` when no key is configured, otherwise the resolver. The
 *   resolver throws when the raw key is `null`/`undefined` or normalizes to
 *   an empty string.
 */
export function createExplicitMapKeyResolver(input: {
  mapNamespace: string;
  fieldNames: readonly string[];
  key: ExplicitMapKeyInput | null | undefined;
}): ExplicitMapKeyResolver | null {
  const keySpec = input.key;
  if (!keySpec) {
    return null;
  }

  // Reads the un-normalized key for one row according to the key's shape.
  const readRawKey = (
    stableRow: Record<string, unknown>,
    index: number,
  ): unknown => {
    if (typeof keySpec === 'function') {
      return keySpec(stableRow, index);
    }
    if (typeof keySpec === 'string') {
      return stableRow[keySpec];
    }
    return keySpec.map((fieldName) => stableRow[fieldName]);
  };

  return (row, index) => {
    const raw = readRawKey(stripMapFieldOutputs(row, input.fieldNames), index);
    if (raw === null || raw === undefined) {
      const returned = raw === null ? 'null' : 'undefined';
      throw new Error(
        `ctx.dataset("${input.mapNamespace}") key function returned ${returned} for row ${index}. ` +
          'Use a non-empty stable input column (e.g. { key: "email" }) or return a non-empty string, number, or tuple.',
      );
    }
    const canonicalKey = normalizeExplicitMapKey(raw);
    if (canonicalKey) {
      return canonicalKey;
    }
    throw new Error(
      `ctx.dataset("${input.mapNamespace}") key function returned an empty value for row ${index}. ` +
        'Use non-empty stable input columns or return a non-empty string, number, or tuple.',
    );
  };
}
|
|
66
|
+
|
|
67
|
+
/**
 * Normalize a raw explicit key into its canonical string form.
 *
 * - Scalar: the normalized part (see `normalizeExplicitMapKeyPart`).
 * - Tuple (array): the JSON encoding of the normalized parts, e.g.
 *   `["acme","42"]`.
 *
 * Returns `''` (which `createExplicitMapKeyResolver` rejects as an empty key)
 * when the scalar is empty, when any tuple part is empty, or when the tuple
 * has no parts at all. The last case matters: `[].every(Boolean)` is
 * vacuously true, so without the explicit length check an empty tuple would
 * normalize to the constant `"[]"` and every row would share one key — the
 * keep-first dedupe would then silently drop all rows but the first.
 */
function normalizeExplicitMapKey(value: unknown): string {
  if (!Array.isArray(value)) {
    return normalizeExplicitMapKeyPart(value);
  }
  if (value.length === 0) {
    return '';
  }
  const parts = value.map((entry) => normalizeExplicitMapKeyPart(entry));
  return parts.every(Boolean) ? JSON.stringify(parts) : '';
}

/**
 * Normalize one key component to a trimmed string.
 *
 * Finite numbers are stringified as-is; `NaN`/`±Infinity` become `''`.
 * `null`/`undefined` become `''`; everything else goes through `String(...)`
 * and is trimmed.
 *
 * NOTE(review): plain objects stringify to "[object Object]", so object-valued
 * keys all collide — confirm whether callers can ever pass them.
 */
function normalizeExplicitMapKeyPart(value: unknown): string {
  if (typeof value === 'number') {
    return Number.isFinite(value) ? String(value) : '';
  }
  return String(value ?? '').trim();
}
|
|
81
|
+
|
|
82
|
+
/**
 * Maximum number of distinct duplicate keys retained in dedupe metadata for
 * observability. Keeps log lines bounded when many keys collide.
 */
export const DEDUPE_DUPLICATE_KEY_SAMPLE_CAP = 5;

export interface DedupeExplicitMapKeyResult<TRow> {
  /** Rows with duplicates removed; first occurrence per key kept, order preserved. */
  rows: TRow[];
  /** Count of rows dropped because an earlier row produced the same key. */
  droppedCount: number;
  /** Capped sample of the duplicate key values that were deduped. */
  duplicateKeys: string[];
}

/**
 * Resolver shape used by {@link dedupeExplicitMapKeyRows}. Receives each row in
 * its original (pre-dedupe) position. Callers that hold output rows in a
 * different form than `Record<string, unknown>` adapt to this signature with a
 * thin wrapper.
 */
export type DedupeKeyResolver<TRow> = (row: TRow, index: number) => string;

/**
 * Silent dedupe for explicit `ctx.dataset(...)` map keys. Keeps the first row
 * for each canonical key and drops subsequent duplicates, preserving original
 * order. With no resolver this is a pure passthrough (a fresh array copy with
 * empty metadata). Replaces the prior throw-on-duplicate behavior per product
 * decision: duplicate keys should never fail the run.
 *
 * @param input.rows - Rows in their original order; never mutated.
 * @param input.resolver - Produces the canonical key for a row, or `null` to
 *   disable dedupe.
 * @returns The kept rows, the number of dropped rows, and a sample of at most
 *   {@link DEDUPE_DUPLICATE_KEY_SAMPLE_CAP} distinct duplicate keys in
 *   first-collision order.
 */
export function dedupeExplicitMapKeyRows<TRow>(input: {
  rows: readonly TRow[];
  resolver: DedupeKeyResolver<TRow> | null;
}): DedupeExplicitMapKeyResult<TRow> {
  const { resolver } = input;
  if (!resolver) {
    return { rows: [...input.rows], droppedCount: 0, duplicateKeys: [] };
  }
  const seenKeys = new Set<string>();
  const duplicateKeys: string[] = [];
  const rows: TRow[] = [];
  for (let index = 0; index < input.rows.length; index += 1) {
    const row = input.rows[index]!;
    const keyValue = resolver(row, index);
    if (!seenKeys.has(keyValue)) {
      seenKeys.add(keyValue);
      rows.push(row);
      continue;
    }
    // Duplicate: drop the row and remember the key in the capped sample.
    // The sample holds at most DEDUPE_DUPLICATE_KEY_SAMPLE_CAP entries, so a
    // linear membership check is enough and no auxiliary set (which would
    // grow with every distinct duplicate key) is needed.
    if (
      duplicateKeys.length < DEDUPE_DUPLICATE_KEY_SAMPLE_CAP &&
      !duplicateKeys.includes(keyValue)
    ) {
      duplicateKeys.push(keyValue);
    }
  }
  return {
    rows,
    droppedCount: input.rows.length - rows.length,
    duplicateKeys,
  };
}
|
|
146
|
+
|
|
147
|
+
/**
 * Derive the identity string for one map row.
 *
 * The row is first copied without the `fieldNames` properties. When an
 * explicit key resolver is supplied, the identity comes from
 * `derivePlayRowIdentityFromKey` applied to the resolved key; otherwise it
 * comes from `derivePlayRowIdentity` applied to the stripped row.
 *
 * @param input.row - Row to identify.
 * @param input.index - Row position passed to the key resolver; defaults to 0.
 * @param input.artifactTableNamespace - Namespace forwarded to the identity helpers.
 * @param input.fieldNames - Properties stripped before deriving the identity;
 *   defaults to none.
 * @param input.explicitKey - Optional explicit key resolver.
 * @returns The identity string produced by the row-identity helper.
 */
export function deriveMapRowIdentity(input: {
  row: Record<string, unknown>;
  index?: number;
  artifactTableNamespace: string;
  fieldNames?: readonly string[];
  explicitKey?: ExplicitMapKeyResolver | null;
}): string {
  const stableRow = stripMapFieldOutputs(input.row, input.fieldNames ?? []);
  if (!input.explicitKey) {
    return derivePlayRowIdentity(stableRow, input.artifactTableNamespace);
  }
  const resolvedKey = input.explicitKey(stableRow, input.index ?? 0);
  return derivePlayRowIdentityFromKey(
    resolvedKey,
    input.artifactTableNamespace,
  );
}
|
|
162
|
+
|
|
163
|
+
/**
 * Hands out map execution scopes, numbering each one with a monotonically
 * increasing invocation index.
 */
export class MapRowIdentity {
  /** Index that the next `createScope` call will use. */
  private nextInvocationIndex: number;

  /**
   * @param startInvocationIndex - First invocation index to hand out; defaults to 0.
   */
  constructor(startInvocationIndex = 0) {
    this.nextInvocationIndex = startInvocationIndex;
  }

  /** Invocation index the next scope will receive. */
  get invocationIndex(): number {
    return this.nextInvocationIndex;
  }

  /** Overrides the invocation index the next scope will receive. */
  set invocationIndex(value: number) {
    this.nextInvocationIndex = value;
  }

  /**
   * Create the scope for one map invocation and advance the counter.
   *
   * The scope's `mapInvocationId` is `<logicalNamespace>:<index>`, and its
   * `rowIdentity` callback delegates to `deriveMapRowIdentity` with this
   * scope's namespace, field names and explicit key resolver.
   *
   * @param input.logicalNamespace - Namespace used in the invocation id.
   * @param input.artifactTableNamespace - Namespace used for row identities.
   * @param input.mapNodeId - Optional node id; stored as `null` when absent.
   * @param input.fieldNames - Properties stripped before identity derivation;
   *   defaults to none.
   * @param input.explicitKey - Optional explicit key resolver; `null` when absent.
   * @returns The new execution scope.
   */
  createScope(input: {
    logicalNamespace: string;
    artifactTableNamespace: string;
    mapNodeId?: string | null;
    fieldNames?: readonly string[];
    explicitKey?: ExplicitMapKeyResolver | null;
  }): MapExecutionScope {
    // Claim the current index for this scope, then advance for the next one.
    const claimedIndex = this.nextInvocationIndex;
    this.nextInvocationIndex = claimedIndex + 1;

    const fieldNames = input.fieldNames ?? [];
    const explicitKey = input.explicitKey ?? null;

    return {
      mapInvocationId: `${input.logicalNamespace}:${claimedIndex}`,
      mapNodeId: input.mapNodeId ?? null,
      logicalNamespace: input.logicalNamespace,
      artifactTableNamespace: input.artifactTableNamespace,
      rowIdentity: (row, index) =>
        deriveMapRowIdentity({
          row,
          index,
          artifactTableNamespace: input.artifactTableNamespace,
          fieldNames,
          explicitKey,
        }),
    };
  }
}
|
|
@@ -592,13 +592,18 @@ export function reducePlayRunLedgerEvent(
|
|
|
592
592
|
};
|
|
593
593
|
|
|
594
594
|
switch (event.type) {
|
|
595
|
-
case 'run.created':
|
|
595
|
+
case 'run.created': {
|
|
596
|
+
const createdStatus = event.status ?? base.status;
|
|
596
597
|
return withTiming({
|
|
597
598
|
...base,
|
|
598
599
|
playName: event.playName ?? base.playName ?? null,
|
|
599
|
-
status:
|
|
600
|
+
status: createdStatus,
|
|
600
601
|
createdAt: base.createdAt ?? occurredAt,
|
|
602
|
+
startedAt:
|
|
603
|
+
base.startedAt ??
|
|
604
|
+
(createdStatus === 'running' ? occurredAt : base.startedAt),
|
|
601
605
|
});
|
|
606
|
+
}
|
|
602
607
|
case 'run.started':
|
|
603
608
|
return withTiming({
|
|
604
609
|
...base,
|
|
@@ -50,6 +50,10 @@ export type PlayRunStreamNodeState = {
|
|
|
50
50
|
export type PlayRunLiveSnapshot = {
|
|
51
51
|
runId: string;
|
|
52
52
|
status: PlayRunLiveStatus;
|
|
53
|
+
createdAt?: number | null;
|
|
54
|
+
startedAt?: number | null;
|
|
55
|
+
finishedAt?: number | null;
|
|
56
|
+
durationMs?: number | null;
|
|
53
57
|
updatedAt: number | null;
|
|
54
58
|
/**
|
|
55
59
|
* Rotating log tail (the ledger snapshot's bounded `logTail`; the wire name
|
|
@@ -167,6 +171,10 @@ function buildSnapshotFromLedger(
|
|
|
167
171
|
return {
|
|
168
172
|
runId: snapshot.runId,
|
|
169
173
|
status: normalizePlayRunLiveStatus(snapshot.status),
|
|
174
|
+
createdAt: snapshot.createdAt ?? null,
|
|
175
|
+
startedAt: snapshot.startedAt ?? null,
|
|
176
|
+
finishedAt: snapshot.finishedAt ?? null,
|
|
177
|
+
durationMs: snapshot.durationMs ?? null,
|
|
170
178
|
updatedAt:
|
|
171
179
|
snapshot.updatedAt ?? snapshot.finishedAt ?? snapshot.startedAt ?? null,
|
|
172
180
|
logs: snapshot.logTail,
|