deepline 0.1.153 → 0.1.155

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/dist/bundling-sources/apps/play-runner-workers/src/coordinator-entry.ts +15 -0
  2. package/dist/bundling-sources/apps/play-runner-workers/src/entry.ts +1180 -825
  3. package/dist/bundling-sources/apps/play-runner-workers/src/runtime/batching.ts +34 -18
  4. package/dist/bundling-sources/apps/play-runner-workers/src/runtime/harness-receipt-store.ts +41 -0
  5. package/dist/bundling-sources/apps/play-runner-workers/src/runtime/receipts.ts +143 -8
  6. package/dist/bundling-sources/apps/play-runner-workers/src/runtime/tool-receipts.ts +104 -0
  7. package/dist/bundling-sources/sdk/src/index.ts +0 -1
  8. package/dist/bundling-sources/sdk/src/play.ts +3 -48
  9. package/dist/bundling-sources/sdk/src/plays/harness-stub.ts +27 -2
  10. package/dist/bundling-sources/sdk/src/release.ts +6 -4
  11. package/dist/bundling-sources/sdk/src/worker-play-entry.ts +0 -10
  12. package/dist/bundling-sources/shared_libs/play-data-plane/index.ts +0 -1
  13. package/dist/bundling-sources/shared_libs/play-runtime/app-runtime-api.ts +87 -0
  14. package/dist/bundling-sources/shared_libs/play-runtime/batch-runtime.ts +0 -59
  15. package/dist/bundling-sources/shared_libs/play-runtime/cell-staleness.ts +0 -253
  16. package/dist/bundling-sources/shared_libs/play-runtime/context.ts +805 -1570
  17. package/dist/bundling-sources/shared_libs/play-runtime/ctx-types.ts +47 -74
  18. package/dist/bundling-sources/shared_libs/play-runtime/default-batch-strategies.ts +36 -14
  19. package/dist/bundling-sources/shared_libs/play-runtime/durable-call-cache.ts +145 -0
  20. package/dist/bundling-sources/shared_libs/play-runtime/durable-receipt-execution.ts +284 -0
  21. package/dist/bundling-sources/shared_libs/play-runtime/postgres-json.ts +12 -5
  22. package/dist/bundling-sources/shared_libs/play-runtime/run-lifecycle-policy.ts +78 -0
  23. package/dist/bundling-sources/shared_libs/play-runtime/run-snapshot-stream.ts +10 -45
  24. package/dist/bundling-sources/shared_libs/play-runtime/runtime-actions.ts +1 -0
  25. package/dist/bundling-sources/shared_libs/play-runtime/runtime-api.ts +923 -535
  26. package/dist/bundling-sources/shared_libs/play-runtime/runtime-pg-driver-neon-serverless.ts +45 -76
  27. package/dist/bundling-sources/shared_libs/play-runtime/runtime-pg-driver.ts +12 -1
  28. package/dist/bundling-sources/shared_libs/play-runtime/step-program-dataset-builder.ts +1 -14
  29. package/dist/bundling-sources/shared_libs/play-runtime/tool-execution-outcome.ts +159 -0
  30. package/dist/bundling-sources/shared_libs/play-runtime/tool-result-types.ts +4 -1
  31. package/dist/bundling-sources/shared_libs/play-runtime/work-receipts.ts +32 -0
  32. package/dist/bundling-sources/shared_libs/plays/definition.ts +4 -2
  33. package/dist/bundling-sources/shared_libs/plays/runtime-validation.ts +3 -14
  34. package/dist/bundling-sources/shared_libs/plays/static-pipeline.ts +1 -43
  35. package/dist/cli/index.js +1305 -401
  36. package/dist/cli/index.mjs +1273 -363
  37. package/dist/{compiler-manifest-BjoRENv9.d.ts → compiler-manifest-DW1flrHk.d.mts} +0 -9
  38. package/dist/{compiler-manifest-BjoRENv9.d.mts → compiler-manifest-DW1flrHk.d.ts} +0 -9
  39. package/dist/index.d.mts +9 -38
  40. package/dist/index.d.ts +9 -38
  41. package/dist/index.js +26 -13
  42. package/dist/index.mjs +26 -13
  43. package/dist/plays/bundle-play-file.d.mts +2 -2
  44. package/dist/plays/bundle-play-file.d.ts +2 -2
  45. package/package.json +1 -1
  46. package/dist/bundling-sources/shared_libs/play-data-plane/cell-policy.ts +0 -76
  47. package/dist/bundling-sources/shared_libs/play-runtime/progress-emitter.ts +0 -197
  48. package/dist/bundling-sources/shared_libs/play-runtime/waterfall-replay.ts +0 -79
@@ -2,6 +2,7 @@ import { createHash } from 'node:crypto';
2
2
  import {
3
3
  createRuntimePool,
4
4
  createRuntimeOneShotQueryClient,
5
+ canReuseRuntimePostgresPoolsAcrossRequests,
5
6
  isRuntimeOneShotQueryFactoryRegistered,
6
7
  type RuntimePool,
7
8
  type RuntimePoolClient,
@@ -15,7 +16,6 @@ import type {
15
16
  import type { PlayBundleArtifact } from '../plays/artifact-types';
16
17
  import {
17
18
  augmentSheetContractWithDatasetFields,
18
- outputFieldsFromSheetContract,
19
19
  outputPhysicalSheetColumnNames,
20
20
  outputPhysicalSheetColumnProjections,
21
21
  physicalSheetColumnNames,
@@ -57,7 +57,10 @@ import {
57
57
  type WorkReceipt,
58
58
  type WorkReceiptClaim,
59
59
  } from './work-receipts';
60
- import { stringifyPostgresJson } from './postgres-json';
60
+ import {
61
+ sanitizePostgresJsonValue,
62
+ stringifyPostgresJson,
63
+ } from './postgres-json';
61
64
  import { RECEIPT_STATUS_CODE, receiptStatusFromCode } from './receipt-status';
62
65
  import type { MapRowOutcome } from './durability-store';
63
66
  import {
@@ -69,12 +72,7 @@ import {
69
72
  mapRowOutcomeRuntimeFields,
70
73
  resolveMapRowOutcomeKey,
71
74
  } from './map-row-outcome';
72
- import {
73
- DEEPLINE_CELL_META_FIELD,
74
- cellPolicyFields,
75
- shouldRecomputeCell,
76
- type CellStalenessPolicyByField,
77
- } from './cell-staleness';
75
+ import { DEEPLINE_CELL_META_FIELD } from './cell-staleness';
78
76
  import type { PlayArtifactKind } from './backend';
79
77
 
80
78
  type RuntimeApiContext = {
@@ -126,6 +124,8 @@ const RUNTIME_POSTGRES_PREWARM_MAX_ATTEMPTS = 4;
126
124
  const RUNTIME_POSTGRES_PREWARM_RETRY_DELAYS_MS = [250, 750, 1_500] as const;
127
125
  const RUNTIME_POSTGRES_CONNECT_MAX_ATTEMPTS = 4;
128
126
  const RUNTIME_POSTGRES_CONNECT_RETRY_DELAYS_MS = [250, 750, 1_500] as const;
127
+ const RUNTIME_WORK_RECEIPT_QUERY_MAX_ATTEMPTS = 3;
128
+ const RUNTIME_WORK_RECEIPT_QUERY_RETRY_DELAYS_MS = [250, 750] as const;
129
129
  // Runtime DB sessions are minted against the pooled tenant endpoint. A healthy
130
130
  // connect is sub-second; spending minutes on one sandbox dial only hides a
131
131
  // broken route and stalls the whole play. Keep retries bounded and loud.
@@ -140,6 +140,20 @@ const RECEIPT_STATUS_COMPLETED = RECEIPT_STATUS_CODE.completed;
140
140
  const RECEIPT_STATUS_FAILED = RECEIPT_STATUS_CODE.failed;
141
141
  const RECEIPT_STATUS_SKIPPED = RECEIPT_STATUS_CODE.skipped;
142
142
 
143
+ function runtimeSummaryTotalSql(input: {
144
+ currentTotal: string;
145
+ totalDelta: string;
146
+ queued: string;
147
+ running: string;
148
+ completed: string;
149
+ failed: string;
150
+ }): string {
151
+ return `GREATEST(
152
+ GREATEST(${input.currentTotal} + ${input.totalDelta}, 0),
153
+ (${input.queued}) + (${input.running}) + (${input.completed}) + (${input.failed})
154
+ )`;
155
+ }
156
+
143
157
  export type ResolvedRuntimePlay = {
144
158
  playId: string;
145
159
  sourceCode?: string | null;
@@ -1068,6 +1082,27 @@ function isTransientRuntimePostgresConnectionError(error: unknown): boolean {
1068
1082
  );
1069
1083
  }
1070
1084
 
1085
+ function isTransientRuntimePostgresOperationError(error: unknown): boolean {
1086
+ if (isTransientRuntimePostgresConnectionError(error)) {
1087
+ return true;
1088
+ }
1089
+ if (!error || typeof error !== 'object') {
1090
+ return false;
1091
+ }
1092
+ const nestedErrors = (error as { errors?: unknown }).errors;
1093
+ if (
1094
+ Array.isArray(nestedErrors) &&
1095
+ nestedErrors.some(isTransientRuntimePostgresOperationError)
1096
+ ) {
1097
+ return true;
1098
+ }
1099
+ const name = 'name' in error ? String(error.name) : '';
1100
+ const message = 'message' in error ? String(error.message) : '';
1101
+ return /fetch failed|network error|socket hang up|connection (terminated|timeout|timed out|closed|reset)|ETIMEDOUT|ECONNRESET|ECONNREFUSED|UND_ERR_CONNECT_TIMEOUT/i.test(
1102
+ `${name} ${message} ${String(error)}`,
1103
+ );
1104
+ }
1105
+
1071
1106
  function sleep(ms: number): Promise<void> {
1072
1107
  return new Promise((resolve) => setTimeout(resolve, ms));
1073
1108
  }
@@ -1129,8 +1164,12 @@ function getPostgresPool(postgresUrl: string, cachePool = true): RuntimePool {
1129
1164
  return pool;
1130
1165
  }
1131
1166
 
1132
- function canReuseRuntimePostgresPoolsAcrossRequests(): boolean {
1133
- return true;
1167
+ async function resetRuntimePostgresPool(postgresUrl: string): Promise<void> {
1168
+ const pool = postgresPools.get(postgresUrl);
1169
+ postgresPools.delete(postgresUrl);
1170
+ if (pool) {
1171
+ await Promise.resolve(pool.end()).catch(() => {});
1172
+ }
1134
1173
  }
1135
1174
 
1136
1175
  async function withRuntimePostgres<T>(
@@ -1528,15 +1567,41 @@ async function withRuntimeWorkReceiptClient<T>(
1528
1567
  }
1529
1568
  };
1530
1569
 
1531
- if (isRuntimeOneShotQueryFactoryRegistered()) {
1532
- return await withRuntimeOneShotPostgres(session, runWithSelfHeal);
1570
+ for (
1571
+ let attempt = 1;
1572
+ attempt <= RUNTIME_WORK_RECEIPT_QUERY_MAX_ATTEMPTS;
1573
+ attempt += 1
1574
+ ) {
1575
+ try {
1576
+ return isRuntimeOneShotQueryFactoryRegistered()
1577
+ ? await withRuntimeOneShotPostgres(session, runWithSelfHeal)
1578
+ : await withRuntimePostgres(
1579
+ session,
1580
+ (client) => runWithSelfHeal(client),
1581
+ { cachePool: !context.disablePostgresPoolCache },
1582
+ );
1583
+ } catch (error) {
1584
+ if (
1585
+ attempt >= RUNTIME_WORK_RECEIPT_QUERY_MAX_ATTEMPTS ||
1586
+ !isTransientRuntimePostgresOperationError(error)
1587
+ ) {
1588
+ throw error;
1589
+ }
1590
+ if (
1591
+ !isRuntimeOneShotQueryFactoryRegistered() &&
1592
+ !context.disablePostgresPoolCache
1593
+ ) {
1594
+ await resetRuntimePostgresPool(session.postgresUrl);
1595
+ }
1596
+ await sleep(
1597
+ RUNTIME_WORK_RECEIPT_QUERY_RETRY_DELAYS_MS[attempt - 1] ??
1598
+ RUNTIME_WORK_RECEIPT_QUERY_RETRY_DELAYS_MS[
1599
+ RUNTIME_WORK_RECEIPT_QUERY_RETRY_DELAYS_MS.length - 1
1600
+ ],
1601
+ );
1602
+ }
1533
1603
  }
1534
-
1535
- return await withRuntimePostgres(
1536
- session,
1537
- (client) => runWithSelfHeal(client),
1538
- { cachePool: !context.disablePostgresPoolCache },
1539
- );
1604
+ throw new Error('Runtime work receipt query failed after retries.');
1540
1605
  }
1541
1606
 
1542
1607
  const PLAY_INTERNAL_SHEET_VERSION_SEQUENCE = '_deepline_sheet_version_seq';
@@ -1596,6 +1661,8 @@ function isSystemSheetColumn(columnName: string): boolean {
1596
1661
  case '__has_failed':
1597
1662
  case '__deeplineCsvProjectedFields':
1598
1663
  case '__deeplineCsvProjectedValues':
1664
+ case '__deeplineSourceRowIndex':
1665
+ case '__deeplineOriginalSourceRowIndex':
1599
1666
  return true;
1600
1667
  default:
1601
1668
  return false;
@@ -1652,49 +1719,33 @@ function mapRuntimePostgresRow(input: {
1652
1719
  };
1653
1720
  }
1654
1721
 
1655
- function mapRuntimeProjectedRows(
1656
- rows: readonly Record<string, unknown>[],
1657
- outputColumns: readonly PhysicalSheetColumnProjection[],
1658
- ): RuntimeApiRowRecord[] {
1659
- return rows.map((raw) => {
1660
- const key = String(raw._key ?? '');
1661
- const data = Object.fromEntries(
1662
- outputColumns
1663
- .filter((column) =>
1664
- Object.prototype.hasOwnProperty.call(raw, column.fieldName),
1665
- )
1666
- .map((column) => [column.fieldName, raw[column.fieldName]]),
1667
- );
1668
- return { key, data };
1669
- });
1670
- }
1671
-
1672
- function cachedRuntimeCellMetaPatch(runId: string): Record<string, unknown> {
1673
- return {
1674
- status: 'cached',
1675
- runId,
1676
- reused: true,
1677
- };
1678
- }
1679
-
1680
- function cachedRuntimeCellMetaUpdateSql(
1681
- tableAlias: string,
1682
- outputFields: readonly string[],
1683
- patchSql: string,
1684
- ): string {
1685
- const uniqueFields = [...new Set(outputFields)];
1686
- if (uniqueFields.length === 0) {
1687
- return `${tableAlias}._cell_meta`;
1722
+ function completedRuntimeCellMetaPatch(input: {
1723
+ runId: string;
1724
+ outputFields: readonly string[];
1725
+ rowPatch?: Record<string, unknown>;
1726
+ }): Record<string, unknown> {
1727
+ const patch: Record<string, unknown> = {};
1728
+ const completedAt = Date.now();
1729
+ for (const field of input.outputFields) {
1730
+ const existing =
1731
+ input.rowPatch?.[field] &&
1732
+ typeof input.rowPatch[field] === 'object' &&
1733
+ !Array.isArray(input.rowPatch[field])
1734
+ ? (input.rowPatch[field] as Record<string, unknown>)
1735
+ : {};
1736
+ patch[field] = {
1737
+ status: 'completed',
1738
+ runId: input.runId,
1739
+ completedAt,
1740
+ ...existing,
1741
+ };
1742
+ }
1743
+ for (const [field, meta] of Object.entries(input.rowPatch ?? {})) {
1744
+ if (!Object.hasOwn(patch, field)) {
1745
+ patch[field] = meta;
1746
+ }
1688
1747
  }
1689
- return uniqueFields.reduce((expression, field) => {
1690
- const fieldLiteral = quoteLiteral(field);
1691
- return `jsonb_set(
1692
- ${expression},
1693
- ARRAY[${fieldLiteral}]::text[],
1694
- coalesce(${tableAlias}._cell_meta -> ${fieldLiteral}, '{}'::jsonb) || ${patchSql},
1695
- true
1696
- )`;
1697
- }, `coalesce(${tableAlias}._cell_meta, '{}'::jsonb)`);
1748
+ return patch;
1698
1749
  }
1699
1750
 
1700
1751
  function mergeRuntimeCellMetaPatchSql(
@@ -1710,166 +1761,6 @@ function mergeRuntimeCellMetaPatchSql(
1710
1761
  )`;
1711
1762
  }
1712
1763
 
1713
- async function insertCachedRuntimeColumnSummaryDelta(
1714
- client: RuntimeQueryClient,
1715
- session: RuntimePostgresSession,
1716
- input: {
1717
- tableNamespace: string;
1718
- outputFields: readonly string[];
1719
- cached: number;
1720
- },
1721
- ): Promise<void> {
1722
- const uniqueFields = [...new Set(input.outputFields)];
1723
- if (uniqueFields.length === 0 || input.cached <= 0) {
1724
- return;
1725
- }
1726
- const normalizedPlayName = normalizePlayNameForSheet(session.playName);
1727
- const normalizedTableNamespace = normalizeTableNamespace(
1728
- input.tableNamespace,
1729
- );
1730
- await client.query(
1731
- `
1732
- INSERT INTO ${columnSummaryTable(session)} AS target (
1733
- play_name,
1734
- table_namespace,
1735
- field,
1736
- cached
1737
- )
1738
- SELECT $1::text, $2::text, field_values.field, $3::int
1739
- FROM unnest($4::text[]) AS field_values(field)
1740
- ON CONFLICT (play_name, table_namespace, field) DO UPDATE
1741
- SET cached = GREATEST(target.cached + EXCLUDED.cached, 0),
1742
- _updated_at = now()
1743
- `,
1744
- [normalizedPlayName, normalizedTableNamespace, input.cached, uniqueFields],
1745
- );
1746
- }
1747
-
1748
- async function markAndReadRuntimeCompletedRowsCached(
1749
- client: RuntimeQueryClient,
1750
- session: RuntimePostgresSession,
1751
- input: {
1752
- tableNamespace: string;
1753
- keys: string[];
1754
- runId: string;
1755
- outputFields: string[];
1756
- sheetContract: PlaySheetContract;
1757
- },
1758
- ): Promise<RuntimeApiRowRecord[]> {
1759
- if (input.keys.length === 0) {
1760
- return [];
1761
- }
1762
- const cellMetaSql = cachedRuntimeCellMetaUpdateSql(
1763
- 'target',
1764
- input.outputFields,
1765
- '$3::jsonb',
1766
- );
1767
-
1768
- const { rows } = await client.query<Record<string, unknown>>(
1769
- `
1770
- WITH updated_rows AS (
1771
- UPDATE ${sheetTable(session)} AS target
1772
- SET _run_id = $2::text,
1773
- _updated_at = now(),
1774
- _version = ${nextRuntimeSheetVersionExpression(session)},
1775
- _cell_meta = ${cellMetaSql}
1776
- WHERE target._key = ANY($1::text[])
1777
- AND target._status = 'enriched'
1778
- RETURNING target.*
1779
- )
1780
- SELECT *
1781
- FROM updated_rows
1782
- ORDER BY _input_index ASC NULLS LAST, _created_at ASC, _key ASC
1783
- `,
1784
- [
1785
- input.keys,
1786
- input.runId,
1787
- JSON.stringify(cachedRuntimeCellMetaPatch(input.runId)),
1788
- ],
1789
- );
1790
- await insertCachedRuntimeColumnSummaryDelta(client, session, {
1791
- tableNamespace: input.tableNamespace,
1792
- outputFields: input.outputFields,
1793
- cached: rows.length,
1794
- });
1795
- return rows.map((raw) =>
1796
- mapRuntimePostgresRow({ raw, sheetContract: input.sheetContract }),
1797
- );
1798
- }
1799
-
1800
- async function markAndReadRuntimeCompletedRowsCachedProjection(
1801
- client: RuntimeQueryClient,
1802
- session: RuntimePostgresSession,
1803
- input: {
1804
- tableNamespace: string;
1805
- keys: string[];
1806
- runId: string;
1807
- outputFields: string[];
1808
- outputColumns: PhysicalSheetColumnProjection[];
1809
- timings?: RuntimeSheetTiming[];
1810
- },
1811
- ): Promise<RuntimeApiRowRecord[]> {
1812
- if (input.keys.length === 0) {
1813
- return [];
1814
- }
1815
- const cellMetaSql = cachedRuntimeCellMetaUpdateSql(
1816
- 'target',
1817
- input.outputFields,
1818
- '$3::jsonb',
1819
- );
1820
- const outputReturnSql =
1821
- input.outputColumns.length > 0
1822
- ? `, ${input.outputColumns
1823
- .map(
1824
- (column) =>
1825
- `target.${quoteIdentifier(column.sqlName)} AS ${quoteIdentifier(
1826
- column.fieldName,
1827
- )}`,
1828
- )
1829
- .join(', ')}`
1830
- : '';
1831
-
1832
- const queryStartedAt = Date.now();
1833
- const { rows } = await client.query<Record<string, unknown>>(
1834
- `
1835
- WITH updated_rows AS (
1836
- UPDATE ${sheetTable(session)} AS target
1837
- SET _run_id = $2::text,
1838
- _updated_at = now(),
1839
- _version = ${nextRuntimeSheetVersionExpression(session)},
1840
- _cell_meta = ${cellMetaSql}
1841
- WHERE target._key = ANY($1::text[])
1842
- AND target._status = 'enriched'
1843
- RETURNING target._key${outputReturnSql}
1844
- )
1845
- SELECT *
1846
- FROM updated_rows
1847
- `,
1848
- [
1849
- input.keys,
1850
- input.runId,
1851
- JSON.stringify(cachedRuntimeCellMetaPatch(input.runId)),
1852
- ],
1853
- );
1854
- input.timings?.push({
1855
- phase: 'cached_fast_path.mark_read_projected_query',
1856
- ms: Date.now() - queryStartedAt,
1857
- rows: input.keys.length,
1858
- completed: rows.length,
1859
- cached: true,
1860
- });
1861
-
1862
- const mapStartedAt = Date.now();
1863
- const mapped = mapRuntimeProjectedRows(rows, input.outputColumns);
1864
- input.timings?.push({
1865
- phase: 'cached_fast_path.map_projected_rows',
1866
- ms: Date.now() - mapStartedAt,
1867
- rows: mapped.length,
1868
- cached: true,
1869
- });
1870
- return mapped;
1871
- }
1872
-
1873
1764
  async function readRuntimeRowsByKey(
1874
1765
  client: RuntimeQueryClient,
1875
1766
  session: RuntimePostgresSession,
@@ -2012,13 +1903,20 @@ async function readRuntimeSummary(
2012
1903
  );
2013
1904
  return await withRuntimePostgres(session, async (client) => {
2014
1905
  const { rows } = await client.query(
2015
- `SELECT total
1906
+ `SELECT total, queued, running, completed, failed
2016
1907
  FROM ${summaryTable(session)}
2017
1908
  WHERE play_name = $1 AND table_namespace = $2
2018
1909
  LIMIT 1`,
2019
1910
  [normalizedPlayName, normalizedTableNamespace],
2020
1911
  );
2021
- return { stats: { total: Number(rows[0]?.total ?? 0) } };
1912
+ const row = rows[0];
1913
+ const total = Number(row?.total ?? 0);
1914
+ const partitionTotal =
1915
+ Number(row?.queued ?? 0) +
1916
+ Number(row?.running ?? 0) +
1917
+ Number(row?.completed ?? 0) +
1918
+ Number(row?.failed ?? 0);
1919
+ return { stats: { total: Math.max(total, partitionTotal) } };
2022
1920
  });
2023
1921
  }
2024
1922
 
@@ -2190,8 +2088,15 @@ async function writeRuntimeRows(
2190
2088
  FROM inserted_count_cte
2191
2089
  WHERE c > 0
2192
2090
  ON CONFLICT (play_name, table_namespace) DO UPDATE SET
2193
- total = ${summaryTable(session)}.total + EXCLUDED.total,
2194
2091
  queued = ${summaryTable(session)}.queued + EXCLUDED.queued,
2092
+ total = ${runtimeSummaryTotalSql({
2093
+ currentTotal: `${summaryTable(session)}.total`,
2094
+ totalDelta: 'EXCLUDED.total',
2095
+ queued: `${summaryTable(session)}.queued + EXCLUDED.queued`,
2096
+ running: `${summaryTable(session)}.running`,
2097
+ completed: `${summaryTable(session)}.completed`,
2098
+ failed: `${summaryTable(session)}.failed`,
2099
+ })},
2195
2100
  _updated_at = now()
2196
2101
  RETURNING 1
2197
2102
  )
@@ -2369,9 +2274,16 @@ async function prepareRuntimeSheetDatasetRows(
2369
2274
  FROM summary_counts
2370
2275
  WHERE inserted_count > 0 OR missing_output_count > 0
2371
2276
  ON CONFLICT (play_name, table_namespace) DO UPDATE SET
2372
- total = GREATEST(target.total + EXCLUDED.total, 0),
2373
2277
  queued = GREATEST(target.queued + EXCLUDED.queued, 0),
2374
2278
  completed = GREATEST(target.completed + EXCLUDED.completed, 0),
2279
+ total = ${runtimeSummaryTotalSql({
2280
+ currentTotal: 'target.total',
2281
+ totalDelta: 'EXCLUDED.total',
2282
+ queued: 'GREATEST(target.queued + EXCLUDED.queued, 0)',
2283
+ running: 'target.running',
2284
+ completed: 'GREATEST(target.completed + EXCLUDED.completed, 0)',
2285
+ failed: 'target.failed',
2286
+ })},
2375
2287
  _updated_at = now()
2376
2288
  RETURNING 1
2377
2289
  )
@@ -2398,83 +2310,6 @@ async function prepareRuntimeSheetDatasetRows(
2398
2310
  return { inserted, pendingKeys };
2399
2311
  }
2400
2312
 
2401
- async function tryPrepareRuntimeSheetDatasetRowsCachedOnly(
2402
- client: RuntimeQueryClient,
2403
- session: RuntimePostgresSession,
2404
- input: {
2405
- chunks: RuntimeDatasetRowEntry[][];
2406
- outputPhysicalColumns: string[];
2407
- timings?: RuntimeSheetTiming[];
2408
- },
2409
- ): Promise<{ prepared: { inserted: number; pendingKeys: string[] } | null }> {
2410
- const pendingKeys: string[] = [];
2411
- const startedAt = Date.now();
2412
- for (const chunk of input.chunks) {
2413
- const chunkKeys = chunk.map((entry) => entry.key);
2414
- const chunkInputIndexes = chunk.map((entry) => entry.inputIndex);
2415
- const existingMissingOutputSql = missingOutputCellSql(
2416
- 'existing',
2417
- input.outputPhysicalColumns,
2418
- );
2419
- const { rows } = await client.query(
2420
- `
2421
- WITH input_rows AS (
2422
- SELECT DISTINCT ON (key_values._key)
2423
- key_values._key, index_values._input_index
2424
- FROM unnest($1::text[]) WITH ORDINALITY AS key_values(_key, ord)
2425
- JOIN unnest($2::bigint[]) WITH ORDINALITY AS index_values(_input_index, ord)
2426
- ON index_values.ord = key_values.ord
2427
- ORDER BY key_values._key, key_values.ord
2428
- ),
2429
- existing_rows AS (
2430
- UPDATE ${sheetTable(session)} AS target
2431
- SET _input_index = input_rows._input_index,
2432
- _updated_at = now(),
2433
- _version = ${nextRuntimeSheetVersionExpression(session)}
2434
- FROM input_rows
2435
- WHERE target._key = input_rows._key
2436
- AND target._input_index IS DISTINCT FROM input_rows._input_index
2437
- RETURNING target._key
2438
- ),
2439
- pending_rows AS (
2440
- SELECT input_rows._key
2441
- FROM input_rows
2442
- LEFT JOIN ${sheetTable(session)} AS existing
2443
- ON existing._key = input_rows._key
2444
- WHERE existing._key IS NULL
2445
- OR existing._status IN ('pending', 'running', 'failed', 'stale')
2446
- OR (
2447
- existing._status = 'enriched'
2448
- AND (${existingMissingOutputSql})
2449
- )
2450
- )
2451
- SELECT
2452
- coalesce((SELECT array_agg(_key) FROM pending_rows), '{}'::text[]) AS pending_keys,
2453
- (SELECT count(*)::int FROM existing_rows) AS reordered_count
2454
- `,
2455
- [chunkKeys, chunkInputIndexes],
2456
- );
2457
- if (Array.isArray(rows[0]?.pending_keys)) {
2458
- pendingKeys.push(...(rows[0]?.pending_keys as string[]));
2459
- }
2460
- }
2461
- input.timings?.push({
2462
- phase:
2463
- pendingKeys.length === 0
2464
- ? 'cached_fast_path.prepare_probe'
2465
- : 'cached_fast_path.prepare_probe_fallback',
2466
- ms: Date.now() - startedAt,
2467
- rows: input.chunks.reduce((sum, chunk) => sum + chunk.length, 0),
2468
- chunks: input.chunks.length,
2469
- pending: pendingKeys.length,
2470
- cached: pendingKeys.length === 0,
2471
- });
2472
- if (pendingKeys.length > 0) {
2473
- return { prepared: null };
2474
- }
2475
- return { prepared: { inserted: 0, pendingKeys: [] } };
2476
- }
2477
-
2478
2313
  async function buildRuntimeSheetDatasetStartResult(
2479
2314
  client: RuntimeQueryClient,
2480
2315
  session: RuntimePostgresSession,
@@ -2483,227 +2318,165 @@ async function buildRuntimeSheetDatasetStartResult(
2483
2318
  sourceRowsLength: number;
2484
2319
  rowEntries: RuntimeDatasetRowEntry[];
2485
2320
  sheetContract: PlaySheetContract;
2321
+ normalizedPlayName: string;
2322
+ normalizedTableNamespace: string;
2486
2323
  runId: string;
2487
2324
  inserted: number;
2488
2325
  pendingKeys: string[];
2489
- cellPolicies?: CellStalenessPolicyByField;
2490
2326
  timings?: RuntimeSheetTiming[];
2491
2327
  },
2492
2328
  ): Promise<PrepareRuntimeSheetResult> {
2493
- const outputFields = outputFieldsFromSheetContract(input.sheetContract);
2494
- const hasCellPolicies = cellPolicyFields(input.cellPolicies).length > 0;
2495
- if (input.pendingKeys.length === 0 && !hasCellPolicies) {
2496
- const fastPathStartedAt = Date.now();
2497
- const outputColumns = outputPhysicalSheetColumnProjections(
2498
- input.sheetContract,
2499
- );
2500
- const completedRowsByKey = new Map(
2501
- (
2502
- await markAndReadRuntimeCompletedRowsCachedProjection(client, session, {
2503
- tableNamespace: input.tableNamespace,
2504
- keys: input.rowEntries.map((entry) => entry.key),
2505
- runId: input.runId,
2506
- outputFields,
2507
- outputColumns,
2508
- timings: input.timings,
2509
- })
2510
- ).map((row) => [row.key, row.data]),
2329
+ void input.pendingKeys;
2330
+ if (input.inserted === input.rowEntries.length) {
2331
+ const datasetFields = input.sheetContract.columns.flatMap((column) =>
2332
+ column.source === 'datasetColumn' && typeof column.field === 'string'
2333
+ ? [column.field]
2334
+ : [],
2511
2335
  );
2512
- if (completedRowsByKey.size === input.rowEntries.length) {
2513
- await insertCachedRuntimeColumnSummaryDelta(client, session, {
2514
- tableNamespace: input.tableNamespace,
2515
- outputFields,
2516
- cached: completedRowsByKey.size,
2517
- });
2518
- input.timings?.push({
2519
- phase: 'cached_fast_path.total',
2520
- ms: Date.now() - fastPathStartedAt,
2521
- rows: input.rowEntries.length,
2522
- completed: completedRowsByKey.size,
2523
- cached: true,
2524
- });
2525
- return {
2526
- inserted: input.inserted,
2527
- skipped:
2528
- input.rowEntries.length -
2529
- input.inserted +
2530
- (input.sourceRowsLength - input.rowEntries.length),
2531
- pendingRows: [],
2532
- completedRows: input.rowEntries.map((entry) => ({
2533
- ...mergeRuntimeCompletedRow({
2534
- inputRow: entry.row,
2535
- completedData: stripRuntimeCellMeta(
2536
- completedRowsByKey.get(entry.key) ?? {},
2537
- ),
2538
- sheetContract: input.sheetContract,
2539
- }),
2540
- ...mapRowOutcomeRuntimeFields({ key: entry.key }),
2541
- })),
2542
- tableNamespace: input.tableNamespace,
2543
- };
2544
- }
2545
- input.timings?.push({
2546
- phase: 'cached_fast_path.fallback_incomplete',
2547
- ms: Date.now() - fastPathStartedAt,
2548
- rows: input.rowEntries.length,
2549
- completed: completedRowsByKey.size,
2550
- cached: true,
2551
- });
2336
+ return {
2337
+ inserted: input.inserted,
2338
+ skipped: input.sourceRowsLength - input.rowEntries.length,
2339
+ pendingRows: input.rowEntries.map((entry) => {
2340
+ const row: Record<string, unknown> = {
2341
+ ...sanitizePostgresJsonValue(entry.row),
2342
+ __deeplineRowKey: entry.key,
2343
+ };
2344
+ for (const field of datasetFields) {
2345
+ if (!Object.prototype.hasOwnProperty.call(row, field)) {
2346
+ row[field] = null;
2347
+ }
2348
+ }
2349
+ return row;
2350
+ }),
2351
+ completedRows: [],
2352
+ tableNamespace: input.tableNamespace,
2353
+ };
2552
2354
  }
2553
- const pendingKeySet = new Set(input.pendingKeys);
2554
- const initiallyPendingRows = input.rowEntries.filter((entry) =>
2555
- pendingKeySet.has(entry.key),
2556
- );
2557
- const initiallyCompletedEntries = input.rowEntries.filter(
2558
- (entry) => !pendingKeySet.has(entry.key),
2559
- );
2560
- const initiallyCompletedRows = await readRuntimeRowsByKey(
2355
+
2356
+ const startedAt = Date.now();
2357
+ await markRuntimeRowsPendingForRecompute(client, session, {
2358
+ keys: input.rowEntries.map((entry) => entry.key),
2359
+ runId: input.runId,
2360
+ normalizedPlayName: input.normalizedPlayName,
2361
+ normalizedTableNamespace: input.normalizedTableNamespace,
2362
+ });
2363
+ const persistedPendingRows = await readRuntimeRowsByKey(
2561
2364
  client,
2562
2365
  session,
2563
- initiallyCompletedEntries.map((entry) => entry.key),
2366
+ input.rowEntries.map((entry) => entry.key),
2564
2367
  input.sheetContract,
2565
2368
  );
2566
- const initiallyCompletedRowsByKey = new Map(
2567
- initiallyCompletedRows.map((row) => [row.key, row]),
2568
- );
2569
- const staleKeySet = new Set<string>();
2570
- for (const entry of initiallyCompletedEntries) {
2571
- const completedRow = initiallyCompletedRowsByKey.get(entry.key);
2572
- if (!completedRow) {
2573
- staleKeySet.add(entry.key);
2574
- continue;
2575
- }
2576
- const cellMeta =
2577
- completedRow.data[DEEPLINE_CELL_META_FIELD] &&
2578
- typeof completedRow.data[DEEPLINE_CELL_META_FIELD] === 'object'
2579
- ? (completedRow.data[DEEPLINE_CELL_META_FIELD] as Record<
2580
- string,
2581
- unknown
2582
- >)
2583
- : {};
2584
- for (const field of outputFields) {
2585
- const decision = shouldRecomputeCell({
2586
- hasValue:
2587
- Object.prototype.hasOwnProperty.call(completedRow.data, field) &&
2588
- completedRow.data[field] !== null &&
2589
- completedRow.data[field] !== undefined &&
2590
- !(
2591
- typeof completedRow.data[field] === 'string' &&
2592
- completedRow.data[field].length === 0
2593
- ),
2594
- value: completedRow.data[field],
2595
- meta:
2596
- cellMeta[field] && typeof cellMeta[field] === 'object'
2597
- ? (cellMeta[field] as {
2598
- status?: string;
2599
- completedAt?: number;
2600
- staleAt?: number | null;
2601
- staleAfterSeconds?: number | null;
2602
- })
2603
- : null,
2604
- policy: input.cellPolicies?.[field],
2605
- });
2606
- if (decision.action === 'recompute') {
2607
- staleKeySet.add(entry.key);
2608
- break;
2609
- }
2610
- }
2611
- }
2612
- const pendingRows = [
2613
- ...initiallyPendingRows,
2614
- ...initiallyCompletedEntries.filter((entry) => staleKeySet.has(entry.key)),
2615
- ];
2616
- if (staleKeySet.size > 0) {
2617
- await markRuntimeRowsPendingForCellStaleness(client, session, {
2618
- keys: [...staleKeySet],
2619
- runId: input.runId,
2620
- });
2621
- }
2622
- const completedKeys = initiallyCompletedEntries
2623
- .filter((entry) => !staleKeySet.has(entry.key))
2624
- .map((entry) => entry.key);
2625
- const existingPendingRowsByKey = new Map(
2626
- (
2627
- await readRuntimeRowsByKey(
2628
- client,
2629
- session,
2630
- pendingRows.map((entry) => entry.key),
2631
- input.sheetContract,
2632
- )
2633
- ).map((row) => [row.key, row.data]),
2634
- );
2635
- const completedRowsByKey = new Map(
2636
- (
2637
- await markAndReadRuntimeCompletedRowsCached(client, session, {
2638
- tableNamespace: input.tableNamespace,
2639
- keys: completedKeys,
2640
- runId: input.runId,
2641
- outputFields,
2642
- sheetContract: input.sheetContract,
2643
- })
2644
- ).map((row) => [row.key, row.data]),
2369
+ const persistedRowsByKey = new Map(
2370
+ persistedPendingRows.map((row) => [row.key, row.data]),
2645
2371
  );
2646
- const completedKeySet = new Set(completedKeys);
2647
- const completedRows = input.rowEntries
2648
- .filter((entry) => completedKeySet.has(entry.key))
2649
- .map((entry) => ({
2650
- ...mergeRuntimeCompletedRow({
2651
- inputRow: entry.row,
2652
- completedData: stripRuntimeCellMeta(
2653
- completedRowsByKey.get(entry.key) ?? {},
2654
- ),
2655
- sheetContract: input.sheetContract,
2656
- }),
2657
- ...mapRowOutcomeRuntimeFields({ key: entry.key }),
2658
- }));
2372
+ input.timings?.push({
2373
+ phase: 'mark_rows_pending_for_recompute',
2374
+ ms: Date.now() - startedAt,
2375
+ rows: input.rowEntries.length,
2376
+ });
2659
2377
  return {
2660
2378
  inserted: input.inserted,
2661
- skipped:
2662
- completedKeys.length + (input.sourceRowsLength - input.rowEntries.length),
2663
- pendingRows: pendingRows.map((entry) => ({
2664
- ...mergeRuntimeCompletedRow({
2665
- inputRow: entry.row,
2666
- completedData: existingPendingRowsByKey.get(entry.key) ?? {},
2667
- sheetContract: input.sheetContract,
2668
- }),
2669
- ...mapRowOutcomeRuntimeFields({ key: entry.key }),
2670
- })),
2671
- completedRows,
2379
+ skipped: input.sourceRowsLength - input.rowEntries.length,
2380
+ pendingRows: input.rowEntries.map((entry) => {
2381
+ const merged = { ...entry.row };
2382
+ for (const [field, value] of Object.entries(
2383
+ persistedRowsByKey.get(entry.key) ?? {},
2384
+ )) {
2385
+ if (
2386
+ value !== null ||
2387
+ !Object.prototype.hasOwnProperty.call(merged, field)
2388
+ ) {
2389
+ merged[field] = value;
2390
+ }
2391
+ }
2392
+ return {
2393
+ ...merged,
2394
+ __deeplineRowKey: entry.key,
2395
+ };
2396
+ }),
2397
+ completedRows: [],
2672
2398
  tableNamespace: input.tableNamespace,
2673
2399
  };
2674
2400
  }
2675
2401
 
2676
/**
 * Flips the sheet rows identified by `input.keys` back to `pending` for
 * `input.runId` and folds the resulting status transitions into the summary
 * table, all in a single statement.
 *
 * A row is rewritten only when its stored `_run_id` is NULL or compares `<=`
 * the supplied run id, and it is not already `pending` for this same run.
 * The comparison is done on the text value of the run id.
 * NOTE(review): presumably run ids sort chronologically as text — confirm.
 *
 * Summary bookkeeping counts only rows whose previous status was `enriched`,
 * `failed` or `running`; a row that was already `pending` under a different
 * run id is re-stamped but does not move any counter.
 *
 * @param client - Query client used to run the statement.
 * @param session - Session used to resolve the sheet and summary table names.
 * @param input - Row keys, the run id to stamp, and the summary row identity
 *   (`normalizedPlayName`, `normalizedTableNamespace`).
 * @returns Nothing; resolves once the statement has completed. A no-op when
 *   `input.keys` is empty.
 */
async function markRuntimeRowsPendingForRecompute(
  client: RuntimeQueryClient,
  session: RuntimePostgresSession,
  input: {
    keys: string[];
    runId: string;
    normalizedPlayName: string;
    normalizedTableNamespace: string;
  },
): Promise<void> {
  const { keys, runId, normalizedPlayName, normalizedTableNamespace } = input;
  if (keys.length === 0) {
    return;
  }

  // target_rows locks the candidate rows and remembers their status *before*
  // the update so the summary deltas can be derived from it.
  const statement = `WITH target_rows AS (
      SELECT _key, _status
      FROM ${sheetTable(session)}
      WHERE _key = ANY($1::text[])
      FOR UPDATE
    ),
    updated AS (
      UPDATE ${sheetTable(session)} AS target
      SET _status = 'pending',
          _run_id = $2::text,
          _updated_at = now(),
          _version = ${nextRuntimeSheetVersionExpression(session)}
      FROM target_rows
      WHERE target._key = target_rows._key
        AND (target._run_id IS NULL OR target._run_id <= $2::text)
        AND (
          target._status <> 'pending'
          OR target._run_id IS DISTINCT FROM $2::text
        )
      RETURNING target_rows._status AS previous_status
    ),
    summary_counts AS (
      SELECT
        count(*) FILTER (WHERE previous_status = 'enriched')::int AS completed_to_pending,
        count(*) FILTER (WHERE previous_status = 'failed')::int AS failed_to_pending,
        count(*) FILTER (WHERE previous_status = 'running')::int AS running_to_pending
      FROM updated
    ),
    summary_delta AS (
      INSERT INTO ${summaryTable(session)} AS target (play_name, table_namespace, total, queued, running, completed, failed)
      SELECT
        $3::text,
        $4::text,
        0,
        completed_to_pending + failed_to_pending + running_to_pending,
        -running_to_pending,
        -completed_to_pending,
        -failed_to_pending
      FROM summary_counts
      WHERE completed_to_pending > 0 OR failed_to_pending > 0 OR running_to_pending > 0
      ON CONFLICT (play_name, table_namespace) DO UPDATE SET
        queued = GREATEST(target.queued + EXCLUDED.queued, 0),
        running = GREATEST(target.running + EXCLUDED.running, 0),
        completed = GREATEST(target.completed + EXCLUDED.completed, 0),
        failed = GREATEST(target.failed + EXCLUDED.failed, 0),
        total = ${runtimeSummaryTotalSql({
          currentTotal: 'target.total',
          totalDelta: 'EXCLUDED.total',
          queued: 'GREATEST(target.queued + EXCLUDED.queued, 0)',
          running: 'GREATEST(target.running + EXCLUDED.running, 0)',
          completed: 'GREATEST(target.completed + EXCLUDED.completed, 0)',
          failed: 'GREATEST(target.failed + EXCLUDED.failed, 0)',
        })},
        _updated_at = now()
      RETURNING 1
    )
    SELECT 1`;

  // Positional parameters: $1 keys, $2 run id, $3 play name, $4 namespace.
  const parameters = [keys, runId, normalizedPlayName, normalizedTableNamespace];
  await client.query(statement, parameters);
}
2695
2479
 
2696
- function stripRuntimeCellMeta(
2697
- data: Record<string, unknown>,
2698
- ): Record<string, unknown> {
2699
- if (!Object.prototype.hasOwnProperty.call(data, DEEPLINE_CELL_META_FIELD)) {
2700
- return data;
2701
- }
2702
- const { [DEEPLINE_CELL_META_FIELD]: _cellMeta, ...publicData } = data;
2703
- void _cellMeta;
2704
- return publicData;
2705
- }
2706
-
2707
2480
  async function getRuntimeWorkReceiptSession(
2708
2481
  context: RuntimeApiContext,
2709
2482
  input: {
@@ -2737,6 +2510,27 @@ async function getRuntimeWorkReceiptSession(
2737
2510
  return session;
2738
2511
  }
2739
2512
 
2513
/**
 * Resolves one work-receipt session for a batch of keys.
 *
 * The session is obtained from the first key that is non-blank after
 * trimming; every key in `input.keys` (blank ones included) is then passed to
 * `validateRuntimeWorkReceiptKeyScope` against that session.
 *
 * @param context - Runtime API context forwarded to the session lookup.
 * @param input - Play name and the batch of receipt keys.
 * @returns The session resolved from the first non-blank key.
 * @throws Error when no key is non-blank. Anything thrown by the session
 *   lookup or by the per-key scope validation propagates unchanged.
 */
async function getRuntimeWorkReceiptSessionForKeys(
  context: RuntimeApiContext,
  input: {
    playName: string;
    keys: string[];
  },
): Promise<RuntimePostgresSession> {
  const anchorKey = input.keys.find((candidate) => candidate.trim().length > 0);
  if (anchorKey === undefined) {
    throw new Error('Runtime work receipt batch requires at least one key.');
  }

  const session = await getRuntimeWorkReceiptSession(context, {
    playName: input.playName,
    key: anchorKey,
  });

  // Validate the whole batch against the session chosen from the first key.
  input.keys.forEach((key) => {
    validateRuntimeWorkReceiptKeyScope(session, { key });
  });
  return session;
}
2533
+
2740
2534
  async function readRuntimeWorkReceipt(
2741
2535
  client: RuntimeQueryClient,
2742
2536
  session: RuntimePostgresSession,
@@ -2767,6 +2561,49 @@ export async function getRuntimeWorkReceipt(
2767
2561
  );
2768
2562
  }
2769
2563
 
2564
/**
 * Reads the stored work receipts for a batch of keys.
 *
 * Keys are trimmed, blank keys are dropped and duplicates are removed before
 * the lookup. Rows come back ordered by the position of their key in that
 * de-duplicated list; keys that have no stored receipt produce no row.
 *
 * @param context - Runtime API context.
 * @param input - Play name and the receipt keys to read.
 * @returns The receipts that exist, mapped with `mapRuntimeWorkReceiptRow`;
 *   an empty array when no usable key was supplied.
 */
export async function getRuntimeWorkReceipts(
  context: RuntimeApiContext,
  input: {
    playName: string;
    keys: string[];
  },
): Promise<WorkReceipt[]> {
  const uniqueKeys = new Set<string>();
  for (const rawKey of input.keys) {
    const trimmedKey = rawKey.trim();
    if (trimmedKey) {
      uniqueKeys.add(trimmedKey);
    }
  }
  const keys = Array.from(uniqueKeys);
  if (keys.length === 0) {
    return [];
  }

  const session = await getRuntimeWorkReceiptSessionForKeys(context, {
    playName: input.playName,
    keys,
  });

  const receipts = await withRuntimeWorkReceiptClient(
    context,
    session,
    async (client) => {
      // Keys travel hex-encoded and are decoded server-side to match column k.
      const keyHexes = keys.map(workReceiptKeyHex);
      const { rows } = await client.query<Record<string, unknown>>(
        `
        WITH input_keys AS (
          SELECT key_values.key_hex, key_values.ord
          FROM unnest($1::text[]) WITH ORDINALITY AS key_values(key_hex, ord)
        )
        SELECT convert_from(receipts.k, 'UTF8') AS k,
               receipts.status,
               receipts.output,
               receipts.error,
               receipts.run_id,
               receipts.updated_at
        FROM input_keys
        JOIN ${workReceiptTable(session)} AS receipts
          ON receipts.k = decode(input_keys.key_hex, 'hex')
        ORDER BY input_keys.ord
        `,
        [keyHexes],
      );
      return rows.map(mapRuntimeWorkReceiptRow);
    },
  );
  return receipts;
}
2606
+
2770
2607
  export async function claimRuntimeWorkReceipt(
2771
2608
  context: RuntimeApiContext,
2772
2609
  input: {
@@ -2774,6 +2611,7 @@ export async function claimRuntimeWorkReceipt(
2774
2611
  runId: string;
2775
2612
  key: string;
2776
2613
  reclaimRunning?: boolean;
2614
+ forceRefresh?: boolean;
2777
2615
  },
2778
2616
  ): Promise<WorkReceiptClaim> {
2779
2617
  const session = await getRuntimeWorkReceiptSession(context, {
@@ -2784,6 +2622,21 @@ export async function claimRuntimeWorkReceipt(
2784
2622
  context,
2785
2623
  session,
2786
2624
  async (client) => {
2625
+ const claimableStatuses = input.forceRefresh
2626
+ ? [
2627
+ RECEIPT_STATUS_PENDING,
2628
+ RECEIPT_STATUS_FAILED,
2629
+ RECEIPT_STATUS_RUNNING,
2630
+ RECEIPT_STATUS_COMPLETED,
2631
+ RECEIPT_STATUS_SKIPPED,
2632
+ ]
2633
+ : input.reclaimRunning
2634
+ ? [
2635
+ RECEIPT_STATUS_PENDING,
2636
+ RECEIPT_STATUS_FAILED,
2637
+ RECEIPT_STATUS_RUNNING,
2638
+ ]
2639
+ : [RECEIPT_STATUS_PENDING, RECEIPT_STATUS_FAILED];
2787
2640
  const { rows } = await client.query<Record<string, unknown>>(
2788
2641
  `
2789
2642
  WITH claimed AS (
@@ -2795,7 +2648,7 @@ export async function claimRuntimeWorkReceipt(
2795
2648
  run_id = $3,
2796
2649
  error = NULL,
2797
2650
  updated_at = now()
2798
- WHERE ${workReceiptTable(session)}.status IN ($5::smallint, $6::smallint)
2651
+ WHERE ${workReceiptTable(session)}.status = ANY($5::smallint[])
2799
2652
  RETURNING convert_from(k, 'UTF8') AS k, status, output, error, run_id, updated_at
2800
2653
  )
2801
2654
  SELECT k, status, output, error, run_id, updated_at
@@ -2806,8 +2659,7 @@ export async function claimRuntimeWorkReceipt(
2806
2659
  RECEIPT_STATUS_RUNNING,
2807
2660
  input.runId,
2808
2661
  RECEIPT_STATUS_RUNNING,
2809
- RECEIPT_STATUS_PENDING,
2810
- RECEIPT_STATUS_FAILED,
2662
+ claimableStatuses,
2811
2663
  ],
2812
2664
  );
2813
2665
  const claimed = rows[0] ? mapRuntimeWorkReceiptRow(rows[0]) : null;
@@ -2832,6 +2684,132 @@ export async function claimRuntimeWorkReceipt(
2832
2684
  );
2833
2685
  }
2834
2686
 
2687
/**
 * Attempts to claim a batch of work receipts for `input.runId`.
 *
 * Keys are trimmed, blank keys dropped and duplicates removed. For each key
 * the statement inserts a receipt in the running status, or — when a receipt
 * already exists and its status is claimable — resets it to running under
 * this run id with output and error cleared.
 *
 * Claimable statuses:
 * - default: pending, failed
 * - `reclaimRunning`: pending, failed, running
 * - `forceRefresh`: pending, failed, running, completed, skipped
 *   (takes precedence over `reclaimRunning`)
 *
 * Receipts that could not be claimed are returned in their stored state and
 * classified as `reused` (when `isReusableWorkReceipt` accepts them),
 * `failed` (status `failed`) or `running` (everything else).
 *
 * @param context - Runtime API context.
 * @param input - Play name, claiming run id, keys and claim options.
 * @returns One claim result per returned row, ordered by key position in the
 *   de-duplicated key list; an empty array when no usable key was supplied.
 */
export async function claimRuntimeWorkReceipts(
  context: RuntimeApiContext,
  input: {
    playName: string;
    runId: string;
    keys: string[];
    reclaimRunning?: boolean;
    forceRefresh?: boolean;
  },
): Promise<WorkReceiptClaim[]> {
  const uniqueKeys = new Set<string>();
  for (const rawKey of input.keys) {
    const trimmedKey = rawKey.trim();
    if (trimmedKey) {
      uniqueKeys.add(trimmedKey);
    }
  }
  const keys = Array.from(uniqueKeys);
  if (keys.length === 0) {
    return [];
  }

  const session = await getRuntimeWorkReceiptSessionForKeys(context, {
    playName: input.playName,
    keys,
  });

  const claims = await withRuntimeWorkReceiptClient(
    context,
    session,
    async (client) => {
      const keyHexes = keys.map(workReceiptKeyHex);

      // Widen the claimable set step by step; forceRefresh implies the
      // running status as well as the terminal ones.
      const reclaimStatuses = [RECEIPT_STATUS_PENDING, RECEIPT_STATUS_FAILED];
      if (input.forceRefresh || input.reclaimRunning) {
        reclaimStatuses.push(RECEIPT_STATUS_RUNNING);
      }
      if (input.forceRefresh) {
        reclaimStatuses.push(RECEIPT_STATUS_COMPLETED, RECEIPT_STATUS_SKIPPED);
      }

      const { rows } = await client.query<Record<string, unknown>>(
        `
        WITH input_keys AS (
          SELECT key_values.key_hex, key_values.ord
          FROM unnest($1::text[]) WITH ORDINALITY AS key_values(key_hex, ord)
        ),
        claimed AS (
          INSERT INTO ${workReceiptTable(session)} (k, status, run_id, updated_at)
          SELECT decode(input_keys.key_hex, 'hex'), $2::smallint, $3::text, now()
          FROM input_keys
          ON CONFLICT (k) DO UPDATE
          SET status = $4::smallint,
              output = NULL,
              run_id = $3::text,
              error = NULL,
              updated_at = now()
          WHERE ${workReceiptTable(session)}.status = ANY($5::smallint[])
          RETURNING k, status, output, error, run_id, updated_at
        ),
        latest AS (
          SELECT receipts.k,
                 receipts.status,
                 receipts.output,
                 receipts.error,
                 receipts.run_id,
                 receipts.updated_at,
                 input_keys.ord,
                 false AS claimed
          FROM input_keys
          JOIN ${workReceiptTable(session)} AS receipts
            ON receipts.k = decode(input_keys.key_hex, 'hex')
          WHERE NOT EXISTS (
            SELECT 1 FROM claimed WHERE claimed.k = receipts.k
          )
        ),
        returned AS (
          SELECT claimed.k,
                 claimed.status,
                 claimed.output,
                 claimed.error,
                 claimed.run_id,
                 claimed.updated_at,
                 input_keys.ord,
                 true AS claimed
          FROM claimed
          JOIN input_keys ON claimed.k = decode(input_keys.key_hex, 'hex')
          UNION ALL
          SELECT k, status, output, error, run_id, updated_at, ord, claimed
          FROM latest
        )
        SELECT convert_from(k, 'UTF8') AS k,
               status,
               output,
               error,
               run_id,
               updated_at,
               claimed
        FROM returned
        ORDER BY ord
        `,
        [
          keyHexes,
          RECEIPT_STATUS_RUNNING,
          input.runId,
          RECEIPT_STATUS_RUNNING,
          reclaimStatuses,
        ],
      );

      const toClaim = (row: Record<string, unknown>): WorkReceiptClaim => {
        const receipt = mapRuntimeWorkReceiptRow(row);
        if (row.claimed === true) {
          return { disposition: 'claimed', receipt };
        }
        if (isReusableWorkReceipt(receipt)) {
          return { disposition: 'reused', receipt };
        }
        if (receipt.status === 'failed') {
          return { disposition: 'failed', receipt };
        }
        // Anything else that was not claimed (running included) is reported
        // as held by another run.
        return { disposition: 'running', receipt };
      };
      return rows.map((row) => toClaim(row));
    },
  );
  return claims;
}
2812
+
2835
2813
  export async function completeRuntimeWorkReceipt(
2836
2814
  context: RuntimeApiContext,
2837
2815
  input: {
@@ -2859,7 +2837,8 @@ export async function completeRuntimeWorkReceipt(
2859
2837
  run_id = $4,
2860
2838
  updated_at = now()
2861
2839
  WHERE k = decode($1, 'hex')
2862
- AND (run_id IS NULL OR run_id <= $4)
2840
+ AND status = $5::smallint
2841
+ AND run_id = $4
2863
2842
  RETURNING convert_from(k, 'UTF8') AS k, status, output, error, run_id, updated_at
2864
2843
  ),
2865
2844
  latest AS (
@@ -2877,6 +2856,7 @@ export async function completeRuntimeWorkReceipt(
2877
2856
  RECEIPT_STATUS_COMPLETED,
2878
2857
  input.output === null ? null : stringifyPostgresJson(input.output),
2879
2858
  input.runId,
2859
+ RECEIPT_STATUS_RUNNING,
2880
2860
  ],
2881
2861
  );
2882
2862
  return rows[0] ? mapRuntimeWorkReceiptRow(rows[0]) : null;
@@ -2884,6 +2864,95 @@ export async function completeRuntimeWorkReceipt(
2884
2864
  );
2885
2865
  }
2886
2866
 
2867
/**
 * Marks a batch of work receipts as completed and stores their outputs.
 *
 * A receipt is updated only when its stored status is the running status and
 * its stored `run_id` equals the `runId` supplied for that entry. Entries
 * that do not satisfy this are returned in their current stored state;
 * entries with no stored receipt produce no row. Rows are ordered by the
 * position of the entry in the filtered input list.
 *
 * NOTE(review): entries with a blank key are dropped, but keys are neither
 * trimmed nor de-duplicated here, unlike `getRuntimeWorkReceipts` — confirm
 * callers always pass normalized keys.
 * NOTE(review): a `null` output travels as a JSON `null` array element, so
 * the stored value is presumably jsonb `null` rather than SQL NULL — confirm
 * this is equivalent for readers of the `output` column.
 *
 * @param context - Runtime API context.
 * @param input - Play name and the `{ runId, key, output }` entries to
 *   complete.
 * @returns The affected or current receipts mapped with
 *   `mapRuntimeWorkReceiptRow`; an empty array when no entry has a usable key.
 */
export async function completeRuntimeWorkReceipts(
  context: RuntimeApiContext,
  input: {
    playName: string;
    receipts: Array<{ runId: string; key: string; output: unknown }>;
  },
): Promise<WorkReceipt[]> {
  const entries = input.receipts.filter((entry) => entry.key.trim());
  if (entries.length === 0) {
    return [];
  }

  const session = await getRuntimeWorkReceiptSessionForKeys(context, {
    playName: input.playName,
    keys: entries.map((entry) => entry.key),
  });

  const completedReceipts = await withRuntimeWorkReceiptClient(
    context,
    session,
    async (client) => {
      // Three parallel, position-aligned inputs; the statement re-joins them
      // by ordinality.
      const keyHexes = entries.map((entry) => workReceiptKeyHex(entry.key));
      const runIds = entries.map((entry) => entry.runId);
      const outputsJson = stringifyPostgresJson(
        entries.map((entry) => entry.output),
      );

      const { rows } = await client.query<Record<string, unknown>>(
        `
        WITH inputs AS (
          SELECT key_values.key_hex,
                 run_values.run_id,
                 output_values.output,
                 key_values.ord
          FROM unnest($1::text[]) WITH ORDINALITY AS key_values(key_hex, ord)
          JOIN unnest($2::text[]) WITH ORDINALITY AS run_values(run_id, ord)
            ON run_values.ord = key_values.ord
          JOIN jsonb_array_elements($3::jsonb) WITH ORDINALITY AS output_values(output, ord)
            ON output_values.ord = key_values.ord
        ),
        completed AS (
          UPDATE ${workReceiptTable(session)} AS target
          SET status = $4::smallint,
              output = inputs.output,
              error = NULL,
              run_id = inputs.run_id,
              updated_at = now()
          FROM inputs
          WHERE target.k = decode(inputs.key_hex, 'hex')
            AND target.status = $5::smallint
            AND target.run_id = inputs.run_id
          RETURNING target.k, target.status, target.output, target.error, target.run_id, target.updated_at, inputs.ord
        ),
        latest AS (
          SELECT receipts.k,
                 receipts.status,
                 receipts.output,
                 receipts.error,
                 receipts.run_id,
                 receipts.updated_at,
                 inputs.ord
          FROM inputs
          JOIN ${workReceiptTable(session)} AS receipts
            ON receipts.k = decode(inputs.key_hex, 'hex')
          WHERE NOT EXISTS (
            SELECT 1 FROM completed WHERE completed.k = receipts.k
          )
        ),
        returned AS (
          SELECT k, status, output, error, run_id, updated_at, ord FROM completed
          UNION ALL
          SELECT k, status, output, error, run_id, updated_at, ord FROM latest
        )
        SELECT convert_from(returned.k, 'UTF8') AS k,
               returned.status,
               returned.output,
               returned.error,
               returned.run_id,
               returned.updated_at
        FROM returned
        ORDER BY returned.ord
        `,
        [
          keyHexes,
          runIds,
          outputsJson,
          RECEIPT_STATUS_COMPLETED,
          RECEIPT_STATUS_RUNNING,
        ],
      );
      return rows.map(mapRuntimeWorkReceiptRow);
    },
  );
  return completedReceipts;
}
2955
+
2887
2956
  export async function failRuntimeWorkReceipt(
2888
2957
  context: RuntimeApiContext,
2889
2958
  input: {
@@ -2911,9 +2980,8 @@ export async function failRuntimeWorkReceipt(
2911
2980
  run_id = $4,
2912
2981
  updated_at = now()
2913
2982
  WHERE k = decode($1, 'hex')
2914
- AND status <> $5::smallint
2915
- AND status <> $6::smallint
2916
- AND (run_id IS NULL OR run_id <= $4)
2983
+ AND status = $5::smallint
2984
+ AND run_id = $4
2917
2985
  RETURNING convert_from(k, 'UTF8') AS k, status, output, error, run_id, updated_at
2918
2986
  ),
2919
2987
  latest AS (
@@ -2931,8 +2999,7 @@ export async function failRuntimeWorkReceipt(
2931
2999
  RECEIPT_STATUS_FAILED,
2932
3000
  input.error,
2933
3001
  input.runId,
2934
- RECEIPT_STATUS_COMPLETED,
2935
- RECEIPT_STATUS_SKIPPED,
3002
+ RECEIPT_STATUS_RUNNING,
2936
3003
  ],
2937
3004
  );
2938
3005
  return rows[0] ? mapRuntimeWorkReceiptRow(rows[0]) : null;
@@ -2940,6 +3007,91 @@ export async function failRuntimeWorkReceipt(
2940
3007
  );
2941
3008
  }
2942
3009
 
3010
/**
 * Marks a batch of work receipts as failed and records their error text.
 *
 * A receipt is updated only when its stored status is the running status and
 * its stored `run_id` equals the `runId` supplied for that entry; its output
 * is cleared at the same time. Entries that do not satisfy this are returned
 * in their current stored state; entries with no stored receipt produce no
 * row. Rows are ordered by the position of the entry in the filtered input
 * list.
 *
 * NOTE(review): entries with a blank key are dropped, but keys are neither
 * trimmed nor de-duplicated here, unlike `getRuntimeWorkReceipts` — confirm
 * callers always pass normalized keys.
 *
 * @param context - Runtime API context.
 * @param input - Play name and the `{ runId, key, error }` entries to fail.
 * @returns The affected or current receipts mapped with
 *   `mapRuntimeWorkReceiptRow`; an empty array when no entry has a usable key.
 */
export async function failRuntimeWorkReceipts(
  context: RuntimeApiContext,
  input: {
    playName: string;
    receipts: Array<{ runId: string; key: string; error: string }>;
  },
): Promise<WorkReceipt[]> {
  const entries = input.receipts.filter((entry) => entry.key.trim());
  if (entries.length === 0) {
    return [];
  }

  const session = await getRuntimeWorkReceiptSessionForKeys(context, {
    playName: input.playName,
    keys: entries.map((entry) => entry.key),
  });

  const failedReceipts = await withRuntimeWorkReceiptClient(
    context,
    session,
    async (client) => {
      // Three parallel, position-aligned arrays; the statement re-joins them
      // by ordinality.
      const keyHexes = entries.map((entry) => workReceiptKeyHex(entry.key));
      const runIds = entries.map((entry) => entry.runId);
      const errors = entries.map((entry) => entry.error);

      const { rows } = await client.query<Record<string, unknown>>(
        `
        WITH inputs AS (
          SELECT key_values.key_hex,
                 run_values.run_id,
                 error_values.error,
                 key_values.ord
          FROM unnest($1::text[]) WITH ORDINALITY AS key_values(key_hex, ord)
          JOIN unnest($2::text[]) WITH ORDINALITY AS run_values(run_id, ord)
            ON run_values.ord = key_values.ord
          JOIN unnest($3::text[]) WITH ORDINALITY AS error_values(error, ord)
            ON error_values.ord = key_values.ord
        ),
        failed AS (
          UPDATE ${workReceiptTable(session)} AS target
          SET status = $4::smallint,
              output = NULL,
              error = inputs.error,
              run_id = inputs.run_id,
              updated_at = now()
          FROM inputs
          WHERE target.k = decode(inputs.key_hex, 'hex')
            AND target.status = $5::smallint
            AND target.run_id = inputs.run_id
          RETURNING target.k, target.status, target.output, target.error, target.run_id, target.updated_at, inputs.ord
        ),
        latest AS (
          SELECT receipts.k,
                 receipts.status,
                 receipts.output,
                 receipts.error,
                 receipts.run_id,
                 receipts.updated_at,
                 inputs.ord
          FROM inputs
          JOIN ${workReceiptTable(session)} AS receipts
            ON receipts.k = decode(inputs.key_hex, 'hex')
          WHERE NOT EXISTS (
            SELECT 1 FROM failed WHERE failed.k = receipts.k
          )
        ),
        returned AS (
          SELECT k, status, output, error, run_id, updated_at, ord FROM failed
          UNION ALL
          SELECT k, status, output, error, run_id, updated_at, ord FROM latest
        )
        SELECT convert_from(returned.k, 'UTF8') AS k,
               returned.status,
               returned.output,
               returned.error,
               returned.run_id,
               returned.updated_at
        FROM returned
        ORDER BY returned.ord
        `,
        [
          keyHexes,
          runIds,
          errors,
          RECEIPT_STATUS_FAILED,
          RECEIPT_STATUS_RUNNING,
        ],
      );
      return rows.map(mapRuntimeWorkReceiptRow);
    },
  );
  return failedReceipts;
}
3094
+
2943
3095
  export async function skipRuntimeWorkReceipt(
2944
3096
  context: RuntimeApiContext,
2945
3097
  input: {
@@ -2967,7 +3119,8 @@ export async function skipRuntimeWorkReceipt(
2967
3119
  run_id = $4,
2968
3120
  updated_at = now()
2969
3121
  WHERE k = decode($1, 'hex')
2970
- AND (run_id IS NULL OR run_id <= $4)
3122
+ AND status = $5::smallint
3123
+ AND run_id = $4
2971
3124
  RETURNING convert_from(k, 'UTF8') AS k, status, output, error, run_id, updated_at
2972
3125
  ),
2973
3126
  latest AS (
@@ -2985,6 +3138,7 @@ export async function skipRuntimeWorkReceipt(
2985
3138
  RECEIPT_STATUS_SKIPPED,
2986
3139
  input.output === null ? null : stringifyPostgresJson(input.output),
2987
3140
  input.runId,
3141
+ RECEIPT_STATUS_RUNNING,
2988
3142
  ],
2989
3143
  );
2990
3144
  return rows[0] ? mapRuntimeWorkReceiptRow(rows[0]) : null;
@@ -3002,7 +3156,6 @@ export async function startRuntimeSheetDataset(
3002
3156
  rows: Record<string, unknown>[];
3003
3157
  runId: string;
3004
3158
  inputOffset?: number;
3005
- cellPolicies?: CellStalenessPolicyByField;
3006
3159
  },
3007
3160
  ): Promise<PrepareRuntimeSheetResult> {
3008
3161
  const totalStartedAt = Date.now();
@@ -3115,25 +3268,15 @@ export async function startRuntimeSheetDataset(
3115
3268
  },
3116
3269
  async (client) => {
3117
3270
  const prepareStartedAt = Date.now();
3118
- const hasCellPolicies = cellPolicyFields(input.cellPolicies).length > 0;
3119
- const cachedProbe = hasCellPolicies
3120
- ? { prepared: null }
3121
- : await tryPrepareRuntimeSheetDatasetRowsCachedOnly(client, session, {
3122
- chunks,
3123
- outputPhysicalColumns,
3124
- timings,
3125
- });
3126
- const prepared =
3127
- cachedProbe.prepared ??
3128
- (await prepareRuntimeSheetDatasetRows(client, session, {
3129
- chunks,
3130
- runId: input.runId,
3131
- normalizedPlayName,
3132
- normalizedTableNamespace,
3133
- physicalInsertColumnsSql,
3134
- physicalInsertValuesSql,
3135
- outputPhysicalColumns,
3136
- }));
3271
+ const prepared = await prepareRuntimeSheetDatasetRows(client, session, {
3272
+ chunks,
3273
+ runId: input.runId,
3274
+ normalizedPlayName,
3275
+ normalizedTableNamespace,
3276
+ physicalInsertColumnsSql,
3277
+ physicalInsertValuesSql,
3278
+ outputPhysicalColumns,
3279
+ });
3137
3280
  timings.push({
3138
3281
  phase: 'prepare_rows_sql',
3139
3282
  ms: Date.now() - prepareStartedAt,
@@ -3148,8 +3291,9 @@ export async function startRuntimeSheetDataset(
3148
3291
  sourceRowsLength: input.rows.length,
3149
3292
  rowEntries,
3150
3293
  sheetContract: input.sheetContract,
3294
+ normalizedPlayName,
3295
+ normalizedTableNamespace,
3151
3296
  runId: input.runId,
3152
- cellPolicies: input.cellPolicies,
3153
3297
  timings,
3154
3298
  ...prepared,
3155
3299
  });
@@ -3178,18 +3322,217 @@ export async function startRuntimeSheetDataset(
3178
3322
  return { ...result, timings };
3179
3323
  }
3180
3324
 
3325
/**
 * Shared input for `completeRuntimeMapRowChunks` and
 * `completeRuntimeMapRowChunksWithInputIndexRepair`.
 */
type CompleteRuntimeMapRowChunksInput = {
  /** Run id stamped onto every row that gets completed. */
  runId: string;
  /** Play name identifying the summary rows to adjust. */
  normalizedPlayName: string;
  /** Table namespace identifying the summary rows to adjust. */
  normalizedTableNamespace: string;
  /** Prepared rows, pre-split into chunks; each chunk is one statement. */
  chunks: RuntimePreparedCompletedRow[][];
  /** Output field names whose per-cell status is checked and counted. */
  outputFields: string[];
  /** SQL fragment appended verbatim to the UPDATE's SET list. */
  physicalUpdateSetSql: string;
  /** Column projections used to build the "patched cell changed" predicate. */
  physicalColumnProjections: PhysicalSheetColumnProjection[];
};
3334
+
3181
3335
  async function completeRuntimeMapRowChunks(
3182
3336
  client: RuntimeQueryClient,
3183
3337
  session: RuntimePostgresSession,
3184
- input: {
3185
- chunks: RuntimePreparedCompletedRow[][];
3186
- physicalUpdateSetSql: string;
3187
- physicalColumnProjections: PhysicalSheetColumnProjection[];
3188
- runId: string;
3189
- normalizedPlayName: string;
3190
- normalizedTableNamespace: string;
3191
- outputFields: string[];
3192
- },
3338
+ input: CompleteRuntimeMapRowChunksInput,
3339
+ ): Promise<{ updated: number }> {
3340
+ let updated = 0;
3341
+ for (const chunk of input.chunks) {
3342
+ const chunkKeys = chunk.map((row) => row.key);
3343
+ const chunkInputIndexes = chunk.map((row) => row.input_index);
3344
+ const chunkDataPatches = chunk.map((row) =>
3345
+ stringifyPostgresJson(row.data_patch),
3346
+ );
3347
+ const chunkCellMetaPatches = chunk.map((row) =>
3348
+ stringifyPostgresJson(row.cell_meta_patch),
3349
+ );
3350
+ const targetChangedPatchedCellSql = changedPatchedCellSql(
3351
+ 'target',
3352
+ 'updates.data_patch',
3353
+ input.physicalColumnProjections,
3354
+ );
3355
+ const { rows } = await client.query<{
3356
+ updated: number;
3357
+ matched_keys: string[];
3358
+ }>(
3359
+ `WITH updates AS (
3360
+ SELECT key_values._key,
3361
+ input_index_values.input_index,
3362
+ data_values.data_patch,
3363
+ cell_meta_values.cell_meta_patch
3364
+ FROM unnest($1::text[]) WITH ORDINALITY AS key_values(_key, ord)
3365
+ JOIN unnest($2::bigint[]) WITH ORDINALITY AS input_index_values(input_index, ord)
3366
+ ON input_index_values.ord = key_values.ord
3367
+ JOIN unnest($3::jsonb[]) WITH ORDINALITY AS data_values(data_patch, ord)
3368
+ ON data_values.ord = key_values.ord
3369
+ JOIN unnest($4::jsonb[]) WITH ORDINALITY AS cell_meta_values(cell_meta_patch, ord)
3370
+ ON cell_meta_values.ord = key_values.ord
3371
+ ),
3372
+ matched_updates AS (
3373
+ SELECT target._key AS matched_key,
3374
+ target._status AS prev_status,
3375
+ target._cell_meta AS prev_cell_meta,
3376
+ updates.input_index,
3377
+ updates.data_patch,
3378
+ updates.cell_meta_patch
3379
+ FROM updates
3380
+ JOIN ${sheetTable(session)} AS target
3381
+ ON target._key = updates._key
3382
+ ),
3383
+ applied_rows AS (
3384
+ UPDATE ${sheetTable(session)} AS target
3385
+ SET _status = 'enriched',
3386
+ _run_id = $5::text,
3387
+ _error = NULL,
3388
+ _updated_at = now(),
3389
+ _version = ${nextRuntimeSheetVersionExpression(session)},
3390
+ _cell_meta = ${mergeRuntimeCellMetaPatchSql('target._cell_meta', 'updates.cell_meta_patch')}${input.physicalUpdateSetSql}
3391
+ FROM matched_updates AS updates
3392
+ WHERE target._key = updates.matched_key
3393
+ AND (target._run_id IS NULL OR target._run_id <= $5::text)
3394
+ AND (
3395
+ target._status <> 'enriched'
3396
+ OR (${targetChangedPatchedCellSql})
3397
+ OR EXISTS (
3398
+ SELECT 1
3399
+ FROM unnest($8::text[]) AS field_values(field)
3400
+ WHERE coalesce(target._cell_meta -> field_values.field ->> 'status', '') <> 'completed'
3401
+ )
3402
+ )
3403
+ RETURNING target._key, updates.prev_status, updates.prev_cell_meta
3404
+ ),
3405
+ applied_count AS (
3406
+ SELECT count(*)::bigint AS c,
3407
+ count(*) FILTER (WHERE prev_status = 'failed')::bigint AS from_failed,
3408
+ count(*) FILTER (WHERE prev_status = 'running')::bigint AS from_running,
3409
+ count(*) FILTER (WHERE prev_status <> 'enriched')::bigint AS newly_completed
3410
+ FROM applied_rows
3411
+ ),
3412
+ summary_counts AS (
3413
+ SELECT newly_completed,
3414
+ from_failed,
3415
+ from_running,
3416
+ GREATEST(newly_completed - from_failed - from_running, 0)::bigint AS from_queued
3417
+ FROM applied_count
3418
+ ),
3419
+ summary_delta AS (
3420
+ INSERT INTO ${summaryTable(session)} AS target (
3421
+ play_name,
3422
+ table_namespace,
3423
+ total,
3424
+ queued,
3425
+ running,
3426
+ completed,
3427
+ failed
3428
+ )
3429
+ SELECT $6::text,
3430
+ $7::text,
3431
+ 0,
3432
+ (-from_queued)::int,
3433
+ (-from_running)::int,
3434
+ newly_completed::int,
3435
+ (-from_failed)::int
3436
+ FROM summary_counts
3437
+ WHERE newly_completed > 0 OR from_failed > 0
3438
+ ON CONFLICT (play_name, table_namespace) DO UPDATE SET
3439
+ queued = GREATEST(target.queued + EXCLUDED.queued, 0),
3440
+ running = GREATEST(target.running + EXCLUDED.running, 0),
3441
+ completed = GREATEST(target.completed + EXCLUDED.completed, 0),
3442
+ failed = GREATEST(target.failed + EXCLUDED.failed, 0),
3443
+ total = ${runtimeSummaryTotalSql({
3444
+ currentTotal: 'target.total',
3445
+ totalDelta: 'EXCLUDED.total',
3446
+ queued: 'GREATEST(target.queued + EXCLUDED.queued, 0)',
3447
+ running: 'GREATEST(target.running + EXCLUDED.running, 0)',
3448
+ completed: 'GREATEST(target.completed + EXCLUDED.completed, 0)',
3449
+ failed: 'GREATEST(target.failed + EXCLUDED.failed, 0)',
3450
+ })},
3451
+ _updated_at = now()
3452
+ RETURNING 1
3453
+ ),
3454
+ completed_cell_delta AS (
3455
+ SELECT field_values.field, count(*)::bigint AS c
3456
+ FROM applied_rows
3457
+ JOIN unnest($8::text[]) AS field_values(field)
3458
+ ON applied_rows.prev_status <> 'enriched'
3459
+ OR coalesce(applied_rows.prev_cell_meta -> field_values.field ->> 'status', '') <> 'completed'
3460
+ GROUP BY field_values.field
3461
+ ),
3462
+ prev_failed_cells AS (
3463
+ SELECT field_values.field, count(*)::bigint AS c
3464
+ FROM applied_rows
3465
+ JOIN unnest($8::text[]) AS field_values(field)
3466
+ ON coalesce(applied_rows.prev_cell_meta -> field_values.field ->> 'status', '') = 'failed'
3467
+ GROUP BY field_values.field
3468
+ ),
3469
+ column_delta AS (
3470
+ INSERT INTO ${columnSummaryTable(session)} AS target (
3471
+ play_name,
3472
+ table_namespace,
3473
+ field,
3474
+ completed,
3475
+ failed
3476
+ )
3477
+ SELECT $6::text,
3478
+ $7::text,
3479
+ coalesce(completed_cell_delta.field, prev_failed_cells.field),
3480
+ coalesce(completed_cell_delta.c, 0)::int,
3481
+ (-coalesce(prev_failed_cells.c, 0))::int
3482
+ FROM completed_cell_delta
3483
+ FULL JOIN prev_failed_cells
3484
+ ON prev_failed_cells.field = completed_cell_delta.field
3485
+ WHERE coalesce(completed_cell_delta.c, 0) > 0
3486
+ OR coalesce(prev_failed_cells.c, 0) > 0
3487
+ ON CONFLICT (play_name, table_namespace, field) DO UPDATE SET
3488
+ completed = GREATEST(target.completed + EXCLUDED.completed, 0),
3489
+ failed = GREATEST(target.failed + EXCLUDED.failed, 0),
3490
+ _updated_at = now()
3491
+ RETURNING 1
3492
+ )
3493
+ SELECT
3494
+ (SELECT count(*)::int FROM applied_rows) AS updated,
3495
+ coalesce((SELECT array_agg(matched_key) FROM matched_updates), '{}'::text[]) AS matched_keys,
3496
+ (SELECT count(*)::int FROM summary_delta) AS summary_delta_count,
3497
+ (SELECT count(*)::int FROM column_delta) AS column_delta_count`,
3498
+ [
3499
+ chunkKeys,
3500
+ chunkInputIndexes,
3501
+ chunkDataPatches,
3502
+ chunkCellMetaPatches,
3503
+ input.runId,
3504
+ input.normalizedPlayName,
3505
+ input.normalizedTableNamespace,
3506
+ [...new Set(input.outputFields)],
3507
+ ],
3508
+ );
3509
+ updated += Number(rows[0]?.updated ?? 0);
3510
+
3511
+ const matchedKeys = new Set(rows[0]?.matched_keys ?? []);
3512
+ if (matchedKeys.size === chunk.length) {
3513
+ continue;
3514
+ }
3515
+
3516
+ const repairChunk = chunk.filter(
3517
+ (row) => !matchedKeys.has(row.key) && row.input_index !== null,
3518
+ );
3519
+ if (repairChunk.length === 0) {
3520
+ continue;
3521
+ }
3522
+ const repaired = await completeRuntimeMapRowChunksWithInputIndexRepair(
3523
+ client,
3524
+ session,
3525
+ { ...input, chunks: [repairChunk] },
3526
+ );
3527
+ updated += repaired.updated;
3528
+ }
3529
+ return { updated };
3530
+ }
3531
+
3532
+ async function completeRuntimeMapRowChunksWithInputIndexRepair(
3533
+ client: RuntimeQueryClient,
3534
+ session: RuntimePostgresSession,
3535
+ input: CompleteRuntimeMapRowChunksInput,
3193
3536
  ): Promise<{ updated: number }> {
3194
3537
  let updated = 0;
3195
3538
  for (const chunk of input.chunks) {
@@ -3261,9 +3604,17 @@ async function completeRuntimeMapRowChunks(
3261
3604
  applied_count AS (
3262
3605
  SELECT count(*)::bigint AS c,
3263
3606
  count(*) FILTER (WHERE prev_status = 'failed')::bigint AS from_failed,
3607
+ count(*) FILTER (WHERE prev_status = 'running')::bigint AS from_running,
3264
3608
  count(*) FILTER (WHERE prev_status <> 'enriched')::bigint AS newly_completed
3265
3609
  FROM applied_rows
3266
3610
  ),
3611
+ summary_counts AS (
3612
+ SELECT newly_completed,
3613
+ from_failed,
3614
+ from_running,
3615
+ GREATEST(newly_completed - from_failed - from_running, 0)::bigint AS from_queued
3616
+ FROM applied_count
3617
+ ),
3267
3618
  summary_delta AS (
3268
3619
  INSERT INTO ${summaryTable(session)} AS target (
3269
3620
  play_name,
@@ -3274,13 +3625,28 @@ async function completeRuntimeMapRowChunks(
3274
3625
  completed,
3275
3626
  failed
3276
3627
  )
3277
- SELECT $6::text, $7::text, 0, 0, 0, newly_completed::int, (-from_failed)::int
3278
- FROM applied_count
3628
+ SELECT $6::text,
3629
+ $7::text,
3630
+ 0,
3631
+ (-from_queued)::int,
3632
+ (-from_running)::int,
3633
+ newly_completed::int,
3634
+ (-from_failed)::int
3635
+ FROM summary_counts
3279
3636
  WHERE newly_completed > 0 OR from_failed > 0
3280
3637
  ON CONFLICT (play_name, table_namespace) DO UPDATE SET
3281
- queued = GREATEST(target.queued - (EXCLUDED.completed + EXCLUDED.failed), 0),
3638
+ queued = GREATEST(target.queued + EXCLUDED.queued, 0),
3639
+ running = GREATEST(target.running + EXCLUDED.running, 0),
3282
3640
  completed = GREATEST(target.completed + EXCLUDED.completed, 0),
3283
3641
  failed = GREATEST(target.failed + EXCLUDED.failed, 0),
3642
+ total = ${runtimeSummaryTotalSql({
3643
+ currentTotal: 'target.total',
3644
+ totalDelta: 'EXCLUDED.total',
3645
+ queued: 'GREATEST(target.queued + EXCLUDED.queued, 0)',
3646
+ running: 'GREATEST(target.running + EXCLUDED.running, 0)',
3647
+ completed: 'GREATEST(target.completed + EXCLUDED.completed, 0)',
3648
+ failed: 'GREATEST(target.failed + EXCLUDED.failed, 0)',
3649
+ })},
3284
3650
  _updated_at = now()
3285
3651
  RETURNING 1
3286
3652
  ),
@@ -3441,9 +3807,16 @@ async function failRuntimeMapRowChunks(
3441
3807
  ),
3442
3808
  applied_count AS (
3443
3809
  SELECT count(*)::bigint AS c,
3444
- count(*) FILTER (WHERE prev_status = 'failed')::bigint AS already_failed
3810
+ count(*) FILTER (WHERE prev_status = 'failed')::bigint AS already_failed,
3811
+ count(*) FILTER (WHERE prev_status = 'running')::bigint AS from_running
3445
3812
  FROM applied_rows
3446
3813
  ),
3814
+ summary_counts AS (
3815
+ SELECT (c - already_failed)::bigint AS newly_failed,
3816
+ from_running,
3817
+ GREATEST(c - already_failed - from_running, 0)::bigint AS from_queued
3818
+ FROM applied_count
3819
+ ),
3447
3820
  summary_delta AS (
3448
3821
  INSERT INTO ${summaryTable(session)} AS target (
3449
3822
  play_name,
@@ -3454,12 +3827,27 @@ async function failRuntimeMapRowChunks(
3454
3827
  completed,
3455
3828
  failed
3456
3829
  )
3457
- SELECT $7::text, $8::text, 0, 0, 0, 0, (c - already_failed)::int
3458
- FROM applied_count
3459
- WHERE c > 0
3830
+ SELECT $7::text,
3831
+ $8::text,
3832
+ 0,
3833
+ (-from_queued)::int,
3834
+ (-from_running)::int,
3835
+ 0,
3836
+ newly_failed::int
3837
+ FROM summary_counts
3838
+ WHERE newly_failed > 0
3460
3839
  ON CONFLICT (play_name, table_namespace) DO UPDATE SET
3461
- queued = GREATEST(target.queued - EXCLUDED.failed, 0),
3840
+ queued = GREATEST(target.queued + EXCLUDED.queued, 0),
3841
+ running = GREATEST(target.running + EXCLUDED.running, 0),
3462
3842
  failed = GREATEST(target.failed + EXCLUDED.failed, 0),
3843
+ total = ${runtimeSummaryTotalSql({
3844
+ currentTotal: 'target.total',
3845
+ totalDelta: 'EXCLUDED.total',
3846
+ queued: 'GREATEST(target.queued + EXCLUDED.queued, 0)',
3847
+ running: 'GREATEST(target.running + EXCLUDED.running, 0)',
3848
+ completed: 'target.completed',
3849
+ failed: 'GREATEST(target.failed + EXCLUDED.failed, 0)',
3850
+ })},
3463
3851
  _updated_at = now()
3464
3852
  RETURNING 1
3465
3853
  ),