@elaraai/e3-core 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92):
  1. package/README.md +3 -2
  2. package/dist/src/dataflow/orchestrator/LocalOrchestrator.d.ts.map +1 -1
  3. package/dist/src/dataflow/orchestrator/LocalOrchestrator.js +65 -6
  4. package/dist/src/dataflow/orchestrator/LocalOrchestrator.js.map +1 -1
  5. package/dist/src/dataflow/steps.d.ts.map +1 -1
  6. package/dist/src/dataflow/steps.js +2 -3
  7. package/dist/src/dataflow/steps.js.map +1 -1
  8. package/dist/src/dataflow/steps.spec.js +1 -0
  9. package/dist/src/dataflow/steps.spec.js.map +1 -1
  10. package/dist/src/dataflow-orchestration.spec.js +399 -0
  11. package/dist/src/dataflow-orchestration.spec.js.map +1 -1
  12. package/dist/src/dataflow.d.ts.map +1 -1
  13. package/dist/src/dataflow.js +2 -3
  14. package/dist/src/dataflow.js.map +1 -1
  15. package/dist/src/dataflow.spec.js +2 -152
  16. package/dist/src/dataflow.spec.js.map +1 -1
  17. package/dist/src/dataset-refs.spec.d.ts +6 -0
  18. package/dist/src/dataset-refs.spec.d.ts.map +1 -0
  19. package/dist/src/dataset-refs.spec.js +107 -0
  20. package/dist/src/dataset-refs.spec.js.map +1 -0
  21. package/dist/src/execution/LocalTaskRunner.d.ts +3 -0
  22. package/dist/src/execution/LocalTaskRunner.d.ts.map +1 -1
  23. package/dist/src/execution/LocalTaskRunner.js +81 -125
  24. package/dist/src/execution/LocalTaskRunner.js.map +1 -1
  25. package/dist/src/execution/LocalTaskRunner.spec.d.ts +6 -0
  26. package/dist/src/execution/LocalTaskRunner.spec.d.ts.map +1 -0
  27. package/dist/src/execution/LocalTaskRunner.spec.js +52 -0
  28. package/dist/src/execution/LocalTaskRunner.spec.js.map +1 -0
  29. package/dist/src/execution/MockTaskRunner.d.ts +8 -0
  30. package/dist/src/execution/MockTaskRunner.d.ts.map +1 -1
  31. package/dist/src/execution/MockTaskRunner.js +21 -0
  32. package/dist/src/execution/MockTaskRunner.js.map +1 -1
  33. package/dist/src/execution/index.d.ts +2 -0
  34. package/dist/src/execution/index.d.ts.map +1 -1
  35. package/dist/src/execution/index.js +4 -0
  36. package/dist/src/execution/index.js.map +1 -1
  37. package/dist/src/execution/interfaces.d.ts +11 -0
  38. package/dist/src/execution/interfaces.d.ts.map +1 -1
  39. package/dist/src/execution/processExec.d.ts +108 -0
  40. package/dist/src/execution/processExec.d.ts.map +1 -0
  41. package/dist/src/execution/processExec.js +271 -0
  42. package/dist/src/execution/processExec.js.map +1 -0
  43. package/dist/src/execution/runDetached.d.ts +83 -0
  44. package/dist/src/execution/runDetached.d.ts.map +1 -0
  45. package/dist/src/execution/runDetached.js +90 -0
  46. package/dist/src/execution/runDetached.js.map +1 -0
  47. package/dist/src/execution/runDetached.spec.d.ts +6 -0
  48. package/dist/src/execution/runDetached.spec.d.ts.map +1 -0
  49. package/dist/src/execution/runDetached.spec.js +193 -0
  50. package/dist/src/execution/runDetached.spec.js.map +1 -0
  51. package/dist/src/gc.spec.js +36 -3
  52. package/dist/src/gc.spec.js.map +1 -1
  53. package/dist/src/objects.spec.js +3 -1
  54. package/dist/src/objects.spec.js.map +1 -1
  55. package/dist/src/packages.d.ts.map +1 -1
  56. package/dist/src/packages.js +13 -5
  57. package/dist/src/packages.js.map +1 -1
  58. package/dist/src/storage/in-memory/InMemoryStorage.d.ts +4 -0
  59. package/dist/src/storage/in-memory/InMemoryStorage.d.ts.map +1 -1
  60. package/dist/src/storage/in-memory/InMemoryStorage.js +9 -0
  61. package/dist/src/storage/in-memory/InMemoryStorage.js.map +1 -1
  62. package/dist/src/storage/interfaces.d.ts +19 -0
  63. package/dist/src/storage/interfaces.d.ts.map +1 -1
  64. package/dist/src/storage/local/LocalDatasetRefStore.d.ts.map +1 -1
  65. package/dist/src/storage/local/LocalDatasetRefStore.js +41 -4
  66. package/dist/src/storage/local/LocalDatasetRefStore.js.map +1 -1
  67. package/dist/src/storage/local/LocalLockService.spec.js +3 -1
  68. package/dist/src/storage/local/LocalLockService.spec.js.map +1 -1
  69. package/dist/src/storage/local/LocalRefStore.d.ts +4 -0
  70. package/dist/src/storage/local/LocalRefStore.d.ts.map +1 -1
  71. package/dist/src/storage/local/LocalRefStore.js +11 -0
  72. package/dist/src/storage/local/LocalRefStore.js.map +1 -1
  73. package/dist/src/storage/local/gc.d.ts.map +1 -1
  74. package/dist/src/storage/local/gc.js +22 -0
  75. package/dist/src/storage/local/gc.js.map +1 -1
  76. package/dist/src/tasks.d.ts.map +1 -1
  77. package/dist/src/tasks.js +2 -3
  78. package/dist/src/tasks.js.map +1 -1
  79. package/dist/src/trees.d.ts.map +1 -1
  80. package/dist/src/trees.js +3 -5
  81. package/dist/src/trees.js.map +1 -1
  82. package/dist/src/workspaceStatus.d.ts.map +1 -1
  83. package/dist/src/workspaceStatus.js +16 -11
  84. package/dist/src/workspaceStatus.js.map +1 -1
  85. package/dist/src/workspaceStatus.spec.d.ts +6 -0
  86. package/dist/src/workspaceStatus.spec.d.ts.map +1 -0
  87. package/dist/src/workspaceStatus.spec.js +156 -0
  88. package/dist/src/workspaceStatus.spec.js.map +1 -0
  89. package/dist/src/workspaces.d.ts.map +1 -1
  90. package/dist/src/workspaces.js +15 -4
  91. package/dist/src/workspaces.js.map +1 -1
  92. package/package.json +6 -4
@@ -89,6 +89,7 @@ describe('dataflow orchestration with MockTaskRunner', () => {
89
89
  refs: new Map(),
90
90
  },
91
91
  tasks: tasksMap,
92
+ functions: new Map(),
92
93
  };
93
94
  const pkgHash = await objectWrite(repoPath, pkgEncoder(pkgObj));
94
95
  const pkgDir = join(repoPath, 'packages', 'test');
@@ -815,6 +816,404 @@ describe('dataflow orchestration with MockTaskRunner', () => {
815
816
  assert.strictEqual(result.reexecuted, 0);
816
817
  });
817
818
  });
819
+ describe('diamond interleavings (deterministic)', () => {
820
+ // The classic dataflow hazards — join synchronisation, mid-flight root
821
+ // changes, mixed-version joins, fan-in failure — tested with NO sleeps:
822
+ // MockTaskRunner task bodies are promise-gated / perform their mutation
823
+ // synchronously inside a task execution, so every interleaving is a
824
+ // fixed, repeatable schedule (CI-speed independent).
825
+ /** Build the diamond: input → left, input → right, (left,right) → merge. */
826
+ async function createDiamond() {
827
+ const structure = {
828
+ type: 'struct',
829
+ value: new Map([
830
+ ['input', { type: 'value', value: { type: StringType, writable: true } }],
831
+ ['left_out', { type: 'value', value: { type: StringType, writable: true } }],
832
+ ['right_out', { type: 'value', value: { type: StringType, writable: true } }],
833
+ ['merge_out', { type: 'value', value: { type: StringType, writable: true } }],
834
+ ]),
835
+ };
836
+ const inputPath = [variant('field', 'input')];
837
+ const leftPath = [variant('field', 'left_out')];
838
+ const rightPath = [variant('field', 'right_out')];
839
+ const mergePath = [variant('field', 'merge_out')];
840
+ const taskHashes = await createPackageWithTasks(testRepo, [
841
+ { name: 'left', command: ['echo'], inputs: [inputPath], output: leftPath },
842
+ { name: 'right', command: ['echo'], inputs: [inputPath], output: rightPath },
843
+ { name: 'merge', command: ['echo'], inputs: [leftPath, rightPath], output: mergePath },
844
+ ], structure);
845
+ await workspaceDeploy(storage, testRepo, 'test-ws', 'test', '1.0.0');
846
+ await workspaceSetDataset(storage, testRepo, 'test-ws', inputPath, 'v1', StringType);
847
+ return { taskHashes, inputPath };
848
+ }
849
+ /** Update the diamond's root input ref to a fresh value (mid-flight mutation). */
850
+ async function mutateInput(value) {
851
+ const newHash = await datasetWrite(storage, testRepo, value, StringType);
852
+ const ref = variant('value', { hash: newHash, versions: new Map() });
853
+ await storage.datasets.write(testRepo, 'test-ws', 'input', ref);
854
+ }
855
+ it('merge waits for BOTH branches even when one is held in-flight', async () => {
856
+ const { taskHashes } = await createDiamond();
857
+ const leftHash = taskHashes.get('left');
858
+ const rightHash = taskHashes.get('right');
859
+ const mergeHash = taskHashes.get('merge');
860
+ // Hold `left` open until `right` has fully completed — the exact
861
+ // schedule where a premature join would fire with a missing branch.
862
+ let releaseLeft;
863
+ const leftGate = new Promise((res) => { releaseLeft = res; });
864
+ let rightCompleted = false;
865
+ let mergeStarted = false;
866
+ mockRunner.setResult(leftHash, async () => {
867
+ await leftGate;
868
+ return { state: 'success', cached: false, outputHash: 'left-v1' };
869
+ });
870
+ mockRunner.setResult(rightHash, async () => {
871
+ rightCompleted = true;
872
+ releaseLeft();
873
+ return { state: 'success', cached: false, outputHash: 'right-v1' };
874
+ });
875
+ mockRunner.setResult(mergeHash, async (inputHashes) => {
876
+ mergeStarted = true;
877
+ // The join must see both branch outputs, never a partial set
878
+ assert.deepStrictEqual([...inputHashes].sort(), ['left-v1', 'right-v1']);
879
+ assert.strictEqual(rightCompleted, true, 'merge started before right completed');
880
+ return { state: 'success', cached: false, outputHash: 'merge-v1' };
881
+ });
882
+ const result = await dataflowExecute(storage, testRepo, 'test-ws', {
883
+ runner: mockRunner,
884
+ concurrency: 4,
885
+ });
886
+ assert.strictEqual(result.success, true);
887
+ assert.strictEqual(mergeStarted, true);
888
+ assert.strictEqual(result.executed, 3);
889
+ });
890
+ it('never joins mixed versions when the root changes mid-flight', async () => {
891
+ // The version-vector hazard: input changes while the branches run.
892
+ // A broken engine merges left@v1 with right@v2 (or vice versa); a
893
+ // correct one re-executes both branches and joins a matching pair.
894
+ const { taskHashes } = await createDiamond();
895
+ const leftHash = taskHashes.get('left');
896
+ const rightHash = taskHashes.get('right');
897
+ const mergeHash = taskHashes.get('merge');
898
+ let leftCalls = 0;
899
+ let rightCalls = 0;
900
+ const mergeCalls = [];
901
+ mockRunner.setResult(leftHash, async () => {
902
+ leftCalls++;
903
+ if (leftCalls === 1) {
904
+ // Root input changes WHILE left is executing — deterministic,
905
+ // no sleeps: the mutation happens inside the task body.
906
+ await mutateInput('v2');
907
+ }
908
+ return { state: 'success', cached: false, outputHash: `left-v${leftCalls}` };
909
+ });
910
+ mockRunner.setResult(rightHash, async () => {
911
+ rightCalls++;
912
+ return { state: 'success', cached: false, outputHash: `right-v${rightCalls}` };
913
+ });
914
+ mockRunner.setResult(mergeHash, async (inputHashes) => {
915
+ mergeCalls.push([...inputHashes]);
916
+ return { state: 'success', cached: false, outputHash: `merge-v${mergeCalls.length}` };
917
+ });
918
+ const result = await dataflowExecute(storage, testRepo, 'test-ws', {
919
+ runner: mockRunner,
920
+ concurrency: 4,
921
+ });
922
+ assert.strictEqual(result.success, true);
923
+ // Both branches re-ran after the root change
924
+ assert.strictEqual(leftCalls, 2, 'left should re-execute after root change');
925
+ assert.strictEqual(rightCalls, 2, 'right should re-execute after root change');
926
+ assert.ok(result.reexecuted >= 2, `expected >=2 re-executions, got ${result.reexecuted}`);
927
+ // THE invariant: every merge call joined a matching version pair
928
+ assert.ok(mergeCalls.length >= 1, 'merge never ran');
929
+ for (const inputs of mergeCalls) {
930
+ const versions = new Set(inputs.map((h) => h.split('-v')[1]));
931
+ assert.strictEqual(versions.size, 1, `merge joined mixed versions: ${inputs.join(', ')}`);
932
+ }
933
+ // And the final join used the post-change branch outputs
934
+ const final = mergeCalls[mergeCalls.length - 1];
935
+ assert.deepStrictEqual([...final].sort(), ['left-v2', 'right-v2']);
936
+ });
937
+ it('skips the join when one branch fails and preserves the surviving branch', async () => {
938
+ const { taskHashes } = await createDiamond();
939
+ const leftHash = taskHashes.get('left');
940
+ const rightHash = taskHashes.get('right');
941
+ const mergeHash = taskHashes.get('merge');
942
+ let mergeRan = false;
943
+ mockRunner.setResult(leftHash, { state: 'failed', cached: false, exitCode: 1 });
944
+ mockRunner.setResult(rightHash, { state: 'success', cached: false, outputHash: 'right-v1' });
945
+ mockRunner.setResult(mergeHash, async () => {
946
+ mergeRan = true;
947
+ return { state: 'success', cached: false, outputHash: 'merge-v1' };
948
+ });
949
+ const result = await dataflowExecute(storage, testRepo, 'test-ws', {
950
+ runner: mockRunner,
951
+ concurrency: 4,
952
+ });
953
+ assert.strictEqual(result.success, false);
954
+ assert.strictEqual(mergeRan, false, 'merge must not run when a parent branch failed');
955
+ assert.strictEqual(result.failed, 1);
956
+ assert.strictEqual(result.skipped, 1);
957
+ // The surviving branch's output is preserved in the workspace
958
+ const rightRef = await storage.datasets.read(testRepo, 'test-ws', 'right_out');
959
+ assert.strictEqual(rightRef?.type, 'value');
960
+ });
961
+ it('conflicting writes to two roots during execution converge to a consistent fixpoint', async () => {
962
+ // Two independent roots, each mutated while the OTHER root's task is
963
+ // running — the cross-invalidation case. Deterministic: mutations
964
+ // happen inside the first execution of each task.
965
+ const structure = {
966
+ type: 'struct',
967
+ value: new Map([
968
+ ['x', { type: 'value', value: { type: StringType, writable: true } }],
969
+ ['y', { type: 'value', value: { type: StringType, writable: true } }],
970
+ ['a_out', { type: 'value', value: { type: StringType, writable: true } }],
971
+ ['b_out', { type: 'value', value: { type: StringType, writable: true } }],
972
+ ['merge_out', { type: 'value', value: { type: StringType, writable: true } }],
973
+ ]),
974
+ };
975
+ const xPath = [variant('field', 'x')];
976
+ const yPath = [variant('field', 'y')];
977
+ const aPath = [variant('field', 'a_out')];
978
+ const bPath = [variant('field', 'b_out')];
979
+ const taskHashes = await createPackageWithTasks(testRepo, [
980
+ { name: 'ta', command: ['echo'], inputs: [xPath], output: aPath },
981
+ { name: 'tb', command: ['echo'], inputs: [yPath], output: bPath },
982
+ { name: 'merge', command: ['echo'], inputs: [aPath, bPath], output: [variant('field', 'merge_out')] },
983
+ ], structure);
984
+ await workspaceDeploy(storage, testRepo, 'test-ws', 'test', '1.0.0');
985
+ await workspaceSetDataset(storage, testRepo, 'test-ws', xPath, 'x1', StringType);
986
+ await workspaceSetDataset(storage, testRepo, 'test-ws', yPath, 'y1', StringType);
987
+ const writeRoot = async (refPath, value) => {
988
+ const newHash = await datasetWrite(storage, testRepo, value, StringType);
989
+ const ref = variant('value', { hash: newHash, versions: new Map() });
990
+ await storage.datasets.write(testRepo, 'test-ws', refPath, ref);
991
+ };
992
+ let taCalls = 0;
993
+ let tbCalls = 0;
994
+ const mergeCalls = [];
995
+ mockRunner.setResult(taskHashes.get('ta'), async () => {
996
+ taCalls++;
997
+ if (taCalls === 1)
998
+ await writeRoot('y', 'y2'); // mutate the OTHER root
999
+ return { state: 'success', cached: false, outputHash: `a-v${taCalls}` };
1000
+ });
1001
+ mockRunner.setResult(taskHashes.get('tb'), async () => {
1002
+ tbCalls++;
1003
+ if (tbCalls === 1)
1004
+ await writeRoot('x', 'x2'); // mutate the OTHER root
1005
+ return { state: 'success', cached: false, outputHash: `b-v${tbCalls}` };
1006
+ });
1007
+ mockRunner.setResult(taskHashes.get('merge'), async (inputHashes) => {
1008
+ mergeCalls.push([...inputHashes]);
1009
+ return { state: 'success', cached: false, outputHash: `m-v${mergeCalls.length}` };
1010
+ });
1011
+ const result = await dataflowExecute(storage, testRepo, 'test-ws', {
1012
+ runner: mockRunner,
1013
+ concurrency: 4,
1014
+ });
1015
+ assert.strictEqual(result.success, true);
1016
+ // Each task re-ran for its own root's change
1017
+ assert.strictEqual(taCalls, 2, 'ta should re-execute after x changed');
1018
+ assert.strictEqual(tbCalls, 2, 'tb should re-execute after y changed');
1019
+ // The final join saw the post-change outputs of BOTH branches
1020
+ const final = mergeCalls[mergeCalls.length - 1];
1021
+ assert.deepStrictEqual([...final].sort(), ['a-v2', 'b-v2']);
1022
+ });
1023
+ it('re-joins a consistent pair when the root changes while the join itself is running', async () => {
1024
+ // The change lands during MERGE's execution (not a branch's): the
1025
+ // in-progress join finishes with the v1 pair, then the engine must
1026
+ // re-execute both branches AND the join with the v2 pair.
1027
+ const { taskHashes } = await createDiamond();
1028
+ let leftCalls = 0;
1029
+ let rightCalls = 0;
1030
+ const mergeCalls = [];
1031
+ mockRunner.setResult(taskHashes.get('left'), async () => {
1032
+ leftCalls++;
1033
+ return { state: 'success', cached: false, outputHash: `left-v${leftCalls}` };
1034
+ });
1035
+ mockRunner.setResult(taskHashes.get('right'), async () => {
1036
+ rightCalls++;
1037
+ return { state: 'success', cached: false, outputHash: `right-v${rightCalls}` };
1038
+ });
1039
+ mockRunner.setResult(taskHashes.get('merge'), async (inputHashes) => {
1040
+ mergeCalls.push([...inputHashes]);
1041
+ if (mergeCalls.length === 1) {
1042
+ await mutateInput('v2'); // root changes while the join runs
1043
+ }
1044
+ return { state: 'success', cached: false, outputHash: `merge-v${mergeCalls.length}` };
1045
+ });
1046
+ const result = await dataflowExecute(storage, testRepo, 'test-ws', {
1047
+ runner: mockRunner,
1048
+ concurrency: 4,
1049
+ });
1050
+ assert.strictEqual(result.success, true);
1051
+ assert.strictEqual(leftCalls, 2, 'left should re-execute after the change');
1052
+ assert.strictEqual(rightCalls, 2, 'right should re-execute after the change');
1053
+ assert.strictEqual(mergeCalls.length, 2, 'join should re-execute after the change');
1054
+ assert.deepStrictEqual([...mergeCalls[1]].sort(), ['left-v2', 'right-v2']);
1055
+ });
1056
+ it('converges to the LAST value when the root changes repeatedly during one run', async () => {
1057
+ // Two successive changes inside one run — the fixpoint loop must keep
1058
+ // re-executing until the graph reflects the final value, and stop.
1059
+ const { taskHashes } = await createDiamond();
1060
+ let leftCalls = 0;
1061
+ let rightCalls = 0;
1062
+ const mergeCalls = [];
1063
+ mockRunner.setResult(taskHashes.get('left'), async () => {
1064
+ leftCalls++;
1065
+ if (leftCalls === 1)
1066
+ await mutateInput('v2');
1067
+ if (leftCalls === 2)
1068
+ await mutateInput('v3');
1069
+ return { state: 'success', cached: false, outputHash: `left-v${leftCalls}` };
1070
+ });
1071
+ mockRunner.setResult(taskHashes.get('right'), async () => {
1072
+ rightCalls++;
1073
+ return { state: 'success', cached: false, outputHash: `right-v${rightCalls}` };
1074
+ });
1075
+ mockRunner.setResult(taskHashes.get('merge'), async (inputHashes) => {
1076
+ mergeCalls.push([...inputHashes]);
1077
+ return { state: 'success', cached: false, outputHash: `merge-v${mergeCalls.length}` };
1078
+ });
1079
+ const result = await dataflowExecute(storage, testRepo, 'test-ws', {
1080
+ runner: mockRunner,
1081
+ concurrency: 4,
1082
+ });
1083
+ assert.strictEqual(result.success, true);
1084
+ assert.strictEqual(leftCalls, 3, 'left should run for v1, v2, v3');
1085
+ const final = mergeCalls[mergeCalls.length - 1];
1086
+ assert.deepStrictEqual([...final].sort(), [`left-v${leftCalls}`, `right-v${rightCalls}`]);
1087
+ // No mixed-version join at any point
1088
+ for (const inputs of mergeCalls) {
1089
+ const lv = inputs.find((h) => h.startsWith('left-')).split('-v')[1];
1090
+ const rv = inputs.find((h) => h.startsWith('right-')).split('-v')[1];
1091
+ assert.strictEqual(lv, rv, `merge joined mixed versions: ${inputs.join(', ')}`);
1092
+ }
1093
+ });
1094
+ it('partially invalidates: a change feeding one branch does not re-run the other', async () => {
1095
+ // Two independent roots: x→ta→a, y→tb→b, (a,b)→merge. Changing y while
1096
+ // ta runs must re-execute ONLY tb and the join — ta's work stands.
1097
+ // Over-invalidation here is the efficiency regression that makes big
1098
+ // real-world dataflows re-run everything on every edit.
1099
+ const structure = {
1100
+ type: 'struct',
1101
+ value: new Map([
1102
+ ['x', { type: 'value', value: { type: StringType, writable: true } }],
1103
+ ['y', { type: 'value', value: { type: StringType, writable: true } }],
1104
+ ['a_out', { type: 'value', value: { type: StringType, writable: true } }],
1105
+ ['b_out', { type: 'value', value: { type: StringType, writable: true } }],
1106
+ ['merge_out', { type: 'value', value: { type: StringType, writable: true } }],
1107
+ ]),
1108
+ };
1109
+ const xPath = [variant('field', 'x')];
1110
+ const yPath = [variant('field', 'y')];
1111
+ const aPath = [variant('field', 'a_out')];
1112
+ const bPath = [variant('field', 'b_out')];
1113
+ const taskHashes = await createPackageWithTasks(testRepo, [
1114
+ { name: 'ta', command: ['echo'], inputs: [xPath], output: aPath },
1115
+ { name: 'tb', command: ['echo'], inputs: [yPath], output: bPath },
1116
+ { name: 'merge', command: ['echo'], inputs: [aPath, bPath], output: [variant('field', 'merge_out')] },
1117
+ ], structure);
1118
+ await workspaceDeploy(storage, testRepo, 'test-ws', 'test', '1.0.0');
1119
+ await workspaceSetDataset(storage, testRepo, 'test-ws', xPath, 'x1', StringType);
1120
+ await workspaceSetDataset(storage, testRepo, 'test-ws', yPath, 'y1', StringType);
1121
+ let taCalls = 0;
1122
+ let tbCalls = 0;
1123
+ const mergeCalls = [];
1124
+ mockRunner.setResult(taskHashes.get('ta'), async () => {
1125
+ taCalls++;
1126
+ if (taCalls === 1) {
1127
+ const newHash = await datasetWrite(storage, testRepo, 'y2', StringType);
1128
+ const ref = variant('value', { hash: newHash, versions: new Map() });
1129
+ await storage.datasets.write(testRepo, 'test-ws', 'y', ref);
1130
+ }
1131
+ return { state: 'success', cached: false, outputHash: `a-v${taCalls}` };
1132
+ });
1133
+ mockRunner.setResult(taskHashes.get('tb'), async () => {
1134
+ tbCalls++;
1135
+ return { state: 'success', cached: false, outputHash: `b-v${tbCalls}` };
1136
+ });
1137
+ mockRunner.setResult(taskHashes.get('merge'), async (inputHashes) => {
1138
+ mergeCalls.push([...inputHashes]);
1139
+ return { state: 'success', cached: false, outputHash: `m-v${mergeCalls.length}` };
1140
+ });
1141
+ const result = await dataflowExecute(storage, testRepo, 'test-ws', {
1142
+ runner: mockRunner,
1143
+ concurrency: 4,
1144
+ });
1145
+ assert.strictEqual(result.success, true);
1146
+ assert.strictEqual(taCalls, 1, 'ta must NOT re-run for a change to y (over-invalidation)');
1147
+ assert.strictEqual(tbCalls, 2, 'tb should re-run for the y change');
1148
+ const final = mergeCalls[mergeCalls.length - 1];
1149
+ assert.deepStrictEqual([...final].sort(), ['a-v1', 'b-v2']);
1150
+ });
1151
+ it('propagates a mid-flight change transitively through a double diamond', async () => {
1152
+ // diamond1 (input → l1,r1 → m1) feeding diamond2 (m1 → l2,r2 → m2):
1153
+ // a root change during l1 must cascade re-execution through BOTH
1154
+ // fan-out/fan-in layers, and m2 must join a consistent final pair.
1155
+ const fields = [
1156
+ ['input', { type: 'value', value: { type: StringType, writable: true } }],
1157
+ ['l1_out', { type: 'value', value: { type: StringType, writable: true } }],
1158
+ ['r1_out', { type: 'value', value: { type: StringType, writable: true } }],
1159
+ ['m1_out', { type: 'value', value: { type: StringType, writable: true } }],
1160
+ ['l2_out', { type: 'value', value: { type: StringType, writable: true } }],
1161
+ ['r2_out', { type: 'value', value: { type: StringType, writable: true } }],
1162
+ ['m2_out', { type: 'value', value: { type: StringType, writable: true } }],
1163
+ ];
1164
+ const structure = {
1165
+ type: 'struct',
1166
+ value: new Map(fields),
1167
+ };
1168
+ const path = (name) => [variant('field', name)];
1169
+ const taskHashes = await createPackageWithTasks(testRepo, [
1170
+ { name: 'l1', command: ['echo'], inputs: [path('input')], output: path('l1_out') },
1171
+ { name: 'r1', command: ['echo'], inputs: [path('input')], output: path('r1_out') },
1172
+ { name: 'm1', command: ['echo'], inputs: [path('l1_out'), path('r1_out')], output: path('m1_out') },
1173
+ { name: 'l2', command: ['echo'], inputs: [path('m1_out')], output: path('l2_out') },
1174
+ { name: 'r2', command: ['echo'], inputs: [path('m1_out')], output: path('r2_out') },
1175
+ { name: 'm2', command: ['echo'], inputs: [path('l2_out'), path('r2_out')], output: path('m2_out') },
1176
+ ], structure);
1177
+ await workspaceDeploy(storage, testRepo, 'test-ws', 'test', '1.0.0');
1178
+ await workspaceSetDataset(storage, testRepo, 'test-ws', path('input'), 'v1', StringType);
1179
+ // Exact downstream call counts are schedule-dependent (a join that
1180
+ // wasn't ready yet when the change was detected legitimately runs
1181
+ // once, with the new inputs). The schedule-independent invariant is:
1182
+ // every join's LAST call consumed its parents' LATEST outputs.
1183
+ const calls = new Map();
1184
+ const lastInputs = new Map();
1185
+ const counted = (name, hash, extra) => {
1186
+ mockRunner.setResult(hash, async (inputHashes) => {
1187
+ const n = (calls.get(name) ?? 0) + 1;
1188
+ calls.set(name, n);
1189
+ lastInputs.set(name, [...inputHashes]);
1190
+ if (extra)
1191
+ await extra(n);
1192
+ return { state: 'success', cached: false, outputHash: `${name}-v${n}` };
1193
+ });
1194
+ };
1195
+ counted('l1', taskHashes.get('l1'), async (n) => {
1196
+ if (n === 1)
1197
+ await mutateInput('v2');
1198
+ });
1199
+ for (const name of ['r1', 'm1', 'l2', 'r2', 'm2']) {
1200
+ counted(name, taskHashes.get(name));
1201
+ }
1202
+ const result = await dataflowExecute(storage, testRepo, 'test-ws', {
1203
+ runner: mockRunner,
1204
+ concurrency: 4,
1205
+ });
1206
+ assert.strictEqual(result.success, true);
1207
+ // The task that observed the change must have re-executed
1208
+ assert.strictEqual(calls.get('l1'), 2, 'l1 should re-execute after the root change');
1209
+ // Transitive freshness: each join's last call used its parents' final outputs
1210
+ const latest = (name) => `${name}-v${calls.get(name)}`;
1211
+ assert.deepStrictEqual([...lastInputs.get('m1')].sort(), [latest('l1'), latest('r1')].sort(), 'first join must consume the latest branch outputs');
1212
+ assert.deepStrictEqual(lastInputs.get('l2'), [latest('m1')], 'second fan-out must consume the latest m1');
1213
+ assert.deepStrictEqual(lastInputs.get('r2'), [latest('m1')]);
1214
+ assert.deepStrictEqual([...lastInputs.get('m2')].sort(), [latest('l2'), latest('r2')].sort(), 'final join must consume the latest second-layer outputs');
1215
+ });
1216
+ });
818
1217
  describe('DataflowRun recording', () => {
819
1218
  it('records correct outputVersions with task output hashes', async () => {
820
1219
  const structure = {