@elaraai/e3-core 1.0.4 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -2
- package/dist/src/dataflow/orchestrator/LocalOrchestrator.d.ts.map +1 -1
- package/dist/src/dataflow/orchestrator/LocalOrchestrator.js +65 -6
- package/dist/src/dataflow/orchestrator/LocalOrchestrator.js.map +1 -1
- package/dist/src/dataflow/steps.d.ts.map +1 -1
- package/dist/src/dataflow/steps.js +2 -3
- package/dist/src/dataflow/steps.js.map +1 -1
- package/dist/src/dataflow/steps.spec.js +1 -0
- package/dist/src/dataflow/steps.spec.js.map +1 -1
- package/dist/src/dataflow-orchestration.spec.js +399 -0
- package/dist/src/dataflow-orchestration.spec.js.map +1 -1
- package/dist/src/dataflow.d.ts.map +1 -1
- package/dist/src/dataflow.js +2 -3
- package/dist/src/dataflow.js.map +1 -1
- package/dist/src/dataflow.spec.js +2 -152
- package/dist/src/dataflow.spec.js.map +1 -1
- package/dist/src/dataset-refs.spec.d.ts +6 -0
- package/dist/src/dataset-refs.spec.d.ts.map +1 -0
- package/dist/src/dataset-refs.spec.js +107 -0
- package/dist/src/dataset-refs.spec.js.map +1 -0
- package/dist/src/execution/LocalTaskRunner.d.ts +3 -0
- package/dist/src/execution/LocalTaskRunner.d.ts.map +1 -1
- package/dist/src/execution/LocalTaskRunner.js +81 -125
- package/dist/src/execution/LocalTaskRunner.js.map +1 -1
- package/dist/src/execution/LocalTaskRunner.spec.d.ts +6 -0
- package/dist/src/execution/LocalTaskRunner.spec.d.ts.map +1 -0
- package/dist/src/execution/LocalTaskRunner.spec.js +52 -0
- package/dist/src/execution/LocalTaskRunner.spec.js.map +1 -0
- package/dist/src/execution/MockTaskRunner.d.ts +8 -0
- package/dist/src/execution/MockTaskRunner.d.ts.map +1 -1
- package/dist/src/execution/MockTaskRunner.js +21 -0
- package/dist/src/execution/MockTaskRunner.js.map +1 -1
- package/dist/src/execution/index.d.ts +2 -0
- package/dist/src/execution/index.d.ts.map +1 -1
- package/dist/src/execution/index.js +4 -0
- package/dist/src/execution/index.js.map +1 -1
- package/dist/src/execution/interfaces.d.ts +11 -0
- package/dist/src/execution/interfaces.d.ts.map +1 -1
- package/dist/src/execution/processExec.d.ts +108 -0
- package/dist/src/execution/processExec.d.ts.map +1 -0
- package/dist/src/execution/processExec.js +271 -0
- package/dist/src/execution/processExec.js.map +1 -0
- package/dist/src/execution/runDetached.d.ts +83 -0
- package/dist/src/execution/runDetached.d.ts.map +1 -0
- package/dist/src/execution/runDetached.js +90 -0
- package/dist/src/execution/runDetached.js.map +1 -0
- package/dist/src/execution/runDetached.spec.d.ts +6 -0
- package/dist/src/execution/runDetached.spec.d.ts.map +1 -0
- package/dist/src/execution/runDetached.spec.js +193 -0
- package/dist/src/execution/runDetached.spec.js.map +1 -0
- package/dist/src/gc.spec.js +36 -3
- package/dist/src/gc.spec.js.map +1 -1
- package/dist/src/objects.spec.js +3 -1
- package/dist/src/objects.spec.js.map +1 -1
- package/dist/src/packages.d.ts.map +1 -1
- package/dist/src/packages.js +13 -5
- package/dist/src/packages.js.map +1 -1
- package/dist/src/storage/in-memory/InMemoryStorage.d.ts +4 -0
- package/dist/src/storage/in-memory/InMemoryStorage.d.ts.map +1 -1
- package/dist/src/storage/in-memory/InMemoryStorage.js +9 -0
- package/dist/src/storage/in-memory/InMemoryStorage.js.map +1 -1
- package/dist/src/storage/interfaces.d.ts +19 -0
- package/dist/src/storage/interfaces.d.ts.map +1 -1
- package/dist/src/storage/local/LocalDatasetRefStore.d.ts.map +1 -1
- package/dist/src/storage/local/LocalDatasetRefStore.js +41 -4
- package/dist/src/storage/local/LocalDatasetRefStore.js.map +1 -1
- package/dist/src/storage/local/LocalLockService.spec.js +3 -1
- package/dist/src/storage/local/LocalLockService.spec.js.map +1 -1
- package/dist/src/storage/local/LocalRefStore.d.ts +4 -0
- package/dist/src/storage/local/LocalRefStore.d.ts.map +1 -1
- package/dist/src/storage/local/LocalRefStore.js +11 -0
- package/dist/src/storage/local/LocalRefStore.js.map +1 -1
- package/dist/src/storage/local/gc.d.ts.map +1 -1
- package/dist/src/storage/local/gc.js +22 -0
- package/dist/src/storage/local/gc.js.map +1 -1
- package/dist/src/tasks.d.ts.map +1 -1
- package/dist/src/tasks.js +2 -3
- package/dist/src/tasks.js.map +1 -1
- package/dist/src/trees.d.ts.map +1 -1
- package/dist/src/trees.js +3 -5
- package/dist/src/trees.js.map +1 -1
- package/dist/src/workspaceStatus.d.ts.map +1 -1
- package/dist/src/workspaceStatus.js +16 -11
- package/dist/src/workspaceStatus.js.map +1 -1
- package/dist/src/workspaceStatus.spec.d.ts +6 -0
- package/dist/src/workspaceStatus.spec.d.ts.map +1 -0
- package/dist/src/workspaceStatus.spec.js +156 -0
- package/dist/src/workspaceStatus.spec.js.map +1 -0
- package/dist/src/workspaces.d.ts.map +1 -1
- package/dist/src/workspaces.js +15 -4
- package/dist/src/workspaces.js.map +1 -1
- package/package.json +6 -4
|
@@ -89,6 +89,7 @@ describe('dataflow orchestration with MockTaskRunner', () => {
|
|
|
89
89
|
refs: new Map(),
|
|
90
90
|
},
|
|
91
91
|
tasks: tasksMap,
|
|
92
|
+
functions: new Map(),
|
|
92
93
|
};
|
|
93
94
|
const pkgHash = await objectWrite(repoPath, pkgEncoder(pkgObj));
|
|
94
95
|
const pkgDir = join(repoPath, 'packages', 'test');
|
|
@@ -815,6 +816,404 @@ describe('dataflow orchestration with MockTaskRunner', () => {
|
|
|
815
816
|
assert.strictEqual(result.reexecuted, 0);
|
|
816
817
|
});
|
|
817
818
|
});
|
|
819
|
+
describe('diamond interleavings (deterministic)', () => {
|
|
820
|
+
// The classic dataflow hazards — join synchronisation, mid-flight root
|
|
821
|
+
// changes, mixed-version joins, fan-in failure — tested with NO sleeps:
|
|
822
|
+
// MockTaskRunner task bodies are promise-gated / perform their mutation
|
|
823
|
+
// synchronously inside a task execution, so every interleaving is a
|
|
824
|
+
// fixed, repeatable schedule (CI-speed independent).
|
|
825
|
+
/** Build the diamond: input → left, input → right, (left,right) → merge. */
|
|
826
|
+
async function createDiamond() {
|
|
827
|
+
const structure = {
|
|
828
|
+
type: 'struct',
|
|
829
|
+
value: new Map([
|
|
830
|
+
['input', { type: 'value', value: { type: StringType, writable: true } }],
|
|
831
|
+
['left_out', { type: 'value', value: { type: StringType, writable: true } }],
|
|
832
|
+
['right_out', { type: 'value', value: { type: StringType, writable: true } }],
|
|
833
|
+
['merge_out', { type: 'value', value: { type: StringType, writable: true } }],
|
|
834
|
+
]),
|
|
835
|
+
};
|
|
836
|
+
const inputPath = [variant('field', 'input')];
|
|
837
|
+
const leftPath = [variant('field', 'left_out')];
|
|
838
|
+
const rightPath = [variant('field', 'right_out')];
|
|
839
|
+
const mergePath = [variant('field', 'merge_out')];
|
|
840
|
+
const taskHashes = await createPackageWithTasks(testRepo, [
|
|
841
|
+
{ name: 'left', command: ['echo'], inputs: [inputPath], output: leftPath },
|
|
842
|
+
{ name: 'right', command: ['echo'], inputs: [inputPath], output: rightPath },
|
|
843
|
+
{ name: 'merge', command: ['echo'], inputs: [leftPath, rightPath], output: mergePath },
|
|
844
|
+
], structure);
|
|
845
|
+
await workspaceDeploy(storage, testRepo, 'test-ws', 'test', '1.0.0');
|
|
846
|
+
await workspaceSetDataset(storage, testRepo, 'test-ws', inputPath, 'v1', StringType);
|
|
847
|
+
return { taskHashes, inputPath };
|
|
848
|
+
}
|
|
849
|
+
/** Update the diamond's root input ref to a fresh value (mid-flight mutation). */
|
|
850
|
+
async function mutateInput(value) {
|
|
851
|
+
const newHash = await datasetWrite(storage, testRepo, value, StringType);
|
|
852
|
+
const ref = variant('value', { hash: newHash, versions: new Map() });
|
|
853
|
+
await storage.datasets.write(testRepo, 'test-ws', 'input', ref);
|
|
854
|
+
}
|
|
855
|
+
it('merge waits for BOTH branches even when one is held in-flight', async () => {
|
|
856
|
+
const { taskHashes } = await createDiamond();
|
|
857
|
+
const leftHash = taskHashes.get('left');
|
|
858
|
+
const rightHash = taskHashes.get('right');
|
|
859
|
+
const mergeHash = taskHashes.get('merge');
|
|
860
|
+
// Hold `left` open until `right` has fully completed — the exact
|
|
861
|
+
// schedule where a premature join would fire with a missing branch.
|
|
862
|
+
let releaseLeft;
|
|
863
|
+
const leftGate = new Promise((res) => { releaseLeft = res; });
|
|
864
|
+
let rightCompleted = false;
|
|
865
|
+
let mergeStarted = false;
|
|
866
|
+
mockRunner.setResult(leftHash, async () => {
|
|
867
|
+
await leftGate;
|
|
868
|
+
return { state: 'success', cached: false, outputHash: 'left-v1' };
|
|
869
|
+
});
|
|
870
|
+
mockRunner.setResult(rightHash, async () => {
|
|
871
|
+
rightCompleted = true;
|
|
872
|
+
releaseLeft();
|
|
873
|
+
return { state: 'success', cached: false, outputHash: 'right-v1' };
|
|
874
|
+
});
|
|
875
|
+
mockRunner.setResult(mergeHash, async (inputHashes) => {
|
|
876
|
+
mergeStarted = true;
|
|
877
|
+
// The join must see both branch outputs, never a partial set
|
|
878
|
+
assert.deepStrictEqual([...inputHashes].sort(), ['left-v1', 'right-v1']);
|
|
879
|
+
assert.strictEqual(rightCompleted, true, 'merge started before right completed');
|
|
880
|
+
return { state: 'success', cached: false, outputHash: 'merge-v1' };
|
|
881
|
+
});
|
|
882
|
+
const result = await dataflowExecute(storage, testRepo, 'test-ws', {
|
|
883
|
+
runner: mockRunner,
|
|
884
|
+
concurrency: 4,
|
|
885
|
+
});
|
|
886
|
+
assert.strictEqual(result.success, true);
|
|
887
|
+
assert.strictEqual(mergeStarted, true);
|
|
888
|
+
assert.strictEqual(result.executed, 3);
|
|
889
|
+
});
|
|
890
|
+
it('never joins mixed versions when the root changes mid-flight', async () => {
|
|
891
|
+
// The version-vector hazard: input changes while the branches run.
|
|
892
|
+
// A broken engine merges left@v1 with right@v2 (or vice versa); a
|
|
893
|
+
// correct one re-executes both branches and joins a matching pair.
|
|
894
|
+
const { taskHashes } = await createDiamond();
|
|
895
|
+
const leftHash = taskHashes.get('left');
|
|
896
|
+
const rightHash = taskHashes.get('right');
|
|
897
|
+
const mergeHash = taskHashes.get('merge');
|
|
898
|
+
let leftCalls = 0;
|
|
899
|
+
let rightCalls = 0;
|
|
900
|
+
const mergeCalls = [];
|
|
901
|
+
mockRunner.setResult(leftHash, async () => {
|
|
902
|
+
leftCalls++;
|
|
903
|
+
if (leftCalls === 1) {
|
|
904
|
+
// Root input changes WHILE left is executing — deterministic,
|
|
905
|
+
// no sleeps: the mutation happens inside the task body.
|
|
906
|
+
await mutateInput('v2');
|
|
907
|
+
}
|
|
908
|
+
return { state: 'success', cached: false, outputHash: `left-v${leftCalls}` };
|
|
909
|
+
});
|
|
910
|
+
mockRunner.setResult(rightHash, async () => {
|
|
911
|
+
rightCalls++;
|
|
912
|
+
return { state: 'success', cached: false, outputHash: `right-v${rightCalls}` };
|
|
913
|
+
});
|
|
914
|
+
mockRunner.setResult(mergeHash, async (inputHashes) => {
|
|
915
|
+
mergeCalls.push([...inputHashes]);
|
|
916
|
+
return { state: 'success', cached: false, outputHash: `merge-v${mergeCalls.length}` };
|
|
917
|
+
});
|
|
918
|
+
const result = await dataflowExecute(storage, testRepo, 'test-ws', {
|
|
919
|
+
runner: mockRunner,
|
|
920
|
+
concurrency: 4,
|
|
921
|
+
});
|
|
922
|
+
assert.strictEqual(result.success, true);
|
|
923
|
+
// Both branches re-ran after the root change
|
|
924
|
+
assert.strictEqual(leftCalls, 2, 'left should re-execute after root change');
|
|
925
|
+
assert.strictEqual(rightCalls, 2, 'right should re-execute after root change');
|
|
926
|
+
assert.ok(result.reexecuted >= 2, `expected >=2 re-executions, got ${result.reexecuted}`);
|
|
927
|
+
// THE invariant: every merge call joined a matching version pair
|
|
928
|
+
assert.ok(mergeCalls.length >= 1, 'merge never ran');
|
|
929
|
+
for (const inputs of mergeCalls) {
|
|
930
|
+
const versions = new Set(inputs.map((h) => h.split('-v')[1]));
|
|
931
|
+
assert.strictEqual(versions.size, 1, `merge joined mixed versions: ${inputs.join(', ')}`);
|
|
932
|
+
}
|
|
933
|
+
// And the final join used the post-change branch outputs
|
|
934
|
+
const final = mergeCalls[mergeCalls.length - 1];
|
|
935
|
+
assert.deepStrictEqual([...final].sort(), ['left-v2', 'right-v2']);
|
|
936
|
+
});
|
|
937
|
+
it('skips the join when one branch fails and preserves the surviving branch', async () => {
|
|
938
|
+
const { taskHashes } = await createDiamond();
|
|
939
|
+
const leftHash = taskHashes.get('left');
|
|
940
|
+
const rightHash = taskHashes.get('right');
|
|
941
|
+
const mergeHash = taskHashes.get('merge');
|
|
942
|
+
let mergeRan = false;
|
|
943
|
+
mockRunner.setResult(leftHash, { state: 'failed', cached: false, exitCode: 1 });
|
|
944
|
+
mockRunner.setResult(rightHash, { state: 'success', cached: false, outputHash: 'right-v1' });
|
|
945
|
+
mockRunner.setResult(mergeHash, async () => {
|
|
946
|
+
mergeRan = true;
|
|
947
|
+
return { state: 'success', cached: false, outputHash: 'merge-v1' };
|
|
948
|
+
});
|
|
949
|
+
const result = await dataflowExecute(storage, testRepo, 'test-ws', {
|
|
950
|
+
runner: mockRunner,
|
|
951
|
+
concurrency: 4,
|
|
952
|
+
});
|
|
953
|
+
assert.strictEqual(result.success, false);
|
|
954
|
+
assert.strictEqual(mergeRan, false, 'merge must not run when a parent branch failed');
|
|
955
|
+
assert.strictEqual(result.failed, 1);
|
|
956
|
+
assert.strictEqual(result.skipped, 1);
|
|
957
|
+
// The surviving branch's output is preserved in the workspace
|
|
958
|
+
const rightRef = await storage.datasets.read(testRepo, 'test-ws', 'right_out');
|
|
959
|
+
assert.strictEqual(rightRef?.type, 'value');
|
|
960
|
+
});
|
|
961
|
+
it('conflicting writes to two roots during execution converge to a consistent fixpoint', async () => {
|
|
962
|
+
// Two independent roots, each mutated while the OTHER root's task is
|
|
963
|
+
// running — the cross-invalidation case. Deterministic: mutations
|
|
964
|
+
// happen inside the first execution of each task.
|
|
965
|
+
const structure = {
|
|
966
|
+
type: 'struct',
|
|
967
|
+
value: new Map([
|
|
968
|
+
['x', { type: 'value', value: { type: StringType, writable: true } }],
|
|
969
|
+
['y', { type: 'value', value: { type: StringType, writable: true } }],
|
|
970
|
+
['a_out', { type: 'value', value: { type: StringType, writable: true } }],
|
|
971
|
+
['b_out', { type: 'value', value: { type: StringType, writable: true } }],
|
|
972
|
+
['merge_out', { type: 'value', value: { type: StringType, writable: true } }],
|
|
973
|
+
]),
|
|
974
|
+
};
|
|
975
|
+
const xPath = [variant('field', 'x')];
|
|
976
|
+
const yPath = [variant('field', 'y')];
|
|
977
|
+
const aPath = [variant('field', 'a_out')];
|
|
978
|
+
const bPath = [variant('field', 'b_out')];
|
|
979
|
+
const taskHashes = await createPackageWithTasks(testRepo, [
|
|
980
|
+
{ name: 'ta', command: ['echo'], inputs: [xPath], output: aPath },
|
|
981
|
+
{ name: 'tb', command: ['echo'], inputs: [yPath], output: bPath },
|
|
982
|
+
{ name: 'merge', command: ['echo'], inputs: [aPath, bPath], output: [variant('field', 'merge_out')] },
|
|
983
|
+
], structure);
|
|
984
|
+
await workspaceDeploy(storage, testRepo, 'test-ws', 'test', '1.0.0');
|
|
985
|
+
await workspaceSetDataset(storage, testRepo, 'test-ws', xPath, 'x1', StringType);
|
|
986
|
+
await workspaceSetDataset(storage, testRepo, 'test-ws', yPath, 'y1', StringType);
|
|
987
|
+
const writeRoot = async (refPath, value) => {
|
|
988
|
+
const newHash = await datasetWrite(storage, testRepo, value, StringType);
|
|
989
|
+
const ref = variant('value', { hash: newHash, versions: new Map() });
|
|
990
|
+
await storage.datasets.write(testRepo, 'test-ws', refPath, ref);
|
|
991
|
+
};
|
|
992
|
+
let taCalls = 0;
|
|
993
|
+
let tbCalls = 0;
|
|
994
|
+
const mergeCalls = [];
|
|
995
|
+
mockRunner.setResult(taskHashes.get('ta'), async () => {
|
|
996
|
+
taCalls++;
|
|
997
|
+
if (taCalls === 1)
|
|
998
|
+
await writeRoot('y', 'y2'); // mutate the OTHER root
|
|
999
|
+
return { state: 'success', cached: false, outputHash: `a-v${taCalls}` };
|
|
1000
|
+
});
|
|
1001
|
+
mockRunner.setResult(taskHashes.get('tb'), async () => {
|
|
1002
|
+
tbCalls++;
|
|
1003
|
+
if (tbCalls === 1)
|
|
1004
|
+
await writeRoot('x', 'x2'); // mutate the OTHER root
|
|
1005
|
+
return { state: 'success', cached: false, outputHash: `b-v${tbCalls}` };
|
|
1006
|
+
});
|
|
1007
|
+
mockRunner.setResult(taskHashes.get('merge'), async (inputHashes) => {
|
|
1008
|
+
mergeCalls.push([...inputHashes]);
|
|
1009
|
+
return { state: 'success', cached: false, outputHash: `m-v${mergeCalls.length}` };
|
|
1010
|
+
});
|
|
1011
|
+
const result = await dataflowExecute(storage, testRepo, 'test-ws', {
|
|
1012
|
+
runner: mockRunner,
|
|
1013
|
+
concurrency: 4,
|
|
1014
|
+
});
|
|
1015
|
+
assert.strictEqual(result.success, true);
|
|
1016
|
+
// Each task re-ran for its own root's change
|
|
1017
|
+
assert.strictEqual(taCalls, 2, 'ta should re-execute after x changed');
|
|
1018
|
+
assert.strictEqual(tbCalls, 2, 'tb should re-execute after y changed');
|
|
1019
|
+
// The final join saw the post-change outputs of BOTH branches
|
|
1020
|
+
const final = mergeCalls[mergeCalls.length - 1];
|
|
1021
|
+
assert.deepStrictEqual([...final].sort(), ['a-v2', 'b-v2']);
|
|
1022
|
+
});
|
|
1023
|
+
it('re-joins a consistent pair when the root changes while the join itself is running', async () => {
|
|
1024
|
+
// The change lands during MERGE's execution (not a branch's): the
|
|
1025
|
+
// in-progress join finishes with the v1 pair, then the engine must
|
|
1026
|
+
// re-execute both branches AND the join with the v2 pair.
|
|
1027
|
+
const { taskHashes } = await createDiamond();
|
|
1028
|
+
let leftCalls = 0;
|
|
1029
|
+
let rightCalls = 0;
|
|
1030
|
+
const mergeCalls = [];
|
|
1031
|
+
mockRunner.setResult(taskHashes.get('left'), async () => {
|
|
1032
|
+
leftCalls++;
|
|
1033
|
+
return { state: 'success', cached: false, outputHash: `left-v${leftCalls}` };
|
|
1034
|
+
});
|
|
1035
|
+
mockRunner.setResult(taskHashes.get('right'), async () => {
|
|
1036
|
+
rightCalls++;
|
|
1037
|
+
return { state: 'success', cached: false, outputHash: `right-v${rightCalls}` };
|
|
1038
|
+
});
|
|
1039
|
+
mockRunner.setResult(taskHashes.get('merge'), async (inputHashes) => {
|
|
1040
|
+
mergeCalls.push([...inputHashes]);
|
|
1041
|
+
if (mergeCalls.length === 1) {
|
|
1042
|
+
await mutateInput('v2'); // root changes while the join runs
|
|
1043
|
+
}
|
|
1044
|
+
return { state: 'success', cached: false, outputHash: `merge-v${mergeCalls.length}` };
|
|
1045
|
+
});
|
|
1046
|
+
const result = await dataflowExecute(storage, testRepo, 'test-ws', {
|
|
1047
|
+
runner: mockRunner,
|
|
1048
|
+
concurrency: 4,
|
|
1049
|
+
});
|
|
1050
|
+
assert.strictEqual(result.success, true);
|
|
1051
|
+
assert.strictEqual(leftCalls, 2, 'left should re-execute after the change');
|
|
1052
|
+
assert.strictEqual(rightCalls, 2, 'right should re-execute after the change');
|
|
1053
|
+
assert.strictEqual(mergeCalls.length, 2, 'join should re-execute after the change');
|
|
1054
|
+
assert.deepStrictEqual([...mergeCalls[1]].sort(), ['left-v2', 'right-v2']);
|
|
1055
|
+
});
|
|
1056
|
+
it('converges to the LAST value when the root changes repeatedly during one run', async () => {
|
|
1057
|
+
// Two successive changes inside one run — the fixpoint loop must keep
|
|
1058
|
+
// re-executing until the graph reflects the final value, and stop.
|
|
1059
|
+
const { taskHashes } = await createDiamond();
|
|
1060
|
+
let leftCalls = 0;
|
|
1061
|
+
let rightCalls = 0;
|
|
1062
|
+
const mergeCalls = [];
|
|
1063
|
+
mockRunner.setResult(taskHashes.get('left'), async () => {
|
|
1064
|
+
leftCalls++;
|
|
1065
|
+
if (leftCalls === 1)
|
|
1066
|
+
await mutateInput('v2');
|
|
1067
|
+
if (leftCalls === 2)
|
|
1068
|
+
await mutateInput('v3');
|
|
1069
|
+
return { state: 'success', cached: false, outputHash: `left-v${leftCalls}` };
|
|
1070
|
+
});
|
|
1071
|
+
mockRunner.setResult(taskHashes.get('right'), async () => {
|
|
1072
|
+
rightCalls++;
|
|
1073
|
+
return { state: 'success', cached: false, outputHash: `right-v${rightCalls}` };
|
|
1074
|
+
});
|
|
1075
|
+
mockRunner.setResult(taskHashes.get('merge'), async (inputHashes) => {
|
|
1076
|
+
mergeCalls.push([...inputHashes]);
|
|
1077
|
+
return { state: 'success', cached: false, outputHash: `merge-v${mergeCalls.length}` };
|
|
1078
|
+
});
|
|
1079
|
+
const result = await dataflowExecute(storage, testRepo, 'test-ws', {
|
|
1080
|
+
runner: mockRunner,
|
|
1081
|
+
concurrency: 4,
|
|
1082
|
+
});
|
|
1083
|
+
assert.strictEqual(result.success, true);
|
|
1084
|
+
assert.strictEqual(leftCalls, 3, 'left should run for v1, v2, v3');
|
|
1085
|
+
const final = mergeCalls[mergeCalls.length - 1];
|
|
1086
|
+
assert.deepStrictEqual([...final].sort(), [`left-v${leftCalls}`, `right-v${rightCalls}`]);
|
|
1087
|
+
// No mixed-version join at any point
|
|
1088
|
+
for (const inputs of mergeCalls) {
|
|
1089
|
+
const lv = inputs.find((h) => h.startsWith('left-')).split('-v')[1];
|
|
1090
|
+
const rv = inputs.find((h) => h.startsWith('right-')).split('-v')[1];
|
|
1091
|
+
assert.strictEqual(lv, rv, `merge joined mixed versions: ${inputs.join(', ')}`);
|
|
1092
|
+
}
|
|
1093
|
+
});
|
|
1094
|
+
it('partially invalidates: a change feeding one branch does not re-run the other', async () => {
|
|
1095
|
+
// Two independent roots: x→ta→a, y→tb→b, (a,b)→merge. Changing y while
|
|
1096
|
+
// ta runs must re-execute ONLY tb and the join — ta's work stands.
|
|
1097
|
+
// Over-invalidation here is the efficiency regression that makes big
|
|
1098
|
+
// real-world dataflows re-run everything on every edit.
|
|
1099
|
+
const structure = {
|
|
1100
|
+
type: 'struct',
|
|
1101
|
+
value: new Map([
|
|
1102
|
+
['x', { type: 'value', value: { type: StringType, writable: true } }],
|
|
1103
|
+
['y', { type: 'value', value: { type: StringType, writable: true } }],
|
|
1104
|
+
['a_out', { type: 'value', value: { type: StringType, writable: true } }],
|
|
1105
|
+
['b_out', { type: 'value', value: { type: StringType, writable: true } }],
|
|
1106
|
+
['merge_out', { type: 'value', value: { type: StringType, writable: true } }],
|
|
1107
|
+
]),
|
|
1108
|
+
};
|
|
1109
|
+
const xPath = [variant('field', 'x')];
|
|
1110
|
+
const yPath = [variant('field', 'y')];
|
|
1111
|
+
const aPath = [variant('field', 'a_out')];
|
|
1112
|
+
const bPath = [variant('field', 'b_out')];
|
|
1113
|
+
const taskHashes = await createPackageWithTasks(testRepo, [
|
|
1114
|
+
{ name: 'ta', command: ['echo'], inputs: [xPath], output: aPath },
|
|
1115
|
+
{ name: 'tb', command: ['echo'], inputs: [yPath], output: bPath },
|
|
1116
|
+
{ name: 'merge', command: ['echo'], inputs: [aPath, bPath], output: [variant('field', 'merge_out')] },
|
|
1117
|
+
], structure);
|
|
1118
|
+
await workspaceDeploy(storage, testRepo, 'test-ws', 'test', '1.0.0');
|
|
1119
|
+
await workspaceSetDataset(storage, testRepo, 'test-ws', xPath, 'x1', StringType);
|
|
1120
|
+
await workspaceSetDataset(storage, testRepo, 'test-ws', yPath, 'y1', StringType);
|
|
1121
|
+
let taCalls = 0;
|
|
1122
|
+
let tbCalls = 0;
|
|
1123
|
+
const mergeCalls = [];
|
|
1124
|
+
mockRunner.setResult(taskHashes.get('ta'), async () => {
|
|
1125
|
+
taCalls++;
|
|
1126
|
+
if (taCalls === 1) {
|
|
1127
|
+
const newHash = await datasetWrite(storage, testRepo, 'y2', StringType);
|
|
1128
|
+
const ref = variant('value', { hash: newHash, versions: new Map() });
|
|
1129
|
+
await storage.datasets.write(testRepo, 'test-ws', 'y', ref);
|
|
1130
|
+
}
|
|
1131
|
+
return { state: 'success', cached: false, outputHash: `a-v${taCalls}` };
|
|
1132
|
+
});
|
|
1133
|
+
mockRunner.setResult(taskHashes.get('tb'), async () => {
|
|
1134
|
+
tbCalls++;
|
|
1135
|
+
return { state: 'success', cached: false, outputHash: `b-v${tbCalls}` };
|
|
1136
|
+
});
|
|
1137
|
+
mockRunner.setResult(taskHashes.get('merge'), async (inputHashes) => {
|
|
1138
|
+
mergeCalls.push([...inputHashes]);
|
|
1139
|
+
return { state: 'success', cached: false, outputHash: `m-v${mergeCalls.length}` };
|
|
1140
|
+
});
|
|
1141
|
+
const result = await dataflowExecute(storage, testRepo, 'test-ws', {
|
|
1142
|
+
runner: mockRunner,
|
|
1143
|
+
concurrency: 4,
|
|
1144
|
+
});
|
|
1145
|
+
assert.strictEqual(result.success, true);
|
|
1146
|
+
assert.strictEqual(taCalls, 1, 'ta must NOT re-run for a change to y (over-invalidation)');
|
|
1147
|
+
assert.strictEqual(tbCalls, 2, 'tb should re-run for the y change');
|
|
1148
|
+
const final = mergeCalls[mergeCalls.length - 1];
|
|
1149
|
+
assert.deepStrictEqual([...final].sort(), ['a-v1', 'b-v2']);
|
|
1150
|
+
});
|
|
1151
|
+
it('propagates a mid-flight change transitively through a double diamond', async () => {
|
|
1152
|
+
// diamond1 (input → l1,r1 → m1) feeding diamond2 (m1 → l2,r2 → m2):
|
|
1153
|
+
// a root change during l1 must cascade re-execution through BOTH
|
|
1154
|
+
// fan-out/fan-in layers, and m2 must join a consistent final pair.
|
|
1155
|
+
const fields = [
|
|
1156
|
+
['input', { type: 'value', value: { type: StringType, writable: true } }],
|
|
1157
|
+
['l1_out', { type: 'value', value: { type: StringType, writable: true } }],
|
|
1158
|
+
['r1_out', { type: 'value', value: { type: StringType, writable: true } }],
|
|
1159
|
+
['m1_out', { type: 'value', value: { type: StringType, writable: true } }],
|
|
1160
|
+
['l2_out', { type: 'value', value: { type: StringType, writable: true } }],
|
|
1161
|
+
['r2_out', { type: 'value', value: { type: StringType, writable: true } }],
|
|
1162
|
+
['m2_out', { type: 'value', value: { type: StringType, writable: true } }],
|
|
1163
|
+
];
|
|
1164
|
+
const structure = {
|
|
1165
|
+
type: 'struct',
|
|
1166
|
+
value: new Map(fields),
|
|
1167
|
+
};
|
|
1168
|
+
const path = (name) => [variant('field', name)];
|
|
1169
|
+
const taskHashes = await createPackageWithTasks(testRepo, [
|
|
1170
|
+
{ name: 'l1', command: ['echo'], inputs: [path('input')], output: path('l1_out') },
|
|
1171
|
+
{ name: 'r1', command: ['echo'], inputs: [path('input')], output: path('r1_out') },
|
|
1172
|
+
{ name: 'm1', command: ['echo'], inputs: [path('l1_out'), path('r1_out')], output: path('m1_out') },
|
|
1173
|
+
{ name: 'l2', command: ['echo'], inputs: [path('m1_out')], output: path('l2_out') },
|
|
1174
|
+
{ name: 'r2', command: ['echo'], inputs: [path('m1_out')], output: path('r2_out') },
|
|
1175
|
+
{ name: 'm2', command: ['echo'], inputs: [path('l2_out'), path('r2_out')], output: path('m2_out') },
|
|
1176
|
+
], structure);
|
|
1177
|
+
await workspaceDeploy(storage, testRepo, 'test-ws', 'test', '1.0.0');
|
|
1178
|
+
await workspaceSetDataset(storage, testRepo, 'test-ws', path('input'), 'v1', StringType);
|
|
1179
|
+
// Exact downstream call counts are schedule-dependent (a join that
|
|
1180
|
+
// wasn't ready yet when the change was detected legitimately runs
|
|
1181
|
+
// once, with the new inputs). The schedule-independent invariant is:
|
|
1182
|
+
// every join's LAST call consumed its parents' LATEST outputs.
|
|
1183
|
+
const calls = new Map();
|
|
1184
|
+
const lastInputs = new Map();
|
|
1185
|
+
const counted = (name, hash, extra) => {
|
|
1186
|
+
mockRunner.setResult(hash, async (inputHashes) => {
|
|
1187
|
+
const n = (calls.get(name) ?? 0) + 1;
|
|
1188
|
+
calls.set(name, n);
|
|
1189
|
+
lastInputs.set(name, [...inputHashes]);
|
|
1190
|
+
if (extra)
|
|
1191
|
+
await extra(n);
|
|
1192
|
+
return { state: 'success', cached: false, outputHash: `${name}-v${n}` };
|
|
1193
|
+
});
|
|
1194
|
+
};
|
|
1195
|
+
counted('l1', taskHashes.get('l1'), async (n) => {
|
|
1196
|
+
if (n === 1)
|
|
1197
|
+
await mutateInput('v2');
|
|
1198
|
+
});
|
|
1199
|
+
for (const name of ['r1', 'm1', 'l2', 'r2', 'm2']) {
|
|
1200
|
+
counted(name, taskHashes.get(name));
|
|
1201
|
+
}
|
|
1202
|
+
const result = await dataflowExecute(storage, testRepo, 'test-ws', {
|
|
1203
|
+
runner: mockRunner,
|
|
1204
|
+
concurrency: 4,
|
|
1205
|
+
});
|
|
1206
|
+
assert.strictEqual(result.success, true);
|
|
1207
|
+
// The task that observed the change must have re-executed
|
|
1208
|
+
assert.strictEqual(calls.get('l1'), 2, 'l1 should re-execute after the root change');
|
|
1209
|
+
// Transitive freshness: each join's last call used its parents' final outputs
|
|
1210
|
+
const latest = (name) => `${name}-v${calls.get(name)}`;
|
|
1211
|
+
assert.deepStrictEqual([...lastInputs.get('m1')].sort(), [latest('l1'), latest('r1')].sort(), 'first join must consume the latest branch outputs');
|
|
1212
|
+
assert.deepStrictEqual(lastInputs.get('l2'), [latest('m1')], 'second fan-out must consume the latest m1');
|
|
1213
|
+
assert.deepStrictEqual(lastInputs.get('r2'), [latest('m1')]);
|
|
1214
|
+
assert.deepStrictEqual([...lastInputs.get('m2')].sort(), [latest('l2'), latest('r2')].sort(), 'final join must consume the latest second-layer outputs');
|
|
1215
|
+
});
|
|
1216
|
+
});
|
|
818
1217
|
describe('DataflowRun recording', () => {
|
|
819
1218
|
it('records correct outputVersions with task output hashes', async () => {
|
|
820
1219
|
const structure = {
|