@elaraai/e3-core 0.0.2-beta.5 → 0.0.2-beta.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -22
- package/dist/src/dataflow/api-compat.d.ts +90 -0
- package/dist/src/dataflow/api-compat.d.ts.map +1 -0
- package/dist/src/dataflow/api-compat.js +139 -0
- package/dist/src/dataflow/api-compat.js.map +1 -0
- package/dist/src/dataflow/index.d.ts +18 -0
- package/dist/src/dataflow/index.d.ts.map +1 -0
- package/dist/src/dataflow/index.js +23 -0
- package/dist/src/dataflow/index.js.map +1 -0
- package/dist/src/dataflow/orchestrator/LocalOrchestrator.d.ts +76 -0
- package/dist/src/dataflow/orchestrator/LocalOrchestrator.d.ts.map +1 -0
- package/dist/src/dataflow/orchestrator/LocalOrchestrator.js +729 -0
- package/dist/src/dataflow/orchestrator/LocalOrchestrator.js.map +1 -0
- package/dist/src/dataflow/orchestrator/index.d.ts +12 -0
- package/dist/src/dataflow/orchestrator/index.d.ts.map +1 -0
- package/dist/src/dataflow/orchestrator/index.js +12 -0
- package/dist/src/dataflow/orchestrator/index.js.map +1 -0
- package/dist/src/dataflow/orchestrator/interfaces.d.ts +163 -0
- package/dist/src/dataflow/orchestrator/interfaces.d.ts.map +1 -0
- package/dist/src/dataflow/orchestrator/interfaces.js +52 -0
- package/dist/src/dataflow/orchestrator/interfaces.js.map +1 -0
- package/dist/src/dataflow/state-store/FileStateStore.d.ts +67 -0
- package/dist/src/dataflow/state-store/FileStateStore.d.ts.map +1 -0
- package/dist/src/dataflow/state-store/FileStateStore.js +300 -0
- package/dist/src/dataflow/state-store/FileStateStore.js.map +1 -0
- package/dist/src/dataflow/state-store/InMemoryStateStore.d.ts +42 -0
- package/dist/src/dataflow/state-store/InMemoryStateStore.d.ts.map +1 -0
- package/dist/src/dataflow/state-store/InMemoryStateStore.js +229 -0
- package/dist/src/dataflow/state-store/InMemoryStateStore.js.map +1 -0
- package/dist/src/dataflow/state-store/index.d.ts +13 -0
- package/dist/src/dataflow/state-store/index.d.ts.map +1 -0
- package/dist/src/dataflow/state-store/index.js +13 -0
- package/dist/src/dataflow/state-store/index.js.map +1 -0
- package/dist/src/dataflow/state-store/interfaces.d.ts +159 -0
- package/dist/src/dataflow/state-store/interfaces.d.ts.map +1 -0
- package/dist/src/dataflow/state-store/interfaces.js +6 -0
- package/dist/src/dataflow/state-store/interfaces.js.map +1 -0
- package/dist/src/dataflow/steps.d.ts +222 -0
- package/dist/src/dataflow/steps.d.ts.map +1 -0
- package/dist/src/dataflow/steps.js +707 -0
- package/dist/src/dataflow/steps.js.map +1 -0
- package/dist/src/dataflow/types.d.ts +127 -0
- package/dist/src/dataflow/types.d.ts.map +1 -0
- package/dist/src/dataflow/types.js +7 -0
- package/dist/src/dataflow/types.js.map +1 -0
- package/dist/src/dataflow.d.ts +113 -38
- package/dist/src/dataflow.d.ts.map +1 -1
- package/dist/src/dataflow.js +269 -416
- package/dist/src/dataflow.js.map +1 -1
- package/dist/src/dataset-refs.d.ts +124 -0
- package/dist/src/dataset-refs.d.ts.map +1 -0
- package/dist/src/dataset-refs.js +319 -0
- package/dist/src/dataset-refs.js.map +1 -0
- package/dist/src/errors.d.ts +39 -9
- package/dist/src/errors.d.ts.map +1 -1
- package/dist/src/errors.js +51 -8
- package/dist/src/errors.js.map +1 -1
- package/dist/src/execution/LocalTaskRunner.d.ts +73 -0
- package/dist/src/execution/LocalTaskRunner.d.ts.map +1 -0
- package/dist/src/execution/LocalTaskRunner.js +399 -0
- package/dist/src/execution/LocalTaskRunner.js.map +1 -0
- package/dist/src/execution/MockTaskRunner.d.ts +49 -0
- package/dist/src/execution/MockTaskRunner.d.ts.map +1 -0
- package/dist/src/execution/MockTaskRunner.js +54 -0
- package/dist/src/execution/MockTaskRunner.js.map +1 -0
- package/dist/src/execution/index.d.ts +16 -0
- package/dist/src/execution/index.d.ts.map +1 -0
- package/dist/src/execution/index.js +8 -0
- package/dist/src/execution/index.js.map +1 -0
- package/dist/src/execution/interfaces.d.ts +246 -0
- package/dist/src/execution/interfaces.d.ts.map +1 -0
- package/dist/src/execution/interfaces.js +6 -0
- package/dist/src/execution/interfaces.js.map +1 -0
- package/dist/src/execution/processHelpers.d.ts +20 -0
- package/dist/src/execution/processHelpers.d.ts.map +1 -0
- package/dist/src/execution/processHelpers.js +62 -0
- package/dist/src/execution/processHelpers.js.map +1 -0
- package/dist/src/executions.d.ts +71 -104
- package/dist/src/executions.d.ts.map +1 -1
- package/dist/src/executions.js +110 -476
- package/dist/src/executions.js.map +1 -1
- package/dist/src/index.d.ts +20 -10
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +48 -18
- package/dist/src/index.js.map +1 -1
- package/dist/src/objects.d.ts +7 -53
- package/dist/src/objects.d.ts.map +1 -1
- package/dist/src/objects.js +13 -232
- package/dist/src/objects.js.map +1 -1
- package/dist/src/packages.d.ts +41 -14
- package/dist/src/packages.d.ts.map +1 -1
- package/dist/src/packages.js +145 -88
- package/dist/src/packages.js.map +1 -1
- package/dist/src/storage/in-memory/InMemoryRepoStore.d.ts +35 -0
- package/dist/src/storage/in-memory/InMemoryRepoStore.d.ts.map +1 -0
- package/dist/src/storage/in-memory/InMemoryRepoStore.js +107 -0
- package/dist/src/storage/in-memory/InMemoryRepoStore.js.map +1 -0
- package/dist/src/storage/in-memory/InMemoryStorage.d.ts +139 -0
- package/dist/src/storage/in-memory/InMemoryStorage.d.ts.map +1 -0
- package/dist/src/storage/in-memory/InMemoryStorage.js +439 -0
- package/dist/src/storage/in-memory/InMemoryStorage.js.map +1 -0
- package/dist/src/storage/in-memory/index.d.ts +12 -0
- package/dist/src/storage/in-memory/index.d.ts.map +1 -0
- package/dist/src/storage/in-memory/index.js +12 -0
- package/dist/src/storage/in-memory/index.js.map +1 -0
- package/dist/src/storage/index.d.ts +18 -0
- package/dist/src/storage/index.d.ts.map +1 -0
- package/dist/src/storage/index.js +10 -0
- package/dist/src/storage/index.js.map +1 -0
- package/dist/src/storage/interfaces.d.ts +581 -0
- package/dist/src/storage/interfaces.d.ts.map +1 -0
- package/dist/src/storage/interfaces.js +6 -0
- package/dist/src/storage/interfaces.js.map +1 -0
- package/dist/src/storage/local/LocalBackend.d.ts +56 -0
- package/dist/src/storage/local/LocalBackend.d.ts.map +1 -0
- package/dist/src/storage/local/LocalBackend.js +145 -0
- package/dist/src/storage/local/LocalBackend.js.map +1 -0
- package/dist/src/storage/local/LocalDatasetRefStore.d.ts +22 -0
- package/dist/src/storage/local/LocalDatasetRefStore.d.ts.map +1 -0
- package/dist/src/storage/local/LocalDatasetRefStore.js +118 -0
- package/dist/src/storage/local/LocalDatasetRefStore.js.map +1 -0
- package/dist/src/storage/local/LocalLockService.d.ts +111 -0
- package/dist/src/storage/local/LocalLockService.d.ts.map +1 -0
- package/dist/src/storage/local/LocalLockService.js +364 -0
- package/dist/src/storage/local/LocalLockService.js.map +1 -0
- package/dist/src/storage/local/LocalLogStore.d.ts +23 -0
- package/dist/src/storage/local/LocalLogStore.d.ts.map +1 -0
- package/dist/src/storage/local/LocalLogStore.js +66 -0
- package/dist/src/storage/local/LocalLogStore.js.map +1 -0
- package/dist/src/storage/local/LocalObjectStore.d.ts +55 -0
- package/dist/src/storage/local/LocalObjectStore.d.ts.map +1 -0
- package/dist/src/storage/local/LocalObjectStore.js +300 -0
- package/dist/src/storage/local/LocalObjectStore.js.map +1 -0
- package/dist/src/storage/local/LocalRefStore.d.ts +50 -0
- package/dist/src/storage/local/LocalRefStore.d.ts.map +1 -0
- package/dist/src/storage/local/LocalRefStore.js +337 -0
- package/dist/src/storage/local/LocalRefStore.js.map +1 -0
- package/dist/src/storage/local/LocalRepoStore.d.ts +55 -0
- package/dist/src/storage/local/LocalRepoStore.d.ts.map +1 -0
- package/dist/src/storage/local/LocalRepoStore.js +365 -0
- package/dist/src/storage/local/LocalRepoStore.js.map +1 -0
- package/dist/src/storage/local/gc.d.ts +92 -0
- package/dist/src/storage/local/gc.d.ts.map +1 -0
- package/dist/src/storage/local/gc.js +377 -0
- package/dist/src/storage/local/gc.js.map +1 -0
- package/dist/src/storage/local/index.d.ts +18 -0
- package/dist/src/storage/local/index.d.ts.map +1 -0
- package/dist/src/storage/local/index.js +18 -0
- package/dist/src/storage/local/index.js.map +1 -0
- package/dist/src/storage/local/localHelpers.d.ts +25 -0
- package/dist/src/storage/local/localHelpers.d.ts.map +1 -0
- package/dist/src/storage/local/localHelpers.js +69 -0
- package/dist/src/storage/local/localHelpers.js.map +1 -0
- package/dist/src/{repository.d.ts → storage/local/repository.d.ts} +8 -4
- package/dist/src/storage/local/repository.d.ts.map +1 -0
- package/dist/src/{repository.js → storage/local/repository.js} +31 -29
- package/dist/src/storage/local/repository.js.map +1 -0
- package/dist/src/tasks.d.ts +16 -10
- package/dist/src/tasks.d.ts.map +1 -1
- package/dist/src/tasks.js +35 -41
- package/dist/src/tasks.js.map +1 -1
- package/dist/src/test-helpers.d.ts +5 -4
- package/dist/src/test-helpers.d.ts.map +1 -1
- package/dist/src/test-helpers.js +9 -21
- package/dist/src/test-helpers.js.map +1 -1
- package/dist/src/transfer/InMemoryTransferBackend.d.ts +75 -0
- package/dist/src/transfer/InMemoryTransferBackend.d.ts.map +1 -0
- package/dist/src/transfer/InMemoryTransferBackend.js +211 -0
- package/dist/src/transfer/InMemoryTransferBackend.js.map +1 -0
- package/dist/src/transfer/index.d.ts +9 -0
- package/dist/src/transfer/index.d.ts.map +1 -0
- package/dist/src/transfer/index.js +11 -0
- package/dist/src/transfer/index.js.map +1 -0
- package/dist/src/transfer/interfaces.d.ts +103 -0
- package/dist/src/transfer/interfaces.d.ts.map +1 -0
- package/dist/src/transfer/interfaces.js +6 -0
- package/dist/src/transfer/interfaces.js.map +1 -0
- package/dist/src/transfer/process.d.ts +55 -0
- package/dist/src/transfer/process.d.ts.map +1 -0
- package/dist/src/transfer/process.js +144 -0
- package/dist/src/transfer/process.js.map +1 -0
- package/dist/src/transfer/types.d.ts +106 -0
- package/dist/src/transfer/types.d.ts.map +1 -0
- package/dist/src/transfer/types.js +61 -0
- package/dist/src/transfer/types.js.map +1 -0
- package/dist/src/trees.d.ts +147 -59
- package/dist/src/trees.d.ts.map +1 -1
- package/dist/src/trees.js +372 -419
- package/dist/src/trees.js.map +1 -1
- package/dist/src/uuid.d.ts +26 -0
- package/dist/src/uuid.d.ts.map +1 -0
- package/dist/src/uuid.js +80 -0
- package/dist/src/uuid.js.map +1 -0
- package/dist/src/workspaceStatus.d.ts +6 -4
- package/dist/src/workspaceStatus.d.ts.map +1 -1
- package/dist/src/workspaceStatus.js +46 -60
- package/dist/src/workspaceStatus.js.map +1 -1
- package/dist/src/workspaces.d.ts +46 -47
- package/dist/src/workspaces.d.ts.map +1 -1
- package/dist/src/workspaces.js +281 -221
- package/dist/src/workspaces.js.map +1 -1
- package/package.json +4 -4
- package/dist/src/gc.d.ts +0 -54
- package/dist/src/gc.d.ts.map +0 -1
- package/dist/src/gc.js +0 -233
- package/dist/src/gc.js.map +0 -1
- package/dist/src/repository.d.ts.map +0 -1
- package/dist/src/repository.js.map +0 -1
- package/dist/src/workspaceLock.d.ts +0 -67
- package/dist/src/workspaceLock.d.ts.map +0 -1
- package/dist/src/workspaceLock.js +0 -217
- package/dist/src/workspaceLock.js.map +0 -1
package/dist/src/dataflow.js
CHANGED
|
@@ -5,128 +5,101 @@
|
|
|
5
5
|
/**
|
|
6
6
|
* Dataflow execution for e3 workspaces.
|
|
7
7
|
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
8
|
+
* Provides the high-level `dataflowExecute` entry point (which delegates
|
|
9
|
+
* to `LocalOrchestrator`) and shared graph-building utilities used by
|
|
10
|
+
* both local and cloud execution paths.
|
|
10
11
|
*
|
|
11
|
-
* The execution
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
* 3. Initialize ready queue with tasks whose inputs are all assigned
|
|
15
|
-
* 4. Execute tasks from ready queue, respecting concurrency limit
|
|
16
|
-
* 5. On task completion, queue workspace update then check dependents for readiness
|
|
17
|
-
* 6. On failure, stop launching new tasks but wait for running ones
|
|
18
|
-
*
|
|
19
|
-
* IMPORTANT: Workspace state updates are serialized through an async queue to
|
|
20
|
-
* prevent race conditions when multiple tasks complete concurrently. Each task's
|
|
21
|
-
* output is written to the workspace and dependents are notified only after the
|
|
22
|
-
* write completes, ensuring downstream tasks see consistent state.
|
|
12
|
+
* The reactive execution logic (input change detection, task invalidation,
|
|
13
|
+
* version vector consistency) lives in `dataflow/steps.ts` and is orchestrated
|
|
14
|
+
* by `dataflow/orchestrator/LocalOrchestrator.ts`.
|
|
23
15
|
*/
|
|
24
|
-
import { decodeBeast2For } from '@elaraai/east';
|
|
16
|
+
import { decodeBeast2For, variant } from '@elaraai/east';
|
|
25
17
|
import { PackageObjectType, TaskObjectType, WorkspaceStateType, pathToString, } from '@elaraai/e3-types';
|
|
26
|
-
import {
|
|
27
|
-
import {
|
|
28
|
-
import {
|
|
29
|
-
import { E3Error, WorkspaceNotFoundError, WorkspaceNotDeployedError, TaskNotFoundError, DataflowError, DataflowAbortedError, isNotFoundError, } from './errors.js';
|
|
30
|
-
import { acquireWorkspaceLock, } from './workspaceLock.js';
|
|
31
|
-
import * as fs from 'fs/promises';
|
|
32
|
-
import * as path from 'path';
|
|
18
|
+
import { executionGetOutput, inputsHash, } from './executions.js';
|
|
19
|
+
import { workspaceGetDatasetHash, } from './trees.js';
|
|
20
|
+
import { E3Error, WorkspaceNotFoundError, WorkspaceNotDeployedError, DataflowError, } from './errors.js';
|
|
33
21
|
// =============================================================================
|
|
34
|
-
//
|
|
22
|
+
// Path Parsing Helper
|
|
35
23
|
// =============================================================================
|
|
36
24
|
/**
|
|
37
|
-
*
|
|
25
|
+
* Parse a keypath string (from pathToString) back to TreePath.
|
|
26
|
+
*
|
|
27
|
+
* The keypath format is: .field1.field2 (dot-separated field names)
|
|
28
|
+
* Quoted identifiers use backticks: .field1.`complex/name`
|
|
38
29
|
*
|
|
39
|
-
*
|
|
40
|
-
*
|
|
41
|
-
* root hash). This mutex ensures only one update runs at a time.
|
|
30
|
+
* @param pathStr - The path string in keypath format
|
|
31
|
+
* @returns TreePath array of path segments
|
|
42
32
|
*/
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
/**
|
|
47
|
-
* Acquire the mutex, execute the callback, then release.
|
|
48
|
-
* If the mutex is already held, waits until it's available.
|
|
49
|
-
*/
|
|
50
|
-
async runExclusive(fn) {
|
|
51
|
-
await this.acquire();
|
|
52
|
-
try {
|
|
53
|
-
return await fn();
|
|
54
|
-
}
|
|
55
|
-
finally {
|
|
56
|
-
this.release();
|
|
57
|
-
}
|
|
33
|
+
export function parsePathString(pathStr) {
|
|
34
|
+
if (!pathStr.startsWith('.')) {
|
|
35
|
+
throw new Error(`Invalid path string: expected '.' prefix, got '${pathStr}'`);
|
|
58
36
|
}
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
37
|
+
const segments = [];
|
|
38
|
+
let i = 1; // Skip the leading '.'
|
|
39
|
+
while (i < pathStr.length) {
|
|
40
|
+
let fieldName;
|
|
41
|
+
if (pathStr[i] === '`') {
|
|
42
|
+
// Quoted identifier: find closing backtick
|
|
43
|
+
const endQuote = pathStr.indexOf('`', i + 1);
|
|
44
|
+
if (endQuote === -1) {
|
|
45
|
+
throw new Error(`Invalid path string: unclosed backtick at position ${i}`);
|
|
64
46
|
}
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
}
|
|
68
|
-
});
|
|
69
|
-
}
|
|
70
|
-
release() {
|
|
71
|
-
const next = this.queue.shift();
|
|
72
|
-
if (next) {
|
|
73
|
-
next();
|
|
47
|
+
fieldName = pathStr.slice(i + 1, endQuote);
|
|
48
|
+
i = endQuote + 1;
|
|
74
49
|
}
|
|
75
50
|
else {
|
|
76
|
-
|
|
51
|
+
// Unquoted identifier: read until '.' or end
|
|
52
|
+
let end = pathStr.indexOf('.', i);
|
|
53
|
+
if (end === -1)
|
|
54
|
+
end = pathStr.length;
|
|
55
|
+
fieldName = pathStr.slice(i, end);
|
|
56
|
+
i = end;
|
|
57
|
+
}
|
|
58
|
+
if (fieldName) {
|
|
59
|
+
segments.push(variant('field', fieldName));
|
|
60
|
+
}
|
|
61
|
+
// Skip the '.' separator
|
|
62
|
+
if (i < pathStr.length && pathStr[i] === '.') {
|
|
63
|
+
i++;
|
|
77
64
|
}
|
|
78
65
|
}
|
|
66
|
+
return segments;
|
|
79
67
|
}
|
|
80
68
|
// =============================================================================
|
|
81
69
|
// Workspace State Reader
|
|
82
70
|
// =============================================================================
|
|
83
71
|
/**
|
|
84
|
-
* Read workspace state
|
|
72
|
+
* Read workspace state.
|
|
85
73
|
* @throws {WorkspaceNotFoundError} If workspace doesn't exist
|
|
86
74
|
* @throws {WorkspaceNotDeployedError} If workspace has no package deployed
|
|
87
75
|
*/
|
|
88
|
-
async function readWorkspaceState(
|
|
89
|
-
const
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
data = await fs.readFile(stateFile);
|
|
93
|
-
}
|
|
94
|
-
catch (err) {
|
|
95
|
-
if (isNotFoundError(err)) {
|
|
96
|
-
throw new WorkspaceNotFoundError(ws);
|
|
97
|
-
}
|
|
98
|
-
throw err;
|
|
76
|
+
async function readWorkspaceState(storage, repo, ws) {
|
|
77
|
+
const data = await storage.refs.workspaceRead(repo, ws);
|
|
78
|
+
if (data === null) {
|
|
79
|
+
throw new WorkspaceNotFoundError(ws);
|
|
99
80
|
}
|
|
100
81
|
if (data.length === 0) {
|
|
101
82
|
throw new WorkspaceNotDeployedError(ws);
|
|
102
83
|
}
|
|
103
84
|
const decoder = decodeBeast2For(WorkspaceStateType);
|
|
104
|
-
return decoder(data);
|
|
85
|
+
return decoder(Buffer.from(data));
|
|
105
86
|
}
|
|
106
87
|
// =============================================================================
|
|
107
88
|
// Dependency Graph Building
|
|
108
89
|
// =============================================================================
|
|
109
90
|
/**
|
|
110
91
|
* Build the dependency graph for a workspace.
|
|
111
|
-
*
|
|
112
|
-
* Returns:
|
|
113
|
-
* - taskNodes: Map of task name -> TaskNode
|
|
114
|
-
* - outputToTask: Map of output path string -> task name
|
|
115
|
-
* - taskDependents: Map of task name -> set of dependent task names
|
|
116
92
|
*/
|
|
117
|
-
async function buildDependencyGraph(
|
|
118
|
-
|
|
119
|
-
const
|
|
120
|
-
// Read package object to get tasks map
|
|
121
|
-
const pkgData = await objectRead(repoPath, state.packageHash);
|
|
93
|
+
async function buildDependencyGraph(storage, repo, ws) {
|
|
94
|
+
const state = await readWorkspaceState(storage, repo, ws);
|
|
95
|
+
const pkgData = await storage.objects.read(repo, state.packageHash);
|
|
122
96
|
const pkgDecoder = decodeBeast2For(PackageObjectType);
|
|
123
97
|
const pkgObject = pkgDecoder(Buffer.from(pkgData));
|
|
124
98
|
const taskNodes = new Map();
|
|
125
|
-
const outputToTask = new Map();
|
|
126
|
-
// First pass: load all tasks and build output->task map
|
|
99
|
+
const outputToTask = new Map();
|
|
127
100
|
const taskDecoder = decodeBeast2For(TaskObjectType);
|
|
128
101
|
for (const [taskName, taskHash] of pkgObject.tasks) {
|
|
129
|
-
const taskData = await
|
|
102
|
+
const taskData = await storage.objects.read(repo, taskHash);
|
|
130
103
|
const task = taskDecoder(Buffer.from(taskData));
|
|
131
104
|
const outputPathStr = pathToString(task.output);
|
|
132
105
|
outputToTask.set(outputPathStr, taskName);
|
|
@@ -136,32 +109,24 @@ async function buildDependencyGraph(repoPath, ws) {
|
|
|
136
109
|
task,
|
|
137
110
|
inputPaths: task.inputs,
|
|
138
111
|
outputPath: task.output,
|
|
139
|
-
unresolvedCount: 0,
|
|
112
|
+
unresolvedCount: 0,
|
|
140
113
|
});
|
|
141
114
|
}
|
|
142
|
-
// Build reverse dependency map: task -> tasks that depend on it
|
|
143
115
|
const taskDependents = new Map();
|
|
144
116
|
for (const taskName of taskNodes.keys()) {
|
|
145
117
|
taskDependents.set(taskName, new Set());
|
|
146
118
|
}
|
|
147
|
-
// Second pass: compute dependencies and unresolved counts
|
|
148
119
|
for (const [taskName, node] of taskNodes) {
|
|
149
120
|
for (const inputPath of node.inputPaths) {
|
|
150
121
|
const inputPathStr = pathToString(inputPath);
|
|
151
122
|
const producerTask = outputToTask.get(inputPathStr);
|
|
152
123
|
if (producerTask) {
|
|
153
|
-
// This input comes from another task's output.
|
|
154
|
-
// The task cannot run until the producer task completes,
|
|
155
|
-
// regardless of whether the output is currently assigned
|
|
156
|
-
// (it might be stale from a previous run).
|
|
157
124
|
taskDependents.get(producerTask).add(taskName);
|
|
158
125
|
node.unresolvedCount++;
|
|
159
126
|
}
|
|
160
|
-
// If not produced by a task, it's an external input - check if assigned
|
|
161
127
|
else {
|
|
162
|
-
const { refType } = await workspaceGetDatasetHash(
|
|
128
|
+
const { refType } = await workspaceGetDatasetHash(storage, repo, ws, inputPath);
|
|
163
129
|
if (refType === 'unassigned') {
|
|
164
|
-
// External input that is unassigned - this task can never run
|
|
165
130
|
node.unresolvedCount++;
|
|
166
131
|
}
|
|
167
132
|
}
|
|
@@ -175,362 +140,109 @@ async function buildDependencyGraph(repoPath, ws) {
|
|
|
175
140
|
/**
|
|
176
141
|
* Execute all tasks in a workspace according to the dependency graph.
|
|
177
142
|
*
|
|
178
|
-
*
|
|
179
|
-
*
|
|
180
|
-
*
|
|
143
|
+
* Delegates to `LocalOrchestrator` which implements reactive fixpoint
|
|
144
|
+
* execution using step functions. After each task completes, input changes
|
|
145
|
+
* are detected and affected tasks are invalidated and re-executed.
|
|
181
146
|
*
|
|
182
|
-
*
|
|
183
|
-
*
|
|
184
|
-
* lock instead (caller is responsible for releasing it).
|
|
185
|
-
*
|
|
186
|
-
* @param repoPath - Path to .e3 repository
|
|
147
|
+
* @param storage - Storage backend
|
|
148
|
+
* @param repo - Repository identifier
|
|
187
149
|
* @param ws - Workspace name
|
|
188
150
|
* @param options - Execution options
|
|
189
151
|
* @returns Result of the dataflow execution
|
|
152
|
+
*
|
|
190
153
|
* @throws {WorkspaceLockError} If workspace is locked by another process
|
|
191
154
|
* @throws {WorkspaceNotFoundError} If workspace doesn't exist
|
|
192
155
|
* @throws {WorkspaceNotDeployedError} If workspace has no package deployed
|
|
193
156
|
* @throws {TaskNotFoundError} If filter specifies a task that doesn't exist
|
|
194
157
|
* @throws {DataflowError} If execution fails for other reasons
|
|
195
158
|
*/
|
|
196
|
-
export async function dataflowExecute(
|
|
197
|
-
|
|
198
|
-
const
|
|
199
|
-
const
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
159
|
+
export async function dataflowExecute(storage, repo, ws, options = {}) {
|
|
160
|
+
const { LocalOrchestrator } = await import('./dataflow/orchestrator/LocalOrchestrator.js');
|
|
161
|
+
const orchestrator = new LocalOrchestrator();
|
|
162
|
+
const taskResults = [];
|
|
163
|
+
const handle = await orchestrator.start(storage, repo, ws, {
|
|
164
|
+
concurrency: options.concurrency,
|
|
165
|
+
force: options.force,
|
|
166
|
+
filter: options.filter,
|
|
167
|
+
signal: options.signal,
|
|
168
|
+
lock: options.lock,
|
|
169
|
+
runner: options.runner,
|
|
170
|
+
onTaskStart: options.onTaskStart,
|
|
171
|
+
onTaskComplete: (result) => {
|
|
172
|
+
taskResults.push({
|
|
173
|
+
name: result.name,
|
|
174
|
+
cached: result.cached,
|
|
175
|
+
state: result.state,
|
|
176
|
+
error: result.error,
|
|
177
|
+
exitCode: result.exitCode,
|
|
178
|
+
duration: result.duration,
|
|
179
|
+
});
|
|
180
|
+
options.onTaskComplete?.({
|
|
181
|
+
name: result.name,
|
|
182
|
+
cached: result.cached,
|
|
183
|
+
state: result.state,
|
|
184
|
+
error: result.error,
|
|
185
|
+
exitCode: result.exitCode,
|
|
186
|
+
duration: result.duration,
|
|
187
|
+
});
|
|
188
|
+
},
|
|
189
|
+
onStdout: options.onStdout,
|
|
190
|
+
onStderr: options.onStderr,
|
|
191
|
+
onInputChanged: options.onInputChanged,
|
|
192
|
+
onTaskInvalidated: options.onTaskInvalidated,
|
|
193
|
+
onTaskDeferred: options.onTaskDeferred,
|
|
194
|
+
});
|
|
195
|
+
const result = await orchestrator.wait(handle);
|
|
196
|
+
return {
|
|
197
|
+
success: result.success,
|
|
198
|
+
runId: result.runId,
|
|
199
|
+
executed: result.executed,
|
|
200
|
+
cached: result.cached,
|
|
201
|
+
failed: result.failed,
|
|
202
|
+
skipped: result.skipped,
|
|
203
|
+
reexecuted: result.reexecuted,
|
|
204
|
+
tasks: taskResults,
|
|
205
|
+
duration: result.duration,
|
|
206
|
+
};
|
|
209
207
|
}
|
|
210
208
|
/**
|
|
211
|
-
*
|
|
212
|
-
*
|
|
213
|
-
* Returns a promise immediately without awaiting execution. The lock is
|
|
214
|
-
* released automatically when execution completes.
|
|
209
|
+
* Execute dataflow with an externally-held lock.
|
|
210
|
+
* The lock is released automatically when execution completes or fails.
|
|
215
211
|
*
|
|
216
|
-
* @param
|
|
212
|
+
* @param storage - Storage backend
|
|
213
|
+
* @param repo - Repository identifier
|
|
217
214
|
* @param ws - Workspace name
|
|
218
215
|
* @param options - Execution options (lock must be provided)
|
|
219
216
|
* @returns Promise that resolves when execution completes
|
|
220
|
-
* @throws {WorkspaceNotFoundError} If workspace doesn't exist
|
|
221
|
-
* @throws {WorkspaceNotDeployedError} If workspace has no package deployed
|
|
222
|
-
* @throws {TaskNotFoundError} If filter specifies a task that doesn't exist
|
|
223
|
-
* @throws {DataflowError} If execution fails for other reasons
|
|
224
217
|
*/
|
|
225
|
-
export function dataflowStart(
|
|
226
|
-
return dataflowExecuteWithLock(repoPath, ws, options)
|
|
227
|
-
.finally(() => options.lock.release());
|
|
228
|
-
}
|
|
229
|
-
/**
|
|
230
|
-
* Internal: Execute dataflow with lock already held.
|
|
231
|
-
*/
|
|
232
|
-
async function dataflowExecuteWithLock(repoPath, ws, options) {
|
|
233
|
-
const startTime = Date.now();
|
|
234
|
-
const concurrency = options.concurrency ?? 4;
|
|
235
|
-
let taskNodes;
|
|
236
|
-
let taskDependents;
|
|
218
|
+
export async function dataflowStart(storage, repo, ws, options) {
|
|
237
219
|
try {
|
|
238
|
-
|
|
239
|
-
const graph = await buildDependencyGraph(repoPath, ws);
|
|
240
|
-
taskNodes = graph.taskNodes;
|
|
241
|
-
taskDependents = graph.taskDependents;
|
|
242
|
-
}
|
|
243
|
-
catch (err) {
|
|
244
|
-
// Re-throw E3Errors as-is
|
|
245
|
-
if (err instanceof E3Error)
|
|
246
|
-
throw err;
|
|
247
|
-
// Wrap unexpected errors
|
|
248
|
-
throw new DataflowError(`Failed to build dependency graph: ${err instanceof Error ? err.message : err}`);
|
|
249
|
-
}
|
|
250
|
-
// Apply filter if specified
|
|
251
|
-
const filteredTaskNames = options.filter
|
|
252
|
-
? new Set([options.filter])
|
|
253
|
-
: null;
|
|
254
|
-
// Validate filter
|
|
255
|
-
if (filteredTaskNames && options.filter && !taskNodes.has(options.filter)) {
|
|
256
|
-
throw new TaskNotFoundError(options.filter);
|
|
257
|
-
}
|
|
258
|
-
// Track execution state
|
|
259
|
-
const results = [];
|
|
260
|
-
let executed = 0;
|
|
261
|
-
let cached = 0;
|
|
262
|
-
let failed = 0;
|
|
263
|
-
let skipped = 0;
|
|
264
|
-
let hasFailure = false;
|
|
265
|
-
let aborted = false;
|
|
266
|
-
// Check for abort signal
|
|
267
|
-
const checkAborted = () => {
|
|
268
|
-
if (options.signal?.aborted && !aborted) {
|
|
269
|
-
aborted = true;
|
|
270
|
-
}
|
|
271
|
-
return aborted;
|
|
272
|
-
};
|
|
273
|
-
// Mutex to serialize workspace state updates.
|
|
274
|
-
// When multiple tasks complete concurrently, their writes to the workspace
|
|
275
|
-
// must be serialized to prevent lost updates (read-modify-write race).
|
|
276
|
-
const workspaceUpdateMutex = new AsyncMutex();
|
|
277
|
-
// Ready queue: tasks with all dependencies resolved
|
|
278
|
-
const readyQueue = [];
|
|
279
|
-
const completed = new Set();
|
|
280
|
-
const inProgress = new Set();
|
|
281
|
-
// Initialize ready queue with tasks that have no unresolved dependencies
|
|
282
|
-
// and pass the filter (if any)
|
|
283
|
-
for (const [taskName, node] of taskNodes) {
|
|
284
|
-
if (node.unresolvedCount === 0) {
|
|
285
|
-
if (!filteredTaskNames || filteredTaskNames.has(taskName)) {
|
|
286
|
-
readyQueue.push(taskName);
|
|
287
|
-
}
|
|
288
|
-
}
|
|
289
|
-
}
|
|
290
|
-
// Check if the task has a valid cached execution for current inputs
|
|
291
|
-
// Returns the output hash if cached, null if re-execution is needed
|
|
292
|
-
async function getCachedOutput(taskName) {
|
|
293
|
-
const node = taskNodes.get(taskName);
|
|
294
|
-
// Gather current input hashes
|
|
295
|
-
const currentInputHashes = [];
|
|
296
|
-
for (const inputPath of node.inputPaths) {
|
|
297
|
-
const { refType, hash } = await workspaceGetDatasetHash(repoPath, ws, inputPath);
|
|
298
|
-
if (refType !== 'value' || hash === null) {
|
|
299
|
-
// Input not assigned, can't be cached
|
|
300
|
-
return null;
|
|
301
|
-
}
|
|
302
|
-
currentInputHashes.push(hash);
|
|
303
|
-
}
|
|
304
|
-
// Check if there's a cached execution for these inputs
|
|
305
|
-
const inHash = inputsHash(currentInputHashes);
|
|
306
|
-
const cachedOutputHash = await executionGetOutput(repoPath, node.hash, inHash);
|
|
307
|
-
if (cachedOutputHash === null) {
|
|
308
|
-
// No cached execution for current inputs
|
|
309
|
-
return null;
|
|
310
|
-
}
|
|
311
|
-
// Also verify the workspace output matches the cached output
|
|
312
|
-
// (in case the workspace was modified outside of execution)
|
|
313
|
-
const { refType, hash: wsOutputHash } = await workspaceGetDatasetHash(repoPath, ws, node.outputPath);
|
|
314
|
-
if (refType !== 'value' || wsOutputHash !== cachedOutputHash) {
|
|
315
|
-
// Workspace output doesn't match cached output, need to re-execute
|
|
316
|
-
// (or update workspace with cached value)
|
|
317
|
-
return null;
|
|
318
|
-
}
|
|
319
|
-
return cachedOutputHash;
|
|
320
|
-
}
|
|
321
|
-
// Execute a single task (does NOT write to workspace - caller must do that)
|
|
322
|
-
async function executeTask(taskName) {
|
|
323
|
-
const node = taskNodes.get(taskName);
|
|
324
|
-
const taskStartTime = Date.now();
|
|
325
|
-
options.onTaskStart?.(taskName);
|
|
326
|
-
// Gather input hashes
|
|
327
|
-
const inputHashes = [];
|
|
328
|
-
for (const inputPath of node.inputPaths) {
|
|
329
|
-
const { refType, hash } = await workspaceGetDatasetHash(repoPath, ws, inputPath);
|
|
330
|
-
if (refType !== 'value' || hash === null) {
|
|
331
|
-
// Input not available - should not happen if dependency tracking is correct
|
|
332
|
-
return {
|
|
333
|
-
name: taskName,
|
|
334
|
-
cached: false,
|
|
335
|
-
state: 'error',
|
|
336
|
-
error: `Input at ${pathToString(inputPath)} is not assigned (refType: ${refType})`,
|
|
337
|
-
duration: Date.now() - taskStartTime,
|
|
338
|
-
};
|
|
339
|
-
}
|
|
340
|
-
inputHashes.push(hash);
|
|
341
|
-
}
|
|
342
|
-
// Execute the task
|
|
343
|
-
const execOptions = {
|
|
344
|
-
force: options.force,
|
|
345
|
-
signal: options.signal,
|
|
346
|
-
onStdout: options.onStdout ? (data) => options.onStdout(taskName, data) : undefined,
|
|
347
|
-
onStderr: options.onStderr ? (data) => options.onStderr(taskName, data) : undefined,
|
|
348
|
-
};
|
|
349
|
-
const result = await taskExecute(repoPath, node.hash, inputHashes, execOptions);
|
|
350
|
-
// Build task result (NOTE: workspace update happens later, in mutex-protected section)
|
|
351
|
-
const taskResult = {
|
|
352
|
-
name: taskName,
|
|
353
|
-
cached: result.cached,
|
|
354
|
-
state: result.state,
|
|
355
|
-
duration: Date.now() - taskStartTime,
|
|
356
|
-
};
|
|
357
|
-
if (result.state === 'error') {
|
|
358
|
-
taskResult.error = result.error ?? undefined;
|
|
359
|
-
}
|
|
360
|
-
else if (result.state === 'failed') {
|
|
361
|
-
taskResult.exitCode = result.exitCode ?? undefined;
|
|
362
|
-
taskResult.error = result.error ?? undefined;
|
|
363
|
-
}
|
|
364
|
-
// Pass output hash to caller for workspace update (if successful)
|
|
365
|
-
if (result.state === 'success' && result.outputHash) {
|
|
366
|
-
taskResult.outputHash = result.outputHash;
|
|
367
|
-
}
|
|
368
|
-
return taskResult;
|
|
369
|
-
}
|
|
370
|
-
// Process dependents when a task completes
|
|
371
|
-
function notifyDependents(taskName) {
|
|
372
|
-
const dependents = taskDependents.get(taskName) ?? new Set();
|
|
373
|
-
for (const depName of dependents) {
|
|
374
|
-
if (completed.has(depName) || inProgress.has(depName))
|
|
375
|
-
continue;
|
|
376
|
-
// Skip dependents not in the filter
|
|
377
|
-
if (filteredTaskNames && !filteredTaskNames.has(depName))
|
|
378
|
-
continue;
|
|
379
|
-
const depNode = taskNodes.get(depName);
|
|
380
|
-
depNode.unresolvedCount--;
|
|
381
|
-
if (depNode.unresolvedCount === 0 && !readyQueue.includes(depName)) {
|
|
382
|
-
readyQueue.push(depName);
|
|
383
|
-
}
|
|
384
|
-
}
|
|
220
|
+
return await dataflowExecute(storage, repo, ws, options);
|
|
385
221
|
}
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
const dependents = taskDependents.get(taskName) ?? new Set();
|
|
389
|
-
for (const depName of dependents) {
|
|
390
|
-
if (completed.has(depName) || inProgress.has(depName))
|
|
391
|
-
continue;
|
|
392
|
-
// Skip dependents not in the filter
|
|
393
|
-
if (filteredTaskNames && !filteredTaskNames.has(depName))
|
|
394
|
-
continue;
|
|
395
|
-
// Recursively skip
|
|
396
|
-
completed.add(depName);
|
|
397
|
-
skipped++;
|
|
398
|
-
results.push({
|
|
399
|
-
name: depName,
|
|
400
|
-
cached: false,
|
|
401
|
-
state: 'skipped',
|
|
402
|
-
duration: 0,
|
|
403
|
-
});
|
|
404
|
-
options.onTaskComplete?.({
|
|
405
|
-
name: depName,
|
|
406
|
-
cached: false,
|
|
407
|
-
state: 'skipped',
|
|
408
|
-
duration: 0,
|
|
409
|
-
});
|
|
410
|
-
skipDependents(depName);
|
|
411
|
-
}
|
|
412
|
-
}
|
|
413
|
-
// Main execution loop using a work-stealing approach
|
|
414
|
-
const runningPromises = new Map();
|
|
415
|
-
async function processQueue() {
|
|
416
|
-
while (true) {
|
|
417
|
-
// Check if we're done
|
|
418
|
-
if (readyQueue.length === 0 && runningPromises.size === 0) {
|
|
419
|
-
break;
|
|
420
|
-
}
|
|
421
|
-
// Launch tasks up to concurrency limit if no failure and not aborted
|
|
422
|
-
while (!hasFailure && !checkAborted() && readyQueue.length > 0 && runningPromises.size < concurrency) {
|
|
423
|
-
const taskName = readyQueue.shift();
|
|
424
|
-
if (completed.has(taskName) || inProgress.has(taskName))
|
|
425
|
-
continue;
|
|
426
|
-
// Check if there's a valid cached execution for current inputs
|
|
427
|
-
const cachedOutputHash = await getCachedOutput(taskName);
|
|
428
|
-
if (cachedOutputHash !== null && !options.force) {
|
|
429
|
-
// Valid cached execution exists for current inputs.
|
|
430
|
-
// No workspace write needed (output already matches), but we still
|
|
431
|
-
// need mutex protection for state updates to prevent races with
|
|
432
|
-
// concurrent task completions.
|
|
433
|
-
await workspaceUpdateMutex.runExclusive(() => {
|
|
434
|
-
completed.add(taskName);
|
|
435
|
-
cached++;
|
|
436
|
-
const result = {
|
|
437
|
-
name: taskName,
|
|
438
|
-
cached: true,
|
|
439
|
-
state: 'success',
|
|
440
|
-
duration: 0,
|
|
441
|
-
};
|
|
442
|
-
results.push(result);
|
|
443
|
-
options.onTaskComplete?.(result);
|
|
444
|
-
notifyDependents(taskName);
|
|
445
|
-
});
|
|
446
|
-
continue;
|
|
447
|
-
}
|
|
448
|
-
inProgress.add(taskName);
|
|
449
|
-
const promise = (async () => {
|
|
450
|
-
try {
|
|
451
|
-
const result = await executeTask(taskName);
|
|
452
|
-
// Use mutex to serialize workspace updates and dependent notifications.
|
|
453
|
-
// This prevents race conditions where two tasks complete simultaneously,
|
|
454
|
-
// both read the same workspace state, and one overwrites the other's changes.
|
|
455
|
-
await workspaceUpdateMutex.runExclusive(async () => {
|
|
456
|
-
// Write output to workspace BEFORE notifying dependents
|
|
457
|
-
if (result.state === 'success' && result.outputHash) {
|
|
458
|
-
const node = taskNodes.get(taskName);
|
|
459
|
-
await workspaceSetDatasetByHash(repoPath, ws, node.outputPath, result.outputHash);
|
|
460
|
-
}
|
|
461
|
-
// Now safe to update execution state and notify dependents
|
|
462
|
-
inProgress.delete(taskName);
|
|
463
|
-
completed.add(taskName);
|
|
464
|
-
results.push(result);
|
|
465
|
-
options.onTaskComplete?.(result);
|
|
466
|
-
if (result.state === 'success') {
|
|
467
|
-
if (result.cached) {
|
|
468
|
-
cached++;
|
|
469
|
-
}
|
|
470
|
-
else {
|
|
471
|
-
executed++;
|
|
472
|
-
}
|
|
473
|
-
notifyDependents(taskName);
|
|
474
|
-
}
|
|
475
|
-
else {
|
|
476
|
-
failed++;
|
|
477
|
-
hasFailure = true;
|
|
478
|
-
skipDependents(taskName);
|
|
479
|
-
}
|
|
480
|
-
});
|
|
481
|
-
}
|
|
482
|
-
finally {
|
|
483
|
-
runningPromises.delete(taskName);
|
|
484
|
-
}
|
|
485
|
-
})();
|
|
486
|
-
runningPromises.set(taskName, promise);
|
|
487
|
-
}
|
|
488
|
-
// Wait for at least one task to complete if we can't launch more
|
|
489
|
-
if (runningPromises.size > 0) {
|
|
490
|
-
await Promise.race(runningPromises.values());
|
|
491
|
-
}
|
|
492
|
-
else if (readyQueue.length === 0) {
|
|
493
|
-
// No running tasks and no ready tasks - we might have unresolvable dependencies
|
|
494
|
-
break;
|
|
495
|
-
}
|
|
496
|
-
}
|
|
497
|
-
}
|
|
498
|
-
await processQueue();
|
|
499
|
-
// Wait for any remaining tasks
|
|
500
|
-
if (runningPromises.size > 0) {
|
|
501
|
-
await Promise.all(runningPromises.values());
|
|
502
|
-
}
|
|
503
|
-
// Check for abort one final time
|
|
504
|
-
checkAborted();
|
|
505
|
-
// If aborted, throw with partial results
|
|
506
|
-
if (aborted) {
|
|
507
|
-
throw new DataflowAbortedError(results);
|
|
222
|
+
finally {
|
|
223
|
+
await options.lock.release();
|
|
508
224
|
}
|
|
509
|
-
return {
|
|
510
|
-
success: !hasFailure,
|
|
511
|
-
executed,
|
|
512
|
-
cached,
|
|
513
|
-
failed,
|
|
514
|
-
skipped,
|
|
515
|
-
tasks: results,
|
|
516
|
-
duration: Date.now() - startTime,
|
|
517
|
-
};
|
|
518
225
|
}
|
|
226
|
+
// =============================================================================
|
|
227
|
+
// Graph Queries (shared between local and cloud execution)
|
|
228
|
+
// =============================================================================
|
|
519
229
|
/**
|
|
520
230
|
* Get the dependency graph for a workspace (for visualization/debugging).
|
|
521
231
|
*
|
|
522
|
-
* @param
|
|
232
|
+
* @param storage - Storage backend
|
|
233
|
+
* @param repo - Repository identifier
|
|
523
234
|
* @param ws - Workspace name
|
|
524
235
|
* @returns Graph information
|
|
236
|
+
*
|
|
525
237
|
* @throws {WorkspaceNotFoundError} If workspace doesn't exist
|
|
526
238
|
* @throws {WorkspaceNotDeployedError} If workspace has no package deployed
|
|
527
239
|
* @throws {DataflowError} If graph building fails for other reasons
|
|
528
240
|
*/
|
|
529
|
-
export async function dataflowGetGraph(
|
|
241
|
+
export async function dataflowGetGraph(storage, repo, ws) {
|
|
530
242
|
let taskNodes;
|
|
531
243
|
let outputToTask;
|
|
532
244
|
try {
|
|
533
|
-
const graph = await buildDependencyGraph(
|
|
245
|
+
const graph = await buildDependencyGraph(storage, repo, ws);
|
|
534
246
|
taskNodes = graph.taskNodes;
|
|
535
247
|
outputToTask = graph.outputToTask;
|
|
536
248
|
}
|
|
@@ -559,4 +271,145 @@ export async function dataflowGetGraph(repoPath, ws) {
|
|
|
559
271
|
}
|
|
560
272
|
return { tasks };
|
|
561
273
|
}
|
|
274
|
+
/**
 * Find all tasks affected by input changes (transitive dependents).
 * An affected task is one whose output could change due to the input change:
 * either it lists a changed path among its `inputs`, or it (transitively)
 * depends on such a task through `dependsOn`.
 *
 * @param graph - The dependency graph; each entry of `graph.tasks` is read for
 *   `name`, `inputs` and `dependsOn`
 * @param changes - Array of changed inputs; only the `path` of each is used and
 *   it is compared by equality against the entries of `task.inputs`
 * @returns Array of affected task names in breadth-first discovery order
 *   (directly affected tasks first, in graph order), without duplicates
 */
export function findAffectedTasks(graph, changes) {
    const changedPaths = new Set(changes.map(c => c.path));
    // Build forward dep map: task name → tasks that depend on its output
    const taskToDependents = new Map();
    for (const task of graph.tasks) {
        for (const dep of task.dependsOn) {
            const dependents = taskToDependents.get(dep);
            if (dependents) {
                dependents.push(task.name);
            }
            else {
                taskToDependents.set(dep, [task.name]);
            }
        }
    }
    // `affected` doubles as the visited set. Marking a name when it is enqueued
    // (rather than when it is dequeued) keeps the queue free of duplicates while
    // producing the same first-seen order.
    const affected = new Set();
    const queue = [];
    const enqueue = (name) => {
        if (!affected.has(name)) {
            affected.add(name);
            queue.push(name);
        }
    };
    // Seed: tasks that directly read a changed input
    for (const task of graph.tasks) {
        if (task.inputs.some(inp => changedPaths.has(inp))) {
            enqueue(task.name);
        }
    }
    // BFS through dependency graph. A read cursor replaces Array#shift so that
    // dequeuing does not re-index the remaining queue on every step.
    for (let head = 0; head < queue.length; head++) {
        for (const dependent of taskToDependents.get(queue[head]) ?? []) {
            enqueue(dependent);
        }
    }
    return Array.from(affected);
}
|
|
313
|
+
/**
 * List the tasks that can be started next.
 *
 * A task qualifies when it is not itself in `completedTasks` and every name in
 * its `dependsOn` list is. Tasks without dependencies therefore qualify until
 * they are marked completed.
 *
 * @param graph - The dependency graph from dataflowGetGraph
 * @param completedTasks - Set of task names that have completed
 * @returns Array of ready task names, in the order they appear in `graph.tasks`
 */
export function dataflowGetReadyTasks(graph, completedTasks) {
    const isCompleted = (name) => completedTasks.has(name);
    return graph.tasks
        .filter(task => !isCompleted(task.name) && task.dependsOn.every(dep => isCompleted(dep)))
        .map(task => task.name);
}
|
|
335
|
+
/**
 * Look up a previously recorded execution of a task for a specific set of inputs.
 *
 * The ordered input hashes are combined with `inputsHash` and the lookup is
 * delegated to `executionGetOutput`; its result is returned unchanged.
 *
 * @param storage - Storage backend
 * @param repo - Repository path
 * @param taskHash - Hash of the TaskObject
 * @param inputHashes - Array of input dataset hashes (in order)
 * @returns Output hash if cached, null if execution needed
 */
export async function dataflowCheckCache(storage, repo, taskHash, inputHashes) {
    return executionGetOutput(storage, repo, taskHash, inputsHash(inputHashes));
}
|
|
348
|
+
/**
 * Find tasks that should be skipped when a task fails.
 *
 * Returns all tasks that transitively depend on the failed task,
 * excluding already completed or already skipped tasks. The walk does not
 * continue past a completed task, but it does continue past an already
 * skipped one so that its own dependents are still collected.
 *
 * Dependencies naming a task that is not present in `graph.tasks` are ignored,
 * so an unknown `failedTask` yields an empty result.
 *
 * @param graph - The dependency graph from dataflowGetGraph
 * @param failedTask - Name of the task that failed
 * @param completedTasks - Set of task names already completed
 * @param skippedTasks - Set of task names already skipped
 * @returns Array of task names that should be skipped, in breadth-first order
 */
export function dataflowGetDependentsToSkip(graph, failedTask, completedTasks, skippedTasks) {
    // Forward edges: task name → names of tasks that list it in dependsOn.
    // Only names present in the graph get an entry.
    const dependents = new Map();
    for (const task of graph.tasks) {
        dependents.set(task.name, []);
    }
    for (const task of graph.tasks) {
        for (const dep of task.dependsOn) {
            dependents.get(dep)?.push(task.name);
        }
    }
    const toSkip = [];
    const visited = new Set();
    const queue = [failedTask];
    // A read cursor replaces Array#shift so that dequeuing does not re-index
    // the remaining queue on every step.
    for (let head = 0; head < queue.length; head++) {
        for (const dep of dependents.get(queue[head]) ?? []) {
            if (visited.has(dep)) {
                continue;
            }
            visited.add(dep);
            // A completed task is not reported and nothing downstream of it is
            // explored through this edge.
            if (completedTasks.has(dep)) {
                continue;
            }
            // An already skipped task is not reported again, but it is still
            // traversed.
            if (!skippedTasks.has(dep)) {
                toSkip.push(dep);
            }
            queue.push(dep);
        }
    }
    return toSkip;
}
|
|
392
|
+
/**
 * Look up the current dataset hash for each input of a task.
 *
 * Each entry of `task.inputs` is parsed with `parsePathString` and resolved
 * through `workspaceGetDatasetHash`, one input at a time and in order. An input
 * contributes its hash only when the lookup reports refType 'value' with a
 * non-null hash; otherwise it contributes null.
 *
 * @param storage - Storage backend
 * @param repo - Repository path
 * @param ws - Workspace name
 * @param task - Task info from the graph
 * @returns Array of hashes aligned with `task.inputs` (null if input is unassigned)
 */
export async function dataflowResolveInputHashes(storage, repo, ws, task) {
    const resolved = [];
    for (const pathStr of task.inputs) {
        const ref = await workspaceGetDatasetHash(storage, repo, ws, parsePathString(pathStr));
        const isAssigned = ref.refType === 'value' && ref.hash !== null;
        resolved.push(isAssigned ? ref.hash : null);
    }
    return resolved;
}
|
|
562
415
|
//# sourceMappingURL=dataflow.js.map
|