@elaraai/e3-core 0.0.2-beta.3 → 0.0.2-beta.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -22
- package/dist/src/dataflow/api-compat.d.ts +90 -0
- package/dist/src/dataflow/api-compat.d.ts.map +1 -0
- package/dist/src/dataflow/api-compat.js +134 -0
- package/dist/src/dataflow/api-compat.js.map +1 -0
- package/dist/src/dataflow/index.d.ts +18 -0
- package/dist/src/dataflow/index.d.ts.map +1 -0
- package/dist/src/dataflow/index.js +23 -0
- package/dist/src/dataflow/index.js.map +1 -0
- package/dist/src/dataflow/orchestrator/LocalOrchestrator.d.ts +53 -0
- package/dist/src/dataflow/orchestrator/LocalOrchestrator.d.ts.map +1 -0
- package/dist/src/dataflow/orchestrator/LocalOrchestrator.js +416 -0
- package/dist/src/dataflow/orchestrator/LocalOrchestrator.js.map +1 -0
- package/dist/src/dataflow/orchestrator/index.d.ts +12 -0
- package/dist/src/dataflow/orchestrator/index.d.ts.map +1 -0
- package/dist/src/dataflow/orchestrator/index.js +12 -0
- package/dist/src/dataflow/orchestrator/index.js.map +1 -0
- package/dist/src/dataflow/orchestrator/interfaces.d.ts +157 -0
- package/dist/src/dataflow/orchestrator/interfaces.d.ts.map +1 -0
- package/dist/src/dataflow/orchestrator/interfaces.js +51 -0
- package/dist/src/dataflow/orchestrator/interfaces.js.map +1 -0
- package/dist/src/dataflow/state-store/FileStateStore.d.ts +67 -0
- package/dist/src/dataflow/state-store/FileStateStore.d.ts.map +1 -0
- package/dist/src/dataflow/state-store/FileStateStore.js +286 -0
- package/dist/src/dataflow/state-store/FileStateStore.js.map +1 -0
- package/dist/src/dataflow/state-store/InMemoryStateStore.d.ts +42 -0
- package/dist/src/dataflow/state-store/InMemoryStateStore.d.ts.map +1 -0
- package/dist/src/dataflow/state-store/InMemoryStateStore.js +214 -0
- package/dist/src/dataflow/state-store/InMemoryStateStore.js.map +1 -0
- package/dist/src/dataflow/state-store/index.d.ts +13 -0
- package/dist/src/dataflow/state-store/index.d.ts.map +1 -0
- package/dist/src/dataflow/state-store/index.js +13 -0
- package/dist/src/dataflow/state-store/index.js.map +1 -0
- package/dist/src/dataflow/state-store/interfaces.d.ts +159 -0
- package/dist/src/dataflow/state-store/interfaces.d.ts.map +1 -0
- package/dist/src/dataflow/state-store/interfaces.js +6 -0
- package/dist/src/dataflow/state-store/interfaces.js.map +1 -0
- package/dist/src/dataflow/steps.d.ts +176 -0
- package/dist/src/dataflow/steps.d.ts.map +1 -0
- package/dist/src/dataflow/steps.js +528 -0
- package/dist/src/dataflow/steps.js.map +1 -0
- package/dist/src/dataflow/types.d.ts +116 -0
- package/dist/src/dataflow/types.d.ts.map +1 -0
- package/dist/src/dataflow/types.js +7 -0
- package/dist/src/dataflow/types.js.map +1 -0
- package/dist/src/dataflow.d.ts +142 -9
- package/dist/src/dataflow.d.ts.map +1 -1
- package/dist/src/dataflow.js +427 -64
- package/dist/src/dataflow.js.map +1 -1
- package/dist/src/errors.d.ts +39 -9
- package/dist/src/errors.d.ts.map +1 -1
- package/dist/src/errors.js +51 -8
- package/dist/src/errors.js.map +1 -1
- package/dist/src/execution/LocalTaskRunner.d.ts +73 -0
- package/dist/src/execution/LocalTaskRunner.d.ts.map +1 -0
- package/dist/src/execution/LocalTaskRunner.js +399 -0
- package/dist/src/execution/LocalTaskRunner.js.map +1 -0
- package/dist/src/execution/MockTaskRunner.d.ts +49 -0
- package/dist/src/execution/MockTaskRunner.d.ts.map +1 -0
- package/dist/src/execution/MockTaskRunner.js +55 -0
- package/dist/src/execution/MockTaskRunner.js.map +1 -0
- package/dist/src/execution/index.d.ts +16 -0
- package/dist/src/execution/index.d.ts.map +1 -0
- package/dist/src/execution/index.js +8 -0
- package/dist/src/execution/index.js.map +1 -0
- package/dist/src/execution/interfaces.d.ts +246 -0
- package/dist/src/execution/interfaces.d.ts.map +1 -0
- package/dist/src/execution/interfaces.js +6 -0
- package/dist/src/execution/interfaces.js.map +1 -0
- package/dist/src/execution/processHelpers.d.ts +20 -0
- package/dist/src/execution/processHelpers.d.ts.map +1 -0
- package/dist/src/execution/processHelpers.js +62 -0
- package/dist/src/execution/processHelpers.js.map +1 -0
- package/dist/src/executions.d.ts +71 -104
- package/dist/src/executions.d.ts.map +1 -1
- package/dist/src/executions.js +110 -476
- package/dist/src/executions.js.map +1 -1
- package/dist/src/index.d.ts +17 -9
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +44 -18
- package/dist/src/index.js.map +1 -1
- package/dist/src/objects.d.ts +6 -53
- package/dist/src/objects.d.ts.map +1 -1
- package/dist/src/objects.js +11 -232
- package/dist/src/objects.js.map +1 -1
- package/dist/src/packages.d.ts +22 -14
- package/dist/src/packages.d.ts.map +1 -1
- package/dist/src/packages.js +116 -83
- package/dist/src/packages.js.map +1 -1
- package/dist/src/storage/in-memory/InMemoryRepoStore.d.ts +35 -0
- package/dist/src/storage/in-memory/InMemoryRepoStore.d.ts.map +1 -0
- package/dist/src/storage/in-memory/InMemoryRepoStore.js +107 -0
- package/dist/src/storage/in-memory/InMemoryRepoStore.js.map +1 -0
- package/dist/src/storage/in-memory/InMemoryStorage.d.ts +114 -0
- package/dist/src/storage/in-memory/InMemoryStorage.d.ts.map +1 -0
- package/dist/src/storage/in-memory/InMemoryStorage.js +349 -0
- package/dist/src/storage/in-memory/InMemoryStorage.js.map +1 -0
- package/dist/src/storage/in-memory/index.d.ts +12 -0
- package/dist/src/storage/in-memory/index.d.ts.map +1 -0
- package/dist/src/storage/in-memory/index.js +12 -0
- package/dist/src/storage/in-memory/index.js.map +1 -0
- package/dist/src/storage/index.d.ts +18 -0
- package/dist/src/storage/index.d.ts.map +1 -0
- package/dist/src/storage/index.js +10 -0
- package/dist/src/storage/index.js.map +1 -0
- package/dist/src/storage/interfaces.d.ts +520 -0
- package/dist/src/storage/interfaces.d.ts.map +1 -0
- package/dist/src/storage/interfaces.js +6 -0
- package/dist/src/storage/interfaces.js.map +1 -0
- package/dist/src/storage/local/LocalBackend.d.ts +54 -0
- package/dist/src/storage/local/LocalBackend.d.ts.map +1 -0
- package/dist/src/storage/local/LocalBackend.js +141 -0
- package/dist/src/storage/local/LocalBackend.js.map +1 -0
- package/dist/src/storage/local/LocalLockService.d.ts +105 -0
- package/dist/src/storage/local/LocalLockService.d.ts.map +1 -0
- package/dist/src/storage/local/LocalLockService.js +342 -0
- package/dist/src/storage/local/LocalLockService.js.map +1 -0
- package/dist/src/storage/local/LocalLogStore.d.ts +23 -0
- package/dist/src/storage/local/LocalLogStore.d.ts.map +1 -0
- package/dist/src/storage/local/LocalLogStore.js +66 -0
- package/dist/src/storage/local/LocalLogStore.js.map +1 -0
- package/dist/src/storage/local/LocalObjectStore.d.ts +52 -0
- package/dist/src/storage/local/LocalObjectStore.d.ts.map +1 -0
- package/dist/src/storage/local/LocalObjectStore.js +287 -0
- package/dist/src/storage/local/LocalObjectStore.js.map +1 -0
- package/dist/src/storage/local/LocalRefStore.d.ts +50 -0
- package/dist/src/storage/local/LocalRefStore.d.ts.map +1 -0
- package/dist/src/storage/local/LocalRefStore.js +337 -0
- package/dist/src/storage/local/LocalRefStore.js.map +1 -0
- package/dist/src/storage/local/LocalRepoStore.d.ts +53 -0
- package/dist/src/storage/local/LocalRepoStore.d.ts.map +1 -0
- package/dist/src/storage/local/LocalRepoStore.js +353 -0
- package/dist/src/storage/local/LocalRepoStore.js.map +1 -0
- package/dist/src/storage/local/gc.d.ts +92 -0
- package/dist/src/storage/local/gc.d.ts.map +1 -0
- package/dist/src/storage/local/gc.js +322 -0
- package/dist/src/storage/local/gc.js.map +1 -0
- package/dist/src/storage/local/index.d.ts +17 -0
- package/dist/src/storage/local/index.d.ts.map +1 -0
- package/dist/src/storage/local/index.js +17 -0
- package/dist/src/storage/local/index.js.map +1 -0
- package/dist/src/storage/local/localHelpers.d.ts +25 -0
- package/dist/src/storage/local/localHelpers.d.ts.map +1 -0
- package/dist/src/storage/local/localHelpers.js +69 -0
- package/dist/src/storage/local/localHelpers.js.map +1 -0
- package/dist/src/{repository.d.ts → storage/local/repository.d.ts} +8 -4
- package/dist/src/storage/local/repository.d.ts.map +1 -0
- package/dist/src/{repository.js → storage/local/repository.js} +31 -29
- package/dist/src/storage/local/repository.js.map +1 -0
- package/dist/src/tasks.d.ts +16 -10
- package/dist/src/tasks.d.ts.map +1 -1
- package/dist/src/tasks.js +35 -41
- package/dist/src/tasks.js.map +1 -1
- package/dist/src/test-helpers.d.ts +4 -4
- package/dist/src/test-helpers.d.ts.map +1 -1
- package/dist/src/test-helpers.js +7 -21
- package/dist/src/test-helpers.js.map +1 -1
- package/dist/src/trees.d.ts +89 -27
- package/dist/src/trees.d.ts.map +1 -1
- package/dist/src/trees.js +218 -100
- package/dist/src/trees.js.map +1 -1
- package/dist/src/uuid.d.ts +26 -0
- package/dist/src/uuid.d.ts.map +1 -0
- package/dist/src/uuid.js +80 -0
- package/dist/src/uuid.js.map +1 -0
- package/dist/src/workspaceStatus.d.ts +6 -4
- package/dist/src/workspaceStatus.d.ts.map +1 -1
- package/dist/src/workspaceStatus.js +43 -49
- package/dist/src/workspaceStatus.js.map +1 -1
- package/dist/src/workspaces.d.ts +35 -26
- package/dist/src/workspaces.d.ts.map +1 -1
- package/dist/src/workspaces.js +169 -118
- package/dist/src/workspaces.js.map +1 -1
- package/package.json +4 -4
- package/dist/src/gc.d.ts +0 -54
- package/dist/src/gc.d.ts.map +0 -1
- package/dist/src/gc.js +0 -233
- package/dist/src/gc.js.map +0 -1
- package/dist/src/repository.d.ts.map +0 -1
- package/dist/src/repository.js.map +0 -1
- package/dist/src/workspaceLock.d.ts +0 -67
- package/dist/src/workspaceLock.d.ts.map +0 -1
- package/dist/src/workspaceLock.js +0 -217
- package/dist/src/workspaceLock.js.map +0 -1
package/dist/src/dataflow.js
CHANGED
````diff
@@ -21,15 +21,60 @@
  * output is written to the workspace and dependents are notified only after the
  * write completes, ensuring downstream tasks see consistent state.
  */
-import { decodeBeast2For } from '@elaraai/east';
+import { decodeBeast2For, encodeBeast2For, variant } from '@elaraai/east';
 import { PackageObjectType, TaskObjectType, WorkspaceStateType, pathToString, } from '@elaraai/e3-types';
-import {
-import {
+import { executionGetOutput, inputsHash, } from './executions.js';
+import { uuidv7 } from './uuid.js';
+import { taskExecute } from './execution/LocalTaskRunner.js';
 import { workspaceGetDatasetHash, workspaceSetDatasetByHash, } from './trees.js';
-import { E3Error, WorkspaceNotFoundError, WorkspaceNotDeployedError, TaskNotFoundError, DataflowError, DataflowAbortedError,
-
-
-
+import { E3Error, WorkspaceNotFoundError, WorkspaceNotDeployedError, WorkspaceLockError, TaskNotFoundError, DataflowError, DataflowAbortedError, } from './errors.js';
+// =============================================================================
+// Path Parsing Helper
+// =============================================================================
+/**
+ * Parse a keypath string (from pathToString) back to TreePath.
+ *
+ * The keypath format is: .field1.field2 (dot-separated field names)
+ * Quoted identifiers use backticks: .field1.`complex/name`
+ *
+ * @param pathStr - The path string in keypath format
+ * @returns TreePath array of path segments
+ */
+export function parsePathString(pathStr) {
+    if (!pathStr.startsWith('.')) {
+        throw new Error(`Invalid path string: expected '.' prefix, got '${pathStr}'`);
+    }
+    const segments = [];
+    let i = 1; // Skip the leading '.'
+    while (i < pathStr.length) {
+        let fieldName;
+        if (pathStr[i] === '`') {
+            // Quoted identifier: find closing backtick
+            const endQuote = pathStr.indexOf('`', i + 1);
+            if (endQuote === -1) {
+                throw new Error(`Invalid path string: unclosed backtick at position ${i}`);
+            }
+            fieldName = pathStr.slice(i + 1, endQuote);
+            i = endQuote + 1;
+        }
+        else {
+            // Unquoted identifier: read until '.' or end
+            let end = pathStr.indexOf('.', i);
+            if (end === -1)
+                end = pathStr.length;
+            fieldName = pathStr.slice(i, end);
+            i = end;
+        }
+        if (fieldName) {
+            segments.push(variant('field', fieldName));
+        }
+        // Skip the '.' separator
+        if (i < pathStr.length && pathStr[i] === '.') {
+            i++;
+        }
+    }
+    return segments;
+}
 // =============================================================================
 // Async Mutex for Workspace Updates
 // =============================================================================
@@ -81,27 +126,20 @@ class AsyncMutex {
 // Workspace State Reader
 // =============================================================================
 /**
- * Read workspace state
+ * Read workspace state.
  * @throws {WorkspaceNotFoundError} If workspace doesn't exist
  * @throws {WorkspaceNotDeployedError} If workspace has no package deployed
  */
-async function readWorkspaceState(
-    const
-
-
-        data = await fs.readFile(stateFile);
-    }
-    catch (err) {
-        if (isNotFoundError(err)) {
-            throw new WorkspaceNotFoundError(ws);
-        }
-        throw err;
+async function readWorkspaceState(storage, repo, ws) {
+    const data = await storage.refs.workspaceRead(repo, ws);
+    if (data === null) {
+        throw new WorkspaceNotFoundError(ws);
     }
     if (data.length === 0) {
        throw new WorkspaceNotDeployedError(ws);
    }
    const decoder = decodeBeast2For(WorkspaceStateType);
-    return decoder(data);
+    return decoder(Buffer.from(data));
 }
 // =============================================================================
 // Dependency Graph Building
@@ -114,11 +152,11 @@ async function readWorkspaceState(repoPath, ws) {
  * - outputToTask: Map of output path string -> task name
  * - taskDependents: Map of task name -> set of dependent task names
  */
-async function buildDependencyGraph(
+async function buildDependencyGraph(storage, repo, ws) {
     // Read workspace state to get package hash
-    const state = await readWorkspaceState(
+    const state = await readWorkspaceState(storage, repo, ws);
     // Read package object to get tasks map
-    const pkgData = await
+    const pkgData = await storage.objects.read(repo, state.packageHash);
     const pkgDecoder = decodeBeast2For(PackageObjectType);
     const pkgObject = pkgDecoder(Buffer.from(pkgData));
     const taskNodes = new Map();
@@ -126,7 +164,7 @@ async function buildDependencyGraph(repoPath, ws) {
     // First pass: load all tasks and build output->task map
     const taskDecoder = decodeBeast2For(TaskObjectType);
     for (const [taskName, taskHash] of pkgObject.tasks) {
-        const taskData = await
+        const taskData = await storage.objects.read(repo, taskHash);
         const task = taskDecoder(Buffer.from(taskData));
         const outputPathStr = pathToString(task.output);
         outputToTask.set(outputPathStr, taskName);
@@ -159,7 +197,7 @@ async function buildDependencyGraph(repoPath, ws) {
             }
             // If not produced by a task, it's an external input - check if assigned
             else {
-                const { refType } = await workspaceGetDatasetHash(
+                const { refType } = await workspaceGetDatasetHash(storage, repo, ws, inputPath);
                 if (refType === 'unassigned') {
                     // External input that is unassigned - this task can never run
                     node.unresolvedCount++;
@@ -183,7 +221,8 @@ async function buildDependencyGraph(repoPath, ws) {
  * to prevent concurrent modifications. If options.lock is provided, uses that
  * lock instead (caller is responsible for releasing it).
  *
- * @param
+ * @param storage - Storage backend
+ * @param repo - Repository identifier (for local storage, the path to e3 repository directory)
  * @param ws - Workspace name
  * @param options - Execution options
  * @returns Result of the dataflow execution
@@ -193,12 +232,16 @@ async function buildDependencyGraph(repoPath, ws) {
  * @throws {TaskNotFoundError} If filter specifies a task that doesn't exist
  * @throws {DataflowError} If execution fails for other reasons
  */
-export async function dataflowExecute(
+export async function dataflowExecute(storage, repo, ws, options = {}) {
     // Acquire lock if not provided externally
     const externalLock = options.lock;
-    const lock = externalLock ?? await
+    const lock = externalLock ?? await storage.locks.acquire(repo, ws, variant('dataflow', null));
+    if (!lock) {
+        // Lock couldn't be acquired - the LockService returns null instead of throwing
+        throw new WorkspaceLockError(ws);
+    }
     try {
-        return await dataflowExecuteWithLock(
+        return await dataflowExecuteWithLock(storage, repo, ws, options);
     }
     finally {
         // Only release the lock if we acquired it internally
@@ -213,7 +256,8 @@ export async function dataflowExecute(repoPath, ws, options = {}) {
  * Returns a promise immediately without awaiting execution. The lock is
  * released automatically when execution completes.
  *
- * @param
+ * @param storage - Storage backend
+ * @param repo - Repository identifier (for local storage, the path to e3 repository directory)
  * @param ws - Workspace name
 * @param options - Execution options (lock must be provided)
 * @returns Promise that resolves when execution completes
@@ -222,23 +266,31 @@ export async function dataflowExecute(repoPath, ws, options = {}) {
  * @throws {TaskNotFoundError} If filter specifies a task that doesn't exist
  * @throws {DataflowError} If execution fails for other reasons
  */
-export function dataflowStart(
-    return dataflowExecuteWithLock(
+export function dataflowStart(storage, repo, ws, options) {
+    return dataflowExecuteWithLock(storage, repo, ws, options)
         .finally(() => options.lock.release());
 }
 /**
  * Internal: Execute dataflow with lock already held.
  */
-async function dataflowExecuteWithLock(
+async function dataflowExecuteWithLock(storage, repo, ws, options) {
     const startTime = Date.now();
+    const startedAt = new Date();
     const concurrency = options.concurrency ?? 4;
+    // Generate run ID for this execution
+    const runId = uuidv7();
     let taskNodes;
     let taskDependents;
+    let outputToTask;
+    let wsState;
     try {
+        // Read workspace state for run tracking
+        wsState = await readWorkspaceState(storage, repo, ws);
         // Build dependency graph
-        const
-        taskNodes =
-        taskDependents =
+        const graphResult = await buildDependencyGraph(storage, repo, ws);
+        taskNodes = graphResult.taskNodes;
+        taskDependents = graphResult.taskDependents;
+        outputToTask = graphResult.outputToTask;
     }
     catch (err) {
         // Re-throw E3Errors as-is
@@ -247,6 +299,54 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
         // Wrap unexpected errors
         throw new DataflowError(`Failed to build dependency graph: ${err instanceof Error ? err.message : err}`);
     }
+    // Clean up all previous runs (we hold the lock, so no concurrent runs)
+    const allRunIds = await storage.refs.dataflowRunList(repo, ws);
+    for (const oldRunId of allRunIds) {
+        await storage.refs.dataflowRunDelete(repo, ws, oldRunId);
+    }
+    // Initialize task execution records map
+    const taskExecutions = new Map();
+    // Create initial DataflowRun record
+    const initialRun = {
+        runId,
+        workspaceName: ws,
+        packageRef: `${wsState.packageName}@${wsState.packageVersion}`,
+        startedAt,
+        completedAt: variant('none', null),
+        status: variant('running', {}),
+        inputSnapshot: wsState.rootHash,
+        outputSnapshot: variant('none', null),
+        taskExecutions: taskExecutions,
+        summary: {
+            total: BigInt(taskNodes.size),
+            completed: 0n,
+            cached: 0n,
+            failed: 0n,
+            skipped: 0n,
+        },
+    };
+    // Write initial run record
+    await storage.refs.dataflowRunWrite(repo, ws, initialRun);
+    // Build DataflowGraph for use with decomposed building blocks
+    const dataflowGraph = {
+        tasks: Array.from(taskNodes.entries()).map(([taskName, node]) => {
+            const dependsOn = [];
+            for (const inputPath of node.inputPaths) {
+                const inputPathStr = pathToString(inputPath);
+                const producerTask = outputToTask.get(inputPathStr);
+                if (producerTask) {
+                    dependsOn.push(producerTask);
+                }
+            }
+            return {
+                name: taskName,
+                hash: node.hash,
+                inputs: node.inputPaths.map(pathToString),
+                output: pathToString(node.outputPath),
+                dependsOn,
+            };
+        }),
+    };
     // Apply filter if specified
     const filteredTaskNames = options.filter
         ? new Set([options.filter])
@@ -278,6 +378,7 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
     const readyQueue = [];
     const completed = new Set();
     const inProgress = new Set();
+    const skippedTasks = new Set(); // Track skipped tasks separately for dataflowGetDependentsToSkip
     // Initialize ready queue with tasks that have no unresolved dependencies
     // and pass the filter (if any)
     for (const [taskName, node] of taskNodes) {
@@ -288,13 +389,13 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
         }
     }
     // Check if the task has a valid cached execution for current inputs
-    // Returns the output hash if cached, null if re-execution is needed
+    // Returns the output hash and executionId if cached, null if re-execution is needed
     async function getCachedOutput(taskName) {
         const node = taskNodes.get(taskName);
         // Gather current input hashes
         const currentInputHashes = [];
         for (const inputPath of node.inputPaths) {
-            const { refType, hash } = await workspaceGetDatasetHash(
+            const { refType, hash } = await workspaceGetDatasetHash(storage, repo, ws, inputPath);
             if (refType !== 'value' || hash === null) {
                 // Input not assigned, can't be cached
                 return null;
@@ -303,20 +404,26 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
         }
         // Check if there's a cached execution for these inputs
         const inHash = inputsHash(currentInputHashes);
-        const cachedOutputHash = await executionGetOutput(
+        const cachedOutputHash = await executionGetOutput(storage, repo, node.hash, inHash);
         if (cachedOutputHash === null) {
             // No cached execution for current inputs
             return null;
         }
+        // Get the latest execution status to retrieve the executionId
+        const latestStatus = await storage.refs.executionGetLatest(repo, node.hash, inHash);
+        if (!latestStatus || latestStatus.type !== 'success') {
+            // Latest execution wasn't a success
+            return null;
+        }
         // Also verify the workspace output matches the cached output
         // (in case the workspace was modified outside of execution)
-        const { refType, hash: wsOutputHash } = await workspaceGetDatasetHash(
+        const { refType, hash: wsOutputHash } = await workspaceGetDatasetHash(storage, repo, ws, node.outputPath);
         if (refType !== 'value' || wsOutputHash !== cachedOutputHash) {
             // Workspace output doesn't match cached output, need to re-execute
             // (or update workspace with cached value)
             return null;
         }
-        return cachedOutputHash;
+        return { outputHash: cachedOutputHash, executionId: latestStatus.value.executionId };
     }
     // Execute a single task (does NOT write to workspace - caller must do that)
     async function executeTask(taskName) {
@@ -326,7 +433,7 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
         // Gather input hashes
         const inputHashes = [];
         for (const inputPath of node.inputPaths) {
-            const { refType, hash } = await workspaceGetDatasetHash(
+            const { refType, hash } = await workspaceGetDatasetHash(storage, repo, ws, inputPath);
             if (refType !== 'value' || hash === null) {
                 // Input not available - should not happen if dependency tracking is correct
                 return {
@@ -339,18 +446,22 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
             }
             inputHashes.push(hash);
         }
-        // Execute the task
+        // Execute the task using either the provided runner or direct taskExecute()
         const execOptions = {
             force: options.force,
             signal: options.signal,
             onStdout: options.onStdout ? (data) => options.onStdout(taskName, data) : undefined,
             onStderr: options.onStderr ? (data) => options.onStderr(taskName, data) : undefined,
         };
-
+        // Use provided runner if available, otherwise call taskExecute directly
+        const result = options.runner
+            ? await options.runner.execute(storage, node.hash, inputHashes, execOptions)
+            : await taskExecute(storage, repo, node.hash, inputHashes, execOptions);
         // Build task result (NOTE: workspace update happens later, in mutex-protected section)
         const taskResult = {
             name: taskName,
             cached: result.cached,
+            executionId: result.executionId,
             state: result.state,
             duration: Date.now() - taskStartTime,
         };
@@ -359,7 +470,6 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
         }
         else if (result.state === 'failed') {
             taskResult.exitCode = result.exitCode ?? undefined;
-            taskResult.error = result.error ?? undefined;
         }
         // Pass output hash to caller for workspace update (if successful)
         if (result.state === 'success' && result.outputHash) {
@@ -383,17 +493,17 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
             }
         }
     }
-    // Mark dependents as skipped when a task fails
+    // Mark dependents as skipped when a task fails.
+    // Uses dataflowGetDependentsToSkip to find all transitive dependents at once
+    // (shared with distributed execution in e3-aws).
     function skipDependents(taskName) {
-
-
-
-
-
-        if (filteredTaskNames && !filteredTaskNames.has(depName))
-            continue;
-        // Recursively skip
+        // Get all tasks to skip (excludes already completed, already skipped, and in-progress)
+        const toSkip = dataflowGetDependentsToSkip(dataflowGraph, taskName, completed, skippedTasks)
+            .filter(name => !inProgress.has(name)) // Also exclude in-progress tasks
+            .filter(name => !filteredTaskNames || filteredTaskNames.has(name)); // Apply filter
+        for (const depName of toSkip) {
            completed.add(depName);
+            skippedTasks.add(depName);
            skipped++;
            results.push({
                name: depName,
@@ -407,7 +517,6 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
                state: 'skipped',
                duration: 0,
            });
-            skipDependents(depName);
        }
    }
    // Main execution loop using a work-stealing approach
@@ -424,8 +533,8 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
            if (completed.has(taskName) || inProgress.has(taskName))
                continue;
            // Check if there's a valid cached execution for current inputs
-            const
-            if (
+            const cachedResult = await getCachedOutput(taskName);
+            if (cachedResult !== null && !options.force) {
                // Valid cached execution exists for current inputs.
                // No workspace write needed (output already matches), but we still
                // need mutex protection for state updates to prevent races with
@@ -436,12 +545,18 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
                    const result = {
                        name: taskName,
                        cached: true,
+                        executionId: cachedResult.executionId,
                        state: 'success',
                        duration: 0,
                    };
                    results.push(result);
                    options.onTaskComplete?.(result);
                    notifyDependents(taskName);
+                    // Track in taskExecutions map
+                    taskExecutions.set(taskName, {
+                        executionId: cachedResult.executionId,
+                        cached: true,
+                    });
                });
                continue;
            }
@@ -456,7 +571,7 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
                // Write output to workspace BEFORE notifying dependents
                if (result.state === 'success' && result.outputHash) {
                    const node = taskNodes.get(taskName);
-                    await workspaceSetDatasetByHash(
+                    await workspaceSetDatasetByHash(storage, repo, ws, node.outputPath, result.outputHash);
                }
                // Now safe to update execution state and notify dependents
                inProgress.delete(taskName);
@@ -471,11 +586,25 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
                        executed++;
                    }
                    notifyDependents(taskName);
+                    // Track in taskExecutions map
+                    if (result.executionId) {
+                        taskExecutions.set(taskName, {
+                            executionId: result.executionId,
+                            cached: result.cached,
+                        });
+                    }
                }
                else {
                    failed++;
                    hasFailure = true;
                    skipDependents(taskName);
+                    // Track failed execution too
+                    if (result.executionId) {
+                        taskExecutions.set(taskName, {
+                            executionId: result.executionId,
+                            cached: false,
+                        });
+                    }
                }
            });
        }
@@ -489,8 +618,10 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
        if (runningPromises.size > 0) {
            await Promise.race(runningPromises.values());
        }
-        else if (readyQueue.length === 0) {
-            // No running tasks and
+        else if (readyQueue.length === 0 || aborted) {
+            // No running tasks and either:
+            // - no ready tasks (unresolvable dependencies)
+            // - aborted (stop processing)
            break;
        }
    }
@@ -502,12 +633,79 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
    }
    // Check for abort one final time
    checkAborted();
-    // If aborted, throw with partial results
+    // If aborted, throw with partial results (also update run record)
    if (aborted) {
+        const finalWsState = await readWorkspaceState(storage, repo, ws);
+        const cancelledRun = {
+            runId,
+            workspaceName: ws,
+            packageRef: `${wsState.packageName}@${wsState.packageVersion}`,
+            startedAt,
+            completedAt: variant('some', new Date()),
+            status: variant('cancelled', {}),
+            inputSnapshot: wsState.rootHash,
+            outputSnapshot: variant('some', finalWsState.rootHash),
+            taskExecutions,
+            summary: {
+                total: BigInt(taskNodes.size),
+                completed: BigInt(executed + cached),
+                cached: BigInt(cached),
+                failed: BigInt(failed),
+                skipped: BigInt(skipped),
+            },
+        };
+        await storage.refs.dataflowRunWrite(repo, ws, cancelledRun);
        throw new DataflowAbortedError(results);
    }
+    // Read final workspace state for output snapshot
+    const finalWsState = await readWorkspaceState(storage, repo, ws);
+    // Determine final status
+    let finalStatus;
+    if (hasFailure) {
+        // Find the failed task
+        const failedTask = results.find(r => r.state === 'failed' || r.state === 'error');
+        finalStatus = variant('failed', {
+            failedTask: failedTask?.name ?? 'unknown',
+            error: failedTask?.error ?? failedTask?.exitCode?.toString() ?? 'Task failed',
+        });
+    }
+    else {
+        finalStatus = variant('completed', {});
+    }
+    // Write final DataflowRun record
+    const finalRun = {
+        runId,
+        workspaceName: ws,
+        packageRef: `${wsState.packageName}@${wsState.packageVersion}`,
+        startedAt,
+        completedAt: variant('some', new Date()),
+        status: finalStatus,
+        inputSnapshot: wsState.rootHash,
+        outputSnapshot: variant('some', finalWsState.rootHash),
+        taskExecutions,
+        summary: {
+            total: BigInt(taskNodes.size),
+            completed: BigInt(executed + cached),
+            cached: BigInt(cached),
+            failed: BigInt(failed),
+            skipped: BigInt(skipped),
+        },
+    };
+    await storage.refs.dataflowRunWrite(repo, ws, finalRun);
+    // Update workspace state with currentRunId on success
+    if (!hasFailure) {
+        // Read, update, write workspace state
+        const currentState = await readWorkspaceState(storage, repo, ws);
+        const updatedState = {
+            ...currentState,
+            currentRunId: variant('some', runId),
+        };
+        const encoder = encodeBeast2For(WorkspaceStateType);
+        await storage.refs.workspaceWrite(repo, ws, encoder(updatedState));
+    }
    return {
        success: !hasFailure,
+        runId,
        executed,
        cached,
        failed,
@@ -519,18 +717,19 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
 /**
  * Get the dependency graph for a workspace (for visualization/debugging).
  *
- * @param
+ * @param storage - Storage backend
+ * @param repo - Repository identifier (for local storage, the path to e3 repository directory)
  * @param ws - Workspace name
  * @returns Graph information
  * @throws {WorkspaceNotFoundError} If workspace doesn't exist
  * @throws {WorkspaceNotDeployedError} If workspace has no package deployed
  * @throws {DataflowError} If graph building fails for other reasons
  */
-export async function dataflowGetGraph(
+export async function dataflowGetGraph(storage, repo, ws) {
    let taskNodes;
    let outputToTask;
    try {
-        const graph = await buildDependencyGraph(
+        const graph = await buildDependencyGraph(storage, repo, ws);
        taskNodes = graph.taskNodes;
        outputToTask = graph.outputToTask;
    }
@@ -559,4 +758,168 @@ export async function dataflowGetGraph(repoPath, ws) {
    }
    return { tasks };
 }
+/**
+ * Get tasks that are ready to execute given the set of completed tasks.
+ *
+ * A task is ready when all tasks it depends on have completed.
+ * This is useful for distributed execution (e.g., AWS Step Functions)
+ * where a coordinator needs to determine which tasks can run next.
+ *
+ * @param graph - The dependency graph from dataflowGetGraph
+ * @param completedTasks - Set of task names that have completed
+ * @returns Array of task names that are ready to execute
+ *
+ * @example
+ * ```typescript
+ * const graph = await dataflowGetGraph(storage, repo, 'production');
+ * const ready = dataflowGetReadyTasks(graph, new Set()); // Initial ready tasks
+ * // Execute ready[0]...
+ * const nextReady = dataflowGetReadyTasks(graph, new Set([ready[0]]));
+ * ```
+ */
+export function dataflowGetReadyTasks(graph, completedTasks) {
+    const ready = [];
+    for (const task of graph.tasks) {
+        // Skip already completed tasks
+        if (completedTasks.has(task.name)) {
+            continue;
+        }
+        // Check if all dependencies are satisfied
+        const allDepsCompleted = task.dependsOn.every(dep => completedTasks.has(dep));
+        if (allDepsCompleted) {
+            ready.push(task.name);
+        }
+    }
+    return ready;
+}
+/**
+ * Check if a task execution is cached for the given inputs.
+ *
+ * This is useful for distributed execution where a Lambda handler needs
+ * to check if a task can be skipped before spawning execution.
+ *
+ * @param storage - Storage backend
+ * @param repo - Repository path
+ * @param taskHash - Hash of the TaskObject
+ * @param inputHashes - Array of input dataset hashes (in order)
+ * @returns Output hash if cached, null if execution needed
+ *
+ * @example
+ * ```typescript
+ * const outputHash = await dataflowCheckCache(storage, repo, taskHash, inputHashes);
+ * if (outputHash) {
+ *     // Task is cached, use outputHash directly
+ * } else {
+ *     // Need to execute task
+ * }
+ * ```
+ */
+export async function dataflowCheckCache(storage, repo, taskHash, inputHashes) {
+    const inHash = inputsHash(inputHashes);
+    return executionGetOutput(storage, repo, taskHash, inHash);
+}
+/**
+ * Find tasks that should be skipped when a task fails.
+ *
+ * Returns all tasks that transitively depend on the failed task
+ * (directly or through other tasks), excluding already completed
+ * or already skipped tasks.
+ *
+ * This is useful for distributed execution where the coordinator
+ * needs to mark downstream tasks as skipped after a failure.
+ *
+ * @param graph - The dependency graph from dataflowGetGraph
+ * @param failedTask - Name of the task that failed
+ * @param completedTasks - Set of task names already completed (won't be skipped)
+ * @param skippedTasks - Set of task names already skipped (won't be returned again)
+ * @returns Array of task names that should be skipped
+ *
+ * @example
+ * ```typescript
+ * const graph = await dataflowGetGraph(storage, repo, 'production');
+ * // Task 'etl' failed...
+ * const toSkip = dataflowGetDependentsToSkip(graph, 'etl', completed, skipped);
+ * // toSkip might be ['transform', 'aggregate', 'report'] - all downstream tasks
+ * ```
+ */
+export function dataflowGetDependentsToSkip(graph, failedTask, completedTasks, skippedTasks) {
+    // Build reverse dependency map: task -> tasks that depend on it
+    const dependents = new Map();
+    for (const task of graph.tasks) {
+        dependents.set(task.name, []);
+    }
+    for (const task of graph.tasks) {
+        for (const dep of task.dependsOn) {
+            dependents.get(dep)?.push(task.name);
+        }
+    }
+    // BFS to find all transitive dependents
+    const toSkip = [];
+    const visited = new Set();
+    const queue = [failedTask];
+    while (queue.length > 0) {
+        const current = queue.shift();
+        const deps = dependents.get(current) ?? [];
+        for (const dep of deps) {
+            // Skip if already processed
+            if (visited.has(dep)) {
+                continue;
+            }
+            visited.add(dep);
+            // Skip if already completed (no need to explore further - completed tasks break the chain)
+            if (completedTasks.has(dep)) {
+                continue;
+            }
+            // If already skipped, still explore dependents but don't add to result again
+            if (skippedTasks.has(dep)) {
+                queue.push(dep);
+                continue;
+            }
+            // New task to skip
+            toSkip.push(dep);
+            queue.push(dep);
+        }
+    }
+    return toSkip;
+}
+/**
+ * Resolve input hashes for a task from current workspace state.
+ *
+ * Returns an array of hashes in the same order as the task's inputs.
+ * If any input is unassigned, returns null for that position.
+ *
+ * This is useful for distributed execution where the input hashes
+ * need to be resolved before checking cache or executing.
+ *
+ * @param storage - Storage backend
+ * @param repo - Repository path
+ * @param ws - Workspace name
+ * @param task - Task info from the graph (needs inputs array)
+ * @returns Array of hashes (null if input is unassigned)
+ *
+ * @example
+ * ```typescript
+ * const graph = await dataflowGetGraph(storage, repo, 'production');
+ * const task = graph.tasks.find(t => t.name === 'etl')!;
+ * const inputHashes = await dataflowResolveInputHashes(storage, repo, 'production', task);
+ * if (!inputHashes.includes(null)) {
+ *     const cached = await dataflowCheckCache(storage, repo, task.hash, inputHashes);
+ * }
+ * ```
+ */
+export async function dataflowResolveInputHashes(storage, repo, ws, task) {
+    const hashes = [];
+    for (const inputPathStr of task.inputs) {
+        // Parse the keypath string back to TreePath
+        const inputPath = parsePathString(inputPathStr);
+        const { refType, hash } = await workspaceGetDatasetHash(storage, repo, ws, inputPath);
+        if (refType === 'value' && hash !== null) {
+            hashes.push(hash);
+        }
+        else {
+            hashes.push(null);
+        }
+    }
+    return hashes;
+}
 //# sourceMappingURL=dataflow.js.map
````
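Taken together, the newly exported building blocks (`dataflowGetGraph`, `dataflowGetReadyTasks`, `dataflowGetDependentsToSkip`, `dataflowResolveInputHashes`, `dataflowCheckCache`) let an external coordinator drive a dataflow without going through `dataflowExecute`, which is how the version's own doc comments describe the distributed-execution use case. The sketch below shows one way they might compose into a simple serial coordinator; it is illustrative only: the `storage: any` parameter stands in for whichever backend from the new storage module is in use (construction not shown in this diff), and `runTask` is a hypothetical callback for however tasks are actually executed and their outputs written back.

```typescript
import {
    dataflowCheckCache,
    dataflowGetDependentsToSkip,
    dataflowGetGraph,
    dataflowGetReadyTasks,
    dataflowResolveInputHashes,
} from '@elaraai/e3-core';

// Narrow out unassigned inputs (dataflowResolveInputHashes returns null for those).
function isAssigned<T>(hash: T | null): hash is T {
    return hash !== null;
}

// Hypothetical serial coordinator built from the building blocks above.
// `storage` is whichever backend from the new storage module is in use (loosely
// typed here because its concrete type is not shown in this diff), and `runTask`
// stands in for whatever executes a task and writes its output to the workspace
// (LocalTaskRunner, a Lambda handler, ...).
export async function coordinateDataflow(
    storage: any,
    repo: string,
    ws: string,
    runTask: (taskName: string) => Promise<boolean>,
): Promise<{ completed: string[]; skipped: string[] }> {
    const graph = await dataflowGetGraph(storage, repo, ws);
    const completed = new Set<string>();
    const skipped = new Set<string>();

    // Keep scheduling until every task is completed, skipped, or blocked.
    while (completed.size + skipped.size < graph.tasks.length) {
        const ready = dataflowGetReadyTasks(graph, completed)
            .filter((name) => !skipped.has(name));
        if (ready.length === 0) break; // remaining tasks can never become ready

        for (const name of ready) {
            const task = graph.tasks.find((t) => t.name === name)!;

            // Resolve current input hashes and reuse a cached execution when
            // all inputs are assigned and a matching execution already exists.
            const inputHashes = await dataflowResolveInputHashes(storage, repo, ws, task);
            const assigned = inputHashes.filter(isAssigned);
            const cachedOutput = assigned.length === inputHashes.length
                ? await dataflowCheckCache(storage, repo, task.hash, assigned)
                : null;

            const ok = cachedOutput !== null || (await runTask(name));
            if (ok) {
                completed.add(name);
            } else {
                // On failure, skip the task and everything downstream of it.
                skipped.add(name);
                for (const dep of dataflowGetDependentsToSkip(graph, name, completed, skipped)) {
                    skipped.add(dep);
                }
            }
        }
    }
    return { completed: [...completed], skipped: [...skipped] };
}
```

This mirrors what `dataflowExecuteWithLock` now does internally, minus locking, concurrency, and `DataflowRun` record keeping.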