@elaraai/e3-core 0.0.2-beta.3 → 0.0.2-beta.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. package/README.md +25 -22
  2. package/dist/src/dataflow/api-compat.d.ts +90 -0
  3. package/dist/src/dataflow/api-compat.d.ts.map +1 -0
  4. package/dist/src/dataflow/api-compat.js +134 -0
  5. package/dist/src/dataflow/api-compat.js.map +1 -0
  6. package/dist/src/dataflow/index.d.ts +18 -0
  7. package/dist/src/dataflow/index.d.ts.map +1 -0
  8. package/dist/src/dataflow/index.js +23 -0
  9. package/dist/src/dataflow/index.js.map +1 -0
  10. package/dist/src/dataflow/orchestrator/LocalOrchestrator.d.ts +53 -0
  11. package/dist/src/dataflow/orchestrator/LocalOrchestrator.d.ts.map +1 -0
  12. package/dist/src/dataflow/orchestrator/LocalOrchestrator.js +416 -0
  13. package/dist/src/dataflow/orchestrator/LocalOrchestrator.js.map +1 -0
  14. package/dist/src/dataflow/orchestrator/index.d.ts +12 -0
  15. package/dist/src/dataflow/orchestrator/index.d.ts.map +1 -0
  16. package/dist/src/dataflow/orchestrator/index.js +12 -0
  17. package/dist/src/dataflow/orchestrator/index.js.map +1 -0
  18. package/dist/src/dataflow/orchestrator/interfaces.d.ts +157 -0
  19. package/dist/src/dataflow/orchestrator/interfaces.d.ts.map +1 -0
  20. package/dist/src/dataflow/orchestrator/interfaces.js +51 -0
  21. package/dist/src/dataflow/orchestrator/interfaces.js.map +1 -0
  22. package/dist/src/dataflow/state-store/FileStateStore.d.ts +67 -0
  23. package/dist/src/dataflow/state-store/FileStateStore.d.ts.map +1 -0
  24. package/dist/src/dataflow/state-store/FileStateStore.js +286 -0
  25. package/dist/src/dataflow/state-store/FileStateStore.js.map +1 -0
  26. package/dist/src/dataflow/state-store/InMemoryStateStore.d.ts +42 -0
  27. package/dist/src/dataflow/state-store/InMemoryStateStore.d.ts.map +1 -0
  28. package/dist/src/dataflow/state-store/InMemoryStateStore.js +214 -0
  29. package/dist/src/dataflow/state-store/InMemoryStateStore.js.map +1 -0
  30. package/dist/src/dataflow/state-store/index.d.ts +13 -0
  31. package/dist/src/dataflow/state-store/index.d.ts.map +1 -0
  32. package/dist/src/dataflow/state-store/index.js +13 -0
  33. package/dist/src/dataflow/state-store/index.js.map +1 -0
  34. package/dist/src/dataflow/state-store/interfaces.d.ts +159 -0
  35. package/dist/src/dataflow/state-store/interfaces.d.ts.map +1 -0
  36. package/dist/src/dataflow/state-store/interfaces.js +6 -0
  37. package/dist/src/dataflow/state-store/interfaces.js.map +1 -0
  38. package/dist/src/dataflow/steps.d.ts +176 -0
  39. package/dist/src/dataflow/steps.d.ts.map +1 -0
  40. package/dist/src/dataflow/steps.js +528 -0
  41. package/dist/src/dataflow/steps.js.map +1 -0
  42. package/dist/src/dataflow/types.d.ts +116 -0
  43. package/dist/src/dataflow/types.d.ts.map +1 -0
  44. package/dist/src/dataflow/types.js +7 -0
  45. package/dist/src/dataflow/types.js.map +1 -0
  46. package/dist/src/dataflow.d.ts +142 -9
  47. package/dist/src/dataflow.d.ts.map +1 -1
  48. package/dist/src/dataflow.js +427 -64
  49. package/dist/src/dataflow.js.map +1 -1
  50. package/dist/src/errors.d.ts +39 -9
  51. package/dist/src/errors.d.ts.map +1 -1
  52. package/dist/src/errors.js +51 -8
  53. package/dist/src/errors.js.map +1 -1
  54. package/dist/src/execution/LocalTaskRunner.d.ts +73 -0
  55. package/dist/src/execution/LocalTaskRunner.d.ts.map +1 -0
  56. package/dist/src/execution/LocalTaskRunner.js +399 -0
  57. package/dist/src/execution/LocalTaskRunner.js.map +1 -0
  58. package/dist/src/execution/MockTaskRunner.d.ts +49 -0
  59. package/dist/src/execution/MockTaskRunner.d.ts.map +1 -0
  60. package/dist/src/execution/MockTaskRunner.js +55 -0
  61. package/dist/src/execution/MockTaskRunner.js.map +1 -0
  62. package/dist/src/execution/index.d.ts +16 -0
  63. package/dist/src/execution/index.d.ts.map +1 -0
  64. package/dist/src/execution/index.js +8 -0
  65. package/dist/src/execution/index.js.map +1 -0
  66. package/dist/src/execution/interfaces.d.ts +246 -0
  67. package/dist/src/execution/interfaces.d.ts.map +1 -0
  68. package/dist/src/execution/interfaces.js +6 -0
  69. package/dist/src/execution/interfaces.js.map +1 -0
  70. package/dist/src/execution/processHelpers.d.ts +20 -0
  71. package/dist/src/execution/processHelpers.d.ts.map +1 -0
  72. package/dist/src/execution/processHelpers.js +62 -0
  73. package/dist/src/execution/processHelpers.js.map +1 -0
  74. package/dist/src/executions.d.ts +71 -104
  75. package/dist/src/executions.d.ts.map +1 -1
  76. package/dist/src/executions.js +110 -476
  77. package/dist/src/executions.js.map +1 -1
  78. package/dist/src/index.d.ts +17 -9
  79. package/dist/src/index.d.ts.map +1 -1
  80. package/dist/src/index.js +44 -18
  81. package/dist/src/index.js.map +1 -1
  82. package/dist/src/objects.d.ts +6 -53
  83. package/dist/src/objects.d.ts.map +1 -1
  84. package/dist/src/objects.js +11 -232
  85. package/dist/src/objects.js.map +1 -1
  86. package/dist/src/packages.d.ts +22 -14
  87. package/dist/src/packages.d.ts.map +1 -1
  88. package/dist/src/packages.js +116 -83
  89. package/dist/src/packages.js.map +1 -1
  90. package/dist/src/storage/in-memory/InMemoryRepoStore.d.ts +35 -0
  91. package/dist/src/storage/in-memory/InMemoryRepoStore.d.ts.map +1 -0
  92. package/dist/src/storage/in-memory/InMemoryRepoStore.js +107 -0
  93. package/dist/src/storage/in-memory/InMemoryRepoStore.js.map +1 -0
  94. package/dist/src/storage/in-memory/InMemoryStorage.d.ts +114 -0
  95. package/dist/src/storage/in-memory/InMemoryStorage.d.ts.map +1 -0
  96. package/dist/src/storage/in-memory/InMemoryStorage.js +349 -0
  97. package/dist/src/storage/in-memory/InMemoryStorage.js.map +1 -0
  98. package/dist/src/storage/in-memory/index.d.ts +12 -0
  99. package/dist/src/storage/in-memory/index.d.ts.map +1 -0
  100. package/dist/src/storage/in-memory/index.js +12 -0
  101. package/dist/src/storage/in-memory/index.js.map +1 -0
  102. package/dist/src/storage/index.d.ts +18 -0
  103. package/dist/src/storage/index.d.ts.map +1 -0
  104. package/dist/src/storage/index.js +10 -0
  105. package/dist/src/storage/index.js.map +1 -0
  106. package/dist/src/storage/interfaces.d.ts +520 -0
  107. package/dist/src/storage/interfaces.d.ts.map +1 -0
  108. package/dist/src/storage/interfaces.js +6 -0
  109. package/dist/src/storage/interfaces.js.map +1 -0
  110. package/dist/src/storage/local/LocalBackend.d.ts +54 -0
  111. package/dist/src/storage/local/LocalBackend.d.ts.map +1 -0
  112. package/dist/src/storage/local/LocalBackend.js +141 -0
  113. package/dist/src/storage/local/LocalBackend.js.map +1 -0
  114. package/dist/src/storage/local/LocalLockService.d.ts +105 -0
  115. package/dist/src/storage/local/LocalLockService.d.ts.map +1 -0
  116. package/dist/src/storage/local/LocalLockService.js +342 -0
  117. package/dist/src/storage/local/LocalLockService.js.map +1 -0
  118. package/dist/src/storage/local/LocalLogStore.d.ts +23 -0
  119. package/dist/src/storage/local/LocalLogStore.d.ts.map +1 -0
  120. package/dist/src/storage/local/LocalLogStore.js +66 -0
  121. package/dist/src/storage/local/LocalLogStore.js.map +1 -0
  122. package/dist/src/storage/local/LocalObjectStore.d.ts +52 -0
  123. package/dist/src/storage/local/LocalObjectStore.d.ts.map +1 -0
  124. package/dist/src/storage/local/LocalObjectStore.js +287 -0
  125. package/dist/src/storage/local/LocalObjectStore.js.map +1 -0
  126. package/dist/src/storage/local/LocalRefStore.d.ts +50 -0
  127. package/dist/src/storage/local/LocalRefStore.d.ts.map +1 -0
  128. package/dist/src/storage/local/LocalRefStore.js +337 -0
  129. package/dist/src/storage/local/LocalRefStore.js.map +1 -0
  130. package/dist/src/storage/local/LocalRepoStore.d.ts +53 -0
  131. package/dist/src/storage/local/LocalRepoStore.d.ts.map +1 -0
  132. package/dist/src/storage/local/LocalRepoStore.js +353 -0
  133. package/dist/src/storage/local/LocalRepoStore.js.map +1 -0
  134. package/dist/src/storage/local/gc.d.ts +92 -0
  135. package/dist/src/storage/local/gc.d.ts.map +1 -0
  136. package/dist/src/storage/local/gc.js +322 -0
  137. package/dist/src/storage/local/gc.js.map +1 -0
  138. package/dist/src/storage/local/index.d.ts +17 -0
  139. package/dist/src/storage/local/index.d.ts.map +1 -0
  140. package/dist/src/storage/local/index.js +17 -0
  141. package/dist/src/storage/local/index.js.map +1 -0
  142. package/dist/src/storage/local/localHelpers.d.ts +25 -0
  143. package/dist/src/storage/local/localHelpers.d.ts.map +1 -0
  144. package/dist/src/storage/local/localHelpers.js +69 -0
  145. package/dist/src/storage/local/localHelpers.js.map +1 -0
  146. package/dist/src/{repository.d.ts → storage/local/repository.d.ts} +8 -4
  147. package/dist/src/storage/local/repository.d.ts.map +1 -0
  148. package/dist/src/{repository.js → storage/local/repository.js} +31 -29
  149. package/dist/src/storage/local/repository.js.map +1 -0
  150. package/dist/src/tasks.d.ts +16 -10
  151. package/dist/src/tasks.d.ts.map +1 -1
  152. package/dist/src/tasks.js +35 -41
  153. package/dist/src/tasks.js.map +1 -1
  154. package/dist/src/test-helpers.d.ts +4 -4
  155. package/dist/src/test-helpers.d.ts.map +1 -1
  156. package/dist/src/test-helpers.js +7 -21
  157. package/dist/src/test-helpers.js.map +1 -1
  158. package/dist/src/trees.d.ts +89 -27
  159. package/dist/src/trees.d.ts.map +1 -1
  160. package/dist/src/trees.js +218 -100
  161. package/dist/src/trees.js.map +1 -1
  162. package/dist/src/uuid.d.ts +26 -0
  163. package/dist/src/uuid.d.ts.map +1 -0
  164. package/dist/src/uuid.js +80 -0
  165. package/dist/src/uuid.js.map +1 -0
  166. package/dist/src/workspaceStatus.d.ts +6 -4
  167. package/dist/src/workspaceStatus.d.ts.map +1 -1
  168. package/dist/src/workspaceStatus.js +43 -49
  169. package/dist/src/workspaceStatus.js.map +1 -1
  170. package/dist/src/workspaces.d.ts +35 -26
  171. package/dist/src/workspaces.d.ts.map +1 -1
  172. package/dist/src/workspaces.js +169 -118
  173. package/dist/src/workspaces.js.map +1 -1
  174. package/package.json +4 -4
  175. package/dist/src/gc.d.ts +0 -54
  176. package/dist/src/gc.d.ts.map +0 -1
  177. package/dist/src/gc.js +0 -233
  178. package/dist/src/gc.js.map +0 -1
  179. package/dist/src/repository.d.ts.map +0 -1
  180. package/dist/src/repository.js.map +0 -1
  181. package/dist/src/workspaceLock.d.ts +0 -67
  182. package/dist/src/workspaceLock.d.ts.map +0 -1
  183. package/dist/src/workspaceLock.js +0 -217
  184. package/dist/src/workspaceLock.js.map +0 -1
@@ -21,15 +21,60 @@
21
21
  * output is written to the workspace and dependents are notified only after the
22
22
  * write completes, ensuring downstream tasks see consistent state.
23
23
  */
24
- import { decodeBeast2For } from '@elaraai/east';
24
+ import { decodeBeast2For, encodeBeast2For, variant } from '@elaraai/east';
25
25
  import { PackageObjectType, TaskObjectType, WorkspaceStateType, pathToString, } from '@elaraai/e3-types';
26
- import { objectRead } from './objects.js';
27
- import { taskExecute, executionGetOutput, inputsHash, } from './executions.js';
26
+ import { executionGetOutput, inputsHash, } from './executions.js';
27
+ import { uuidv7 } from './uuid.js';
28
+ import { taskExecute } from './execution/LocalTaskRunner.js';
28
29
  import { workspaceGetDatasetHash, workspaceSetDatasetByHash, } from './trees.js';
29
- import { E3Error, WorkspaceNotFoundError, WorkspaceNotDeployedError, TaskNotFoundError, DataflowError, DataflowAbortedError, isNotFoundError, } from './errors.js';
30
- import { acquireWorkspaceLock, } from './workspaceLock.js';
31
- import * as fs from 'fs/promises';
32
- import * as path from 'path';
30
+ import { E3Error, WorkspaceNotFoundError, WorkspaceNotDeployedError, WorkspaceLockError, TaskNotFoundError, DataflowError, DataflowAbortedError, } from './errors.js';
31
+ // =============================================================================
32
+ // Path Parsing Helper
33
+ // =============================================================================
34
+ /**
35
+ * Parse a keypath string (from pathToString) back to TreePath.
36
+ *
37
+ * The keypath format is: .field1.field2 (dot-separated field names)
38
+ * Quoted identifiers use backticks: .field1.`complex/name`
39
+ *
40
+ * @param pathStr - The path string in keypath format
41
+ * @returns TreePath array of path segments
42
+ */
43
+ export function parsePathString(pathStr) {
44
+ if (!pathStr.startsWith('.')) {
45
+ throw new Error(`Invalid path string: expected '.' prefix, got '${pathStr}'`);
46
+ }
47
+ const segments = [];
48
+ let i = 1; // Skip the leading '.'
49
+ while (i < pathStr.length) {
50
+ let fieldName;
51
+ if (pathStr[i] === '`') {
52
+ // Quoted identifier: find closing backtick
53
+ const endQuote = pathStr.indexOf('`', i + 1);
54
+ if (endQuote === -1) {
55
+ throw new Error(`Invalid path string: unclosed backtick at position ${i}`);
56
+ }
57
+ fieldName = pathStr.slice(i + 1, endQuote);
58
+ i = endQuote + 1;
59
+ }
60
+ else {
61
+ // Unquoted identifier: read until '.' or end
62
+ let end = pathStr.indexOf('.', i);
63
+ if (end === -1)
64
+ end = pathStr.length;
65
+ fieldName = pathStr.slice(i, end);
66
+ i = end;
67
+ }
68
+ if (fieldName) {
69
+ segments.push(variant('field', fieldName));
70
+ }
71
+ // Skip the '.' separator
72
+ if (i < pathStr.length && pathStr[i] === '.') {
73
+ i++;
74
+ }
75
+ }
76
+ return segments;
77
+ }
33
78
  // =============================================================================
34
79
  // Async Mutex for Workspace Updates
35
80
  // =============================================================================
@@ -81,27 +126,20 @@ class AsyncMutex {
81
126
  // Workspace State Reader
82
127
  // =============================================================================
83
128
  /**
84
- * Read workspace state from file.
129
+ * Read workspace state.
85
130
  * @throws {WorkspaceNotFoundError} If workspace doesn't exist
86
131
  * @throws {WorkspaceNotDeployedError} If workspace has no package deployed
87
132
  */
88
- async function readWorkspaceState(repoPath, ws) {
89
- const stateFile = path.join(repoPath, 'workspaces', `${ws}.beast2`);
90
- let data;
91
- try {
92
- data = await fs.readFile(stateFile);
93
- }
94
- catch (err) {
95
- if (isNotFoundError(err)) {
96
- throw new WorkspaceNotFoundError(ws);
97
- }
98
- throw err;
133
+ async function readWorkspaceState(storage, repo, ws) {
134
+ const data = await storage.refs.workspaceRead(repo, ws);
135
+ if (data === null) {
136
+ throw new WorkspaceNotFoundError(ws);
99
137
  }
100
138
  if (data.length === 0) {
101
139
  throw new WorkspaceNotDeployedError(ws);
102
140
  }
103
141
  const decoder = decodeBeast2For(WorkspaceStateType);
104
- return decoder(data);
142
+ return decoder(Buffer.from(data));
105
143
  }
106
144
  // =============================================================================
107
145
  // Dependency Graph Building
@@ -114,11 +152,11 @@ async function readWorkspaceState(repoPath, ws) {
114
152
  * - outputToTask: Map of output path string -> task name
115
153
  * - taskDependents: Map of task name -> set of dependent task names
116
154
  */
117
- async function buildDependencyGraph(repoPath, ws) {
155
+ async function buildDependencyGraph(storage, repo, ws) {
118
156
  // Read workspace state to get package hash
119
- const state = await readWorkspaceState(repoPath, ws);
157
+ const state = await readWorkspaceState(storage, repo, ws);
120
158
  // Read package object to get tasks map
121
- const pkgData = await objectRead(repoPath, state.packageHash);
159
+ const pkgData = await storage.objects.read(repo, state.packageHash);
122
160
  const pkgDecoder = decodeBeast2For(PackageObjectType);
123
161
  const pkgObject = pkgDecoder(Buffer.from(pkgData));
124
162
  const taskNodes = new Map();
@@ -126,7 +164,7 @@ async function buildDependencyGraph(repoPath, ws) {
126
164
  // First pass: load all tasks and build output->task map
127
165
  const taskDecoder = decodeBeast2For(TaskObjectType);
128
166
  for (const [taskName, taskHash] of pkgObject.tasks) {
129
- const taskData = await objectRead(repoPath, taskHash);
167
+ const taskData = await storage.objects.read(repo, taskHash);
130
168
  const task = taskDecoder(Buffer.from(taskData));
131
169
  const outputPathStr = pathToString(task.output);
132
170
  outputToTask.set(outputPathStr, taskName);
@@ -159,7 +197,7 @@ async function buildDependencyGraph(repoPath, ws) {
159
197
  }
160
198
  // If not produced by a task, it's an external input - check if assigned
161
199
  else {
162
- const { refType } = await workspaceGetDatasetHash(repoPath, ws, inputPath);
200
+ const { refType } = await workspaceGetDatasetHash(storage, repo, ws, inputPath);
163
201
  if (refType === 'unassigned') {
164
202
  // External input that is unassigned - this task can never run
165
203
  node.unresolvedCount++;
@@ -183,7 +221,8 @@ async function buildDependencyGraph(repoPath, ws) {
183
221
  * to prevent concurrent modifications. If options.lock is provided, uses that
184
222
  * lock instead (caller is responsible for releasing it).
185
223
  *
186
- * @param repoPath - Path to .e3 repository
224
+ * @param storage - Storage backend
225
+ * @param repo - Repository identifier (for local storage, the path to e3 repository directory)
187
226
  * @param ws - Workspace name
188
227
  * @param options - Execution options
189
228
  * @returns Result of the dataflow execution
@@ -193,12 +232,16 @@ async function buildDependencyGraph(repoPath, ws) {
193
232
  * @throws {TaskNotFoundError} If filter specifies a task that doesn't exist
194
233
  * @throws {DataflowError} If execution fails for other reasons
195
234
  */
196
- export async function dataflowExecute(repoPath, ws, options = {}) {
235
+ export async function dataflowExecute(storage, repo, ws, options = {}) {
197
236
  // Acquire lock if not provided externally
198
237
  const externalLock = options.lock;
199
- const lock = externalLock ?? await acquireWorkspaceLock(repoPath, ws);
238
+ const lock = externalLock ?? await storage.locks.acquire(repo, ws, variant('dataflow', null));
239
+ if (!lock) {
240
+ // Lock couldn't be acquired - the LockService returns null instead of throwing
241
+ throw new WorkspaceLockError(ws);
242
+ }
200
243
  try {
201
- return await dataflowExecuteWithLock(repoPath, ws, options);
244
+ return await dataflowExecuteWithLock(storage, repo, ws, options);
202
245
  }
203
246
  finally {
204
247
  // Only release the lock if we acquired it internally
@@ -213,7 +256,8 @@ export async function dataflowExecute(repoPath, ws, options = {}) {
213
256
  * Returns a promise immediately without awaiting execution. The lock is
214
257
  * released automatically when execution completes.
215
258
  *
216
- * @param repoPath - Path to .e3 repository
259
+ * @param storage - Storage backend
260
+ * @param repo - Repository identifier (for local storage, the path to e3 repository directory)
217
261
  * @param ws - Workspace name
218
262
  * @param options - Execution options (lock must be provided)
219
263
  * @returns Promise that resolves when execution completes
@@ -222,23 +266,31 @@ export async function dataflowExecute(repoPath, ws, options = {}) {
222
266
  * @throws {TaskNotFoundError} If filter specifies a task that doesn't exist
223
267
  * @throws {DataflowError} If execution fails for other reasons
224
268
  */
225
- export function dataflowStart(repoPath, ws, options) {
226
- return dataflowExecuteWithLock(repoPath, ws, options)
269
+ export function dataflowStart(storage, repo, ws, options) {
270
+ return dataflowExecuteWithLock(storage, repo, ws, options)
227
271
  .finally(() => options.lock.release());
228
272
  }
229
273
  /**
230
274
  * Internal: Execute dataflow with lock already held.
231
275
  */
232
- async function dataflowExecuteWithLock(repoPath, ws, options) {
276
+ async function dataflowExecuteWithLock(storage, repo, ws, options) {
233
277
  const startTime = Date.now();
278
+ const startedAt = new Date();
234
279
  const concurrency = options.concurrency ?? 4;
280
+ // Generate run ID for this execution
281
+ const runId = uuidv7();
235
282
  let taskNodes;
236
283
  let taskDependents;
284
+ let outputToTask;
285
+ let wsState;
237
286
  try {
287
+ // Read workspace state for run tracking
288
+ wsState = await readWorkspaceState(storage, repo, ws);
238
289
  // Build dependency graph
239
- const graph = await buildDependencyGraph(repoPath, ws);
240
- taskNodes = graph.taskNodes;
241
- taskDependents = graph.taskDependents;
290
+ const graphResult = await buildDependencyGraph(storage, repo, ws);
291
+ taskNodes = graphResult.taskNodes;
292
+ taskDependents = graphResult.taskDependents;
293
+ outputToTask = graphResult.outputToTask;
242
294
  }
243
295
  catch (err) {
244
296
  // Re-throw E3Errors as-is
@@ -247,6 +299,54 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
247
299
  // Wrap unexpected errors
248
300
  throw new DataflowError(`Failed to build dependency graph: ${err instanceof Error ? err.message : err}`);
249
301
  }
302
+ // Clean up all previous runs (we hold the lock, so no concurrent runs)
303
+ const allRunIds = await storage.refs.dataflowRunList(repo, ws);
304
+ for (const oldRunId of allRunIds) {
305
+ await storage.refs.dataflowRunDelete(repo, ws, oldRunId);
306
+ }
307
+ // Initialize task execution records map
308
+ const taskExecutions = new Map();
309
+ // Create initial DataflowRun record
310
+ const initialRun = {
311
+ runId,
312
+ workspaceName: ws,
313
+ packageRef: `${wsState.packageName}@${wsState.packageVersion}`,
314
+ startedAt,
315
+ completedAt: variant('none', null),
316
+ status: variant('running', {}),
317
+ inputSnapshot: wsState.rootHash,
318
+ outputSnapshot: variant('none', null),
319
+ taskExecutions: taskExecutions,
320
+ summary: {
321
+ total: BigInt(taskNodes.size),
322
+ completed: 0n,
323
+ cached: 0n,
324
+ failed: 0n,
325
+ skipped: 0n,
326
+ },
327
+ };
328
+ // Write initial run record
329
+ await storage.refs.dataflowRunWrite(repo, ws, initialRun);
330
+ // Build DataflowGraph for use with decomposed building blocks
331
+ const dataflowGraph = {
332
+ tasks: Array.from(taskNodes.entries()).map(([taskName, node]) => {
333
+ const dependsOn = [];
334
+ for (const inputPath of node.inputPaths) {
335
+ const inputPathStr = pathToString(inputPath);
336
+ const producerTask = outputToTask.get(inputPathStr);
337
+ if (producerTask) {
338
+ dependsOn.push(producerTask);
339
+ }
340
+ }
341
+ return {
342
+ name: taskName,
343
+ hash: node.hash,
344
+ inputs: node.inputPaths.map(pathToString),
345
+ output: pathToString(node.outputPath),
346
+ dependsOn,
347
+ };
348
+ }),
349
+ };
250
350
  // Apply filter if specified
251
351
  const filteredTaskNames = options.filter
252
352
  ? new Set([options.filter])
@@ -278,6 +378,7 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
278
378
  const readyQueue = [];
279
379
  const completed = new Set();
280
380
  const inProgress = new Set();
381
+ const skippedTasks = new Set(); // Track skipped tasks separately for dataflowGetDependentsToSkip
281
382
  // Initialize ready queue with tasks that have no unresolved dependencies
282
383
  // and pass the filter (if any)
283
384
  for (const [taskName, node] of taskNodes) {
@@ -288,13 +389,13 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
288
389
  }
289
390
  }
290
391
  // Check if the task has a valid cached execution for current inputs
291
- // Returns the output hash if cached, null if re-execution is needed
392
+ // Returns the output hash and executionId if cached, null if re-execution is needed
292
393
  async function getCachedOutput(taskName) {
293
394
  const node = taskNodes.get(taskName);
294
395
  // Gather current input hashes
295
396
  const currentInputHashes = [];
296
397
  for (const inputPath of node.inputPaths) {
297
- const { refType, hash } = await workspaceGetDatasetHash(repoPath, ws, inputPath);
398
+ const { refType, hash } = await workspaceGetDatasetHash(storage, repo, ws, inputPath);
298
399
  if (refType !== 'value' || hash === null) {
299
400
  // Input not assigned, can't be cached
300
401
  return null;
@@ -303,20 +404,26 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
303
404
  }
304
405
  // Check if there's a cached execution for these inputs
305
406
  const inHash = inputsHash(currentInputHashes);
306
- const cachedOutputHash = await executionGetOutput(repoPath, node.hash, inHash);
407
+ const cachedOutputHash = await executionGetOutput(storage, repo, node.hash, inHash);
307
408
  if (cachedOutputHash === null) {
308
409
  // No cached execution for current inputs
309
410
  return null;
310
411
  }
412
+ // Get the latest execution status to retrieve the executionId
413
+ const latestStatus = await storage.refs.executionGetLatest(repo, node.hash, inHash);
414
+ if (!latestStatus || latestStatus.type !== 'success') {
415
+ // Latest execution wasn't a success
416
+ return null;
417
+ }
311
418
  // Also verify the workspace output matches the cached output
312
419
  // (in case the workspace was modified outside of execution)
313
- const { refType, hash: wsOutputHash } = await workspaceGetDatasetHash(repoPath, ws, node.outputPath);
420
+ const { refType, hash: wsOutputHash } = await workspaceGetDatasetHash(storage, repo, ws, node.outputPath);
314
421
  if (refType !== 'value' || wsOutputHash !== cachedOutputHash) {
315
422
  // Workspace output doesn't match cached output, need to re-execute
316
423
  // (or update workspace with cached value)
317
424
  return null;
318
425
  }
319
- return cachedOutputHash;
426
+ return { outputHash: cachedOutputHash, executionId: latestStatus.value.executionId };
320
427
  }
321
428
  // Execute a single task (does NOT write to workspace - caller must do that)
322
429
  async function executeTask(taskName) {
@@ -326,7 +433,7 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
326
433
  // Gather input hashes
327
434
  const inputHashes = [];
328
435
  for (const inputPath of node.inputPaths) {
329
- const { refType, hash } = await workspaceGetDatasetHash(repoPath, ws, inputPath);
436
+ const { refType, hash } = await workspaceGetDatasetHash(storage, repo, ws, inputPath);
330
437
  if (refType !== 'value' || hash === null) {
331
438
  // Input not available - should not happen if dependency tracking is correct
332
439
  return {
@@ -339,18 +446,22 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
339
446
  }
340
447
  inputHashes.push(hash);
341
448
  }
342
- // Execute the task
449
+ // Execute the task using either the provided runner or direct taskExecute()
343
450
  const execOptions = {
344
451
  force: options.force,
345
452
  signal: options.signal,
346
453
  onStdout: options.onStdout ? (data) => options.onStdout(taskName, data) : undefined,
347
454
  onStderr: options.onStderr ? (data) => options.onStderr(taskName, data) : undefined,
348
455
  };
349
- const result = await taskExecute(repoPath, node.hash, inputHashes, execOptions);
456
+ // Use provided runner if available, otherwise call taskExecute directly
457
+ const result = options.runner
458
+ ? await options.runner.execute(storage, node.hash, inputHashes, execOptions)
459
+ : await taskExecute(storage, repo, node.hash, inputHashes, execOptions);
350
460
  // Build task result (NOTE: workspace update happens later, in mutex-protected section)
351
461
  const taskResult = {
352
462
  name: taskName,
353
463
  cached: result.cached,
464
+ executionId: result.executionId,
354
465
  state: result.state,
355
466
  duration: Date.now() - taskStartTime,
356
467
  };
@@ -359,7 +470,6 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
359
470
  }
360
471
  else if (result.state === 'failed') {
361
472
  taskResult.exitCode = result.exitCode ?? undefined;
362
- taskResult.error = result.error ?? undefined;
363
473
  }
364
474
  // Pass output hash to caller for workspace update (if successful)
365
475
  if (result.state === 'success' && result.outputHash) {
@@ -383,17 +493,17 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
383
493
  }
384
494
  }
385
495
  }
386
- // Mark dependents as skipped when a task fails
496
+ // Mark dependents as skipped when a task fails.
497
+ // Uses dataflowGetDependentsToSkip to find all transitive dependents at once
498
+ // (shared with distributed execution in e3-aws).
387
499
  function skipDependents(taskName) {
388
- const dependents = taskDependents.get(taskName) ?? new Set();
389
- for (const depName of dependents) {
390
- if (completed.has(depName) || inProgress.has(depName))
391
- continue;
392
- // Skip dependents not in the filter
393
- if (filteredTaskNames && !filteredTaskNames.has(depName))
394
- continue;
395
- // Recursively skip
500
+ // Get all tasks to skip (excludes already completed, already skipped, and in-progress)
501
+ const toSkip = dataflowGetDependentsToSkip(dataflowGraph, taskName, completed, skippedTasks)
502
+ .filter(name => !inProgress.has(name)) // Also exclude in-progress tasks
503
+ .filter(name => !filteredTaskNames || filteredTaskNames.has(name)); // Apply filter
504
+ for (const depName of toSkip) {
396
505
  completed.add(depName);
506
+ skippedTasks.add(depName);
397
507
  skipped++;
398
508
  results.push({
399
509
  name: depName,
@@ -407,7 +517,6 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
407
517
  state: 'skipped',
408
518
  duration: 0,
409
519
  });
410
- skipDependents(depName);
411
520
  }
412
521
  }
413
522
  // Main execution loop using a work-stealing approach
@@ -424,8 +533,8 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
424
533
  if (completed.has(taskName) || inProgress.has(taskName))
425
534
  continue;
426
535
  // Check if there's a valid cached execution for current inputs
427
- const cachedOutputHash = await getCachedOutput(taskName);
428
- if (cachedOutputHash !== null && !options.force) {
536
+ const cachedResult = await getCachedOutput(taskName);
537
+ if (cachedResult !== null && !options.force) {
429
538
  // Valid cached execution exists for current inputs.
430
539
  // No workspace write needed (output already matches), but we still
431
540
  // need mutex protection for state updates to prevent races with
@@ -436,12 +545,18 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
436
545
  const result = {
437
546
  name: taskName,
438
547
  cached: true,
548
+ executionId: cachedResult.executionId,
439
549
  state: 'success',
440
550
  duration: 0,
441
551
  };
442
552
  results.push(result);
443
553
  options.onTaskComplete?.(result);
444
554
  notifyDependents(taskName);
555
+ // Track in taskExecutions map
556
+ taskExecutions.set(taskName, {
557
+ executionId: cachedResult.executionId,
558
+ cached: true,
559
+ });
445
560
  });
446
561
  continue;
447
562
  }
@@ -456,7 +571,7 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
456
571
  // Write output to workspace BEFORE notifying dependents
457
572
  if (result.state === 'success' && result.outputHash) {
458
573
  const node = taskNodes.get(taskName);
459
- await workspaceSetDatasetByHash(repoPath, ws, node.outputPath, result.outputHash);
574
+ await workspaceSetDatasetByHash(storage, repo, ws, node.outputPath, result.outputHash);
460
575
  }
461
576
  // Now safe to update execution state and notify dependents
462
577
  inProgress.delete(taskName);
@@ -471,11 +586,25 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
471
586
  executed++;
472
587
  }
473
588
  notifyDependents(taskName);
589
+ // Track in taskExecutions map
590
+ if (result.executionId) {
591
+ taskExecutions.set(taskName, {
592
+ executionId: result.executionId,
593
+ cached: result.cached,
594
+ });
595
+ }
474
596
  }
475
597
  else {
476
598
  failed++;
477
599
  hasFailure = true;
478
600
  skipDependents(taskName);
601
+ // Track failed execution too
602
+ if (result.executionId) {
603
+ taskExecutions.set(taskName, {
604
+ executionId: result.executionId,
605
+ cached: false,
606
+ });
607
+ }
479
608
  }
480
609
  });
481
610
  }
@@ -489,8 +618,10 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
489
618
  if (runningPromises.size > 0) {
490
619
  await Promise.race(runningPromises.values());
491
620
  }
492
- else if (readyQueue.length === 0) {
493
- // No running tasks and no ready tasks - we might have unresolvable dependencies
621
+ else if (readyQueue.length === 0 || aborted) {
622
+ // No running tasks and either:
623
+ // - no ready tasks (unresolvable dependencies)
624
+ // - aborted (stop processing)
494
625
  break;
495
626
  }
496
627
  }
@@ -502,12 +633,79 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
502
633
  }
503
634
  // Check for abort one final time
504
635
  checkAborted();
505
- // If aborted, throw with partial results
636
+ // If aborted, throw with partial results (also update run record)
506
637
  if (aborted) {
638
+ const finalWsState = await readWorkspaceState(storage, repo, ws);
639
+ const cancelledRun = {
640
+ runId,
641
+ workspaceName: ws,
642
+ packageRef: `${wsState.packageName}@${wsState.packageVersion}`,
643
+ startedAt,
644
+ completedAt: variant('some', new Date()),
645
+ status: variant('cancelled', {}),
646
+ inputSnapshot: wsState.rootHash,
647
+ outputSnapshot: variant('some', finalWsState.rootHash),
648
+ taskExecutions,
649
+ summary: {
650
+ total: BigInt(taskNodes.size),
651
+ completed: BigInt(executed + cached),
652
+ cached: BigInt(cached),
653
+ failed: BigInt(failed),
654
+ skipped: BigInt(skipped),
655
+ },
656
+ };
657
+ await storage.refs.dataflowRunWrite(repo, ws, cancelledRun);
507
658
  throw new DataflowAbortedError(results);
508
659
  }
660
+ // Read final workspace state for output snapshot
661
+ const finalWsState = await readWorkspaceState(storage, repo, ws);
662
+ // Determine final status
663
+ let finalStatus;
664
+ if (hasFailure) {
665
+ // Find the failed task
666
+ const failedTask = results.find(r => r.state === 'failed' || r.state === 'error');
667
+ finalStatus = variant('failed', {
668
+ failedTask: failedTask?.name ?? 'unknown',
669
+ error: failedTask?.error ?? failedTask?.exitCode?.toString() ?? 'Task failed',
670
+ });
671
+ }
672
+ else {
673
+ finalStatus = variant('completed', {});
674
+ }
675
+ // Write final DataflowRun record
676
+ const finalRun = {
677
+ runId,
678
+ workspaceName: ws,
679
+ packageRef: `${wsState.packageName}@${wsState.packageVersion}`,
680
+ startedAt,
681
+ completedAt: variant('some', new Date()),
682
+ status: finalStatus,
683
+ inputSnapshot: wsState.rootHash,
684
+ outputSnapshot: variant('some', finalWsState.rootHash),
685
+ taskExecutions,
686
+ summary: {
687
+ total: BigInt(taskNodes.size),
688
+ completed: BigInt(executed + cached),
689
+ cached: BigInt(cached),
690
+ failed: BigInt(failed),
691
+ skipped: BigInt(skipped),
692
+ },
693
+ };
694
+ await storage.refs.dataflowRunWrite(repo, ws, finalRun);
695
+ // Update workspace state with currentRunId on success
696
+ if (!hasFailure) {
697
+ // Read, update, write workspace state
698
+ const currentState = await readWorkspaceState(storage, repo, ws);
699
+ const updatedState = {
700
+ ...currentState,
701
+ currentRunId: variant('some', runId),
702
+ };
703
+ const encoder = encodeBeast2For(WorkspaceStateType);
704
+ await storage.refs.workspaceWrite(repo, ws, encoder(updatedState));
705
+ }
509
706
  return {
510
707
  success: !hasFailure,
708
+ runId,
511
709
  executed,
512
710
  cached,
513
711
  failed,
@@ -519,18 +717,19 @@ async function dataflowExecuteWithLock(repoPath, ws, options) {
519
717
  /**
520
718
  * Get the dependency graph for a workspace (for visualization/debugging).
521
719
  *
522
- * @param repoPath - Path to .e3 repository
720
+ * @param storage - Storage backend
721
+ * @param repo - Repository identifier (for local storage, the path to e3 repository directory)
523
722
  * @param ws - Workspace name
524
723
  * @returns Graph information
525
724
  * @throws {WorkspaceNotFoundError} If workspace doesn't exist
526
725
  * @throws {WorkspaceNotDeployedError} If workspace has no package deployed
527
726
  * @throws {DataflowError} If graph building fails for other reasons
528
727
  */
529
- export async function dataflowGetGraph(repoPath, ws) {
728
+ export async function dataflowGetGraph(storage, repo, ws) {
530
729
  let taskNodes;
531
730
  let outputToTask;
532
731
  try {
533
- const graph = await buildDependencyGraph(repoPath, ws);
732
+ const graph = await buildDependencyGraph(storage, repo, ws);
534
733
  taskNodes = graph.taskNodes;
535
734
  outputToTask = graph.outputToTask;
536
735
  }
@@ -559,4 +758,168 @@ export async function dataflowGetGraph(repoPath, ws) {
559
758
  }
560
759
  return { tasks };
561
760
  }
/**
 * Get tasks that are ready to execute given the set of completed tasks.
 *
 * A task is ready when it has not completed itself and every task named in
 * its `dependsOn` list has completed. This is useful for distributed
 * execution (e.g., AWS Step Functions) where a coordinator needs to
 * determine which tasks can run next.
 *
 * Note that only completion is considered: a task that is currently running
 * or that has failed is not in `completedTasks`, so it is reported as ready
 * again whenever its dependencies are complete. Callers tracking such tasks
 * must filter them out.
 *
 * @param graph - The dependency graph from dataflowGetGraph
 * @param completedTasks - Names of tasks that have completed. May be a Set
 *   (or any object with a `has` method), or any iterable of names such as an
 *   array. Defaults to empty, which yields the initially ready tasks.
 * @returns Array of task names that are ready to execute, in graph order
 *
 * @example
 * ```typescript
 * const graph = await dataflowGetGraph(storage, repo, 'production');
 * const ready = dataflowGetReadyTasks(graph, new Set()); // Initial ready tasks
 * // Execute ready[0]...
 * const nextReady = dataflowGetReadyTasks(graph, new Set([ready[0]]));
 * ```
 */
export function dataflowGetReadyTasks(graph, completedTasks = new Set()) {
    // Anything that already answers `has` is used as-is; any other iterable
    // (e.g. an array from JSON state) is copied into a Set once so the
    // membership checks below stay constant-time.
    const completed = typeof completedTasks?.has === 'function'
        ? completedTasks
        : new Set(completedTasks ?? []);
    return graph.tasks
        .filter(task => !completed.has(task.name)
        && task.dependsOn.every(dep => completed.has(dep)))
        .map(task => task.name);
}
/**
 * Look up a cached execution result for a task and a specific set of inputs.
 *
 * The ordered input hashes are combined into a single inputs hash, which is
 * then used together with the task hash to query the execution store.
 * Intended for distributed execution, where a handler (e.g. a Lambda) wants
 * to find out whether a task can be skipped before spawning it.
 *
 * @param storage - Storage backend
 * @param repo - Repository path
 * @param taskHash - Hash of the TaskObject
 * @param inputHashes - Array of input dataset hashes (in order)
 * @returns Output hash if cached, null if execution needed
 *
 * @example
 * ```typescript
 * const outputHash = await dataflowCheckCache(storage, repo, taskHash, inputHashes);
 * if (outputHash) {
 *   // Task is cached, use outputHash directly
 * } else {
 *   // Need to execute task
 * }
 * ```
 */
export async function dataflowCheckCache(storage, repo, taskHash, inputHashes) {
    return executionGetOutput(storage, repo, taskHash, inputsHash(inputHashes));
}
/**
 * Find tasks that should be skipped when a task fails.
 *
 * Returns all tasks that transitively depend on the failed task
 * (directly or through other tasks), excluding already completed
 * or already skipped tasks. The failed task itself is never returned,
 * even if the graph contains a dependency cycle leading back to it.
 *
 * This is useful for distributed execution where the coordinator
 * needs to mark downstream tasks as skipped after a failure.
 *
 * @param graph - The dependency graph from dataflowGetGraph
 * @param failedTask - Name of the task that failed
 * @param completedTasks - Set of task names already completed (won't be
 *   skipped, and their dependents are not explored through them).
 *   Defaults to an empty set.
 * @param skippedTasks - Set of task names already skipped (won't be returned
 *   again, but their dependents are still explored). Defaults to an empty set.
 * @returns Array of task names that should be skipped, in breadth-first order
 *
 * @example
 * ```typescript
 * const graph = await dataflowGetGraph(storage, repo, 'production');
 * // Task 'etl' failed...
 * const toSkip = dataflowGetDependentsToSkip(graph, 'etl', completed, skipped);
 * // toSkip might be ['transform', 'aggregate', 'report'] - all downstream tasks
 * ```
 */
export function dataflowGetDependentsToSkip(graph, failedTask, completedTasks = new Set(), skippedTasks = new Set()) {
    // Build reverse dependency map: task -> tasks that depend on it.
    // Every task gets an entry first so dependencies on names that are not
    // in the graph are silently ignored by the optional call below.
    const dependents = new Map();
    for (const task of graph.tasks) {
        dependents.set(task.name, []);
    }
    for (const task of graph.tasks) {
        for (const dep of task.dependsOn) {
            dependents.get(dep)?.push(task.name);
        }
    }
    // BFS to find all transitive dependents. The failed task is marked as
    // visited up front so a cycle can never report it as a task to skip.
    const toSkip = [];
    const visited = new Set([failedTask]);
    const queue = [failedTask];
    // Walk the queue with a cursor instead of shift(): shift() re-indexes the
    // array on every dequeue, which makes the traversal quadratic.
    for (let head = 0; head < queue.length; head++) {
        for (const dependent of dependents.get(queue[head]) ?? []) {
            if (visited.has(dependent)) {
                continue;
            }
            visited.add(dependent);
            // Completed tasks break the chain: don't skip, don't explore further.
            if (completedTasks.has(dependent)) {
                continue;
            }
            // Already-skipped tasks are not reported again, but their
            // dependents still need to be explored.
            if (!skippedTasks.has(dependent)) {
                toSkip.push(dependent);
            }
            queue.push(dependent);
        }
    }
    return toSkip;
}
/**
 * Resolve input hashes for a task from current workspace state.
 *
 * Returns an array of hashes in the same order as the task's inputs.
 * If any input is unassigned, returns null for that position: a position
 * holds a hash only when the dataset reference has refType 'value' and a
 * non-null hash.
 *
 * The lookups for the individual inputs do not depend on each other, so
 * they are issued concurrently; result order still follows `task.inputs`.
 * If any lookup fails, the returned promise rejects.
 *
 * This is useful for distributed execution where the input hashes
 * need to be resolved before checking cache or executing.
 *
 * @param storage - Storage backend
 * @param repo - Repository path
 * @param ws - Workspace name
 * @param task - Task info from the graph (needs inputs array)
 * @returns Array of hashes (null if input is unassigned)
 *
 * @example
 * ```typescript
 * const graph = await dataflowGetGraph(storage, repo, 'production');
 * const task = graph.tasks.find(t => t.name === 'etl')!;
 * const inputHashes = await dataflowResolveInputHashes(storage, repo, 'production', task);
 * if (!inputHashes.includes(null)) {
 *   const cached = await dataflowCheckCache(storage, repo, task.hash, inputHashes);
 * }
 * ```
 */
export async function dataflowResolveInputHashes(storage, repo, ws, task) {
    // Spread first so any iterable of path strings is accepted, as with the
    // for...of loop this replaces.
    return Promise.all([...task.inputs].map(async (inputPathStr) => {
        // Parse the keypath string back to TreePath
        const inputPath = parsePathString(inputPathStr);
        const { refType, hash } = await workspaceGetDatasetHash(storage, repo, ws, inputPath);
        return refType === 'value' && hash !== null ? hash : null;
    }));
}
562
925
  //# sourceMappingURL=dataflow.js.map