@elaraai/e3-core 0.0.2-beta.5 → 0.0.2-beta.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. package/README.md +25 -22
  2. package/dist/src/dataflow/api-compat.d.ts +90 -0
  3. package/dist/src/dataflow/api-compat.d.ts.map +1 -0
  4. package/dist/src/dataflow/api-compat.js +139 -0
  5. package/dist/src/dataflow/api-compat.js.map +1 -0
  6. package/dist/src/dataflow/index.d.ts +18 -0
  7. package/dist/src/dataflow/index.d.ts.map +1 -0
  8. package/dist/src/dataflow/index.js +23 -0
  9. package/dist/src/dataflow/index.js.map +1 -0
  10. package/dist/src/dataflow/orchestrator/LocalOrchestrator.d.ts +76 -0
  11. package/dist/src/dataflow/orchestrator/LocalOrchestrator.d.ts.map +1 -0
  12. package/dist/src/dataflow/orchestrator/LocalOrchestrator.js +729 -0
  13. package/dist/src/dataflow/orchestrator/LocalOrchestrator.js.map +1 -0
  14. package/dist/src/dataflow/orchestrator/index.d.ts +12 -0
  15. package/dist/src/dataflow/orchestrator/index.d.ts.map +1 -0
  16. package/dist/src/dataflow/orchestrator/index.js +12 -0
  17. package/dist/src/dataflow/orchestrator/index.js.map +1 -0
  18. package/dist/src/dataflow/orchestrator/interfaces.d.ts +163 -0
  19. package/dist/src/dataflow/orchestrator/interfaces.d.ts.map +1 -0
  20. package/dist/src/dataflow/orchestrator/interfaces.js +52 -0
  21. package/dist/src/dataflow/orchestrator/interfaces.js.map +1 -0
  22. package/dist/src/dataflow/state-store/FileStateStore.d.ts +67 -0
  23. package/dist/src/dataflow/state-store/FileStateStore.d.ts.map +1 -0
  24. package/dist/src/dataflow/state-store/FileStateStore.js +300 -0
  25. package/dist/src/dataflow/state-store/FileStateStore.js.map +1 -0
  26. package/dist/src/dataflow/state-store/InMemoryStateStore.d.ts +42 -0
  27. package/dist/src/dataflow/state-store/InMemoryStateStore.d.ts.map +1 -0
  28. package/dist/src/dataflow/state-store/InMemoryStateStore.js +229 -0
  29. package/dist/src/dataflow/state-store/InMemoryStateStore.js.map +1 -0
  30. package/dist/src/dataflow/state-store/index.d.ts +13 -0
  31. package/dist/src/dataflow/state-store/index.d.ts.map +1 -0
  32. package/dist/src/dataflow/state-store/index.js +13 -0
  33. package/dist/src/dataflow/state-store/index.js.map +1 -0
  34. package/dist/src/dataflow/state-store/interfaces.d.ts +159 -0
  35. package/dist/src/dataflow/state-store/interfaces.d.ts.map +1 -0
  36. package/dist/src/dataflow/state-store/interfaces.js +6 -0
  37. package/dist/src/dataflow/state-store/interfaces.js.map +1 -0
  38. package/dist/src/dataflow/steps.d.ts +222 -0
  39. package/dist/src/dataflow/steps.d.ts.map +1 -0
  40. package/dist/src/dataflow/steps.js +707 -0
  41. package/dist/src/dataflow/steps.js.map +1 -0
  42. package/dist/src/dataflow/types.d.ts +127 -0
  43. package/dist/src/dataflow/types.d.ts.map +1 -0
  44. package/dist/src/dataflow/types.js +7 -0
  45. package/dist/src/dataflow/types.js.map +1 -0
  46. package/dist/src/dataflow.d.ts +113 -38
  47. package/dist/src/dataflow.d.ts.map +1 -1
  48. package/dist/src/dataflow.js +269 -416
  49. package/dist/src/dataflow.js.map +1 -1
  50. package/dist/src/dataset-refs.d.ts +124 -0
  51. package/dist/src/dataset-refs.d.ts.map +1 -0
  52. package/dist/src/dataset-refs.js +319 -0
  53. package/dist/src/dataset-refs.js.map +1 -0
  54. package/dist/src/errors.d.ts +39 -9
  55. package/dist/src/errors.d.ts.map +1 -1
  56. package/dist/src/errors.js +51 -8
  57. package/dist/src/errors.js.map +1 -1
  58. package/dist/src/execution/LocalTaskRunner.d.ts +73 -0
  59. package/dist/src/execution/LocalTaskRunner.d.ts.map +1 -0
  60. package/dist/src/execution/LocalTaskRunner.js +399 -0
  61. package/dist/src/execution/LocalTaskRunner.js.map +1 -0
  62. package/dist/src/execution/MockTaskRunner.d.ts +49 -0
  63. package/dist/src/execution/MockTaskRunner.d.ts.map +1 -0
  64. package/dist/src/execution/MockTaskRunner.js +54 -0
  65. package/dist/src/execution/MockTaskRunner.js.map +1 -0
  66. package/dist/src/execution/index.d.ts +16 -0
  67. package/dist/src/execution/index.d.ts.map +1 -0
  68. package/dist/src/execution/index.js +8 -0
  69. package/dist/src/execution/index.js.map +1 -0
  70. package/dist/src/execution/interfaces.d.ts +246 -0
  71. package/dist/src/execution/interfaces.d.ts.map +1 -0
  72. package/dist/src/execution/interfaces.js +6 -0
  73. package/dist/src/execution/interfaces.js.map +1 -0
  74. package/dist/src/execution/processHelpers.d.ts +20 -0
  75. package/dist/src/execution/processHelpers.d.ts.map +1 -0
  76. package/dist/src/execution/processHelpers.js +62 -0
  77. package/dist/src/execution/processHelpers.js.map +1 -0
  78. package/dist/src/executions.d.ts +71 -104
  79. package/dist/src/executions.d.ts.map +1 -1
  80. package/dist/src/executions.js +110 -476
  81. package/dist/src/executions.js.map +1 -1
  82. package/dist/src/index.d.ts +20 -10
  83. package/dist/src/index.d.ts.map +1 -1
  84. package/dist/src/index.js +48 -18
  85. package/dist/src/index.js.map +1 -1
  86. package/dist/src/objects.d.ts +7 -53
  87. package/dist/src/objects.d.ts.map +1 -1
  88. package/dist/src/objects.js +13 -232
  89. package/dist/src/objects.js.map +1 -1
  90. package/dist/src/packages.d.ts +41 -14
  91. package/dist/src/packages.d.ts.map +1 -1
  92. package/dist/src/packages.js +145 -88
  93. package/dist/src/packages.js.map +1 -1
  94. package/dist/src/storage/in-memory/InMemoryRepoStore.d.ts +35 -0
  95. package/dist/src/storage/in-memory/InMemoryRepoStore.d.ts.map +1 -0
  96. package/dist/src/storage/in-memory/InMemoryRepoStore.js +107 -0
  97. package/dist/src/storage/in-memory/InMemoryRepoStore.js.map +1 -0
  98. package/dist/src/storage/in-memory/InMemoryStorage.d.ts +139 -0
  99. package/dist/src/storage/in-memory/InMemoryStorage.d.ts.map +1 -0
  100. package/dist/src/storage/in-memory/InMemoryStorage.js +439 -0
  101. package/dist/src/storage/in-memory/InMemoryStorage.js.map +1 -0
  102. package/dist/src/storage/in-memory/index.d.ts +12 -0
  103. package/dist/src/storage/in-memory/index.d.ts.map +1 -0
  104. package/dist/src/storage/in-memory/index.js +12 -0
  105. package/dist/src/storage/in-memory/index.js.map +1 -0
  106. package/dist/src/storage/index.d.ts +18 -0
  107. package/dist/src/storage/index.d.ts.map +1 -0
  108. package/dist/src/storage/index.js +10 -0
  109. package/dist/src/storage/index.js.map +1 -0
  110. package/dist/src/storage/interfaces.d.ts +581 -0
  111. package/dist/src/storage/interfaces.d.ts.map +1 -0
  112. package/dist/src/storage/interfaces.js +6 -0
  113. package/dist/src/storage/interfaces.js.map +1 -0
  114. package/dist/src/storage/local/LocalBackend.d.ts +56 -0
  115. package/dist/src/storage/local/LocalBackend.d.ts.map +1 -0
  116. package/dist/src/storage/local/LocalBackend.js +145 -0
  117. package/dist/src/storage/local/LocalBackend.js.map +1 -0
  118. package/dist/src/storage/local/LocalDatasetRefStore.d.ts +22 -0
  119. package/dist/src/storage/local/LocalDatasetRefStore.d.ts.map +1 -0
  120. package/dist/src/storage/local/LocalDatasetRefStore.js +118 -0
  121. package/dist/src/storage/local/LocalDatasetRefStore.js.map +1 -0
  122. package/dist/src/storage/local/LocalLockService.d.ts +111 -0
  123. package/dist/src/storage/local/LocalLockService.d.ts.map +1 -0
  124. package/dist/src/storage/local/LocalLockService.js +364 -0
  125. package/dist/src/storage/local/LocalLockService.js.map +1 -0
  126. package/dist/src/storage/local/LocalLogStore.d.ts +23 -0
  127. package/dist/src/storage/local/LocalLogStore.d.ts.map +1 -0
  128. package/dist/src/storage/local/LocalLogStore.js +66 -0
  129. package/dist/src/storage/local/LocalLogStore.js.map +1 -0
  130. package/dist/src/storage/local/LocalObjectStore.d.ts +55 -0
  131. package/dist/src/storage/local/LocalObjectStore.d.ts.map +1 -0
  132. package/dist/src/storage/local/LocalObjectStore.js +300 -0
  133. package/dist/src/storage/local/LocalObjectStore.js.map +1 -0
  134. package/dist/src/storage/local/LocalRefStore.d.ts +50 -0
  135. package/dist/src/storage/local/LocalRefStore.d.ts.map +1 -0
  136. package/dist/src/storage/local/LocalRefStore.js +337 -0
  137. package/dist/src/storage/local/LocalRefStore.js.map +1 -0
  138. package/dist/src/storage/local/LocalRepoStore.d.ts +55 -0
  139. package/dist/src/storage/local/LocalRepoStore.d.ts.map +1 -0
  140. package/dist/src/storage/local/LocalRepoStore.js +365 -0
  141. package/dist/src/storage/local/LocalRepoStore.js.map +1 -0
  142. package/dist/src/storage/local/gc.d.ts +92 -0
  143. package/dist/src/storage/local/gc.d.ts.map +1 -0
  144. package/dist/src/storage/local/gc.js +377 -0
  145. package/dist/src/storage/local/gc.js.map +1 -0
  146. package/dist/src/storage/local/index.d.ts +18 -0
  147. package/dist/src/storage/local/index.d.ts.map +1 -0
  148. package/dist/src/storage/local/index.js +18 -0
  149. package/dist/src/storage/local/index.js.map +1 -0
  150. package/dist/src/storage/local/localHelpers.d.ts +25 -0
  151. package/dist/src/storage/local/localHelpers.d.ts.map +1 -0
  152. package/dist/src/storage/local/localHelpers.js +69 -0
  153. package/dist/src/storage/local/localHelpers.js.map +1 -0
  154. package/dist/src/{repository.d.ts → storage/local/repository.d.ts} +8 -4
  155. package/dist/src/storage/local/repository.d.ts.map +1 -0
  156. package/dist/src/{repository.js → storage/local/repository.js} +31 -29
  157. package/dist/src/storage/local/repository.js.map +1 -0
  158. package/dist/src/tasks.d.ts +16 -10
  159. package/dist/src/tasks.d.ts.map +1 -1
  160. package/dist/src/tasks.js +35 -41
  161. package/dist/src/tasks.js.map +1 -1
  162. package/dist/src/test-helpers.d.ts +5 -4
  163. package/dist/src/test-helpers.d.ts.map +1 -1
  164. package/dist/src/test-helpers.js +9 -21
  165. package/dist/src/test-helpers.js.map +1 -1
  166. package/dist/src/transfer/InMemoryTransferBackend.d.ts +75 -0
  167. package/dist/src/transfer/InMemoryTransferBackend.d.ts.map +1 -0
  168. package/dist/src/transfer/InMemoryTransferBackend.js +211 -0
  169. package/dist/src/transfer/InMemoryTransferBackend.js.map +1 -0
  170. package/dist/src/transfer/index.d.ts +9 -0
  171. package/dist/src/transfer/index.d.ts.map +1 -0
  172. package/dist/src/transfer/index.js +11 -0
  173. package/dist/src/transfer/index.js.map +1 -0
  174. package/dist/src/transfer/interfaces.d.ts +103 -0
  175. package/dist/src/transfer/interfaces.d.ts.map +1 -0
  176. package/dist/src/transfer/interfaces.js +6 -0
  177. package/dist/src/transfer/interfaces.js.map +1 -0
  178. package/dist/src/transfer/process.d.ts +55 -0
  179. package/dist/src/transfer/process.d.ts.map +1 -0
  180. package/dist/src/transfer/process.js +144 -0
  181. package/dist/src/transfer/process.js.map +1 -0
  182. package/dist/src/transfer/types.d.ts +106 -0
  183. package/dist/src/transfer/types.d.ts.map +1 -0
  184. package/dist/src/transfer/types.js +61 -0
  185. package/dist/src/transfer/types.js.map +1 -0
  186. package/dist/src/trees.d.ts +147 -59
  187. package/dist/src/trees.d.ts.map +1 -1
  188. package/dist/src/trees.js +372 -419
  189. package/dist/src/trees.js.map +1 -1
  190. package/dist/src/uuid.d.ts +26 -0
  191. package/dist/src/uuid.d.ts.map +1 -0
  192. package/dist/src/uuid.js +80 -0
  193. package/dist/src/uuid.js.map +1 -0
  194. package/dist/src/workspaceStatus.d.ts +6 -4
  195. package/dist/src/workspaceStatus.d.ts.map +1 -1
  196. package/dist/src/workspaceStatus.js +46 -60
  197. package/dist/src/workspaceStatus.js.map +1 -1
  198. package/dist/src/workspaces.d.ts +46 -47
  199. package/dist/src/workspaces.d.ts.map +1 -1
  200. package/dist/src/workspaces.js +281 -221
  201. package/dist/src/workspaces.js.map +1 -1
  202. package/package.json +4 -4
  203. package/dist/src/gc.d.ts +0 -54
  204. package/dist/src/gc.d.ts.map +0 -1
  205. package/dist/src/gc.js +0 -233
  206. package/dist/src/gc.js.map +0 -1
  207. package/dist/src/repository.d.ts.map +0 -1
  208. package/dist/src/repository.js.map +0 -1
  209. package/dist/src/workspaceLock.d.ts +0 -67
  210. package/dist/src/workspaceLock.d.ts.map +0 -1
  211. package/dist/src/workspaceLock.js +0 -217
  212. package/dist/src/workspaceLock.js.map +0 -1
@@ -5,128 +5,101 @@
5
5
  /**
6
6
  * Dataflow execution for e3 workspaces.
7
7
  *
8
- * Executes tasks in a workspace based on their dependency graph. Tasks are
9
- * executed in parallel where possible, respecting a concurrency limit.
8
+ * Provides the high-level `dataflowExecute` entry point (which delegates
9
+ * to `LocalOrchestrator`) and shared graph-building utilities used by
10
+ * both local and cloud execution paths.
10
11
  *
11
- * The execution model is event-driven with a work queue:
12
- * 1. Build dependency graph from tasks (input paths -> task -> output path)
13
- * 2. Compute reverse dependencies (which tasks depend on each output)
14
- * 3. Initialize ready queue with tasks whose inputs are all assigned
15
- * 4. Execute tasks from ready queue, respecting concurrency limit
16
- * 5. On task completion, queue workspace update then check dependents for readiness
17
- * 6. On failure, stop launching new tasks but wait for running ones
18
- *
19
- * IMPORTANT: Workspace state updates are serialized through an async queue to
20
- * prevent race conditions when multiple tasks complete concurrently. Each task's
21
- * output is written to the workspace and dependents are notified only after the
22
- * write completes, ensuring downstream tasks see consistent state.
12
+ * The reactive execution logic (input change detection, task invalidation,
13
+ * version vector consistency) lives in `dataflow/steps.ts` and is orchestrated
14
+ * by `dataflow/orchestrator/LocalOrchestrator.ts`.
23
15
  */
24
- import { decodeBeast2For } from '@elaraai/east';
16
+ import { decodeBeast2For, variant } from '@elaraai/east';
25
17
  import { PackageObjectType, TaskObjectType, WorkspaceStateType, pathToString, } from '@elaraai/e3-types';
26
- import { objectRead } from './objects.js';
27
- import { taskExecute, executionGetOutput, inputsHash, } from './executions.js';
28
- import { workspaceGetDatasetHash, workspaceSetDatasetByHash, } from './trees.js';
29
- import { E3Error, WorkspaceNotFoundError, WorkspaceNotDeployedError, TaskNotFoundError, DataflowError, DataflowAbortedError, isNotFoundError, } from './errors.js';
30
- import { acquireWorkspaceLock, } from './workspaceLock.js';
31
- import * as fs from 'fs/promises';
32
- import * as path from 'path';
18
+ import { executionGetOutput, inputsHash, } from './executions.js';
19
+ import { workspaceGetDatasetHash, } from './trees.js';
20
+ import { E3Error, WorkspaceNotFoundError, WorkspaceNotDeployedError, DataflowError, } from './errors.js';
33
21
  // =============================================================================
34
- // Async Mutex for Workspace Updates
22
+ // Path Parsing Helper
35
23
  // =============================================================================
36
24
  /**
37
- * Simple async mutex to serialize workspace state updates.
25
+ * Parse a keypath string (from pathToString) back to TreePath.
26
+ *
27
+ * The keypath format is: .field1.field2 (dot-separated field names)
28
+ * Quoted identifiers use backticks: .field1.`complex/name`
38
29
  *
39
- * When multiple tasks complete concurrently, their workspace writes must be
40
- * serialized to prevent race conditions (read-modify-write on the workspace
41
- * root hash). This mutex ensures only one update runs at a time.
30
+ * @param pathStr - The path string in keypath format
31
+ * @returns TreePath array of path segments
42
32
  */
43
- class AsyncMutex {
44
- queue = [];
45
- locked = false;
46
- /**
47
- * Acquire the mutex, execute the callback, then release.
48
- * If the mutex is already held, waits until it's available.
49
- */
50
- async runExclusive(fn) {
51
- await this.acquire();
52
- try {
53
- return await fn();
54
- }
55
- finally {
56
- this.release();
57
- }
33
+ export function parsePathString(pathStr) {
34
+ if (!pathStr.startsWith('.')) {
35
+ throw new Error(`Invalid path string: expected '.' prefix, got '${pathStr}'`);
58
36
  }
59
- acquire() {
60
- return new Promise((resolve) => {
61
- if (!this.locked) {
62
- this.locked = true;
63
- resolve();
37
+ const segments = [];
38
+ let i = 1; // Skip the leading '.'
39
+ while (i < pathStr.length) {
40
+ let fieldName;
41
+ if (pathStr[i] === '`') {
42
+ // Quoted identifier: find closing backtick
43
+ const endQuote = pathStr.indexOf('`', i + 1);
44
+ if (endQuote === -1) {
45
+ throw new Error(`Invalid path string: unclosed backtick at position ${i}`);
64
46
  }
65
- else {
66
- this.queue.push(resolve);
67
- }
68
- });
69
- }
70
- release() {
71
- const next = this.queue.shift();
72
- if (next) {
73
- next();
47
+ fieldName = pathStr.slice(i + 1, endQuote);
48
+ i = endQuote + 1;
74
49
  }
75
50
  else {
76
- this.locked = false;
51
+ // Unquoted identifier: read until '.' or end
52
+ let end = pathStr.indexOf('.', i);
53
+ if (end === -1)
54
+ end = pathStr.length;
55
+ fieldName = pathStr.slice(i, end);
56
+ i = end;
57
+ }
58
+ if (fieldName) {
59
+ segments.push(variant('field', fieldName));
60
+ }
61
+ // Skip the '.' separator
62
+ if (i < pathStr.length && pathStr[i] === '.') {
63
+ i++;
77
64
  }
78
65
  }
66
+ return segments;
79
67
  }
80
68
  // =============================================================================
81
69
  // Workspace State Reader
82
70
  // =============================================================================
83
71
  /**
84
- * Read workspace state from file.
72
+ * Read workspace state.
85
73
  * @throws {WorkspaceNotFoundError} If workspace doesn't exist
86
74
  * @throws {WorkspaceNotDeployedError} If workspace has no package deployed
87
75
  */
88
- async function readWorkspaceState(repoPath, ws) {
89
- const stateFile = path.join(repoPath, 'workspaces', `${ws}.beast2`);
90
- let data;
91
- try {
92
- data = await fs.readFile(stateFile);
93
- }
94
- catch (err) {
95
- if (isNotFoundError(err)) {
96
- throw new WorkspaceNotFoundError(ws);
97
- }
98
- throw err;
76
+ async function readWorkspaceState(storage, repo, ws) {
77
+ const data = await storage.refs.workspaceRead(repo, ws);
78
+ if (data === null) {
79
+ throw new WorkspaceNotFoundError(ws);
99
80
  }
100
81
  if (data.length === 0) {
101
82
  throw new WorkspaceNotDeployedError(ws);
102
83
  }
103
84
  const decoder = decodeBeast2For(WorkspaceStateType);
104
- return decoder(data);
85
+ return decoder(Buffer.from(data));
105
86
  }
106
87
  // =============================================================================
107
88
  // Dependency Graph Building
108
89
  // =============================================================================
109
90
  /**
110
91
  * Build the dependency graph for a workspace.
111
- *
112
- * Returns:
113
- * - taskNodes: Map of task name -> TaskNode
114
- * - outputToTask: Map of output path string -> task name
115
- * - taskDependents: Map of task name -> set of dependent task names
116
92
  */
117
- async function buildDependencyGraph(repoPath, ws) {
118
- // Read workspace state to get package hash
119
- const state = await readWorkspaceState(repoPath, ws);
120
- // Read package object to get tasks map
121
- const pkgData = await objectRead(repoPath, state.packageHash);
93
+ async function buildDependencyGraph(storage, repo, ws) {
94
+ const state = await readWorkspaceState(storage, repo, ws);
95
+ const pkgData = await storage.objects.read(repo, state.packageHash);
122
96
  const pkgDecoder = decodeBeast2For(PackageObjectType);
123
97
  const pkgObject = pkgDecoder(Buffer.from(pkgData));
124
98
  const taskNodes = new Map();
125
- const outputToTask = new Map(); // output path -> task name
126
- // First pass: load all tasks and build output->task map
99
+ const outputToTask = new Map();
127
100
  const taskDecoder = decodeBeast2For(TaskObjectType);
128
101
  for (const [taskName, taskHash] of pkgObject.tasks) {
129
- const taskData = await objectRead(repoPath, taskHash);
102
+ const taskData = await storage.objects.read(repo, taskHash);
130
103
  const task = taskDecoder(Buffer.from(taskData));
131
104
  const outputPathStr = pathToString(task.output);
132
105
  outputToTask.set(outputPathStr, taskName);
@@ -136,32 +109,24 @@ async function buildDependencyGraph(repoPath, ws) {
136
109
  task,
137
110
  inputPaths: task.inputs,
138
111
  outputPath: task.output,
139
- unresolvedCount: 0, // Will be computed below
112
+ unresolvedCount: 0,
140
113
  });
141
114
  }
142
- // Build reverse dependency map: task -> tasks that depend on it
143
115
  const taskDependents = new Map();
144
116
  for (const taskName of taskNodes.keys()) {
145
117
  taskDependents.set(taskName, new Set());
146
118
  }
147
- // Second pass: compute dependencies and unresolved counts
148
119
  for (const [taskName, node] of taskNodes) {
149
120
  for (const inputPath of node.inputPaths) {
150
121
  const inputPathStr = pathToString(inputPath);
151
122
  const producerTask = outputToTask.get(inputPathStr);
152
123
  if (producerTask) {
153
- // This input comes from another task's output.
154
- // The task cannot run until the producer task completes,
155
- // regardless of whether the output is currently assigned
156
- // (it might be stale from a previous run).
157
124
  taskDependents.get(producerTask).add(taskName);
158
125
  node.unresolvedCount++;
159
126
  }
160
- // If not produced by a task, it's an external input - check if assigned
161
127
  else {
162
- const { refType } = await workspaceGetDatasetHash(repoPath, ws, inputPath);
128
+ const { refType } = await workspaceGetDatasetHash(storage, repo, ws, inputPath);
163
129
  if (refType === 'unassigned') {
164
- // External input that is unassigned - this task can never run
165
130
  node.unresolvedCount++;
166
131
  }
167
132
  }
@@ -175,362 +140,109 @@ async function buildDependencyGraph(repoPath, ws) {
175
140
  /**
176
141
  * Execute all tasks in a workspace according to the dependency graph.
177
142
  *
178
- * Tasks are executed in parallel where dependencies allow, respecting
179
- * the concurrency limit. On failure, no new tasks are launched but
180
- * running tasks are allowed to complete.
143
+ * Delegates to `LocalOrchestrator` which implements reactive fixpoint
144
+ * execution using step functions. After each task completes, input changes
145
+ * are detected and affected tasks are invalidated and re-executed.
181
146
  *
182
- * Acquires an exclusive lock on the workspace for the duration of execution
183
- * to prevent concurrent modifications. If options.lock is provided, uses that
184
- * lock instead (caller is responsible for releasing it).
185
- *
186
- * @param repoPath - Path to .e3 repository
147
+ * @param storage - Storage backend
148
+ * @param repo - Repository identifier
187
149
  * @param ws - Workspace name
188
150
  * @param options - Execution options
189
151
  * @returns Result of the dataflow execution
152
+ *
190
153
  * @throws {WorkspaceLockError} If workspace is locked by another process
191
154
  * @throws {WorkspaceNotFoundError} If workspace doesn't exist
192
155
  * @throws {WorkspaceNotDeployedError} If workspace has no package deployed
193
156
  * @throws {TaskNotFoundError} If filter specifies a task that doesn't exist
194
157
  * @throws {DataflowError} If execution fails for other reasons
195
158
  */
196
- export async function dataflowExecute(repoPath, ws, options = {}) {
197
- // Acquire lock if not provided externally
198
- const externalLock = options.lock;
199
- const lock = externalLock ?? await acquireWorkspaceLock(repoPath, ws);
200
- try {
201
- return await dataflowExecuteWithLock(repoPath, ws, options);
202
- }
203
- finally {
204
- // Only release the lock if we acquired it internally
205
- if (!externalLock) {
206
- await lock.release();
207
- }
208
- }
159
+ export async function dataflowExecute(storage, repo, ws, options = {}) {
160
+ const { LocalOrchestrator } = await import('./dataflow/orchestrator/LocalOrchestrator.js');
161
+ const orchestrator = new LocalOrchestrator();
162
+ const taskResults = [];
163
+ const handle = await orchestrator.start(storage, repo, ws, {
164
+ concurrency: options.concurrency,
165
+ force: options.force,
166
+ filter: options.filter,
167
+ signal: options.signal,
168
+ lock: options.lock,
169
+ runner: options.runner,
170
+ onTaskStart: options.onTaskStart,
171
+ onTaskComplete: (result) => {
172
+ taskResults.push({
173
+ name: result.name,
174
+ cached: result.cached,
175
+ state: result.state,
176
+ error: result.error,
177
+ exitCode: result.exitCode,
178
+ duration: result.duration,
179
+ });
180
+ options.onTaskComplete?.({
181
+ name: result.name,
182
+ cached: result.cached,
183
+ state: result.state,
184
+ error: result.error,
185
+ exitCode: result.exitCode,
186
+ duration: result.duration,
187
+ });
188
+ },
189
+ onStdout: options.onStdout,
190
+ onStderr: options.onStderr,
191
+ onInputChanged: options.onInputChanged,
192
+ onTaskInvalidated: options.onTaskInvalidated,
193
+ onTaskDeferred: options.onTaskDeferred,
194
+ });
195
+ const result = await orchestrator.wait(handle);
196
+ return {
197
+ success: result.success,
198
+ runId: result.runId,
199
+ executed: result.executed,
200
+ cached: result.cached,
201
+ failed: result.failed,
202
+ skipped: result.skipped,
203
+ reexecuted: result.reexecuted,
204
+ tasks: taskResults,
205
+ duration: result.duration,
206
+ };
209
207
  }
210
208
  /**
211
- * Start dataflow execution in the background (non-blocking).
212
- *
213
- * Returns a promise immediately without awaiting execution. The lock is
214
- * released automatically when execution completes.
209
+ * Execute dataflow with an externally-held lock.
210
+ * The lock is released automatically when execution completes or fails.
215
211
  *
216
- * @param repoPath - Path to .e3 repository
212
+ * @param storage - Storage backend
213
+ * @param repo - Repository identifier
217
214
  * @param ws - Workspace name
218
215
  * @param options - Execution options (lock must be provided)
219
216
  * @returns Promise that resolves when execution completes
220
- * @throws {WorkspaceNotFoundError} If workspace doesn't exist
221
- * @throws {WorkspaceNotDeployedError} If workspace has no package deployed
222
- * @throws {TaskNotFoundError} If filter specifies a task that doesn't exist
223
- * @throws {DataflowError} If execution fails for other reasons
224
217
  */
225
- export function dataflowStart(repoPath, ws, options) {
226
- return dataflowExecuteWithLock(repoPath, ws, options)
227
- .finally(() => options.lock.release());
228
- }
229
- /**
230
- * Internal: Execute dataflow with lock already held.
231
- */
232
- async function dataflowExecuteWithLock(repoPath, ws, options) {
233
- const startTime = Date.now();
234
- const concurrency = options.concurrency ?? 4;
235
- let taskNodes;
236
- let taskDependents;
218
+ export async function dataflowStart(storage, repo, ws, options) {
237
219
  try {
238
- // Build dependency graph
239
- const graph = await buildDependencyGraph(repoPath, ws);
240
- taskNodes = graph.taskNodes;
241
- taskDependents = graph.taskDependents;
242
- }
243
- catch (err) {
244
- // Re-throw E3Errors as-is
245
- if (err instanceof E3Error)
246
- throw err;
247
- // Wrap unexpected errors
248
- throw new DataflowError(`Failed to build dependency graph: ${err instanceof Error ? err.message : err}`);
249
- }
250
- // Apply filter if specified
251
- const filteredTaskNames = options.filter
252
- ? new Set([options.filter])
253
- : null;
254
- // Validate filter
255
- if (filteredTaskNames && options.filter && !taskNodes.has(options.filter)) {
256
- throw new TaskNotFoundError(options.filter);
257
- }
258
- // Track execution state
259
- const results = [];
260
- let executed = 0;
261
- let cached = 0;
262
- let failed = 0;
263
- let skipped = 0;
264
- let hasFailure = false;
265
- let aborted = false;
266
- // Check for abort signal
267
- const checkAborted = () => {
268
- if (options.signal?.aborted && !aborted) {
269
- aborted = true;
270
- }
271
- return aborted;
272
- };
273
- // Mutex to serialize workspace state updates.
274
- // When multiple tasks complete concurrently, their writes to the workspace
275
- // must be serialized to prevent lost updates (read-modify-write race).
276
- const workspaceUpdateMutex = new AsyncMutex();
277
- // Ready queue: tasks with all dependencies resolved
278
- const readyQueue = [];
279
- const completed = new Set();
280
- const inProgress = new Set();
281
- // Initialize ready queue with tasks that have no unresolved dependencies
282
- // and pass the filter (if any)
283
- for (const [taskName, node] of taskNodes) {
284
- if (node.unresolvedCount === 0) {
285
- if (!filteredTaskNames || filteredTaskNames.has(taskName)) {
286
- readyQueue.push(taskName);
287
- }
288
- }
289
- }
290
- // Check if the task has a valid cached execution for current inputs
291
- // Returns the output hash if cached, null if re-execution is needed
292
- async function getCachedOutput(taskName) {
293
- const node = taskNodes.get(taskName);
294
- // Gather current input hashes
295
- const currentInputHashes = [];
296
- for (const inputPath of node.inputPaths) {
297
- const { refType, hash } = await workspaceGetDatasetHash(repoPath, ws, inputPath);
298
- if (refType !== 'value' || hash === null) {
299
- // Input not assigned, can't be cached
300
- return null;
301
- }
302
- currentInputHashes.push(hash);
303
- }
304
- // Check if there's a cached execution for these inputs
305
- const inHash = inputsHash(currentInputHashes);
306
- const cachedOutputHash = await executionGetOutput(repoPath, node.hash, inHash);
307
- if (cachedOutputHash === null) {
308
- // No cached execution for current inputs
309
- return null;
310
- }
311
- // Also verify the workspace output matches the cached output
312
- // (in case the workspace was modified outside of execution)
313
- const { refType, hash: wsOutputHash } = await workspaceGetDatasetHash(repoPath, ws, node.outputPath);
314
- if (refType !== 'value' || wsOutputHash !== cachedOutputHash) {
315
- // Workspace output doesn't match cached output, need to re-execute
316
- // (or update workspace with cached value)
317
- return null;
318
- }
319
- return cachedOutputHash;
320
- }
321
- // Execute a single task (does NOT write to workspace - caller must do that)
322
- async function executeTask(taskName) {
323
- const node = taskNodes.get(taskName);
324
- const taskStartTime = Date.now();
325
- options.onTaskStart?.(taskName);
326
- // Gather input hashes
327
- const inputHashes = [];
328
- for (const inputPath of node.inputPaths) {
329
- const { refType, hash } = await workspaceGetDatasetHash(repoPath, ws, inputPath);
330
- if (refType !== 'value' || hash === null) {
331
- // Input not available - should not happen if dependency tracking is correct
332
- return {
333
- name: taskName,
334
- cached: false,
335
- state: 'error',
336
- error: `Input at ${pathToString(inputPath)} is not assigned (refType: ${refType})`,
337
- duration: Date.now() - taskStartTime,
338
- };
339
- }
340
- inputHashes.push(hash);
341
- }
342
- // Execute the task
343
- const execOptions = {
344
- force: options.force,
345
- signal: options.signal,
346
- onStdout: options.onStdout ? (data) => options.onStdout(taskName, data) : undefined,
347
- onStderr: options.onStderr ? (data) => options.onStderr(taskName, data) : undefined,
348
- };
349
- const result = await taskExecute(repoPath, node.hash, inputHashes, execOptions);
350
- // Build task result (NOTE: workspace update happens later, in mutex-protected section)
351
- const taskResult = {
352
- name: taskName,
353
- cached: result.cached,
354
- state: result.state,
355
- duration: Date.now() - taskStartTime,
356
- };
357
- if (result.state === 'error') {
358
- taskResult.error = result.error ?? undefined;
359
- }
360
- else if (result.state === 'failed') {
361
- taskResult.exitCode = result.exitCode ?? undefined;
362
- taskResult.error = result.error ?? undefined;
363
- }
364
- // Pass output hash to caller for workspace update (if successful)
365
- if (result.state === 'success' && result.outputHash) {
366
- taskResult.outputHash = result.outputHash;
367
- }
368
- return taskResult;
369
- }
370
- // Process dependents when a task completes
371
- function notifyDependents(taskName) {
372
- const dependents = taskDependents.get(taskName) ?? new Set();
373
- for (const depName of dependents) {
374
- if (completed.has(depName) || inProgress.has(depName))
375
- continue;
376
- // Skip dependents not in the filter
377
- if (filteredTaskNames && !filteredTaskNames.has(depName))
378
- continue;
379
- const depNode = taskNodes.get(depName);
380
- depNode.unresolvedCount--;
381
- if (depNode.unresolvedCount === 0 && !readyQueue.includes(depName)) {
382
- readyQueue.push(depName);
383
- }
384
- }
220
+ return await dataflowExecute(storage, repo, ws, options);
385
221
  }
386
- // Mark dependents as skipped when a task fails
387
- function skipDependents(taskName) {
388
- const dependents = taskDependents.get(taskName) ?? new Set();
389
- for (const depName of dependents) {
390
- if (completed.has(depName) || inProgress.has(depName))
391
- continue;
392
- // Skip dependents not in the filter
393
- if (filteredTaskNames && !filteredTaskNames.has(depName))
394
- continue;
395
- // Recursively skip
396
- completed.add(depName);
397
- skipped++;
398
- results.push({
399
- name: depName,
400
- cached: false,
401
- state: 'skipped',
402
- duration: 0,
403
- });
404
- options.onTaskComplete?.({
405
- name: depName,
406
- cached: false,
407
- state: 'skipped',
408
- duration: 0,
409
- });
410
- skipDependents(depName);
411
- }
412
- }
413
- // Main execution loop using a work-stealing approach
414
- const runningPromises = new Map();
415
- async function processQueue() {
416
- while (true) {
417
- // Check if we're done
418
- if (readyQueue.length === 0 && runningPromises.size === 0) {
419
- break;
420
- }
421
- // Launch tasks up to concurrency limit if no failure and not aborted
422
- while (!hasFailure && !checkAborted() && readyQueue.length > 0 && runningPromises.size < concurrency) {
423
- const taskName = readyQueue.shift();
424
- if (completed.has(taskName) || inProgress.has(taskName))
425
- continue;
426
- // Check if there's a valid cached execution for current inputs
427
- const cachedOutputHash = await getCachedOutput(taskName);
428
- if (cachedOutputHash !== null && !options.force) {
429
- // Valid cached execution exists for current inputs.
430
- // No workspace write needed (output already matches), but we still
431
- // need mutex protection for state updates to prevent races with
432
- // concurrent task completions.
433
- await workspaceUpdateMutex.runExclusive(() => {
434
- completed.add(taskName);
435
- cached++;
436
- const result = {
437
- name: taskName,
438
- cached: true,
439
- state: 'success',
440
- duration: 0,
441
- };
442
- results.push(result);
443
- options.onTaskComplete?.(result);
444
- notifyDependents(taskName);
445
- });
446
- continue;
447
- }
448
- inProgress.add(taskName);
449
- const promise = (async () => {
450
- try {
451
- const result = await executeTask(taskName);
452
- // Use mutex to serialize workspace updates and dependent notifications.
453
- // This prevents race conditions where two tasks complete simultaneously,
454
- // both read the same workspace state, and one overwrites the other's changes.
455
- await workspaceUpdateMutex.runExclusive(async () => {
456
- // Write output to workspace BEFORE notifying dependents
457
- if (result.state === 'success' && result.outputHash) {
458
- const node = taskNodes.get(taskName);
459
- await workspaceSetDatasetByHash(repoPath, ws, node.outputPath, result.outputHash);
460
- }
461
- // Now safe to update execution state and notify dependents
462
- inProgress.delete(taskName);
463
- completed.add(taskName);
464
- results.push(result);
465
- options.onTaskComplete?.(result);
466
- if (result.state === 'success') {
467
- if (result.cached) {
468
- cached++;
469
- }
470
- else {
471
- executed++;
472
- }
473
- notifyDependents(taskName);
474
- }
475
- else {
476
- failed++;
477
- hasFailure = true;
478
- skipDependents(taskName);
479
- }
480
- });
481
- }
482
- finally {
483
- runningPromises.delete(taskName);
484
- }
485
- })();
486
- runningPromises.set(taskName, promise);
487
- }
488
- // Wait for at least one task to complete if we can't launch more
489
- if (runningPromises.size > 0) {
490
- await Promise.race(runningPromises.values());
491
- }
492
- else if (readyQueue.length === 0) {
493
- // No running tasks and no ready tasks - we might have unresolvable dependencies
494
- break;
495
- }
496
- }
497
- }
498
- await processQueue();
499
- // Wait for any remaining tasks
500
- if (runningPromises.size > 0) {
501
- await Promise.all(runningPromises.values());
502
- }
503
- // Check for abort one final time
504
- checkAborted();
505
- // If aborted, throw with partial results
506
- if (aborted) {
507
- throw new DataflowAbortedError(results);
222
+ finally {
223
+ await options.lock.release();
508
224
  }
509
- return {
510
- success: !hasFailure,
511
- executed,
512
- cached,
513
- failed,
514
- skipped,
515
- tasks: results,
516
- duration: Date.now() - startTime,
517
- };
518
225
  }
226
+ // =============================================================================
227
+ // Graph Queries (shared between local and cloud execution)
228
+ // =============================================================================
519
229
  /**
520
230
  * Get the dependency graph for a workspace (for visualization/debugging).
521
231
  *
522
- * @param repoPath - Path to .e3 repository
232
+ * @param storage - Storage backend
233
+ * @param repo - Repository identifier
523
234
  * @param ws - Workspace name
524
235
  * @returns Graph information
236
+ *
525
237
  * @throws {WorkspaceNotFoundError} If workspace doesn't exist
526
238
  * @throws {WorkspaceNotDeployedError} If workspace has no package deployed
527
239
  * @throws {DataflowError} If graph building fails for other reasons
528
240
  */
529
- export async function dataflowGetGraph(repoPath, ws) {
241
+ export async function dataflowGetGraph(storage, repo, ws) {
530
242
  let taskNodes;
531
243
  let outputToTask;
532
244
  try {
533
- const graph = await buildDependencyGraph(repoPath, ws);
245
+ const graph = await buildDependencyGraph(storage, repo, ws);
534
246
  taskNodes = graph.taskNodes;
535
247
  outputToTask = graph.outputToTask;
536
248
  }
@@ -559,4 +271,145 @@ export async function dataflowGetGraph(repoPath, ws) {
559
271
  }
560
272
  return { tasks };
561
273
  }
274
/**
 * Find all tasks affected by input changes (transitive dependents).
 *
 * A task is affected when it directly reads one of the changed input paths,
 * or when it (transitively) depends on a task that does.
 *
 * @param graph - The dependency graph; each entry of `graph.tasks` provides
 *   `name`, `inputs` (path strings) and `dependsOn` (task names)
 * @param changes - Array of changes; only the `path` of each entry is read
 * @returns Array of affected task names, in breadth-first discovery order
 */
export function findAffectedTasks(graph, changes) {
    const changedPaths = new Set(changes.map((change) => change.path));
    // Forward dependency map: task name -> tasks that list it in `dependsOn`.
    const taskToDependents = new Map();
    for (const task of graph.tasks) {
        for (const dep of task.dependsOn) {
            let dependents = taskToDependents.get(dep);
            if (dependents === undefined) {
                dependents = [];
                taskToDependents.set(dep, dependents);
            }
            dependents.push(task.name);
        }
    }
    const affected = new Set();
    const queue = [];
    // Mark a task when it is first enqueued so it enters the queue at most once,
    // no matter how many upstream tasks lead to it.
    const enqueue = (name) => {
        if (!affected.has(name)) {
            affected.add(name);
            queue.push(name);
        }
    };
    // Seed: tasks that directly read a changed input.
    for (const task of graph.tasks) {
        if (task.inputs.some((input) => changedPaths.has(input))) {
            enqueue(task.name);
        }
    }
    // Breadth-first walk. A cursor is used instead of queue.shift() so that
    // draining the queue stays linear in the number of tasks.
    for (let head = 0; head < queue.length; head++) {
        for (const dependent of taskToDependents.get(queue[head]) ?? []) {
            enqueue(dependent);
        }
    }
    return Array.from(affected);
}
313
/**
 * List the tasks that can be started now, given which tasks have completed.
 *
 * A task qualifies when it is not itself in `completedTasks` and every name
 * in its `dependsOn` list is. Tasks with no dependencies always qualify
 * until they are completed.
 *
 * @param graph - The dependency graph from dataflowGetGraph
 * @param completedTasks - Set of task names that have completed
 * @returns Array of ready task names, in `graph.tasks` order
 */
export function dataflowGetReadyTasks(graph, completedTasks) {
    const isCompleted = (name) => completedTasks.has(name);
    return graph.tasks
        .filter((task) => !isCompleted(task.name) && task.dependsOn.every((dep) => isCompleted(dep)))
        .map((task) => task.name);
}
335
/**
 * Look up a cached execution of a task for a specific set of inputs.
 *
 * The input hashes are combined with `inputsHash` and the combined hash is
 * used, together with the task hash, to query `executionGetOutput`.
 *
 * @param storage - Storage backend
 * @param repo - Repository identifier
 * @param taskHash - Hash of the TaskObject
 * @param inputHashes - Array of input dataset hashes (in order)
 * @returns Output hash if cached, null if execution needed
 */
export async function dataflowCheckCache(storage, repo, taskHash, inputHashes) {
    return executionGetOutput(storage, repo, taskHash, inputsHash(inputHashes));
}
348
/**
 * Find tasks that should be skipped when a task fails.
 *
 * Returns the tasks that transitively depend on the failed task, excluding
 * tasks already completed or already skipped. The failed task itself is
 * never returned, even if the graph contains a self-dependency or a cycle.
 *
 * Traversal rules:
 * - a completed dependent is not reported and is not traversed through;
 * - an already-skipped dependent is not reported again, but its own
 *   dependents are still visited.
 *
 * @param graph - The dependency graph from dataflowGetGraph
 * @param failedTask - Name of the task that failed
 * @param completedTasks - Set of task names already completed
 * @param skippedTasks - Set of task names already skipped
 * @returns Array of task names that should be skipped, in breadth-first order
 */
export function dataflowGetDependentsToSkip(graph, failedTask, completedTasks, skippedTasks) {
    // Reverse edges: task name -> tasks that list it in `dependsOn`.
    const dependents = new Map();
    for (const task of graph.tasks) {
        dependents.set(task.name, []);
    }
    for (const task of graph.tasks) {
        for (const dep of task.dependsOn) {
            // A dependency naming a task that is not in the graph is ignored.
            dependents.get(dep)?.push(task.name);
        }
    }
    const toSkip = [];
    // Seed with the failed task so a cyclic edge can never report it as one
    // of its own dependents.
    const visited = new Set([failedTask]);
    const queue = [failedTask];
    // A cursor is used instead of queue.shift() to keep the walk linear.
    for (let head = 0; head < queue.length; head++) {
        for (const dep of dependents.get(queue[head]) ?? []) {
            if (visited.has(dep)) {
                continue;
            }
            visited.add(dep);
            if (completedTasks.has(dep)) {
                continue;
            }
            if (!skippedTasks.has(dep)) {
                toSkip.push(dep);
            }
            queue.push(dep);
        }
    }
    return toSkip;
}
392
/**
 * Look up the current workspace hash for each input of a task.
 *
 * Inputs are resolved one at a time, in `task.inputs` order. Each input path
 * string is parsed with `parsePathString` and looked up with
 * `workspaceGetDatasetHash`. The result has one entry per input: the hash
 * when the reference type is 'value' and a hash is present, otherwise null.
 *
 * @param storage - Storage backend
 * @param repo - Repository identifier
 * @param ws - Workspace name
 * @param task - Task info from the graph
 * @returns Array of hashes (null if input is unassigned)
 */
export async function dataflowResolveInputHashes(storage, repo, ws, task) {
    const resolved = [];
    for (const pathString of task.inputs) {
        const dataset = await workspaceGetDatasetHash(storage, repo, ws, parsePathString(pathString));
        const isAssigned = dataset.refType === 'value' && dataset.hash !== null;
        resolved.push(isAssigned ? dataset.hash : null);
    }
    return resolved;
}
562
415
  //# sourceMappingURL=dataflow.js.map