@elaraai/e3-core 0.0.2-beta.5 → 0.0.2-beta.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. package/README.md +25 -22
  2. package/dist/src/dataflow/api-compat.d.ts +90 -0
  3. package/dist/src/dataflow/api-compat.d.ts.map +1 -0
  4. package/dist/src/dataflow/api-compat.js +139 -0
  5. package/dist/src/dataflow/api-compat.js.map +1 -0
  6. package/dist/src/dataflow/index.d.ts +18 -0
  7. package/dist/src/dataflow/index.d.ts.map +1 -0
  8. package/dist/src/dataflow/index.js +23 -0
  9. package/dist/src/dataflow/index.js.map +1 -0
  10. package/dist/src/dataflow/orchestrator/LocalOrchestrator.d.ts +76 -0
  11. package/dist/src/dataflow/orchestrator/LocalOrchestrator.d.ts.map +1 -0
  12. package/dist/src/dataflow/orchestrator/LocalOrchestrator.js +729 -0
  13. package/dist/src/dataflow/orchestrator/LocalOrchestrator.js.map +1 -0
  14. package/dist/src/dataflow/orchestrator/index.d.ts +12 -0
  15. package/dist/src/dataflow/orchestrator/index.d.ts.map +1 -0
  16. package/dist/src/dataflow/orchestrator/index.js +12 -0
  17. package/dist/src/dataflow/orchestrator/index.js.map +1 -0
  18. package/dist/src/dataflow/orchestrator/interfaces.d.ts +163 -0
  19. package/dist/src/dataflow/orchestrator/interfaces.d.ts.map +1 -0
  20. package/dist/src/dataflow/orchestrator/interfaces.js +52 -0
  21. package/dist/src/dataflow/orchestrator/interfaces.js.map +1 -0
  22. package/dist/src/dataflow/state-store/FileStateStore.d.ts +67 -0
  23. package/dist/src/dataflow/state-store/FileStateStore.d.ts.map +1 -0
  24. package/dist/src/dataflow/state-store/FileStateStore.js +300 -0
  25. package/dist/src/dataflow/state-store/FileStateStore.js.map +1 -0
  26. package/dist/src/dataflow/state-store/InMemoryStateStore.d.ts +42 -0
  27. package/dist/src/dataflow/state-store/InMemoryStateStore.d.ts.map +1 -0
  28. package/dist/src/dataflow/state-store/InMemoryStateStore.js +229 -0
  29. package/dist/src/dataflow/state-store/InMemoryStateStore.js.map +1 -0
  30. package/dist/src/dataflow/state-store/index.d.ts +13 -0
  31. package/dist/src/dataflow/state-store/index.d.ts.map +1 -0
  32. package/dist/src/dataflow/state-store/index.js +13 -0
  33. package/dist/src/dataflow/state-store/index.js.map +1 -0
  34. package/dist/src/dataflow/state-store/interfaces.d.ts +159 -0
  35. package/dist/src/dataflow/state-store/interfaces.d.ts.map +1 -0
  36. package/dist/src/dataflow/state-store/interfaces.js +6 -0
  37. package/dist/src/dataflow/state-store/interfaces.js.map +1 -0
  38. package/dist/src/dataflow/steps.d.ts +222 -0
  39. package/dist/src/dataflow/steps.d.ts.map +1 -0
  40. package/dist/src/dataflow/steps.js +707 -0
  41. package/dist/src/dataflow/steps.js.map +1 -0
  42. package/dist/src/dataflow/types.d.ts +127 -0
  43. package/dist/src/dataflow/types.d.ts.map +1 -0
  44. package/dist/src/dataflow/types.js +7 -0
  45. package/dist/src/dataflow/types.js.map +1 -0
  46. package/dist/src/dataflow.d.ts +113 -38
  47. package/dist/src/dataflow.d.ts.map +1 -1
  48. package/dist/src/dataflow.js +269 -416
  49. package/dist/src/dataflow.js.map +1 -1
  50. package/dist/src/dataset-refs.d.ts +124 -0
  51. package/dist/src/dataset-refs.d.ts.map +1 -0
  52. package/dist/src/dataset-refs.js +319 -0
  53. package/dist/src/dataset-refs.js.map +1 -0
  54. package/dist/src/errors.d.ts +39 -9
  55. package/dist/src/errors.d.ts.map +1 -1
  56. package/dist/src/errors.js +51 -8
  57. package/dist/src/errors.js.map +1 -1
  58. package/dist/src/execution/LocalTaskRunner.d.ts +73 -0
  59. package/dist/src/execution/LocalTaskRunner.d.ts.map +1 -0
  60. package/dist/src/execution/LocalTaskRunner.js +399 -0
  61. package/dist/src/execution/LocalTaskRunner.js.map +1 -0
  62. package/dist/src/execution/MockTaskRunner.d.ts +49 -0
  63. package/dist/src/execution/MockTaskRunner.d.ts.map +1 -0
  64. package/dist/src/execution/MockTaskRunner.js +54 -0
  65. package/dist/src/execution/MockTaskRunner.js.map +1 -0
  66. package/dist/src/execution/index.d.ts +16 -0
  67. package/dist/src/execution/index.d.ts.map +1 -0
  68. package/dist/src/execution/index.js +8 -0
  69. package/dist/src/execution/index.js.map +1 -0
  70. package/dist/src/execution/interfaces.d.ts +246 -0
  71. package/dist/src/execution/interfaces.d.ts.map +1 -0
  72. package/dist/src/execution/interfaces.js +6 -0
  73. package/dist/src/execution/interfaces.js.map +1 -0
  74. package/dist/src/execution/processHelpers.d.ts +20 -0
  75. package/dist/src/execution/processHelpers.d.ts.map +1 -0
  76. package/dist/src/execution/processHelpers.js +62 -0
  77. package/dist/src/execution/processHelpers.js.map +1 -0
  78. package/dist/src/executions.d.ts +71 -104
  79. package/dist/src/executions.d.ts.map +1 -1
  80. package/dist/src/executions.js +110 -476
  81. package/dist/src/executions.js.map +1 -1
  82. package/dist/src/index.d.ts +20 -10
  83. package/dist/src/index.d.ts.map +1 -1
  84. package/dist/src/index.js +48 -18
  85. package/dist/src/index.js.map +1 -1
  86. package/dist/src/objects.d.ts +7 -53
  87. package/dist/src/objects.d.ts.map +1 -1
  88. package/dist/src/objects.js +13 -232
  89. package/dist/src/objects.js.map +1 -1
  90. package/dist/src/packages.d.ts +41 -14
  91. package/dist/src/packages.d.ts.map +1 -1
  92. package/dist/src/packages.js +145 -88
  93. package/dist/src/packages.js.map +1 -1
  94. package/dist/src/storage/in-memory/InMemoryRepoStore.d.ts +35 -0
  95. package/dist/src/storage/in-memory/InMemoryRepoStore.d.ts.map +1 -0
  96. package/dist/src/storage/in-memory/InMemoryRepoStore.js +107 -0
  97. package/dist/src/storage/in-memory/InMemoryRepoStore.js.map +1 -0
  98. package/dist/src/storage/in-memory/InMemoryStorage.d.ts +139 -0
  99. package/dist/src/storage/in-memory/InMemoryStorage.d.ts.map +1 -0
  100. package/dist/src/storage/in-memory/InMemoryStorage.js +439 -0
  101. package/dist/src/storage/in-memory/InMemoryStorage.js.map +1 -0
  102. package/dist/src/storage/in-memory/index.d.ts +12 -0
  103. package/dist/src/storage/in-memory/index.d.ts.map +1 -0
  104. package/dist/src/storage/in-memory/index.js +12 -0
  105. package/dist/src/storage/in-memory/index.js.map +1 -0
  106. package/dist/src/storage/index.d.ts +18 -0
  107. package/dist/src/storage/index.d.ts.map +1 -0
  108. package/dist/src/storage/index.js +10 -0
  109. package/dist/src/storage/index.js.map +1 -0
  110. package/dist/src/storage/interfaces.d.ts +581 -0
  111. package/dist/src/storage/interfaces.d.ts.map +1 -0
  112. package/dist/src/storage/interfaces.js +6 -0
  113. package/dist/src/storage/interfaces.js.map +1 -0
  114. package/dist/src/storage/local/LocalBackend.d.ts +56 -0
  115. package/dist/src/storage/local/LocalBackend.d.ts.map +1 -0
  116. package/dist/src/storage/local/LocalBackend.js +145 -0
  117. package/dist/src/storage/local/LocalBackend.js.map +1 -0
  118. package/dist/src/storage/local/LocalDatasetRefStore.d.ts +22 -0
  119. package/dist/src/storage/local/LocalDatasetRefStore.d.ts.map +1 -0
  120. package/dist/src/storage/local/LocalDatasetRefStore.js +118 -0
  121. package/dist/src/storage/local/LocalDatasetRefStore.js.map +1 -0
  122. package/dist/src/storage/local/LocalLockService.d.ts +111 -0
  123. package/dist/src/storage/local/LocalLockService.d.ts.map +1 -0
  124. package/dist/src/storage/local/LocalLockService.js +364 -0
  125. package/dist/src/storage/local/LocalLockService.js.map +1 -0
  126. package/dist/src/storage/local/LocalLogStore.d.ts +23 -0
  127. package/dist/src/storage/local/LocalLogStore.d.ts.map +1 -0
  128. package/dist/src/storage/local/LocalLogStore.js +66 -0
  129. package/dist/src/storage/local/LocalLogStore.js.map +1 -0
  130. package/dist/src/storage/local/LocalObjectStore.d.ts +55 -0
  131. package/dist/src/storage/local/LocalObjectStore.d.ts.map +1 -0
  132. package/dist/src/storage/local/LocalObjectStore.js +300 -0
  133. package/dist/src/storage/local/LocalObjectStore.js.map +1 -0
  134. package/dist/src/storage/local/LocalRefStore.d.ts +50 -0
  135. package/dist/src/storage/local/LocalRefStore.d.ts.map +1 -0
  136. package/dist/src/storage/local/LocalRefStore.js +337 -0
  137. package/dist/src/storage/local/LocalRefStore.js.map +1 -0
  138. package/dist/src/storage/local/LocalRepoStore.d.ts +55 -0
  139. package/dist/src/storage/local/LocalRepoStore.d.ts.map +1 -0
  140. package/dist/src/storage/local/LocalRepoStore.js +365 -0
  141. package/dist/src/storage/local/LocalRepoStore.js.map +1 -0
  142. package/dist/src/storage/local/gc.d.ts +92 -0
  143. package/dist/src/storage/local/gc.d.ts.map +1 -0
  144. package/dist/src/storage/local/gc.js +377 -0
  145. package/dist/src/storage/local/gc.js.map +1 -0
  146. package/dist/src/storage/local/index.d.ts +18 -0
  147. package/dist/src/storage/local/index.d.ts.map +1 -0
  148. package/dist/src/storage/local/index.js +18 -0
  149. package/dist/src/storage/local/index.js.map +1 -0
  150. package/dist/src/storage/local/localHelpers.d.ts +25 -0
  151. package/dist/src/storage/local/localHelpers.d.ts.map +1 -0
  152. package/dist/src/storage/local/localHelpers.js +69 -0
  153. package/dist/src/storage/local/localHelpers.js.map +1 -0
  154. package/dist/src/{repository.d.ts → storage/local/repository.d.ts} +8 -4
  155. package/dist/src/storage/local/repository.d.ts.map +1 -0
  156. package/dist/src/{repository.js → storage/local/repository.js} +31 -29
  157. package/dist/src/storage/local/repository.js.map +1 -0
  158. package/dist/src/tasks.d.ts +16 -10
  159. package/dist/src/tasks.d.ts.map +1 -1
  160. package/dist/src/tasks.js +35 -41
  161. package/dist/src/tasks.js.map +1 -1
  162. package/dist/src/test-helpers.d.ts +5 -4
  163. package/dist/src/test-helpers.d.ts.map +1 -1
  164. package/dist/src/test-helpers.js +9 -21
  165. package/dist/src/test-helpers.js.map +1 -1
  166. package/dist/src/transfer/InMemoryTransferBackend.d.ts +75 -0
  167. package/dist/src/transfer/InMemoryTransferBackend.d.ts.map +1 -0
  168. package/dist/src/transfer/InMemoryTransferBackend.js +211 -0
  169. package/dist/src/transfer/InMemoryTransferBackend.js.map +1 -0
  170. package/dist/src/transfer/index.d.ts +9 -0
  171. package/dist/src/transfer/index.d.ts.map +1 -0
  172. package/dist/src/transfer/index.js +11 -0
  173. package/dist/src/transfer/index.js.map +1 -0
  174. package/dist/src/transfer/interfaces.d.ts +103 -0
  175. package/dist/src/transfer/interfaces.d.ts.map +1 -0
  176. package/dist/src/transfer/interfaces.js +6 -0
  177. package/dist/src/transfer/interfaces.js.map +1 -0
  178. package/dist/src/transfer/process.d.ts +55 -0
  179. package/dist/src/transfer/process.d.ts.map +1 -0
  180. package/dist/src/transfer/process.js +144 -0
  181. package/dist/src/transfer/process.js.map +1 -0
  182. package/dist/src/transfer/types.d.ts +106 -0
  183. package/dist/src/transfer/types.d.ts.map +1 -0
  184. package/dist/src/transfer/types.js +61 -0
  185. package/dist/src/transfer/types.js.map +1 -0
  186. package/dist/src/trees.d.ts +147 -59
  187. package/dist/src/trees.d.ts.map +1 -1
  188. package/dist/src/trees.js +372 -419
  189. package/dist/src/trees.js.map +1 -1
  190. package/dist/src/uuid.d.ts +26 -0
  191. package/dist/src/uuid.d.ts.map +1 -0
  192. package/dist/src/uuid.js +80 -0
  193. package/dist/src/uuid.js.map +1 -0
  194. package/dist/src/workspaceStatus.d.ts +6 -4
  195. package/dist/src/workspaceStatus.d.ts.map +1 -1
  196. package/dist/src/workspaceStatus.js +46 -60
  197. package/dist/src/workspaceStatus.js.map +1 -1
  198. package/dist/src/workspaces.d.ts +46 -47
  199. package/dist/src/workspaces.d.ts.map +1 -1
  200. package/dist/src/workspaces.js +281 -221
  201. package/dist/src/workspaces.js.map +1 -1
  202. package/package.json +4 -4
  203. package/dist/src/gc.d.ts +0 -54
  204. package/dist/src/gc.d.ts.map +0 -1
  205. package/dist/src/gc.js +0 -233
  206. package/dist/src/gc.js.map +0 -1
  207. package/dist/src/repository.d.ts.map +0 -1
  208. package/dist/src/repository.js.map +0 -1
  209. package/dist/src/workspaceLock.d.ts +0 -67
  210. package/dist/src/workspaceLock.d.ts.map +0 -1
  211. package/dist/src/workspaceLock.js +0 -217
  212. package/dist/src/workspaceLock.js.map +0 -1
@@ -0,0 +1,707 @@
1
+ /**
2
+ * Copyright (c) 2025 Elara AI Pty Ltd
3
+ * Licensed under BSL 1.1. See LICENSE for details.
4
+ */
5
+ /**
6
+ * Step functions for resumable dataflow execution.
7
+ *
8
+ * Each step function represents a single unit of work that can be:
9
+ * - Called locally in a loop (LocalOrchestrator)
10
+ * - Invoked as a Lambda handler (Step Functions orchestration)
11
+ *
12
+ * Step functions are designed to be:
13
+ * - Small and focused (one step = one Lambda invocation)
14
+ * - Deterministic where possible (pure functions marked as such)
15
+ * - Idempotent for retries
16
+ *
17
+ * Reactive step functions (stepDetectInputChanges, stepInvalidateTasks,
18
+ * stepCheckVersionConsistency) enable the reactive fixpoint loop where
19
+ * input changes during execution trigger re-execution of affected tasks.
20
+ */
21
+ import { variant, some, none } from '@elaraai/east';
22
+ import { dataflowGetGraph, dataflowGetReadyTasks, dataflowGetDependentsToSkip, dataflowResolveInputHashes, dataflowCheckCache, findAffectedTasks, } from '../dataflow.js';
23
+ import { workspaceGetDatasetHash, workspaceSetDatasetByHash, } from '../trees.js';
24
+ import { checkVersionConsistency, mergeVersionVectors, inputVersionVector, snapshotInputVersions, detectInputChanges, } from '../dataset-refs.js';
25
/**
 * Start a fresh dataflow execution for a workspace.
 *
 * Loads the dependency graph, validates the optional task filter, records the
 * current hash of every root input, seeds one version vector per root input,
 * and assembles the initial execution state with every task `pending`.
 * Tasks that are runnable straight away are then flipped to `ready`.
 *
 * @param storage - Storage backend
 * @param repo - Repository identifier
 * @param workspace - Workspace name
 * @param executionId - Unique execution ID
 * @param options - Execution options: `concurrency` (defaults to 4),
 *   `force` (defaults to false) and `filter` (defaults to no filter)
 * @returns `{ state, readyTasks }` - the initial state and the names of the
 *   tasks that can run immediately
 *
 * @throws {TaskNotFoundError} If `options.filter` names a task that is not in the graph
 * @throws {Error} If the workspace ref is missing or empty when the package
 *   structure is read
 *
 * NOTE(review): errors raised by dataflowGetGraph propagate unchanged -
 * presumably WorkspaceNotFoundError / WorkspaceNotDeployedError; confirm in
 * ../dataflow.js.
 */
export async function stepInitialize(storage, repo, workspace, executionId, options = {}) {
    // `??` (not destructuring defaults) so that an explicit null also falls back.
    const concurrency = options.concurrency ?? 4;
    const force = options.force ?? false;
    const filter = options.filter ?? null;

    const graph = await dataflowGetGraph(storage, repo, workspace);

    // A filter must name a task that exists in the graph.
    if (filter !== null && !graph.tasks.some(({ name }) => name === filter)) {
        // Loaded lazily to avoid a circular dependency.
        const { TaskNotFoundError } = await import('../errors.js');
        throw new TaskNotFoundError(filter);
    }

    // Datasets written by tasks; these are excluded from the root-input snapshot.
    const taskOutputPathsSet = new Set(graph.tasks.map(({ output }) => output));
    const taskOutputPaths = [...taskOutputPathsSet];

    const structure = await getWorkspaceStructure(storage, repo, workspace);
    const inputSnapshotMap = await snapshotInputVersions(storage, repo, workspace, structure, taskOutputPathsSet);

    // One version vector per root input, keyed by the input's path.
    const versionVectors = new Map(
        Array.from(inputSnapshotMap, ([keypath, hash]) => [keypath, inputVersionVector(keypath, hash)]),
    );

    // Every task starts out pending with no recorded results.
    const tasks = new Map(
        graph.tasks.map(({ name }) => [
            name,
            {
                name,
                status: 'pending',
                cached: none,
                outputHash: none,
                error: none,
                exitCode: none,
                startedAt: none,
                completedAt: none,
                duration: none,
            },
        ]),
    );

    const state = {
        id: executionId,
        repo,
        workspace,
        startedAt: new Date(),
        concurrency: BigInt(concurrency),
        force,
        filter: filter !== null ? some(filter) : none,
        graph: some(graph),
        graphHash: none,
        tasks,
        executed: 0n,
        cached: 0n,
        failed: 0n,
        skipped: 0n,
        status: 'running',
        completedAt: none,
        error: none,
        versionVectors,
        inputSnapshot: inputSnapshotMap,
        taskOutputPaths,
        reexecuted: 0n,
        events: [],
        eventSeq: 0n,
    };

    // Promote the tasks that can run right now.
    const readyTasks = stepGetReady(state);
    for (const readyName of readyTasks) {
        const entry = tasks.get(readyName);
        if (entry) {
            entry.status = 'ready';
        }
    }
    return { state, readyTasks };
}
124
+ // =============================================================================
125
+ // Helper Functions
126
+ // =============================================================================
127
/**
 * Unwrap the dependency graph held in the execution state.
 *
 * `state.graph` is an option value: when its `type` is `'some'` the wrapped
 * graph is returned, otherwise an error is thrown telling the caller to load
 * the graph (via `graphHash`) before invoking step functions.
 *
 * @param state - Execution state
 * @returns The graph stored in `state.graph`
 * @throws {Error} If the state carries no graph
 */
function getGraph(state) {
    const { graph } = state;
    if (graph.type === 'some') {
        return graph.value;
    }
    throw new Error('Execution state has no graph. For cloud execution, load the graph using graphHash before calling step functions.');
}
139
/**
 * Load the structure of the package deployed to a workspace.
 *
 * Reads the workspace ref, decodes it to find the package hash, reads that
 * package object and returns its `data.structure`.
 *
 * @param storage - Storage backend (uses `refs.workspaceRead` and `objects.read`)
 * @param repo - Repository identifier
 * @param workspace - Workspace name
 * @returns The `structure` field of the decoded package object
 * @throws {Error} If the workspace ref is null or empty
 *
 * @internal
 */
async function getWorkspaceStructure(storage, repo, workspace) {
    const { decodeBeast2For: makeDecoder } = await import('@elaraai/east');
    const { PackageObjectType, WorkspaceStateType } = await import('@elaraai/e3-types');

    const rawWorkspace = await storage.refs.workspaceRead(repo, workspace);
    const isMissing = rawWorkspace === null || rawWorkspace.length === 0;
    if (isMissing) {
        throw new Error(`Workspace '${workspace}' not found or not deployed`);
    }

    const decodeWorkspace = makeDecoder(WorkspaceStateType);
    const { packageHash } = decodeWorkspace(rawWorkspace);

    const rawPackage = await storage.objects.read(repo, packageHash);
    const decodePackage = makeDecoder(PackageObjectType);
    return decodePackage(Buffer.from(rawPackage)).data.structure;
}
158
+ // =============================================================================
159
+ // Pure Step Functions
160
+ // =============================================================================
161
/**
 * List the tasks that can be executed right now.
 *
 * Candidates come from `dataflowGetReadyTasks`, given the graph and the set
 * of tasks whose status is `completed`. A candidate is kept only when:
 * 1. it has an entry in `state.tasks`,
 * 2. its status is `pending` or `ready` (anything else is already being
 *    handled, finished, or deferred), and
 * 3. it matches the task filter, if one is set.
 *
 * Reads state only; nothing is mutated.
 *
 * @param state - Current execution state
 * @returns Array of task names that are ready to execute
 * @throws {Error} If the state carries no graph
 */
export function stepGetReady(state) {
    const completed = new Set(
        Array.from(state.tasks)
            .filter(([, entry]) => entry.status === 'completed')
            .map(([name]) => name),
    );

    const candidates = dataflowGetReadyTasks(getGraph(state), completed);

    // Unwrap the option-typed filter; null means "no filter".
    const filterValue = state.filter.type === 'some' ? state.filter.value : null;

    return candidates.filter(taskName => {
        const entry = state.tasks.get(taskName);
        if (!entry) {
            return false;
        }
        const isWaiting = entry.status === 'pending' || entry.status === 'ready';
        const isSelected = filterValue === null || taskName === filterValue;
        return isWaiting && isSelected;
    });
}
202
/**
 * Decide whether the execution has nothing left to do.
 *
 * Returns false as soon as a task is found that is:
 * - `in_progress` or `deferred`, or
 * - `pending`/`ready`, present in the graph, has no dependency that is
 *   `failed` or `skipped`, and is not excluded by the task filter.
 *
 * `pending`/`ready` tasks that are missing from the graph, are blocked by a
 * failed/skipped dependency, or are filtered out do not hold the execution
 * open. All other statuses are treated as terminal.
 *
 * Reads state only; nothing is mutated.
 *
 * @param state - Current execution state
 * @returns True if execution is complete
 * @throws {Error} If a pending/ready task is inspected and the state carries no graph
 */
export function stepIsComplete(state) {
    const filterValue = state.filter.type === 'some' ? state.filter.value : null;

    // True when the named dependency can never complete.
    const isBlockedBy = dep => {
        const depState = state.tasks.get(dep);
        return Boolean(depState) && (depState.status === 'failed' || depState.status === 'skipped');
    };

    for (const taskState of state.tasks.values()) {
        switch (taskState.status) {
            case 'in_progress':
            case 'deferred':
                return false;
            case 'pending':
            case 'ready': {
                const task = getGraph(state).tasks.find(({ name }) => name === taskState.name);
                if (!task) {
                    break;
                }
                if (task.dependsOn.some(dep => isBlockedBy(dep))) {
                    break; // Can never become runnable
                }
                if (filterValue !== null && taskState.name !== filterValue) {
                    break; // Filtered out, doesn't affect completion
                }
                return false;
            }
            default:
                break;
        }
    }
    return true;
}
245
+ // =============================================================================
246
+ // Reactive Step Functions
247
+ // =============================================================================
248
/**
 * Find root inputs whose hash differs from the stored snapshot.
 *
 * Delegates the comparison to `detectInputChanges`. For every reported
 * change this function overwrites the snapshot entry, replaces the input's
 * version vector, bumps `state.eventSeq` and appends an `input_changed`
 * event to `state.events`. A missing previous hash is recorded as `''`.
 *
 * @param storage - Storage backend
 * @param state - Execution state to mutate (inputSnapshot, versionVectors,
 *   eventSeq, events)
 * @param cachedStructure - Optional workspace structure; when null or
 *   undefined it is read from storage
 * @returns `{ changes, events }` - the detected changes and the events emitted for them
 */
export async function stepDetectInputChanges(storage, state, cachedStructure) {
    const structure = cachedStructure ?? await getWorkspaceStructure(storage, state.repo, state.workspace);
    const outputPaths = new Set(state.taskOutputPaths);
    const changes = await detectInputChanges(storage, state.repo, state.workspace, state.inputSnapshot, structure, outputPaths);

    const events = [];
    for (const { path, newHash, previousHash } of changes) {
        state.inputSnapshot.set(path, newHash);
        state.versionVectors.set(path, inputVersionVector(path, newHash));

        state.eventSeq += 1n;
        const event = variant('input_changed', {
            seq: state.eventSeq,
            timestamp: new Date(),
            path,
            previousHash: previousHash ?? '',
            newHash,
        });
        state.events.push(event);
        events.push(event);
    }
    return { changes, events };
}
284
/**
 * Reset tasks whose inputs changed so they run again.
 *
 * The affected tasks come from `findAffectedTasks`. Per status:
 * - completed: back to pending; cached/outputHash/completedAt/duration are
 *   cleared, the matching counter (cached or executed) is decremented, and a
 *   task_invalidated event is emitted
 * - deferred: back to pending, with no event and not reported as invalidated
 * - in_progress, failed, skipped, pending, ready: left untouched
 *
 * @param state - Execution state to mutate
 * @param changes - Input changes from stepDetectInputChanges
 * @returns `{ invalidated, events }` - names of the completed tasks that were
 *   reset and the events emitted for them
 * @throws {Error} If the state carries no graph
 */
export function stepInvalidateTasks(state, changes) {
    const affected = findAffectedTasks(getGraph(state), changes);
    const invalidated = [];
    const events = [];

    for (const taskName of affected) {
        const entry = state.tasks.get(taskName);
        if (!entry) {
            continue;
        }
        switch (entry.status) {
            case 'completed': {
                // Remember which counter this task contributed to before clearing.
                const wasCached = entry.cached.type === 'some' && entry.cached.value;
                entry.status = 'pending';
                entry.cached = none;
                entry.outputHash = none;
                entry.completedAt = none;
                entry.duration = none;

                if (wasCached) {
                    state.cached -= 1n;
                }
                else {
                    state.executed -= 1n;
                }
                invalidated.push(taskName);

                state.eventSeq += 1n;
                const event = variant('task_invalidated', {
                    seq: state.eventSeq,
                    timestamp: new Date(),
                    task: taskName,
                    reason: `input changed: ${changes.map(c => c.path).join(', ')}`,
                });
                state.events.push(event);
                events.push(event);
                break;
            }
            case 'deferred':
                // Un-defer so readiness is re-evaluated; the task never ran,
                // so it is not reported as invalidated.
                entry.status = 'pending';
                break;
            default:
                // in_progress, failed, skipped, pending and ready are left as-is.
                break;
        }
    }
    return { invalidated, events };
}
352
/**
 * Check whether a task's inputs carry mutually consistent version vectors.
 *
 * Collects the version vector stored for each of the task's input paths
 * (an empty Map when none is stored) and hands them to
 * `checkVersionConsistency`.
 *
 * @param state - Execution state
 * @param taskName - Task to check
 * @returns `{ consistent: true, mergedVV }` when the vectors agree, otherwise
 *   `{ consistent: false, conflictPath }`
 * @throws {Error} If the task is not in the graph, or the state carries no graph
 */
export function stepCheckVersionConsistency(state, taskName) {
    const task = getGraph(state).tasks.find(({ name }) => name === taskName);
    if (!task) {
        throw new Error(`Task '${taskName}' not found in graph`);
    }

    const inputVVs = [];
    for (const inputPath of task.inputs) {
        inputVVs.push(state.versionVectors.get(inputPath) ?? new Map());
    }

    const outcome = checkVersionConsistency(inputVVs);
    return outcome.consistent
        ? { consistent: true, mergedVV: outcome.merged }
        : { consistent: false, conflictPath: outcome.conflictPath };
}
379
+ // =============================================================================
380
+ // Async Step Functions (I/O operations)
381
+ // =============================================================================
382
/**
 * Resolve a task's inputs and look for a reusable cached result.
 *
 * Steps:
 * 1. Resolve the task's input hashes; a null hash means an input is
 *    unassigned and is an error.
 * 2. Unless `state.force` is set, ask the cache for an output hash for this
 *    task hash and these inputs.
 * 3. Accept a cache hit only if the workspace's current output dataset is a
 *    `value` ref with exactly that hash; otherwise the hit is discarded.
 *
 * @param storage - Storage backend
 * @param state - Current execution state
 * @param taskName - Name of the task to prepare
 * @returns `{ task, taskHash, inputHashes, outputPath, cachedOutputHash }`,
 *   where `cachedOutputHash` is null when the task must be executed
 * @throws {Error} If the task is not in the graph, the state carries no
 *   graph, or an input is unassigned
 */
export async function stepPrepareTask(storage, state, taskName) {
    const task = getGraph(state).tasks.find(({ name }) => name === taskName);
    if (!task) {
        throw new Error(`Task '${taskName}' not found in graph`);
    }

    const resolved = await dataflowResolveInputHashes(storage, state.repo, state.workspace, task);
    const validInputHashes = Array.from(resolved);
    if (validInputHashes.includes(null)) {
        throw new Error(`Task '${taskName}' has unassigned input`);
    }

    // With `force` set the cache is never consulted.
    let cachedOutputHash = state.force
        ? null
        : await dataflowCheckCache(storage, state.repo, task.hash, validInputHashes);

    if (cachedOutputHash !== null) {
        // A hit is only usable if the workspace still holds that exact output.
        const { parsePathString } = await import('../dataflow.js');
        const current = await workspaceGetDatasetHash(storage, state.repo, state.workspace, parsePathString(task.output));
        const stillMatches = current.refType === 'value' && current.hash === cachedOutputHash;
        if (!stillMatches) {
            cachedOutputHash = null;
        }
    }

    return {
        task: taskName,
        taskHash: task.hash,
        inputHashes: validInputHashes,
        outputPath: task.output,
        cachedOutputHash,
    };
}
433
+ // =============================================================================
434
+ // State Mutation Step Functions
435
+ // =============================================================================
436
/**
 * Record that a task has begun running.
 *
 * Sets the task's status to `in_progress`, stamps `startedAt`, bumps
 * `state.eventSeq` and appends a `task_started` event to `state.events`.
 *
 * @param state - Execution state to mutate
 * @param taskName - Name of the task
 * @returns The `task_started` event that was appended
 * @throws {Error} If the task has no entry in `state.tasks`
 */
export function stepTaskStarted(state, taskName) {
    const entry = state.tasks.get(taskName);
    if (!entry) {
        throw new Error(`Task '${taskName}' not found in state`);
    }

    // One timestamp shared by the task record and the event.
    const startedAt = new Date();
    entry.status = 'in_progress';
    entry.startedAt = some(startedAt);

    state.eventSeq += 1n;
    const event = variant('task_started', {
        seq: state.eventSeq,
        timestamp: startedAt,
        task: taskName,
    });
    state.events.push(event);
    return event;
}
463
/**
 * Mark a task as completed successfully.
 *
 * Records the result on the task, increments the `cached` or `executed`
 * counter, stores the merged version vector of the task's inputs under the
 * task's output path, promotes pending tasks that became runnable to
 * `ready`, and appends a `task_completed` event.
 *
 * Everything that can throw (task lookup, duration conversion, graph lookup)
 * runs before the first mutation, so a failure leaves the state untouched.
 *
 * @param state - Execution state to mutate
 * @param taskName - Name of the task
 * @param outputHash - Hash of the output dataset
 * @param cached - Whether the result was from cache
 * @param duration - Execution duration in milliseconds; fractional numbers
 *   are rounded to the nearest integer
 * @returns `{ result: { newlyReady }, event }`. `newlyReady` is the list
 *   returned by stepGetReady, so it can include tasks that were already
 *   marked `ready` earlier and have not started yet.
 * @throws {Error} If the task has no entry in `state.tasks`, or the state carries no graph
 * @throws {RangeError} If `duration` is NaN or infinite
 */
export function stepTaskCompleted(state, taskName, outputHash, cached, duration) {
    const taskState = state.tasks.get(taskName);
    if (!taskState) {
        throw new Error(`Task '${taskName}' not found in state`);
    }

    // BigInt() rejects non-integer numbers, so round fractional timings (for
    // example from performance.now()). Non-number inputs (bigint, numeric
    // string) are converted as before.
    const durationMs = typeof duration === 'number'
        ? BigInt(Math.round(duration))
        : BigInt(duration);

    // Resolved before mutating so a missing graph cannot leave a half-updated task.
    const graph = getGraph(state);

    const now = new Date();
    taskState.status = 'completed';
    taskState.cached = some(cached);
    taskState.outputHash = some(outputHash);
    taskState.completedAt = some(now);
    taskState.duration = some(durationMs);

    // Update counters
    if (cached) {
        state.cached += 1n;
    }
    else {
        state.executed += 1n;
    }

    // The output's version vector is the merge of its inputs' vectors.
    const task = graph.tasks.find(t => t.name === taskName);
    if (task) {
        const inputVVs = [];
        for (const inputPath of task.inputs) {
            inputVVs.push(state.versionVectors.get(inputPath) ?? new Map());
        }
        state.versionVectors.set(task.output, mergeVersionVectors(inputVVs));
    }

    // Promote pending tasks that are now runnable.
    const newlyReady = stepGetReady(state);
    for (const name of newlyReady) {
        const candidate = state.tasks.get(name);
        if (candidate && candidate.status === 'pending') {
            candidate.status = 'ready';
        }
    }

    state.eventSeq += 1n;
    const event = variant('task_completed', {
        seq: state.eventSeq,
        timestamp: now,
        task: taskName,
        cached,
        outputHash,
        duration: durationMs,
    });
    state.events.push(event);
    return { result: { newlyReady }, event };
}
527
/**
 * Mark a task as failed.
 *
 * Mutates the execution state (task entry, failure counter, event log) and
 * returns the dependent tasks that should be skipped as a consequence.
 *
 * `error` and `exitCode` are treated as absent when they are `undefined` OR
 * `null`. A process terminated by a signal reports a `null` exit code, and
 * `BigInt(null)` would otherwise throw.
 *
 * @param state - Execution state to mutate
 * @param taskName - Name of the failed task
 * @param error - Error message (optional)
 * @param exitCode - Exit code if process failed (optional)
 * @param duration - Execution duration in milliseconds
 * @returns Result with tasks to skip and event
 * @throws Error if `taskName` has no entry in `state.tasks`
 */
export function stepTaskFailed(state, taskName, error, exitCode, duration) {
    const taskState = state.tasks.get(taskName);
    if (!taskState) {
        throw new Error(`Task '${taskName}' not found in state`);
    }
    const now = new Date();
    // Build the optional values once, before touching the state, so that a
    // conversion failure cannot leave the task half-updated. `!= null` matches
    // both null and undefined.
    const errorOption = error != null ? some(error) : none;
    const exitCodeOption = exitCode != null ? some(BigInt(exitCode)) : none;
    const durationMs = BigInt(duration);
    taskState.status = 'failed';
    taskState.error = errorOption;
    taskState.exitCode = exitCodeOption;
    taskState.completedAt = some(now);
    taskState.duration = some(durationMs);
    // Update counters
    state.failed += 1n;
    // Get filter value (handle Option type)
    const filterValue = state.filter.type === 'some' ? state.filter.value : null;
    // Collect the tasks that are already settled so they are not reported again.
    const completedSet = new Set();
    const skippedSet = new Set();
    for (const [name, ts] of state.tasks) {
        if (ts.status === 'completed') {
            completedSet.add(name);
        }
        else if (ts.status === 'skipped') {
            skippedSet.add(name);
        }
    }
    // Dependents to skip, minus unknown and in-progress tasks, and restricted
    // to the filtered task when a filter is set.
    const toSkip = dataflowGetDependentsToSkip(getGraph(state), taskName, completedSet, skippedSet).filter((name) => {
        const ts = state.tasks.get(name);
        if (!ts || ts.status === 'in_progress') {
            return false;
        }
        return filterValue === null || name === filterValue;
    });
    state.eventSeq += 1n;
    const event = variant('task_failed', {
        seq: state.eventSeq,
        timestamp: now,
        task: taskName,
        error: errorOption,
        exitCode: exitCodeOption,
        duration: durationMs,
    });
    state.events.push(event);
    return { result: { toSkip }, event };
}
585
/**
 * Mark a batch of tasks as skipped because an upstream task failed.
 *
 * Each known task is set to 'skipped' with a zero duration, the skipped
 * counter is incremented, and one 'task_skipped' event per task is appended
 * to the state's event log. Names without an entry in `state.tasks` are
 * ignored.
 *
 * @param state - Execution state to mutate
 * @param taskNames - Names of tasks to skip
 * @param cause - Name of the task that caused the skip
 * @returns Array of events to record
 */
export function stepTasksSkipped(state, taskNames, cause) {
    // All tasks of one batch share a single timestamp.
    const skippedAt = new Date();
    const recorded = [];
    for (const name of taskNames) {
        const entry = state.tasks.get(name);
        if (entry) {
            entry.status = 'skipped';
            entry.completedAt = some(skippedAt);
            entry.duration = some(0n);
            state.skipped += 1n;
            state.eventSeq += 1n;
            const skipEvent = variant('task_skipped', {
                seq: state.eventSeq,
                timestamp: skippedAt,
                task: name,
                cause,
            });
            state.events.push(skipEvent);
            recorded.push(skipEvent);
        }
    }
    return recorded;
}
617
/**
 * Finalize the execution and return the result.
 *
 * Mutates the execution state to mark it as completed or failed, appends an
 * 'execution_completed' event, and builds the summary returned to the caller.
 * The run is successful when no task failed.
 *
 * The duration is derived from the same instant that is stored as
 * `completedAt` and as the event timestamp, so
 * `duration === completedAt - startedAt` always holds.
 *
 * @param state - Execution state to mutate
 * @param runId - Dataflow run ID (UUIDv7) from the orchestrator
 * @returns Final result
 */
export function stepFinalize(state, runId) {
    const now = new Date();
    // Single clock read: reuse `now` instead of calling Date.now() again.
    const duration = now.getTime() - state.startedAt.getTime();
    // Determine success
    const success = state.failed === 0n;
    // Update state
    state.status = success ? 'completed' : 'failed';
    state.completedAt = some(now);
    state.eventSeq += 1n;
    const event = variant('execution_completed', {
        seq: state.eventSeq,
        timestamp: now,
        success,
        executed: state.executed,
        cached: state.cached,
        failed: state.failed,
        skipped: state.skipped,
        duration: BigInt(duration),
    });
    state.events.push(event);
    // The summary exposes plain numbers; the state keeps bigint counters.
    const result = {
        success,
        runId,
        executed: Number(state.executed),
        cached: Number(state.cached),
        failed: Number(state.failed),
        skipped: Number(state.skipped),
        reexecuted: Number(state.reexecuted),
        duration,
    };
    return { result, event };
}
660
/**
 * Cancel the execution.
 *
 * Marks the execution state as cancelled, stores the cancellation reason as
 * the state's error (falling back to a default message), and appends an
 * 'execution_cancelled' event.
 *
 * A `null` reason is treated exactly like an omitted one, so the state error
 * and the event agree on whether a reason was supplied.
 *
 * @param state - Execution state to mutate
 * @param reason - Reason for cancellation
 * @returns Event to record
 */
export function stepCancel(state, reason) {
    const now = new Date();
    // One nullish check drives both the stored error and the event payload.
    const hasReason = reason != null;
    state.status = 'cancelled';
    state.completedAt = some(now);
    state.error = some(hasReason ? reason : 'Execution was cancelled');
    state.eventSeq += 1n;
    const event = variant('execution_cancelled', {
        seq: state.eventSeq,
        timestamp: now,
        reason: hasReason ? some(reason) : none,
    });
    state.events.push(event);
    return event;
}
682
+ // =============================================================================
683
+ // Tree Update Step Function
684
+ // =============================================================================
685
/**
 * Write a task's output into the workspace tree together with its version
 * vector.
 *
 * The keypath string is parsed into a path and the output ref is written
 * with the merged version vector of the task's inputs. Per-dataset ref
 * writes are atomic and independent, so concurrent writes to different
 * paths need no serialization.
 *
 * @param storage - Storage backend
 * @param repo - Repository identifier
 * @param workspace - Workspace name
 * @param outputPathStr - Output path as a keypath string (e.g., ".results.data")
 * @param outputHash - Hash of the output dataset to write
 * @param versions - Merged version vector for provenance tracking
 * @returns Result indicating success
 */
export async function stepApplyTreeUpdate(storage, repo, workspace, outputPathStr, outputHash, versions) {
    // The path parser is loaded lazily at call time via a dynamic import.
    const dataflowModule = await import('../dataflow.js');
    const parsePathString = dataflowModule.parsePathString;
    const targetPath = parsePathString(outputPathStr);
    // Persist the output ref along with its version vector.
    await workspaceSetDatasetByHash(storage, repo, workspace, targetPath, outputHash, versions);
    return { ok: true };
}
707
+ //# sourceMappingURL=steps.js.map