py-data-engine 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. data_engine/__init__.py +37 -0
  2. data_engine/application/__init__.py +39 -0
  3. data_engine/application/actions.py +42 -0
  4. data_engine/application/catalog.py +151 -0
  5. data_engine/application/control.py +213 -0
  6. data_engine/application/details.py +73 -0
  7. data_engine/application/runtime.py +449 -0
  8. data_engine/application/workspace.py +62 -0
  9. data_engine/authoring/__init__.py +14 -0
  10. data_engine/authoring/builder.py +31 -0
  11. data_engine/authoring/execution/__init__.py +6 -0
  12. data_engine/authoring/execution/app.py +6 -0
  13. data_engine/authoring/execution/context.py +82 -0
  14. data_engine/authoring/execution/continuous.py +176 -0
  15. data_engine/authoring/execution/grouped.py +106 -0
  16. data_engine/authoring/execution/logging.py +83 -0
  17. data_engine/authoring/execution/polling.py +135 -0
  18. data_engine/authoring/execution/runner.py +210 -0
  19. data_engine/authoring/execution/single.py +171 -0
  20. data_engine/authoring/flow.py +361 -0
  21. data_engine/authoring/helpers.py +160 -0
  22. data_engine/authoring/model.py +59 -0
  23. data_engine/authoring/primitives.py +430 -0
  24. data_engine/authoring/services.py +42 -0
  25. data_engine/devtools/__init__.py +3 -0
  26. data_engine/devtools/project_ast_map.py +503 -0
  27. data_engine/docs/__init__.py +1 -0
  28. data_engine/docs/sphinx_source/_static/custom.css +13 -0
  29. data_engine/docs/sphinx_source/api.rst +42 -0
  30. data_engine/docs/sphinx_source/conf.py +37 -0
  31. data_engine/docs/sphinx_source/guides/app-runtime-and-workspaces.md +397 -0
  32. data_engine/docs/sphinx_source/guides/authoring-flow-modules.md +215 -0
  33. data_engine/docs/sphinx_source/guides/configuring-flows.md +185 -0
  34. data_engine/docs/sphinx_source/guides/core-concepts.md +208 -0
  35. data_engine/docs/sphinx_source/guides/database-methods.md +107 -0
  36. data_engine/docs/sphinx_source/guides/duckdb-helpers.md +462 -0
  37. data_engine/docs/sphinx_source/guides/flow-context.md +538 -0
  38. data_engine/docs/sphinx_source/guides/flow-methods.md +206 -0
  39. data_engine/docs/sphinx_source/guides/getting-started.md +271 -0
  40. data_engine/docs/sphinx_source/guides/project-inventory.md +5683 -0
  41. data_engine/docs/sphinx_source/guides/project-map.md +118 -0
  42. data_engine/docs/sphinx_source/guides/recipes.md +268 -0
  43. data_engine/docs/sphinx_source/index.rst +22 -0
  44. data_engine/domain/__init__.py +92 -0
  45. data_engine/domain/actions.py +69 -0
  46. data_engine/domain/catalog.py +128 -0
  47. data_engine/domain/details.py +214 -0
  48. data_engine/domain/diagnostics.py +56 -0
  49. data_engine/domain/errors.py +104 -0
  50. data_engine/domain/inspection.py +99 -0
  51. data_engine/domain/logs.py +118 -0
  52. data_engine/domain/operations.py +172 -0
  53. data_engine/domain/operator.py +72 -0
  54. data_engine/domain/runs.py +155 -0
  55. data_engine/domain/runtime.py +279 -0
  56. data_engine/domain/source_state.py +17 -0
  57. data_engine/domain/support.py +54 -0
  58. data_engine/domain/time.py +23 -0
  59. data_engine/domain/workspace.py +159 -0
  60. data_engine/flow_modules/__init__.py +1 -0
  61. data_engine/flow_modules/flow_module_compiler.py +179 -0
  62. data_engine/flow_modules/flow_module_loader.py +201 -0
  63. data_engine/helpers/__init__.py +25 -0
  64. data_engine/helpers/duckdb.py +705 -0
  65. data_engine/hosts/__init__.py +1 -0
  66. data_engine/hosts/daemon/__init__.py +23 -0
  67. data_engine/hosts/daemon/app.py +221 -0
  68. data_engine/hosts/daemon/bootstrap.py +69 -0
  69. data_engine/hosts/daemon/client.py +465 -0
  70. data_engine/hosts/daemon/commands.py +64 -0
  71. data_engine/hosts/daemon/composition.py +310 -0
  72. data_engine/hosts/daemon/constants.py +15 -0
  73. data_engine/hosts/daemon/entrypoints.py +97 -0
  74. data_engine/hosts/daemon/lifecycle.py +191 -0
  75. data_engine/hosts/daemon/manager.py +272 -0
  76. data_engine/hosts/daemon/ownership.py +126 -0
  77. data_engine/hosts/daemon/runtime_commands.py +188 -0
  78. data_engine/hosts/daemon/runtime_control.py +31 -0
  79. data_engine/hosts/daemon/server.py +84 -0
  80. data_engine/hosts/daemon/shared_state.py +147 -0
  81. data_engine/hosts/daemon/state_sync.py +101 -0
  82. data_engine/platform/__init__.py +1 -0
  83. data_engine/platform/identity.py +35 -0
  84. data_engine/platform/local_settings.py +146 -0
  85. data_engine/platform/theme.py +259 -0
  86. data_engine/platform/workspace_models.py +190 -0
  87. data_engine/platform/workspace_policy.py +333 -0
  88. data_engine/runtime/__init__.py +1 -0
  89. data_engine/runtime/file_watch.py +185 -0
  90. data_engine/runtime/ledger_models.py +116 -0
  91. data_engine/runtime/runtime_db.py +938 -0
  92. data_engine/runtime/shared_state.py +523 -0
  93. data_engine/services/__init__.py +49 -0
  94. data_engine/services/daemon.py +64 -0
  95. data_engine/services/daemon_state.py +40 -0
  96. data_engine/services/flow_catalog.py +102 -0
  97. data_engine/services/flow_execution.py +48 -0
  98. data_engine/services/ledger.py +85 -0
  99. data_engine/services/logs.py +65 -0
  100. data_engine/services/runtime_binding.py +105 -0
  101. data_engine/services/runtime_execution.py +126 -0
  102. data_engine/services/runtime_history.py +62 -0
  103. data_engine/services/settings.py +58 -0
  104. data_engine/services/shared_state.py +28 -0
  105. data_engine/services/theme.py +59 -0
  106. data_engine/services/workspace_provisioning.py +224 -0
  107. data_engine/services/workspaces.py +74 -0
  108. data_engine/ui/__init__.py +3 -0
  109. data_engine/ui/cli/__init__.py +19 -0
  110. data_engine/ui/cli/app.py +161 -0
  111. data_engine/ui/cli/commands_doctor.py +178 -0
  112. data_engine/ui/cli/commands_run.py +80 -0
  113. data_engine/ui/cli/commands_start.py +100 -0
  114. data_engine/ui/cli/commands_workspace.py +97 -0
  115. data_engine/ui/cli/dependencies.py +44 -0
  116. data_engine/ui/cli/parser.py +56 -0
  117. data_engine/ui/gui/__init__.py +25 -0
  118. data_engine/ui/gui/app.py +116 -0
  119. data_engine/ui/gui/bootstrap.py +487 -0
  120. data_engine/ui/gui/bootstrapper.py +140 -0
  121. data_engine/ui/gui/cache_models.py +23 -0
  122. data_engine/ui/gui/control_support.py +185 -0
  123. data_engine/ui/gui/controllers/__init__.py +6 -0
  124. data_engine/ui/gui/controllers/flows.py +439 -0
  125. data_engine/ui/gui/controllers/runtime.py +245 -0
  126. data_engine/ui/gui/dialogs/__init__.py +12 -0
  127. data_engine/ui/gui/dialogs/messages.py +88 -0
  128. data_engine/ui/gui/dialogs/previews.py +222 -0
  129. data_engine/ui/gui/helpers/__init__.py +62 -0
  130. data_engine/ui/gui/helpers/inspection.py +81 -0
  131. data_engine/ui/gui/helpers/lifecycle.py +112 -0
  132. data_engine/ui/gui/helpers/scroll.py +28 -0
  133. data_engine/ui/gui/helpers/theming.py +87 -0
  134. data_engine/ui/gui/icons/dark_light.svg +12 -0
  135. data_engine/ui/gui/icons/documentation.svg +1 -0
  136. data_engine/ui/gui/icons/failed.svg +3 -0
  137. data_engine/ui/gui/icons/group.svg +4 -0
  138. data_engine/ui/gui/icons/home.svg +2 -0
  139. data_engine/ui/gui/icons/manual.svg +2 -0
  140. data_engine/ui/gui/icons/poll.svg +2 -0
  141. data_engine/ui/gui/icons/schedule.svg +4 -0
  142. data_engine/ui/gui/icons/settings.svg +2 -0
  143. data_engine/ui/gui/icons/started.svg +3 -0
  144. data_engine/ui/gui/icons/success.svg +3 -0
  145. data_engine/ui/gui/icons/view-log.svg +3 -0
  146. data_engine/ui/gui/icons.py +50 -0
  147. data_engine/ui/gui/launcher.py +48 -0
  148. data_engine/ui/gui/presenters/__init__.py +72 -0
  149. data_engine/ui/gui/presenters/docs.py +140 -0
  150. data_engine/ui/gui/presenters/logs.py +58 -0
  151. data_engine/ui/gui/presenters/runtime_projection.py +29 -0
  152. data_engine/ui/gui/presenters/sidebar.py +88 -0
  153. data_engine/ui/gui/presenters/steps.py +148 -0
  154. data_engine/ui/gui/presenters/workspace.py +39 -0
  155. data_engine/ui/gui/presenters/workspace_binding.py +75 -0
  156. data_engine/ui/gui/presenters/workspace_settings.py +182 -0
  157. data_engine/ui/gui/preview_models.py +37 -0
  158. data_engine/ui/gui/render_support.py +241 -0
  159. data_engine/ui/gui/rendering/__init__.py +12 -0
  160. data_engine/ui/gui/rendering/artifacts.py +95 -0
  161. data_engine/ui/gui/rendering/icons.py +50 -0
  162. data_engine/ui/gui/runtime.py +47 -0
  163. data_engine/ui/gui/state_support.py +193 -0
  164. data_engine/ui/gui/support.py +214 -0
  165. data_engine/ui/gui/surface.py +209 -0
  166. data_engine/ui/gui/theme.py +720 -0
  167. data_engine/ui/gui/widgets/__init__.py +34 -0
  168. data_engine/ui/gui/widgets/config.py +41 -0
  169. data_engine/ui/gui/widgets/logs.py +62 -0
  170. data_engine/ui/gui/widgets/panels.py +507 -0
  171. data_engine/ui/gui/widgets/sidebar.py +130 -0
  172. data_engine/ui/gui/widgets/steps.py +84 -0
  173. data_engine/ui/tui/__init__.py +5 -0
  174. data_engine/ui/tui/app.py +222 -0
  175. data_engine/ui/tui/bootstrap.py +475 -0
  176. data_engine/ui/tui/bootstrapper.py +117 -0
  177. data_engine/ui/tui/controllers/__init__.py +6 -0
  178. data_engine/ui/tui/controllers/flows.py +349 -0
  179. data_engine/ui/tui/controllers/runtime.py +167 -0
  180. data_engine/ui/tui/runtime.py +34 -0
  181. data_engine/ui/tui/state_support.py +141 -0
  182. data_engine/ui/tui/support.py +63 -0
  183. data_engine/ui/tui/theme.py +204 -0
  184. data_engine/ui/tui/widgets.py +123 -0
  185. data_engine/views/__init__.py +109 -0
  186. data_engine/views/actions.py +80 -0
  187. data_engine/views/artifacts.py +58 -0
  188. data_engine/views/flow_display.py +69 -0
  189. data_engine/views/logs.py +54 -0
  190. data_engine/views/models.py +96 -0
  191. data_engine/views/presentation.py +133 -0
  192. data_engine/views/runs.py +62 -0
  193. data_engine/views/state.py +39 -0
  194. data_engine/views/status.py +13 -0
  195. data_engine/views/text.py +109 -0
  196. py_data_engine-0.1.0.dist-info/METADATA +330 -0
  197. py_data_engine-0.1.0.dist-info/RECORD +200 -0
  198. py_data_engine-0.1.0.dist-info/WHEEL +5 -0
  199. py_data_engine-0.1.0.dist-info/entry_points.txt +2 -0
  200. py_data_engine-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,397 @@
1
+ # App Runtime and Workspaces
2
+
3
+ This guide explains how the desktop app, authored workspaces, shared workspace state, and machine-local runtime state fit together.
4
+
5
+ If you are writing flows, this is the missing "how the whole thing hangs together" page.
6
+
7
+ ## The two roots to keep in mind
8
+
9
+ There are usually two important folders:
10
+
11
+ - the workspace collection root
12
+ - one authored workspace inside that collection
13
+
14
+ Example:
15
+
16
+ ```text
17
+ workspaces/
18
+   example_workspace/
19
+     flow_modules/
20
+     flow_modules/flow_helpers/
21
+     config/
22
+     databases/
23
+     .workspace_state/
24
+ ```
25
+
26
+ The collection root is the parent folder that contains one or more authored workspaces.
27
+
28
+ The authored workspace is the folder that contains the authoring surface for one logical workspace:
29
+
30
+ - `flow_modules/`
31
+ - `flow_modules/flow_helpers/`
32
+ - `config/`
33
+ - `databases/`
34
+
35
+ That authored workspace is what the app binds to when you select a workspace in the UI.
36
+
37
+ ## How the app is structured
38
+
39
+ The desktop app is a single-window operator surface that binds to one authored workspace at a time.
40
+
41
+ When you change the selected workspace, the app rebinds:
42
+
43
+ - workspace paths
44
+ - flow discovery
45
+ - daemon client and daemon manager
46
+ - local runtime ledger
47
+ - visible run history and log views
48
+ - control state and lease state
49
+
50
+ This means the app is multi-workspace for discovery and selection, but single-workspace for active runtime context.
51
+
52
+ That distinction matters when you are reasoning about:
53
+
54
+ - what is cheap to inspect globally
55
+ - what is authoritative for the currently selected workspace
56
+ - why the UI can feel like one workspace "becomes" the app until you switch again
57
+
58
+ ## Authored files vs generated runtime artifacts
59
+
60
+ The authored workspace is intentionally small and human-owned.
61
+
62
+ Author-owned folders:
63
+
64
+ - `flow_modules/`: runnable flow modules
65
+ - `flow_modules/flow_helpers/`: reusable helper code imported by flows
66
+ - `config/`: workspace-local TOML config files
67
+ - `databases/`: a conventional home for workspace-local database files
68
+
69
+ Generated or runtime-managed state lives elsewhere:
70
+
71
+ - shared workspace state inside `.workspace_state/`
72
+ - machine-local runtime artifacts under the app runtime root
73
+
74
+ That split is deliberate:
75
+
76
+ - the authored workspace is what you share, edit, and reason about
77
+ - runtime caches and ledgers are free to be machine-local and disposable
78
+
79
+ ## Shared workspace state
80
+
81
+ Every authored workspace can also contain a shared control and checkpoint folder:
82
+
83
+ ```text
84
+ .workspace_state/
85
+   available/
86
+   leased/
87
+   stale/
88
+   leases/
89
+   control_requests/
90
+   state/
91
+     runs/
92
+     step_runs/
93
+     logs/
94
+     file_state/
95
+ ```
96
+
97
+ This is the workspace-coordination layer.
98
+
99
+ It is used for:
100
+
101
+ - control ownership
102
+ - lease heartbeat/checkpoint state
103
+ - stale-lease recovery
104
+ - handoff requests between workstations
105
+ - shared runtime snapshots
106
+
107
+ ### Available, leased, and stale
108
+
109
+ The app and daemon use simple marker folders to represent workspace control:
110
+
111
+ - `available/<workspace_id>` means nobody currently owns the workspace
112
+ - `leased/<workspace_id>` means one machine currently owns it
113
+ - `stale/...` is where stale leased markers are quarantined during recovery
114
+
115
+ Only one workstation should actively own a workspace at a time.
116
+
117
+ ### Lease metadata and heartbeat
118
+
119
+ When a daemon owns a workspace, it writes lease metadata in:
120
+
121
+ - `.workspace_state/leases/<workspace_id>.parquet`
122
+
123
+ That metadata includes:
124
+
125
+ - workspace id
126
+ - machine id / host name
127
+ - daemon id
128
+ - PID
129
+ - status
130
+ - started time
131
+ - last checkpoint time
132
+ - app version
133
+ - snapshot generation id
134
+
135
+ The checkpoint time is the heartbeat signal. The daemon refreshes it during normal operation so other clients can tell:
136
+
137
+ - the workspace is still controlled
138
+ - who controls it
139
+ - whether the controlling daemon looks healthy
140
+
141
+ The control model currently uses:
142
+
143
+ - a target checkpoint interval of 30 seconds
144
+ - a stale threshold of 90 seconds
145
+
146
+ Those numbers are part of the runtime domain model, not an arbitrary UI convention.
147
+
148
+ ### Shared runtime snapshots
149
+
150
+ The shared runtime snapshot is written into parquet files beneath:
151
+
152
+ - `.workspace_state/state/runs/`
153
+ - `.workspace_state/state/step_runs/`
154
+ - `.workspace_state/state/logs/`
155
+ - `.workspace_state/state/file_state/`
156
+
157
+ These files let one workstation publish the current runtime picture so another workstation can hydrate a local read model without owning the workspace.
158
+
159
+ This is how the app can show meaningful status while observing another machine's daemon rather than controlling the workspace directly.
160
+
161
+ ## Local state vs workspace state
162
+
163
+ Data Engine uses both shared workspace state and machine-local state.
164
+
165
+ ### Shared workspace state
166
+
167
+ Shared workspace state lives inside the authored workspace under `.workspace_state/`.
168
+
169
+ It exists so multiple workstations can coordinate around:
170
+
171
+ - control ownership
172
+ - control requests
173
+ - shared run history snapshots
174
+ - shared logs
175
+ - file freshness state
176
+
177
+ ### Machine-local state
178
+
179
+ Machine-local state lives under the app runtime root and local settings store.
180
+
181
+ This includes:
182
+
183
+ - the local SQLite runtime ledger for the currently selected workspace
184
+ - compiled flow-module artifacts
185
+ - runtime caches
186
+ - daemon log files
187
+ - app-local workspace selection and collection-root settings
188
+
189
+ The local runtime ledger path is resolved per workspace. It is machine-local, not shared.
190
+
191
+ That local ledger is important because the desktop app needs a fast local read model even when the authoritative daemon is elsewhere.
192
+
193
+ ### Why both exist
194
+
195
+ The split gives the system two useful properties:
196
+
197
+ - one workstation can own and publish runtime state for a workspace
198
+ - another workstation can still open the workspace and observe it without taking control
199
+
200
+ It also keeps the authored workspace from becoming a dumping ground for every cache and local artifact.
201
+
202
+ ## Control, handoff, and control requests
203
+
204
+ Workspace control is intentionally conservative.
205
+
206
+ The basic model is:
207
+
208
+ 1. a workstation claims the workspace
209
+ 2. that workstation's daemon becomes the active owner
210
+ 3. it keeps the lease alive through checkpoints
211
+ 4. other workstations observe that the workspace is leased
212
+
213
+ If another workstation wants control, it can request it. Those requests are written to:
214
+
215
+ - `.workspace_state/control_requests/<workspace_id>.parquet`
216
+
217
+ A control request records:
218
+
219
+ - requester machine id
220
+ - requester host name
221
+ - requester pid
222
+ - requester client kind
223
+ - request time
224
+
225
+ The app surfaces this as "control requested" rather than silently stealing ownership.
226
+
227
+ ### Handoff and takeover
228
+
229
+ The control UI distinguishes between:
230
+
231
+ - local ownership
232
+ - another machine owning the workspace
233
+ - a pending local request for takeover
234
+ - takeover becoming available after the remote lease appears stale
235
+
236
+ That behavior comes from `WorkspaceControlState`, which derives operator-facing status from:
237
+
238
+ - the last daemon snapshot
239
+ - whether the daemon is live
240
+ - the current lease metadata checkpoint age
241
+ - any pending control request
242
+
243
+ ### When a takeover is available
244
+
245
+ If a workspace is leased but the last checkpoint is older than the stale threshold, the UI can surface takeover availability.
246
+
247
+ The system can also quarantine stale lease state by moving it into the `stale/` area before reclaiming the workspace.
248
+
249
+ ## The daemon and the selected workspace
250
+
251
+ The desktop app talks to a per-workspace local daemon.
252
+
253
+ For GUI use, the daemon lifecycle is intentionally ephemeral:
254
+
255
+ - it is created for the selected workspace as needed
256
+ - it can survive workspace switches when active work is still running
257
+ - it is not supposed to linger forever just because the GUI once touched that workspace
258
+
259
+ The important behavior is this:
260
+
261
+ - switching away from a workspace should not tear down active work
262
+ - switching back should rehydrate the selected workspace's daemon state immediately
263
+
264
+ That immediate rehydration is what keeps engine state, manual runs, and control state accurate after a workspace switch.
265
+
266
+ ## Workspace selection
267
+
268
+ The workspace selector in the app chooses which authored workspace the window is currently bound to.
269
+
270
+ When you switch workspaces, the app:
271
+
272
+ - closes workspace-scoped preview dialogs
273
+ - invalidates stale deferred message-box callbacks
274
+ - hides the selector popup
275
+ - queues the actual rebind one Qt tick later
276
+
277
+ That last step is important because it lets the native combo-box popup finish closing before the rest of the workspace state is rebuilt.
278
+
279
+ Practically, the selected workspace governs:
280
+
281
+ - which flows are loaded
282
+ - which runtime ledger is open
283
+ - which daemon is being queried or controlled
284
+ - which logs and runs are visible in the main view
285
+ - which workspace-relative `context.config(...)` and `context.database(...)` calls make sense during authoring
286
+
287
+ ## Workspace provisioning
288
+
289
+ Provisioning is deliberately safe and additive.
290
+
291
+ Provisioning a workspace creates missing conventional folders without overwriting existing files:
292
+
293
+ - `flow_modules/`
294
+ - `flow_modules/flow_helpers/`
295
+ - `config/`
296
+ - `databases/`
297
+ - `.vscode/settings.json`
298
+
299
+ Provisioning also writes a `.vscode/settings.json` at the collection root.
300
+
301
+ If those files already exist, the provisioning service preserves them by default rather than overwriting them.
302
+
303
+ This is meant to make a new workspace usable immediately without turning provisioning into a heavy bootstrap system.
304
+
305
+ ## VS Code provisioning
306
+
307
+ Data Engine now writes VS Code settings in two places:
308
+
309
+ - at the workspace collection root
310
+ - at the individual authored workspace root
311
+
312
+ Both settings files use a workspace-relative interpreter:
313
+
314
+ ```json
315
+ "python.defaultInterpreterPath": "${workspaceFolder}/.venv"
316
+ ```
317
+
318
+ That makes the settings portable across workstations as long as each workstation keeps its venv in the same relative place.
319
+
320
+ The generated settings also:
321
+
322
+ - hide `.workspace_state` from Explorer and search
323
+ - set terminal environment variables for Data Engine paths
324
+ - add `src/` to `python.analysis.extraPaths` when running from a checkout
325
+ - enable pytest configuration when a checkout-local `tests/` folder exists
326
+
327
+ The collection-root settings are for the "open the whole workspace collection in VS Code" workflow.
328
+
329
+ The authored-workspace settings are for the "open just one workspace" workflow.
330
+
331
+ ## Logging and run history
332
+
333
+ There are a few different log and history concepts that are easy to blur together.
334
+
335
+ ### Shared runtime logs
336
+
337
+ The daemon publishes shared log snapshots into `.workspace_state/state/logs/`.
338
+
339
+ Those snapshots are part of the shared runtime picture used by observing clients.
340
+
341
+ ### Local runtime ledger
342
+
343
+ The selected workspace also has a machine-local SQLite runtime ledger. That is the app's fast local runtime store and is what powers most local querying, hydrated snapshots, and UI views.
344
+
345
+ ### GUI run history limits
346
+
347
+ The GUI intentionally limits how much visible run history it renders at once. The current run-history sidebar/view is capped at 100 visible run groups in the UI.
348
+
349
+ That cap is a presentation choice, not a statement that only 100 runs exist.
350
+
351
+ ### "Runs last 30 days"
352
+
353
+ The small footer tag on the home view shows:
354
+
355
+ - modules
356
+ - groups
357
+ - flows
358
+ - runs in the last 30 days
359
+
360
+ That 30-day value is a summary count for the currently selected workspace. It is not a documented retention policy for deleting logs or runs.
361
+
362
+ ## The kill switch
363
+
364
+ The Settings pane exposes an emergency kill switch for the selected workspace daemon.
365
+
366
+ This is intentionally coarse.
367
+
368
+ It does not try to kill one hung step or one worker thread. Instead it:
369
+
370
+ 1. asks the daemon to shut down normally
371
+ 2. waits briefly for a graceful exit
372
+ 3. force-kills the daemon process if it is still alive
373
+ 4. performs best-effort cleanup of local daemon/lease state
374
+
375
+ That is the right emergency tool when a flow is stuck inside a blocking native call or an uninterruptible external library path.
376
+
377
+ It is intentionally user-driven. The system does not try to infer "stuck" from heuristics before surfacing the action.
378
+
379
+ ## How this affects flow authors
380
+
381
+ The important authoring consequence is that a flow module is only one part of the overall system.
382
+
383
+ Your flow code runs inside:
384
+
385
+ - one authored workspace
386
+ - one selected app binding
387
+ - one daemon or manual run context
388
+ - one shared-control model
389
+
390
+ That is why the `FlowContext` surface is so valuable:
391
+
392
+ - `context.source` and `context.mirror` understand source-relative and output-relative paths
393
+ - `context.config` gives you structured workspace-local TOML config
394
+ - `context.database(...)` gives you a conventional workspace-local database path
395
+ - `context.metadata` lets you publish runtime details back into the UI/runtime model
396
+
397
+ For the authoring-level details, continue with [FlowContext](flow-context.md).
@@ -0,0 +1,215 @@
1
+ # Authoring Flow Modules
2
+
3
+ Flow modules live in:
4
+
5
+ - `workspaces/<workspace_id>/flow_modules/<name>.ipynb`
6
+ - `workspaces/<workspace_id>/flow_modules/<name>.py`
7
+
8
+ Reusable helper modules can live in:
9
+
10
+ - `workspaces/<workspace_id>/flow_modules/flow_helpers/<name>.py`
11
+
12
+ Each flow module should export:
13
+
14
+ - optional `DESCRIPTION`
15
+ - `build() -> Flow`
16
+
17
+ The flow-module filename is the durable flow identity used by discovery and runtime state. If you rename the file, you are effectively creating a different flow as far as the system is concerned.
18
+
19
+ ## Required contract
20
+
21
+ ```python
22
+ from data_engine import Flow
23
+
24
+ DESCRIPTION = "Reads workbook inputs and writes mirrored parquet outputs."
25
+
26
+
27
+ def build():
28
+     return Flow(group="Claims")
29
+ ```
30
+
31
+ When you want a custom display title in the UI, set `label=` on the returned `Flow(...)`. Otherwise the UI derives a readable title from the flow-module filename.
32
+
33
+ `build()` must not accept any parameters.
34
+
35
+ Keep module import-time code side-effect free. The app needs to discover flows safely and repeatedly, so top-level code should not:
36
+
37
+ - run queries
38
+ - write files
39
+ - start background work
40
+ - depend on interactive state
41
+
42
+ Do that work inside steps instead.
43
+
44
+ ## Step style
45
+
46
+ Every `step(...)` callable receives one `context` argument:
47
+
48
+ ```python
49
+ def read_claims(context):
50
+     ...
51
+
52
+
53
+ def clean_claims(context):
54
+     ...
55
+ ```
56
+
57
+ `map(...)` and `step_each(...)` are the batch-oriented exception. They accept a callable with either of these signatures:
58
+
59
+ ```python
60
+ def validate_pdf(file_ref):
61
+     ...
62
+
63
+
64
+ def validate_pdf_with_context(context, file_ref):
65
+     ...
66
+ ```
67
+
68
+ `map(...)` always returns a `Batch`, and `step_each(...)` is an equivalent alias. Both raise immediately when the current batch is empty.
69
+
70
+ Use native libraries directly inside those steps:
71
+
72
+ - Polars for dataframe reads, transforms, and writes
73
+ - DuckDB for SQL and database work
74
+ - `pathlib` and normal Python for filesystem logic
75
+
76
+ That simplicity is the intended authoring experience. Flow modules should feel like normal Python modules with a small orchestration surface, not like a second programming language.
77
+
78
+ ## Good patterns
79
+
80
+ - keep import-time code side-effect free
81
+ - keep expensive work inside steps
82
+ - use `save_as=` and `use=` to preserve intermediate objects
83
+ - use `build().preview(use="name")` in notebooks when you want to inspect one saved intermediate object quickly
84
+ - use `collect(...)` when you want a batch of files
85
+ - use `map(...)` or `step_each(...)` when the same callable should run once per batch item
86
+ - use `context.source` for source-relative paths
87
+ - use `context.mirror` for write-ready output paths
88
+ - return the written `Path` from output steps so the UI can enable `Inspect`
89
+ - move shared parsing, SQL, and utility code into `flow_modules/flow_helpers/*.py` and import it from flows with `from flow_helpers.<name> import ...`
90
+
91
+ Also good:
92
+
93
+ - use `context.config.require("name")` for required TOML config
94
+ - use `context.database("analytics/db.duckdb")` for workspace-local database paths
95
+ - record useful UI/runtime details in `context.metadata`
96
+ - keep writer steps narrow and explicit
97
+ - split "build data" and "write data" into separate steps when you want a previewable intermediate
98
+
99
+ Usually worth avoiding:
100
+
101
+ - monolithic steps that read, transform, and write everything at once
102
+ - hand-built relative path logic when `context.source` or `context.mirror` already models it
103
+ - hidden global state in helper modules
104
+ - returning a path that was never actually written
105
+
106
+ ## Helper modules
107
+
108
+ Helper modules are regular Python files under `flow_modules/flow_helpers/`. They are mirrored into compiled workspace artifacts and are importable from both notebook-authored and Python-authored flows.
109
+
110
+ Example:
111
+
112
+ ```python
113
+ # flow_modules/flow_helpers/claims_sql.py
114
+ LATEST_CLAIMS_SQL = "select * from claims where is_latest = true"
115
+ ```
116
+
117
+ ```python
118
+ # flow_modules/claims_report.py
119
+ from flow_helpers.claims_sql import LATEST_CLAIMS_SQL
120
+ from data_engine import Flow
121
+
122
+
123
+ def build():
124
+     return Flow(group="Claims")
125
+ ```
126
+
127
+ Files in `flow_modules/flow_helpers/` are not discovered as runnable flows. They exist only to support authored flow modules.
128
+
129
+ This is the right home for:
130
+
131
+ - shared SQL strings
132
+ - parsing helpers
133
+ - file naming utilities
134
+ - common dataframe transforms
135
+ - shared constants
136
+
137
+ It is not the right home for code that tries to secretly become a flow. If it should run independently and appear in the app, it belongs in its own flow module with its own `build()`.
138
+
139
+ ## Example
140
+
141
+ ```python
142
+ from data_engine import Flow
143
+ import polars as pl
144
+
145
+
146
+ def read_claims(file_ref):
147
+     return pl.read_excel(file_ref.path)
148
+
149
+
150
+ def concat_claims(context):
151
+     return pl.concat(context.current, how="vertical_relaxed")
152
+
153
+
154
+ def keep_completed(context):
155
+     return context.current.filter(pl.col("Step TO") == "COMPLETED")
156
+
157
+
158
+ def write_target(context):
159
+     output = context.mirror.file("example_completed.parquet")
160
+     context.current.write_parquet(output)
161
+     return output
162
+
163
+
164
+ def build():
165
+     return (
166
+         Flow(group="Claims")
167
+         .watch(mode="schedule", run_as="batch", interval="15m", source="../../example_data/Input/claims_flat")
168
+         .mirror(root="../../example_data/Output/example_completed")
169
+         .collect([".xlsx"], save_as="claim_files")
170
+         .map(read_claims, use="claim_files", save_as="claim_frames")
171
+         .step(concat_claims, use="claim_frames", save_as="raw_df")
172
+         .step(keep_completed, use="raw_df", save_as="clean_df")
173
+         .step(write_target, use="clean_df")
174
+     )
175
+ ```
176
+
177
+ That example shows `map(...)` in context:
178
+
179
+ - `collect(...)` gathers a batch of `FileRef` items
180
+ - `map(...)` reads each file into its own dataframe, producing a batch of dataframes
181
+ - later `step(...)` callables operate on the whole batch result
182
+
183
+ There is no separate config layer that turns one flow module into multiple named flow variants after build time.
184
+
185
+ ## Notebook-authored vs Python-authored modules
186
+
187
+ Both notebook and Python flow modules participate in the same discovery model:
188
+
189
+ - they export one `build() -> Flow`
190
+ - they can import helper modules
191
+ - they compile into runtime-ready Python modules
192
+
193
+ Python modules are usually better for:
194
+
195
+ - shared flows
196
+ - helper-heavy logic
197
+ - larger code review surfaces
198
+
199
+ Notebooks are usually better for:
200
+
201
+ - exploratory authoring
202
+ - iterative preview-driven development
203
+ - flows that benefit from inline inspection while being built
204
+
205
+ ## A practical authoring checklist
206
+
207
+ Before calling a flow module "done," it is worth checking:
208
+
209
+ - `build()` returns one `Flow`
210
+ - the module imports cleanly with no side effects
211
+ - the step labels are readable in the UI
212
+ - saved object names are meaningful
213
+ - required config is documented or obvious
214
+ - writer steps return actual existing paths when you want inspectability
215
+ - any helper modules sit under `flow_modules/flow_helpers/`