py-data-engine 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_engine/__init__.py +37 -0
- data_engine/application/__init__.py +39 -0
- data_engine/application/actions.py +42 -0
- data_engine/application/catalog.py +151 -0
- data_engine/application/control.py +213 -0
- data_engine/application/details.py +73 -0
- data_engine/application/runtime.py +449 -0
- data_engine/application/workspace.py +62 -0
- data_engine/authoring/__init__.py +14 -0
- data_engine/authoring/builder.py +31 -0
- data_engine/authoring/execution/__init__.py +6 -0
- data_engine/authoring/execution/app.py +6 -0
- data_engine/authoring/execution/context.py +82 -0
- data_engine/authoring/execution/continuous.py +176 -0
- data_engine/authoring/execution/grouped.py +106 -0
- data_engine/authoring/execution/logging.py +83 -0
- data_engine/authoring/execution/polling.py +135 -0
- data_engine/authoring/execution/runner.py +210 -0
- data_engine/authoring/execution/single.py +171 -0
- data_engine/authoring/flow.py +361 -0
- data_engine/authoring/helpers.py +160 -0
- data_engine/authoring/model.py +59 -0
- data_engine/authoring/primitives.py +430 -0
- data_engine/authoring/services.py +42 -0
- data_engine/devtools/__init__.py +3 -0
- data_engine/devtools/project_ast_map.py +503 -0
- data_engine/docs/__init__.py +1 -0
- data_engine/docs/sphinx_source/_static/custom.css +13 -0
- data_engine/docs/sphinx_source/api.rst +42 -0
- data_engine/docs/sphinx_source/conf.py +37 -0
- data_engine/docs/sphinx_source/guides/app-runtime-and-workspaces.md +397 -0
- data_engine/docs/sphinx_source/guides/authoring-flow-modules.md +215 -0
- data_engine/docs/sphinx_source/guides/configuring-flows.md +185 -0
- data_engine/docs/sphinx_source/guides/core-concepts.md +208 -0
- data_engine/docs/sphinx_source/guides/database-methods.md +107 -0
- data_engine/docs/sphinx_source/guides/duckdb-helpers.md +462 -0
- data_engine/docs/sphinx_source/guides/flow-context.md +538 -0
- data_engine/docs/sphinx_source/guides/flow-methods.md +206 -0
- data_engine/docs/sphinx_source/guides/getting-started.md +271 -0
- data_engine/docs/sphinx_source/guides/project-inventory.md +5683 -0
- data_engine/docs/sphinx_source/guides/project-map.md +118 -0
- data_engine/docs/sphinx_source/guides/recipes.md +268 -0
- data_engine/docs/sphinx_source/index.rst +22 -0
- data_engine/domain/__init__.py +92 -0
- data_engine/domain/actions.py +69 -0
- data_engine/domain/catalog.py +128 -0
- data_engine/domain/details.py +214 -0
- data_engine/domain/diagnostics.py +56 -0
- data_engine/domain/errors.py +104 -0
- data_engine/domain/inspection.py +99 -0
- data_engine/domain/logs.py +118 -0
- data_engine/domain/operations.py +172 -0
- data_engine/domain/operator.py +72 -0
- data_engine/domain/runs.py +155 -0
- data_engine/domain/runtime.py +279 -0
- data_engine/domain/source_state.py +17 -0
- data_engine/domain/support.py +54 -0
- data_engine/domain/time.py +23 -0
- data_engine/domain/workspace.py +159 -0
- data_engine/flow_modules/__init__.py +1 -0
- data_engine/flow_modules/flow_module_compiler.py +179 -0
- data_engine/flow_modules/flow_module_loader.py +201 -0
- data_engine/helpers/__init__.py +25 -0
- data_engine/helpers/duckdb.py +705 -0
- data_engine/hosts/__init__.py +1 -0
- data_engine/hosts/daemon/__init__.py +23 -0
- data_engine/hosts/daemon/app.py +221 -0
- data_engine/hosts/daemon/bootstrap.py +69 -0
- data_engine/hosts/daemon/client.py +465 -0
- data_engine/hosts/daemon/commands.py +64 -0
- data_engine/hosts/daemon/composition.py +310 -0
- data_engine/hosts/daemon/constants.py +15 -0
- data_engine/hosts/daemon/entrypoints.py +97 -0
- data_engine/hosts/daemon/lifecycle.py +191 -0
- data_engine/hosts/daemon/manager.py +272 -0
- data_engine/hosts/daemon/ownership.py +126 -0
- data_engine/hosts/daemon/runtime_commands.py +188 -0
- data_engine/hosts/daemon/runtime_control.py +31 -0
- data_engine/hosts/daemon/server.py +84 -0
- data_engine/hosts/daemon/shared_state.py +147 -0
- data_engine/hosts/daemon/state_sync.py +101 -0
- data_engine/platform/__init__.py +1 -0
- data_engine/platform/identity.py +35 -0
- data_engine/platform/local_settings.py +146 -0
- data_engine/platform/theme.py +259 -0
- data_engine/platform/workspace_models.py +190 -0
- data_engine/platform/workspace_policy.py +333 -0
- data_engine/runtime/__init__.py +1 -0
- data_engine/runtime/file_watch.py +185 -0
- data_engine/runtime/ledger_models.py +116 -0
- data_engine/runtime/runtime_db.py +938 -0
- data_engine/runtime/shared_state.py +523 -0
- data_engine/services/__init__.py +49 -0
- data_engine/services/daemon.py +64 -0
- data_engine/services/daemon_state.py +40 -0
- data_engine/services/flow_catalog.py +102 -0
- data_engine/services/flow_execution.py +48 -0
- data_engine/services/ledger.py +85 -0
- data_engine/services/logs.py +65 -0
- data_engine/services/runtime_binding.py +105 -0
- data_engine/services/runtime_execution.py +126 -0
- data_engine/services/runtime_history.py +62 -0
- data_engine/services/settings.py +58 -0
- data_engine/services/shared_state.py +28 -0
- data_engine/services/theme.py +59 -0
- data_engine/services/workspace_provisioning.py +224 -0
- data_engine/services/workspaces.py +74 -0
- data_engine/ui/__init__.py +3 -0
- data_engine/ui/cli/__init__.py +19 -0
- data_engine/ui/cli/app.py +161 -0
- data_engine/ui/cli/commands_doctor.py +178 -0
- data_engine/ui/cli/commands_run.py +80 -0
- data_engine/ui/cli/commands_start.py +100 -0
- data_engine/ui/cli/commands_workspace.py +97 -0
- data_engine/ui/cli/dependencies.py +44 -0
- data_engine/ui/cli/parser.py +56 -0
- data_engine/ui/gui/__init__.py +25 -0
- data_engine/ui/gui/app.py +116 -0
- data_engine/ui/gui/bootstrap.py +487 -0
- data_engine/ui/gui/bootstrapper.py +140 -0
- data_engine/ui/gui/cache_models.py +23 -0
- data_engine/ui/gui/control_support.py +185 -0
- data_engine/ui/gui/controllers/__init__.py +6 -0
- data_engine/ui/gui/controllers/flows.py +439 -0
- data_engine/ui/gui/controllers/runtime.py +245 -0
- data_engine/ui/gui/dialogs/__init__.py +12 -0
- data_engine/ui/gui/dialogs/messages.py +88 -0
- data_engine/ui/gui/dialogs/previews.py +222 -0
- data_engine/ui/gui/helpers/__init__.py +62 -0
- data_engine/ui/gui/helpers/inspection.py +81 -0
- data_engine/ui/gui/helpers/lifecycle.py +112 -0
- data_engine/ui/gui/helpers/scroll.py +28 -0
- data_engine/ui/gui/helpers/theming.py +87 -0
- data_engine/ui/gui/icons/dark_light.svg +12 -0
- data_engine/ui/gui/icons/documentation.svg +1 -0
- data_engine/ui/gui/icons/failed.svg +3 -0
- data_engine/ui/gui/icons/group.svg +4 -0
- data_engine/ui/gui/icons/home.svg +2 -0
- data_engine/ui/gui/icons/manual.svg +2 -0
- data_engine/ui/gui/icons/poll.svg +2 -0
- data_engine/ui/gui/icons/schedule.svg +4 -0
- data_engine/ui/gui/icons/settings.svg +2 -0
- data_engine/ui/gui/icons/started.svg +3 -0
- data_engine/ui/gui/icons/success.svg +3 -0
- data_engine/ui/gui/icons/view-log.svg +3 -0
- data_engine/ui/gui/icons.py +50 -0
- data_engine/ui/gui/launcher.py +48 -0
- data_engine/ui/gui/presenters/__init__.py +72 -0
- data_engine/ui/gui/presenters/docs.py +140 -0
- data_engine/ui/gui/presenters/logs.py +58 -0
- data_engine/ui/gui/presenters/runtime_projection.py +29 -0
- data_engine/ui/gui/presenters/sidebar.py +88 -0
- data_engine/ui/gui/presenters/steps.py +148 -0
- data_engine/ui/gui/presenters/workspace.py +39 -0
- data_engine/ui/gui/presenters/workspace_binding.py +75 -0
- data_engine/ui/gui/presenters/workspace_settings.py +182 -0
- data_engine/ui/gui/preview_models.py +37 -0
- data_engine/ui/gui/render_support.py +241 -0
- data_engine/ui/gui/rendering/__init__.py +12 -0
- data_engine/ui/gui/rendering/artifacts.py +95 -0
- data_engine/ui/gui/rendering/icons.py +50 -0
- data_engine/ui/gui/runtime.py +47 -0
- data_engine/ui/gui/state_support.py +193 -0
- data_engine/ui/gui/support.py +214 -0
- data_engine/ui/gui/surface.py +209 -0
- data_engine/ui/gui/theme.py +720 -0
- data_engine/ui/gui/widgets/__init__.py +34 -0
- data_engine/ui/gui/widgets/config.py +41 -0
- data_engine/ui/gui/widgets/logs.py +62 -0
- data_engine/ui/gui/widgets/panels.py +507 -0
- data_engine/ui/gui/widgets/sidebar.py +130 -0
- data_engine/ui/gui/widgets/steps.py +84 -0
- data_engine/ui/tui/__init__.py +5 -0
- data_engine/ui/tui/app.py +222 -0
- data_engine/ui/tui/bootstrap.py +475 -0
- data_engine/ui/tui/bootstrapper.py +117 -0
- data_engine/ui/tui/controllers/__init__.py +6 -0
- data_engine/ui/tui/controllers/flows.py +349 -0
- data_engine/ui/tui/controllers/runtime.py +167 -0
- data_engine/ui/tui/runtime.py +34 -0
- data_engine/ui/tui/state_support.py +141 -0
- data_engine/ui/tui/support.py +63 -0
- data_engine/ui/tui/theme.py +204 -0
- data_engine/ui/tui/widgets.py +123 -0
- data_engine/views/__init__.py +109 -0
- data_engine/views/actions.py +80 -0
- data_engine/views/artifacts.py +58 -0
- data_engine/views/flow_display.py +69 -0
- data_engine/views/logs.py +54 -0
- data_engine/views/models.py +96 -0
- data_engine/views/presentation.py +133 -0
- data_engine/views/runs.py +62 -0
- data_engine/views/state.py +39 -0
- data_engine/views/status.py +13 -0
- data_engine/views/text.py +109 -0
- py_data_engine-0.1.0.dist-info/METADATA +330 -0
- py_data_engine-0.1.0.dist-info/RECORD +200 -0
- py_data_engine-0.1.0.dist-info/WHEEL +5 -0
- py_data_engine-0.1.0.dist-info/entry_points.txt +2 -0
- py_data_engine-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,397 @@
|
|
|
1
|
+
# App Runtime and Workspaces
|
|
2
|
+
|
|
3
|
+
This guide explains how the desktop app, authored workspaces, shared workspace state, and machine-local runtime state fit together.
|
|
4
|
+
|
|
5
|
+
If you are writing flows, this is the missing "how the whole thing hangs together" page.
|
|
6
|
+
|
|
7
|
+
## The two roots to keep in mind
|
|
8
|
+
|
|
9
|
+
There are usually two important folders:
|
|
10
|
+
|
|
11
|
+
- the workspace collection root
|
|
12
|
+
- one authored workspace inside that collection
|
|
13
|
+
|
|
14
|
+
Example:
|
|
15
|
+
|
|
16
|
+
```text
|
|
17
|
+
workspaces/
|
|
18
|
+
    example_workspace/
|
|
19
|
+
        flow_modules/
|
|
20
|
+
        flow_modules/flow_helpers/
|
|
21
|
+
        config/
|
|
22
|
+
        databases/
|
|
23
|
+
        .workspace_state/
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
The collection root is the parent folder that contains one or more authored workspaces.
|
|
27
|
+
|
|
28
|
+
The authored workspace is the folder that contains the authoring surface for one logical workspace:
|
|
29
|
+
|
|
30
|
+
- `flow_modules/`
|
|
31
|
+
- `flow_modules/flow_helpers/`
|
|
32
|
+
- `config/`
|
|
33
|
+
- `databases/`
|
|
34
|
+
|
|
35
|
+
That authored workspace is what the app binds to when you select a workspace in the UI.
|
|
36
|
+
|
|
37
|
+
## How the app is structured
|
|
38
|
+
|
|
39
|
+
The desktop app is a single-window operator surface that binds to one authored workspace at a time.
|
|
40
|
+
|
|
41
|
+
When you change the selected workspace, the app rebinds:
|
|
42
|
+
|
|
43
|
+
- workspace paths
|
|
44
|
+
- flow discovery
|
|
45
|
+
- daemon client and daemon manager
|
|
46
|
+
- local runtime ledger
|
|
47
|
+
- visible run history and log views
|
|
48
|
+
- control state and lease state
|
|
49
|
+
|
|
50
|
+
This means the app is multi-workspace for discovery and selection, but single-workspace for active runtime context.
|
|
51
|
+
|
|
52
|
+
That distinction matters when you are reasoning about:
|
|
53
|
+
|
|
54
|
+
- what is cheap to inspect globally
|
|
55
|
+
- what is authoritative for the currently selected workspace
|
|
56
|
+
- why the UI can feel like one workspace "becomes" the app until you switch again
|
|
57
|
+
|
|
58
|
+
## Authored files vs generated runtime artifacts
|
|
59
|
+
|
|
60
|
+
The authored workspace is intentionally small and human-owned.
|
|
61
|
+
|
|
62
|
+
Author-owned folders:
|
|
63
|
+
|
|
64
|
+
- `flow_modules/`: runnable flow modules
|
|
65
|
+
- `flow_modules/flow_helpers/`: reusable helper code imported by flows
|
|
66
|
+
- `config/`: workspace-local TOML config files
|
|
67
|
+
- `databases/`: a conventional home for workspace-local database files
|
|
68
|
+
|
|
69
|
+
Generated or runtime-managed state lives elsewhere:
|
|
70
|
+
|
|
71
|
+
- shared workspace state inside `.workspace_state/`
|
|
72
|
+
- machine-local runtime artifacts under the app runtime root
|
|
73
|
+
|
|
74
|
+
That split is deliberate:
|
|
75
|
+
|
|
76
|
+
- the authored workspace is what you share, edit, and reason about
|
|
77
|
+
- runtime caches and ledgers are free to be machine-local and disposable
|
|
78
|
+
|
|
79
|
+
## Shared workspace state
|
|
80
|
+
|
|
81
|
+
Every authored workspace can also contain a shared control and checkpoint folder:
|
|
82
|
+
|
|
83
|
+
```text
|
|
84
|
+
.workspace_state/
|
|
85
|
+
    available/
|
|
86
|
+
    leased/
|
|
87
|
+
    stale/
|
|
88
|
+
    leases/
|
|
89
|
+
    control_requests/
|
|
90
|
+
    state/
|
|
91
|
+
        runs/
|
|
92
|
+
        step_runs/
|
|
93
|
+
        logs/
|
|
94
|
+
        file_state/
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
This is the workspace-coordination layer.
|
|
98
|
+
|
|
99
|
+
It is used for:
|
|
100
|
+
|
|
101
|
+
- control ownership
|
|
102
|
+
- lease heartbeat/checkpoint state
|
|
103
|
+
- stale-lease recovery
|
|
104
|
+
- handoff requests between workstations
|
|
105
|
+
- shared runtime snapshots
|
|
106
|
+
|
|
107
|
+
### Available, leased, and stale
|
|
108
|
+
|
|
109
|
+
The app and daemon use simple marker folders to represent workspace control:
|
|
110
|
+
|
|
111
|
+
- `available/<workspace_id>` means nobody currently owns the workspace
|
|
112
|
+
- `leased/<workspace_id>` means one machine currently owns it
|
|
113
|
+
- `stale/...` is where stale leased markers are quarantined during recovery
|
|
114
|
+
|
|
115
|
+
Only one workstation should actively own a workspace at a time.
|
|
116
|
+
|
|
117
|
+
### Lease metadata and heartbeat
|
|
118
|
+
|
|
119
|
+
When a daemon owns a workspace, it writes lease metadata in:
|
|
120
|
+
|
|
121
|
+
- `.workspace_state/leases/<workspace_id>.parquet`
|
|
122
|
+
|
|
123
|
+
That metadata includes:
|
|
124
|
+
|
|
125
|
+
- workspace id
|
|
126
|
+
- machine id / host name
|
|
127
|
+
- daemon id
|
|
128
|
+
- PID
|
|
129
|
+
- status
|
|
130
|
+
- started time
|
|
131
|
+
- last checkpoint time
|
|
132
|
+
- app version
|
|
133
|
+
- snapshot generation id
|
|
134
|
+
|
|
135
|
+
The checkpoint time is the heartbeat signal. The daemon refreshes it during normal operation so other clients can tell:
|
|
136
|
+
|
|
137
|
+
- the workspace is still controlled
|
|
138
|
+
- who controls it
|
|
139
|
+
- whether the controlling daemon looks healthy
|
|
140
|
+
|
|
141
|
+
The control model currently uses:
|
|
142
|
+
|
|
143
|
+
- a target checkpoint interval of 30 seconds
|
|
144
|
+
- a stale threshold of 90 seconds
|
|
145
|
+
|
|
146
|
+
Those numbers are part of the runtime domain model, not an arbitrary UI convention.
|
|
147
|
+
|
|
148
|
+
### Shared runtime snapshots
|
|
149
|
+
|
|
150
|
+
The shared runtime snapshot is written into parquet files beneath:
|
|
151
|
+
|
|
152
|
+
- `.workspace_state/state/runs/`
|
|
153
|
+
- `.workspace_state/state/step_runs/`
|
|
154
|
+
- `.workspace_state/state/logs/`
|
|
155
|
+
- `.workspace_state/state/file_state/`
|
|
156
|
+
|
|
157
|
+
These files let one workstation publish the current runtime picture so another workstation can hydrate a local read model without owning the workspace.
|
|
158
|
+
|
|
159
|
+
This is how the app can show meaningful status while observing another machine's daemon rather than controlling the workspace directly.
|
|
160
|
+
|
|
161
|
+
## Local state vs workspace state
|
|
162
|
+
|
|
163
|
+
Data Engine uses both shared workspace state and machine-local state.
|
|
164
|
+
|
|
165
|
+
### Shared workspace state
|
|
166
|
+
|
|
167
|
+
Shared workspace state lives inside the authored workspace under `.workspace_state/`.
|
|
168
|
+
|
|
169
|
+
It exists so multiple workstations can coordinate around:
|
|
170
|
+
|
|
171
|
+
- control ownership
|
|
172
|
+
- control requests
|
|
173
|
+
- shared run history snapshots
|
|
174
|
+
- shared logs
|
|
175
|
+
- file freshness state
|
|
176
|
+
|
|
177
|
+
### Machine-local state
|
|
178
|
+
|
|
179
|
+
Machine-local state lives under the app runtime root and local settings store.
|
|
180
|
+
|
|
181
|
+
This includes:
|
|
182
|
+
|
|
183
|
+
- the local SQLite runtime ledger for the currently selected workspace
|
|
184
|
+
- compiled flow-module artifacts
|
|
185
|
+
- runtime caches
|
|
186
|
+
- daemon log files
|
|
187
|
+
- app-local workspace selection and collection-root settings
|
|
188
|
+
|
|
189
|
+
The local runtime ledger path is resolved per workspace. It is machine-local, not shared.
|
|
190
|
+
|
|
191
|
+
That local ledger is important because the desktop app needs a fast local read model even when the authoritative daemon is elsewhere.
|
|
192
|
+
|
|
193
|
+
### Why both exist
|
|
194
|
+
|
|
195
|
+
The split gives the system two useful properties:
|
|
196
|
+
|
|
197
|
+
- one workstation can own and publish runtime state for a workspace
|
|
198
|
+
- another workstation can still open the workspace and observe it without taking control
|
|
199
|
+
|
|
200
|
+
It also keeps the authored workspace from becoming a dumping ground for every cache and local artifact.
|
|
201
|
+
|
|
202
|
+
## Control, handoff, and control requests
|
|
203
|
+
|
|
204
|
+
Workspace control is intentionally conservative.
|
|
205
|
+
|
|
206
|
+
The basic model is:
|
|
207
|
+
|
|
208
|
+
1. a workstation claims the workspace
|
|
209
|
+
2. that workstation's daemon becomes the active owner
|
|
210
|
+
3. it keeps the lease alive through checkpoints
|
|
211
|
+
4. other workstations observe that the workspace is leased
|
|
212
|
+
|
|
213
|
+
If another workstation wants control, it can request it. Those requests are written to:
|
|
214
|
+
|
|
215
|
+
- `.workspace_state/control_requests/<workspace_id>.parquet`
|
|
216
|
+
|
|
217
|
+
A control request records:
|
|
218
|
+
|
|
219
|
+
- requester machine id
|
|
220
|
+
- requester host name
|
|
221
|
+
- requester pid
|
|
222
|
+
- requester client kind
|
|
223
|
+
- request time
|
|
224
|
+
|
|
225
|
+
The app surfaces this as "control requested" rather than silently stealing ownership.
|
|
226
|
+
|
|
227
|
+
### Handoff and takeover
|
|
228
|
+
|
|
229
|
+
The control UI distinguishes between:
|
|
230
|
+
|
|
231
|
+
- local ownership
|
|
232
|
+
- another machine owning the workspace
|
|
233
|
+
- a pending local request for takeover
|
|
234
|
+
- takeover becoming available after the remote lease appears stale
|
|
235
|
+
|
|
236
|
+
That behavior comes from `WorkspaceControlState`, which derives operator-facing status from:
|
|
237
|
+
|
|
238
|
+
- the last daemon snapshot
|
|
239
|
+
- whether the daemon is live
|
|
240
|
+
- the current lease metadata checkpoint age
|
|
241
|
+
- any pending control request
|
|
242
|
+
|
|
243
|
+
### When a takeover is available
|
|
244
|
+
|
|
245
|
+
If a workspace is leased but the last checkpoint is older than the stale threshold, the UI can surface takeover availability.
|
|
246
|
+
|
|
247
|
+
During recovery, the system can also quarantine stale lease state by moving it into the `stale/` area before reclaiming the workspace.
|
|
248
|
+
|
|
249
|
+
## The daemon and the selected workspace
|
|
250
|
+
|
|
251
|
+
The desktop app talks to a per-workspace local daemon.
|
|
252
|
+
|
|
253
|
+
For GUI use, the daemon lifecycle is intentionally ephemeral:
|
|
254
|
+
|
|
255
|
+
- it is created for the selected workspace as needed
|
|
256
|
+
- it can survive workspace switches when active work is still running
|
|
257
|
+
- it is not supposed to linger forever just because the GUI once touched that workspace
|
|
258
|
+
|
|
259
|
+
The important behavior is this:
|
|
260
|
+
|
|
261
|
+
- switching away from a workspace should not tear down active work
|
|
262
|
+
- switching back should rehydrate the selected workspace's daemon state immediately
|
|
263
|
+
|
|
264
|
+
That immediate rehydration is what keeps engine state, manual runs, and control state accurate after a workspace switch.
|
|
265
|
+
|
|
266
|
+
## Workspace selection
|
|
267
|
+
|
|
268
|
+
The workspace selector in the app chooses which authored workspace the window is currently bound to.
|
|
269
|
+
|
|
270
|
+
When you switch workspaces, the app:
|
|
271
|
+
|
|
272
|
+
- closes workspace-scoped preview dialogs
|
|
273
|
+
- invalidates stale deferred message-box callbacks
|
|
274
|
+
- hides the selector popup
|
|
275
|
+
- queues the actual rebind one Qt tick later
|
|
276
|
+
|
|
277
|
+
That last step is important because it lets the native combo-box popup finish closing before the rest of the workspace state is rebuilt.
|
|
278
|
+
|
|
279
|
+
Practically, the selected workspace governs:
|
|
280
|
+
|
|
281
|
+
- which flows are loaded
|
|
282
|
+
- which runtime ledger is open
|
|
283
|
+
- which daemon is being queried or controlled
|
|
284
|
+
- which logs and runs are visible in the main view
|
|
285
|
+
- which workspace-relative `context.config(...)` and `context.database(...)` calls make sense during authoring
|
|
286
|
+
|
|
287
|
+
## Workspace provisioning
|
|
288
|
+
|
|
289
|
+
Provisioning is deliberately safe and additive.
|
|
290
|
+
|
|
291
|
+
Provisioning a workspace creates the missing conventional folders and files without overwriting anything that already exists:
|
|
292
|
+
|
|
293
|
+
- `flow_modules/`
|
|
294
|
+
- `flow_modules/flow_helpers/`
|
|
295
|
+
- `config/`
|
|
296
|
+
- `databases/`
|
|
297
|
+
- `.vscode/settings.json`
|
|
298
|
+
|
|
299
|
+
Provisioning also writes a `.vscode/settings.json` at the collection root.
|
|
300
|
+
|
|
301
|
+
If those files already exist, the provisioning service preserves them by default rather than overwriting them.
|
|
302
|
+
|
|
303
|
+
This is meant to make a new workspace usable immediately without turning provisioning into a heavy bootstrap system.
|
|
304
|
+
|
|
305
|
+
## VS Code provisioning
|
|
306
|
+
|
|
307
|
+
Data Engine now writes VS Code settings in two places:
|
|
308
|
+
|
|
309
|
+
- at the workspace collection root
|
|
310
|
+
- at the individual authored workspace root
|
|
311
|
+
|
|
312
|
+
Both settings files use a workspace-relative interpreter:
|
|
313
|
+
|
|
314
|
+
```json
|
|
315
|
+
"python.defaultInterpreterPath": "${workspaceFolder}/.venv"
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
That makes the settings portable across workstations as long as each workstation keeps its venv in the same relative place.
|
|
319
|
+
|
|
320
|
+
The generated settings also:
|
|
321
|
+
|
|
322
|
+
- hide `.workspace_state` from Explorer and search
|
|
323
|
+
- set terminal environment variables for Data Engine paths
|
|
324
|
+
- add `src/` to `python.analysis.extraPaths` when running from a checkout
|
|
325
|
+
- enable pytest configuration when a checkout-local `tests/` folder exists
|
|
326
|
+
|
|
327
|
+
The collection-root settings are for the "open the whole workspace collection in VS Code" workflow.
|
|
328
|
+
|
|
329
|
+
The authored-workspace settings are for the "open just one workspace" workflow.
|
|
330
|
+
|
|
331
|
+
## Logging and run history
|
|
332
|
+
|
|
333
|
+
There are a few different log and history concepts that are easy to blur together.
|
|
334
|
+
|
|
335
|
+
### Shared runtime logs
|
|
336
|
+
|
|
337
|
+
The daemon publishes shared log snapshots into `.workspace_state/state/logs/`.
|
|
338
|
+
|
|
339
|
+
Those snapshots are part of the shared runtime picture used by observing clients.
|
|
340
|
+
|
|
341
|
+
### Local runtime ledger
|
|
342
|
+
|
|
343
|
+
The selected workspace also has a machine-local SQLite runtime ledger. That is the app's fast local runtime store and is what powers most local querying, hydrated snapshots, and UI views.
|
|
344
|
+
|
|
345
|
+
### GUI run history limits
|
|
346
|
+
|
|
347
|
+
The GUI intentionally limits how much visible run history it renders at once. The current run-history sidebar/view is capped at 100 visible run groups.
|
|
348
|
+
|
|
349
|
+
That cap is a presentation choice, not a statement that only 100 runs exist.
|
|
350
|
+
|
|
351
|
+
### "Runs last 30 days"
|
|
352
|
+
|
|
353
|
+
The small footer tag on the home view shows:
|
|
354
|
+
|
|
355
|
+
- modules
|
|
356
|
+
- groups
|
|
357
|
+
- flows
|
|
358
|
+
- runs in the last 30 days
|
|
359
|
+
|
|
360
|
+
That 30-day value is a summary count for the currently selected workspace. It is not a documented retention policy for deleting logs or runs.
|
|
361
|
+
|
|
362
|
+
## The kill switch
|
|
363
|
+
|
|
364
|
+
The Settings pane exposes an emergency kill switch for the selected workspace daemon.
|
|
365
|
+
|
|
366
|
+
This is intentionally coarse.
|
|
367
|
+
|
|
368
|
+
It does not try to kill one hung step or one worker thread. Instead it:
|
|
369
|
+
|
|
370
|
+
1. asks the daemon to shut down normally
|
|
371
|
+
2. waits briefly for a graceful exit
|
|
372
|
+
3. force-kills the daemon process if it is still alive
|
|
373
|
+
4. performs best-effort cleanup of local daemon/lease state
|
|
374
|
+
|
|
375
|
+
That is the right emergency tool when a flow is stuck inside a blocking native call or an uninterruptible external library path.
|
|
376
|
+
|
|
377
|
+
It is intentionally user-driven. The system does not try to infer "stuck" from heuristics before surfacing the action.
|
|
378
|
+
|
|
379
|
+
## How this affects flow authors
|
|
380
|
+
|
|
381
|
+
The important authoring consequence is that a flow module is only one part of the overall system.
|
|
382
|
+
|
|
383
|
+
Your flow code runs inside:
|
|
384
|
+
|
|
385
|
+
- one authored workspace
|
|
386
|
+
- one selected app binding
|
|
387
|
+
- one daemon or manual run context
|
|
388
|
+
- one shared-control model
|
|
389
|
+
|
|
390
|
+
That is why the `FlowContext` surface is so valuable:
|
|
391
|
+
|
|
392
|
+
- `context.source` and `context.mirror` understand source-relative and output-relative paths
|
|
393
|
+
- `context.config` gives you structured workspace-local TOML config
|
|
394
|
+
- `context.database(...)` gives you a conventional workspace-local database path
|
|
395
|
+
- `context.metadata` lets you publish runtime details back into the UI/runtime model
|
|
396
|
+
|
|
397
|
+
For the authoring-level details, continue with [FlowContext](flow-context.md).
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
# Authoring Flow Modules
|
|
2
|
+
|
|
3
|
+
Flow modules live in:
|
|
4
|
+
|
|
5
|
+
- `workspaces/<workspace_id>/flow_modules/<name>.ipynb`
|
|
6
|
+
- `workspaces/<workspace_id>/flow_modules/<name>.py`
|
|
7
|
+
|
|
8
|
+
Reusable helper modules can live in:
|
|
9
|
+
|
|
10
|
+
- `workspaces/<workspace_id>/flow_modules/flow_helpers/<name>.py`
|
|
11
|
+
|
|
12
|
+
Each flow module should export:
|
|
13
|
+
|
|
14
|
+
- optional `DESCRIPTION`
|
|
15
|
+
- `build() -> Flow`
|
|
16
|
+
|
|
17
|
+
The flow-module filename is the durable flow identity used by discovery and runtime state. If you rename the file, you are effectively creating a different flow as far as the system is concerned.
|
|
18
|
+
|
|
19
|
+
## Required contract
|
|
20
|
+
|
|
21
|
+
```python
|
|
22
|
+
from data_engine import Flow
|
|
23
|
+
|
|
24
|
+
DESCRIPTION = "Reads workbook inputs and writes mirrored parquet outputs."
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def build():
|
|
28
|
+
    return Flow(group="Claims")
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
When you want a custom display title in the UI, set `label=` on the returned `Flow(...)`. Otherwise the UI derives a readable title from the flow-module filename.
|
|
32
|
+
|
|
33
|
+
`build()` must not accept any parameters.
|
|
34
|
+
|
|
35
|
+
Keep module import-time code side-effect free. The app needs to discover flows safely and repeatedly, so top-level code should not:
|
|
36
|
+
|
|
37
|
+
- run queries
|
|
38
|
+
- write files
|
|
39
|
+
- start background work
|
|
40
|
+
- depend on interactive state
|
|
41
|
+
|
|
42
|
+
Do that work inside steps instead.
|
|
43
|
+
|
|
44
|
+
## Step style
|
|
45
|
+
|
|
46
|
+
Every `step(...)` callable receives one `context` argument:
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
def read_claims(context):
|
|
50
|
+
    ...
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def clean_claims(context):
|
|
54
|
+
    ...
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
`map(...)` and `step_each(...)` are the batch-oriented exception. They accept either:
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
def validate_pdf(file_ref):
|
|
61
|
+
    ...
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def validate_pdf_with_context(context, file_ref):
|
|
65
|
+
    ...
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
`map(...)` always returns a `Batch`, and `step_each(...)` is the equivalent alias. Both raise immediately when the current batch is empty.
|
|
69
|
+
|
|
70
|
+
Use native libraries directly inside those steps:
|
|
71
|
+
|
|
72
|
+
- Polars for dataframe reads, transforms, and writes
|
|
73
|
+
- DuckDB for SQL and database work
|
|
74
|
+
- `pathlib` and normal Python for filesystem logic
|
|
75
|
+
|
|
76
|
+
That simplicity is the intended authoring experience. Flow modules should feel like normal Python modules with a small orchestration surface, not like a second programming language.
|
|
77
|
+
|
|
78
|
+
## Good patterns
|
|
79
|
+
|
|
80
|
+
- keep import-time code side-effect free
|
|
81
|
+
- keep expensive work inside steps
|
|
82
|
+
- use `save_as=` and `use=` to preserve intermediate objects
|
|
83
|
+
- use `build().preview(use="name")` in notebooks when you want to inspect one saved intermediate object quickly
|
|
84
|
+
- use `collect(...)` when you want a batch of files
|
|
85
|
+
- use `map(...)` or `step_each(...)` when the same callable should run once per batch item
|
|
86
|
+
- use `context.source` for source-relative paths
|
|
87
|
+
- use `context.mirror` for write-ready output paths
|
|
88
|
+
- return the written `Path` from output steps so the UI can enable `Inspect`
|
|
89
|
+
- move shared parsing, SQL, and utility code into `flow_modules/flow_helpers/*.py` and import it from flows with `from flow_helpers.<name> import ...`
|
|
90
|
+
|
|
91
|
+
Also good:
|
|
92
|
+
|
|
93
|
+
- use `context.config.require("name")` for required TOML config
|
|
94
|
+
- use `context.database("analytics/db.duckdb")` for workspace-local database paths
|
|
95
|
+
- record useful UI/runtime details in `context.metadata`
|
|
96
|
+
- keep writer steps narrow and explicit
|
|
97
|
+
- split "build data" and "write data" into separate steps when you want a previewable intermediate
|
|
98
|
+
|
|
99
|
+
Usually worth avoiding:
|
|
100
|
+
|
|
101
|
+
- monolithic steps that read, transform, and write everything at once
|
|
102
|
+
- hand-built relative path logic when `context.source` or `context.mirror` already models it
|
|
103
|
+
- hidden global state in helper modules
|
|
104
|
+
- returning a path that was never actually written
|
|
105
|
+
|
|
106
|
+
## Helper modules
|
|
107
|
+
|
|
108
|
+
Helper modules are regular Python files under `flow_modules/flow_helpers/`. They are mirrored into compiled workspace artifacts and are importable from both notebook-authored and Python-authored flows.
|
|
109
|
+
|
|
110
|
+
Example:
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
# flow_modules/flow_helpers/claims_sql.py
|
|
114
|
+
LATEST_CLAIMS_SQL = "select * from claims where is_latest = true"
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
# flow_modules/claims_report.py
|
|
119
|
+
from flow_helpers.claims_sql import LATEST_CLAIMS_SQL
|
|
120
|
+
from data_engine import Flow
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def build():
|
|
124
|
+
    return Flow(group="Claims")
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Files in `flow_modules/flow_helpers/` are not discovered as runnable flows. They exist only to support authored flow modules.
|
|
128
|
+
|
|
129
|
+
This is the right home for:
|
|
130
|
+
|
|
131
|
+
- shared SQL strings
|
|
132
|
+
- parsing helpers
|
|
133
|
+
- file naming utilities
|
|
134
|
+
- common dataframe transforms
|
|
135
|
+
- shared constants
|
|
136
|
+
|
|
137
|
+
It is not the right home for code that tries to secretly become a flow. If it should run independently and appear in the app, it belongs in its own flow module with its own `build()`.
|
|
138
|
+
|
|
139
|
+
## Example
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
from data_engine import Flow
|
|
143
|
+
import polars as pl
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def read_claims(file_ref):
|
|
147
|
+
    return pl.read_excel(file_ref.path)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def concat_claims(context):
|
|
151
|
+
    return pl.concat(context.current, how="vertical_relaxed")
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def keep_completed(context):
|
|
155
|
+
    return context.current.filter(pl.col("Step TO") == "COMPLETED")
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def write_target(context):
|
|
159
|
+
    output = context.mirror.file("example_completed.parquet")
|
|
160
|
+
    context.current.write_parquet(output)
|
|
161
|
+
    return output
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def build():
|
|
165
|
+
    return (
|
|
166
|
+
        Flow(group="Claims")
|
|
167
|
+
        .watch(mode="schedule", run_as="batch", interval="15m", source="../../example_data/Input/claims_flat")
|
|
168
|
+
        .mirror(root="../../example_data/Output/example_completed")
|
|
169
|
+
        .collect([".xlsx"], save_as="claim_files")
|
|
170
|
+
        .map(read_claims, use="claim_files", save_as="claim_frames")
|
|
171
|
+
        .step(concat_claims, use="claim_frames", save_as="raw_df")
|
|
172
|
+
        .step(keep_completed, use="raw_df", save_as="clean_df")
|
|
173
|
+
        .step(write_target, use="clean_df")
|
|
174
|
+
    )
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
That example shows `map(...)` in context:
|
|
178
|
+
|
|
179
|
+
- `collect(...)` gathers a batch of `FileRef` items
|
|
180
|
+
- `map(...)` reads each file into its own dataframe, producing a batch of dataframes
|
|
181
|
+
- later `step(...)` callables operate on the whole batch result
|
|
182
|
+
|
|
183
|
+
There is no separate config layer that turns one flow module into multiple named flow variants after build time.
|
|
184
|
+
|
|
185
|
+
## Notebook-authored vs Python-authored modules
|
|
186
|
+
|
|
187
|
+
Both notebook and Python flow modules participate in the same discovery model:
|
|
188
|
+
|
|
189
|
+
- they export one `build() -> Flow`
|
|
190
|
+
- they can import helper modules
|
|
191
|
+
- they compile into runtime-ready Python modules
|
|
192
|
+
|
|
193
|
+
Python modules are usually better for:
|
|
194
|
+
|
|
195
|
+
- shared flows
|
|
196
|
+
- helper-heavy logic
|
|
197
|
+
- larger code review surfaces
|
|
198
|
+
|
|
199
|
+
Notebooks are usually better for:
|
|
200
|
+
|
|
201
|
+
- exploratory authoring
|
|
202
|
+
- iterative preview-driven development
|
|
203
|
+
- flows that benefit from inline inspection while being built
|
|
204
|
+
|
|
205
|
+
## A practical authoring checklist
|
|
206
|
+
|
|
207
|
+
Before calling a flow module "done," it is worth checking:
|
|
208
|
+
|
|
209
|
+
- `build()` returns one `Flow`
|
|
210
|
+
- the module imports cleanly with no side effects
|
|
211
|
+
- the step labels are readable in the UI
|
|
212
|
+
- saved object names are meaningful
|
|
213
|
+
- required config is documented or obvious
|
|
214
|
+
- writer steps return actual existing paths when you want inspectability
|
|
215
|
+
- any helper modules sit under `flow_modules/flow_helpers/`
|