py-data-engine 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. data_engine/__init__.py +37 -0
  2. data_engine/application/__init__.py +39 -0
  3. data_engine/application/actions.py +42 -0
  4. data_engine/application/catalog.py +151 -0
  5. data_engine/application/control.py +213 -0
  6. data_engine/application/details.py +73 -0
  7. data_engine/application/runtime.py +449 -0
  8. data_engine/application/workspace.py +62 -0
  9. data_engine/authoring/__init__.py +14 -0
  10. data_engine/authoring/builder.py +31 -0
  11. data_engine/authoring/execution/__init__.py +6 -0
  12. data_engine/authoring/execution/app.py +6 -0
  13. data_engine/authoring/execution/context.py +82 -0
  14. data_engine/authoring/execution/continuous.py +176 -0
  15. data_engine/authoring/execution/grouped.py +106 -0
  16. data_engine/authoring/execution/logging.py +83 -0
  17. data_engine/authoring/execution/polling.py +135 -0
  18. data_engine/authoring/execution/runner.py +210 -0
  19. data_engine/authoring/execution/single.py +171 -0
  20. data_engine/authoring/flow.py +361 -0
  21. data_engine/authoring/helpers.py +160 -0
  22. data_engine/authoring/model.py +59 -0
  23. data_engine/authoring/primitives.py +430 -0
  24. data_engine/authoring/services.py +42 -0
  25. data_engine/devtools/__init__.py +3 -0
  26. data_engine/devtools/project_ast_map.py +503 -0
  27. data_engine/docs/__init__.py +1 -0
  28. data_engine/docs/sphinx_source/_static/custom.css +13 -0
  29. data_engine/docs/sphinx_source/api.rst +42 -0
  30. data_engine/docs/sphinx_source/conf.py +37 -0
  31. data_engine/docs/sphinx_source/guides/app-runtime-and-workspaces.md +397 -0
  32. data_engine/docs/sphinx_source/guides/authoring-flow-modules.md +215 -0
  33. data_engine/docs/sphinx_source/guides/configuring-flows.md +185 -0
  34. data_engine/docs/sphinx_source/guides/core-concepts.md +208 -0
  35. data_engine/docs/sphinx_source/guides/database-methods.md +107 -0
  36. data_engine/docs/sphinx_source/guides/duckdb-helpers.md +462 -0
  37. data_engine/docs/sphinx_source/guides/flow-context.md +538 -0
  38. data_engine/docs/sphinx_source/guides/flow-methods.md +206 -0
  39. data_engine/docs/sphinx_source/guides/getting-started.md +271 -0
  40. data_engine/docs/sphinx_source/guides/project-inventory.md +5683 -0
  41. data_engine/docs/sphinx_source/guides/project-map.md +118 -0
  42. data_engine/docs/sphinx_source/guides/recipes.md +268 -0
  43. data_engine/docs/sphinx_source/index.rst +22 -0
  44. data_engine/domain/__init__.py +92 -0
  45. data_engine/domain/actions.py +69 -0
  46. data_engine/domain/catalog.py +128 -0
  47. data_engine/domain/details.py +214 -0
  48. data_engine/domain/diagnostics.py +56 -0
  49. data_engine/domain/errors.py +104 -0
  50. data_engine/domain/inspection.py +99 -0
  51. data_engine/domain/logs.py +118 -0
  52. data_engine/domain/operations.py +172 -0
  53. data_engine/domain/operator.py +72 -0
  54. data_engine/domain/runs.py +155 -0
  55. data_engine/domain/runtime.py +279 -0
  56. data_engine/domain/source_state.py +17 -0
  57. data_engine/domain/support.py +54 -0
  58. data_engine/domain/time.py +23 -0
  59. data_engine/domain/workspace.py +159 -0
  60. data_engine/flow_modules/__init__.py +1 -0
  61. data_engine/flow_modules/flow_module_compiler.py +179 -0
  62. data_engine/flow_modules/flow_module_loader.py +201 -0
  63. data_engine/helpers/__init__.py +25 -0
  64. data_engine/helpers/duckdb.py +705 -0
  65. data_engine/hosts/__init__.py +1 -0
  66. data_engine/hosts/daemon/__init__.py +23 -0
  67. data_engine/hosts/daemon/app.py +221 -0
  68. data_engine/hosts/daemon/bootstrap.py +69 -0
  69. data_engine/hosts/daemon/client.py +465 -0
  70. data_engine/hosts/daemon/commands.py +64 -0
  71. data_engine/hosts/daemon/composition.py +310 -0
  72. data_engine/hosts/daemon/constants.py +15 -0
  73. data_engine/hosts/daemon/entrypoints.py +97 -0
  74. data_engine/hosts/daemon/lifecycle.py +191 -0
  75. data_engine/hosts/daemon/manager.py +272 -0
  76. data_engine/hosts/daemon/ownership.py +126 -0
  77. data_engine/hosts/daemon/runtime_commands.py +188 -0
  78. data_engine/hosts/daemon/runtime_control.py +31 -0
  79. data_engine/hosts/daemon/server.py +84 -0
  80. data_engine/hosts/daemon/shared_state.py +147 -0
  81. data_engine/hosts/daemon/state_sync.py +101 -0
  82. data_engine/platform/__init__.py +1 -0
  83. data_engine/platform/identity.py +35 -0
  84. data_engine/platform/local_settings.py +146 -0
  85. data_engine/platform/theme.py +259 -0
  86. data_engine/platform/workspace_models.py +190 -0
  87. data_engine/platform/workspace_policy.py +333 -0
  88. data_engine/runtime/__init__.py +1 -0
  89. data_engine/runtime/file_watch.py +185 -0
  90. data_engine/runtime/ledger_models.py +116 -0
  91. data_engine/runtime/runtime_db.py +938 -0
  92. data_engine/runtime/shared_state.py +523 -0
  93. data_engine/services/__init__.py +49 -0
  94. data_engine/services/daemon.py +64 -0
  95. data_engine/services/daemon_state.py +40 -0
  96. data_engine/services/flow_catalog.py +102 -0
  97. data_engine/services/flow_execution.py +48 -0
  98. data_engine/services/ledger.py +85 -0
  99. data_engine/services/logs.py +65 -0
  100. data_engine/services/runtime_binding.py +105 -0
  101. data_engine/services/runtime_execution.py +126 -0
  102. data_engine/services/runtime_history.py +62 -0
  103. data_engine/services/settings.py +58 -0
  104. data_engine/services/shared_state.py +28 -0
  105. data_engine/services/theme.py +59 -0
  106. data_engine/services/workspace_provisioning.py +224 -0
  107. data_engine/services/workspaces.py +74 -0
  108. data_engine/ui/__init__.py +3 -0
  109. data_engine/ui/cli/__init__.py +19 -0
  110. data_engine/ui/cli/app.py +161 -0
  111. data_engine/ui/cli/commands_doctor.py +178 -0
  112. data_engine/ui/cli/commands_run.py +80 -0
  113. data_engine/ui/cli/commands_start.py +100 -0
  114. data_engine/ui/cli/commands_workspace.py +97 -0
  115. data_engine/ui/cli/dependencies.py +44 -0
  116. data_engine/ui/cli/parser.py +56 -0
  117. data_engine/ui/gui/__init__.py +25 -0
  118. data_engine/ui/gui/app.py +116 -0
  119. data_engine/ui/gui/bootstrap.py +487 -0
  120. data_engine/ui/gui/bootstrapper.py +140 -0
  121. data_engine/ui/gui/cache_models.py +23 -0
  122. data_engine/ui/gui/control_support.py +185 -0
  123. data_engine/ui/gui/controllers/__init__.py +6 -0
  124. data_engine/ui/gui/controllers/flows.py +439 -0
  125. data_engine/ui/gui/controllers/runtime.py +245 -0
  126. data_engine/ui/gui/dialogs/__init__.py +12 -0
  127. data_engine/ui/gui/dialogs/messages.py +88 -0
  128. data_engine/ui/gui/dialogs/previews.py +222 -0
  129. data_engine/ui/gui/helpers/__init__.py +62 -0
  130. data_engine/ui/gui/helpers/inspection.py +81 -0
  131. data_engine/ui/gui/helpers/lifecycle.py +112 -0
  132. data_engine/ui/gui/helpers/scroll.py +28 -0
  133. data_engine/ui/gui/helpers/theming.py +87 -0
  134. data_engine/ui/gui/icons/dark_light.svg +12 -0
  135. data_engine/ui/gui/icons/documentation.svg +1 -0
  136. data_engine/ui/gui/icons/failed.svg +3 -0
  137. data_engine/ui/gui/icons/group.svg +4 -0
  138. data_engine/ui/gui/icons/home.svg +2 -0
  139. data_engine/ui/gui/icons/manual.svg +2 -0
  140. data_engine/ui/gui/icons/poll.svg +2 -0
  141. data_engine/ui/gui/icons/schedule.svg +4 -0
  142. data_engine/ui/gui/icons/settings.svg +2 -0
  143. data_engine/ui/gui/icons/started.svg +3 -0
  144. data_engine/ui/gui/icons/success.svg +3 -0
  145. data_engine/ui/gui/icons/view-log.svg +3 -0
  146. data_engine/ui/gui/icons.py +50 -0
  147. data_engine/ui/gui/launcher.py +48 -0
  148. data_engine/ui/gui/presenters/__init__.py +72 -0
  149. data_engine/ui/gui/presenters/docs.py +140 -0
  150. data_engine/ui/gui/presenters/logs.py +58 -0
  151. data_engine/ui/gui/presenters/runtime_projection.py +29 -0
  152. data_engine/ui/gui/presenters/sidebar.py +88 -0
  153. data_engine/ui/gui/presenters/steps.py +148 -0
  154. data_engine/ui/gui/presenters/workspace.py +39 -0
  155. data_engine/ui/gui/presenters/workspace_binding.py +75 -0
  156. data_engine/ui/gui/presenters/workspace_settings.py +182 -0
  157. data_engine/ui/gui/preview_models.py +37 -0
  158. data_engine/ui/gui/render_support.py +241 -0
  159. data_engine/ui/gui/rendering/__init__.py +12 -0
  160. data_engine/ui/gui/rendering/artifacts.py +95 -0
  161. data_engine/ui/gui/rendering/icons.py +50 -0
  162. data_engine/ui/gui/runtime.py +47 -0
  163. data_engine/ui/gui/state_support.py +193 -0
  164. data_engine/ui/gui/support.py +214 -0
  165. data_engine/ui/gui/surface.py +209 -0
  166. data_engine/ui/gui/theme.py +720 -0
  167. data_engine/ui/gui/widgets/__init__.py +34 -0
  168. data_engine/ui/gui/widgets/config.py +41 -0
  169. data_engine/ui/gui/widgets/logs.py +62 -0
  170. data_engine/ui/gui/widgets/panels.py +507 -0
  171. data_engine/ui/gui/widgets/sidebar.py +130 -0
  172. data_engine/ui/gui/widgets/steps.py +84 -0
  173. data_engine/ui/tui/__init__.py +5 -0
  174. data_engine/ui/tui/app.py +222 -0
  175. data_engine/ui/tui/bootstrap.py +475 -0
  176. data_engine/ui/tui/bootstrapper.py +117 -0
  177. data_engine/ui/tui/controllers/__init__.py +6 -0
  178. data_engine/ui/tui/controllers/flows.py +349 -0
  179. data_engine/ui/tui/controllers/runtime.py +167 -0
  180. data_engine/ui/tui/runtime.py +34 -0
  181. data_engine/ui/tui/state_support.py +141 -0
  182. data_engine/ui/tui/support.py +63 -0
  183. data_engine/ui/tui/theme.py +204 -0
  184. data_engine/ui/tui/widgets.py +123 -0
  185. data_engine/views/__init__.py +109 -0
  186. data_engine/views/actions.py +80 -0
  187. data_engine/views/artifacts.py +58 -0
  188. data_engine/views/flow_display.py +69 -0
  189. data_engine/views/logs.py +54 -0
  190. data_engine/views/models.py +96 -0
  191. data_engine/views/presentation.py +133 -0
  192. data_engine/views/runs.py +62 -0
  193. data_engine/views/state.py +39 -0
  194. data_engine/views/status.py +13 -0
  195. data_engine/views/text.py +109 -0
  196. py_data_engine-0.1.0.dist-info/METADATA +330 -0
  197. py_data_engine-0.1.0.dist-info/RECORD +200 -0
  198. py_data_engine-0.1.0.dist-info/WHEEL +5 -0
  199. py_data_engine-0.1.0.dist-info/entry_points.txt +2 -0
  200. py_data_engine-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,523 @@
1
+ """Shared workspace lease and checkpoint helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import asdict
6
+ from datetime import UTC, datetime, timedelta
7
+ import os
8
+ from pathlib import Path
9
+ import shutil
10
+ from uuid import uuid4
11
+ from typing import Any
12
+
13
+ import polars as pl
14
+
15
+ from data_engine.platform.workspace_models import WorkspacePaths
16
+ from data_engine.runtime.ledger_models import (
17
+ PersistedFileState,
18
+ PersistedLogEntry,
19
+ PersistedRun,
20
+ PersistedStepRun,
21
+ )
22
+ from data_engine.runtime.runtime_db import (
23
+ RuntimeLedger,
24
+ parse_utc_text,
25
+ )
26
+
27
+
28
# Column layouts for every parquet file this module writes.  Each schema is
# passed to ``_frame_with_schema`` so column dtypes stay stable even when every
# value in a column is null.

# Single-row description of the daemon that currently holds the lease.
_LEASE_METADATA_SCHEMA: dict[str, pl.DataType] = {
    "snapshot_generation_id": pl.String,
    "workspace_id": pl.String,
    "machine_id": pl.String,
    "host_name": pl.String,
    "daemon_id": pl.String,
    "pid": pl.Int64,
    "status": pl.String,
    "last_checkpoint_at_utc": pl.String,
    "started_at_utc": pl.String,
    "app_version": pl.String,
}

# Single-row pending request to transfer workspace control.
_CONTROL_REQUEST_SCHEMA: dict[str, pl.DataType] = {
    "workspace_id": pl.String,
    "requester_machine_id": pl.String,
    "requester_host_name": pl.String,
    "requester_pid": pl.Int64,
    "requester_client_kind": pl.String,
    "requested_at_utc": pl.String,
}

# Snapshot of run rows; ``snapshot_generation_id`` tags the checkpoint that
# produced the file.
_RUNS_SCHEMA: dict[str, pl.DataType] = {
    "snapshot_generation_id": pl.String,
    "run_id": pl.String,
    "flow_name": pl.String,
    "group_name": pl.String,
    "source_path": pl.String,
    "status": pl.String,
    "started_at_utc": pl.String,
    "finished_at_utc": pl.String,
    "error_text": pl.String,
}

# Snapshot of step-run rows.
_STEP_RUNS_SCHEMA: dict[str, pl.DataType] = {
    "snapshot_generation_id": pl.String,
    "id": pl.Int64,
    "run_id": pl.String,
    "flow_name": pl.String,
    "step_label": pl.String,
    "status": pl.String,
    "started_at_utc": pl.String,
    "finished_at_utc": pl.String,
    "elapsed_ms": pl.Int64,
    "error_text": pl.String,
    "output_path": pl.String,
}

# Snapshot of log-entry rows.
_LOGS_SCHEMA: dict[str, pl.DataType] = {
    "snapshot_generation_id": pl.String,
    "id": pl.Int64,
    "run_id": pl.String,
    "flow_name": pl.String,
    "step_label": pl.String,
    "level": pl.String,
    "message": pl.String,
    "created_at_utc": pl.String,
}

# Snapshot of per-source-file state rows.
_FILE_STATE_SCHEMA: dict[str, pl.DataType] = {
    "snapshot_generation_id": pl.String,
    "flow_name": pl.String,
    "source_path": pl.String,
    "mtime_ns": pl.Int64,
    "size_bytes": pl.Int64,
    "last_success_run_id": pl.String,
    "last_success_at_utc": pl.String,
    "last_status": pl.String,
    "last_error_text": pl.String,
}

# Default attempt count used by the parquet read helpers in this module.
_PARQUET_READ_RETRIES = 3
100
+
101
+
102
def initialize_workspace_state(paths: WorkspacePaths) -> None:
    """Create the shared-state directory tree and seed the availability marker.

    All marker, metadata, control-request and snapshot directories are created
    if missing.  When neither an ``available`` nor a ``leased`` marker exists
    for the workspace, an ``available`` marker directory is created.

    Raises:
        RuntimeError: If both the available and the leased marker exist.
    """
    snapshot_root = paths.shared_state_dir
    required_dirs = [
        paths.workspace_state_dir,
        paths.available_markers_dir,
        paths.leased_markers_dir,
        paths.stale_markers_dir,
        paths.lease_metadata_dir,
        paths.control_requests_dir,
        *(snapshot_root / leaf for leaf in ("runs", "step_runs", "logs", "file_state")),
    ]
    for required in required_dirs:
        required.mkdir(parents=True, exist_ok=True)

    available_marker = paths.available_markers_dir / paths.workspace_id
    leased_marker = paths.leased_markers_dir / paths.workspace_id
    if available_marker.exists():
        if leased_marker.exists():
            raise RuntimeError(f"Workspace {paths.workspace_id!r} has invalid marker state: both available and leased exist.")
        return
    if not leased_marker.exists():
        # Fresh workspace: nobody holds it yet, so advertise it as claimable.
        available_marker.mkdir(parents=True, exist_ok=True)
123
+
124
+
125
def claim_workspace(paths: WorkspacePaths) -> bool:
    """Attempt to take the workspace lease by renaming its marker directory.

    The workspace state tree is initialized first.  The claim is made by
    renaming the ``available`` marker to the ``leased`` marker.

    Returns:
        ``True`` when the rename succeeded, ``False`` when the workspace is
        already leased or the rename failed with an ``OSError``.
    """
    initialize_workspace_state(paths)
    available_marker = paths.available_markers_dir / paths.workspace_id
    leased_marker = paths.leased_markers_dir / paths.workspace_id

    if not available_marker.exists():
        if leased_marker.exists():
            # Someone else already holds the lease.
            return False
        available_marker.mkdir(parents=True, exist_ok=True)

    try:
        available_marker.rename(leased_marker)
    except OSError:
        # Covers FileNotFoundError too (it is an OSError subclass).
        return False
    return True
141
+
142
+
143
def release_workspace(paths: WorkspacePaths) -> None:
    """Move the workspace marker from ``leased`` back to ``available``.

    Does nothing when no leased marker exists.  An existing available marker
    is removed first so the rename can take its place.
    """
    available_marker = paths.available_markers_dir / paths.workspace_id
    leased_marker = paths.leased_markers_dir / paths.workspace_id
    if not leased_marker.exists():
        return
    if available_marker.exists():
        shutil.rmtree(available_marker)
    leased_marker.rename(available_marker)
151
+
152
+
153
def lease_is_stale(paths: WorkspacePaths, *, stale_after_seconds: float) -> bool:
    """Return whether the current lease metadata is stale enough for recovery.

    A lease counts as stale when no metadata can be read, when the stored
    ``last_checkpoint_at_utc`` is missing or cannot be parsed, or when that
    checkpoint is older than ``stale_after_seconds`` (negative values are
    treated as zero).
    """
    metadata = read_lease_metadata(paths)
    if metadata is None:
        return True
    raw_checkpoint = metadata.get("last_checkpoint_at_utc")
    if raw_checkpoint is None:
        # A null column value must not be stringified to "None" and handed to
        # the timestamp parser; an absent checkpoint simply means "stale".
        return True
    parsed = parse_utc_text(str(raw_checkpoint))
    if parsed is None:
        return True
    allowed_age = timedelta(seconds=max(stale_after_seconds, 0.0))
    return datetime.now(UTC) - parsed > allowed_age
162
+
163
+
164
def recover_stale_workspace(
    paths: WorkspacePaths,
    *,
    machine_id: str,
    stale_after_seconds: float,
    reclaim: bool = True,
) -> bool:
    """Quarantine a stale lease marker and optionally claim the workspace.

    The leased marker is renamed into the stale-markers directory under a
    name built from the workspace id, a UTC timestamp and ``machine_id``.
    Any lease metadata file is moved alongside it (best effort), and an
    available marker is recreated.

    Returns:
        ``False`` when there is no leased marker, the lease is not stale, or
        the quarantine rename fails.  Otherwise ``True`` when ``reclaim`` is
        false, or the result of ``claim_workspace`` when it is true.
    """
    leased_marker = paths.leased_markers_dir / paths.workspace_id
    if not leased_marker.exists() or not lease_is_stale(paths, stale_after_seconds=stale_after_seconds):
        return False

    stamp = datetime.now(UTC).strftime("%Y%m%dT%H%M%S%fZ")
    quarantine_dir = paths.stale_markers_dir / f"{paths.workspace_id}__{stamp}__{machine_id}"
    quarantine_dir.parent.mkdir(parents=True, exist_ok=True)
    try:
        leased_marker.rename(quarantine_dir)
    except OSError:
        # The marker could not be moved, so this process did not recover it.
        return False

    if paths.lease_metadata_path.exists():
        metadata_dir = quarantine_dir / "metadata"
        metadata_dir.mkdir(parents=True, exist_ok=True)
        try:
            paths.lease_metadata_path.rename(metadata_dir / "lease.parquet")
        except OSError:
            # Keeping the old metadata with the bundle is best effort only.
            pass

    available_marker = paths.available_markers_dir / paths.workspace_id
    if not available_marker.exists():
        available_marker.mkdir(parents=True, exist_ok=True)

    return claim_workspace(paths) if reclaim else True
196
+
197
+
198
def checkpoint_workspace_state(
    paths: WorkspacePaths,
    ledger: RuntimeLedger,
    *,
    workspace_id: str,
    machine_id: str,
    daemon_id: str,
    pid: int,
    status: str,
    started_at_utc: str,
    last_checkpoint_at_utc: str,
    app_version: str | None,
) -> None:
    """Write shared workspace snapshots and lease metadata.

    Runs, step runs, logs and file states are read from ``ledger`` and written
    to their shared parquet files, all tagged with one freshly generated
    ``snapshot_generation_id``.  The lease metadata row is written last and
    carries the same generation id.
    """
    initialize_workspace_state(paths)
    snapshot_generation_id = uuid4().hex

    # List runs exactly once: the step-run snapshot must be derived from the
    # same run set that is written to the runs file, otherwise a run inserted
    # between two queries would appear in one file but not the other.
    runs = tuple(ledger.list_runs())
    _write_runs(paths.shared_runs_path, runs, snapshot_generation_id=snapshot_generation_id)
    step_runs = tuple(step for run in runs for step in ledger.list_step_runs(run.run_id))
    _write_step_runs(paths.shared_step_runs_path, step_runs, snapshot_generation_id=snapshot_generation_id)
    _write_logs(paths.shared_logs_path, ledger.list_logs(), snapshot_generation_id=snapshot_generation_id)
    _write_file_states(paths.shared_file_state_path, ledger.list_file_states(), snapshot_generation_id=snapshot_generation_id)
    _write_lease_metadata(
        paths.lease_metadata_path,
        {
            "snapshot_generation_id": snapshot_generation_id,
            "workspace_id": workspace_id,
            "machine_id": machine_id,
            # host_name is populated from machine_id; no separate host value
            # is passed to this function.
            "host_name": machine_id,
            "daemon_id": daemon_id,
            "pid": pid,
            "status": status,
            "last_checkpoint_at_utc": last_checkpoint_at_utc,
            "started_at_utc": started_at_utc,
            "app_version": app_version,
        },
    )
234
+
235
+
236
def write_lease_metadata(
    paths: WorkspacePaths,
    *,
    workspace_id: str,
    machine_id: str,
    daemon_id: str,
    pid: int,
    status: str,
    started_at_utc: str,
    last_checkpoint_at_utc: str,
    app_version: str | None,
) -> None:
    """Refresh only the lease metadata row, leaving the runtime snapshots untouched.

    The row receives its own freshly generated ``snapshot_generation_id``.
    """
    initialize_workspace_state(paths)
    lease_row: dict[str, Any] = {
        "snapshot_generation_id": uuid4().hex,
        "workspace_id": workspace_id,
        "machine_id": machine_id,
        # host_name mirrors machine_id; no separate host value is supplied.
        "host_name": machine_id,
        "daemon_id": daemon_id,
        "pid": pid,
        "status": status,
        "last_checkpoint_at_utc": last_checkpoint_at_utc,
        "started_at_utc": started_at_utc,
        "app_version": app_version,
    }
    _write_lease_metadata(paths.lease_metadata_path, lease_row)
265
+
266
+
267
def hydrate_local_runtime_state(paths: WorkspacePaths, ledger: RuntimeLedger) -> None:
    """Load the shared parquet snapshot into the local ledger.

    Nothing happens when no consistent snapshot could be read; otherwise the
    ledger's runtime snapshot is replaced with the rows that were read.
    """
    loaded = _read_consistent_runtime_snapshot(paths)
    if loaded is not None:
        run_rows, step_rows, log_rows, file_state_rows = loaded
        ledger.replace_runtime_snapshot(
            runs=run_rows,
            step_runs=step_rows,
            logs=log_rows,
            file_states=file_state_rows,
        )
274
+
275
+
276
def read_lease_metadata(paths: WorkspacePaths) -> dict[str, Any] | None:
    """Read the first row of the lease metadata parquet, or ``None`` when it has no rows."""
    lease_file = paths.lease_metadata_path
    return _read_single_row_parquet(lease_file)
279
+
280
+
281
def read_control_request(paths: WorkspacePaths) -> dict[str, Any] | None:
    """Read the first row of the control-request parquet, or ``None`` when it has no rows."""
    request_file = paths.control_request_path
    return _read_single_row_parquet(request_file)
284
+
285
+
286
def remove_lease_metadata(paths: WorkspacePaths) -> None:
    """Remove the lease metadata parquet; a missing file is not an error."""
    paths.lease_metadata_path.unlink(missing_ok=True)
292
+
293
+
294
def write_control_request(
    paths: WorkspacePaths,
    *,
    workspace_id: str,
    requester_machine_id: str,
    requester_host_name: str,
    requester_pid: int,
    requester_client_kind: str,
    requested_at_utc: str,
) -> None:
    """Store a single-row control-transfer request in the control-request parquet."""
    request_row: dict[str, Any] = {
        "workspace_id": workspace_id,
        "requester_machine_id": requester_machine_id,
        "requester_host_name": requester_host_name,
        "requester_pid": requester_pid,
        "requester_client_kind": requester_client_kind,
        "requested_at_utc": requested_at_utc,
    }
    request_frame = _frame_with_schema([request_row], _CONTROL_REQUEST_SCHEMA)
    _atomic_write_parquet(paths.control_request_path, request_frame)
321
+
322
+
323
def remove_control_request(paths: WorkspacePaths) -> None:
    """Remove the pending control-request parquet; a missing file is not an error."""
    paths.control_request_path.unlink(missing_ok=True)
329
+
330
+
331
def _atomic_write_parquet(path: Path, frame: pl.DataFrame) -> None:
    """Write ``frame`` to ``path`` via a sibling temp file and ``os.replace``.

    The parent directory is created when missing.  The frame is first written
    to ``.<name>.tmp`` next to the target and then moved over it, so readers
    never see a half-written target.  If the write or the replace fails, the
    temp file is removed (best effort) and the original exception propagates.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = path.with_name(f".{path.name}.tmp")
    try:
        frame.write_parquet(tmp_path)
        os.replace(tmp_path, path)
    except BaseException:
        # Do not leave a partial temp file behind; cleanup failures must not
        # mask the error that actually aborted the write.
        try:
            tmp_path.unlink()
        except OSError:
            pass
        raise
336
+
337
+
338
def _frame_with_schema(rows: list[dict[str, Any]], schema: dict[str, pl.DataType]) -> pl.DataFrame:
    """Return a frame whose columns follow ``schema``, including for empty or all-null input."""
    if rows:
        # infer_schema_length=None is passed through unchanged from the
        # original call; the explicit schema fixes the column dtypes.
        return pl.DataFrame(rows, schema=schema, infer_schema_length=None)
    return pl.DataFrame(schema=schema)
343
+
344
+
345
def _write_lease_metadata(path: Path, row: dict[str, Any]) -> None:
    """Write one lease metadata row to ``path`` using the lease schema."""
    lease_frame = _frame_with_schema([row], _LEASE_METADATA_SCHEMA)
    _atomic_write_parquet(path, lease_frame)
347
+
348
+
349
def _write_runs(path: Path, rows: tuple[PersistedRun, ...], *, snapshot_generation_id: str) -> None:
    """Write the run rows as one tagged parquet snapshot, or delete the file when there are none."""
    if rows:
        generation_tag = {"snapshot_generation_id": snapshot_generation_id}
        records = [generation_tag | asdict(run) for run in rows]
        _atomic_write_parquet(path, _frame_with_schema(records, _RUNS_SCHEMA))
    else:
        remove_file_if_exists(path)
360
+
361
+
362
def _write_step_runs(path: Path, rows: tuple[PersistedStepRun, ...], *, snapshot_generation_id: str) -> None:
    """Write the step-run rows as one tagged parquet snapshot, or delete the file when there are none."""
    if rows:
        generation_tag = {"snapshot_generation_id": snapshot_generation_id}
        records = [generation_tag | asdict(step_run) for step_run in rows]
        _atomic_write_parquet(path, _frame_with_schema(records, _STEP_RUNS_SCHEMA))
    else:
        remove_file_if_exists(path)
373
+
374
+
375
def _write_logs(path: Path, rows: tuple[PersistedLogEntry, ...], *, snapshot_generation_id: str) -> None:
    """Write the log rows as one tagged parquet snapshot, or delete the file when there are none."""
    if rows:
        generation_tag = {"snapshot_generation_id": snapshot_generation_id}
        records = [generation_tag | asdict(entry) for entry in rows]
        _atomic_write_parquet(path, _frame_with_schema(records, _LOGS_SCHEMA))
    else:
        remove_file_if_exists(path)
386
+
387
+
388
def _write_file_states(path: Path, rows: tuple[PersistedFileState, ...], *, snapshot_generation_id: str) -> None:
    """Write the file-state rows as one tagged parquet snapshot, or delete the file when there are none."""
    if rows:
        generation_tag = {"snapshot_generation_id": snapshot_generation_id}
        records = [generation_tag | asdict(file_state) for file_state in rows]
        _atomic_write_parquet(path, _frame_with_schema(records, _FILE_STATE_SCHEMA))
    else:
        remove_file_if_exists(path)
399
+
400
+
401
def remove_file_if_exists(path: Path) -> None:
    """Unlink ``path``; a file that is already gone is silently ignored."""
    path.unlink(missing_ok=True)
407
+
408
+
409
def _read_runs(path: Path) -> tuple[PersistedRun, ...]:
    """Load run rows from ``path``; an absent file yields an empty tuple."""
    if path.is_file():
        records = _read_parquet_with_retries(path).to_dicts()
        return tuple(PersistedRun(**_drop_snapshot_generation_id(record)) for record in records)
    return ()
414
+
415
+
416
def _read_step_runs(path: Path) -> tuple[PersistedStepRun, ...]:
    """Load step-run rows from ``path``; an absent file yields an empty tuple."""
    if path.is_file():
        records = _read_parquet_with_retries(path).to_dicts()
        return tuple(PersistedStepRun(**_drop_snapshot_generation_id(record)) for record in records)
    return ()
421
+
422
+
423
def _read_logs(path: Path) -> tuple[PersistedLogEntry, ...]:
    """Load log rows from ``path``; an absent file yields an empty tuple."""
    if path.is_file():
        records = _read_parquet_with_retries(path).to_dicts()
        return tuple(PersistedLogEntry(**_drop_snapshot_generation_id(record)) for record in records)
    return ()
428
+
429
+
430
def _read_file_states(path: Path) -> tuple[PersistedFileState, ...]:
    """Load file-state rows from ``path``; an absent file yields an empty tuple."""
    if path.is_file():
        records = _read_parquet_with_retries(path).to_dicts()
        return tuple(PersistedFileState(**_drop_snapshot_generation_id(record)) for record in records)
    return ()
435
+
436
+
437
def _snapshot_generation_id_from_frame(frame: pl.DataFrame) -> str | None:
    """Return the frame's single snapshot generation id, if it has exactly one.

    ``None`` is returned for an empty frame, a frame without the
    ``snapshot_generation_id`` column, or a frame whose non-null, non-blank
    string ids do not collapse to exactly one distinct value.
    """
    if frame.height == 0 or "snapshot_generation_id" not in frame.columns:
        return None
    distinct_values = frame.get_column("snapshot_generation_id").drop_nulls().unique().to_list()
    usable_ids = [candidate for candidate in distinct_values if isinstance(candidate, str) and candidate.strip()]
    return usable_ids[0] if len(usable_ids) == 1 else None
444
+
445
+
446
def _drop_snapshot_generation_id(row: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of ``row`` without its ``snapshot_generation_id`` key; the input is left untouched."""
    return {column: value for column, value in row.items() if column != "snapshot_generation_id"}
450
+
451
+
452
def _read_parquet_with_retries(path: Path, *, retries: int = _PARQUET_READ_RETRIES) -> pl.DataFrame:
    """Read one parquet file, retrying transient read failures.

    Args:
        path: Parquet file to read.
        retries: Maximum number of read attempts; values below one are
            treated as one.

    Returns:
        The frame that was read, or an empty frame when ``path`` is not a
        file, including when it disappears between the existence check and
        the read.

    Raises:
        OSError, polars.exceptions.PolarsError: The error from the final
            attempt when every attempt failed.
    """
    attempts = max(retries, 1)
    for attempt in range(attempts):
        if not path.is_file():
            return pl.DataFrame()
        try:
            return pl.read_parquet(path)
        except FileNotFoundError:
            # The file vanished after the is_file() check.  Treat this the
            # same as "not present" on every attempt, not only on the early
            # ones where the next loop iteration would have noticed.
            return pl.DataFrame()
        except (OSError, pl.exceptions.PolarsError):
            if attempt == attempts - 1:
                raise
    # Unreachable: each iteration returns or, on the last attempt, re-raises.
    return pl.DataFrame()
465
+
466
+
467
def _read_single_row_parquet(path: Path) -> dict[str, Any] | None:
    """Return the first row of the parquet at ``path`` as a dict, or ``None`` when no rows were read."""
    loaded = _read_parquet_with_retries(path)
    return None if loaded.height == 0 else loaded.row(0, named=True)
472
+
473
+
474
def _read_consistent_runtime_snapshot(
    paths: WorkspacePaths,
    *,
    retries: int = _PARQUET_READ_RETRIES,
) -> tuple[
    tuple[PersistedRun, ...],
    tuple[PersistedStepRun, ...],
    tuple[PersistedLogEntry, ...],
    tuple[PersistedFileState, ...],
] | None:
    """Read the four shared snapshot files and return them only if they agree.

    All four parquet files are read on each attempt.  The attempt is accepted
    when the files that expose a generation id expose at most one distinct id
    between them; otherwise the read is repeated, up to ``retries`` attempts
    (at least one).

    Returns:
        ``(runs, step_runs, logs, file_states)`` as tuples of persisted
        records, or ``None`` when no attempt produced a consistent set.
    """
    for _ in range(max(retries, 1)):
        runs_frame = _read_parquet_with_retries(paths.shared_runs_path)
        step_runs_frame = _read_parquet_with_retries(paths.shared_step_runs_path)
        logs_frame = _read_parquet_with_retries(paths.shared_logs_path)
        file_states_frame = _read_parquet_with_retries(paths.shared_file_state_path)

        seen_generations = {
            _snapshot_generation_id_from_frame(each_frame)
            for each_frame in (runs_frame, step_runs_frame, logs_frame, file_states_frame)
        }
        # Frames without a usable generation id do not take part in the check.
        seen_generations.discard(None)
        if len(seen_generations) > 1:
            # Files come from different checkpoints; read all of them again.
            continue

        run_rows = tuple(PersistedRun(**_drop_snapshot_generation_id(record)) for record in runs_frame.to_dicts())
        step_rows = tuple(PersistedStepRun(**_drop_snapshot_generation_id(record)) for record in step_runs_frame.to_dicts())
        log_rows = tuple(PersistedLogEntry(**_drop_snapshot_generation_id(record)) for record in logs_frame.to_dicts())
        file_state_rows = tuple(PersistedFileState(**_drop_snapshot_generation_id(record)) for record in file_states_frame.to_dicts())
        return (run_rows, step_rows, log_rows, file_state_rows)
    return None
507
+
508
+
509
# Public API of this module, kept in alphabetical order.
__all__ = [
    "checkpoint_workspace_state",
    "claim_workspace",
    "hydrate_local_runtime_state",
    "initialize_workspace_state",
    "lease_is_stale",
    "read_control_request",
    "read_lease_metadata",
    "recover_stale_workspace",
    "release_workspace",
    "remove_control_request",
    "remove_lease_metadata",
    "write_control_request",
    "write_lease_metadata",
]
@@ -0,0 +1,49 @@
1
+ """Injectable application service objects."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from importlib import import_module
6
+
7
# Names exported by this package.  Each one is resolved on first access by the
# module-level ``__getattr__`` through ``_SERVICE_MODULES``.
__all__ = [
    "DaemonService",
    "DaemonStateService",
    "FlowCatalogService",
    "FlowExecutionService",
    "LedgerService",
    "LogService",
    "RuntimeExecutionService",
    "WorkspaceRuntimeBinding",
    "WorkspaceRuntimeBindingService",
    "RuntimeHistoryService",
    "SettingsService",
    "SharedStateService",
    "ThemeService",
    "WorkspaceProvisioningService",
    "WorkspaceService",
]

# Exported name -> dotted path of the module that defines it.
_SERVICE_MODULES = {
    "DaemonService": "data_engine.services.daemon",
    "DaemonStateService": "data_engine.services.daemon_state",
    "FlowCatalogService": "data_engine.services.flow_catalog",
    "FlowExecutionService": "data_engine.services.flow_execution",
    "LedgerService": "data_engine.services.ledger",
    "LogService": "data_engine.services.logs",
    "RuntimeExecutionService": "data_engine.services.runtime_execution",
    "WorkspaceRuntimeBinding": "data_engine.services.runtime_binding",
    "WorkspaceRuntimeBindingService": "data_engine.services.runtime_binding",
    "RuntimeHistoryService": "data_engine.services.runtime_history",
    "SettingsService": "data_engine.services.settings",
    "SharedStateService": "data_engine.services.shared_state",
    "ThemeService": "data_engine.services.theme",
    "WorkspaceProvisioningService": "data_engine.services.workspace_provisioning",
    "WorkspaceService": "data_engine.services.workspaces",
}
42
+
43
+
44
def __getattr__(name: str):
    """Resolve an exported service name lazily on first attribute access.

    The defining module is looked up in ``_SERVICE_MODULES`` and imported on
    demand.  The resolved object is stored in this module's globals, so later
    accesses find it directly and do not reach this hook again.

    Raises:
        AttributeError: If ``name`` is not a known service export.
    """
    module_name = _SERVICE_MODULES.get(name)
    if module_name is None:
        # Use the interpreter's usual wording so the error names the module.
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    resolved = getattr(import_module(module_name), name)
    globals()[name] = resolved
    return resolved
@@ -0,0 +1,64 @@
1
+ """Daemon IPC and lifecycle services."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Callable
6
+ from typing import Any
7
+
8
+ from data_engine.domain import DaemonLifecyclePolicy
9
+ from data_engine.hosts.daemon.app import (
10
+ DaemonClientError,
11
+ daemon_request,
12
+ force_shutdown_daemon_process,
13
+ is_daemon_live,
14
+ spawn_daemon_process,
15
+ )
16
+ from data_engine.platform.workspace_models import WorkspacePaths
17
+
18
+
19
class DaemonService:
    """Injectable facade over the daemon process and IPC helper functions.

    Every collaborator is supplied as a callable (or exception type) through
    the constructor and defaults to the helper imported from
    ``data_engine.hosts.daemon.app``.
    """

    def __init__(
        self,
        *,
        spawn_process_func: Callable[..., object] = spawn_daemon_process,
        request_func: Callable[..., dict[str, Any]] = daemon_request,
        is_live_func: Callable[[WorkspacePaths], bool] = is_daemon_live,
        force_shutdown_func: Callable[..., None] = force_shutdown_daemon_process,
        client_error_type: type[Exception] = DaemonClientError,
    ) -> None:
        # Process control.
        self._spawn_process = spawn_process_func
        self._force_shutdown = force_shutdown_func
        # IPC.
        self._request = request_func
        self._is_live = is_live_func
        # Exception class reported through ``client_error_type``.
        self._client_error_type = client_error_type

    def spawn(
        self,
        paths: WorkspacePaths,
        *,
        lifecycle_policy: DaemonLifecyclePolicy = DaemonLifecyclePolicy.PERSISTENT,
    ) -> object:
        """Call the spawn helper for ``paths`` and return whatever it returns."""
        spawned = self._spawn_process(paths, lifecycle_policy=lifecycle_policy)
        return spawned

    def request(self, paths: WorkspacePaths, payload: dict[str, Any], *, timeout: float = 0.0) -> dict[str, Any]:
        """Forward one request payload to the request helper and return its response."""
        # NOTE(review): the default timeout is 0.0; how the request helper
        # interprets that value is not visible here -- confirm.
        response = self._request(paths, payload, timeout=timeout)
        return response

    def is_live(self, paths: WorkspacePaths) -> bool:
        """Return the liveness helper's answer for ``paths``."""
        alive = self._is_live(paths)
        return alive

    def force_shutdown(self, paths: WorkspacePaths, *, timeout: float = 0.5) -> None:
        """Invoke the force-shutdown helper for ``paths`` with the given timeout."""
        self._force_shutdown(paths, timeout=timeout)
        return None

    @property
    def client_error_type(self) -> type[Exception]:
        """Return the exception class supplied at construction."""
        return self._client_error_type
62
+
63
+
64
+ __all__ = ["DaemonService"]
@@ -0,0 +1,40 @@
1
+ """Workspace daemon state and control services."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from data_engine.hosts.daemon.manager import WorkspaceDaemonManager, WorkspaceDaemonSnapshot
6
+ from data_engine.hosts.daemon.shared_state import DaemonSharedStateAdapter
7
+ from data_engine.domain import WorkspaceControlState
8
+ from data_engine.platform.workspace_models import WorkspacePaths
9
+
10
+
11
class DaemonStateService:
    """Build workspace daemon managers and delegate snapshot and control calls to them."""

    def __init__(self, *, shared_state_adapter: DaemonSharedStateAdapter | None = None) -> None:
        # A falsy adapter (including None) is replaced with a default instance.
        if shared_state_adapter:
            self.shared_state_adapter = shared_state_adapter
        else:
            self.shared_state_adapter = DaemonSharedStateAdapter()

    def create_manager(self, paths: WorkspacePaths) -> WorkspaceDaemonManager:
        """Construct a manager for ``paths`` that uses this service's adapter."""
        manager = WorkspaceDaemonManager(paths, shared_state_adapter=self.shared_state_adapter)
        return manager

    def sync(self, manager: WorkspaceDaemonManager) -> WorkspaceDaemonSnapshot:
        """Return the result of ``manager.sync()``."""
        snapshot = manager.sync()
        return snapshot

    def control_state(
        self,
        manager: WorkspaceDaemonManager,
        snapshot: WorkspaceDaemonSnapshot,
        *,
        daemon_startup_in_progress: bool = False,
    ) -> WorkspaceControlState:
        """Return the manager's control state for ``snapshot``."""
        state = manager.control_state(snapshot, daemon_startup_in_progress=daemon_startup_in_progress)
        return state

    def request_control(self, manager: WorkspaceDaemonManager) -> str:
        """Return the result of ``manager.request_control()``."""
        outcome = manager.request_control()
        return outcome
38
+
39
+
40
+ __all__ = ["DaemonStateService"]