py-data-engine 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. data_engine/__init__.py +37 -0
  2. data_engine/application/__init__.py +39 -0
  3. data_engine/application/actions.py +42 -0
  4. data_engine/application/catalog.py +151 -0
  5. data_engine/application/control.py +213 -0
  6. data_engine/application/details.py +73 -0
  7. data_engine/application/runtime.py +449 -0
  8. data_engine/application/workspace.py +62 -0
  9. data_engine/authoring/__init__.py +14 -0
  10. data_engine/authoring/builder.py +31 -0
  11. data_engine/authoring/execution/__init__.py +6 -0
  12. data_engine/authoring/execution/app.py +6 -0
  13. data_engine/authoring/execution/context.py +82 -0
  14. data_engine/authoring/execution/continuous.py +176 -0
  15. data_engine/authoring/execution/grouped.py +106 -0
  16. data_engine/authoring/execution/logging.py +83 -0
  17. data_engine/authoring/execution/polling.py +135 -0
  18. data_engine/authoring/execution/runner.py +210 -0
  19. data_engine/authoring/execution/single.py +171 -0
  20. data_engine/authoring/flow.py +361 -0
  21. data_engine/authoring/helpers.py +160 -0
  22. data_engine/authoring/model.py +59 -0
  23. data_engine/authoring/primitives.py +430 -0
  24. data_engine/authoring/services.py +42 -0
  25. data_engine/devtools/__init__.py +3 -0
  26. data_engine/devtools/project_ast_map.py +503 -0
  27. data_engine/docs/__init__.py +1 -0
  28. data_engine/docs/sphinx_source/_static/custom.css +13 -0
  29. data_engine/docs/sphinx_source/api.rst +42 -0
  30. data_engine/docs/sphinx_source/conf.py +37 -0
  31. data_engine/docs/sphinx_source/guides/app-runtime-and-workspaces.md +397 -0
  32. data_engine/docs/sphinx_source/guides/authoring-flow-modules.md +215 -0
  33. data_engine/docs/sphinx_source/guides/configuring-flows.md +185 -0
  34. data_engine/docs/sphinx_source/guides/core-concepts.md +208 -0
  35. data_engine/docs/sphinx_source/guides/database-methods.md +107 -0
  36. data_engine/docs/sphinx_source/guides/duckdb-helpers.md +462 -0
  37. data_engine/docs/sphinx_source/guides/flow-context.md +538 -0
  38. data_engine/docs/sphinx_source/guides/flow-methods.md +206 -0
  39. data_engine/docs/sphinx_source/guides/getting-started.md +271 -0
  40. data_engine/docs/sphinx_source/guides/project-inventory.md +5683 -0
  41. data_engine/docs/sphinx_source/guides/project-map.md +118 -0
  42. data_engine/docs/sphinx_source/guides/recipes.md +268 -0
  43. data_engine/docs/sphinx_source/index.rst +22 -0
  44. data_engine/domain/__init__.py +92 -0
  45. data_engine/domain/actions.py +69 -0
  46. data_engine/domain/catalog.py +128 -0
  47. data_engine/domain/details.py +214 -0
  48. data_engine/domain/diagnostics.py +56 -0
  49. data_engine/domain/errors.py +104 -0
  50. data_engine/domain/inspection.py +99 -0
  51. data_engine/domain/logs.py +118 -0
  52. data_engine/domain/operations.py +172 -0
  53. data_engine/domain/operator.py +72 -0
  54. data_engine/domain/runs.py +155 -0
  55. data_engine/domain/runtime.py +279 -0
  56. data_engine/domain/source_state.py +17 -0
  57. data_engine/domain/support.py +54 -0
  58. data_engine/domain/time.py +23 -0
  59. data_engine/domain/workspace.py +159 -0
  60. data_engine/flow_modules/__init__.py +1 -0
  61. data_engine/flow_modules/flow_module_compiler.py +179 -0
  62. data_engine/flow_modules/flow_module_loader.py +201 -0
  63. data_engine/helpers/__init__.py +25 -0
  64. data_engine/helpers/duckdb.py +705 -0
  65. data_engine/hosts/__init__.py +1 -0
  66. data_engine/hosts/daemon/__init__.py +23 -0
  67. data_engine/hosts/daemon/app.py +221 -0
  68. data_engine/hosts/daemon/bootstrap.py +69 -0
  69. data_engine/hosts/daemon/client.py +465 -0
  70. data_engine/hosts/daemon/commands.py +64 -0
  71. data_engine/hosts/daemon/composition.py +310 -0
  72. data_engine/hosts/daemon/constants.py +15 -0
  73. data_engine/hosts/daemon/entrypoints.py +97 -0
  74. data_engine/hosts/daemon/lifecycle.py +191 -0
  75. data_engine/hosts/daemon/manager.py +272 -0
  76. data_engine/hosts/daemon/ownership.py +126 -0
  77. data_engine/hosts/daemon/runtime_commands.py +188 -0
  78. data_engine/hosts/daemon/runtime_control.py +31 -0
  79. data_engine/hosts/daemon/server.py +84 -0
  80. data_engine/hosts/daemon/shared_state.py +147 -0
  81. data_engine/hosts/daemon/state_sync.py +101 -0
  82. data_engine/platform/__init__.py +1 -0
  83. data_engine/platform/identity.py +35 -0
  84. data_engine/platform/local_settings.py +146 -0
  85. data_engine/platform/theme.py +259 -0
  86. data_engine/platform/workspace_models.py +190 -0
  87. data_engine/platform/workspace_policy.py +333 -0
  88. data_engine/runtime/__init__.py +1 -0
  89. data_engine/runtime/file_watch.py +185 -0
  90. data_engine/runtime/ledger_models.py +116 -0
  91. data_engine/runtime/runtime_db.py +938 -0
  92. data_engine/runtime/shared_state.py +523 -0
  93. data_engine/services/__init__.py +49 -0
  94. data_engine/services/daemon.py +64 -0
  95. data_engine/services/daemon_state.py +40 -0
  96. data_engine/services/flow_catalog.py +102 -0
  97. data_engine/services/flow_execution.py +48 -0
  98. data_engine/services/ledger.py +85 -0
  99. data_engine/services/logs.py +65 -0
  100. data_engine/services/runtime_binding.py +105 -0
  101. data_engine/services/runtime_execution.py +126 -0
  102. data_engine/services/runtime_history.py +62 -0
  103. data_engine/services/settings.py +58 -0
  104. data_engine/services/shared_state.py +28 -0
  105. data_engine/services/theme.py +59 -0
  106. data_engine/services/workspace_provisioning.py +224 -0
  107. data_engine/services/workspaces.py +74 -0
  108. data_engine/ui/__init__.py +3 -0
  109. data_engine/ui/cli/__init__.py +19 -0
  110. data_engine/ui/cli/app.py +161 -0
  111. data_engine/ui/cli/commands_doctor.py +178 -0
  112. data_engine/ui/cli/commands_run.py +80 -0
  113. data_engine/ui/cli/commands_start.py +100 -0
  114. data_engine/ui/cli/commands_workspace.py +97 -0
  115. data_engine/ui/cli/dependencies.py +44 -0
  116. data_engine/ui/cli/parser.py +56 -0
  117. data_engine/ui/gui/__init__.py +25 -0
  118. data_engine/ui/gui/app.py +116 -0
  119. data_engine/ui/gui/bootstrap.py +487 -0
  120. data_engine/ui/gui/bootstrapper.py +140 -0
  121. data_engine/ui/gui/cache_models.py +23 -0
  122. data_engine/ui/gui/control_support.py +185 -0
  123. data_engine/ui/gui/controllers/__init__.py +6 -0
  124. data_engine/ui/gui/controllers/flows.py +439 -0
  125. data_engine/ui/gui/controllers/runtime.py +245 -0
  126. data_engine/ui/gui/dialogs/__init__.py +12 -0
  127. data_engine/ui/gui/dialogs/messages.py +88 -0
  128. data_engine/ui/gui/dialogs/previews.py +222 -0
  129. data_engine/ui/gui/helpers/__init__.py +62 -0
  130. data_engine/ui/gui/helpers/inspection.py +81 -0
  131. data_engine/ui/gui/helpers/lifecycle.py +112 -0
  132. data_engine/ui/gui/helpers/scroll.py +28 -0
  133. data_engine/ui/gui/helpers/theming.py +87 -0
  134. data_engine/ui/gui/icons/dark_light.svg +12 -0
  135. data_engine/ui/gui/icons/documentation.svg +1 -0
  136. data_engine/ui/gui/icons/failed.svg +3 -0
  137. data_engine/ui/gui/icons/group.svg +4 -0
  138. data_engine/ui/gui/icons/home.svg +2 -0
  139. data_engine/ui/gui/icons/manual.svg +2 -0
  140. data_engine/ui/gui/icons/poll.svg +2 -0
  141. data_engine/ui/gui/icons/schedule.svg +4 -0
  142. data_engine/ui/gui/icons/settings.svg +2 -0
  143. data_engine/ui/gui/icons/started.svg +3 -0
  144. data_engine/ui/gui/icons/success.svg +3 -0
  145. data_engine/ui/gui/icons/view-log.svg +3 -0
  146. data_engine/ui/gui/icons.py +50 -0
  147. data_engine/ui/gui/launcher.py +48 -0
  148. data_engine/ui/gui/presenters/__init__.py +72 -0
  149. data_engine/ui/gui/presenters/docs.py +140 -0
  150. data_engine/ui/gui/presenters/logs.py +58 -0
  151. data_engine/ui/gui/presenters/runtime_projection.py +29 -0
  152. data_engine/ui/gui/presenters/sidebar.py +88 -0
  153. data_engine/ui/gui/presenters/steps.py +148 -0
  154. data_engine/ui/gui/presenters/workspace.py +39 -0
  155. data_engine/ui/gui/presenters/workspace_binding.py +75 -0
  156. data_engine/ui/gui/presenters/workspace_settings.py +182 -0
  157. data_engine/ui/gui/preview_models.py +37 -0
  158. data_engine/ui/gui/render_support.py +241 -0
  159. data_engine/ui/gui/rendering/__init__.py +12 -0
  160. data_engine/ui/gui/rendering/artifacts.py +95 -0
  161. data_engine/ui/gui/rendering/icons.py +50 -0
  162. data_engine/ui/gui/runtime.py +47 -0
  163. data_engine/ui/gui/state_support.py +193 -0
  164. data_engine/ui/gui/support.py +214 -0
  165. data_engine/ui/gui/surface.py +209 -0
  166. data_engine/ui/gui/theme.py +720 -0
  167. data_engine/ui/gui/widgets/__init__.py +34 -0
  168. data_engine/ui/gui/widgets/config.py +41 -0
  169. data_engine/ui/gui/widgets/logs.py +62 -0
  170. data_engine/ui/gui/widgets/panels.py +507 -0
  171. data_engine/ui/gui/widgets/sidebar.py +130 -0
  172. data_engine/ui/gui/widgets/steps.py +84 -0
  173. data_engine/ui/tui/__init__.py +5 -0
  174. data_engine/ui/tui/app.py +222 -0
  175. data_engine/ui/tui/bootstrap.py +475 -0
  176. data_engine/ui/tui/bootstrapper.py +117 -0
  177. data_engine/ui/tui/controllers/__init__.py +6 -0
  178. data_engine/ui/tui/controllers/flows.py +349 -0
  179. data_engine/ui/tui/controllers/runtime.py +167 -0
  180. data_engine/ui/tui/runtime.py +34 -0
  181. data_engine/ui/tui/state_support.py +141 -0
  182. data_engine/ui/tui/support.py +63 -0
  183. data_engine/ui/tui/theme.py +204 -0
  184. data_engine/ui/tui/widgets.py +123 -0
  185. data_engine/views/__init__.py +109 -0
  186. data_engine/views/actions.py +80 -0
  187. data_engine/views/artifacts.py +58 -0
  188. data_engine/views/flow_display.py +69 -0
  189. data_engine/views/logs.py +54 -0
  190. data_engine/views/models.py +96 -0
  191. data_engine/views/presentation.py +133 -0
  192. data_engine/views/runs.py +62 -0
  193. data_engine/views/state.py +39 -0
  194. data_engine/views/status.py +13 -0
  195. data_engine/views/text.py +109 -0
  196. py_data_engine-0.1.0.dist-info/METADATA +330 -0
  197. py_data_engine-0.1.0.dist-info/RECORD +200 -0
  198. py_data_engine-0.1.0.dist-info/WHEEL +5 -0
  199. py_data_engine-0.1.0.dist-info/entry_points.txt +2 -0
  200. py_data_engine-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,333 @@
1
+ """Workspace state, discovery, and runtime-layout policy services."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import os
7
+ from pathlib import Path
8
+
9
+ from data_engine.platform.identity import (
10
+ APP_ARTIFACTS_DIR_NAME,
11
+ APP_RUNTIME_NAMESPACE,
12
+ RUNTIME_STATE_DIR_NAME,
13
+ WORKSPACE_CACHE_DIR_NAME,
14
+ )
15
+ from data_engine.platform.local_settings import LocalSettingsStore, default_settings_db_path, default_state_root
16
+ from data_engine.platform.workspace_models import (
17
+ APP_ROOT_PATH,
18
+ DATA_ENGINE_APP_ROOT_ENV_VAR,
19
+ DATA_ENGINE_RUNTIME_ROOT_ENV_VAR,
20
+ DATA_ENGINE_WORKSPACE_COLLECTION_ROOT_ENV_VAR,
21
+ DATA_ENGINE_WORKSPACE_ID_ENV_VAR,
22
+ DATA_ENGINE_WORKSPACE_ROOT_ENV_VAR,
23
+ DiscoveredWorkspace,
24
+ WORKSPACE_AVAILABLE_MARKERS_DIR_NAME,
25
+ WORKSPACE_CONFIG_DIR_NAME,
26
+ WORKSPACE_CONTROL_REQUESTS_DIR_NAME,
27
+ WORKSPACE_DATABASES_DIR_NAME,
28
+ WORKSPACE_FLOW_MODULES_DIR_NAME,
29
+ WORKSPACE_LEASED_MARKERS_DIR_NAME,
30
+ WORKSPACE_LEASE_METADATA_DIR_NAME,
31
+ WORKSPACE_SHARED_FILE_STATE_DIR_NAME,
32
+ WORKSPACE_SHARED_LOGS_DIR_NAME,
33
+ WORKSPACE_SHARED_RUNS_DIR_NAME,
34
+ WORKSPACE_SHARED_STATE_DIR_NAME,
35
+ WORKSPACE_SHARED_STEP_RUNS_DIR_NAME,
36
+ WORKSPACE_STALE_MARKERS_DIR_NAME,
37
+ WORKSPACE_STATE_DIR_NAME,
38
+ WorkspacePaths,
39
+ WorkspaceSettings,
40
+ normalized_path_text,
41
+ validate_workspace_id,
42
+ )
43
+
44
+
45
class AppStatePolicy:
    """Resolve machine-local app settings and mutable-state roots."""

    @staticmethod
    def _env_text(name: str) -> str | None:
        """Return the raw value of environment variable *name*.

        Unset, empty, and whitespace-only values are all reported as ``None``
        so every override in this class applies the same "is it set?" rule.
        """
        value = os.environ.get(name)
        if value is None or not value.strip():
            return None
        return value

    def effective_app_root(self, *, app_root: Path | None = None) -> Path:
        """Resolve the effective project/app root from an explicit argument or env.

        Precedence: the explicit ``app_root`` argument, then the app-root
        environment variable, then the packaged ``APP_ROOT_PATH`` default.
        """
        if app_root is not None:
            return app_root.resolve()
        env_value = self._env_text(DATA_ENGINE_APP_ROOT_ENV_VAR)
        if env_value is not None:
            return Path(env_value).expanduser().resolve()
        return APP_ROOT_PATH

    def settings_path(self, *, app_root: Path | None = None) -> Path:
        """Return the machine-local app settings database path."""
        return default_settings_db_path(app_root=app_root)

    def load_settings(self, *, app_root: Path | None = None) -> WorkspaceSettings:
        """Load machine-local workspace settings or synthesize defaults.

        Environment variables override the persisted store:

        * the runtime-root variable overrides the stored runtime root;
        * the collection-root variable selects the collection directly;
        * otherwise the workspace-root variable selects its parent directory
          as the collection and its own name as the default workspace id;
        * the workspace-id variable, when non-blank, overrides the default
          selection in both environment-driven branches.

        With none of the collection/workspace variables set, the collection
        root and default selection are read from the settings store.
        """
        root = self.effective_app_root(app_root=app_root)
        settings_path = self.settings_path(app_root=root)
        state_root = default_state_root(app_root=root)
        store = LocalSettingsStore(settings_path)

        env_collection = self._env_text(DATA_ENGINE_WORKSPACE_COLLECTION_ROOT_ENV_VAR)
        env_workspace_root = self._env_text(DATA_ENGINE_WORKSPACE_ROOT_ENV_VAR)
        env_runtime_root = self._env_text(DATA_ENGINE_RUNTIME_ROOT_ENV_VAR)
        # A blank workspace id counts as "not set" (like the path variables),
        # and surrounding whitespace is never part of an id.
        env_workspace_id = self._env_text(DATA_ENGINE_WORKSPACE_ID_ENV_VAR)
        if env_workspace_id is not None:
            env_workspace_id = env_workspace_id.strip()

        if env_runtime_root is not None:
            runtime_root = Path(env_runtime_root).expanduser().resolve()
        else:
            runtime_root = store.runtime_root() or state_root / APP_ARTIFACTS_DIR_NAME

        if env_collection is not None:
            collection_root = Path(env_collection).expanduser().resolve()
            default_selected = env_workspace_id or store.default_workspace_id()
            return WorkspaceSettings(root, settings_path, state_root, runtime_root, collection_root, default_selected)

        if env_workspace_root is not None:
            explicit_root = Path(env_workspace_root).expanduser().resolve()
            collection_root = explicit_root.parent
            default_selected = env_workspace_id or explicit_root.name or store.default_workspace_id()
            return WorkspaceSettings(root, settings_path, state_root, runtime_root, collection_root, default_selected)

        return WorkspaceSettings(
            app_root=root,
            settings_path=settings_path,
            state_root=state_root,
            runtime_root=runtime_root,
            workspace_collection_root=store.workspace_collection_root(),
            default_selected=store.default_workspace_id(),
        )

    def write_settings(self, settings: WorkspaceSettings) -> None:
        """Persist machine-local settings through the SQLite settings store."""
        store = LocalSettingsStore(settings.settings_path)
        store.set_workspace_collection_root(settings.workspace_collection_root)
        store.set_default_workspace_id(settings.default_selected)
        store.set_runtime_root(settings.runtime_root)
99
+
100
+
101
class WorkspaceDiscoveryPolicy:
    """Resolve authored workspace discovery and selection."""

    PLACEHOLDER_WORKSPACE_ROOT_NAME = ".workspace_unconfigured"
    PLACEHOLDER_WORKSPACE_ID = "unconfigured"

    def __init__(self, *, app_state_policy: AppStatePolicy | None = None) -> None:
        """Store the app-state policy, creating a default one when omitted."""
        self.app_state_policy = app_state_policy or AppStatePolicy()

    @staticmethod
    def _env_workspace_id() -> str | None:
        """Return the stripped workspace-id env override, or ``None`` when unset or blank."""
        value = os.environ.get(DATA_ENGINE_WORKSPACE_ID_ENV_VAR)
        if value is None:
            return None
        return value.strip() or None

    def _normalize_workspace_id(self, candidate: str, *, fallback: str | None = None) -> str:
        """Strip, map the placeholder root name to the placeholder id, and validate.

        An empty candidate is replaced by ``fallback`` when one is given. The
        result is passed through ``validate_workspace_id``.
        """
        value = str(candidate).strip()
        if value == self.PLACEHOLDER_WORKSPACE_ROOT_NAME:
            value = self.PLACEHOLDER_WORKSPACE_ID
        if not value and fallback is not None:
            value = fallback
        return validate_workspace_id(value)

    def _root_fallback_id(self, root: Path) -> str:
        """Derive a workspace id from a workspace root directory name."""
        if root.name == self.PLACEHOLDER_WORKSPACE_ROOT_NAME:
            return self.PLACEHOLDER_WORKSPACE_ID
        return root.name or "default"

    def _select_from_root(
        self,
        root: Path | str,
        *,
        requested_id: str | None,
        workspace_collection_root: Path | None,
    ) -> tuple[DiscoveredWorkspace, Path, bool]:
        """Build the selection for a workspace root that was named directly.

        ``requested_id`` is normalized when given; otherwise the id is derived
        from the root's directory name. The collection root defaults to the
        workspace root's parent.
        """
        discovered_root = Path(root).expanduser().resolve()
        if requested_id is not None:
            selected_id = self._normalize_workspace_id(requested_id)
        else:
            selected_id = self._root_fallback_id(discovered_root)
        if workspace_collection_root is not None:
            collection_root = Path(workspace_collection_root).expanduser().resolve()
        else:
            collection_root = discovered_root.parent
        return DiscoveredWorkspace(workspace_id=selected_id, workspace_root=discovered_root), collection_root, True

    def _placeholder_workspace(self, *, app_root: Path, preferred_id: str | None = None) -> tuple[DiscoveredWorkspace, Path, bool]:
        """Return the unconfigured placeholder workspace beneath *app_root*.

        The third tuple element is ``False`` to flag that no workspace is configured.
        """
        placeholder_root = app_root / self.PLACEHOLDER_WORKSPACE_ROOT_NAME
        placeholder_id = (
            self._normalize_workspace_id(preferred_id, fallback=self.PLACEHOLDER_WORKSPACE_ID)
            if preferred_id is not None
            else self.PLACEHOLDER_WORKSPACE_ID
        )
        return (
            DiscoveredWorkspace(workspace_id=placeholder_id, workspace_root=placeholder_root),
            placeholder_root.parent,
            False,
        )

    def discover(
        self,
        *,
        app_root: Path | None = None,
        workspace_collection_root: Path | None = None,
        explicit_workspace_root: Path | None = None,
    ) -> tuple[DiscoveredWorkspace, ...]:
        """Discover valid workspaces beneath the collection root.

        * With ``explicit_workspace_root`` exactly that root is returned.
        * A collection root that itself contains the flow-modules directory is
          treated as a single workspace.
        * Otherwise every direct child directory containing the flow-modules
          directory is returned, sorted by path.

        An unset collection root, or one that is not an existing directory,
        yields an empty tuple.
        """
        if explicit_workspace_root is not None:
            root = Path(explicit_workspace_root).expanduser().resolve()
            workspace_id = self._env_workspace_id() or root.name or "default"
            return (DiscoveredWorkspace(workspace_id=workspace_id, workspace_root=root),)

        settings = self.app_state_policy.load_settings(app_root=app_root)
        collection_root = (
            Path(workspace_collection_root).expanduser().resolve()
            if workspace_collection_root is not None
            else settings.workspace_collection_root
        )
        # is_dir() rather than exists(): a collection root that points at a
        # regular file must not reach iterdir(), which raises NotADirectoryError.
        if collection_root is None or not collection_root.is_dir():
            return ()
        if (collection_root / WORKSPACE_FLOW_MODULES_DIR_NAME).is_dir():
            workspace_id = self._env_workspace_id() or "default"
            return (DiscoveredWorkspace(workspace_id=workspace_id, workspace_root=collection_root.resolve()),)

        child_dirs = sorted(path for path in collection_root.iterdir() if path.is_dir())
        return tuple(
            DiscoveredWorkspace(workspace_id=candidate.name, workspace_root=candidate.resolve())
            for candidate in child_dirs
            if (candidate / WORKSPACE_FLOW_MODULES_DIR_NAME).is_dir()
        )

    def select_workspace(
        self,
        *,
        app_root: Path,
        workspace_id: str | None,
        workspace_root: Path | None,
        workspace_collection_root: Path | None,
        data_root: Path | None,
    ) -> tuple[DiscoveredWorkspace, Path, bool]:
        """Select one authored workspace and collection root from the current policy.

        Resolution order:

        1. an explicit ``workspace_root`` (or, failing that, ``data_root``);
        2. the workspace-root environment variable;
        3. discovery beneath the collection root (argument or stored setting).

        Returns ``(workspace, collection_root, workspace_configured)``. The
        flag is ``False`` only when no collection root is known and the
        placeholder workspace is returned.
        """
        explicit_root = workspace_root if workspace_root is not None else data_root
        if explicit_root is not None:
            # The env workspace id is deliberately not consulted for an explicit root.
            return self._select_from_root(
                explicit_root,
                requested_id=workspace_id,
                workspace_collection_root=workspace_collection_root,
            )

        env_workspace_root = os.environ.get(DATA_ENGINE_WORKSPACE_ROOT_ENV_VAR)
        if env_workspace_root and env_workspace_root.strip():
            requested = workspace_id if workspace_id is not None else self._env_workspace_id()
            return self._select_from_root(
                env_workspace_root,
                requested_id=requested,
                workspace_collection_root=workspace_collection_root,
            )

        settings = self.app_state_policy.load_settings(app_root=app_root)
        collection_root = (
            Path(workspace_collection_root).expanduser().resolve()
            if workspace_collection_root is not None
            else settings.workspace_collection_root
        )
        if collection_root is None:
            return self._placeholder_workspace(
                app_root=app_root,
                preferred_id=workspace_id or settings.default_selected,
            )

        discovered_all = self.discover(app_root=app_root, workspace_collection_root=collection_root)
        env_workspace_id = self._env_workspace_id()

        if not discovered_all:
            # Nothing exists yet: point at the directory the chosen id would occupy.
            if workspace_id is not None:
                selected_id = self._normalize_workspace_id(workspace_id)
            elif env_workspace_id is not None:
                selected_id = self._normalize_workspace_id(env_workspace_id)
            elif settings.default_selected is not None:
                selected_id = self._normalize_workspace_id(
                    settings.default_selected,
                    fallback=self.PLACEHOLDER_WORKSPACE_ID,
                )
            else:
                selected_id = self.PLACEHOLDER_WORKSPACE_ID
            discovered = DiscoveredWorkspace(
                workspace_id=selected_id,
                workspace_root=(collection_root / selected_id).resolve(),
            )
            return discovered, collection_root, True

        if workspace_id is not None:
            requested_id = self._normalize_workspace_id(workspace_id)
        else:
            requested_id = env_workspace_id if env_workspace_id is not None else settings.default_selected
            if requested_id is None:
                return discovered_all[0], collection_root, True
            requested_id = self._normalize_workspace_id(requested_id, fallback=self.PLACEHOLDER_WORKSPACE_ID)

        # An unknown id falls back to the first discovered workspace.
        by_id = {item.workspace_id: item for item in discovered_all}
        return by_id.get(requested_id, discovered_all[0]), collection_root, True
240
+
241
+
242
class RuntimeLayoutPolicy:
    """Compute the local runtime and artifact layout for a single workspace."""

    def __init__(self, *, app_state_policy: AppStatePolicy | None = None, discovery_policy: WorkspaceDiscoveryPolicy | None = None) -> None:
        """Keep the collaborating policies, building defaults for any that are omitted."""
        self.app_state_policy = app_state_policy or AppStatePolicy()
        if not discovery_policy:
            # The default discovery policy shares this instance's app-state policy.
            discovery_policy = WorkspaceDiscoveryPolicy(app_state_policy=self.app_state_policy)
        self.discovery_policy = discovery_policy

    def resolve_paths(
        self,
        *,
        workspace_id: str | None = None,
        workspace_root: Path | None = None,
        workspace_collection_root: Path | None = None,
        data_root: Path | None = None,
        app_root: Path | None = None,
    ) -> WorkspacePaths:
        """Build the full ``WorkspacePaths`` record for the selected workspace.

        The workspace is chosen by the discovery policy; authored and shared
        paths hang off the workspace root, while cache and runtime-state paths
        hang off the configured runtime root under a per-workspace namespace.
        """
        root = self.app_state_policy.effective_app_root(app_root=app_root)
        workspace, collection_root, configured = self.discovery_policy.select_workspace(
            app_root=root,
            workspace_id=workspace_id,
            workspace_root=workspace_root,
            workspace_collection_root=workspace_collection_root,
            data_root=data_root,
        )
        settings = self.app_state_policy.load_settings(app_root=root)

        ws_root = workspace.workspace_root
        ws_id = workspace.workspace_id
        # Every per-workspace shared table uses the same "<id>.parquet" file name.
        table_name = f"{ws_id}.parquet"

        state_dir = ws_root / WORKSPACE_STATE_DIR_NAME
        shared_dir = state_dir / WORKSPACE_SHARED_STATE_DIR_NAME
        lease_dir = state_dir / WORKSPACE_LEASE_METADATA_DIR_NAME
        control_dir = state_dir / WORKSPACE_CONTROL_REQUESTS_DIR_NAME

        namespace = self.local_workspace_namespace(ws_root, ws_id)
        artifacts = settings.runtime_root
        cache_dir = artifacts / WORKSPACE_CACHE_DIR_NAME / namespace
        runtime_dir = artifacts / RUNTIME_STATE_DIR_NAME / namespace
        endpoint_kind, endpoint_path = self.daemon_endpoint(
            runtime_state_dir=runtime_dir,
            workspace_id=ws_id,
        )

        return WorkspacePaths(
            app_root=root,
            workspace_collection_root=collection_root,
            workspace_id=ws_id,
            workspace_root=ws_root,
            config_dir=ws_root / WORKSPACE_CONFIG_DIR_NAME,
            flow_modules_dir=ws_root / WORKSPACE_FLOW_MODULES_DIR_NAME,
            databases_dir=ws_root / WORKSPACE_DATABASES_DIR_NAME,
            workspace_state_dir=state_dir,
            available_markers_dir=state_dir / WORKSPACE_AVAILABLE_MARKERS_DIR_NAME,
            leased_markers_dir=state_dir / WORKSPACE_LEASED_MARKERS_DIR_NAME,
            stale_markers_dir=state_dir / WORKSPACE_STALE_MARKERS_DIR_NAME,
            lease_metadata_dir=lease_dir,
            lease_metadata_path=lease_dir / table_name,
            control_requests_dir=control_dir,
            control_request_path=control_dir / table_name,
            shared_state_dir=shared_dir,
            shared_runs_path=shared_dir / WORKSPACE_SHARED_RUNS_DIR_NAME / table_name,
            shared_step_runs_path=shared_dir / WORKSPACE_SHARED_STEP_RUNS_DIR_NAME / table_name,
            shared_logs_path=shared_dir / WORKSPACE_SHARED_LOGS_DIR_NAME / table_name,
            shared_file_state_path=shared_dir / WORKSPACE_SHARED_FILE_STATE_DIR_NAME / table_name,
            artifacts_dir=artifacts,
            workspace_cache_dir=cache_dir,
            compiled_flow_modules_dir=cache_dir / "compiled_flow_modules",
            runtime_state_dir=runtime_dir,
            runtime_db_path=runtime_dir / "runtime_ledger.sqlite",
            daemon_log_path=runtime_dir / "daemon.log",
            documentation_dir=artifacts / "documentation",
            daemon_endpoint_kind=endpoint_kind,
            daemon_endpoint_path=endpoint_path,
            sphinx_source_dir=root / "src" / "data_engine" / "docs" / "sphinx_source",
            workspace_configured=configured,
        )

    @staticmethod
    def local_workspace_namespace(workspace_root: Path | str, workspace_id: str) -> str:
        """Return ``"<validated id>_<12 hex chars>"`` for one workspace root.

        The hex suffix is the start of the SHA-1 of the normalized, resolved
        workspace root path text.
        """
        safe_id = validate_workspace_id(workspace_id)
        resolved_root = Path(workspace_root).expanduser().resolve()
        root_text = normalized_path_text(resolved_root)
        digest = hashlib.sha1(root_text.encode('utf-8')).hexdigest()
        return f"{safe_id}_{digest[:12]}"

    @staticmethod
    def daemon_endpoint(*, runtime_state_dir: Path, workspace_id: str) -> tuple[str, str]:
        """Return ``(kind, path)`` for the local daemon IPC endpoint.

        On Windows (``os.name == "nt"``) the result is a named pipe; elsewhere
        it is a Unix socket path under ``/tmp``. Both embed the runtime
        namespace, the validated workspace id, and a 12-character SHA-1 prefix
        of the normalized runtime-state directory text.
        """
        safe_id = validate_workspace_id(workspace_id)
        state_text = normalized_path_text(runtime_state_dir)
        digest = hashlib.sha1(state_text.encode("utf-8")).hexdigest()[:12]
        endpoint_stem = f"{APP_RUNTIME_NAMESPACE}_{safe_id}_{digest}"
        if os.name != "nt":
            return "unix", normalized_path_text(Path("/tmp") / f"{endpoint_stem}.sock")
        return "pipe", rf"\\.\pipe\{endpoint_stem}"
327
+
328
+
329
+ __all__ = [
330
+ "AppStatePolicy",
331
+ "RuntimeLayoutPolicy",
332
+ "WorkspaceDiscoveryPolicy",
333
+ ]
@@ -0,0 +1 @@
1
+ """Runtime execution, file watching, persistence, and shared state."""
@@ -0,0 +1,185 @@
1
+ """Filesystem discovery and polling services used by the flow runtime."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Iterable, Protocol, runtime_checkable
7
+
8
+ from data_engine.authoring.model import FlowValidationError
9
+ from data_engine.platform.workspace_models import normalized_path_text
10
+
11
+
12
def _normalize_path(value: Path) -> Path:
    """Rebuild *value* from its normalized text form so comparisons are stable."""
    normalized_text = normalized_path_text(value)
    return Path(normalized_text)
15
+
16
+
17
def _normalized_name(value: str) -> str:
    """Return the case-folded, normalized form of a filename for temp-file checks."""
    normalized_text = normalized_path_text(value)
    return normalized_text.casefold()
20
+
21
+
22
def _queue_key(path: Path) -> str:
    """Return the case-folded POSIX text of the normalized path, used as a sort key."""
    normalized = _normalize_path(path)
    posix_text = normalized.as_posix()
    return posix_text.casefold()
25
+
26
+
27
def _normalize_extensions(extensions: tuple[str, ...] | None) -> tuple[str, ...] | None:
    """Return extension filters as lowercase, dot-prefixed strings.

    ``None`` (no filtering) is passed through unchanged. Each entry is
    stringified, stripped, and lowercased; an entry that ends up empty raises
    ``FlowValidationError``.
    """
    if extensions is None:
        return None

    cleaned: list[str] = []
    for raw in extensions:
        text = str(raw).strip().lower()
        if text == "":
            raise FlowValidationError("Empty extension is not allowed.")
        cleaned.append(text if text.startswith(".") else f".{text}")
    return tuple(cleaned)
40
+
41
+
42
def is_temporary_file_path(path: Path) -> bool:
    """Report whether the file name of *path* matches a transient temp/download pattern.

    Only ``path.name`` is inspected, after normalization and case folding.
    """
    name = _normalized_name(path.name)

    # Any dot-prefixed name matches, which also covers ".~lock.<name>#" lock
    # files and "._" companion files; "~$" is matched as its own prefix.
    leading_markers = (".", "~$", "._")
    # Backup-style "~" suffix plus partial-download and swap-file extensions.
    trailing_markers = (
        "~",
        ".tmp",
        ".temp",
        ".part",
        ".partial",
        ".crdownload",
        ".download",
        ".swp",
    )
    return name.startswith(leading_markers) or name.endswith(trailing_markers)
54
+
55
+
56
def iter_candidate_paths(
    input_root: Path,
    *,
    extensions: tuple[str, ...] | None = None,
    recursive: bool = True,
    allow_missing: bool = False,
) -> Iterable[Path]:
    """Lazily yield the candidate files found at or beneath *input_root*.

    A file root is considered on its own; a directory root is globbed
    (recursively unless ``recursive`` is false) and visited in ``_queue_key``
    order. Non-files, names failing the extension filter, and temporary-looking
    files are skipped.

    Raises ``FlowValidationError`` when the root does not exist, unless
    ``allow_missing`` is true, in which case nothing is yielded.
    """
    root_missing = not input_root.exists()
    if root_missing and not allow_missing:
        raise FlowValidationError(f"Input root not found: {input_root}")
    if root_missing:
        return

    wanted_suffixes = _normalize_extensions(extensions)

    candidates: Iterable[Path]
    if input_root.is_file():
        candidates = (input_root,)
    else:
        matches = input_root.rglob("*") if recursive else input_root.glob("*")
        candidates = sorted(matches, key=_queue_key)

    for path in candidates:
        suffix_ok = path.is_file() and (
            wanted_suffixes is None or path.suffix.lower() in wanted_suffixes
        )
        if suffix_ok and not is_temporary_file_path(path):
            yield path
84
+
85
+
86
@runtime_checkable
class IFileWatcher(Protocol):
    """Structural interface shared by runtime file watchers.

    Decorated with ``runtime_checkable`` so ``isinstance`` checks can verify
    that an object provides these three methods.
    """

    def start(self) -> None:
        """Begin watching for filesystem changes."""
        ...

    def stop(self) -> None:
        """Stop watching for filesystem changes."""
        ...

    def drain_events(self) -> list[Path]:
        """Return any queued filesystem events observed since the last drain."""
        ...
98
+
99
+
100
class PollingWatcher:
    """Polling-based watcher that reports files changed after ``start()``.

    A file signature is ``(st_mtime_ns, st_size, st_ctime_ns)``. A path is
    reported once its signature has stayed unchanged for ``settle``
    consecutive polls and differs from the signature last reported for it.
    """

    def __init__(
        self,
        input_root: Path,
        *,
        recursive: bool = True,
        extensions: tuple[str, ...] | None = None,
        settle: int = 1,
    ) -> None:
        """Configure a watcher for one file or directory root.

        Raises ``FlowValidationError`` when ``settle`` is negative.
        """
        if settle < 0:
            raise FlowValidationError("settle must be zero or greater.")
        self.input_root = input_root
        self.recursive = recursive
        self.extensions = _normalize_extensions(extensions)
        self.settle = settle
        # Signatures from the previous poll.
        self._seen: dict[Path, tuple[int, int, int]] = {}
        # Consecutive polls for which each path's signature was unchanged.
        self._stable_counts: dict[Path, int] = {}
        # Signature most recently reported (or baselined) for each path.
        self._emitted: dict[Path, tuple[int, int, int]] = {}
        self._running = False

    def start(self) -> None:
        """Take the baseline snapshot and switch the watcher on.

        Files present at startup are recorded as already emitted, so they are
        only reported after their signature changes.
        """
        baseline = self._snapshot()
        self._seen = baseline
        self._stable_counts = dict.fromkeys(baseline, 0)
        self._emitted = dict(baseline)
        self._running = True

    def stop(self) -> None:
        """Switch the watcher off; later drains return nothing."""
        self._running = False

    def drain_events(self) -> list[Path]:
        """Poll once and return the paths that have become stable since the last poll."""
        if not self._running:
            return []

        snapshot = self._snapshot()
        ready: list[Path] = []
        next_counts: dict[Path, int] = {}

        for path, signature in snapshot.items():
            unchanged = self._seen.get(path) == signature
            count = self._stable_counts.get(path, 0) + 1 if unchanged else 0
            next_counts[path] = count

            already_reported = self._emitted.get(path) == signature
            if not already_reported and count >= self.settle:
                ready.append(path)
                self._emitted[path] = signature

        self._stable_counts = next_counts
        self._seen = snapshot
        return ready

    def _snapshot(self) -> dict[Path, tuple[int, int, int]]:
        """Collect current signatures for all candidate paths and prune vanished ones."""
        signatures: dict[Path, tuple[int, int, int]] = {}
        candidates = iter_candidate_paths(
            self.input_root,
            extensions=self.extensions,
            recursive=self.recursive,
            allow_missing=True,
        )
        for path in candidates:
            try:
                info = path.stat()
            except FileNotFoundError:
                # The file disappeared between listing and stat; leave it out.
                continue
            signatures[path] = (
                info.st_mtime_ns,
                info.st_size,
                getattr(info, "st_ctime_ns", 0),
            )

        self._prune_removed_paths(signatures)
        return signatures

    def _prune_removed_paths(self, current: dict[Path, tuple[int, int, int]]) -> None:
        """Forget stability counts and emitted signatures for paths absent from *current*."""
        alive = current.keys()
        self._stable_counts = {
            path: count for path, count in self._stable_counts.items() if path in alive
        }
        self._emitted = {
            path: signature for path, signature in self._emitted.items() if path in alive
        }
183
+
184
+
185
+ __all__ = ["IFileWatcher", "PollingWatcher", "iter_candidate_paths", "is_temporary_file_path"]
@@ -0,0 +1,116 @@
1
+ """Persisted runtime ledger record models."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+ from data_engine.domain.time import parse_utc_text
8
+
9
+
10
def elapsed_seconds(started_at_utc: str, finished_at_utc: str | None) -> float | None:
    """Compute the non-negative duration between two persisted UTC text timestamps.

    Both values are parsed with ``parse_utc_text``. ``None`` is returned when
    either parse yields ``None``; otherwise the difference in seconds is
    returned, clamped so it is never below ``0.0``.
    """
    begin = parse_utc_text(started_at_utc)
    end = parse_utc_text(finished_at_utc)
    if begin is not None and end is not None:
        duration = (end - begin).total_seconds()
        # A finish time earlier than the start time is reported as zero.
        return max(duration, 0.0)
    return None
17
+
18
+
19
+ @dataclass(frozen=True)
20
+ class PersistedRun:
21
+ """One persisted runtime run summary."""
22
+
23
+ run_id: str
24
+ flow_name: str
25
+ group_name: str
26
+ source_path: str | None
27
+ status: str
28
+ started_at_utc: str
29
+ finished_at_utc: str | None
30
+ error_text: str | None
31
+
32
+ @property
33
+ def elapsed_seconds(self) -> float | None:
34
+ return elapsed_seconds(self.started_at_utc, self.finished_at_utc)
35
+
36
+
37
+ @dataclass(frozen=True)
38
+ class PersistedStepRun:
39
+ """One persisted runtime step execution."""
40
+
41
+ id: int
42
+ run_id: str
43
+ flow_name: str
44
+ step_label: str
45
+ status: str
46
+ started_at_utc: str
47
+ finished_at_utc: str | None
48
+ elapsed_ms: int | None
49
+ error_text: str | None
50
+ output_path: str | None
51
+
52
+
53
+ @dataclass(frozen=True)
54
+ class PersistedLogEntry:
55
+ """One persisted runtime log line."""
56
+
57
+ id: int
58
+ run_id: str | None
59
+ flow_name: str | None
60
+ step_label: str | None
61
+ level: str
62
+ message: str
63
+ created_at_utc: str
64
+
65
+
66
+ @dataclass(frozen=True)
67
+ class PersistedFileState:
68
+ """One persisted current file freshness row."""
69
+
70
+ flow_name: str
71
+ source_path: str
72
+ mtime_ns: int
73
+ size_bytes: int
74
+ last_success_run_id: str | None
75
+ last_success_at_utc: str | None
76
+ last_status: str
77
+ last_error_text: str | None
78
+
79
+
80
+ @dataclass(frozen=True)
81
+ class PersistedDaemonState:
82
+ """One persisted daemon metadata row."""
83
+
84
+ workspace_id: str
85
+ pid: int
86
+ endpoint_kind: str
87
+ endpoint_path: str
88
+ started_at_utc: str
89
+ last_checkpoint_at_utc: str
90
+ status: str
91
+ app_root: str
92
+ workspace_root: str
93
+ version_text: str | None
94
+
95
+
96
@dataclass(frozen=True)
class PersistedClientSession:
    """Immutable record for one persisted local client session row."""

    # Client identifier and the workspace identifier stored with it.
    client_id: str
    workspace_id: str
    client_kind: str
    pid: int
    # Timestamps are kept as UTC text.
    started_at_utc: str
    updated_at_utc: str
106
+
107
+
108
# Public API of this module: the names exported by a star-import.
__all__ = [
    "PersistedClientSession",
    "PersistedDaemonState",
    "PersistedFileState",
    "PersistedLogEntry",
    "PersistedRun",
    "PersistedStepRun",
    "elapsed_seconds",
]