py-data-engine 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. data_engine/__init__.py +37 -0
  2. data_engine/application/__init__.py +39 -0
  3. data_engine/application/actions.py +42 -0
  4. data_engine/application/catalog.py +151 -0
  5. data_engine/application/control.py +213 -0
  6. data_engine/application/details.py +73 -0
  7. data_engine/application/runtime.py +449 -0
  8. data_engine/application/workspace.py +62 -0
  9. data_engine/authoring/__init__.py +14 -0
  10. data_engine/authoring/builder.py +31 -0
  11. data_engine/authoring/execution/__init__.py +6 -0
  12. data_engine/authoring/execution/app.py +6 -0
  13. data_engine/authoring/execution/context.py +82 -0
  14. data_engine/authoring/execution/continuous.py +176 -0
  15. data_engine/authoring/execution/grouped.py +106 -0
  16. data_engine/authoring/execution/logging.py +83 -0
  17. data_engine/authoring/execution/polling.py +135 -0
  18. data_engine/authoring/execution/runner.py +210 -0
  19. data_engine/authoring/execution/single.py +171 -0
  20. data_engine/authoring/flow.py +361 -0
  21. data_engine/authoring/helpers.py +160 -0
  22. data_engine/authoring/model.py +59 -0
  23. data_engine/authoring/primitives.py +430 -0
  24. data_engine/authoring/services.py +42 -0
  25. data_engine/devtools/__init__.py +3 -0
  26. data_engine/devtools/project_ast_map.py +503 -0
  27. data_engine/docs/__init__.py +1 -0
  28. data_engine/docs/sphinx_source/_static/custom.css +13 -0
  29. data_engine/docs/sphinx_source/api.rst +42 -0
  30. data_engine/docs/sphinx_source/conf.py +37 -0
  31. data_engine/docs/sphinx_source/guides/app-runtime-and-workspaces.md +397 -0
  32. data_engine/docs/sphinx_source/guides/authoring-flow-modules.md +215 -0
  33. data_engine/docs/sphinx_source/guides/configuring-flows.md +185 -0
  34. data_engine/docs/sphinx_source/guides/core-concepts.md +208 -0
  35. data_engine/docs/sphinx_source/guides/database-methods.md +107 -0
  36. data_engine/docs/sphinx_source/guides/duckdb-helpers.md +462 -0
  37. data_engine/docs/sphinx_source/guides/flow-context.md +538 -0
  38. data_engine/docs/sphinx_source/guides/flow-methods.md +206 -0
  39. data_engine/docs/sphinx_source/guides/getting-started.md +271 -0
  40. data_engine/docs/sphinx_source/guides/project-inventory.md +5683 -0
  41. data_engine/docs/sphinx_source/guides/project-map.md +118 -0
  42. data_engine/docs/sphinx_source/guides/recipes.md +268 -0
  43. data_engine/docs/sphinx_source/index.rst +22 -0
  44. data_engine/domain/__init__.py +92 -0
  45. data_engine/domain/actions.py +69 -0
  46. data_engine/domain/catalog.py +128 -0
  47. data_engine/domain/details.py +214 -0
  48. data_engine/domain/diagnostics.py +56 -0
  49. data_engine/domain/errors.py +104 -0
  50. data_engine/domain/inspection.py +99 -0
  51. data_engine/domain/logs.py +118 -0
  52. data_engine/domain/operations.py +172 -0
  53. data_engine/domain/operator.py +72 -0
  54. data_engine/domain/runs.py +155 -0
  55. data_engine/domain/runtime.py +279 -0
  56. data_engine/domain/source_state.py +17 -0
  57. data_engine/domain/support.py +54 -0
  58. data_engine/domain/time.py +23 -0
  59. data_engine/domain/workspace.py +159 -0
  60. data_engine/flow_modules/__init__.py +1 -0
  61. data_engine/flow_modules/flow_module_compiler.py +179 -0
  62. data_engine/flow_modules/flow_module_loader.py +201 -0
  63. data_engine/helpers/__init__.py +25 -0
  64. data_engine/helpers/duckdb.py +705 -0
  65. data_engine/hosts/__init__.py +1 -0
  66. data_engine/hosts/daemon/__init__.py +23 -0
  67. data_engine/hosts/daemon/app.py +221 -0
  68. data_engine/hosts/daemon/bootstrap.py +69 -0
  69. data_engine/hosts/daemon/client.py +465 -0
  70. data_engine/hosts/daemon/commands.py +64 -0
  71. data_engine/hosts/daemon/composition.py +310 -0
  72. data_engine/hosts/daemon/constants.py +15 -0
  73. data_engine/hosts/daemon/entrypoints.py +97 -0
  74. data_engine/hosts/daemon/lifecycle.py +191 -0
  75. data_engine/hosts/daemon/manager.py +272 -0
  76. data_engine/hosts/daemon/ownership.py +126 -0
  77. data_engine/hosts/daemon/runtime_commands.py +188 -0
  78. data_engine/hosts/daemon/runtime_control.py +31 -0
  79. data_engine/hosts/daemon/server.py +84 -0
  80. data_engine/hosts/daemon/shared_state.py +147 -0
  81. data_engine/hosts/daemon/state_sync.py +101 -0
  82. data_engine/platform/__init__.py +1 -0
  83. data_engine/platform/identity.py +35 -0
  84. data_engine/platform/local_settings.py +146 -0
  85. data_engine/platform/theme.py +259 -0
  86. data_engine/platform/workspace_models.py +190 -0
  87. data_engine/platform/workspace_policy.py +333 -0
  88. data_engine/runtime/__init__.py +1 -0
  89. data_engine/runtime/file_watch.py +185 -0
  90. data_engine/runtime/ledger_models.py +116 -0
  91. data_engine/runtime/runtime_db.py +938 -0
  92. data_engine/runtime/shared_state.py +523 -0
  93. data_engine/services/__init__.py +49 -0
  94. data_engine/services/daemon.py +64 -0
  95. data_engine/services/daemon_state.py +40 -0
  96. data_engine/services/flow_catalog.py +102 -0
  97. data_engine/services/flow_execution.py +48 -0
  98. data_engine/services/ledger.py +85 -0
  99. data_engine/services/logs.py +65 -0
  100. data_engine/services/runtime_binding.py +105 -0
  101. data_engine/services/runtime_execution.py +126 -0
  102. data_engine/services/runtime_history.py +62 -0
  103. data_engine/services/settings.py +58 -0
  104. data_engine/services/shared_state.py +28 -0
  105. data_engine/services/theme.py +59 -0
  106. data_engine/services/workspace_provisioning.py +224 -0
  107. data_engine/services/workspaces.py +74 -0
  108. data_engine/ui/__init__.py +3 -0
  109. data_engine/ui/cli/__init__.py +19 -0
  110. data_engine/ui/cli/app.py +161 -0
  111. data_engine/ui/cli/commands_doctor.py +178 -0
  112. data_engine/ui/cli/commands_run.py +80 -0
  113. data_engine/ui/cli/commands_start.py +100 -0
  114. data_engine/ui/cli/commands_workspace.py +97 -0
  115. data_engine/ui/cli/dependencies.py +44 -0
  116. data_engine/ui/cli/parser.py +56 -0
  117. data_engine/ui/gui/__init__.py +25 -0
  118. data_engine/ui/gui/app.py +116 -0
  119. data_engine/ui/gui/bootstrap.py +487 -0
  120. data_engine/ui/gui/bootstrapper.py +140 -0
  121. data_engine/ui/gui/cache_models.py +23 -0
  122. data_engine/ui/gui/control_support.py +185 -0
  123. data_engine/ui/gui/controllers/__init__.py +6 -0
  124. data_engine/ui/gui/controllers/flows.py +439 -0
  125. data_engine/ui/gui/controllers/runtime.py +245 -0
  126. data_engine/ui/gui/dialogs/__init__.py +12 -0
  127. data_engine/ui/gui/dialogs/messages.py +88 -0
  128. data_engine/ui/gui/dialogs/previews.py +222 -0
  129. data_engine/ui/gui/helpers/__init__.py +62 -0
  130. data_engine/ui/gui/helpers/inspection.py +81 -0
  131. data_engine/ui/gui/helpers/lifecycle.py +112 -0
  132. data_engine/ui/gui/helpers/scroll.py +28 -0
  133. data_engine/ui/gui/helpers/theming.py +87 -0
  134. data_engine/ui/gui/icons/dark_light.svg +12 -0
  135. data_engine/ui/gui/icons/documentation.svg +1 -0
  136. data_engine/ui/gui/icons/failed.svg +3 -0
  137. data_engine/ui/gui/icons/group.svg +4 -0
  138. data_engine/ui/gui/icons/home.svg +2 -0
  139. data_engine/ui/gui/icons/manual.svg +2 -0
  140. data_engine/ui/gui/icons/poll.svg +2 -0
  141. data_engine/ui/gui/icons/schedule.svg +4 -0
  142. data_engine/ui/gui/icons/settings.svg +2 -0
  143. data_engine/ui/gui/icons/started.svg +3 -0
  144. data_engine/ui/gui/icons/success.svg +3 -0
  145. data_engine/ui/gui/icons/view-log.svg +3 -0
  146. data_engine/ui/gui/icons.py +50 -0
  147. data_engine/ui/gui/launcher.py +48 -0
  148. data_engine/ui/gui/presenters/__init__.py +72 -0
  149. data_engine/ui/gui/presenters/docs.py +140 -0
  150. data_engine/ui/gui/presenters/logs.py +58 -0
  151. data_engine/ui/gui/presenters/runtime_projection.py +29 -0
  152. data_engine/ui/gui/presenters/sidebar.py +88 -0
  153. data_engine/ui/gui/presenters/steps.py +148 -0
  154. data_engine/ui/gui/presenters/workspace.py +39 -0
  155. data_engine/ui/gui/presenters/workspace_binding.py +75 -0
  156. data_engine/ui/gui/presenters/workspace_settings.py +182 -0
  157. data_engine/ui/gui/preview_models.py +37 -0
  158. data_engine/ui/gui/render_support.py +241 -0
  159. data_engine/ui/gui/rendering/__init__.py +12 -0
  160. data_engine/ui/gui/rendering/artifacts.py +95 -0
  161. data_engine/ui/gui/rendering/icons.py +50 -0
  162. data_engine/ui/gui/runtime.py +47 -0
  163. data_engine/ui/gui/state_support.py +193 -0
  164. data_engine/ui/gui/support.py +214 -0
  165. data_engine/ui/gui/surface.py +209 -0
  166. data_engine/ui/gui/theme.py +720 -0
  167. data_engine/ui/gui/widgets/__init__.py +34 -0
  168. data_engine/ui/gui/widgets/config.py +41 -0
  169. data_engine/ui/gui/widgets/logs.py +62 -0
  170. data_engine/ui/gui/widgets/panels.py +507 -0
  171. data_engine/ui/gui/widgets/sidebar.py +130 -0
  172. data_engine/ui/gui/widgets/steps.py +84 -0
  173. data_engine/ui/tui/__init__.py +5 -0
  174. data_engine/ui/tui/app.py +222 -0
  175. data_engine/ui/tui/bootstrap.py +475 -0
  176. data_engine/ui/tui/bootstrapper.py +117 -0
  177. data_engine/ui/tui/controllers/__init__.py +6 -0
  178. data_engine/ui/tui/controllers/flows.py +349 -0
  179. data_engine/ui/tui/controllers/runtime.py +167 -0
  180. data_engine/ui/tui/runtime.py +34 -0
  181. data_engine/ui/tui/state_support.py +141 -0
  182. data_engine/ui/tui/support.py +63 -0
  183. data_engine/ui/tui/theme.py +204 -0
  184. data_engine/ui/tui/widgets.py +123 -0
  185. data_engine/views/__init__.py +109 -0
  186. data_engine/views/actions.py +80 -0
  187. data_engine/views/artifacts.py +58 -0
  188. data_engine/views/flow_display.py +69 -0
  189. data_engine/views/logs.py +54 -0
  190. data_engine/views/models.py +96 -0
  191. data_engine/views/presentation.py +133 -0
  192. data_engine/views/runs.py +62 -0
  193. data_engine/views/state.py +39 -0
  194. data_engine/views/status.py +13 -0
  195. data_engine/views/text.py +109 -0
  196. py_data_engine-0.1.0.dist-info/METADATA +330 -0
  197. py_data_engine-0.1.0.dist-info/RECORD +200 -0
  198. py_data_engine-0.1.0.dist-info/WHEEL +5 -0
  199. py_data_engine-0.1.0.dist-info/entry_points.txt +2 -0
  200. py_data_engine-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,938 @@
1
+ """SQLite-backed runtime ledger for flow lifecycle, staleness, and log history."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import UTC, datetime, timedelta
6
+ import os
7
+ from pathlib import Path
8
+ import sqlite3
9
+ import threading
10
+
11
+ from data_engine.domain.source_state import SourceSignature
12
+ from data_engine.domain.time import parse_utc_text, utcnow_text
13
+ from data_engine.platform.workspace_models import DATA_ENGINE_RUNTIME_DB_PATH_ENV_VAR, normalized_path_text
14
+ from data_engine.platform.workspace_policy import RuntimeLayoutPolicy
15
+ from data_engine.runtime.ledger_models import (
16
+ PersistedClientSession,
17
+ PersistedDaemonState,
18
+ PersistedFileState,
19
+ PersistedLogEntry,
20
+ PersistedRun,
21
+ PersistedStepRun,
22
+ elapsed_seconds,
23
+ )
24
+
25
+
26
+ class RuntimeLedger:
27
+ """Own the SQLite runtime ledger and expose narrow read/write helpers."""
28
+
29
+ HISTORY_RETENTION_DAYS = 30
30
+
31
+ def __init__(self, db_path: Path) -> None:
32
+ self.db_path = Path(db_path).expanduser().resolve()
33
+ self._connections: dict[int, sqlite3.Connection] = {}
34
+ self._connections_lock = threading.RLock()
35
+ self._ensure_parent_dir()
36
+ self._initialize_schema()
37
+
38
@classmethod
def open_default(cls, *, data_root: Path | None = None) -> "RuntimeLedger":
    """Open the ledger at the environment override path, or at the workspace default.

    A non-blank value in the runtime-db environment variable wins; otherwise
    the path comes from ``RuntimeLayoutPolicy().resolve_paths(data_root=...)``.
    """
    override_text = os.environ.get(DATA_ENGINE_RUNTIME_DB_PATH_ENV_VAR)
    if override_text is None or not override_text.strip():
        layout_paths = RuntimeLayoutPolicy().resolve_paths(data_root=data_root)
        return cls(layout_paths.runtime_db_path)
    override_path = Path(override_text).expanduser().resolve()
    return cls(override_path)
45
+
46
+ def _ensure_parent_dir(self) -> None:
47
+ self.db_path.parent.mkdir(parents=True, exist_ok=True)
48
+
49
+ def _connection(self) -> sqlite3.Connection:
50
+ thread_id = threading.get_ident()
51
+ with self._connections_lock:
52
+ connection = self._connections.get(thread_id)
53
+ if connection is None:
54
+ connection = sqlite3.connect(
55
+ self.db_path,
56
+ timeout=5.0,
57
+ isolation_level=None,
58
+ check_same_thread=False,
59
+ )
60
+ connection.row_factory = sqlite3.Row
61
+ connection.execute("PRAGMA foreign_keys = ON")
62
+ connection.execute("PRAGMA busy_timeout = 5000")
63
+ self._connections[thread_id] = connection
64
+ return connection
65
+
66
+ def close(self) -> None:
67
+ """Close all SQLite connections opened for this ledger across threads."""
68
+ with self._connections_lock:
69
+ connections = tuple(self._connections.values())
70
+ self._connections.clear()
71
+ for connection in connections:
72
+ connection.close()
73
+
74
def __del__(self) -> None:
    """Best-effort cleanup for ledger connections when callers forget to close."""
    # close() can fail here, e.g. with AttributeError when __init__ raised before
    # the connection cache was assigned. A finalizer must not propagate that, so
    # every Exception is intentionally swallowed.
    try:
        self.close()
    except Exception:
        pass
80
+
81
+ def _initialize_schema(self) -> None:
82
+ connection = self._connection()
83
+ connection.execute("PRAGMA journal_mode = WAL")
84
+ connection.execute(
85
+ """
86
+ CREATE TABLE IF NOT EXISTS runs (
87
+ run_id TEXT PRIMARY KEY,
88
+ flow_name TEXT NOT NULL,
89
+ group_name TEXT NOT NULL,
90
+ source_path TEXT,
91
+ status TEXT NOT NULL,
92
+ started_at_utc TEXT NOT NULL,
93
+ finished_at_utc TEXT,
94
+ error_text TEXT
95
+ )
96
+ """
97
+ )
98
+ connection.execute(
99
+ """
100
+ CREATE TABLE IF NOT EXISTS step_runs (
101
+ id INTEGER PRIMARY KEY,
102
+ run_id TEXT NOT NULL,
103
+ flow_name TEXT NOT NULL,
104
+ step_label TEXT NOT NULL,
105
+ status TEXT NOT NULL,
106
+ started_at_utc TEXT NOT NULL,
107
+ finished_at_utc TEXT,
108
+ elapsed_ms INTEGER,
109
+ error_text TEXT,
110
+ output_path TEXT,
111
+ FOREIGN KEY (run_id) REFERENCES runs(run_id)
112
+ )
113
+ """
114
+ )
115
+ columns = {str(row["name"]) for row in connection.execute("PRAGMA table_info(step_runs)").fetchall()}
116
+ if "output_path" not in columns:
117
+ connection.execute("ALTER TABLE step_runs ADD COLUMN output_path TEXT")
118
+ connection.execute(
119
+ """
120
+ CREATE TABLE IF NOT EXISTS daemon_state (
121
+ workspace_id TEXT PRIMARY KEY,
122
+ pid INTEGER NOT NULL,
123
+ endpoint_kind TEXT NOT NULL,
124
+ endpoint_path TEXT NOT NULL,
125
+ started_at_utc TEXT NOT NULL,
126
+ last_checkpoint_at_utc TEXT NOT NULL,
127
+ status TEXT NOT NULL,
128
+ app_root TEXT NOT NULL,
129
+ workspace_root TEXT NOT NULL,
130
+ version_text TEXT
131
+ )
132
+ """
133
+ )
134
+ connection.execute(
135
+ """
136
+ CREATE TABLE IF NOT EXISTS client_sessions (
137
+ client_id TEXT PRIMARY KEY,
138
+ workspace_id TEXT NOT NULL,
139
+ client_kind TEXT NOT NULL,
140
+ pid INTEGER NOT NULL,
141
+ started_at_utc TEXT NOT NULL,
142
+ updated_at_utc TEXT NOT NULL
143
+ )
144
+ """
145
+ )
146
+ connection.execute(
147
+ """
148
+ CREATE TABLE IF NOT EXISTS file_state (
149
+ flow_name TEXT NOT NULL,
150
+ source_path TEXT NOT NULL,
151
+ mtime_ns INTEGER NOT NULL,
152
+ size_bytes INTEGER NOT NULL,
153
+ last_success_run_id TEXT,
154
+ last_success_at_utc TEXT,
155
+ last_status TEXT NOT NULL,
156
+ last_error_text TEXT,
157
+ PRIMARY KEY (flow_name, source_path)
158
+ )
159
+ """
160
+ )
161
+ connection.execute(
162
+ """
163
+ CREATE TABLE IF NOT EXISTS logs (
164
+ id INTEGER PRIMARY KEY,
165
+ run_id TEXT,
166
+ flow_name TEXT,
167
+ step_label TEXT,
168
+ level TEXT NOT NULL,
169
+ message TEXT NOT NULL,
170
+ created_at_utc TEXT NOT NULL
171
+ )
172
+ """
173
+ )
174
+ connection.execute("CREATE INDEX IF NOT EXISTS idx_runs_flow_started ON runs(flow_name, started_at_utc DESC)")
175
+ connection.execute("CREATE INDEX IF NOT EXISTS idx_step_runs_run ON step_runs(run_id, id)")
176
+ connection.execute("CREATE INDEX IF NOT EXISTS idx_logs_flow_created ON logs(flow_name, created_at_utc, id)")
177
+ connection.execute("CREATE INDEX IF NOT EXISTS idx_logs_run_created ON logs(run_id, created_at_utc, id)")
178
+ connection.execute("CREATE INDEX IF NOT EXISTS idx_client_sessions_workspace ON client_sessions(workspace_id, updated_at_utc DESC)")
179
+
180
+ def upsert_daemon_state(
181
+ self,
182
+ *,
183
+ workspace_id: str,
184
+ pid: int,
185
+ endpoint_kind: str,
186
+ endpoint_path: str,
187
+ started_at_utc: str,
188
+ last_checkpoint_at_utc: str,
189
+ status: str,
190
+ app_root: str,
191
+ workspace_root: str,
192
+ version_text: str | None = None,
193
+ ) -> None:
194
+ """Insert or replace one daemon metadata row."""
195
+ self._connection().execute(
196
+ """
197
+ INSERT INTO daemon_state(
198
+ workspace_id,
199
+ pid,
200
+ endpoint_kind,
201
+ endpoint_path,
202
+ started_at_utc,
203
+ last_checkpoint_at_utc,
204
+ status,
205
+ app_root,
206
+ workspace_root,
207
+ version_text
208
+ )
209
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
210
+ ON CONFLICT(workspace_id) DO UPDATE SET
211
+ pid = excluded.pid,
212
+ endpoint_kind = excluded.endpoint_kind,
213
+ endpoint_path = excluded.endpoint_path,
214
+ started_at_utc = excluded.started_at_utc,
215
+ last_checkpoint_at_utc = excluded.last_checkpoint_at_utc,
216
+ status = excluded.status,
217
+ app_root = excluded.app_root,
218
+ workspace_root = excluded.workspace_root,
219
+ version_text = excluded.version_text
220
+ """,
221
+ (
222
+ workspace_id,
223
+ pid,
224
+ endpoint_kind,
225
+ endpoint_path,
226
+ started_at_utc,
227
+ last_checkpoint_at_utc,
228
+ status,
229
+ app_root,
230
+ workspace_root,
231
+ version_text,
232
+ ),
233
+ )
234
+
235
+ def get_daemon_state(self, workspace_id: str) -> PersistedDaemonState | None:
236
+ """Return daemon metadata for one workspace when present."""
237
+ row = self._connection().execute(
238
+ """
239
+ SELECT workspace_id, pid, endpoint_kind, endpoint_path, started_at_utc, last_checkpoint_at_utc, status, app_root, workspace_root, version_text
240
+ FROM daemon_state
241
+ WHERE workspace_id = ?
242
+ """,
243
+ (workspace_id,),
244
+ ).fetchone()
245
+ if row is None:
246
+ return None
247
+ return PersistedDaemonState(
248
+ workspace_id=str(row["workspace_id"]),
249
+ pid=int(row["pid"]),
250
+ endpoint_kind=str(row["endpoint_kind"]),
251
+ endpoint_path=str(row["endpoint_path"]),
252
+ started_at_utc=str(row["started_at_utc"]),
253
+ last_checkpoint_at_utc=str(row["last_checkpoint_at_utc"]),
254
+ status=str(row["status"]),
255
+ app_root=str(row["app_root"]),
256
+ workspace_root=str(row["workspace_root"]),
257
+ version_text=row["version_text"],
258
+ )
259
+
260
+ def clear_daemon_state(self, workspace_id: str) -> None:
261
+ """Delete daemon metadata for one workspace."""
262
+ self._connection().execute("DELETE FROM daemon_state WHERE workspace_id = ?", (workspace_id,))
263
+
264
def upsert_client_session(
    self,
    *,
    client_id: str,
    workspace_id: str,
    client_kind: str,
    pid: int,
) -> None:
    """Insert or refresh one local client session row.

    A new ``client_id`` is stored with ``started_at_utc`` and
    ``updated_at_utc`` both set to the current time. For an existing
    ``client_id`` the stored ``started_at_utc`` is kept and only the
    workspace, client kind, pid, and ``updated_at_utc`` are refreshed.
    """
    now_utc = utcnow_text()
    # The conflict branch never assigns started_at_utc, so the original start
    # time survives a refresh without reading the row first. Doing this in a
    # single statement also avoids a read-then-write race window.
    self._connection().execute(
        """
        INSERT INTO client_sessions(
            client_id,
            workspace_id,
            client_kind,
            pid,
            started_at_utc,
            updated_at_utc
        )
        VALUES (?, ?, ?, ?, ?, ?)
        ON CONFLICT(client_id) DO UPDATE SET
            workspace_id = excluded.workspace_id,
            client_kind = excluded.client_kind,
            pid = excluded.pid,
            updated_at_utc = excluded.updated_at_utc
        """,
        (client_id, workspace_id, client_kind, pid, now_utc, now_utc),
    )
298
+
299
+ def remove_client_session(self, client_id: str) -> None:
300
+ """Delete one local client session row."""
301
+ self._connection().execute("DELETE FROM client_sessions WHERE client_id = ?", (client_id,))
302
+
303
+ def remove_client_sessions_for_process(self, *, workspace_id: str, client_kind: str, pid: int) -> None:
304
+ """Delete all client session rows for one workspace/client-kind/process tuple."""
305
+ self._connection().execute(
306
+ """
307
+ DELETE FROM client_sessions
308
+ WHERE workspace_id = ?
309
+ AND client_kind = ?
310
+ AND pid = ?
311
+ """,
312
+ (workspace_id, client_kind, pid),
313
+ )
314
+
315
+ def count_live_client_sessions(self, workspace_id: str, *, exclude_client_id: str | None = None) -> int:
316
+ """Return the number of live client sessions for one workspace."""
317
+ rows = self._connection().execute(
318
+ """
319
+ SELECT client_id, pid
320
+ FROM client_sessions
321
+ WHERE workspace_id = ?
322
+ """,
323
+ (workspace_id,),
324
+ ).fetchall()
325
+ live_count = 0
326
+ stale_client_ids: list[str] = []
327
+ for row in rows:
328
+ client_id = str(row["client_id"])
329
+ if exclude_client_id is not None and client_id == exclude_client_id:
330
+ continue
331
+ pid = int(row["pid"])
332
+ if self._pid_is_running(pid):
333
+ live_count += 1
334
+ else:
335
+ stale_client_ids.append(client_id)
336
+ if stale_client_ids:
337
+ self._connection().executemany(
338
+ "DELETE FROM client_sessions WHERE client_id = ?",
339
+ ((client_id,) for client_id in stale_client_ids),
340
+ )
341
+ return live_count
342
+
343
+ @staticmethod
344
+ def _pid_is_running(pid: int) -> bool:
345
+ """Return whether the OS still reports the given PID as alive."""
346
+ if pid <= 0:
347
+ return False
348
+ try:
349
+ os.kill(pid, 0)
350
+ except ProcessLookupError:
351
+ return False
352
+ except PermissionError:
353
+ return True
354
+ except OSError:
355
+ return False
356
+ return True
357
+
358
def normalize_source_path(self, source_path: Path | str) -> str:
    """Return the canonical text form of a source path used for storage and comparison.

    The path is user-expanded and resolved before being handed to
    ``normalized_path_text``.
    """
    absolute_path = Path(source_path).expanduser().resolve()
    return normalized_path_text(absolute_path)
361
+
362
+ def source_signature_for_path(self, source_path: Path) -> SourceSignature | None:
363
+ """Return the current source signature when the file exists."""
364
+ try:
365
+ stat = source_path.stat()
366
+ except FileNotFoundError:
367
+ return None
368
+ return SourceSignature(
369
+ source_path=self.normalize_source_path(source_path),
370
+ mtime_ns=stat.st_mtime_ns,
371
+ size_bytes=stat.st_size,
372
+ )
373
+
374
+ def is_poll_source_stale(self, flow_name: str, signature: SourceSignature | None) -> bool:
375
+ """Return whether a concrete source signature should be rerun."""
376
+ if signature is None:
377
+ return False
378
+ row = self._connection().execute(
379
+ """
380
+ SELECT mtime_ns, size_bytes, last_status
381
+ FROM file_state
382
+ WHERE flow_name = ? AND source_path = ?
383
+ """,
384
+ (flow_name, signature.source_path),
385
+ ).fetchone()
386
+ if row is None:
387
+ return True
388
+ if int(row["mtime_ns"]) != signature.mtime_ns or int(row["size_bytes"]) != signature.size_bytes:
389
+ return True
390
+ return str(row["last_status"]) != "success"
391
+
392
+ def record_run_started(
393
+ self,
394
+ *,
395
+ run_id: str,
396
+ flow_name: str,
397
+ group_name: str,
398
+ source_path: str | None,
399
+ started_at_utc: str,
400
+ ) -> None:
401
+ """Insert one started run row."""
402
+ self._connection().execute(
403
+ """
404
+ INSERT INTO runs(run_id, flow_name, group_name, source_path, status, started_at_utc)
405
+ VALUES (?, ?, ?, ?, 'started', ?)
406
+ """,
407
+ (run_id, flow_name, group_name, source_path, started_at_utc),
408
+ )
409
+
410
+ def record_run_finished(
411
+ self,
412
+ *,
413
+ run_id: str,
414
+ status: str,
415
+ finished_at_utc: str,
416
+ error_text: str | None = None,
417
+ ) -> None:
418
+ """Finalize one persisted run row."""
419
+ self._connection().execute(
420
+ """
421
+ UPDATE runs
422
+ SET status = ?, finished_at_utc = ?, error_text = ?
423
+ WHERE run_id = ?
424
+ """,
425
+ (status, finished_at_utc, error_text, run_id),
426
+ )
427
+ self.prune_history(retention_days=self.HISTORY_RETENTION_DAYS)
428
+
429
+ def record_step_started(
430
+ self,
431
+ *,
432
+ run_id: str,
433
+ flow_name: str,
434
+ step_label: str,
435
+ started_at_utc: str,
436
+ ) -> int:
437
+ """Insert one started step row and return its surrogate key."""
438
+ cursor = self._connection().execute(
439
+ """
440
+ INSERT INTO step_runs(run_id, flow_name, step_label, status, started_at_utc)
441
+ VALUES (?, ?, ?, 'started', ?)
442
+ """,
443
+ (run_id, flow_name, step_label, started_at_utc),
444
+ )
445
+ return int(cursor.lastrowid)
446
+
447
+ def record_step_finished(
448
+ self,
449
+ *,
450
+ step_run_id: int,
451
+ status: str,
452
+ finished_at_utc: str,
453
+ elapsed_ms: int | None,
454
+ error_text: str | None = None,
455
+ output_path: str | None = None,
456
+ ) -> None:
457
+ """Finalize one persisted step row."""
458
+ self._connection().execute(
459
+ """
460
+ UPDATE step_runs
461
+ SET status = ?, finished_at_utc = ?, elapsed_ms = ?, error_text = ?, output_path = ?
462
+ WHERE id = ?
463
+ """,
464
+ (status, finished_at_utc, elapsed_ms, error_text, output_path, step_run_id),
465
+ )
466
+
467
+ def upsert_file_state(
468
+ self,
469
+ *,
470
+ flow_name: str,
471
+ signature: SourceSignature,
472
+ status: str,
473
+ run_id: str | None = None,
474
+ finished_at_utc: str | None = None,
475
+ error_text: str | None = None,
476
+ ) -> None:
477
+ """Upsert one file-state row for a polled source file."""
478
+ success_run_id = run_id if status == "success" else None
479
+ success_at = finished_at_utc if status == "success" else None
480
+ self._connection().execute(
481
+ """
482
+ INSERT INTO file_state(
483
+ flow_name,
484
+ source_path,
485
+ mtime_ns,
486
+ size_bytes,
487
+ last_success_run_id,
488
+ last_success_at_utc,
489
+ last_status,
490
+ last_error_text
491
+ )
492
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)
493
+ ON CONFLICT(flow_name, source_path) DO UPDATE SET
494
+ mtime_ns = excluded.mtime_ns,
495
+ size_bytes = excluded.size_bytes,
496
+ last_success_run_id = CASE
497
+ WHEN excluded.last_status = 'success' THEN excluded.last_success_run_id
498
+ ELSE file_state.last_success_run_id
499
+ END,
500
+ last_success_at_utc = CASE
501
+ WHEN excluded.last_status = 'success' THEN excluded.last_success_at_utc
502
+ ELSE file_state.last_success_at_utc
503
+ END,
504
+ last_status = excluded.last_status,
505
+ last_error_text = excluded.last_error_text
506
+ """,
507
+ (
508
+ flow_name,
509
+ signature.source_path,
510
+ signature.mtime_ns,
511
+ signature.size_bytes,
512
+ success_run_id,
513
+ success_at,
514
+ status,
515
+ error_text,
516
+ ),
517
+ )
518
+
519
+ def append_log(
520
+ self,
521
+ *,
522
+ level: str,
523
+ message: str,
524
+ created_at_utc: str,
525
+ run_id: str | None = None,
526
+ flow_name: str | None = None,
527
+ step_label: str | None = None,
528
+ ) -> None:
529
+ """Persist one runtime log line."""
530
+ self._connection().execute(
531
+ """
532
+ INSERT INTO logs(run_id, flow_name, step_label, level, message, created_at_utc)
533
+ VALUES (?, ?, ?, ?, ?, ?)
534
+ """,
535
+ (run_id, flow_name, step_label, level, message, created_at_utc),
536
+ )
537
+
538
+ def prune_history(self, *, retention_days: int) -> None:
539
+ """Delete run, step, and log history older than the retention window."""
540
+ if retention_days <= 0:
541
+ raise ValueError("retention_days must be positive.")
542
+ cutoff = (datetime.now(UTC) - timedelta(days=retention_days)).isoformat()
543
+ connection = self._connection()
544
+ stale_run_ids = tuple(
545
+ str(row["run_id"])
546
+ for row in connection.execute(
547
+ """
548
+ SELECT run_id
549
+ FROM runs
550
+ WHERE COALESCE(finished_at_utc, started_at_utc) < ?
551
+ """,
552
+ (cutoff,),
553
+ ).fetchall()
554
+ )
555
+ if not stale_run_ids:
556
+ return
557
+ placeholders = ", ".join("?" for _ in stale_run_ids)
558
+ connection.execute(f"DELETE FROM logs WHERE run_id IN ({placeholders})", stale_run_ids)
559
+ connection.execute(f"DELETE FROM step_runs WHERE run_id IN ({placeholders})", stale_run_ids)
560
+ connection.execute(f"DELETE FROM runs WHERE run_id IN ({placeholders})", stale_run_ids)
561
+ connection.execute(
562
+ f"""
563
+ UPDATE file_state
564
+ SET last_success_run_id = NULL
565
+ WHERE last_success_run_id IN ({placeholders})
566
+ """,
567
+ stale_run_ids,
568
+ )
569
+
570
+ def prune_missing_file_state(self, *, flow_name: str, current_source_paths: set[str]) -> None:
571
+ """Delete file-state rows for one flow when the source file no longer exists."""
572
+ connection = self._connection()
573
+ rows = connection.execute(
574
+ """
575
+ SELECT source_path
576
+ FROM file_state
577
+ WHERE flow_name = ?
578
+ """,
579
+ (flow_name,),
580
+ ).fetchall()
581
+ stale_paths = [
582
+ str(row["source_path"])
583
+ for row in rows
584
+ if str(row["source_path"]) not in current_source_paths
585
+ ]
586
+ if not stale_paths:
587
+ return
588
+ placeholders = ", ".join("?" for _ in stale_paths)
589
+ connection.execute(
590
+ f"DELETE FROM file_state WHERE flow_name = ? AND source_path IN ({placeholders})",
591
+ (flow_name, *stale_paths),
592
+ )
593
+
594
+ def list_logs(self, *, flow_name: str | None = None, run_id: str | None = None) -> tuple[PersistedLogEntry, ...]:
595
+ """Return persisted runtime logs in creation order."""
596
+ clauses: list[str] = []
597
+ params: list[object] = []
598
+ if flow_name is not None:
599
+ clauses.append("flow_name = ?")
600
+ params.append(flow_name)
601
+ if run_id is not None:
602
+ clauses.append("run_id = ?")
603
+ params.append(run_id)
604
+ where = f"WHERE {' AND '.join(clauses)}" if clauses else ""
605
+ rows = self._connection().execute(
606
+ f"""
607
+ SELECT id, run_id, flow_name, step_label, level, message, created_at_utc
608
+ FROM logs
609
+ {where}
610
+ ORDER BY created_at_utc, id
611
+ """,
612
+ params,
613
+ ).fetchall()
614
+ return tuple(
615
+ PersistedLogEntry(
616
+ id=int(row["id"]),
617
+ run_id=row["run_id"],
618
+ flow_name=row["flow_name"],
619
+ step_label=row["step_label"],
620
+ level=str(row["level"]),
621
+ message=str(row["message"]),
622
+ created_at_utc=str(row["created_at_utc"]),
623
+ )
624
+ for row in rows
625
+ )
626
+
627
+ def list_runs(self, *, flow_name: str | None = None) -> tuple[PersistedRun, ...]:
628
+ """Return persisted runs, newest first."""
629
+ if flow_name is None:
630
+ rows = self._connection().execute(
631
+ """
632
+ SELECT run_id, flow_name, group_name, source_path, status, started_at_utc, finished_at_utc, error_text
633
+ FROM runs
634
+ ORDER BY started_at_utc DESC, run_id DESC
635
+ """
636
+ ).fetchall()
637
+ else:
638
+ rows = self._connection().execute(
639
+ """
640
+ SELECT run_id, flow_name, group_name, source_path, status, started_at_utc, finished_at_utc, error_text
641
+ FROM runs
642
+ WHERE flow_name = ?
643
+ ORDER BY started_at_utc DESC, run_id DESC
644
+ """,
645
+ (flow_name,),
646
+ ).fetchall()
647
+ return tuple(
648
+ PersistedRun(
649
+ run_id=str(row["run_id"]),
650
+ flow_name=str(row["flow_name"]),
651
+ group_name=str(row["group_name"]),
652
+ source_path=row["source_path"],
653
+ status=str(row["status"]),
654
+ started_at_utc=str(row["started_at_utc"]),
655
+ finished_at_utc=row["finished_at_utc"],
656
+ error_text=row["error_text"],
657
+ )
658
+ for row in rows
659
+ )
660
+
661
def list_step_runs(self, run_id: str) -> tuple[PersistedStepRun, ...]:
    """Return the persisted step runs that belong to one run.

    Args:
        run_id: Value matched against the ``run_id`` column of ``step_runs``.

    Returns:
        A tuple of ``PersistedStepRun`` records ordered by ascending ``id``.
    """
    sql = """
        SELECT id, run_id, flow_name, step_label, status, started_at_utc, finished_at_utc, elapsed_ms, error_text, output_path
        FROM step_runs
        WHERE run_id = ?
        ORDER BY id
        """
    fetched = self._connection().execute(sql, (run_id,)).fetchall()

    steps = []
    for record in fetched:
        steps.append(
            PersistedStepRun(
                id=int(record["id"]),
                run_id=str(record["run_id"]),
                flow_name=str(record["flow_name"]),
                step_label=str(record["step_label"]),
                status=str(record["status"]),
                started_at_utc=str(record["started_at_utc"]),
                # The remaining columns are passed through without coercion.
                finished_at_utc=record["finished_at_utc"],
                elapsed_ms=record["elapsed_ms"],
                error_text=record["error_text"],
                output_path=record["output_path"],
            )
        )
    return tuple(steps)
687
+
688
def list_file_states(self, *, flow_name: str | None = None) -> tuple[PersistedFileState, ...]:
    """Return the current persisted file-state rows.

    Args:
        flow_name: When given, only rows for that flow are returned, ordered
            by ``source_path``.  When ``None`` every row is returned, ordered
            by ``flow_name`` and then ``source_path``.

    Returns:
        A tuple of ``PersistedFileState`` records.
    """
    if flow_name is not None:
        sql = """
            SELECT flow_name, source_path, mtime_ns, size_bytes, last_success_run_id, last_success_at_utc, last_status, last_error_text
            FROM file_state
            WHERE flow_name = ?
            ORDER BY source_path
            """
        bindings: tuple[str, ...] = (flow_name,)
    else:
        sql = """
            SELECT flow_name, source_path, mtime_ns, size_bytes, last_success_run_id, last_success_at_utc, last_status, last_error_text
            FROM file_state
            ORDER BY flow_name, source_path
            """
        bindings = ()
    fetched = self._connection().execute(sql, bindings).fetchall()

    states = []
    for record in fetched:
        states.append(
            PersistedFileState(
                flow_name=str(record["flow_name"]),
                source_path=str(record["source_path"]),
                mtime_ns=int(record["mtime_ns"]),
                size_bytes=int(record["size_bytes"]),
                # The last-success columns and error text are passed through as stored.
                last_success_run_id=record["last_success_run_id"],
                last_success_at_utc=record["last_success_at_utc"],
                last_status=str(record["last_status"]),
                last_error_text=record["last_error_text"],
            )
        )
    return tuple(states)
721
+
722
def replace_runs(self, rows: tuple[PersistedRun, ...]) -> None:
    """Replace every persisted run row with the given snapshot.

    Besides ``runs`` this also empties ``step_runs`` and ``logs``; those two
    tables are cleared first and are not repopulated here.

    Args:
        rows: The run records to store.  An empty tuple leaves all three
            tables empty.
    """
    db = self._connection()
    for purge_sql in ("DELETE FROM step_runs", "DELETE FROM logs", "DELETE FROM runs"):
        db.execute(purge_sql)
    if rows:
        insert_sql = """
            INSERT INTO runs(run_id, flow_name, group_name, source_path, status, started_at_utc, finished_at_utc, error_text)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """
        values = [
            (
                entry.run_id,
                entry.flow_name,
                entry.group_name,
                entry.source_path,
                entry.status,
                entry.started_at_utc,
                entry.finished_at_utc,
                entry.error_text,
            )
            for entry in rows
        ]
        db.executemany(insert_sql, values)
749
+
750
def replace_step_runs(self, rows: tuple[PersistedStepRun, ...]) -> None:
    """Replace every persisted step-run row with the given snapshot.

    The ``step_runs`` table is emptied and then refilled; each record's
    ``id`` is written explicitly.

    Args:
        rows: The step-run records to store.  An empty tuple leaves the
            table empty.
    """
    db = self._connection()
    db.execute("DELETE FROM step_runs")
    if rows:
        insert_sql = """
            INSERT INTO step_runs(id, run_id, flow_name, step_label, status, started_at_utc, finished_at_utc, elapsed_ms, error_text, output_path)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """
        values = [
            (
                entry.id,
                entry.run_id,
                entry.flow_name,
                entry.step_label,
                entry.status,
                entry.started_at_utc,
                entry.finished_at_utc,
                entry.elapsed_ms,
                entry.error_text,
                entry.output_path,
            )
            for entry in rows
        ]
        db.executemany(insert_sql, values)
777
+
778
def replace_logs(self, rows: tuple[PersistedLogEntry, ...]) -> None:
    """Replace every persisted log row with the given snapshot.

    The ``logs`` table is emptied and then refilled; each record's ``id`` is
    written explicitly.

    Args:
        rows: The log records to store.  An empty tuple leaves the table
            empty.
    """
    db = self._connection()
    db.execute("DELETE FROM logs")
    if rows:
        insert_sql = """
            INSERT INTO logs(id, run_id, flow_name, step_label, level, message, created_at_utc)
            VALUES (?, ?, ?, ?, ?, ?, ?)
            """
        values = [
            (
                entry.id,
                entry.run_id,
                entry.flow_name,
                entry.step_label,
                entry.level,
                entry.message,
                entry.created_at_utc,
            )
            for entry in rows
        ]
        db.executemany(insert_sql, values)
802
+
803
def replace_file_states(self, rows: tuple[PersistedFileState, ...]) -> None:
    """Replace every persisted file-state row with the given snapshot.

    The ``file_state`` table is emptied and then refilled with ``rows``
    exactly as supplied (no de-duplication is performed here).

    Args:
        rows: The file-state records to store.  An empty tuple leaves the
            table empty.
    """
    db = self._connection()
    db.execute("DELETE FROM file_state")
    if rows:
        insert_sql = """
            INSERT INTO file_state(flow_name, source_path, mtime_ns, size_bytes, last_success_run_id, last_success_at_utc, last_status, last_error_text)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """
        values = [
            (
                entry.flow_name,
                entry.source_path,
                entry.mtime_ns,
                entry.size_bytes,
                entry.last_success_run_id,
                entry.last_success_at_utc,
                entry.last_status,
                entry.last_error_text,
            )
            for entry in rows
        ]
        db.executemany(insert_sql, values)
828
+
829
def replace_runtime_snapshot(
    self,
    *,
    runs: tuple[PersistedRun, ...],
    step_runs: tuple[PersistedStepRun, ...],
    logs: tuple[PersistedLogEntry, ...],
    file_states: tuple[PersistedFileState, ...],
) -> None:
    """Replace the runtime snapshot tables in foreign-key-safe order.

    ``step_runs``, ``logs``, ``runs`` and ``file_state`` are emptied and then
    repopulated inside one explicit ``BEGIN IMMEDIATE`` transaction.  The
    transaction is committed when every statement succeeds and rolled back
    when anything is raised, so the tables are never left half-replaced.

    Args:
        runs: Run records to store.
        step_runs: Step-run records to store.  Their ``id`` is not written.
        logs: Log records to store.  Their ``id`` is not written.
        file_states: File-state records to store.  When several records share
            the same ``(flow_name, source_path)`` only the last one is kept.

    Raises:
        BaseException: Whatever the underlying statements raise is re-raised
            unchanged after the transaction has been rolled back.
    """
    connection = self._connection()
    connection.execute("BEGIN IMMEDIATE")
    try:
        # step_runs and logs are cleared before runs, and runs is refilled
        # first (presumably the other tables reference runs; the schema is
        # not visible here).
        connection.execute("DELETE FROM step_runs")
        connection.execute("DELETE FROM logs")
        connection.execute("DELETE FROM runs")
        connection.execute("DELETE FROM file_state")
        if runs:
            connection.executemany(
                """
                INSERT INTO runs(run_id, flow_name, group_name, source_path, status, started_at_utc, finished_at_utc, error_text)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """,
                [
                    (
                        row.run_id,
                        row.flow_name,
                        row.group_name,
                        row.source_path,
                        row.status,
                        row.started_at_utc,
                        row.finished_at_utc,
                        row.error_text,
                    )
                    for row in runs
                ],
            )
        if step_runs:
            # The id column is deliberately left out of the column list.
            # NOTE(review): presumably so SQLite assigns fresh ids; confirm.
            connection.executemany(
                """
                INSERT INTO step_runs(run_id, flow_name, step_label, status, started_at_utc, finished_at_utc, elapsed_ms, error_text, output_path)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                [
                    (
                        row.run_id,
                        row.flow_name,
                        row.step_label,
                        row.status,
                        row.started_at_utc,
                        row.finished_at_utc,
                        row.elapsed_ms,
                        row.error_text,
                        row.output_path,
                    )
                    for row in step_runs
                ],
            )
        if logs:
            # As with step_runs, the id column is not written.
            connection.executemany(
                """
                INSERT INTO logs(run_id, flow_name, step_label, level, message, created_at_utc)
                VALUES (?, ?, ?, ?, ?, ?)
                """,
                [
                    (
                        row.run_id,
                        row.flow_name,
                        row.step_label,
                        row.level,
                        row.message,
                        row.created_at_utc,
                    )
                    for row in logs
                ],
            )
        if file_states:
            # Later entries overwrite earlier ones with the same key, so only
            # one row per (flow_name, source_path) is inserted.
            deduped_file_states: dict[tuple[str, str], PersistedFileState] = {
                (row.flow_name, row.source_path): row for row in file_states
            }
            connection.executemany(
                """
                INSERT INTO file_state(flow_name, source_path, mtime_ns, size_bytes, last_success_run_id, last_success_at_utc, last_status, last_error_text)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """,
                [
                    (
                        row.flow_name,
                        row.source_path,
                        row.mtime_ns,
                        row.size_bytes,
                        row.last_success_run_id,
                        row.last_success_at_utc,
                        row.last_status,
                        row.last_error_text,
                    )
                    for row in deduped_file_states.values()
                ],
            )
    except BaseException:
        # Catch BaseException, not just Exception: a KeyboardInterrupt or
        # SystemExit raised mid-snapshot would otherwise leave the explicit
        # transaction open on the connection.
        connection.rollback()
        raise
    else:
        connection.commit()
932
+
933
+
934
# Names exported by this module when a caller uses ``from ... import *``.
__all__ = [
    "RuntimeLedger",
    "parse_utc_text",
    "utcnow_text",
]