py-data-engine 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. data_engine/__init__.py +37 -0
  2. data_engine/application/__init__.py +39 -0
  3. data_engine/application/actions.py +42 -0
  4. data_engine/application/catalog.py +151 -0
  5. data_engine/application/control.py +213 -0
  6. data_engine/application/details.py +73 -0
  7. data_engine/application/runtime.py +449 -0
  8. data_engine/application/workspace.py +62 -0
  9. data_engine/authoring/__init__.py +14 -0
  10. data_engine/authoring/builder.py +31 -0
  11. data_engine/authoring/execution/__init__.py +6 -0
  12. data_engine/authoring/execution/app.py +6 -0
  13. data_engine/authoring/execution/context.py +82 -0
  14. data_engine/authoring/execution/continuous.py +176 -0
  15. data_engine/authoring/execution/grouped.py +106 -0
  16. data_engine/authoring/execution/logging.py +83 -0
  17. data_engine/authoring/execution/polling.py +135 -0
  18. data_engine/authoring/execution/runner.py +210 -0
  19. data_engine/authoring/execution/single.py +171 -0
  20. data_engine/authoring/flow.py +361 -0
  21. data_engine/authoring/helpers.py +160 -0
  22. data_engine/authoring/model.py +59 -0
  23. data_engine/authoring/primitives.py +430 -0
  24. data_engine/authoring/services.py +42 -0
  25. data_engine/devtools/__init__.py +3 -0
  26. data_engine/devtools/project_ast_map.py +503 -0
  27. data_engine/docs/__init__.py +1 -0
  28. data_engine/docs/sphinx_source/_static/custom.css +13 -0
  29. data_engine/docs/sphinx_source/api.rst +42 -0
  30. data_engine/docs/sphinx_source/conf.py +37 -0
  31. data_engine/docs/sphinx_source/guides/app-runtime-and-workspaces.md +397 -0
  32. data_engine/docs/sphinx_source/guides/authoring-flow-modules.md +215 -0
  33. data_engine/docs/sphinx_source/guides/configuring-flows.md +185 -0
  34. data_engine/docs/sphinx_source/guides/core-concepts.md +208 -0
  35. data_engine/docs/sphinx_source/guides/database-methods.md +107 -0
  36. data_engine/docs/sphinx_source/guides/duckdb-helpers.md +462 -0
  37. data_engine/docs/sphinx_source/guides/flow-context.md +538 -0
  38. data_engine/docs/sphinx_source/guides/flow-methods.md +206 -0
  39. data_engine/docs/sphinx_source/guides/getting-started.md +271 -0
  40. data_engine/docs/sphinx_source/guides/project-inventory.md +5683 -0
  41. data_engine/docs/sphinx_source/guides/project-map.md +118 -0
  42. data_engine/docs/sphinx_source/guides/recipes.md +268 -0
  43. data_engine/docs/sphinx_source/index.rst +22 -0
  44. data_engine/domain/__init__.py +92 -0
  45. data_engine/domain/actions.py +69 -0
  46. data_engine/domain/catalog.py +128 -0
  47. data_engine/domain/details.py +214 -0
  48. data_engine/domain/diagnostics.py +56 -0
  49. data_engine/domain/errors.py +104 -0
  50. data_engine/domain/inspection.py +99 -0
  51. data_engine/domain/logs.py +118 -0
  52. data_engine/domain/operations.py +172 -0
  53. data_engine/domain/operator.py +72 -0
  54. data_engine/domain/runs.py +155 -0
  55. data_engine/domain/runtime.py +279 -0
  56. data_engine/domain/source_state.py +17 -0
  57. data_engine/domain/support.py +54 -0
  58. data_engine/domain/time.py +23 -0
  59. data_engine/domain/workspace.py +159 -0
  60. data_engine/flow_modules/__init__.py +1 -0
  61. data_engine/flow_modules/flow_module_compiler.py +179 -0
  62. data_engine/flow_modules/flow_module_loader.py +201 -0
  63. data_engine/helpers/__init__.py +25 -0
  64. data_engine/helpers/duckdb.py +705 -0
  65. data_engine/hosts/__init__.py +1 -0
  66. data_engine/hosts/daemon/__init__.py +23 -0
  67. data_engine/hosts/daemon/app.py +221 -0
  68. data_engine/hosts/daemon/bootstrap.py +69 -0
  69. data_engine/hosts/daemon/client.py +465 -0
  70. data_engine/hosts/daemon/commands.py +64 -0
  71. data_engine/hosts/daemon/composition.py +310 -0
  72. data_engine/hosts/daemon/constants.py +15 -0
  73. data_engine/hosts/daemon/entrypoints.py +97 -0
  74. data_engine/hosts/daemon/lifecycle.py +191 -0
  75. data_engine/hosts/daemon/manager.py +272 -0
  76. data_engine/hosts/daemon/ownership.py +126 -0
  77. data_engine/hosts/daemon/runtime_commands.py +188 -0
  78. data_engine/hosts/daemon/runtime_control.py +31 -0
  79. data_engine/hosts/daemon/server.py +84 -0
  80. data_engine/hosts/daemon/shared_state.py +147 -0
  81. data_engine/hosts/daemon/state_sync.py +101 -0
  82. data_engine/platform/__init__.py +1 -0
  83. data_engine/platform/identity.py +35 -0
  84. data_engine/platform/local_settings.py +146 -0
  85. data_engine/platform/theme.py +259 -0
  86. data_engine/platform/workspace_models.py +190 -0
  87. data_engine/platform/workspace_policy.py +333 -0
  88. data_engine/runtime/__init__.py +1 -0
  89. data_engine/runtime/file_watch.py +185 -0
  90. data_engine/runtime/ledger_models.py +116 -0
  91. data_engine/runtime/runtime_db.py +938 -0
  92. data_engine/runtime/shared_state.py +523 -0
  93. data_engine/services/__init__.py +49 -0
  94. data_engine/services/daemon.py +64 -0
  95. data_engine/services/daemon_state.py +40 -0
  96. data_engine/services/flow_catalog.py +102 -0
  97. data_engine/services/flow_execution.py +48 -0
  98. data_engine/services/ledger.py +85 -0
  99. data_engine/services/logs.py +65 -0
  100. data_engine/services/runtime_binding.py +105 -0
  101. data_engine/services/runtime_execution.py +126 -0
  102. data_engine/services/runtime_history.py +62 -0
  103. data_engine/services/settings.py +58 -0
  104. data_engine/services/shared_state.py +28 -0
  105. data_engine/services/theme.py +59 -0
  106. data_engine/services/workspace_provisioning.py +224 -0
  107. data_engine/services/workspaces.py +74 -0
  108. data_engine/ui/__init__.py +3 -0
  109. data_engine/ui/cli/__init__.py +19 -0
  110. data_engine/ui/cli/app.py +161 -0
  111. data_engine/ui/cli/commands_doctor.py +178 -0
  112. data_engine/ui/cli/commands_run.py +80 -0
  113. data_engine/ui/cli/commands_start.py +100 -0
  114. data_engine/ui/cli/commands_workspace.py +97 -0
  115. data_engine/ui/cli/dependencies.py +44 -0
  116. data_engine/ui/cli/parser.py +56 -0
  117. data_engine/ui/gui/__init__.py +25 -0
  118. data_engine/ui/gui/app.py +116 -0
  119. data_engine/ui/gui/bootstrap.py +487 -0
  120. data_engine/ui/gui/bootstrapper.py +140 -0
  121. data_engine/ui/gui/cache_models.py +23 -0
  122. data_engine/ui/gui/control_support.py +185 -0
  123. data_engine/ui/gui/controllers/__init__.py +6 -0
  124. data_engine/ui/gui/controllers/flows.py +439 -0
  125. data_engine/ui/gui/controllers/runtime.py +245 -0
  126. data_engine/ui/gui/dialogs/__init__.py +12 -0
  127. data_engine/ui/gui/dialogs/messages.py +88 -0
  128. data_engine/ui/gui/dialogs/previews.py +222 -0
  129. data_engine/ui/gui/helpers/__init__.py +62 -0
  130. data_engine/ui/gui/helpers/inspection.py +81 -0
  131. data_engine/ui/gui/helpers/lifecycle.py +112 -0
  132. data_engine/ui/gui/helpers/scroll.py +28 -0
  133. data_engine/ui/gui/helpers/theming.py +87 -0
  134. data_engine/ui/gui/icons/dark_light.svg +12 -0
  135. data_engine/ui/gui/icons/documentation.svg +1 -0
  136. data_engine/ui/gui/icons/failed.svg +3 -0
  137. data_engine/ui/gui/icons/group.svg +4 -0
  138. data_engine/ui/gui/icons/home.svg +2 -0
  139. data_engine/ui/gui/icons/manual.svg +2 -0
  140. data_engine/ui/gui/icons/poll.svg +2 -0
  141. data_engine/ui/gui/icons/schedule.svg +4 -0
  142. data_engine/ui/gui/icons/settings.svg +2 -0
  143. data_engine/ui/gui/icons/started.svg +3 -0
  144. data_engine/ui/gui/icons/success.svg +3 -0
  145. data_engine/ui/gui/icons/view-log.svg +3 -0
  146. data_engine/ui/gui/icons.py +50 -0
  147. data_engine/ui/gui/launcher.py +48 -0
  148. data_engine/ui/gui/presenters/__init__.py +72 -0
  149. data_engine/ui/gui/presenters/docs.py +140 -0
  150. data_engine/ui/gui/presenters/logs.py +58 -0
  151. data_engine/ui/gui/presenters/runtime_projection.py +29 -0
  152. data_engine/ui/gui/presenters/sidebar.py +88 -0
  153. data_engine/ui/gui/presenters/steps.py +148 -0
  154. data_engine/ui/gui/presenters/workspace.py +39 -0
  155. data_engine/ui/gui/presenters/workspace_binding.py +75 -0
  156. data_engine/ui/gui/presenters/workspace_settings.py +182 -0
  157. data_engine/ui/gui/preview_models.py +37 -0
  158. data_engine/ui/gui/render_support.py +241 -0
  159. data_engine/ui/gui/rendering/__init__.py +12 -0
  160. data_engine/ui/gui/rendering/artifacts.py +95 -0
  161. data_engine/ui/gui/rendering/icons.py +50 -0
  162. data_engine/ui/gui/runtime.py +47 -0
  163. data_engine/ui/gui/state_support.py +193 -0
  164. data_engine/ui/gui/support.py +214 -0
  165. data_engine/ui/gui/surface.py +209 -0
  166. data_engine/ui/gui/theme.py +720 -0
  167. data_engine/ui/gui/widgets/__init__.py +34 -0
  168. data_engine/ui/gui/widgets/config.py +41 -0
  169. data_engine/ui/gui/widgets/logs.py +62 -0
  170. data_engine/ui/gui/widgets/panels.py +507 -0
  171. data_engine/ui/gui/widgets/sidebar.py +130 -0
  172. data_engine/ui/gui/widgets/steps.py +84 -0
  173. data_engine/ui/tui/__init__.py +5 -0
  174. data_engine/ui/tui/app.py +222 -0
  175. data_engine/ui/tui/bootstrap.py +475 -0
  176. data_engine/ui/tui/bootstrapper.py +117 -0
  177. data_engine/ui/tui/controllers/__init__.py +6 -0
  178. data_engine/ui/tui/controllers/flows.py +349 -0
  179. data_engine/ui/tui/controllers/runtime.py +167 -0
  180. data_engine/ui/tui/runtime.py +34 -0
  181. data_engine/ui/tui/state_support.py +141 -0
  182. data_engine/ui/tui/support.py +63 -0
  183. data_engine/ui/tui/theme.py +204 -0
  184. data_engine/ui/tui/widgets.py +123 -0
  185. data_engine/views/__init__.py +109 -0
  186. data_engine/views/actions.py +80 -0
  187. data_engine/views/artifacts.py +58 -0
  188. data_engine/views/flow_display.py +69 -0
  189. data_engine/views/logs.py +54 -0
  190. data_engine/views/models.py +96 -0
  191. data_engine/views/presentation.py +133 -0
  192. data_engine/views/runs.py +62 -0
  193. data_engine/views/state.py +39 -0
  194. data_engine/views/status.py +13 -0
  195. data_engine/views/text.py +109 -0
  196. py_data_engine-0.1.0.dist-info/METADATA +330 -0
  197. py_data_engine-0.1.0.dist-info/RECORD +200 -0
  198. py_data_engine-0.1.0.dist-info/WHEEL +5 -0
  199. py_data_engine-0.1.0.dist-info/entry_points.txt +2 -0
  200. py_data_engine-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,176 @@
1
+ """Continuous watch/schedule loop for one sequential flow runtime."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections import deque
6
+ from datetime import datetime
7
+ from time import monotonic, sleep
8
+ from typing import TYPE_CHECKING
9
+
10
+ from data_engine.authoring.execution.context import _QueuedJob
11
+ from data_engine.authoring.primitives import WatchSpec
12
+ from data_engine.runtime.file_watch import PollingWatcher
13
+
14
+ if TYPE_CHECKING:
15
+ from data_engine.authoring.execution.single import _FlowRuntime
16
+ from data_engine.authoring.primitives import FlowContext
17
+
18
+
19
class ContinuousRuntimeLoop:
    """Own the watch/schedule loop for one sequential runtime.

    The loop keeps a FIFO of queued jobs plus two lists of plain-dict
    bookkeeping entries: one per polled flow (``watch_entries``) and one per
    schedule slot (``schedule_entries``).  Queued jobs always run before
    pending scheduled flows; when neither is available the loop sleeps
    briefly before looking again.
    """

    def __init__(self, runtime: "_FlowRuntime") -> None:
        self.runtime = runtime

    def run(self) -> list["FlowContext"]:
        """Run until the runtime stop event is set and return collected results.

        Returns:
            The values returned by every ``run_executor.run_one`` call that
            completed without raising, in execution order.

        Raises:
            Exception: anything raised while registering flows propagates to
            the caller; watchers that were already started are stopped first.
        """
        results: list["FlowContext"] = []
        queue: deque[_QueuedJob] = deque()
        queued_keys: set[tuple[str, str | None]] = set()
        watch_entries: list[dict[str, object]] = []
        schedule_entries: list[dict[str, object]] = []
        started_at = monotonic()

        # Registration happens inside the try block so that a failure while
        # preparing a later flow still stops the watchers started for earlier
        # flows.
        try:
            for flow in self.runtime.flows:
                self._register_flow(flow, queue, queued_keys, watch_entries, schedule_entries, started_at)

            self.runtime._emit_status("Watcher/scheduler running.")
            while True:
                stop_event = self.runtime.runtime_stop_event
                if stop_event is not None and stop_event.is_set():
                    self.runtime._emit_status("Watcher/scheduler stopped.")
                    break
                now = monotonic()
                self._poll_watch_entries(watch_entries, queue, queued_keys, now)
                self._update_schedule_entries(schedule_entries, now)

                if queue:
                    job = queue.popleft()
                    # Drop the key first so the same source can be queued
                    # again while this job is running.
                    queued_keys.discard(self.runtime.polling.job_key(job.flow, job.source_path))
                    try:
                        results.append(
                            self.runtime.run_executor.run_one(
                                job.flow,
                                job.source_path,
                                batch_signatures=job.batch_signatures,
                            )
                        )
                    except FlowStoppedError:
                        self._clear_flow_stop()
                    except Exception:
                        # NOTE(review): the failure is dropped here so the loop
                        # keeps serving other jobs; presumably run_one has
                        # already recorded it - confirm.
                        continue
                    continue

                scheduled = next((entry for entry in schedule_entries if entry["pending"]), None)
                if scheduled is not None:
                    scheduled["pending"] = False
                    scheduled_flow = scheduled["flow"]
                    try:
                        for source_path in self.runtime.polling.startup_sources(scheduled_flow):
                            results.append(self.runtime.run_executor.run_one(scheduled_flow, source_path))
                    except FlowStoppedError:
                        self._clear_flow_stop()
                    except Exception:
                        # NOTE(review): same best-effort handling as for
                        # queued jobs above.
                        continue
                    continue

                # Nothing to do right now; avoid spinning.
                sleep(0.05)
        finally:
            for entry in watch_entries:
                watcher = entry["watcher"]
                if isinstance(watcher, PollingWatcher):
                    watcher.stop()
        return results

    def _clear_flow_stop(self) -> None:
        """Reset the per-flow stop event, when one is configured."""
        if self.runtime.flow_stop_event is not None:
            self.runtime.flow_stop_event.clear()

    def _register_flow(
        self,
        flow,
        queue: deque[_QueuedJob],
        queued_keys: set[tuple[str, str | None]],
        watch_entries: list[dict[str, object]],
        schedule_entries: list[dict[str, object]],
        now: float,
    ) -> None:
        """Queue startup work for *flow* and record its watch/schedule entries.

        Flows without a trigger only get their startup sources queued.  Poll
        flows get a started watcher plus queued stale sources.  Schedule flows
        get one ``"every"`` entry, or one ``"at"`` entry per time slot.
        """
        polling = self.runtime.polling
        trigger = flow.trigger
        if trigger is None:
            for source_path in polling.startup_sources(flow):
                polling.enqueue_job(queue, queued_keys, flow, source_path)
            return
        if not isinstance(trigger, WatchSpec):
            return

        if trigger.mode == "poll":
            # Compute the deadline before starting the watcher so a bad
            # interval cannot leave a started-but-unregistered watcher behind.
            next_poll = now + float(trigger.interval_seconds)
            watcher = polling.make_watcher(trigger)
            watcher.start()
            # Record the watcher immediately: the cleanup in run() only stops
            # watchers that are present in watch_entries.
            watch_entries.append(
                {
                    "flow": flow,
                    "interval": trigger.interval_seconds,
                    "next_poll": next_poll,
                    "watcher": watcher,
                }
            )
            for source_path in polling.stale_poll_sources(flow):
                batch_signatures = polling.stale_batch_poll_signatures(flow) if source_path is None else ()
                polling.enqueue_job(queue, queued_keys, flow, source_path, batch_signatures=batch_signatures)
        elif trigger.mode == "schedule":
            if trigger.interval_seconds is not None:
                schedule_entries.append(
                    {
                        "flow": flow,
                        "kind": "every",
                        "interval": trigger.interval_seconds,
                        "next_due": now + float(trigger.interval_seconds),
                        "pending": False,
                    }
                )
            else:
                for hour, minute in trigger.time_slots:
                    schedule_entries.append(
                        {
                            "flow": flow,
                            "kind": "at",
                            "hour": hour,
                            "minute": minute,
                            "pending": False,
                            "last_run_date": None,
                        }
                    )

    def _poll_watch_entries(
        self,
        watch_entries: list[dict[str, object]],
        queue: deque[_QueuedJob],
        queued_keys: set[tuple[str, str | None]],
        now: float,
    ) -> None:
        """Drain due watchers and queue jobs for the paths they reported.

        An entry is due once ``now`` reaches its ``next_poll`` value; after
        draining, ``next_poll`` is pushed forward by the entry's interval.
        """
        polling = self.runtime.polling
        for entry in watch_entries:
            if now < entry["next_poll"]:
                continue
            watched_flow = entry["flow"]
            watcher = entry["watcher"]
            assert isinstance(watcher, PollingWatcher)
            for path in watcher.drain_events():
                watched_trigger = watched_flow.trigger
                assert isinstance(watched_trigger, WatchSpec)
                is_directory_batch = (
                    watched_trigger.run_as == "batch"
                    and watched_trigger.source is not None
                    and watched_trigger.source.is_dir()
                )
                if is_directory_batch:
                    signature = polling.poll_source_signature(watched_flow, path)
                    polling.enqueue_job(
                        queue,
                        queued_keys,
                        watched_flow,
                        None,
                        batch_signatures=(signature,) if signature is not None else (),
                    )
                    # NOTE(review): only the first drained path contributes a
                    # signature; any further paths from this drain are not
                    # inspected - confirm this is intended.
                    break
                if polling.is_poll_source_stale(watched_flow, path):
                    polling.enqueue_job(queue, queued_keys, watched_flow, path)
            entry["next_poll"] = now + float(entry["interval"])

    def _update_schedule_entries(self, schedule_entries: list[dict[str, object]], now: float) -> None:
        """Mark schedule entries whose time has come as pending.

        ``"every"`` entries compare the monotonic ``now`` against ``next_due``
        and re-arm themselves.  ``"at"`` entries compare the local wall-clock
        hour/minute against their slot and fire at most once per calendar
        date.
        """
        current_dt = datetime.now()
        today = current_dt.date()
        current_slot = (current_dt.hour, current_dt.minute)
        for entry in schedule_entries:
            kind = entry["kind"]
            if kind == "every":
                if now >= entry["next_due"]:
                    entry["pending"] = True
                    entry["next_due"] = now + float(entry["interval"])
            elif kind == "at":
                if entry["last_run_date"] == today:
                    continue
                if current_slot >= (entry["hour"], entry["minute"]):
                    entry["pending"] = True
                    entry["last_run_date"] = today
172
+
173
+
174
+ from data_engine.authoring.model import FlowStoppedError
175
+
176
+ __all__ = ["ContinuousRuntimeLoop"]
@@ -0,0 +1,106 @@
1
+ """Grouped runtime orchestration for authored flows."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from queue import Queue
6
+ import threading
7
+ from typing import TYPE_CHECKING, Callable
8
+
9
+ from data_engine.authoring.primitives import FlowContext
10
+ from data_engine.authoring.execution.single import _FlowRuntime, RuntimeLedgerService, default_runtime_ledger_service
11
+ from data_engine.runtime.runtime_db import RuntimeLedger
12
+
13
+ if TYPE_CHECKING:
14
+ from data_engine.authoring.flow import Flow
15
+
16
+
17
+ class _GroupedFlowRuntime:
18
+ """Grouped orchestrator: sequential within a group, parallel across groups."""
19
+
20
+ def __init__(
21
+ self,
22
+ flows: tuple["Flow", ...],
23
+ *,
24
+ continuous: bool,
25
+ runtime_stop_event: threading.Event | None = None,
26
+ flow_stop_event: threading.Event | None = None,
27
+ status_callback: Callable[[str], None] | None = None,
28
+ runtime_ledger: RuntimeLedger | None = None,
29
+ runtime_ledger_service: RuntimeLedgerService | None = None,
30
+ runtime_ledger_factory: Callable[[], RuntimeLedger] | None = None,
31
+ ) -> None:
32
+ self.flows = tuple(flows)
33
+ self.continuous = continuous
34
+ self.runtime_stop_event = runtime_stop_event
35
+ self.flow_stop_event = flow_stop_event
36
+ self.status_callback = status_callback
37
+ self._runtime_ledger_service = runtime_ledger_service or default_runtime_ledger_service()
38
+ self._runtime_ledger_factory = runtime_ledger_factory or self._runtime_ledger_service.open_runtime_ledger
39
+ self.runtime_ledger = runtime_ledger or self._runtime_ledger_factory()
40
+
41
+ def run(self) -> list[FlowContext]:
42
+ grouped = self._grouped_flows()
43
+ if len(grouped) <= 1:
44
+ only = next(iter(grouped.values()), ())
45
+ return _FlowRuntime(
46
+ tuple(only),
47
+ continuous=self.continuous,
48
+ runtime_stop_event=self.runtime_stop_event,
49
+ flow_stop_event=self.flow_stop_event,
50
+ status_callback=self.status_callback,
51
+ runtime_ledger=self.runtime_ledger,
52
+ runtime_ledger_service=self._runtime_ledger_service,
53
+ ).run()
54
+
55
+ results_by_group: dict[str, list[FlowContext]] = {name: [] for name in grouped}
56
+ errors: Queue[tuple[str, Exception]] = Queue()
57
+ threads: list[threading.Thread] = []
58
+ internal_runtime_stop = self.runtime_stop_event or threading.Event()
59
+ internal_flow_stop = self.flow_stop_event or threading.Event()
60
+
61
+ def run_group(group_name: str, group_flows: tuple["Flow", ...]) -> None:
62
+ try:
63
+ runtime = _FlowRuntime(
64
+ group_flows,
65
+ continuous=self.continuous,
66
+ runtime_stop_event=internal_runtime_stop,
67
+ flow_stop_event=internal_flow_stop,
68
+ status_callback=self.status_callback,
69
+ runtime_ledger=self.runtime_ledger,
70
+ runtime_ledger_service=self._runtime_ledger_service,
71
+ runtime_ledger_factory=self._runtime_ledger_factory,
72
+ )
73
+ results_by_group[group_name] = runtime.run()
74
+ except Exception as exc: # pragma: no cover
75
+ errors.put((group_name, exc))
76
+ if not self.continuous:
77
+ internal_runtime_stop.set()
78
+ finally:
79
+ self.runtime_ledger.close()
80
+
81
+ for group_name, group_flows in grouped.items():
82
+ thread = threading.Thread(target=run_group, args=(group_name, group_flows), daemon=True)
83
+ threads.append(thread)
84
+ thread.start()
85
+
86
+ for thread in threads:
87
+ thread.join()
88
+
89
+ if not self.continuous and not errors.empty():
90
+ _, exc = errors.get()
91
+ raise exc
92
+
93
+ ordered_results: list[FlowContext] = []
94
+ for group_name in grouped:
95
+ ordered_results.extend(results_by_group[group_name])
96
+ return ordered_results
97
+
98
+ def _grouped_flows(self) -> dict[str, tuple["Flow", ...]]:
99
+ grouped: dict[str, list["Flow"]] = {}
100
+ for index, flow in enumerate(self.flows):
101
+ key = flow.group or f"group-{index}"
102
+ grouped.setdefault(key, []).append(flow)
103
+ return {name: tuple(group_flows) for name, group_flows in grouped.items()}
104
+
105
+
106
+ __all__ = ["_GroupedFlowRuntime"]
@@ -0,0 +1,83 @@
1
+ """Runtime log and ledger emission helpers for authored flows."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from pathlib import Path
7
+
8
+ from data_engine.domain.time import utcnow_text
9
+ from data_engine.runtime.runtime_db import RuntimeLedger
10
+
11
+ LOGGER = logging.getLogger(__name__)
12
+
13
+
14
class RuntimeLogEmitter:
    """Own runtime log persistence and logger emission.

    Every message is appended to the runtime ledger and then forwarded to
    the module logger.
    """

    # Level names accepted from callers, matched case-insensitively.
    _LOGGER_LEVELS = {
        "debug": logging.DEBUG,
        "info": logging.INFO,
        "warning": logging.WARNING,
        "warn": logging.WARNING,
        "error": logging.ERROR,
        "critical": logging.CRITICAL,
        "fatal": logging.CRITICAL,
    }

    def __init__(self, runtime_ledger: RuntimeLedger) -> None:
        self.runtime_ledger = runtime_ledger

    @classmethod
    def _logging_level(cls, level: str) -> int:
        """Return the ``logging`` level number for *level*.

        Matching ignores case and surrounding whitespace; unrecognised names
        fall back to ``logging.INFO``.
        """
        return cls._LOGGER_LEVELS.get(level.strip().lower(), logging.INFO)

    def log_runtime_message(
        self,
        message: str,
        *,
        level: str,
        run_id: str | None,
        flow_name: str | None,
        step_label: str | None = None,
        exc_info: bool = False,
    ) -> None:
        """Persist *message* to the ledger and emit it through the logger.

        Args:
            message: Text stored in the ledger and passed to the logger.
            level: Level name; stored upper-cased in the ledger and used to
                pick the logger level.
            run_id: Run identifier stored with the ledger row, if any.
            flow_name: Flow name stored with the ledger row, if any.
            step_label: Step label stored with the ledger row, if any.
            exc_info: Forwarded to the logger call.
        """
        created_at_utc = utcnow_text()
        self.runtime_ledger.append_log(
            level=level.upper(),
            message=message,
            created_at_utc=created_at_utc,
            run_id=run_id,
            flow_name=flow_name,
            step_label=step_label,
        )
        # Use the same (case-insensitive) level for the logger as the one
        # recorded in the ledger, instead of collapsing everything that is
        # not exactly "error" into INFO.
        LOGGER.log(self._logging_level(level), message, exc_info=exc_info)

    def log_flow_event(
        self,
        run_id: str,
        flow_name: str,
        source_path: Path | None,
        *,
        status: str,
        elapsed: float | None = None,
        level: str = "info",
        exc_info: bool = False,
    ) -> None:
        """Log a flow-level status line as ``run=... flow=... source=... status=...``.

        ``elapsed`` is appended with six decimal places when provided.
        """
        message = f"run={run_id} flow={flow_name} source={source_path} status={status}"
        if elapsed is not None:
            message = f"{message} elapsed={elapsed:.6f}"
        self.log_runtime_message(message, level=level, run_id=run_id, flow_name=flow_name, exc_info=exc_info)

    def log_step_event(
        self,
        run_id: str,
        flow_name: str,
        step_label: str,
        source_path: Path | None,
        *,
        status: str,
        elapsed: float | None = None,
        level: str = "info",
        exc_info: bool = False,
    ) -> None:
        """Log a step-level status line; same layout as flow events plus ``step=``.

        ``elapsed`` is appended with six decimal places when provided.
        """
        message = f"run={run_id} flow={flow_name} step={step_label} source={source_path} status={status}"
        if elapsed is not None:
            message = f"{message} elapsed={elapsed:.6f}"
        self.log_runtime_message(
            message,
            level=level,
            run_id=run_id,
            flow_name=flow_name,
            step_label=step_label,
            exc_info=exc_info,
        )
81
+
82
+
83
+ __all__ = ["RuntimeLogEmitter"]
@@ -0,0 +1,135 @@
1
+ """Polling, scheduling, and source-queue helpers for authored flows."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections import deque
6
+ from pathlib import Path
7
+
8
+ from data_engine.authoring.primitives import WatchSpec
9
+ from data_engine.authoring.execution.context import _QueuedJob
10
+ from data_engine.domain.source_state import SourceSignature
11
+ from data_engine.runtime.file_watch import PollingWatcher, iter_candidate_paths
12
+ from data_engine.runtime.runtime_db import RuntimeLedger
13
+
14
+
15
class RuntimePollingSupport:
    """Own watcher creation, queueing, and stale-source detection."""

    def __init__(self, runtime_ledger: RuntimeLedger) -> None:
        self.runtime_ledger = runtime_ledger

    def make_watcher(self, trigger: WatchSpec) -> PollingWatcher:
        """Build a recursive ``PollingWatcher`` for the trigger's source."""
        return PollingWatcher(trigger.source, recursive=True, extensions=trigger.extensions, settle=trigger.settle)

    def startup_sources(self, flow: "Flow", *, allow_missing: bool = False) -> list[Path | None]:
        """Return the sources *flow* should run against.

        ``[None]`` means "run once without a concrete source"; it is returned
        when the flow has no watch trigger or source, when the source does
        not exist, or when a directory source runs in batch mode.  A file
        source yields itself; a non-batch directory source yields every
        candidate path below it.
        """
        trigger = flow.trigger
        if not isinstance(trigger, WatchSpec) or trigger.source is None:
            return [None]
        source = trigger.source
        if not source.exists():
            return [None]
        if source.is_file():
            return [source]
        if trigger.run_as == "batch":
            return [None]
        return list(
            iter_candidate_paths(
                source,
                extensions=trigger.extensions,
                recursive=True,
                allow_missing=allow_missing,
            )
        )

    def enqueue_job(
        self,
        queue: deque[_QueuedJob],
        queued_keys: set[tuple[str, str | None]],
        flow: "Flow",
        source_path: Path | None,
        *,
        batch_signatures: tuple[SourceSignature, ...] = (),
    ) -> None:
        """Queue a job for ``(flow, source_path)`` unless one is already queued.

        When the key is already queued and *batch_signatures* is non-empty,
        the signatures are merged into the queued job (keyed by
        ``signature.source_path``, newest wins) instead of adding a second
        job.
        """
        key = self.job_key(flow, source_path)
        if key not in queued_keys:
            queue.append(_QueuedJob(flow, source_path, batch_signatures))
            queued_keys.add(key)
            return
        if not batch_signatures:
            return
        for index, job in enumerate(queue):
            if self.job_key(job.flow, job.source_path) != key:
                continue
            merged = {signature.source_path: signature for signature in job.batch_signatures}
            merged.update((signature.source_path, signature) for signature in batch_signatures)
            # Replace in place so the job keeps its position in the queue.
            queue[index] = _QueuedJob(
                flow=job.flow,
                source_path=job.source_path,
                batch_signatures=tuple(merged[path] for path in sorted(merged)),
            )
            break

    def job_key(self, flow: "Flow", source_path: Path | None) -> tuple[str, str | None]:
        """Return the de-duplication key: flow name plus stringified source."""
        return (flow.name, str(source_path) if source_path is not None else None)

    def stale_poll_sources(self, flow: "Flow") -> list[Path | None]:
        """Return the sources of a poll flow that need a run.

        Non-poll flows yield an empty list.  A missing source yields
        ``[None]``.  A directory in batch mode yields ``[None]`` when at
        least one candidate file is stale.  Otherwise each stale startup
        source is returned individually.  In the last two cases the ledger
        is asked to prune file state using the full set of currently present
        source paths.
        """
        trigger = flow.trigger
        if not isinstance(trigger, WatchSpec) or trigger.mode != "poll" or trigger.source is None:
            return []
        if not trigger.source.exists():
            return [None]

        stale: list[Path | None] = []
        if trigger.run_as == "batch" and trigger.source.is_dir():
            candidates = iter_candidate_paths(
                trigger.source,
                extensions=trigger.extensions,
                recursive=True,
                allow_missing=True,
            )
            # Visit every candidate: the prune call below must see all files
            # that still exist, not only those up to the first stale one.
            current_source_paths, any_stale = self._scan_batch_candidates(flow, candidates)
            if any_stale:
                stale.append(None)
        else:
            current_source_paths = set()
            for source_path in self.startup_sources(flow, allow_missing=True):
                if source_path is None:
                    stale.append(None)
                    continue
                current_source_paths.add(self.runtime_ledger.normalize_source_path(source_path))
                if self.is_poll_source_stale(flow, source_path):
                    stale.append(source_path)
        # presumably drops ledger state for paths absent from this set -
        # verify against RuntimeLedger.prune_missing_file_state
        self.runtime_ledger.prune_missing_file_state(flow_name=flow.name, current_source_paths=current_source_paths)
        return stale

    def _scan_batch_candidates(self, flow: "Flow", candidates) -> tuple[set[str], bool]:
        """Normalize every candidate path and report whether any is stale.

        *candidates* is any iterable of paths.  Staleness is only probed
        until the first stale path is found; normalization continues for the
        rest.
        """
        normalized: set[str] = set()
        any_stale = False
        for source_path in candidates:
            normalized.add(self.runtime_ledger.normalize_source_path(source_path))
            if not any_stale and self.is_poll_source_stale(flow, source_path):
                any_stale = True
        return normalized, any_stale

    def stale_batch_poll_signatures(self, flow: "Flow") -> tuple[SourceSignature, ...]:
        """Return signatures of all stale candidates below a polled directory.

        The result is ordered by ``signature.source_path``; flows that are
        not polling a directory yield an empty tuple.
        """
        trigger = flow.trigger
        if not isinstance(trigger, WatchSpec) or trigger.mode != "poll" or trigger.source is None or not trigger.source.is_dir():
            return ()
        signatures: dict[str, SourceSignature] = {}
        for source_path in iter_candidate_paths(trigger.source, extensions=trigger.extensions, recursive=True, allow_missing=True):
            if not self.is_poll_source_stale(flow, source_path):
                continue
            signature = self.poll_source_signature(flow, source_path)
            if signature is not None:
                signatures[signature.source_path] = signature
        return tuple(signatures[path] for path in sorted(signatures))

    def is_poll_source_stale(self, flow: "Flow", source_path: Path | None) -> bool:
        """Return whether *source_path* needs processing for a poll flow.

        Non-poll flows are never stale.  A ``None`` or missing path is always
        stale, as is an existing single-file source with no signature.
        Everything else is decided by the ledger.
        """
        trigger = flow.trigger
        if not isinstance(trigger, WatchSpec) or trigger.mode != "poll":
            return False
        if source_path is None or not source_path.exists():
            return True
        signature = self.poll_source_signature(flow, source_path)
        if signature is None and trigger.source is not None and trigger.source.exists() and trigger.source.is_file():
            return True
        return self.runtime_ledger.is_poll_source_stale(flow.name, signature)

    def poll_source_signature(self, flow: "Flow", source_path: Path | None) -> SourceSignature | None:
        """Return the ledger's signature for *source_path*; ``None`` for non-poll flows or no path."""
        if source_path is None or not isinstance(flow.trigger, WatchSpec) or flow.trigger.mode != "poll":
            return None
        return self.runtime_ledger.source_signature_for_path(source_path)

    def normalized_source_path(self, source_path: Path | None) -> str | None:
        """Return the ledger-normalized form of *source_path*, or ``None`` when it is ``None``."""
        if source_path is None:
            return None
        return self.runtime_ledger.normalize_source_path(source_path)
133
+
134
+
135
+ __all__ = ["RuntimePollingSupport"]