py-data-engine 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_engine/__init__.py +37 -0
- data_engine/application/__init__.py +39 -0
- data_engine/application/actions.py +42 -0
- data_engine/application/catalog.py +151 -0
- data_engine/application/control.py +213 -0
- data_engine/application/details.py +73 -0
- data_engine/application/runtime.py +449 -0
- data_engine/application/workspace.py +62 -0
- data_engine/authoring/__init__.py +14 -0
- data_engine/authoring/builder.py +31 -0
- data_engine/authoring/execution/__init__.py +6 -0
- data_engine/authoring/execution/app.py +6 -0
- data_engine/authoring/execution/context.py +82 -0
- data_engine/authoring/execution/continuous.py +176 -0
- data_engine/authoring/execution/grouped.py +106 -0
- data_engine/authoring/execution/logging.py +83 -0
- data_engine/authoring/execution/polling.py +135 -0
- data_engine/authoring/execution/runner.py +210 -0
- data_engine/authoring/execution/single.py +171 -0
- data_engine/authoring/flow.py +361 -0
- data_engine/authoring/helpers.py +160 -0
- data_engine/authoring/model.py +59 -0
- data_engine/authoring/primitives.py +430 -0
- data_engine/authoring/services.py +42 -0
- data_engine/devtools/__init__.py +3 -0
- data_engine/devtools/project_ast_map.py +503 -0
- data_engine/docs/__init__.py +1 -0
- data_engine/docs/sphinx_source/_static/custom.css +13 -0
- data_engine/docs/sphinx_source/api.rst +42 -0
- data_engine/docs/sphinx_source/conf.py +37 -0
- data_engine/docs/sphinx_source/guides/app-runtime-and-workspaces.md +397 -0
- data_engine/docs/sphinx_source/guides/authoring-flow-modules.md +215 -0
- data_engine/docs/sphinx_source/guides/configuring-flows.md +185 -0
- data_engine/docs/sphinx_source/guides/core-concepts.md +208 -0
- data_engine/docs/sphinx_source/guides/database-methods.md +107 -0
- data_engine/docs/sphinx_source/guides/duckdb-helpers.md +462 -0
- data_engine/docs/sphinx_source/guides/flow-context.md +538 -0
- data_engine/docs/sphinx_source/guides/flow-methods.md +206 -0
- data_engine/docs/sphinx_source/guides/getting-started.md +271 -0
- data_engine/docs/sphinx_source/guides/project-inventory.md +5683 -0
- data_engine/docs/sphinx_source/guides/project-map.md +118 -0
- data_engine/docs/sphinx_source/guides/recipes.md +268 -0
- data_engine/docs/sphinx_source/index.rst +22 -0
- data_engine/domain/__init__.py +92 -0
- data_engine/domain/actions.py +69 -0
- data_engine/domain/catalog.py +128 -0
- data_engine/domain/details.py +214 -0
- data_engine/domain/diagnostics.py +56 -0
- data_engine/domain/errors.py +104 -0
- data_engine/domain/inspection.py +99 -0
- data_engine/domain/logs.py +118 -0
- data_engine/domain/operations.py +172 -0
- data_engine/domain/operator.py +72 -0
- data_engine/domain/runs.py +155 -0
- data_engine/domain/runtime.py +279 -0
- data_engine/domain/source_state.py +17 -0
- data_engine/domain/support.py +54 -0
- data_engine/domain/time.py +23 -0
- data_engine/domain/workspace.py +159 -0
- data_engine/flow_modules/__init__.py +1 -0
- data_engine/flow_modules/flow_module_compiler.py +179 -0
- data_engine/flow_modules/flow_module_loader.py +201 -0
- data_engine/helpers/__init__.py +25 -0
- data_engine/helpers/duckdb.py +705 -0
- data_engine/hosts/__init__.py +1 -0
- data_engine/hosts/daemon/__init__.py +23 -0
- data_engine/hosts/daemon/app.py +221 -0
- data_engine/hosts/daemon/bootstrap.py +69 -0
- data_engine/hosts/daemon/client.py +465 -0
- data_engine/hosts/daemon/commands.py +64 -0
- data_engine/hosts/daemon/composition.py +310 -0
- data_engine/hosts/daemon/constants.py +15 -0
- data_engine/hosts/daemon/entrypoints.py +97 -0
- data_engine/hosts/daemon/lifecycle.py +191 -0
- data_engine/hosts/daemon/manager.py +272 -0
- data_engine/hosts/daemon/ownership.py +126 -0
- data_engine/hosts/daemon/runtime_commands.py +188 -0
- data_engine/hosts/daemon/runtime_control.py +31 -0
- data_engine/hosts/daemon/server.py +84 -0
- data_engine/hosts/daemon/shared_state.py +147 -0
- data_engine/hosts/daemon/state_sync.py +101 -0
- data_engine/platform/__init__.py +1 -0
- data_engine/platform/identity.py +35 -0
- data_engine/platform/local_settings.py +146 -0
- data_engine/platform/theme.py +259 -0
- data_engine/platform/workspace_models.py +190 -0
- data_engine/platform/workspace_policy.py +333 -0
- data_engine/runtime/__init__.py +1 -0
- data_engine/runtime/file_watch.py +185 -0
- data_engine/runtime/ledger_models.py +116 -0
- data_engine/runtime/runtime_db.py +938 -0
- data_engine/runtime/shared_state.py +523 -0
- data_engine/services/__init__.py +49 -0
- data_engine/services/daemon.py +64 -0
- data_engine/services/daemon_state.py +40 -0
- data_engine/services/flow_catalog.py +102 -0
- data_engine/services/flow_execution.py +48 -0
- data_engine/services/ledger.py +85 -0
- data_engine/services/logs.py +65 -0
- data_engine/services/runtime_binding.py +105 -0
- data_engine/services/runtime_execution.py +126 -0
- data_engine/services/runtime_history.py +62 -0
- data_engine/services/settings.py +58 -0
- data_engine/services/shared_state.py +28 -0
- data_engine/services/theme.py +59 -0
- data_engine/services/workspace_provisioning.py +224 -0
- data_engine/services/workspaces.py +74 -0
- data_engine/ui/__init__.py +3 -0
- data_engine/ui/cli/__init__.py +19 -0
- data_engine/ui/cli/app.py +161 -0
- data_engine/ui/cli/commands_doctor.py +178 -0
- data_engine/ui/cli/commands_run.py +80 -0
- data_engine/ui/cli/commands_start.py +100 -0
- data_engine/ui/cli/commands_workspace.py +97 -0
- data_engine/ui/cli/dependencies.py +44 -0
- data_engine/ui/cli/parser.py +56 -0
- data_engine/ui/gui/__init__.py +25 -0
- data_engine/ui/gui/app.py +116 -0
- data_engine/ui/gui/bootstrap.py +487 -0
- data_engine/ui/gui/bootstrapper.py +140 -0
- data_engine/ui/gui/cache_models.py +23 -0
- data_engine/ui/gui/control_support.py +185 -0
- data_engine/ui/gui/controllers/__init__.py +6 -0
- data_engine/ui/gui/controllers/flows.py +439 -0
- data_engine/ui/gui/controllers/runtime.py +245 -0
- data_engine/ui/gui/dialogs/__init__.py +12 -0
- data_engine/ui/gui/dialogs/messages.py +88 -0
- data_engine/ui/gui/dialogs/previews.py +222 -0
- data_engine/ui/gui/helpers/__init__.py +62 -0
- data_engine/ui/gui/helpers/inspection.py +81 -0
- data_engine/ui/gui/helpers/lifecycle.py +112 -0
- data_engine/ui/gui/helpers/scroll.py +28 -0
- data_engine/ui/gui/helpers/theming.py +87 -0
- data_engine/ui/gui/icons/dark_light.svg +12 -0
- data_engine/ui/gui/icons/documentation.svg +1 -0
- data_engine/ui/gui/icons/failed.svg +3 -0
- data_engine/ui/gui/icons/group.svg +4 -0
- data_engine/ui/gui/icons/home.svg +2 -0
- data_engine/ui/gui/icons/manual.svg +2 -0
- data_engine/ui/gui/icons/poll.svg +2 -0
- data_engine/ui/gui/icons/schedule.svg +4 -0
- data_engine/ui/gui/icons/settings.svg +2 -0
- data_engine/ui/gui/icons/started.svg +3 -0
- data_engine/ui/gui/icons/success.svg +3 -0
- data_engine/ui/gui/icons/view-log.svg +3 -0
- data_engine/ui/gui/icons.py +50 -0
- data_engine/ui/gui/launcher.py +48 -0
- data_engine/ui/gui/presenters/__init__.py +72 -0
- data_engine/ui/gui/presenters/docs.py +140 -0
- data_engine/ui/gui/presenters/logs.py +58 -0
- data_engine/ui/gui/presenters/runtime_projection.py +29 -0
- data_engine/ui/gui/presenters/sidebar.py +88 -0
- data_engine/ui/gui/presenters/steps.py +148 -0
- data_engine/ui/gui/presenters/workspace.py +39 -0
- data_engine/ui/gui/presenters/workspace_binding.py +75 -0
- data_engine/ui/gui/presenters/workspace_settings.py +182 -0
- data_engine/ui/gui/preview_models.py +37 -0
- data_engine/ui/gui/render_support.py +241 -0
- data_engine/ui/gui/rendering/__init__.py +12 -0
- data_engine/ui/gui/rendering/artifacts.py +95 -0
- data_engine/ui/gui/rendering/icons.py +50 -0
- data_engine/ui/gui/runtime.py +47 -0
- data_engine/ui/gui/state_support.py +193 -0
- data_engine/ui/gui/support.py +214 -0
- data_engine/ui/gui/surface.py +209 -0
- data_engine/ui/gui/theme.py +720 -0
- data_engine/ui/gui/widgets/__init__.py +34 -0
- data_engine/ui/gui/widgets/config.py +41 -0
- data_engine/ui/gui/widgets/logs.py +62 -0
- data_engine/ui/gui/widgets/panels.py +507 -0
- data_engine/ui/gui/widgets/sidebar.py +130 -0
- data_engine/ui/gui/widgets/steps.py +84 -0
- data_engine/ui/tui/__init__.py +5 -0
- data_engine/ui/tui/app.py +222 -0
- data_engine/ui/tui/bootstrap.py +475 -0
- data_engine/ui/tui/bootstrapper.py +117 -0
- data_engine/ui/tui/controllers/__init__.py +6 -0
- data_engine/ui/tui/controllers/flows.py +349 -0
- data_engine/ui/tui/controllers/runtime.py +167 -0
- data_engine/ui/tui/runtime.py +34 -0
- data_engine/ui/tui/state_support.py +141 -0
- data_engine/ui/tui/support.py +63 -0
- data_engine/ui/tui/theme.py +204 -0
- data_engine/ui/tui/widgets.py +123 -0
- data_engine/views/__init__.py +109 -0
- data_engine/views/actions.py +80 -0
- data_engine/views/artifacts.py +58 -0
- data_engine/views/flow_display.py +69 -0
- data_engine/views/logs.py +54 -0
- data_engine/views/models.py +96 -0
- data_engine/views/presentation.py +133 -0
- data_engine/views/runs.py +62 -0
- data_engine/views/state.py +39 -0
- data_engine/views/status.py +13 -0
- data_engine/views/text.py +109 -0
- py_data_engine-0.1.0.dist-info/METADATA +330 -0
- py_data_engine-0.1.0.dist-info/RECORD +200 -0
- py_data_engine-0.1.0.dist-info/WHEEL +5 -0
- py_data_engine-0.1.0.dist-info/entry_points.txt +2 -0
- py_data_engine-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"""Continuous watch/schedule loop for one sequential flow runtime."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections import deque
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from time import monotonic, sleep
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
from data_engine.authoring.execution.context import _QueuedJob
|
|
11
|
+
from data_engine.authoring.primitives import WatchSpec
|
|
12
|
+
from data_engine.runtime.file_watch import PollingWatcher
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from data_engine.authoring.execution.single import _FlowRuntime
|
|
16
|
+
from data_engine.authoring.primitives import FlowContext
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ContinuousRuntimeLoop:
    """Own the watch/schedule loop for one sequential runtime.

    The loop services three kinds of flows from ``runtime.flows``:

    * flows without a trigger are queued once at startup;
    * ``WatchSpec`` flows in ``"poll"`` mode get a watcher that is drained
      every ``interval_seconds``;
    * ``WatchSpec`` flows in ``"schedule"`` mode are flagged pending either on
      a fixed interval (``"every"``) or at wall-clock slots (``"at"``).

    Jobs are executed one at a time through ``runtime.run_executor.run_one``.
    """

    def __init__(self, runtime: "_FlowRuntime") -> None:
        self.runtime = runtime

    def run(self) -> list["FlowContext"]:
        """Run the loop until ``runtime.runtime_stop_event`` is set.

        Returns:
            Every value returned by ``run_executor.run_one`` during the loop,
            in execution order.

        Any exception raised while registering flows propagates to the
        caller; watchers that were already started are stopped first.
        """
        results: list["FlowContext"] = []
        queue: deque[_QueuedJob] = deque()
        queued_keys: set[tuple[str, str | None]] = set()
        watch_entries: list[dict[str, object]] = []
        schedule_entries: list[dict[str, object]] = []
        now = monotonic()

        # Registration happens inside the try so the finally clause also
        # stops watchers when setup itself fails part-way through.
        try:
            for flow in self.runtime.flows:
                if flow.trigger is None:
                    for source_path in self.runtime.polling.startup_sources(flow):
                        self.runtime.polling.enqueue_job(queue, queued_keys, flow, source_path)
                    continue
                if isinstance(flow.trigger, WatchSpec) and flow.trigger.mode == "poll":
                    watcher = self.runtime.polling.make_watcher(flow.trigger)
                    watch_entry: dict[str, object] = {
                        "flow": flow,
                        "interval": flow.trigger.interval_seconds,
                        "next_poll": now + float(flow.trigger.interval_seconds),
                        "watcher": watcher,
                    }
                    watcher.start()
                    # Register immediately after start so a failure in the
                    # stale-source scan below cannot leak the watcher.
                    watch_entries.append(watch_entry)
                    for source_path in self.runtime.polling.stale_poll_sources(flow):
                        # A ``None`` source is queued together with the
                        # signatures of every currently stale file.
                        batch_signatures = (
                            self.runtime.polling.stale_batch_poll_signatures(flow) if source_path is None else ()
                        )
                        self.runtime.polling.enqueue_job(
                            queue, queued_keys, flow, source_path, batch_signatures=batch_signatures
                        )
                elif isinstance(flow.trigger, WatchSpec) and flow.trigger.mode == "schedule":
                    if flow.trigger.interval_seconds is not None:
                        schedule_entries.append(
                            {
                                "flow": flow,
                                "kind": "every",
                                "interval": flow.trigger.interval_seconds,
                                "next_due": now + float(flow.trigger.interval_seconds),
                                "pending": False,
                            }
                        )
                    else:
                        for hour, minute in flow.trigger.time_slots:
                            schedule_entries.append(
                                {
                                    "flow": flow,
                                    "kind": "at",
                                    "hour": hour,
                                    "minute": minute,
                                    "pending": False,
                                    "last_run_date": None,
                                }
                            )

            self.runtime._emit_status("Watcher/scheduler running.")
            while True:
                if self.runtime.runtime_stop_event is not None and self.runtime.runtime_stop_event.is_set():
                    self.runtime._emit_status("Watcher/scheduler stopped.")
                    break
                now = monotonic()
                self._poll_watch_entries(watch_entries, queue, queued_keys, now)
                self._update_schedule_entries(schedule_entries, now)

                # Queued jobs take priority over pending scheduled flows.
                job = queue.popleft() if queue else None
                if job is not None:
                    queued_keys.discard(self.runtime.polling.job_key(job.flow, job.source_path))
                    try:
                        results.append(
                            self.runtime.run_executor.run_one(
                                job.flow,
                                job.source_path,
                                batch_signatures=job.batch_signatures,
                            )
                        )
                    except FlowStoppedError:
                        # Re-arm the per-flow stop signal so later jobs can run.
                        if self.runtime.flow_stop_event is not None:
                            self.runtime.flow_stop_event.clear()
                    except Exception:
                        # NOTE(review): the failure is dropped here so the loop
                        # keeps running; presumably run_one records it before
                        # raising - confirm against the executor.
                        continue
                    continue

                scheduled = next((entry for entry in schedule_entries if entry["pending"]), None)
                if scheduled is not None:
                    scheduled["pending"] = False
                    scheduled_flow = scheduled["flow"]
                    try:
                        for source_path in self.runtime.polling.startup_sources(scheduled_flow):
                            results.append(self.runtime.run_executor.run_one(scheduled_flow, source_path))
                    except FlowStoppedError:
                        if self.runtime.flow_stop_event is not None:
                            self.runtime.flow_stop_event.clear()
                    except Exception:
                        # NOTE(review): same silent drop as for queued jobs; the
                        # remaining sources of this scheduled run are skipped.
                        continue
                    continue

                # Nothing to do this iteration: back off briefly.
                sleep(0.05)
        finally:
            for entry in watch_entries:
                watcher = entry["watcher"]
                if isinstance(watcher, PollingWatcher):
                    watcher.stop()
        return results

    def _poll_watch_entries(
        self,
        watch_entries: list[dict[str, object]],
        queue: deque[_QueuedJob],
        queued_keys: set[tuple[str, str | None]],
        now: float,
    ) -> None:
        """Drain every watcher whose poll interval has elapsed and queue jobs.

        Args:
            watch_entries: Entries built by :meth:`run` for poll-mode flows.
            queue: Job queue that receives new work.
            queued_keys: Keys of the jobs currently in ``queue``.
            now: Current ``monotonic()`` reading.
        """
        for entry in watch_entries:
            if now < entry["next_poll"]:
                continue
            watched_flow = entry["flow"]
            watcher = entry["watcher"]
            assert isinstance(watcher, PollingWatcher)
            for path in watcher.drain_events():
                watched_trigger = watched_flow.trigger
                assert isinstance(watched_trigger, WatchSpec)
                if (
                    watched_trigger.run_as == "batch"
                    and watched_trigger.source is not None
                    and watched_trigger.source.is_dir()
                ):
                    # Batch directories queue one job with a ``None`` source.
                    signature = self.runtime.polling.poll_source_signature(watched_flow, path)
                    self.runtime.polling.enqueue_job(
                        queue,
                        queued_keys,
                        watched_flow,
                        None,
                        batch_signatures=(signature,) if signature is not None else (),
                    )
                    # NOTE(review): only the first drained path contributes a
                    # signature; later paths from this drain are not inspected.
                    # Confirm this is intended.
                    break
                if self.runtime.polling.is_poll_source_stale(watched_flow, path):
                    self.runtime.polling.enqueue_job(queue, queued_keys, watched_flow, path)
            entry["next_poll"] = now + float(entry["interval"])

    def _update_schedule_entries(self, schedule_entries: list[dict[str, object]], now: float) -> None:
        """Flag schedule entries whose run time has arrived as pending.

        ``"every"`` entries use the monotonic clock ``now``; ``"at"`` entries
        use local wall-clock time and fire at most once per calendar date.
        Because ``last_run_date`` starts as ``None``, an ``"at"`` slot whose
        time has already passed today is flagged on the first call.
        """
        for entry in schedule_entries:
            if entry["kind"] == "every":
                if now >= entry["next_due"]:
                    entry["pending"] = True
                    entry["next_due"] = now + float(entry["interval"])
                continue
        current_dt = datetime.now()
        for entry in schedule_entries:
            if entry["kind"] != "at":
                continue
            if entry["last_run_date"] == current_dt.date():
                continue
            if (current_dt.hour, current_dt.minute) >= (entry["hour"], entry["minute"]):
                entry["pending"] = True
                entry["last_run_date"] = current_dt.date()
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
from data_engine.authoring.model import FlowStoppedError
|
|
175
|
+
|
|
176
|
+
__all__ = ["ContinuousRuntimeLoop"]
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""Grouped runtime orchestration for authored flows."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from queue import Queue
|
|
6
|
+
import threading
|
|
7
|
+
from typing import TYPE_CHECKING, Callable
|
|
8
|
+
|
|
9
|
+
from data_engine.authoring.primitives import FlowContext
|
|
10
|
+
from data_engine.authoring.execution.single import _FlowRuntime, RuntimeLedgerService, default_runtime_ledger_service
|
|
11
|
+
from data_engine.runtime.runtime_db import RuntimeLedger
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from data_engine.authoring.flow import Flow
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class _GroupedFlowRuntime:
    """Run authored flows group by group.

    Flows that share a ``group`` value are handed to one ``_FlowRuntime``.
    When more than one group exists, every group runs on its own daemon
    thread; otherwise the single group runs on the calling thread.
    """

    def __init__(
        self,
        flows: tuple["Flow", ...],
        *,
        continuous: bool,
        runtime_stop_event: threading.Event | None = None,
        flow_stop_event: threading.Event | None = None,
        status_callback: Callable[[str], None] | None = None,
        runtime_ledger: RuntimeLedger | None = None,
        runtime_ledger_service: RuntimeLedgerService | None = None,
        runtime_ledger_factory: Callable[[], RuntimeLedger] | None = None,
    ) -> None:
        self.flows = tuple(flows)
        self.continuous = continuous
        self.runtime_stop_event = runtime_stop_event
        self.flow_stop_event = flow_stop_event
        self.status_callback = status_callback

        # Resolve ledger collaborators in dependency order:
        # service -> factory -> ledger instance.
        ledger_service = runtime_ledger_service if runtime_ledger_service else default_runtime_ledger_service()
        self._runtime_ledger_service = ledger_service
        ledger_factory = runtime_ledger_factory if runtime_ledger_factory else ledger_service.open_runtime_ledger
        self._runtime_ledger_factory = ledger_factory
        self.runtime_ledger = runtime_ledger if runtime_ledger else ledger_factory()

    def run(self) -> list[FlowContext]:
        """Execute all groups and return their results in group order.

        In non-continuous mode the first error collected from a group thread
        is re-raised after every thread has been joined.
        """
        groups = self._grouped_flows()

        # Zero or one group: no threads, delegate to a single runtime.
        if len(groups) < 2:
            sole_group = next(iter(groups.values()), ())
            sequential_runtime = _FlowRuntime(
                tuple(sole_group),
                continuous=self.continuous,
                runtime_stop_event=self.runtime_stop_event,
                flow_stop_event=self.flow_stop_event,
                status_callback=self.status_callback,
                runtime_ledger=self.runtime_ledger,
                runtime_ledger_service=self._runtime_ledger_service,
            )
            return sequential_runtime.run()

        outcomes: dict[str, list[FlowContext]] = {label: [] for label in groups}
        failures: Queue[tuple[str, Exception]] = Queue()
        # All group runtimes share the same pair of stop events.
        shared_runtime_stop = self.runtime_stop_event or threading.Event()
        shared_flow_stop = self.flow_stop_event or threading.Event()

        def worker(label: str, members: tuple["Flow", ...]) -> None:
            try:
                group_runtime = _FlowRuntime(
                    members,
                    continuous=self.continuous,
                    runtime_stop_event=shared_runtime_stop,
                    flow_stop_event=shared_flow_stop,
                    status_callback=self.status_callback,
                    runtime_ledger=self.runtime_ledger,
                    runtime_ledger_service=self._runtime_ledger_service,
                    runtime_ledger_factory=self._runtime_ledger_factory,
                )
                outcomes[label] = group_runtime.run()
            except Exception as error:  # pragma: no cover
                failures.put((label, error))
                # A failing group stops its siblings unless running continuously.
                if not self.continuous:
                    shared_runtime_stop.set()
            finally:
                # NOTE(review): every group thread closes the shared ledger
                # object; presumably close() is per-thread - confirm.
                self.runtime_ledger.close()

        workers = [
            threading.Thread(target=worker, args=(label, members), daemon=True)
            for label, members in groups.items()
        ]
        for group_thread in workers:
            group_thread.start()
        for group_thread in workers:
            group_thread.join()

        if not (self.continuous or failures.empty()):
            raise failures.get()[1]

        return [context for label in groups for context in outcomes[label]]

    def _grouped_flows(self) -> dict[str, tuple["Flow", ...]]:
        """Bucket flows by ``group``, preserving first-seen order.

        A flow with a falsy ``group`` gets the key ``group-<index>``, where
        ``<index>`` is its position in ``self.flows``.
        """
        buckets: dict[str, list["Flow"]] = {}
        for position, flow in enumerate(self.flows):
            label = flow.group or f"group-{position}"
            if label not in buckets:
                buckets[label] = []
            buckets[label].append(flow)
        return {label: tuple(members) for label, members in buckets.items()}
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
__all__ = ["_GroupedFlowRuntime"]
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Runtime log and ledger emission helpers for authored flows."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from data_engine.domain.time import utcnow_text
|
|
9
|
+
from data_engine.runtime.runtime_db import RuntimeLedger
|
|
10
|
+
|
|
11
|
+
LOGGER = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class RuntimeLogEmitter:
    """Own runtime log persistence and logger emission."""

    def __init__(self, runtime_ledger: RuntimeLedger) -> None:
        self.runtime_ledger = runtime_ledger

    def log_runtime_message(
        self,
        message: str,
        *,
        level: str,
        run_id: str | None,
        flow_name: str | None,
        step_label: str | None = None,
        exc_info: bool = False,
    ) -> None:
        """Persist one log row in the ledger and emit it through ``LOGGER``.

        Args:
            message: Text stored in the ledger and passed to the logger.
            level: Level name in any casing. It is stored upper-cased;
                ``"error"`` is emitted with ``LOGGER.error`` and every other
                value with ``LOGGER.info``.
            run_id: Run identifier stored with the row, if any.
            flow_name: Flow name stored with the row, if any.
            step_label: Step label stored with the row, if any.
            exc_info: Forwarded to the logger call.
        """
        # Normalize once so the ledger row and the logger agree on the level
        # regardless of the caller's casing.
        normalized_level = level.upper()
        created_at_utc = utcnow_text()
        self.runtime_ledger.append_log(
            level=normalized_level,
            message=message,
            created_at_utc=created_at_utc,
            run_id=run_id,
            flow_name=flow_name,
            step_label=step_label,
        )
        logger_method = LOGGER.error if normalized_level == "ERROR" else LOGGER.info
        logger_method(message, exc_info=exc_info)

    def log_flow_event(
        self,
        run_id: str,
        flow_name: str,
        source_path: Path | None,
        *,
        status: str,
        elapsed: float | None = None,
        level: str = "info",
        exc_info: bool = False,
    ) -> None:
        """Log a flow-level event as a ``key=value`` message.

        The message is ``run=… flow=… source=… status=…`` with
        ``elapsed=<6 decimals>`` appended when ``elapsed`` is given.
        """
        message = f"run={run_id} flow={flow_name} source={source_path} status={status}"
        if elapsed is not None:
            message = f"{message} elapsed={elapsed:.6f}"
        self.log_runtime_message(message, level=level, run_id=run_id, flow_name=flow_name, exc_info=exc_info)

    def log_step_event(
        self,
        run_id: str,
        flow_name: str,
        step_label: str,
        source_path: Path | None,
        *,
        status: str,
        elapsed: float | None = None,
        level: str = "info",
        exc_info: bool = False,
    ) -> None:
        """Log a step-level event as a ``key=value`` message.

        Same format as :meth:`log_flow_event` with an extra ``step=…`` field;
        ``step_label`` is also stored with the ledger row.
        """
        message = f"run={run_id} flow={flow_name} step={step_label} source={source_path} status={status}"
        if elapsed is not None:
            message = f"{message} elapsed={elapsed:.6f}"
        self.log_runtime_message(
            message,
            level=level,
            run_id=run_id,
            flow_name=flow_name,
            step_label=step_label,
            exc_info=exc_info,
        )
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
__all__ = ["RuntimeLogEmitter"]
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""Polling, scheduling, and source-queue helpers for authored flows."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections import deque
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from data_engine.authoring.primitives import WatchSpec
|
|
9
|
+
from data_engine.authoring.execution.context import _QueuedJob
|
|
10
|
+
from data_engine.domain.source_state import SourceSignature
|
|
11
|
+
from data_engine.runtime.file_watch import PollingWatcher, iter_candidate_paths
|
|
12
|
+
from data_engine.runtime.runtime_db import RuntimeLedger
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class RuntimePollingSupport:
    """Own watcher creation, queueing, and stale-source detection."""

    def __init__(self, runtime_ledger: RuntimeLedger) -> None:
        self.runtime_ledger = runtime_ledger

    def make_watcher(self, trigger: WatchSpec) -> PollingWatcher:
        """Build a recursive ``PollingWatcher`` for the trigger's source."""
        return PollingWatcher(trigger.source, recursive=True, extensions=trigger.extensions, settle=trigger.settle)

    def startup_sources(self, flow: "Flow", *, allow_missing: bool = False) -> list[Path | None]:
        """Return the sources one run of ``flow`` should be invoked with.

        ``[None]`` is returned when the flow has no ``WatchSpec`` source, the
        source does not exist, or the source is a directory run as a batch.
        A file source yields itself; any other directory yields its candidate
        paths (recursive, filtered by the trigger's extensions).
        """
        trigger = flow.trigger
        if not isinstance(trigger, WatchSpec) or trigger.source is None:
            return [None]
        if not trigger.source.exists():
            return [None]
        if trigger.source.is_file():
            return [trigger.source]
        if trigger.run_as == "batch":
            return [None]
        return list(
            iter_candidate_paths(
                trigger.source,
                extensions=trigger.extensions,
                recursive=True,
                allow_missing=allow_missing,
            )
        )

    def enqueue_job(
        self,
        queue: deque[_QueuedJob],
        queued_keys: set[tuple[str, str | None]],
        flow: "Flow",
        source_path: Path | None,
        *,
        batch_signatures: tuple[SourceSignature, ...] = (),
    ) -> None:
        """Queue a job unless one with the same key is already waiting.

        When the key is already queued and ``batch_signatures`` is non-empty,
        the signatures are merged into the waiting job (keyed and sorted by
        ``source_path``; newer signatures replace older ones).
        """
        key = self.job_key(flow, source_path)
        if key in queued_keys:
            if batch_signatures:
                for index, job in enumerate(queue):
                    if self.job_key(job.flow, job.source_path) != key:
                        continue
                    merged = {signature.source_path: signature for signature in job.batch_signatures}
                    for signature in batch_signatures:
                        merged[signature.source_path] = signature
                    queue[index] = _QueuedJob(
                        flow=job.flow,
                        source_path=job.source_path,
                        batch_signatures=tuple(merged[path] for path in sorted(merged)),
                    )
                    break
            return
        queue.append(_QueuedJob(flow, source_path, batch_signatures))
        queued_keys.add(key)

    def job_key(self, flow: "Flow", source_path: Path | None) -> tuple[str, str | None]:
        """Return the de-duplication key ``(flow.name, str(source_path) | None)``."""
        return (flow.name, str(source_path) if source_path is not None else None)

    def stale_poll_sources(self, flow: "Flow") -> list[Path | None]:
        """Return the sources of a poll-mode flow that need a run.

        For a batch directory the result is ``[None]`` when at least one
        candidate file is stale, else ``[]``. In both directory and file cases
        the ledger is asked to prune file state, passing the normalized paths
        of every current candidate.
        """
        current_source_paths: set[str] = set()
        stale: list[Path | None] = []
        trigger = flow.trigger
        if not isinstance(trigger, WatchSpec) or trigger.mode != "poll" or trigger.source is None:
            return stale
        if not trigger.source.exists():
            return [None]
        if trigger.run_as == "batch" and trigger.source.is_dir():
            batch_is_stale = False
            for source_path in iter_candidate_paths(
                trigger.source, extensions=trigger.extensions, recursive=True, allow_missing=True
            ):
                # Keep collecting after the first stale hit: the prune call
                # below must see every current path, not just a prefix.
                current_source_paths.add(self.runtime_ledger.normalize_source_path(source_path))
                if not batch_is_stale and self.is_poll_source_stale(flow, source_path):
                    batch_is_stale = True
            if batch_is_stale:
                stale.append(None)
            self.runtime_ledger.prune_missing_file_state(flow_name=flow.name, current_source_paths=current_source_paths)
            return stale
        for source_path in self.startup_sources(flow, allow_missing=True):
            if source_path is None:
                stale.append(None)
                continue
            current_source_paths.add(self.runtime_ledger.normalize_source_path(source_path))
            if self.is_poll_source_stale(flow, source_path):
                stale.append(source_path)
        self.runtime_ledger.prune_missing_file_state(flow_name=flow.name, current_source_paths=current_source_paths)
        return stale

    def stale_batch_poll_signatures(self, flow: "Flow") -> tuple[SourceSignature, ...]:
        """Return signatures of all stale files under a poll-mode directory.

        The result is sorted by ``signature.source_path``; it is empty when
        the flow is not a poll-mode ``WatchSpec`` with a directory source.
        """
        trigger = flow.trigger
        if (
            not isinstance(trigger, WatchSpec)
            or trigger.mode != "poll"
            or trigger.source is None
            or not trigger.source.is_dir()
        ):
            return ()
        signatures: dict[str, SourceSignature] = {}
        for source_path in iter_candidate_paths(
            trigger.source, extensions=trigger.extensions, recursive=True, allow_missing=True
        ):
            if not self.is_poll_source_stale(flow, source_path):
                continue
            signature = self.poll_source_signature(flow, source_path)
            if signature is not None:
                signatures[signature.source_path] = signature
        return tuple(signatures[path] for path in sorted(signatures))

    def is_poll_source_stale(self, flow: "Flow", source_path: Path | None) -> bool:
        """Return whether ``source_path`` needs processing for a poll-mode flow.

        Non-poll flows are never stale. A missing or ``None`` path is always
        stale; otherwise the decision is delegated to the ledger.
        """
        trigger = flow.trigger
        if not isinstance(trigger, WatchSpec) or trigger.mode != "poll":
            return False
        if source_path is None or not source_path.exists():
            return True
        signature = self.poll_source_signature(flow, source_path)
        # No signature for an existing single-file source counts as stale.
        if signature is None and trigger.source is not None and trigger.source.exists() and trigger.source.is_file():
            return True
        return self.runtime_ledger.is_poll_source_stale(flow.name, signature)

    def poll_source_signature(self, flow: "Flow", source_path: Path | None) -> SourceSignature | None:
        """Return the ledger's signature for ``source_path``.

        ``None`` is returned without consulting the ledger when the path is
        ``None`` or the flow is not a poll-mode ``WatchSpec``.
        """
        if source_path is None or not isinstance(flow.trigger, WatchSpec) or flow.trigger.mode != "poll":
            return None
        return self.runtime_ledger.source_signature_for_path(source_path)

    def normalized_source_path(self, source_path: Path | None) -> str | None:
        """Return the ledger-normalized form of ``source_path`` (``None`` passes through)."""
        if source_path is None:
            return None
        return self.runtime_ledger.normalize_source_path(source_path)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
__all__ = ["RuntimePollingSupport"]
|