py-data-engine 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. data_engine/__init__.py +37 -0
  2. data_engine/application/__init__.py +39 -0
  3. data_engine/application/actions.py +42 -0
  4. data_engine/application/catalog.py +151 -0
  5. data_engine/application/control.py +213 -0
  6. data_engine/application/details.py +73 -0
  7. data_engine/application/runtime.py +449 -0
  8. data_engine/application/workspace.py +62 -0
  9. data_engine/authoring/__init__.py +14 -0
  10. data_engine/authoring/builder.py +31 -0
  11. data_engine/authoring/execution/__init__.py +6 -0
  12. data_engine/authoring/execution/app.py +6 -0
  13. data_engine/authoring/execution/context.py +82 -0
  14. data_engine/authoring/execution/continuous.py +176 -0
  15. data_engine/authoring/execution/grouped.py +106 -0
  16. data_engine/authoring/execution/logging.py +83 -0
  17. data_engine/authoring/execution/polling.py +135 -0
  18. data_engine/authoring/execution/runner.py +210 -0
  19. data_engine/authoring/execution/single.py +171 -0
  20. data_engine/authoring/flow.py +361 -0
  21. data_engine/authoring/helpers.py +160 -0
  22. data_engine/authoring/model.py +59 -0
  23. data_engine/authoring/primitives.py +430 -0
  24. data_engine/authoring/services.py +42 -0
  25. data_engine/devtools/__init__.py +3 -0
  26. data_engine/devtools/project_ast_map.py +503 -0
  27. data_engine/docs/__init__.py +1 -0
  28. data_engine/docs/sphinx_source/_static/custom.css +13 -0
  29. data_engine/docs/sphinx_source/api.rst +42 -0
  30. data_engine/docs/sphinx_source/conf.py +37 -0
  31. data_engine/docs/sphinx_source/guides/app-runtime-and-workspaces.md +397 -0
  32. data_engine/docs/sphinx_source/guides/authoring-flow-modules.md +215 -0
  33. data_engine/docs/sphinx_source/guides/configuring-flows.md +185 -0
  34. data_engine/docs/sphinx_source/guides/core-concepts.md +208 -0
  35. data_engine/docs/sphinx_source/guides/database-methods.md +107 -0
  36. data_engine/docs/sphinx_source/guides/duckdb-helpers.md +462 -0
  37. data_engine/docs/sphinx_source/guides/flow-context.md +538 -0
  38. data_engine/docs/sphinx_source/guides/flow-methods.md +206 -0
  39. data_engine/docs/sphinx_source/guides/getting-started.md +271 -0
  40. data_engine/docs/sphinx_source/guides/project-inventory.md +5683 -0
  41. data_engine/docs/sphinx_source/guides/project-map.md +118 -0
  42. data_engine/docs/sphinx_source/guides/recipes.md +268 -0
  43. data_engine/docs/sphinx_source/index.rst +22 -0
  44. data_engine/domain/__init__.py +92 -0
  45. data_engine/domain/actions.py +69 -0
  46. data_engine/domain/catalog.py +128 -0
  47. data_engine/domain/details.py +214 -0
  48. data_engine/domain/diagnostics.py +56 -0
  49. data_engine/domain/errors.py +104 -0
  50. data_engine/domain/inspection.py +99 -0
  51. data_engine/domain/logs.py +118 -0
  52. data_engine/domain/operations.py +172 -0
  53. data_engine/domain/operator.py +72 -0
  54. data_engine/domain/runs.py +155 -0
  55. data_engine/domain/runtime.py +279 -0
  56. data_engine/domain/source_state.py +17 -0
  57. data_engine/domain/support.py +54 -0
  58. data_engine/domain/time.py +23 -0
  59. data_engine/domain/workspace.py +159 -0
  60. data_engine/flow_modules/__init__.py +1 -0
  61. data_engine/flow_modules/flow_module_compiler.py +179 -0
  62. data_engine/flow_modules/flow_module_loader.py +201 -0
  63. data_engine/helpers/__init__.py +25 -0
  64. data_engine/helpers/duckdb.py +705 -0
  65. data_engine/hosts/__init__.py +1 -0
  66. data_engine/hosts/daemon/__init__.py +23 -0
  67. data_engine/hosts/daemon/app.py +221 -0
  68. data_engine/hosts/daemon/bootstrap.py +69 -0
  69. data_engine/hosts/daemon/client.py +465 -0
  70. data_engine/hosts/daemon/commands.py +64 -0
  71. data_engine/hosts/daemon/composition.py +310 -0
  72. data_engine/hosts/daemon/constants.py +15 -0
  73. data_engine/hosts/daemon/entrypoints.py +97 -0
  74. data_engine/hosts/daemon/lifecycle.py +191 -0
  75. data_engine/hosts/daemon/manager.py +272 -0
  76. data_engine/hosts/daemon/ownership.py +126 -0
  77. data_engine/hosts/daemon/runtime_commands.py +188 -0
  78. data_engine/hosts/daemon/runtime_control.py +31 -0
  79. data_engine/hosts/daemon/server.py +84 -0
  80. data_engine/hosts/daemon/shared_state.py +147 -0
  81. data_engine/hosts/daemon/state_sync.py +101 -0
  82. data_engine/platform/__init__.py +1 -0
  83. data_engine/platform/identity.py +35 -0
  84. data_engine/platform/local_settings.py +146 -0
  85. data_engine/platform/theme.py +259 -0
  86. data_engine/platform/workspace_models.py +190 -0
  87. data_engine/platform/workspace_policy.py +333 -0
  88. data_engine/runtime/__init__.py +1 -0
  89. data_engine/runtime/file_watch.py +185 -0
  90. data_engine/runtime/ledger_models.py +116 -0
  91. data_engine/runtime/runtime_db.py +938 -0
  92. data_engine/runtime/shared_state.py +523 -0
  93. data_engine/services/__init__.py +49 -0
  94. data_engine/services/daemon.py +64 -0
  95. data_engine/services/daemon_state.py +40 -0
  96. data_engine/services/flow_catalog.py +102 -0
  97. data_engine/services/flow_execution.py +48 -0
  98. data_engine/services/ledger.py +85 -0
  99. data_engine/services/logs.py +65 -0
  100. data_engine/services/runtime_binding.py +105 -0
  101. data_engine/services/runtime_execution.py +126 -0
  102. data_engine/services/runtime_history.py +62 -0
  103. data_engine/services/settings.py +58 -0
  104. data_engine/services/shared_state.py +28 -0
  105. data_engine/services/theme.py +59 -0
  106. data_engine/services/workspace_provisioning.py +224 -0
  107. data_engine/services/workspaces.py +74 -0
  108. data_engine/ui/__init__.py +3 -0
  109. data_engine/ui/cli/__init__.py +19 -0
  110. data_engine/ui/cli/app.py +161 -0
  111. data_engine/ui/cli/commands_doctor.py +178 -0
  112. data_engine/ui/cli/commands_run.py +80 -0
  113. data_engine/ui/cli/commands_start.py +100 -0
  114. data_engine/ui/cli/commands_workspace.py +97 -0
  115. data_engine/ui/cli/dependencies.py +44 -0
  116. data_engine/ui/cli/parser.py +56 -0
  117. data_engine/ui/gui/__init__.py +25 -0
  118. data_engine/ui/gui/app.py +116 -0
  119. data_engine/ui/gui/bootstrap.py +487 -0
  120. data_engine/ui/gui/bootstrapper.py +140 -0
  121. data_engine/ui/gui/cache_models.py +23 -0
  122. data_engine/ui/gui/control_support.py +185 -0
  123. data_engine/ui/gui/controllers/__init__.py +6 -0
  124. data_engine/ui/gui/controllers/flows.py +439 -0
  125. data_engine/ui/gui/controllers/runtime.py +245 -0
  126. data_engine/ui/gui/dialogs/__init__.py +12 -0
  127. data_engine/ui/gui/dialogs/messages.py +88 -0
  128. data_engine/ui/gui/dialogs/previews.py +222 -0
  129. data_engine/ui/gui/helpers/__init__.py +62 -0
  130. data_engine/ui/gui/helpers/inspection.py +81 -0
  131. data_engine/ui/gui/helpers/lifecycle.py +112 -0
  132. data_engine/ui/gui/helpers/scroll.py +28 -0
  133. data_engine/ui/gui/helpers/theming.py +87 -0
  134. data_engine/ui/gui/icons/dark_light.svg +12 -0
  135. data_engine/ui/gui/icons/documentation.svg +1 -0
  136. data_engine/ui/gui/icons/failed.svg +3 -0
  137. data_engine/ui/gui/icons/group.svg +4 -0
  138. data_engine/ui/gui/icons/home.svg +2 -0
  139. data_engine/ui/gui/icons/manual.svg +2 -0
  140. data_engine/ui/gui/icons/poll.svg +2 -0
  141. data_engine/ui/gui/icons/schedule.svg +4 -0
  142. data_engine/ui/gui/icons/settings.svg +2 -0
  143. data_engine/ui/gui/icons/started.svg +3 -0
  144. data_engine/ui/gui/icons/success.svg +3 -0
  145. data_engine/ui/gui/icons/view-log.svg +3 -0
  146. data_engine/ui/gui/icons.py +50 -0
  147. data_engine/ui/gui/launcher.py +48 -0
  148. data_engine/ui/gui/presenters/__init__.py +72 -0
  149. data_engine/ui/gui/presenters/docs.py +140 -0
  150. data_engine/ui/gui/presenters/logs.py +58 -0
  151. data_engine/ui/gui/presenters/runtime_projection.py +29 -0
  152. data_engine/ui/gui/presenters/sidebar.py +88 -0
  153. data_engine/ui/gui/presenters/steps.py +148 -0
  154. data_engine/ui/gui/presenters/workspace.py +39 -0
  155. data_engine/ui/gui/presenters/workspace_binding.py +75 -0
  156. data_engine/ui/gui/presenters/workspace_settings.py +182 -0
  157. data_engine/ui/gui/preview_models.py +37 -0
  158. data_engine/ui/gui/render_support.py +241 -0
  159. data_engine/ui/gui/rendering/__init__.py +12 -0
  160. data_engine/ui/gui/rendering/artifacts.py +95 -0
  161. data_engine/ui/gui/rendering/icons.py +50 -0
  162. data_engine/ui/gui/runtime.py +47 -0
  163. data_engine/ui/gui/state_support.py +193 -0
  164. data_engine/ui/gui/support.py +214 -0
  165. data_engine/ui/gui/surface.py +209 -0
  166. data_engine/ui/gui/theme.py +720 -0
  167. data_engine/ui/gui/widgets/__init__.py +34 -0
  168. data_engine/ui/gui/widgets/config.py +41 -0
  169. data_engine/ui/gui/widgets/logs.py +62 -0
  170. data_engine/ui/gui/widgets/panels.py +507 -0
  171. data_engine/ui/gui/widgets/sidebar.py +130 -0
  172. data_engine/ui/gui/widgets/steps.py +84 -0
  173. data_engine/ui/tui/__init__.py +5 -0
  174. data_engine/ui/tui/app.py +222 -0
  175. data_engine/ui/tui/bootstrap.py +475 -0
  176. data_engine/ui/tui/bootstrapper.py +117 -0
  177. data_engine/ui/tui/controllers/__init__.py +6 -0
  178. data_engine/ui/tui/controllers/flows.py +349 -0
  179. data_engine/ui/tui/controllers/runtime.py +167 -0
  180. data_engine/ui/tui/runtime.py +34 -0
  181. data_engine/ui/tui/state_support.py +141 -0
  182. data_engine/ui/tui/support.py +63 -0
  183. data_engine/ui/tui/theme.py +204 -0
  184. data_engine/ui/tui/widgets.py +123 -0
  185. data_engine/views/__init__.py +109 -0
  186. data_engine/views/actions.py +80 -0
  187. data_engine/views/artifacts.py +58 -0
  188. data_engine/views/flow_display.py +69 -0
  189. data_engine/views/logs.py +54 -0
  190. data_engine/views/models.py +96 -0
  191. data_engine/views/presentation.py +133 -0
  192. data_engine/views/runs.py +62 -0
  193. data_engine/views/state.py +39 -0
  194. data_engine/views/status.py +13 -0
  195. data_engine/views/text.py +109 -0
  196. py_data_engine-0.1.0.dist-info/METADATA +330 -0
  197. py_data_engine-0.1.0.dist-info/RECORD +200 -0
  198. py_data_engine-0.1.0.dist-info/WHEEL +5 -0
  199. py_data_engine-0.1.0.dist-info/entry_points.txt +2 -0
  200. py_data_engine-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,430 @@
1
+ """Authoring specs, runtime contexts, and small containers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from datetime import datetime, timezone
7
+ from pathlib import Path
8
+ import tomllib
9
+ from typing import Callable, Generic, Iterator, TypeVar
10
+
11
+ from data_engine.authoring.helpers import _normalize_extensions, _resolve_flow_path
12
+ from data_engine.authoring.model import FlowValidationError
13
+ from data_engine.platform.workspace_models import WORKSPACE_CONFIG_DIR_NAME, WORKSPACE_DATABASES_DIR_NAME
14
+
15
+ T = TypeVar("T")
16
+
17
+
18
@dataclass(frozen=True)
class WatchSpec:
    """Immutable, already-normalized description of how a flow is watched.

    Only ``mode`` and ``run_as`` are required; every other field carries a
    default. Nothing is validated here: the values are stored exactly as the
    constructing code supplies them.
    """

    # --- required ---------------------------------------------------------
    mode: str
    run_as: str

    # --- watched location -------------------------------------------------
    source: Path | None = None

    # --- interval settings ------------------------------------------------
    # NOTE(review): presumably the authored interval text and its numeric
    # equivalent in seconds — confirm against the code that builds the spec.
    interval: str | None = None
    interval_seconds: float | None = None

    # --- time-of-day settings ---------------------------------------------
    # NOTE(review): ``time_slots`` looks like (hour, minute) pairs — confirm.
    time: str | tuple[str, ...] | None = None
    times: tuple[str, ...] = ()
    time_slots: tuple[tuple[int, int], ...] = ()

    # --- file matching ----------------------------------------------------
    extensions: tuple[str, ...] | None = None
    settle: int = 1
32
+
33
+
34
@dataclass(frozen=True)
class MirrorSpec:
    """Immutable flow-level mirror binding.

    Holds nothing but the mirror root; the value is stored as given and is
    not resolved or created here.
    """

    # Root directory of the mirror.
    root: Path
39
+
40
+
41
@dataclass(frozen=True)
class StepSpec:
    """Immutable record describing one callable step of a flow.

    All five fields are required and are stored exactly as supplied.
    """

    # The callable executed for this step.
    fn: Callable[..., object]

    # NOTE(review): presumably the names of the stored object to read from and
    # to save the result under — confirm against the step runner.
    use: str | None
    save_as: str | None

    # Display label and the name of the wrapped function.
    label: str
    function_name: str
50
+
51
+
52
@dataclass(frozen=True)
class SourceMetadata:
    """Immutable snapshot of filesystem facts about the current source file."""

    # Location of the source file and its final path component.
    path: Path
    name: str

    # File size in bytes.
    size_bytes: int

    # Last-modified timestamp; the field name indicates UTC.
    modified_at_utc: datetime
60
+
61
+
62
+ @dataclass
63
+ class WorkspaceConfigContext:
64
+ """Lazy read-only access to workspace-local TOML config files."""
65
+
66
+ workspace_root: Path | None = None
67
+ _cache: dict[str, dict[str, object]] = field(default_factory=dict)
68
+ _names: tuple[str, ...] | None = None
69
+
70
+ @property
71
+ def config_dir(self) -> Path | None:
72
+ """Return the conventional config directory for the authored workspace."""
73
+ if self.workspace_root is None:
74
+ return None
75
+ return self.workspace_root / WORKSPACE_CONFIG_DIR_NAME
76
+
77
+ def names(self) -> tuple[str, ...]:
78
+ """Return available config file stems beneath config/."""
79
+ if self._names is not None:
80
+ return self._names
81
+ config_dir = self.config_dir
82
+ if config_dir is None or not config_dir.is_dir():
83
+ self._names = ()
84
+ return self._names
85
+ self._names = tuple(
86
+ path.stem
87
+ for path in sorted(config_dir.glob("*.toml"))
88
+ if path.is_file() and not path.name.startswith(".")
89
+ )
90
+ return self._names
91
+
92
+ def get(self, name: str) -> dict[str, object] | None:
93
+ """Return one parsed config mapping when available."""
94
+ normalized_name = str(name).strip()
95
+ if not normalized_name:
96
+ raise FlowValidationError("config.get() name must be non-empty.")
97
+ if normalized_name in self._cache:
98
+ return dict(self._cache[normalized_name])
99
+ config_dir = self.config_dir
100
+ if config_dir is None:
101
+ return None
102
+ config_path = config_dir / f"{normalized_name}.toml"
103
+ if not config_path.is_file():
104
+ return None
105
+ try:
106
+ with config_path.open("rb") as handle:
107
+ parsed = tomllib.load(handle)
108
+ except tomllib.TOMLDecodeError as exc:
109
+ raise FlowValidationError(f"Config file {config_path} is not valid TOML: {exc}") from exc
110
+ self._cache[normalized_name] = parsed
111
+ return dict(parsed)
112
+
113
+ def require(self, name: str) -> dict[str, object]:
114
+ """Return one parsed config mapping or fail loudly when missing."""
115
+ parsed = self.get(name)
116
+ if parsed is not None:
117
+ return parsed
118
+ config_dir = self.config_dir
119
+ if config_dir is None:
120
+ raise FlowValidationError("config.require() is only available for authored workspace flows.")
121
+ raise FlowValidationError(f"Required config file was not found: {config_dir / f'{str(name).strip()}.toml'}")
122
+
123
+ def all(self) -> dict[str, dict[str, object]]:
124
+ """Return all parsed config mappings keyed by file stem."""
125
+ return {name: self.require(name) for name in self.names()}
126
+
127
+
128
@dataclass(frozen=True)
class MirrorContext:
    """Write-ready mirrored output namespace for one runtime source.

    Paths handed out by this class are resolved, and the directory each one
    lives in is created on demand so callers can write to it immediately.
    """

    root: Path
    source_path: Path | None = None
    relative_path: Path | None = None

    def __post_init__(self) -> None:
        # Frozen dataclass: normalized values must go through object.__setattr__.
        object.__setattr__(self, "root", Path(self.root).resolve())
        if self.source_path is not None:
            object.__setattr__(self, "source_path", Path(self.source_path).resolve())
        if self.relative_path is not None:
            object.__setattr__(self, "relative_path", Path(self.relative_path))

    @staticmethod
    def _relative_candidate(name: str | Path, method: str) -> Path:
        """Validate a caller-supplied file name and return it as a relative Path.

        Raises:
            FlowValidationError: If the name is absolute, blank, or denotes
                the current directory itself.
        """
        candidate = Path(name)
        if candidate.is_absolute():
            raise FlowValidationError(f"mirror.{method}() name must be relative.")
        # Path("") normalizes to Path("."), whose text is "." and never blank,
        # so the empty name has to be detected by comparing with Path().
        if candidate == Path() or not str(candidate).strip():
            raise FlowValidationError(f"mirror.{method}() name must be non-empty.")
        return candidate

    def _prepare(self, path: Path) -> Path:
        """Resolve ``path`` and make sure its parent directory exists."""
        resolved = path.resolve()
        resolved.parent.mkdir(parents=True, exist_ok=True)
        return resolved

    @property
    def dir(self) -> Path:
        """Return a write-ready namespace directory for derived files.

        With a concrete source this is the mirrored relative path minus its
        suffix; otherwise it is the mirror root. The directory is created.
        """
        if self.source_path is None or self.relative_path is None:
            self.root.mkdir(parents=True, exist_ok=True)
            return self.root
        directory = self.root / self.relative_path.with_suffix("")
        directory.mkdir(parents=True, exist_ok=True)
        return directory.resolve()

    @property
    def folder(self) -> Path:
        """Return the mirrored parent folder for the current source file.

        Falls back to the mirror root when no relative path is known. The
        directory is created.
        """
        if self.relative_path is None:
            self.root.mkdir(parents=True, exist_ok=True)
            return self.root
        directory = self.root / self.relative_path.parent
        directory.mkdir(parents=True, exist_ok=True)
        return directory.resolve()

    def with_suffix(self, suffix: str) -> Path:
        """Return the canonical mirrored source path with a replaced suffix.

        Raises:
            FlowValidationError: If there is no concrete source file.
        """
        if self.source_path is None or self.relative_path is None:
            raise FlowValidationError("mirror.with_suffix() requires a concrete source file.")
        normalized_suffix = _normalize_extensions((suffix,))[0]
        return self._prepare((self.root / self.relative_path).with_suffix(normalized_suffix))

    def with_extension(self, suffix: str) -> Path:
        """Return the canonical mirrored source path with a replaced extension."""
        return self.with_suffix(suffix)

    def file(self, name: str | Path) -> Path:
        """Return a write-ready file path in the mirrored source folder.

        Raises:
            FlowValidationError: If ``name`` is absolute or empty.
        """
        candidate = self._relative_candidate(name, "file")
        return self._prepare(self.folder / candidate)

    def namespaced_file(self, name: str | Path) -> Path:
        """Return a write-ready derived file path inside the mirrored source namespace.

        Raises:
            FlowValidationError: If ``name`` is absolute or empty.
        """
        candidate = self._relative_candidate(name, "namespaced_file")
        return self._prepare(self.dir / candidate)

    def root_file(self, name: str | Path) -> Path:
        """Return a write-ready file path directly beneath the mirror root.

        Raises:
            FlowValidationError: If ``name`` is absolute or empty.
        """
        candidate = self._relative_candidate(name, "root_file")
        return self._prepare(self.root / candidate)
205
+
206
+
207
@dataclass(frozen=True)
class SourceContext:
    """Resolved source namespace for one runtime source.

    This class only computes resolved paths; it never creates directories or
    files.
    """

    root: Path
    path: Path | None = None
    relative_path: Path | None = None

    def __post_init__(self) -> None:
        # Frozen dataclass: normalized values must go through object.__setattr__.
        object.__setattr__(self, "root", Path(self.root).resolve())
        if self.path is not None:
            object.__setattr__(self, "path", Path(self.path).resolve())
        if self.relative_path is not None:
            object.__setattr__(self, "relative_path", Path(self.relative_path))

    @staticmethod
    def _relative_candidate(name: str | Path, method: str) -> Path:
        """Validate a caller-supplied file name and return it as a relative Path.

        Raises:
            FlowValidationError: If the name is absolute, blank, or denotes
                the current directory itself.
        """
        candidate = Path(name)
        if candidate.is_absolute():
            raise FlowValidationError(f"source.{method}() name must be relative.")
        # Path("") normalizes to Path("."), whose text is "." and never blank,
        # so the empty name has to be detected by comparing with Path().
        if candidate == Path() or not str(candidate).strip():
            raise FlowValidationError(f"source.{method}() name must be non-empty.")
        return candidate

    @property
    def dir(self) -> Path:
        """Return the namespace directory for files derived from the active source.

        With a concrete source this is the relative path minus its suffix,
        beneath the root; otherwise it is the root itself.
        """
        if self.path is None or self.relative_path is None:
            return self.root
        return (self.root / self.relative_path.with_suffix("")).resolve()

    @property
    def folder(self) -> Path:
        """Return the parent folder for the active source file.

        Falls back to the root when no relative path is known.
        """
        if self.relative_path is None:
            return self.root
        return (self.root / self.relative_path.parent).resolve()

    def with_suffix(self, suffix: str) -> Path:
        """Return the source path with a replaced suffix.

        Raises:
            FlowValidationError: If there is no concrete source file.
        """
        if self.path is None or self.relative_path is None:
            raise FlowValidationError("source.with_suffix() requires a concrete source file.")
        normalized_suffix = _normalize_extensions((suffix,))[0]
        return (self.root / self.relative_path).with_suffix(normalized_suffix).resolve()

    def with_extension(self, suffix: str) -> Path:
        """Return the source path with a replaced extension."""
        return self.with_suffix(suffix)

    def file(self, name: str | Path) -> Path:
        """Return a derived file path in the active source folder.

        Raises:
            FlowValidationError: If ``name`` is absolute or empty.
        """
        candidate = self._relative_candidate(name, "file")
        return (self.folder / candidate).resolve()

    def namespaced_file(self, name: str | Path) -> Path:
        """Return a derived file path inside the active source namespace.

        Raises:
            FlowValidationError: If ``name`` is absolute or empty, or there is
                no concrete source file.
        """
        candidate = self._relative_candidate(name, "namespaced_file")
        if self.path is None or self.relative_path is None:
            raise FlowValidationError("source.namespaced_file() requires a concrete source file.")
        return (self.dir / candidate).resolve()

    def root_file(self, name: str | Path) -> Path:
        """Return a file path directly beneath the source root.

        Raises:
            FlowValidationError: If ``name`` is absolute or empty.
        """
        candidate = self._relative_candidate(name, "root_file")
        return (self.root / candidate).resolve()
275
+
276
+
277
@dataclass
class FlowContext:
    """Mutable runtime state shared across steps during one flow execution."""

    flow_name: str
    group: str
    # Source and mirror namespaces; either may be absent.
    source: SourceContext | None = None
    mirror: MirrorContext | None = None
    # NOTE(review): presumably the most recent step result and the named
    # results saved by earlier steps — confirm against the step runner.
    current: object | None = None
    objects: dict[str, object] = field(default_factory=dict)
    metadata: dict[str, object] = field(default_factory=dict)
    # Access to workspace TOML config; defaults to a context with no workspace root.
    config: WorkspaceConfigContext = field(default_factory=WorkspaceConfigContext)

    def source_metadata(self) -> SourceMetadata | None:
        """Return filesystem metadata for the current source file when available.

        Returns ``None`` when there is no source context or it has no concrete
        path. The file is stat-ed on every call, so errors from ``Path.stat``
        propagate to the caller.
        """
        source_path = self.source.path if self.source is not None else None
        if source_path is None:
            return None
        stat = source_path.stat()
        return SourceMetadata(
            path=source_path,
            name=source_path.name,
            size_bytes=stat.st_size,
            modified_at_utc=datetime.fromtimestamp(stat.st_mtime, timezone.utc),
        )

    def database(self, name: str | Path) -> Path:
        """Return a write-ready path beneath the authored workspace databases directory.

        The parent directory of the returned path is created when missing.

        Raises:
            FlowValidationError: If no workspace root is bound, or ``name`` is
                absolute or empty.
        """
        if self.config.workspace_root is None:
            raise FlowValidationError("context.database() is only available for authored workspace flows.")
        candidate = Path(name)
        if candidate.is_absolute():
            raise FlowValidationError("context.database() name must be relative.")
        # Path("") normalizes to Path("."), whose text is "." and never blank,
        # so the empty name has to be detected by comparing with Path();
        # otherwise the databases directory itself would be returned as a file.
        if candidate == Path() or not str(candidate).strip():
            raise FlowValidationError("context.database() name must be non-empty.")
        path = (self.config.workspace_root / WORKSPACE_DATABASES_DIR_NAME / candidate).resolve()
        path.parent.mkdir(parents=True, exist_ok=True)
        return path
315
+
316
+
317
@dataclass(frozen=True)
class FileRef:
    """Immutable handle on a single filesystem path used by batch-oriented flows.

    The wrapped path is resolved at construction time; the remaining members
    simply forward to it.
    """

    path: Path

    def __post_init__(self) -> None:
        # Frozen dataclass: the normalized value must go through object.__setattr__.
        resolved_path = Path(self.path).resolve()
        object.__setattr__(self, "path", resolved_path)

    @property
    def name(self) -> str:
        """Final component of the wrapped path."""
        wrapped = self.path
        return wrapped.name

    @property
    def stem(self) -> str:
        """Final component of the wrapped path without its last suffix."""
        wrapped = self.path
        return wrapped.stem

    @property
    def suffix(self) -> str:
        """Last suffix of the wrapped path, including the leading dot."""
        wrapped = self.path
        return wrapped.suffix

    @property
    def parent(self) -> Path:
        """Directory containing the wrapped path."""
        wrapped = self.path
        return wrapped.parent

    def exists(self) -> bool:
        """Report whether the wrapped path currently exists."""
        wrapped = self.path
        return wrapped.exists()

    def __fspath__(self) -> str:
        # Lets the reference be passed wherever an os.PathLike is accepted.
        text = str(self.path)
        return text

    def __str__(self) -> str:
        text = str(self.path)
        return text
350
+
351
+
352
@dataclass(frozen=True)
class Batch(Generic[T]):
    """Immutable, iterable wrapper around a tuple of runtime items.

    Iteration, ``len()`` and indexing all delegate to the ``items`` tuple.
    """

    items: tuple[T, ...]

    def __iter__(self) -> Iterator[T]:
        members = self.items
        return iter(members)

    def __len__(self) -> int:
        members = self.items
        return len(members)

    def __getitem__(self, index: int) -> T:
        members = self.items
        return members[index]

    def names(self) -> tuple[str, ...]:
        """Return the name of every item, in order.

        Each item's ``name`` attribute is read and, when it is callable,
        invoked to obtain the value.

        Raises:
            FlowValidationError: If an item yields anything other than a string.
        """

        def _name_of(member: object) -> str:
            candidate = getattr(member, "name", None)
            resolved = candidate() if callable(candidate) else candidate
            if isinstance(resolved, str):
                return resolved
            raise FlowValidationError("Batch item does not expose a usable name.")

        return tuple(_name_of(member) for member in self.items)

    def paths(self) -> tuple[Path, ...]:
        """Return the ``path`` attribute of every item, in order.

        Raises:
            FlowValidationError: If an item's ``path`` is not a ``Path``.
        """

        def _path_of(member: object) -> Path:
            candidate = getattr(member, "path", None)
            if isinstance(candidate, Path):
                return candidate
            raise FlowValidationError("Batch item does not expose a usable path.")

        return tuple(_path_of(member) for member in self.items)
386
+
387
+
388
def collect_files(
    extensions: tuple[str, ...] | list[str] | set[str],
    *,
    root: str | Path | None = None,
    recursive: bool = False,
) -> Callable[[FlowContext], Batch[FileRef]]:
    """Return a step callable that collects matching files into a Batch of FileRef items.

    Args:
        extensions: File extensions to match; normalized once, up front.
        root: Optional directory to scan. When omitted, the step scans the
            source root of the flow context it is called with.
        recursive: Scan sub-directories as well when true.

    Returns:
        A callable taking a ``FlowContext`` and returning the matching files,
        sorted by path. A scan root that does not exist yields an empty batch.

    Raises:
        FlowValidationError: Immediately, if the extensions normalize to
            nothing usable; or when the step runs, if neither ``root`` nor a
            context source is available.
    """
    normalized_extensions = _normalize_extensions(extensions)
    # Explicit check instead of ``assert``: assertions are stripped under
    # ``python -O``, which would defer the failure to an obscure TypeError
    # inside the step.
    if normalized_extensions is None:
        raise FlowValidationError("collect_files() requires a usable set of extensions.")
    resolved_root = _resolve_flow_path(root) if root is not None else None

    def _collect(context: FlowContext) -> Batch[FileRef]:
        # An explicit root wins; otherwise fall back to the context's source root.
        base = resolved_root
        if base is None and context.source is not None:
            base = context.source.root
        if base is None:
            raise FlowValidationError("collect_files() requires an explicit root or a flow context with source.")
        if not base.exists():
            return Batch(())
        matcher = base.rglob if recursive else base.glob
        items = tuple(
            FileRef(path)
            for path in sorted(matcher("*"))
            # The suffix is lower-cased before it is compared.
            if path.is_file() and path.suffix.lower() in normalized_extensions
        )
        return Batch(items)

    return _collect
416
+
417
+
418
+ __all__ = [
419
+ "Batch",
420
+ "FileRef",
421
+ "FlowContext",
422
+ "MirrorContext",
423
+ "MirrorSpec",
424
+ "SourceContext",
425
+ "SourceMetadata",
426
+ "StepSpec",
427
+ "WatchSpec",
428
+ "WorkspaceConfigContext",
429
+ "collect_files",
430
+ ]
@@ -0,0 +1,42 @@
1
+ """Shared collaborator bundle for public authoring entrypoints."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from functools import lru_cache
7
+
8
+ from data_engine.services.flow_execution import FlowExecutionService
9
+ from data_engine.services.runtime_execution import RuntimeExecutionService
10
+
11
+
12
@dataclass(frozen=True)
class AuthoringServices:
    """Immutable bundle of the collaborators used by the public authoring API."""

    # Runtime execution collaborator.
    runtime_execution_service: RuntimeExecutionService

    # Flow execution collaborator.
    flow_execution_service: FlowExecutionService
18
+
19
+
20
def build_authoring_services(
    *,
    runtime_execution_service: RuntimeExecutionService | None = None,
    flow_execution_service: FlowExecutionService | None = None,
) -> AuthoringServices:
    """Build one authoring collaborator bundle with optional overrides.

    Args:
        runtime_execution_service: Override for the runtime execution
            collaborator; a default instance is created when ``None``.
        flow_execution_service: Override for the flow execution collaborator;
            a default instance is created when ``None``.

    Returns:
        A new ``AuthoringServices`` bundle.
    """
    # Compare against None explicitly: an ``override or Default()`` test would
    # silently discard any supplied collaborator that happens to be falsy.
    if runtime_execution_service is None:
        runtime_execution_service = RuntimeExecutionService()
    if flow_execution_service is None:
        flow_execution_service = FlowExecutionService()
    return AuthoringServices(
        runtime_execution_service=runtime_execution_service,
        flow_execution_service=flow_execution_service,
    )
30
+
31
+
32
@lru_cache(maxsize=1)
def default_authoring_services() -> AuthoringServices:
    """Return the process-wide default bundle of authoring collaborators.

    The bundle is built on the first call and the same object is returned on
    every later call, courtesy of the single-entry cache.
    """
    shared_bundle = build_authoring_services()
    return shared_bundle
36
+
37
+
38
+ __all__ = [
39
+ "AuthoringServices",
40
+ "build_authoring_services",
41
+ "default_authoring_services",
42
+ ]
@@ -0,0 +1,3 @@
1
+ """Developer-facing analysis helpers."""
2
+
3
+ __all__ = []