py-data-engine 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. data_engine/__init__.py +37 -0
  2. data_engine/application/__init__.py +39 -0
  3. data_engine/application/actions.py +42 -0
  4. data_engine/application/catalog.py +151 -0
  5. data_engine/application/control.py +213 -0
  6. data_engine/application/details.py +73 -0
  7. data_engine/application/runtime.py +449 -0
  8. data_engine/application/workspace.py +62 -0
  9. data_engine/authoring/__init__.py +14 -0
  10. data_engine/authoring/builder.py +31 -0
  11. data_engine/authoring/execution/__init__.py +6 -0
  12. data_engine/authoring/execution/app.py +6 -0
  13. data_engine/authoring/execution/context.py +82 -0
  14. data_engine/authoring/execution/continuous.py +176 -0
  15. data_engine/authoring/execution/grouped.py +106 -0
  16. data_engine/authoring/execution/logging.py +83 -0
  17. data_engine/authoring/execution/polling.py +135 -0
  18. data_engine/authoring/execution/runner.py +210 -0
  19. data_engine/authoring/execution/single.py +171 -0
  20. data_engine/authoring/flow.py +361 -0
  21. data_engine/authoring/helpers.py +160 -0
  22. data_engine/authoring/model.py +59 -0
  23. data_engine/authoring/primitives.py +430 -0
  24. data_engine/authoring/services.py +42 -0
  25. data_engine/devtools/__init__.py +3 -0
  26. data_engine/devtools/project_ast_map.py +503 -0
  27. data_engine/docs/__init__.py +1 -0
  28. data_engine/docs/sphinx_source/_static/custom.css +13 -0
  29. data_engine/docs/sphinx_source/api.rst +42 -0
  30. data_engine/docs/sphinx_source/conf.py +37 -0
  31. data_engine/docs/sphinx_source/guides/app-runtime-and-workspaces.md +397 -0
  32. data_engine/docs/sphinx_source/guides/authoring-flow-modules.md +215 -0
  33. data_engine/docs/sphinx_source/guides/configuring-flows.md +185 -0
  34. data_engine/docs/sphinx_source/guides/core-concepts.md +208 -0
  35. data_engine/docs/sphinx_source/guides/database-methods.md +107 -0
  36. data_engine/docs/sphinx_source/guides/duckdb-helpers.md +462 -0
  37. data_engine/docs/sphinx_source/guides/flow-context.md +538 -0
  38. data_engine/docs/sphinx_source/guides/flow-methods.md +206 -0
  39. data_engine/docs/sphinx_source/guides/getting-started.md +271 -0
  40. data_engine/docs/sphinx_source/guides/project-inventory.md +5683 -0
  41. data_engine/docs/sphinx_source/guides/project-map.md +118 -0
  42. data_engine/docs/sphinx_source/guides/recipes.md +268 -0
  43. data_engine/docs/sphinx_source/index.rst +22 -0
  44. data_engine/domain/__init__.py +92 -0
  45. data_engine/domain/actions.py +69 -0
  46. data_engine/domain/catalog.py +128 -0
  47. data_engine/domain/details.py +214 -0
  48. data_engine/domain/diagnostics.py +56 -0
  49. data_engine/domain/errors.py +104 -0
  50. data_engine/domain/inspection.py +99 -0
  51. data_engine/domain/logs.py +118 -0
  52. data_engine/domain/operations.py +172 -0
  53. data_engine/domain/operator.py +72 -0
  54. data_engine/domain/runs.py +155 -0
  55. data_engine/domain/runtime.py +279 -0
  56. data_engine/domain/source_state.py +17 -0
  57. data_engine/domain/support.py +54 -0
  58. data_engine/domain/time.py +23 -0
  59. data_engine/domain/workspace.py +159 -0
  60. data_engine/flow_modules/__init__.py +1 -0
  61. data_engine/flow_modules/flow_module_compiler.py +179 -0
  62. data_engine/flow_modules/flow_module_loader.py +201 -0
  63. data_engine/helpers/__init__.py +25 -0
  64. data_engine/helpers/duckdb.py +705 -0
  65. data_engine/hosts/__init__.py +1 -0
  66. data_engine/hosts/daemon/__init__.py +23 -0
  67. data_engine/hosts/daemon/app.py +221 -0
  68. data_engine/hosts/daemon/bootstrap.py +69 -0
  69. data_engine/hosts/daemon/client.py +465 -0
  70. data_engine/hosts/daemon/commands.py +64 -0
  71. data_engine/hosts/daemon/composition.py +310 -0
  72. data_engine/hosts/daemon/constants.py +15 -0
  73. data_engine/hosts/daemon/entrypoints.py +97 -0
  74. data_engine/hosts/daemon/lifecycle.py +191 -0
  75. data_engine/hosts/daemon/manager.py +272 -0
  76. data_engine/hosts/daemon/ownership.py +126 -0
  77. data_engine/hosts/daemon/runtime_commands.py +188 -0
  78. data_engine/hosts/daemon/runtime_control.py +31 -0
  79. data_engine/hosts/daemon/server.py +84 -0
  80. data_engine/hosts/daemon/shared_state.py +147 -0
  81. data_engine/hosts/daemon/state_sync.py +101 -0
  82. data_engine/platform/__init__.py +1 -0
  83. data_engine/platform/identity.py +35 -0
  84. data_engine/platform/local_settings.py +146 -0
  85. data_engine/platform/theme.py +259 -0
  86. data_engine/platform/workspace_models.py +190 -0
  87. data_engine/platform/workspace_policy.py +333 -0
  88. data_engine/runtime/__init__.py +1 -0
  89. data_engine/runtime/file_watch.py +185 -0
  90. data_engine/runtime/ledger_models.py +116 -0
  91. data_engine/runtime/runtime_db.py +938 -0
  92. data_engine/runtime/shared_state.py +523 -0
  93. data_engine/services/__init__.py +49 -0
  94. data_engine/services/daemon.py +64 -0
  95. data_engine/services/daemon_state.py +40 -0
  96. data_engine/services/flow_catalog.py +102 -0
  97. data_engine/services/flow_execution.py +48 -0
  98. data_engine/services/ledger.py +85 -0
  99. data_engine/services/logs.py +65 -0
  100. data_engine/services/runtime_binding.py +105 -0
  101. data_engine/services/runtime_execution.py +126 -0
  102. data_engine/services/runtime_history.py +62 -0
  103. data_engine/services/settings.py +58 -0
  104. data_engine/services/shared_state.py +28 -0
  105. data_engine/services/theme.py +59 -0
  106. data_engine/services/workspace_provisioning.py +224 -0
  107. data_engine/services/workspaces.py +74 -0
  108. data_engine/ui/__init__.py +3 -0
  109. data_engine/ui/cli/__init__.py +19 -0
  110. data_engine/ui/cli/app.py +161 -0
  111. data_engine/ui/cli/commands_doctor.py +178 -0
  112. data_engine/ui/cli/commands_run.py +80 -0
  113. data_engine/ui/cli/commands_start.py +100 -0
  114. data_engine/ui/cli/commands_workspace.py +97 -0
  115. data_engine/ui/cli/dependencies.py +44 -0
  116. data_engine/ui/cli/parser.py +56 -0
  117. data_engine/ui/gui/__init__.py +25 -0
  118. data_engine/ui/gui/app.py +116 -0
  119. data_engine/ui/gui/bootstrap.py +487 -0
  120. data_engine/ui/gui/bootstrapper.py +140 -0
  121. data_engine/ui/gui/cache_models.py +23 -0
  122. data_engine/ui/gui/control_support.py +185 -0
  123. data_engine/ui/gui/controllers/__init__.py +6 -0
  124. data_engine/ui/gui/controllers/flows.py +439 -0
  125. data_engine/ui/gui/controllers/runtime.py +245 -0
  126. data_engine/ui/gui/dialogs/__init__.py +12 -0
  127. data_engine/ui/gui/dialogs/messages.py +88 -0
  128. data_engine/ui/gui/dialogs/previews.py +222 -0
  129. data_engine/ui/gui/helpers/__init__.py +62 -0
  130. data_engine/ui/gui/helpers/inspection.py +81 -0
  131. data_engine/ui/gui/helpers/lifecycle.py +112 -0
  132. data_engine/ui/gui/helpers/scroll.py +28 -0
  133. data_engine/ui/gui/helpers/theming.py +87 -0
  134. data_engine/ui/gui/icons/dark_light.svg +12 -0
  135. data_engine/ui/gui/icons/documentation.svg +1 -0
  136. data_engine/ui/gui/icons/failed.svg +3 -0
  137. data_engine/ui/gui/icons/group.svg +4 -0
  138. data_engine/ui/gui/icons/home.svg +2 -0
  139. data_engine/ui/gui/icons/manual.svg +2 -0
  140. data_engine/ui/gui/icons/poll.svg +2 -0
  141. data_engine/ui/gui/icons/schedule.svg +4 -0
  142. data_engine/ui/gui/icons/settings.svg +2 -0
  143. data_engine/ui/gui/icons/started.svg +3 -0
  144. data_engine/ui/gui/icons/success.svg +3 -0
  145. data_engine/ui/gui/icons/view-log.svg +3 -0
  146. data_engine/ui/gui/icons.py +50 -0
  147. data_engine/ui/gui/launcher.py +48 -0
  148. data_engine/ui/gui/presenters/__init__.py +72 -0
  149. data_engine/ui/gui/presenters/docs.py +140 -0
  150. data_engine/ui/gui/presenters/logs.py +58 -0
  151. data_engine/ui/gui/presenters/runtime_projection.py +29 -0
  152. data_engine/ui/gui/presenters/sidebar.py +88 -0
  153. data_engine/ui/gui/presenters/steps.py +148 -0
  154. data_engine/ui/gui/presenters/workspace.py +39 -0
  155. data_engine/ui/gui/presenters/workspace_binding.py +75 -0
  156. data_engine/ui/gui/presenters/workspace_settings.py +182 -0
  157. data_engine/ui/gui/preview_models.py +37 -0
  158. data_engine/ui/gui/render_support.py +241 -0
  159. data_engine/ui/gui/rendering/__init__.py +12 -0
  160. data_engine/ui/gui/rendering/artifacts.py +95 -0
  161. data_engine/ui/gui/rendering/icons.py +50 -0
  162. data_engine/ui/gui/runtime.py +47 -0
  163. data_engine/ui/gui/state_support.py +193 -0
  164. data_engine/ui/gui/support.py +214 -0
  165. data_engine/ui/gui/surface.py +209 -0
  166. data_engine/ui/gui/theme.py +720 -0
  167. data_engine/ui/gui/widgets/__init__.py +34 -0
  168. data_engine/ui/gui/widgets/config.py +41 -0
  169. data_engine/ui/gui/widgets/logs.py +62 -0
  170. data_engine/ui/gui/widgets/panels.py +507 -0
  171. data_engine/ui/gui/widgets/sidebar.py +130 -0
  172. data_engine/ui/gui/widgets/steps.py +84 -0
  173. data_engine/ui/tui/__init__.py +5 -0
  174. data_engine/ui/tui/app.py +222 -0
  175. data_engine/ui/tui/bootstrap.py +475 -0
  176. data_engine/ui/tui/bootstrapper.py +117 -0
  177. data_engine/ui/tui/controllers/__init__.py +6 -0
  178. data_engine/ui/tui/controllers/flows.py +349 -0
  179. data_engine/ui/tui/controllers/runtime.py +167 -0
  180. data_engine/ui/tui/runtime.py +34 -0
  181. data_engine/ui/tui/state_support.py +141 -0
  182. data_engine/ui/tui/support.py +63 -0
  183. data_engine/ui/tui/theme.py +204 -0
  184. data_engine/ui/tui/widgets.py +123 -0
  185. data_engine/views/__init__.py +109 -0
  186. data_engine/views/actions.py +80 -0
  187. data_engine/views/artifacts.py +58 -0
  188. data_engine/views/flow_display.py +69 -0
  189. data_engine/views/logs.py +54 -0
  190. data_engine/views/models.py +96 -0
  191. data_engine/views/presentation.py +133 -0
  192. data_engine/views/runs.py +62 -0
  193. data_engine/views/state.py +39 -0
  194. data_engine/views/status.py +13 -0
  195. data_engine/views/text.py +109 -0
  196. py_data_engine-0.1.0.dist-info/METADATA +330 -0
  197. py_data_engine-0.1.0.dist-info/RECORD +200 -0
  198. py_data_engine-0.1.0.dist-info/WHEEL +5 -0
  199. py_data_engine-0.1.0.dist-info/entry_points.txt +2 -0
  200. py_data_engine-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,17 @@
1
+ """Domain models for source-file freshness and change detection."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+
8
@dataclass(frozen=True)
class SourceSignature:
    """Immutable fingerprint of one concrete source file, compared for freshness checks.

    Attributes:
        source_path: Path of the source file, held as text.
        mtime_ns: Modification time of the file as an integer (nanoseconds, per the name).
        size_bytes: Size of the file in bytes.
    """

    source_path: str
    mtime_ns: int
    size_bytes: int
15
+
16
+
17
+ __all__ = ["SourceSignature"]
@@ -0,0 +1,54 @@
1
+ """Domain models for documentation support state."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, replace
6
+ from pathlib import Path
7
+
8
+
9
@dataclass(frozen=True)
class DocumentationSessionState:
    """Immutable snapshot of the documentation build for one operator surface.

    Attributes:
        build_running: Flag recording whether a documentation build is in progress.
        root_dir: Root directory of the built documentation, or ``None`` when no
            built documentation is recorded.
    """

    build_running: bool = False
    root_dir: Path | None = None

    @classmethod
    def empty(cls) -> "DocumentationSessionState":
        """Create the idle state: no build running and no built-docs root."""
        idle_state = cls()
        return idle_state

    @property
    def available(self) -> bool:
        """Report whether a built-docs root directory is recorded."""
        if self.root_dir is None:
            return False
        return True

    def with_build_running(self, running: bool) -> "DocumentationSessionState":
        """Derive a new state whose build-running flag is ``bool(running)``."""
        flag = bool(running)
        return replace(self, build_running=flag)

    def with_root_dir(self, root_dir: Path | None) -> "DocumentationSessionState":
        """Derive a new state pointing at ``root_dir``; ``None`` clears the root."""
        return replace(self, root_dir=root_dir)
33
+
34
+
35
@dataclass(frozen=True)
class WorkspaceSupportState:
    """Immutable bundle of support state for one operator surface.

    Attributes:
        documentation: The documentation session state carried by this bundle.
    """

    documentation: DocumentationSessionState

    @classmethod
    def empty(cls) -> "WorkspaceSupportState":
        """Create the idle support state, wrapping an empty documentation state."""
        idle_documentation = DocumentationSessionState.empty()
        return cls(documentation=idle_documentation)

    def with_documentation(self, documentation: DocumentationSessionState) -> "WorkspaceSupportState":
        """Derive a new support state that carries ``documentation`` instead."""
        updated = replace(self, documentation=documentation)
        return updated
49
+
50
+
51
+ __all__ = [
52
+ "DocumentationSessionState",
53
+ "WorkspaceSupportState",
54
+ ]
@@ -0,0 +1,23 @@
1
+ """Shared UTC timestamp helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import UTC, datetime
6
+
7
+
8
+ def utcnow_text() -> str:
9
+ """Return the current UTC timestamp in ISO 8601 text form."""
10
+ return datetime.now(UTC).isoformat()
11
+
12
+
13
+ def parse_utc_text(value: str | None) -> datetime | None:
14
+ """Return a parsed UTC datetime for persisted timestamp text."""
15
+ if value in {None, ""}:
16
+ return None
17
+ parsed = datetime.fromisoformat(value)
18
+ if parsed.tzinfo is None:
19
+ return parsed.replace(tzinfo=UTC)
20
+ return parsed.astimezone(UTC)
21
+
22
+
23
+ __all__ = ["parse_utc_text", "utcnow_text"]
@@ -0,0 +1,159 @@
1
+ """Domain models for workspace selection and collection-root state."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, replace
6
+ from pathlib import Path
7
+ from typing import TYPE_CHECKING, Iterable
8
+
9
+ if TYPE_CHECKING:
10
+ from data_engine.platform.workspace_models import WorkspacePaths
11
+
12
+
13
@dataclass(frozen=True)
class WorkspaceRootState:
    """Immutable collection-root state shown by one operator surface.

    Attributes:
        effective_root: Collection root taken from the resolved workspace paths,
            or ``None`` when the workspace is not configured.
        configured: Whether the workspace is configured.
        override_root: Machine-local override for the collection root, if any.
    """

    effective_root: Path | None
    configured: bool = True
    override_root: Path | None = None

    @classmethod
    def from_paths(
        cls,
        workspace_paths: "WorkspacePaths",
        *,
        override_root: Path | None = None,
    ) -> "WorkspaceRootState":
        """Derive root state from resolved workspace paths plus an optional override.

        A ``workspace_paths`` object without a ``workspace_configured`` attribute
        counts as configured. The override, when given, is stored resolved.
        """
        is_configured = bool(getattr(workspace_paths, "workspace_configured", True))
        if is_configured:
            root = workspace_paths.workspace_collection_root
        else:
            root = None
        resolved_override = None if override_root is None else override_root.resolve()
        return cls(effective_root=root, configured=is_configured, override_root=resolved_override)

    @property
    def using_override(self) -> bool:
        """Report whether a machine-local collection-root override is set."""
        return not (self.override_root is None)

    @property
    def input_text(self) -> str:
        """Text for workspace-root controls: the override, else the effective root, else empty."""
        for candidate in (self.override_root, self.effective_root):
            if candidate is not None:
                return str(candidate)
        return ""

    @property
    def status_text(self) -> str:
        """Plain-language description of which workspace folder is in use."""
        if self.configured:
            return f"Workspace folder: {self.override_root or self.effective_root}"
        return "Workspace folder is not configured."

    def with_override_root(self, override_root: Path | None) -> "WorkspaceRootState":
        """Derive a new state whose override is the resolved ``override_root`` (``None`` clears it)."""
        if override_root is None:
            return replace(self, override_root=None)
        return replace(self, override_root=override_root.resolve())
58
+
59
+
60
@dataclass(frozen=True)
class WorkspaceSelectionState:
    """Immutable workspace selection and discovery state for one operator surface.

    Attributes:
        current_workspace_id: Id of the currently selected workspace.
        discovered_workspace_ids: Discovered workspace ids, kept in the order given.
    """

    current_workspace_id: str
    discovered_workspace_ids: tuple[str, ...] = ()

    @classmethod
    def from_paths(
        cls,
        workspace_paths: "WorkspacePaths",
        *,
        discovered_workspace_ids: Iterable[str] = (),
    ) -> "WorkspaceSelectionState":
        """Derive selection state from resolved paths and an iterable of discovered ids."""
        selected_id = workspace_paths.workspace_id
        discovered = tuple(discovered_workspace_ids)
        return cls(current_workspace_id=selected_id, discovered_workspace_ids=discovered)

    @property
    def selector_enabled(self) -> bool:
        """Report whether the workspace selector should be interactive (any ids discovered)."""
        return True if self.discovered_workspace_ids else False

    @property
    def selector_options(self) -> tuple[str, ...]:
        """Selector option ids, in display order."""
        options = self.discovered_workspace_ids
        return options

    def with_discovered_workspace_ids(self, workspace_ids: Iterable[str]) -> "WorkspaceSelectionState":
        """Derive a new state whose discovered ids are ``tuple(workspace_ids)``."""
        discovered = tuple(workspace_ids)
        return replace(self, discovered_workspace_ids=discovered)

    def with_current_workspace_id(self, workspace_id: str) -> "WorkspaceSelectionState":
        """Derive a new state with ``workspace_id`` as the current selection."""
        return replace(self, current_workspace_id=workspace_id)
97
+
98
+
99
@dataclass(frozen=True)
class WorkspaceSessionState:
    """Immutable pairing of collection-root state with workspace-selection state.

    Attributes:
        root: Collection-root state.
        selection: Workspace selection/discovery state.
    """

    root: WorkspaceRootState
    selection: WorkspaceSelectionState

    @classmethod
    def from_paths(
        cls,
        workspace_paths: "WorkspacePaths",
        *,
        override_root: Path | None = None,
        discovered_workspace_ids: Iterable[str] = (),
    ) -> "WorkspaceSessionState":
        """Derive a session value from resolved paths, an optional override, and discovered ids."""
        root_state = WorkspaceRootState.from_paths(workspace_paths, override_root=override_root)
        selection_state = WorkspaceSelectionState.from_paths(
            workspace_paths,
            discovered_workspace_ids=discovered_workspace_ids,
        )
        return cls(root=root_state, selection=selection_state)

    @property
    def workspace_collection_root_override(self) -> Path | None:
        """The machine-local collection-root override held by the root state, if any."""
        root_state = self.root
        return root_state.override_root

    @property
    def discovered_workspace_ids(self) -> tuple[str, ...]:
        """Discovered workspace ids held by the selection state."""
        selection_state = self.selection
        return selection_state.discovered_workspace_ids

    @property
    def current_workspace_id(self) -> str:
        """Id of the currently selected workspace, read from the selection state."""
        selection_state = self.selection
        return selection_state.current_workspace_id

    def with_paths(self, workspace_paths: "WorkspacePaths") -> "WorkspaceSessionState":
        """Rebuild the session for ``workspace_paths``, carrying over the override and discovered ids."""
        carried_override = self.root.override_root
        carried_ids = self.selection.discovered_workspace_ids
        return type(self).from_paths(
            workspace_paths,
            override_root=carried_override,
            discovered_workspace_ids=carried_ids,
        )

    def with_override_root(self, override_root: Path | None) -> "WorkspaceSessionState":
        """Derive a new session whose root state has the override replaced."""
        new_root = self.root.with_override_root(override_root)
        return replace(self, root=new_root)

    def with_discovered_workspace_ids(self, workspace_ids: Iterable[str]) -> "WorkspaceSessionState":
        """Derive a new session whose selection state has the discovered ids replaced."""
        new_selection = self.selection.with_discovered_workspace_ids(workspace_ids)
        return replace(self, selection=new_selection)

    def with_current_workspace_id(self, workspace_id: str) -> "WorkspaceSessionState":
        """Derive a new session whose selection state has the current id replaced."""
        new_selection = self.selection.with_current_workspace_id(workspace_id)
        return replace(self, selection=new_selection)
157
+
158
+
159
+ __all__ = ["WorkspaceRootState", "WorkspaceSelectionState", "WorkspaceSessionState"]
@@ -0,0 +1 @@
1
+ """Flow-module discovery and compilation helpers."""
@@ -0,0 +1,179 @@
1
+ """Flow-module compilation and mirroring for Data Engine modules."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ import json
7
+ from pathlib import Path
8
+ import shutil
9
+
10
+ from data_engine.authoring.model import FlowValidationError
11
+ from data_engine.platform.workspace_models import WORKSPACE_FLOW_HELPERS_DIR_NAME
12
+ from data_engine.platform.workspace_policy import RuntimeLayoutPolicy
13
+
14
+
15
@dataclass(frozen=True)
class CompiledFlowModule:
    """Immutable record describing one flow module written to compiled output.

    Attributes:
        name: Module name (the stem of the authored source file).
        source_path: Authored notebook or Python file the module came from.
        module_path: Path of the Python module written to compiled output.
    """

    name: str
    source_path: Path
    module_path: Path
22
+
23
+
24
def compile_stale_flow_module_notebooks(
    *,
    data_root: Path | None = None,
) -> tuple[CompiledFlowModule, ...]:
    """Refresh compiled output for every authored flow module that is out of date.

    Notebooks (``*.ipynb``) are compiled and authored Python files (``*.py``,
    except ``__init__.py``) are mirrored. Before that, orphaned generated modules
    are removed and the helper package is re-mirrored. A source is refreshed when
    its compiled module is missing or has an older modification time.

    Returns:
        One ``CompiledFlowModule`` per module written during this call, notebooks
        first; empty when the authored directory does not exist.

    Raises:
        FlowValidationError: If a notebook and a Python file share a stem, or a
            notebook cannot be compiled.
    """
    flow_modules_dir, modules_dir = resolve_flow_module_paths(data_root=data_root)
    # The compiled directory is created even when nothing has been authored yet.
    modules_dir.mkdir(parents=True, exist_ok=True)
    if not flow_modules_dir.exists():
        return ()

    notebook_paths = sorted(flow_modules_dir.glob("*.ipynb"))
    python_paths = sorted(
        candidate for candidate in flow_modules_dir.glob("*.py") if candidate.name != "__init__.py"
    )
    _validate_unique_authored_flow_module_stems(notebook_paths, python_paths)

    authored_names = {source.stem for source in (*notebook_paths, *python_paths)}
    _remove_orphaned_compiled_modules(modules_dir, authored_names)
    _mirror_helper_modules(
        flow_modules_dir / WORKSPACE_FLOW_HELPERS_DIR_NAME,
        modules_dir / WORKSPACE_FLOW_HELPERS_DIR_NAME,
    )

    # Each work item pairs a source with its target module and the routine that produces it.
    work_items = [
        (notebook, modules_dir / f"{notebook.stem}.py", compile_flow_module_notebook)
        for notebook in notebook_paths
    ]
    work_items.extend(
        (source, modules_dir / source.name, mirror_flow_module_python_module)
        for source in python_paths
    )

    refreshed: list[CompiledFlowModule] = []
    for source_path, module_path, produce in work_items:
        up_to_date = module_path.exists() and module_path.stat().st_mtime >= source_path.stat().st_mtime
        if up_to_date:
            continue
        produce(source_path, module_path)
        refreshed.append(
            CompiledFlowModule(name=source_path.stem, source_path=source_path, module_path=module_path)
        )
    return tuple(refreshed)
61
+
62
+
63
def resolve_flow_module_paths(
    *,
    data_root: Path | None = None,
) -> tuple[Path, Path]:
    """Look up the authored flow-module directory and the compiled output directory.

    Returns:
        ``(flow_modules_dir, compiled_flow_modules_dir)`` as resolved by
        ``RuntimeLayoutPolicy().resolve_paths(data_root=data_root)``.
    """
    layout_policy = RuntimeLayoutPolicy()
    resolved = layout_policy.resolve_paths(data_root=data_root)
    authored_dir = resolved.flow_modules_dir
    compiled_dir = resolved.compiled_flow_modules_dir
    return authored_dir, compiled_dir
70
+
71
+
72
def compile_flow_module_notebook(notebook_path: Path, module_path: Path) -> None:
    """Compile one notebook-authored flow module into a Python module.

    The notebook's non-blank code cells are stripped, joined with blank lines in
    notebook order, and written to ``module_path`` beneath a generated header
    (module docstring, ``from __future__ import annotations`` and a comment naming
    the source notebook). Parent directories of ``module_path`` are created.

    Args:
        notebook_path: Notebook (JSON) file to read, decoded as UTF-8.
        module_path: Destination of the generated Python module.

    Raises:
        FlowValidationError: If the notebook's top-level JSON value is not an
            object with a ``cells`` list, a code cell's ``source`` is neither a
            string nor a list of strings, any code line starts with ``%`` or
            ``!`` (magics / shell commands), or there is no non-blank code cell.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    payload = json.loads(notebook_path.read_text(encoding="utf-8"))
    # Valid JSON need not be an object: a top-level list or scalar has no
    # ``cells`` and must be reported as an invalid notebook, not crash on ``.get``.
    cells = payload.get("cells") if isinstance(payload, dict) else None
    if not isinstance(cells, list):
        raise FlowValidationError(f"Notebook cells payload is invalid in {notebook_path}")

    code_blocks: list[str] = []
    for cell in cells:
        if not isinstance(cell, dict) or cell.get("cell_type") != "code":
            continue
        source = cell.get("source", [])
        # Notebook JSON stores cell source either as one string or as a list of lines.
        if isinstance(source, str):
            text = source
        elif isinstance(source, list) and all(isinstance(line, str) for line in source):
            text = "".join(source)
        else:
            raise FlowValidationError(f"Notebook code cell source is invalid in {notebook_path}")
        stripped = text.strip()
        if not stripped:
            continue
        # IPython magics and shell escapes are not valid Python in a plain module.
        if any(line.lstrip().startswith(("%", "!")) for line in stripped.splitlines()):
            raise FlowValidationError(f"Notebook magics and shell commands are not allowed in {notebook_path}")
        code_blocks.append(stripped)

    if not code_blocks:
        raise FlowValidationError(f"Notebook does not contain any code cells to compile: {notebook_path}")

    rendered = [
        '"""Auto-compiled flow module. Source notebook is authoritative."""',
        "",
        "from __future__ import annotations",
        "",
        f"# Source notebook: {notebook_path.as_posix()}",
        "",
        "\n\n".join(code_blocks),
        "",
    ]

    module_path.parent.mkdir(parents=True, exist_ok=True)
    module_path.write_text("\n".join(rendered), encoding="utf-8")
114
+
115
+
116
def mirror_flow_module_python_module(source_path: Path, module_path: Path) -> None:
    """Copy one authored Python flow/helper module into compiled output under a banner.

    The output is a one-line comment naming the authoritative source, a blank
    line, then the source text with trailing whitespace trimmed and a single
    final newline. Parent directories of ``module_path`` are created as needed.
    """
    body = source_path.read_text(encoding="utf-8").rstrip()
    banner = f"# Mirrored flow module. Source file is authoritative: {source_path.as_posix()}"
    mirrored_text = f"{banner}\n\n{body}\n"
    module_path.parent.mkdir(parents=True, exist_ok=True)
    module_path.write_text(mirrored_text, encoding="utf-8")
127
+
128
+
129
+ def _mirror_helper_modules(helper_modules_dir: Path, compiled_helper_modules_dir: Path) -> None:
130
+ """Mirror authored helper modules into compiled output as an importable package."""
131
+ if compiled_helper_modules_dir.exists():
132
+ shutil.rmtree(compiled_helper_modules_dir)
133
+ if not helper_modules_dir.is_dir():
134
+ return
135
+ shutil.copytree(helper_modules_dir, compiled_helper_modules_dir)
136
+ init_path = compiled_helper_modules_dir / "__init__.py"
137
+ if not init_path.exists():
138
+ init_path.write_text('"""Authored flow helper modules for flow-module imports."""\n', encoding="utf-8")
139
+
140
+
141
+ def _validate_unique_authored_flow_module_stems(notebook_paths: list[Path], python_paths: list[Path]) -> None:
142
+ """Reject authored flow-module directories that define the same module stem twice."""
143
+ notebook_stems = {path.stem for path in notebook_paths}
144
+ python_stems = {path.stem for path in python_paths}
145
+ overlaps = sorted(notebook_stems & python_stems)
146
+ if overlaps:
147
+ names = ", ".join(overlaps)
148
+ raise FlowValidationError(f"Flow module sources conflict between .ipynb and .py files: {names}")
149
+
150
+
151
+ def _remove_orphaned_compiled_modules(modules_dir: Path, authored_names: set[str]) -> None:
152
+ """Delete generated modules and caches that no longer have a notebook source."""
153
+ for module_path in modules_dir.glob("*.py"):
154
+ if module_path.name == "__init__.py" or module_path.stem.startswith("_"):
155
+ continue
156
+ if module_path.stem not in authored_names and _is_generated_module(module_path):
157
+ module_path.unlink()
158
+
159
+ pycache_dir = modules_dir / "__pycache__"
160
+ if pycache_dir.exists():
161
+ shutil.rmtree(pycache_dir)
162
+
163
+
164
+ def _is_generated_module(module_path: Path) -> bool:
165
+ """Return whether a compiled module was generated or mirrored from authored sources."""
166
+ try:
167
+ first_line = module_path.read_text(encoding="utf-8").splitlines()[0]
168
+ except (FileNotFoundError, IndexError):
169
+ return False
170
+ return "Auto-compiled flow module" in first_line or "Mirrored flow module" in first_line
171
+
172
+
173
+ __all__ = [
174
+ "CompiledFlowModule",
175
+ "compile_flow_module_notebook",
176
+ "compile_stale_flow_module_notebooks",
177
+ "mirror_flow_module_python_module",
178
+ "resolve_flow_module_paths",
179
+ ]
@@ -0,0 +1,201 @@
1
+ """Code-native flow-module discovery and loading for Data Engine flows."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from contextlib import contextmanager
7
+ from contextvars import ContextVar
8
+ from importlib.util import module_from_spec, spec_from_file_location
9
+ import inspect
10
+ from pathlib import Path
11
+ import sys
12
+ from typing import TYPE_CHECKING, Callable
13
+
14
+ from data_engine.authoring.model import FlowExecutionError, FlowValidationError
15
+ from data_engine.flow_modules.flow_module_compiler import compile_stale_flow_module_notebooks, resolve_flow_module_paths
16
+ from data_engine.platform.workspace_models import APP_INTERNAL_ID
17
+
18
+ if TYPE_CHECKING:
19
+ from data_engine.authoring.builder import Flow
20
+
21
+
22
# Set to True by compiled_flow_module_context() for the duration of its body; False otherwise.
_COMPILED_FLOW_MODULE_CONTEXT: ContextVar[bool] = ContextVar("compiled_flow_module_context", default=False)
# Resolved directory passed to compiled_flow_module_context(), or None outside one.
# NOTE(review): despite the name, callers in this file pass the authored flow-modules
# directory rather than the compiled output directory -- confirm which is intended.
_COMPILED_FLOW_MODULE_DIR: ContextVar[Path | None] = ContextVar("compiled_flow_module_dir", default=None)
24
+
25
+
26
@dataclass(frozen=True)
class FlowModuleDefinition:
    """Immutable handle on one loaded flow module: its build callable plus UI metadata.

    Attributes:
        name: Flow-module name.
        description: Optional description text for the module, or ``None``.
        module_path: Path of the compiled module file that was loaded.
        build: Zero-argument callable that produces the module's ``Flow``.
    """

    name: str
    description: str | None
    module_path: Path
    build: Callable[[], "Flow"]
34
+
35
+
36
def _load_module(name: str, *, data_root: Path | None = None):
    """Compile stale sources, then import the compiled module for flow ``name``.

    Args:
        name: Flow-module name (stem of the compiled ``.py`` file).
        data_root: Optional data root forwarded to path resolution and compilation.

    Returns:
        ``(module, module_path, flow_modules_dir)``: the executed module object,
        the compiled file it was loaded from, and the authored flow-modules directory.

    Raises:
        FlowValidationError: If no compiled module exists for ``name`` (the
            message says whether an authored source exists and lists available
            modules), or no import spec/loader can be created for it.
        FlowExecutionError: With ``phase="import"`` for any other exception
            raised while executing the module.
    """
    compile_stale_flow_module_notebooks(data_root=data_root)
    flow_modules_dir, compiled_flow_modules_dir = resolve_flow_module_paths(data_root=data_root)
    module_path = compiled_flow_modules_dir / f"{name}.py"
    if not module_path.exists():
        source_path = _authored_flow_module_source_path(name, flow_modules_dir=flow_modules_dir)
        if source_path is not None:
            raise FlowValidationError(
                f"Flow module {name!r} could not be compiled from {source_path}. No compiled module was produced."
            )
        available = _available_flow_module_names(flow_modules_dir=flow_modules_dir)
        if available:
            names_text = ", ".join(available)
            raise FlowValidationError(
                f"Flow module {name!r} is not available in {flow_modules_dir}. Available flow modules: {names_text}."
            )
        raise FlowValidationError(f"Flow module {name!r} is not available in {flow_modules_dir}.")

    module_name = f"{APP_INTERNAL_ID}_user_flow_module_{name}"
    try:
        spec = spec_from_file_location(module_name, module_path)
        if spec is None or spec.loader is None:
            raise FlowValidationError(f"Flow module {name!r} could not be loaded from {module_path}.")
        module = module_from_spec(spec)
        # Register the module before executing it, as the importlib recipe for
        # loading a source file does. Import-time machinery that looks up
        # ``sys.modules[cls.__module__]`` (for example ``@dataclass`` resolving
        # string annotations, which every compiled notebook has) fails otherwise.
        previous_module = sys.modules.get(module_name)
        sys.modules[module_name] = module
        try:
            with compiled_flow_module_context(flow_modules_dir), _compiled_flow_module_import_path(module_path.parent):
                spec.loader.exec_module(module)
        except BaseException:
            # Never leave a half-initialised module importable; restore what was there.
            if previous_module is None:
                sys.modules.pop(module_name, None)
            else:
                sys.modules[module_name] = previous_module
            raise
    except FlowValidationError:
        raise
    except Exception as exc:
        raise FlowExecutionError(
            flow_name=name,
            phase="import",
            detail=f"{type(exc).__name__}: {exc} ({module_path})",
        ) from exc

    return module, module_path, flow_modules_dir
72
+
73
+
74
+ def _authored_flow_module_source_path(name: str, *, flow_modules_dir: Path) -> Path | None:
75
+ """Return the authored notebook or Python source path for one flow module when present."""
76
+ for suffix in (".py", ".ipynb"):
77
+ source_path = flow_modules_dir / f"{name}{suffix}"
78
+ if source_path.exists():
79
+ return source_path
80
+ return None
81
+
82
+
83
+ def _available_flow_module_names(*, flow_modules_dir: Path) -> tuple[str, ...]:
84
+ """Return the authored flow-module names currently present in one workspace."""
85
+ names = {
86
+ path.stem
87
+ for pattern in ("*.py", "*.ipynb")
88
+ for path in flow_modules_dir.glob(pattern)
89
+ if path.name != "__init__.py" and not path.stem.startswith("_")
90
+ }
91
+ return tuple(sorted(names))
92
+
93
+
94
+ @contextmanager
95
+ def _compiled_flow_module_import_path(compiled_flow_modules_dir: Path):
96
+ """Temporarily expose the compiled flow-module directory for sibling helper imports."""
97
+ path_text = str(compiled_flow_modules_dir)
98
+ inserted = False
99
+ if path_text not in sys.path:
100
+ sys.path.insert(0, path_text)
101
+ inserted = True
102
+ try:
103
+ yield
104
+ finally:
105
+ if inserted:
106
+ try:
107
+ sys.path.remove(path_text)
108
+ except ValueError:
109
+ pass
110
+
111
+
112
def load_flow_module_definition(name: str, *, data_root: Path | None = None) -> FlowModuleDefinition:
    """Load the compiled flow module ``name`` and wrap its ``build()`` in a guarded callable.

    The module must export a callable ``build`` taking no parameters and may
    export a string ``DESCRIPTION``. The returned definition's ``build`` runs the
    module's ``build()`` inside the compiled flow-module context, converts
    unexpected exceptions into ``FlowExecutionError`` (phase ``"build"``), checks
    that the result is a ``Flow``, and returns a clone named ``name`` whose
    workspace root is the resolved parent of the authored flow-modules directory.

    Raises:
        FlowValidationError: If ``build`` is missing, not callable or accepts
            parameters, or ``DESCRIPTION`` is present but not a string.
    """
    module, module_path, flow_modules_dir = _load_module(name, data_root=data_root)

    build = getattr(module, "build", None)
    if not callable(build):
        raise FlowValidationError(f"Flow module {name!r} does not export a callable build().")
    if inspect.signature(build).parameters:
        raise FlowValidationError(f"Flow module {name!r} build() must not accept any parameters.")

    description = getattr(module, "DESCRIPTION", None)
    if not (description is None or isinstance(description, str)):
        raise FlowValidationError(f"Flow module {name!r} DESCRIPTION must be a string.")

    def guarded_build() -> "Flow":
        with compiled_flow_module_context(flow_modules_dir):
            # Imported here rather than at module import time (only needed when building).
            from data_engine.authoring.builder import Flow

            try:
                built = build()
            except FlowValidationError:
                # Validation problems are already in the caller-facing form.
                raise
            except Exception as exc:
                raise FlowExecutionError(
                    flow_name=name,
                    phase="build",
                    function_name=getattr(build, "__name__", "build"),
                    detail=f"{type(exc).__name__}: {exc}",
                ) from exc
            if isinstance(built, Flow):
                return built._clone(name=name, _workspace_root=flow_modules_dir.parent.resolve())
            raise FlowValidationError(f"Flow module {name!r} build() did not return a Flow.")

    return FlowModuleDefinition(
        name=name,
        description=description,
        module_path=module_path,
        build=guarded_build,
    )
153
+
154
+
155
def discover_flow_module_definitions(*, data_root: Path | None = None) -> tuple[FlowModuleDefinition, ...]:
    """Compile stale sources, then load a definition for every compiled flow module.

    Compiled ``*.py`` files are visited in sorted order; ``__init__.py`` and
    names starting with ``_`` are skipped. Returns an empty tuple when the
    authored directory is not a directory or the compiled directory is missing.
    """
    compile_stale_flow_module_notebooks(data_root=data_root)
    flow_modules_dir, compiled_flow_modules_dir = resolve_flow_module_paths(data_root=data_root)
    if not (flow_modules_dir.is_dir() and compiled_flow_modules_dir.exists()):
        return ()

    return tuple(
        load_flow_module_definition(candidate.stem, data_root=data_root)
        for candidate in sorted(compiled_flow_modules_dir.glob("*.py"))
        if not (candidate.name == "__init__.py" or candidate.stem.startswith("_"))
    )
170
+
171
+
172
def in_compiled_flow_module_context() -> bool:
    """Report whether the caller is running inside a compiled flow-module import/build context."""
    is_active = _COMPILED_FLOW_MODULE_CONTEXT.get()
    return is_active
175
+
176
+
177
def current_compiled_flow_module_dir() -> Path | None:
    """Return the directory recorded for the active flow-module import/build context.

    This is the resolved directory stored in the module's directory context
    variable, or ``None`` when no context is active or none was given.
    """
    active_dir = _COMPILED_FLOW_MODULE_DIR.get()
    return active_dir
180
+
181
+
182
@contextmanager
def compiled_flow_module_context(flow_modules_dir: Path | None = None):
    """Mark the current execution context as a compiled flow-module import/build.

    While the body runs, the in-context flag is ``True`` and the recorded
    directory is the resolved ``flow_modules_dir`` (or ``None`` when omitted).
    Both context variables are restored to their previous values on exit, even
    if the body raises, so nested uses unwind correctly.

    Args:
        flow_modules_dir: Directory to record for the duration of the context.
    """
    # Resolve before touching any context variable: if ``resolve()`` raises,
    # the in-context flag must not be left stuck at True with no reset.
    active_dir = flow_modules_dir.resolve() if flow_modules_dir is not None else None
    token = _COMPILED_FLOW_MODULE_CONTEXT.set(True)
    try:
        dir_token = _COMPILED_FLOW_MODULE_DIR.set(active_dir)
        try:
            yield
        finally:
            _COMPILED_FLOW_MODULE_DIR.reset(dir_token)
    finally:
        _COMPILED_FLOW_MODULE_CONTEXT.reset(token)
192
+
193
+
194
+ __all__ = [
195
+ "FlowModuleDefinition",
196
+ "compiled_flow_module_context",
197
+ "current_compiled_flow_module_dir",
198
+ "discover_flow_module_definitions",
199
+ "in_compiled_flow_module_context",
200
+ "load_flow_module_definition",
201
+ ]
@@ -0,0 +1,25 @@
1
+ """Public authoring helper modules."""
2
+
3
+ from data_engine.helpers.duckdb import attach_dimension
4
+ from data_engine.helpers.duckdb import build_dimension
5
+ from data_engine.helpers.duckdb import denormalize_columns
6
+ from data_engine.helpers.duckdb import normalize_columns
7
+ from data_engine.helpers.duckdb import read_rows_by_values
8
+ from data_engine.helpers.duckdb import read_sql
9
+ from data_engine.helpers.duckdb import read_table
10
+ from data_engine.helpers.duckdb import replace_rows_by_file
11
+ from data_engine.helpers.duckdb import replace_rows_by_values
12
+ from data_engine.helpers.duckdb import replace_table
13
+
14
+ __all__ = [
15
+ "attach_dimension",
16
+ "build_dimension",
17
+ "denormalize_columns",
18
+ "normalize_columns",
19
+ "read_rows_by_values",
20
+ "read_sql",
21
+ "read_table",
22
+ "replace_rows_by_file",
23
+ "replace_rows_by_values",
24
+ "replace_table",
25
+ ]