py-data-engine 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. data_engine/__init__.py +37 -0
  2. data_engine/application/__init__.py +39 -0
  3. data_engine/application/actions.py +42 -0
  4. data_engine/application/catalog.py +151 -0
  5. data_engine/application/control.py +213 -0
  6. data_engine/application/details.py +73 -0
  7. data_engine/application/runtime.py +449 -0
  8. data_engine/application/workspace.py +62 -0
  9. data_engine/authoring/__init__.py +14 -0
  10. data_engine/authoring/builder.py +31 -0
  11. data_engine/authoring/execution/__init__.py +6 -0
  12. data_engine/authoring/execution/app.py +6 -0
  13. data_engine/authoring/execution/context.py +82 -0
  14. data_engine/authoring/execution/continuous.py +176 -0
  15. data_engine/authoring/execution/grouped.py +106 -0
  16. data_engine/authoring/execution/logging.py +83 -0
  17. data_engine/authoring/execution/polling.py +135 -0
  18. data_engine/authoring/execution/runner.py +210 -0
  19. data_engine/authoring/execution/single.py +171 -0
  20. data_engine/authoring/flow.py +361 -0
  21. data_engine/authoring/helpers.py +160 -0
  22. data_engine/authoring/model.py +59 -0
  23. data_engine/authoring/primitives.py +430 -0
  24. data_engine/authoring/services.py +42 -0
  25. data_engine/devtools/__init__.py +3 -0
  26. data_engine/devtools/project_ast_map.py +503 -0
  27. data_engine/docs/__init__.py +1 -0
  28. data_engine/docs/sphinx_source/_static/custom.css +13 -0
  29. data_engine/docs/sphinx_source/api.rst +42 -0
  30. data_engine/docs/sphinx_source/conf.py +37 -0
  31. data_engine/docs/sphinx_source/guides/app-runtime-and-workspaces.md +397 -0
  32. data_engine/docs/sphinx_source/guides/authoring-flow-modules.md +215 -0
  33. data_engine/docs/sphinx_source/guides/configuring-flows.md +185 -0
  34. data_engine/docs/sphinx_source/guides/core-concepts.md +208 -0
  35. data_engine/docs/sphinx_source/guides/database-methods.md +107 -0
  36. data_engine/docs/sphinx_source/guides/duckdb-helpers.md +462 -0
  37. data_engine/docs/sphinx_source/guides/flow-context.md +538 -0
  38. data_engine/docs/sphinx_source/guides/flow-methods.md +206 -0
  39. data_engine/docs/sphinx_source/guides/getting-started.md +271 -0
  40. data_engine/docs/sphinx_source/guides/project-inventory.md +5683 -0
  41. data_engine/docs/sphinx_source/guides/project-map.md +118 -0
  42. data_engine/docs/sphinx_source/guides/recipes.md +268 -0
  43. data_engine/docs/sphinx_source/index.rst +22 -0
  44. data_engine/domain/__init__.py +92 -0
  45. data_engine/domain/actions.py +69 -0
  46. data_engine/domain/catalog.py +128 -0
  47. data_engine/domain/details.py +214 -0
  48. data_engine/domain/diagnostics.py +56 -0
  49. data_engine/domain/errors.py +104 -0
  50. data_engine/domain/inspection.py +99 -0
  51. data_engine/domain/logs.py +118 -0
  52. data_engine/domain/operations.py +172 -0
  53. data_engine/domain/operator.py +72 -0
  54. data_engine/domain/runs.py +155 -0
  55. data_engine/domain/runtime.py +279 -0
  56. data_engine/domain/source_state.py +17 -0
  57. data_engine/domain/support.py +54 -0
  58. data_engine/domain/time.py +23 -0
  59. data_engine/domain/workspace.py +159 -0
  60. data_engine/flow_modules/__init__.py +1 -0
  61. data_engine/flow_modules/flow_module_compiler.py +179 -0
  62. data_engine/flow_modules/flow_module_loader.py +201 -0
  63. data_engine/helpers/__init__.py +25 -0
  64. data_engine/helpers/duckdb.py +705 -0
  65. data_engine/hosts/__init__.py +1 -0
  66. data_engine/hosts/daemon/__init__.py +23 -0
  67. data_engine/hosts/daemon/app.py +221 -0
  68. data_engine/hosts/daemon/bootstrap.py +69 -0
  69. data_engine/hosts/daemon/client.py +465 -0
  70. data_engine/hosts/daemon/commands.py +64 -0
  71. data_engine/hosts/daemon/composition.py +310 -0
  72. data_engine/hosts/daemon/constants.py +15 -0
  73. data_engine/hosts/daemon/entrypoints.py +97 -0
  74. data_engine/hosts/daemon/lifecycle.py +191 -0
  75. data_engine/hosts/daemon/manager.py +272 -0
  76. data_engine/hosts/daemon/ownership.py +126 -0
  77. data_engine/hosts/daemon/runtime_commands.py +188 -0
  78. data_engine/hosts/daemon/runtime_control.py +31 -0
  79. data_engine/hosts/daemon/server.py +84 -0
  80. data_engine/hosts/daemon/shared_state.py +147 -0
  81. data_engine/hosts/daemon/state_sync.py +101 -0
  82. data_engine/platform/__init__.py +1 -0
  83. data_engine/platform/identity.py +35 -0
  84. data_engine/platform/local_settings.py +146 -0
  85. data_engine/platform/theme.py +259 -0
  86. data_engine/platform/workspace_models.py +190 -0
  87. data_engine/platform/workspace_policy.py +333 -0
  88. data_engine/runtime/__init__.py +1 -0
  89. data_engine/runtime/file_watch.py +185 -0
  90. data_engine/runtime/ledger_models.py +116 -0
  91. data_engine/runtime/runtime_db.py +938 -0
  92. data_engine/runtime/shared_state.py +523 -0
  93. data_engine/services/__init__.py +49 -0
  94. data_engine/services/daemon.py +64 -0
  95. data_engine/services/daemon_state.py +40 -0
  96. data_engine/services/flow_catalog.py +102 -0
  97. data_engine/services/flow_execution.py +48 -0
  98. data_engine/services/ledger.py +85 -0
  99. data_engine/services/logs.py +65 -0
  100. data_engine/services/runtime_binding.py +105 -0
  101. data_engine/services/runtime_execution.py +126 -0
  102. data_engine/services/runtime_history.py +62 -0
  103. data_engine/services/settings.py +58 -0
  104. data_engine/services/shared_state.py +28 -0
  105. data_engine/services/theme.py +59 -0
  106. data_engine/services/workspace_provisioning.py +224 -0
  107. data_engine/services/workspaces.py +74 -0
  108. data_engine/ui/__init__.py +3 -0
  109. data_engine/ui/cli/__init__.py +19 -0
  110. data_engine/ui/cli/app.py +161 -0
  111. data_engine/ui/cli/commands_doctor.py +178 -0
  112. data_engine/ui/cli/commands_run.py +80 -0
  113. data_engine/ui/cli/commands_start.py +100 -0
  114. data_engine/ui/cli/commands_workspace.py +97 -0
  115. data_engine/ui/cli/dependencies.py +44 -0
  116. data_engine/ui/cli/parser.py +56 -0
  117. data_engine/ui/gui/__init__.py +25 -0
  118. data_engine/ui/gui/app.py +116 -0
  119. data_engine/ui/gui/bootstrap.py +487 -0
  120. data_engine/ui/gui/bootstrapper.py +140 -0
  121. data_engine/ui/gui/cache_models.py +23 -0
  122. data_engine/ui/gui/control_support.py +185 -0
  123. data_engine/ui/gui/controllers/__init__.py +6 -0
  124. data_engine/ui/gui/controllers/flows.py +439 -0
  125. data_engine/ui/gui/controllers/runtime.py +245 -0
  126. data_engine/ui/gui/dialogs/__init__.py +12 -0
  127. data_engine/ui/gui/dialogs/messages.py +88 -0
  128. data_engine/ui/gui/dialogs/previews.py +222 -0
  129. data_engine/ui/gui/helpers/__init__.py +62 -0
  130. data_engine/ui/gui/helpers/inspection.py +81 -0
  131. data_engine/ui/gui/helpers/lifecycle.py +112 -0
  132. data_engine/ui/gui/helpers/scroll.py +28 -0
  133. data_engine/ui/gui/helpers/theming.py +87 -0
  134. data_engine/ui/gui/icons/dark_light.svg +12 -0
  135. data_engine/ui/gui/icons/documentation.svg +1 -0
  136. data_engine/ui/gui/icons/failed.svg +3 -0
  137. data_engine/ui/gui/icons/group.svg +4 -0
  138. data_engine/ui/gui/icons/home.svg +2 -0
  139. data_engine/ui/gui/icons/manual.svg +2 -0
  140. data_engine/ui/gui/icons/poll.svg +2 -0
  141. data_engine/ui/gui/icons/schedule.svg +4 -0
  142. data_engine/ui/gui/icons/settings.svg +2 -0
  143. data_engine/ui/gui/icons/started.svg +3 -0
  144. data_engine/ui/gui/icons/success.svg +3 -0
  145. data_engine/ui/gui/icons/view-log.svg +3 -0
  146. data_engine/ui/gui/icons.py +50 -0
  147. data_engine/ui/gui/launcher.py +48 -0
  148. data_engine/ui/gui/presenters/__init__.py +72 -0
  149. data_engine/ui/gui/presenters/docs.py +140 -0
  150. data_engine/ui/gui/presenters/logs.py +58 -0
  151. data_engine/ui/gui/presenters/runtime_projection.py +29 -0
  152. data_engine/ui/gui/presenters/sidebar.py +88 -0
  153. data_engine/ui/gui/presenters/steps.py +148 -0
  154. data_engine/ui/gui/presenters/workspace.py +39 -0
  155. data_engine/ui/gui/presenters/workspace_binding.py +75 -0
  156. data_engine/ui/gui/presenters/workspace_settings.py +182 -0
  157. data_engine/ui/gui/preview_models.py +37 -0
  158. data_engine/ui/gui/render_support.py +241 -0
  159. data_engine/ui/gui/rendering/__init__.py +12 -0
  160. data_engine/ui/gui/rendering/artifacts.py +95 -0
  161. data_engine/ui/gui/rendering/icons.py +50 -0
  162. data_engine/ui/gui/runtime.py +47 -0
  163. data_engine/ui/gui/state_support.py +193 -0
  164. data_engine/ui/gui/support.py +214 -0
  165. data_engine/ui/gui/surface.py +209 -0
  166. data_engine/ui/gui/theme.py +720 -0
  167. data_engine/ui/gui/widgets/__init__.py +34 -0
  168. data_engine/ui/gui/widgets/config.py +41 -0
  169. data_engine/ui/gui/widgets/logs.py +62 -0
  170. data_engine/ui/gui/widgets/panels.py +507 -0
  171. data_engine/ui/gui/widgets/sidebar.py +130 -0
  172. data_engine/ui/gui/widgets/steps.py +84 -0
  173. data_engine/ui/tui/__init__.py +5 -0
  174. data_engine/ui/tui/app.py +222 -0
  175. data_engine/ui/tui/bootstrap.py +475 -0
  176. data_engine/ui/tui/bootstrapper.py +117 -0
  177. data_engine/ui/tui/controllers/__init__.py +6 -0
  178. data_engine/ui/tui/controllers/flows.py +349 -0
  179. data_engine/ui/tui/controllers/runtime.py +167 -0
  180. data_engine/ui/tui/runtime.py +34 -0
  181. data_engine/ui/tui/state_support.py +141 -0
  182. data_engine/ui/tui/support.py +63 -0
  183. data_engine/ui/tui/theme.py +204 -0
  184. data_engine/ui/tui/widgets.py +123 -0
  185. data_engine/views/__init__.py +109 -0
  186. data_engine/views/actions.py +80 -0
  187. data_engine/views/artifacts.py +58 -0
  188. data_engine/views/flow_display.py +69 -0
  189. data_engine/views/logs.py +54 -0
  190. data_engine/views/models.py +96 -0
  191. data_engine/views/presentation.py +133 -0
  192. data_engine/views/runs.py +62 -0
  193. data_engine/views/state.py +39 -0
  194. data_engine/views/status.py +13 -0
  195. data_engine/views/text.py +109 -0
  196. py_data_engine-0.1.0.dist-info/METADATA +330 -0
  197. py_data_engine-0.1.0.dist-info/RECORD +200 -0
  198. py_data_engine-0.1.0.dist-info/WHEEL +5 -0
  199. py_data_engine-0.1.0.dist-info/entry_points.txt +2 -0
  200. py_data_engine-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,118 @@
1
+ # Project Map
2
+
3
+ This page is a small structural map of the current `data_engine` package, based on the AST mapper in `src/data_engine/devtools/project_ast_map.py`.
4
+
5
+ It is meant to answer:
6
+
7
+ - where the codebase is heaviest
8
+ - how the main packages are divided
9
+ - which modules are acting like stitching points
10
+
11
+ This is not a hand-wavy architecture diagram. It is a lightweight snapshot derived from the current Python source tree.
12
+
13
+ ## Regenerating the map
14
+
15
+ The source for this page comes from:
16
+
17
+ ```bash
18
+ python -m data_engine.devtools.project_ast_map \
19
+ src/data_engine
20
+ ```
21
+
22
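+ If the package structure changes substantially, re-run the command above and refresh the tables on this page. The counts are a point-in-time snapshot and can lag behind the shipped sources.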
23
+
24
+ ## Package Rollup
25
+
26
+ These counts are package-level rollups from the current AST snapshot.
27
+
28
+ | Package | Modules | Functions | Classes | Flows | Lines |
29
+ | --- | ---: | ---: | ---: | ---: | ---: |
30
+ | `data_engine` | 1 | 1 | 0 | 0 | 37 |
31
+ | `data_engine.application` | 7 | 5 | 19 | 0 | 1029 |
32
+ | `data_engine.authoring` | 16 | 20 | 24 | 0 | 2072 |
33
+ | `data_engine.devtools` | 2 | 12 | 5 | 0 | 360 |
34
+ | `data_engine.docs` | 2 | 0 | 0 | 0 | 38 |
35
+ | `data_engine.domain` | 16 | 9 | 40 | 0 | 1811 |
36
+ | `data_engine.flow_modules` | 3 | 17 | 2 | 0 | 381 |
37
+ | `data_engine.helpers` | 2 | 20 | 0 | 0 | 662 |
38
+ | `data_engine.hosts` | 17 | 55 | 14 | 0 | 2383 |
39
+ | `data_engine.platform` | 6 | 20 | 9 | 0 | 933 |
40
+ | `data_engine.runtime` | 5 | 37 | 9 | 0 | 1763 |
41
+ | `data_engine.services` | 15 | 13 | 16 | 0 | 1189 |
42
+ | `data_engine.ui` | 65 | 174 | 35 | 0 | 8607 |
43
+ | `data_engine.views` | 11 | 29 | 9 | 0 | 822 |
44
+
45
+ ## How To Read It
46
+
47
+ The package split currently looks like this:
48
+
49
+ - `data_engine.ui` is by far the largest surface. That is expected because it includes both the Qt GUI and the TUI, plus their presenters, controllers, widgets, dialogs, rendering helpers, and bootstrapping.
50
+ - `data_engine.hosts`, `data_engine.runtime`, and `data_engine.application` are the runtime control spine. That is where daemon orchestration, runtime state, and host-agnostic application use cases live.
51
+ - `data_engine.authoring`, `data_engine.helpers`, and `data_engine.flow_modules` are the flow-authoring side of the package.
52
+ - `data_engine.domain`, `data_engine.platform`, `data_engine.services`, and `data_engine.views` are the supporting layers that hold shared models, path policy, services, and rendering/state helpers.
53
+
54
+ That means the current codebase is not “all runtime” or “all UI.” It is a UI-heavy operator product built on a fairly distinct runtime and authoring core.
55
+
56
+ ## Largest Modules
57
+
58
+ The largest modules in the current tree are:
59
+
60
+ | Module | Lines | Functions | Classes |
61
+ | --- | ---: | ---: | ---: |
62
+ | `data_engine.runtime.runtime_db` | 938 | 0 | 1 |
63
+ | `data_engine.ui.gui.theme` | 720 | 1 | 0 |
64
+ | `data_engine.helpers.duckdb` | 639 | 20 | 0 |
65
+ | `data_engine.runtime.shared_state` | 523 | 30 | 0 |
66
+ | `data_engine.ui.gui.widgets.panels` | 507 | 12 | 0 |
67
+ | `data_engine.ui.gui.bootstrap` | 487 | 6 | 2 |
68
+ | `data_engine.ui.tui.bootstrap` | 475 | 6 | 2 |
69
+ | `data_engine.hosts.daemon.client` | 465 | 26 | 2 |
70
+ | `data_engine.application.runtime` | 449 | 4 | 8 |
71
+ | `data_engine.ui.gui.controllers.flows` | 439 | 0 | 3 |
72
+
73
+ ### What jumps out
74
+
75
+ - `runtime_db` is the densest persistence hotspot.
76
+ - `helpers.duckdb` has already become a meaningful public convenience layer.
77
+ - `ui.gui.theme` is large in a very different way: it is styling density, not orchestration density.
78
+ - GUI and TUI bootstraps are both sizable, which means the app has two real presentation surfaces, not one thin shell around the other.
79
+
80
+ ## Internal Stitching Points
81
+
82
+ The AST map also highlights modules with the most internal import fan-out. These tend to be the places where many parts of the system are assembled together.
83
+
84
+ | Module | Internal Imports | Lines |
85
+ | --- | ---: | ---: |
86
+ | `data_engine.domain` | 65 | 92 |
87
+ | `data_engine.views` | 61 | 109 |
88
+ | `data_engine.hosts.daemon.app` | 45 | 199 |
89
+ | `data_engine.ui.gui.render_support` | 43 | 241 |
90
+ | `data_engine.ui.gui.bootstrap` | 40 | 487 |
91
+ | `data_engine.ui.tui.bootstrap` | 39 | 475 |
92
+ | `data_engine.ui.gui.presenters` | 39 | 72 |
93
+ | `data_engine.platform.workspace_policy` | 35 | 302 |
94
+ | `data_engine.ui.gui.helpers` | 30 | 62 |
95
+ | `data_engine.authoring.flow` | 29 | 361 |
96
+
97
+ ### What that means
98
+
99
+ - `data_engine.domain` and `data_engine.views` are acting as aggregation packages.
100
+ - `data_engine.hosts.daemon.app` is a strong assembly point for the daemon host.
101
+ - `data_engine.ui.gui.bootstrap` and `data_engine.ui.tui.bootstrap` are real composition roots.
102
+ - `data_engine.platform.workspace_policy` is central enough that path/layout drift shows up there quickly.
103
+ - `data_engine.authoring.flow` remains one of the most important authoring core modules.
104
+
105
+ ## Practical Mental Model
106
+
107
+ If you are navigating the repo, this is a good compact way to think about it:
108
+
109
+ 1. Start in `data_engine.authoring` when you are changing how flows are expressed or executed.
110
+ 2. Start in `data_engine.helpers` when you are improving operator-friendly flow utilities like the DuckDB helpers.
111
+ 3. Start in `data_engine.runtime` and `data_engine.hosts` when the problem is about daemon behavior, state publication, logging, leasing, or checkpoints.
112
+ 4. Start in `data_engine.application` when the issue is host-agnostic use-case behavior rather than UI details.
113
+ 5. Start in `data_engine.ui` when the issue is interaction, rendering, presentation, or operator workflow.
114
+ 6. Start in `data_engine.platform.workspace_policy` when the issue is workspace discovery, path resolution, or local-vs-shared state layout.
115
+
116
+ ## Current Shape In One Sentence
117
+
118
+ The package is currently a UI-heavy operator application wrapped around a fairly well-separated runtime, authoring, and workspace-control core.
@@ -0,0 +1,268 @@
1
+ # Recipes
2
+
3
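+ This page collects end-to-end examples. Some recipes show a full flow module; shorter ones show only the step functions and assume `from data_engine import Flow` and `import polars as pl` as in the first recipe.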
4
+
5
+ When a recipe matches a shipped starter flow, the starter flow name is called out explicitly.
6
+
7
+ ## Recipe: Mirror every workbook
8
+
9
+ Starter flow: `example_mirror`
10
+
11
+ ```python
12
+ from data_engine import Flow
13
+ import polars as pl
14
+
15
+
16
+ def read_claims(context):
17
+ return pl.read_excel(context.source.path)
18
+
19
+
20
+ def write_target(context):
21
+ output = context.mirror.with_suffix(".parquet")
22
+ context.current.write_parquet(output)
23
+ return output
24
+
25
+
26
+ def build():
27
+ return (
28
+ Flow(group="Claims")
29
+ .watch(
30
+ mode="poll",
31
+ source="../../example_data/Input/claims_flat",
32
+ interval="5s",
33
+ extensions=[".xlsx", ".xlsm"],
34
+ )
35
+ .mirror(root="../../example_data/Output/example_mirror")
36
+ .step(read_claims, label="Read Excel")
37
+ .step(write_target, label="Write Parquet")
38
+ )
39
+ ```
40
+
41
+ Why this pattern is useful:
42
+
43
+ - poll reacts to new or changed source files
44
+ - `mirror.with_suffix(...)` preserves source-relative output naming
45
+ - returning the parquet path makes the output inspectable in the UI
46
+
47
+ ## Recipe: Filter rows and write a cleaned output
48
+
49
+ Starter flow: `example_completed`
50
+
51
+ ```python
52
+ import polars as pl
53
+
54
+
55
+ def read_claims(context):
56
+ return pl.read_excel(context.source.path)
57
+
58
+
59
+ def keep_completed(context):
60
+ return context.current.filter(pl.col("Step TO") == "COMPLETED")
61
+
62
+
63
+ def write_target(context):
64
+ output = context.mirror.with_suffix(".parquet")
65
+ context.current.write_parquet(output)
66
+ return output
67
+
68
+
69
+ def build():
70
+ return (
71
+ Flow(group="Claims")
72
+ .watch(
73
+ mode="poll",
74
+ source="../../example_data/Input/claims_flat",
75
+ interval="5s",
76
+ extensions=[".xlsx", ".xlsm"],
77
+ )
78
+ .mirror(root="../../example_data/Output/example_completed")
79
+ .step(read_claims, save_as="raw_df")
80
+ .step(keep_completed, use="raw_df", save_as="clean_df")
81
+ .step(write_target, use="clean_df")
82
+ )
83
+ ```
84
+
85
+ This is the classic "read -> filter -> write" shape, and it is a good default when you want clear previewable intermediates.
86
+
87
+ ## Recipe: Capture source metadata during processing
88
+
89
+ Starter flow: `example_metadata`
90
+
91
+ ```python
92
+ def read_claims(context):
93
+ return pl.read_excel(context.source.path)
94
+
95
+
96
+ def capture_source_info(context):
97
+ metadata = context.source_metadata()
98
+ if metadata is not None:
99
+ context.metadata["source_name"] = metadata.name
100
+ context.metadata["source_size_bytes"] = metadata.size_bytes
101
+ return context.current
102
+ ```
103
+
104
+ This is useful when you want provenance details recorded in `context.metadata` without changing the main pipeline object.
105
+
106
+ ## Recipe: Produce a stable latest snapshot
107
+
108
+ Starter flow: `example_snapshot`
109
+
110
+ ```python
111
+ def write_latest_snapshot(context):
112
+ snapshot = context.mirror.root_file("artifacts/example_snapshot.parquet")
113
+ context.current.write_parquet(snapshot)
114
+ return snapshot
115
+ ```
116
+
117
+ Use `mirror.root_file(...)` when the result should be one stable artifact for the whole flow rather than one file per source item.
118
+
119
+ ## Recipe: Read selected worksheets from a multi-sheet workbook
120
+
121
+ Starter flow: `example_multisheet`
122
+
123
+ ```python
124
+ def read_selected_sheets(context):
125
+ return pl.read_excel(context.source.path, sheet_name=["Claims", "Summary"])
126
+ ```
127
+
128
+ This is a good reminder that step code stays native. Data Engine does not wrap the underlying dataframe library calls.
129
+
130
+ ## Recipe: Single-file settings workflow
131
+
132
+ Starter flows: `example_single_watch` and `example_schedule`
133
+
134
+ ```python
135
+ def read_settings(context):
136
+ return pl.read_excel(context.source.path)
137
+
138
+
139
+ def write_settings(context):
140
+ output = context.mirror.with_suffix(".parquet")
141
+ context.current.write_parquet(output)
142
+ return output
143
+
144
+
145
+ def build():
146
+ return (
147
+ Flow(group="Settings")
148
+ .watch(
149
+ mode="schedule",
150
+ run_as="batch",
151
+ interval="15m",
152
+ source="../../example_data/Settings/single_watch.xlsx",
153
+ )
154
+ .mirror(root="../../example_data/Output/example_schedule")
155
+ .step(read_settings, save_as="settings_df")
156
+ .step(write_settings, use="settings_df", label="Write Parquet")
157
+ )
158
+ ```
159
+
160
+ This is the right shape when the flow should rerun on a schedule against one well-known source file.
161
+
162
+ ## Recipe: Batch read with `map(...)` or `step_each(...)`
163
+
164
+ Starter flow shape: `example_summary`
165
+
166
+ ```python
167
+ from data_engine import Flow
168
+ import polars as pl
169
+
170
+
171
+ def read_claims(file_ref):
172
+ return pl.read_excel(file_ref.path)
173
+
174
+
175
+ def combine_claims(context):
176
+ return pl.concat(context.current, how="vertical_relaxed")
177
+
178
+
179
+ def build():
180
+ return (
181
+ Flow(group="Analytics")
182
+ .watch(mode="schedule", run_as="batch", interval="15m", source="../../example_data/Input/claims_flat")
183
+ .collect([".xlsx"], save_as="claim_files")
184
+ .map(read_claims, use="claim_files", save_as="claim_frames")
185
+ .step(combine_claims, use="claim_frames")
186
+ )
187
+ ```
188
+
189
+ `map(...)` is the right tool when the same callable should run once per collected file, and `step_each(...)` is the equivalent alias. Both raise immediately when the batch is empty.
190
+
191
+ ## Recipe: Load into DuckDB and export a summary
192
+
193
+ Starter flow: `example_summary`
194
+
195
+ ```python
196
+ import duckdb
197
+
198
+
199
+ def read_claims(file_ref):
200
+ return pl.read_excel(file_ref.path)
201
+
202
+
203
+ def combine_claims(context):
204
+ return pl.concat(context.current, how="vertical_relaxed")
205
+
206
+
207
+ def build_summary(context):
208
+ conn = duckdb.connect(context.database("analytics.duckdb"))
209
+ try:
210
+ conn.register("input", context.current)
211
+ return conn.sql(
212
+ """
213
+ select
214
+ workflow,
215
+ count(*) as row_count
216
+ from input
217
+ group by workflow
218
+ order by row_count desc
219
+ """
220
+ ).pl()
221
+ finally:
222
+ conn.close()
223
+
224
+
225
+ def write_summary(context):
226
+ output = context.mirror.file("workflow_summary.parquet")
227
+ context.current.write_parquet(output)
228
+ return output
229
+
230
+
231
+ def build():
232
+ return (
233
+ Flow(group="Analytics")
234
+ .watch(mode="schedule", run_as="batch", interval="15m", source="../../example_data/Input/claims_flat")
235
+ .mirror(root="../../example_data/Output/example_summary")
236
+ .collect([".xlsx"], save_as="claim_files")
237
+ .map(read_claims, use="claim_files", save_as="claim_frames")
238
+ .step(combine_claims, use="claim_frames", save_as="raw_df")
239
+ .step(build_summary, use="raw_df", save_as="summary_df")
240
+ .step(write_summary, use="summary_df")
241
+ )
242
+ ```
243
+
244
+ This recipe is a good place to prefer `context.database(...)`, because the DuckDB file acts as a workspace-local database asset rather than a one-off mirrored source artifact.
245
+
246
+ ## Recipe: Use TOML workspace config
247
+
248
+ ```python
249
+ def apply_threshold(context):
250
+ cfg = context.config.require("claims")
251
+ threshold = cfg.get("filters", {}).get("minimum_amount", 0)
252
+ return context.current.filter(pl.col("amount") >= threshold)
253
+ ```
254
+
255
+ This is a clean way to keep operator-tunable values out of the flow chain while still making the dependency explicit.
256
+
257
+ ## Recipe: Write several outputs for one source
258
+
259
+ ```python
260
+ def write_outputs(context):
261
+ open_path = context.mirror.namespaced_file("open_claims.parquet")
262
+ closed_path = context.mirror.namespaced_file("closed_claims.parquet")
263
+ context.current.filter(pl.col("status") == "OPEN").write_parquet(open_path)
264
+ context.current.filter(pl.col("status") == "CLOSED").write_parquet(closed_path)
265
+ return open_path
266
+ ```
267
+
268
+ Use `namespaced_file(...)` when one source item naturally produces several derived outputs.
@@ -0,0 +1,22 @@
1
+ Data Engine documentation
2
+ =========================
3
+
4
+ This site combines hand-written author guides with generated API reference material.
5
+
6
+ .. toctree::
7
+ :maxdepth: 2
8
+ :caption: Contents
9
+
10
+ guides/getting-started
11
+ guides/core-concepts
12
+ guides/configuring-flows
13
+ guides/authoring-flow-modules
14
+ guides/flow-methods
15
+ guides/database-methods
16
+ guides/duckdb-helpers
17
+ guides/recipes
18
+ guides/app-runtime-and-workspaces
19
+ guides/flow-context
20
+ api
21
+ guides/project-map
22
+ guides/project-inventory
@@ -0,0 +1,92 @@
1
+ """Domain models for Data Engine."""
2
+
3
+ from data_engine.domain.actions import OperatorActionContext, SelectedFlowState
4
+ from data_engine.domain.catalog import FlowCatalogEntry, FlowCatalogLike, FlowCatalogState, default_flow_state, flow_category
5
+ from data_engine.domain.diagnostics import ClassifiedProcessInfo, DoctorCheck, ProcessInfo, WorkspaceLeaseDiagnostic
6
+ from data_engine.domain.details import (
7
+ FlowSummaryState,
8
+ FlowSummaryRow,
9
+ OperationDetailRow,
10
+ RunDetailState,
11
+ RunStepDetailRow,
12
+ SelectedFlowDetailState,
13
+ )
14
+ from data_engine.domain.errors import StructuredErrorField, StructuredErrorState
15
+ from data_engine.domain.inspection import ConfigPreviewState, FlowStepOutputsState, StepOutputIndex
16
+ from data_engine.domain.logs import (
17
+ FlowLogEntry,
18
+ LogKind,
19
+ RuntimeStepEvent,
20
+ format_log_line,
21
+ format_runtime_message,
22
+ parse_runtime_event,
23
+ parse_runtime_message,
24
+ short_source_label,
25
+ )
26
+ from data_engine.domain.operations import OperationFlowState, OperationRowState, OperationSessionState
27
+ from data_engine.domain.operator import OperatorSessionState
28
+ from data_engine.domain.runtime import (
29
+ DaemonLifecyclePolicy,
30
+ DaemonStatusState,
31
+ ManualRunState,
32
+ RuntimeSessionState,
33
+ WorkspaceControlState,
34
+ )
35
+ from data_engine.domain.runs import FlowRunState, RunKey, RunStepState
36
+ from data_engine.domain.source_state import SourceSignature
37
+ from data_engine.domain.support import DocumentationSessionState, WorkspaceSupportState
38
+ from data_engine.domain.time import parse_utc_text, utcnow_text
39
+ from data_engine.domain.workspace import WorkspaceRootState, WorkspaceSelectionState, WorkspaceSessionState
40
+
41
+ __all__ = [
42
+ "OperatorActionContext",
43
+ "SelectedFlowState",
44
+ "FlowCatalogEntry",
45
+ "FlowCatalogLike",
46
+ "FlowCatalogState",
47
+ "default_flow_state",
48
+ "flow_category",
49
+ "ClassifiedProcessInfo",
50
+ "ConfigPreviewState",
51
+ "DoctorCheck",
52
+ "FlowSummaryState",
53
+ "FlowSummaryRow",
54
+ "FlowStepOutputsState",
55
+ "ProcessInfo",
56
+ "OperationDetailRow",
57
+ "RunDetailState",
58
+ "RunStepDetailRow",
59
+ "SelectedFlowDetailState",
60
+ "StructuredErrorField",
61
+ "StructuredErrorState",
62
+ "StepOutputIndex",
63
+ "WorkspaceLeaseDiagnostic",
64
+ "FlowLogEntry",
65
+ "LogKind",
66
+ "RuntimeStepEvent",
67
+ "format_log_line",
68
+ "format_runtime_message",
69
+ "parse_runtime_event",
70
+ "parse_runtime_message",
71
+ "short_source_label",
72
+ "OperationFlowState",
73
+ "OperationRowState",
74
+ "OperationSessionState",
75
+ "OperatorSessionState",
76
+ "parse_utc_text",
77
+ "DaemonStatusState",
78
+ "DaemonLifecyclePolicy",
79
+ "ManualRunState",
80
+ "RuntimeSessionState",
81
+ "WorkspaceControlState",
82
+ "FlowRunState",
83
+ "RunKey",
84
+ "RunStepState",
85
+ "SourceSignature",
86
+ "DocumentationSessionState",
87
+ "WorkspaceSupportState",
88
+ "utcnow_text",
89
+ "WorkspaceRootState",
90
+ "WorkspaceSelectionState",
91
+ "WorkspaceSessionState",
92
+ ]
@@ -0,0 +1,69 @@
1
+ """Domain models for operator action availability and selected-flow state."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import TYPE_CHECKING, Container, Mapping
7
+
8
+ from data_engine.domain.catalog import FlowCatalogLike
9
+ from data_engine.domain.runtime import RuntimeSessionState
10
+
11
+
12
@dataclass(frozen=True)
class SelectedFlowState:
    """Immutable snapshot describing the flow that is currently selected.

    ``card`` is ``None`` when no flow is selected. When built through
    :meth:`from_runtime`, ``state`` carries the flow's state label only if
    that label is one of the active states; otherwise it is the empty string.
    """

    card: FlowCatalogLike | None
    state: str = ""
    has_logs: bool = False
    group_active: bool = False

    @property
    def present(self) -> bool:
        """Whether any flow is selected."""
        return self.card is not None

    @property
    def valid(self) -> bool:
        """Whether a flow is selected and its ``valid`` attribute is truthy."""
        selected = self.card
        if selected is None:
            return False
        return bool(selected.valid)

    @property
    def running(self) -> bool:
        """Whether ``state`` is non-empty."""
        return bool(self.state)

    @classmethod
    def from_runtime(
        cls,
        *,
        card: FlowCatalogLike | None,
        flow_states: Mapping[str, str],
        runtime_session: RuntimeSessionState,
        flow_groups_by_name: Mapping[str, str],
        active_flow_states: Container[str],
        has_logs: bool,
    ) -> "SelectedFlowState":
        """Resolve the selected-flow state from runtime and selection inputs.

        Args:
            card: Selected flow metadata, or ``None`` when nothing is selected.
            flow_states: State labels keyed by flow name; ``card.state`` is the
                fallback when the flow has no entry.
            runtime_session: Session object queried for group activity.
            flow_groups_by_name: Mapping forwarded to
                ``runtime_session.is_group_active`` together with ``card.group``.
            active_flow_states: State labels that count as active.
            has_logs: Stored unchanged on the result.

        Returns:
            A state with every default when ``card`` is ``None``; otherwise a
            state whose ``state`` is blanked unless it is an active label.
        """
        if card is None:
            return cls(card=None)

        current = flow_states.get(card.name, card.state)
        if current not in active_flow_states:
            # Non-active labels are dropped so that ``running`` reads False.
            current = ""
        group_is_active = runtime_session.is_group_active(card.group, flow_groups_by_name)
        return cls(
            card=card,
            state=current,
            has_logs=has_logs,
            group_active=group_is_active,
        )
54
+
55
+
56
@dataclass(frozen=True)
class OperatorActionContext:
    """Immutable bundle of the inputs used to derive operator action availability."""

    # Runtime session state the actions are evaluated against.
    runtime_session: RuntimeSessionState
    # Resolved state of the currently selected flow.
    selected_flow: SelectedFlowState
    # NOTE(review): presumably mirrors ``FlowCatalogState.has_automated_flows`` - confirm at the call site.
    has_automated_flows: bool
    # Defaults to True; callers pass False explicitly.
    workspace_available: bool = True
    # Defaults to False.
    selected_run_group_present: bool = False
65
+
66
+ __all__ = [
67
+ "OperatorActionContext",
68
+ "SelectedFlowState",
69
+ ]
@@ -0,0 +1,128 @@
1
+ """Domain models for discovered flow catalog state."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, replace
6
+ from typing import Iterable, Protocol
7
+
8
+
9
@dataclass(frozen=True)
class FlowCatalogEntry:
    """Immutable record for a single discovered flow.

    All fields are required except ``error``, which defaults to the empty
    string.
    """

    # Internal flow name; ``FlowCatalogState`` keys its lookups on it.
    name: str
    # Group label, or None.
    group: str | None
    title: str
    description: str
    source_root: str
    target_root: str
    # Flow mode; "poll" and "schedule" are treated as automated by ``flow_category``.
    mode: str
    interval: str
    operations: str
    operation_items: tuple[str, ...]
    # State label used as the initial per-flow state while the entry is valid.
    state: str
    valid: bool
    category: str
    error: str = ""
27
+
28
+
29
class FlowCatalogLike(Protocol):
    """Structural contract for flow metadata shared by domain and presentation layers.

    Any object exposing these attributes satisfies the protocol; the attribute
    set matches ``FlowCatalogEntry`` field for field.
    """

    name: str
    group: str | None
    title: str
    description: str
    source_root: str
    target_root: str
    mode: str
    interval: str
    operations: str
    operation_items: tuple[str, ...]
    state: str
    valid: bool
    category: str
    error: str
46
+
47
+
48
def flow_category(mode: str | None) -> str:
    """Return the top-level category for one flow mode.

    Args:
        mode: Flow mode label. ``None`` is accepted, matching
            ``default_flow_state``.

    Returns:
        ``"automated"`` for ``"poll"`` and ``"schedule"``; ``"manual"`` for
        every other value, including ``None``.
    """
    return "automated" if mode in {"poll", "schedule"} else "manual"
51
+
52
+
53
def default_flow_state(mode: str | None) -> str:
    """Map a flow mode to the label shown while the flow is idle.

    ``"poll"`` gives ``"poll ready"``, ``"schedule"`` gives
    ``"schedule ready"``, and anything else (including ``None``) gives
    ``"manual"``.
    """
    idle_labels = (
        ("poll", "poll ready"),
        ("schedule", "schedule ready"),
    )
    for known_mode, idle_label in idle_labels:
        if mode == known_mode:
            return idle_label
    return "manual"
60
+
61
+
62
@dataclass(frozen=True)
class FlowCatalogState:
    """Immutable, surface-agnostic snapshot of discovered flows and the selection.

    Every ``with_*`` method leaves the instance untouched and returns a
    modified copy built with ``dataclasses.replace``.
    """

    entries: tuple[FlowCatalogEntry, ...] = ()
    # None until states are assigned; ``empty()`` and ``with_entries()`` set a dict.
    flow_states: dict[str, str] | None = None
    selected_flow_name: str | None = None
    empty_message: str = ""

    @classmethod
    def empty(cls, *, empty_message: str = "") -> "FlowCatalogState":
        """Build a catalog with no entries, no selection, and an empty state dict."""
        return cls(
            entries=(),
            flow_states={},
            selected_flow_name=None,
            empty_message=empty_message,
        )

    @property
    def entries_by_name(self) -> dict[str, FlowCatalogEntry]:
        """Index the entries by internal flow name (a later duplicate wins)."""
        lookup = {}
        for entry in self.entries:
            lookup[entry.name] = entry
        return lookup

    @property
    def valid_entries(self) -> tuple[FlowCatalogEntry, ...]:
        """Entries whose ``valid`` flag is truthy, in catalog order."""
        return tuple(filter(lambda entry: entry.valid, self.entries))

    @property
    def has_automated_flows(self) -> bool:
        """Whether at least one valid entry runs in ``poll`` or ``schedule`` mode."""
        for entry in self.entries:
            if entry.valid and entry.mode in {"poll", "schedule"}:
                return True
        return False

    @property
    def selected_entry(self) -> FlowCatalogEntry | None:
        """The entry named by ``selected_flow_name``, or ``None`` if unset or gone."""
        wanted = self.selected_flow_name
        return None if wanted is None else self.entries_by_name.get(wanted)

    def with_entries(self, entries: Iterable[FlowCatalogEntry]) -> "FlowCatalogState":
        """Return a copy holding ``entries`` with selection and states normalized.

        The current selection is kept when an entry with that name still
        exists; otherwise the first entry is selected, or ``None`` when there
        are no entries. A state already recorded for an entry name is kept;
        a new name starts from ``entry.state`` when valid, else ``"invalid"``.
        """
        materialized = tuple(entries)
        known_names = {entry.name for entry in materialized}

        if self.selected_flow_name in known_names:
            selection = self.selected_flow_name
        elif materialized:
            selection = materialized[0].name
        else:
            selection = None

        previous = self.flow_states or {}
        refreshed = {}
        for entry in materialized:
            fallback = entry.state if entry.valid else "invalid"
            refreshed[entry.name] = previous.get(entry.name, fallback)

        return replace(
            self,
            entries=materialized,
            flow_states=refreshed,
            selected_flow_name=selection,
        )

    def with_selected_flow_name(self, flow_name: str | None) -> "FlowCatalogState":
        """Return a copy selecting ``flow_name`` (stored without validation)."""
        return replace(self, selected_flow_name=flow_name)

    def with_flow_states(self, flow_states: dict[str, str]) -> "FlowCatalogState":
        """Return a copy whose states are a shallow copy of ``flow_states``."""
        copied_states = dict(flow_states)
        return replace(self, flow_states=copied_states)

    def with_empty_message(self, message: str) -> "FlowCatalogState":
        """Return a copy with the empty/error message set to ``message``."""
        return replace(self, empty_message=message)
120
+
121
+
122
+ __all__ = [
123
+ "FlowCatalogEntry",
124
+ "FlowCatalogLike",
125
+ "FlowCatalogState",
126
+ "default_flow_state",
127
+ "flow_category",
128
+ ]