PyPI - py-data-engine - Versions diffs - 0.1.0__py3-none-any.whl - Mend

py-data-engine 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (200) hide show

data_engine/__init__.py +37 -0
data_engine/application/__init__.py +39 -0
data_engine/application/actions.py +42 -0
data_engine/application/catalog.py +151 -0
data_engine/application/control.py +213 -0
data_engine/application/details.py +73 -0
data_engine/application/runtime.py +449 -0
data_engine/application/workspace.py +62 -0
data_engine/authoring/__init__.py +14 -0
data_engine/authoring/builder.py +31 -0
data_engine/authoring/execution/__init__.py +6 -0
data_engine/authoring/execution/app.py +6 -0
data_engine/authoring/execution/context.py +82 -0
data_engine/authoring/execution/continuous.py +176 -0
data_engine/authoring/execution/grouped.py +106 -0
data_engine/authoring/execution/logging.py +83 -0
data_engine/authoring/execution/polling.py +135 -0
data_engine/authoring/execution/runner.py +210 -0
data_engine/authoring/execution/single.py +171 -0
data_engine/authoring/flow.py +361 -0
data_engine/authoring/helpers.py +160 -0
data_engine/authoring/model.py +59 -0
data_engine/authoring/primitives.py +430 -0
data_engine/authoring/services.py +42 -0
data_engine/devtools/__init__.py +3 -0
data_engine/devtools/project_ast_map.py +503 -0
data_engine/docs/__init__.py +1 -0
data_engine/docs/sphinx_source/_static/custom.css +13 -0
data_engine/docs/sphinx_source/api.rst +42 -0
data_engine/docs/sphinx_source/conf.py +37 -0
data_engine/docs/sphinx_source/guides/app-runtime-and-workspaces.md +397 -0
data_engine/docs/sphinx_source/guides/authoring-flow-modules.md +215 -0
data_engine/docs/sphinx_source/guides/configuring-flows.md +185 -0
data_engine/docs/sphinx_source/guides/core-concepts.md +208 -0
data_engine/docs/sphinx_source/guides/database-methods.md +107 -0
data_engine/docs/sphinx_source/guides/duckdb-helpers.md +462 -0
data_engine/docs/sphinx_source/guides/flow-context.md +538 -0
data_engine/docs/sphinx_source/guides/flow-methods.md +206 -0
data_engine/docs/sphinx_source/guides/getting-started.md +271 -0
data_engine/docs/sphinx_source/guides/project-inventory.md +5683 -0
data_engine/docs/sphinx_source/guides/project-map.md +118 -0
data_engine/docs/sphinx_source/guides/recipes.md +268 -0
data_engine/docs/sphinx_source/index.rst +22 -0
data_engine/domain/__init__.py +92 -0
data_engine/domain/actions.py +69 -0
data_engine/domain/catalog.py +128 -0
data_engine/domain/details.py +214 -0
data_engine/domain/diagnostics.py +56 -0
data_engine/domain/errors.py +104 -0
data_engine/domain/inspection.py +99 -0
data_engine/domain/logs.py +118 -0
data_engine/domain/operations.py +172 -0
data_engine/domain/operator.py +72 -0
data_engine/domain/runs.py +155 -0
data_engine/domain/runtime.py +279 -0
data_engine/domain/source_state.py +17 -0
data_engine/domain/support.py +54 -0
data_engine/domain/time.py +23 -0
data_engine/domain/workspace.py +159 -0
data_engine/flow_modules/__init__.py +1 -0
data_engine/flow_modules/flow_module_compiler.py +179 -0
data_engine/flow_modules/flow_module_loader.py +201 -0
data_engine/helpers/__init__.py +25 -0
data_engine/helpers/duckdb.py +705 -0
data_engine/hosts/__init__.py +1 -0
data_engine/hosts/daemon/__init__.py +23 -0
data_engine/hosts/daemon/app.py +221 -0
data_engine/hosts/daemon/bootstrap.py +69 -0
data_engine/hosts/daemon/client.py +465 -0
data_engine/hosts/daemon/commands.py +64 -0
data_engine/hosts/daemon/composition.py +310 -0
data_engine/hosts/daemon/constants.py +15 -0
data_engine/hosts/daemon/entrypoints.py +97 -0
data_engine/hosts/daemon/lifecycle.py +191 -0
data_engine/hosts/daemon/manager.py +272 -0
data_engine/hosts/daemon/ownership.py +126 -0
data_engine/hosts/daemon/runtime_commands.py +188 -0
data_engine/hosts/daemon/runtime_control.py +31 -0
data_engine/hosts/daemon/server.py +84 -0
data_engine/hosts/daemon/shared_state.py +147 -0
data_engine/hosts/daemon/state_sync.py +101 -0
data_engine/platform/__init__.py +1 -0
data_engine/platform/identity.py +35 -0
data_engine/platform/local_settings.py +146 -0
data_engine/platform/theme.py +259 -0
data_engine/platform/workspace_models.py +190 -0
data_engine/platform/workspace_policy.py +333 -0
data_engine/runtime/__init__.py +1 -0
data_engine/runtime/file_watch.py +185 -0
data_engine/runtime/ledger_models.py +116 -0
data_engine/runtime/runtime_db.py +938 -0
data_engine/runtime/shared_state.py +523 -0
data_engine/services/__init__.py +49 -0
data_engine/services/daemon.py +64 -0
data_engine/services/daemon_state.py +40 -0
data_engine/services/flow_catalog.py +102 -0
data_engine/services/flow_execution.py +48 -0
data_engine/services/ledger.py +85 -0
data_engine/services/logs.py +65 -0
data_engine/services/runtime_binding.py +105 -0
data_engine/services/runtime_execution.py +126 -0
data_engine/services/runtime_history.py +62 -0
data_engine/services/settings.py +58 -0
data_engine/services/shared_state.py +28 -0
data_engine/services/theme.py +59 -0
data_engine/services/workspace_provisioning.py +224 -0
data_engine/services/workspaces.py +74 -0
data_engine/ui/__init__.py +3 -0
data_engine/ui/cli/__init__.py +19 -0
data_engine/ui/cli/app.py +161 -0
data_engine/ui/cli/commands_doctor.py +178 -0
data_engine/ui/cli/commands_run.py +80 -0
data_engine/ui/cli/commands_start.py +100 -0
data_engine/ui/cli/commands_workspace.py +97 -0
data_engine/ui/cli/dependencies.py +44 -0
data_engine/ui/cli/parser.py +56 -0
data_engine/ui/gui/__init__.py +25 -0
data_engine/ui/gui/app.py +116 -0
data_engine/ui/gui/bootstrap.py +487 -0
data_engine/ui/gui/bootstrapper.py +140 -0
data_engine/ui/gui/cache_models.py +23 -0
data_engine/ui/gui/control_support.py +185 -0
data_engine/ui/gui/controllers/__init__.py +6 -0
data_engine/ui/gui/controllers/flows.py +439 -0
data_engine/ui/gui/controllers/runtime.py +245 -0
data_engine/ui/gui/dialogs/__init__.py +12 -0
data_engine/ui/gui/dialogs/messages.py +88 -0
data_engine/ui/gui/dialogs/previews.py +222 -0
data_engine/ui/gui/helpers/__init__.py +62 -0
data_engine/ui/gui/helpers/inspection.py +81 -0
data_engine/ui/gui/helpers/lifecycle.py +112 -0
data_engine/ui/gui/helpers/scroll.py +28 -0
data_engine/ui/gui/helpers/theming.py +87 -0
data_engine/ui/gui/icons/dark_light.svg +12 -0
data_engine/ui/gui/icons/documentation.svg +1 -0
data_engine/ui/gui/icons/failed.svg +3 -0
data_engine/ui/gui/icons/group.svg +4 -0
data_engine/ui/gui/icons/home.svg +2 -0
data_engine/ui/gui/icons/manual.svg +2 -0
data_engine/ui/gui/icons/poll.svg +2 -0
data_engine/ui/gui/icons/schedule.svg +4 -0
data_engine/ui/gui/icons/settings.svg +2 -0
data_engine/ui/gui/icons/started.svg +3 -0
data_engine/ui/gui/icons/success.svg +3 -0
data_engine/ui/gui/icons/view-log.svg +3 -0
data_engine/ui/gui/icons.py +50 -0
data_engine/ui/gui/launcher.py +48 -0
data_engine/ui/gui/presenters/__init__.py +72 -0
data_engine/ui/gui/presenters/docs.py +140 -0
data_engine/ui/gui/presenters/logs.py +58 -0
data_engine/ui/gui/presenters/runtime_projection.py +29 -0
data_engine/ui/gui/presenters/sidebar.py +88 -0
data_engine/ui/gui/presenters/steps.py +148 -0
data_engine/ui/gui/presenters/workspace.py +39 -0
data_engine/ui/gui/presenters/workspace_binding.py +75 -0
data_engine/ui/gui/presenters/workspace_settings.py +182 -0
data_engine/ui/gui/preview_models.py +37 -0
data_engine/ui/gui/render_support.py +241 -0
data_engine/ui/gui/rendering/__init__.py +12 -0
data_engine/ui/gui/rendering/artifacts.py +95 -0
data_engine/ui/gui/rendering/icons.py +50 -0
data_engine/ui/gui/runtime.py +47 -0
data_engine/ui/gui/state_support.py +193 -0
data_engine/ui/gui/support.py +214 -0
data_engine/ui/gui/surface.py +209 -0
data_engine/ui/gui/theme.py +720 -0
data_engine/ui/gui/widgets/__init__.py +34 -0
data_engine/ui/gui/widgets/config.py +41 -0
data_engine/ui/gui/widgets/logs.py +62 -0
data_engine/ui/gui/widgets/panels.py +507 -0
data_engine/ui/gui/widgets/sidebar.py +130 -0
data_engine/ui/gui/widgets/steps.py +84 -0
data_engine/ui/tui/__init__.py +5 -0
data_engine/ui/tui/app.py +222 -0
data_engine/ui/tui/bootstrap.py +475 -0
data_engine/ui/tui/bootstrapper.py +117 -0
data_engine/ui/tui/controllers/__init__.py +6 -0
data_engine/ui/tui/controllers/flows.py +349 -0
data_engine/ui/tui/controllers/runtime.py +167 -0
data_engine/ui/tui/runtime.py +34 -0
data_engine/ui/tui/state_support.py +141 -0
data_engine/ui/tui/support.py +63 -0
data_engine/ui/tui/theme.py +204 -0
data_engine/ui/tui/widgets.py +123 -0
data_engine/views/__init__.py +109 -0
data_engine/views/actions.py +80 -0
data_engine/views/artifacts.py +58 -0
data_engine/views/flow_display.py +69 -0
data_engine/views/logs.py +54 -0
data_engine/views/models.py +96 -0
data_engine/views/presentation.py +133 -0
data_engine/views/runs.py +62 -0
data_engine/views/state.py +39 -0
data_engine/views/status.py +13 -0
data_engine/views/text.py +109 -0
py_data_engine-0.1.0.dist-info/METADATA +330 -0
py_data_engine-0.1.0.dist-info/RECORD +200 -0
py_data_engine-0.1.0.dist-info/WHEEL +5 -0
py_data_engine-0.1.0.dist-info/entry_points.txt +2 -0
py_data_engine-0.1.0.dist-info/top_level.txt +1 -0

data_engine/docs/sphinx_source/guides/project-map.md ADDED Viewed

@@ -0,0 +1,118 @@
+# Project Map
+This page is a small structural map of the current `data_engine` package, based on the AST mapper in `src/data_engine/devtools/project_ast_map.py`.
+It is meant to answer:
+- where the codebase is heaviest
+- how the main packages are divided
+- which modules are acting like stitching points
+This is not a hand-wavy architecture diagram. It is a lightweight snapshot derived from the current Python source tree.
+## Regenerating the map
+The source for this page comes from:
+```bash
+python -m data_engine.devtools.project_ast_map \
+  src/data_engine
+```
+If the package structure changes substantially, this page should be refreshed.
+## Package Rollup
+These counts are package-level rollups from the AST snapshot taken when this page was last regenerated, so they can lag slightly behind the source that actually ships.
+| Package | Modules | Functions | Classes | Flows | Lines |
+| --- | ---: | ---: | ---: | ---: | ---: |
+| `data_engine` | 1 | 1 | 0 | 0 | 37 |
+| `data_engine.application` | 7 | 5 | 19 | 0 | 1029 |
+| `data_engine.authoring` | 16 | 20 | 24 | 0 | 2072 |
+| `data_engine.devtools` | 2 | 12 | 5 | 0 | 360 |
+| `data_engine.docs` | 2 | 0 | 0 | 0 | 38 |
+| `data_engine.domain` | 16 | 9 | 40 | 0 | 1811 |
+| `data_engine.flow_modules` | 3 | 17 | 2 | 0 | 381 |
+| `data_engine.helpers` | 2 | 20 | 0 | 0 | 662 |
+| `data_engine.hosts` | 17 | 55 | 14 | 0 | 2383 |
+| `data_engine.platform` | 6 | 20 | 9 | 0 | 933 |
+| `data_engine.runtime` | 5 | 37 | 9 | 0 | 1763 |
+| `data_engine.services` | 15 | 13 | 16 | 0 | 1189 |
+| `data_engine.ui` | 65 | 174 | 35 | 0 | 8607 |
+| `data_engine.views` | 11 | 29 | 9 | 0 | 822 |
+## How To Read It
+The package split currently looks like this:
+- `data_engine.ui` is by far the largest surface. That is expected because it includes both the Qt GUI and the TUI, plus their presenters, controllers, widgets, dialogs, rendering helpers, and bootstrapping.
+- `data_engine.hosts`, `data_engine.runtime`, and `data_engine.application` are the runtime control spine. That is where daemon orchestration, runtime state, and host-agnostic application use cases live.
+- `data_engine.authoring`, `data_engine.helpers`, and `data_engine.flow_modules` are the flow-authoring side of the package.
+- `data_engine.domain`, `data_engine.platform`, `data_engine.services`, and `data_engine.views` are the supporting layers that hold shared models, path policy, services, and rendering/state helpers.
+That means the current codebase is not “all runtime” or “all UI.” It is a UI-heavy operator product built on a fairly distinct runtime and authoring core.
+## Largest Modules
+The largest modules in the current tree are:
+| Module | Lines | Functions | Classes |
+| --- | ---: | ---: | ---: |
+| `data_engine.runtime.runtime_db` | 938 | 0 | 1 |
+| `data_engine.ui.gui.theme` | 720 | 1 | 0 |
+| `data_engine.helpers.duckdb` | 639 | 20 | 0 |
+| `data_engine.runtime.shared_state` | 523 | 30 | 0 |
+| `data_engine.ui.gui.widgets.panels` | 507 | 12 | 0 |
+| `data_engine.ui.gui.bootstrap` | 487 | 6 | 2 |
+| `data_engine.ui.tui.bootstrap` | 475 | 6 | 2 |
+| `data_engine.hosts.daemon.client` | 465 | 26 | 2 |
+| `data_engine.application.runtime` | 449 | 4 | 8 |
+| `data_engine.ui.gui.controllers.flows` | 439 | 0 | 3 |
+### What jumps out
+- `runtime_db` is the densest persistence hotspot.
+- `helpers.duckdb` has already become a meaningful public convenience layer.
+- `ui.gui.theme` is large in a very different way: it is styling density, not orchestration density.
+- GUI and TUI bootstraps are both sizable, which means the app has two real presentation surfaces, not one thin shell around the other.
+## Internal Stitching Points
+The AST map also highlights modules with the most internal import fan-out. These tend to be the places where many parts of the system are assembled together.
+| Module | Internal Imports | Lines |
+| --- | ---: | ---: |
+| `data_engine.domain` | 65 | 92 |
+| `data_engine.views` | 61 | 109 |
+| `data_engine.hosts.daemon.app` | 45 | 199 |
+| `data_engine.ui.gui.render_support` | 43 | 241 |
+| `data_engine.ui.gui.bootstrap` | 40 | 487 |
+| `data_engine.ui.tui.bootstrap` | 39 | 475 |
+| `data_engine.ui.gui.presenters` | 39 | 72 |
+| `data_engine.platform.workspace_policy` | 35 | 302 |
+| `data_engine.ui.gui.helpers` | 30 | 62 |
+| `data_engine.authoring.flow` | 29 | 361 |
+### What that means
+- `data_engine.domain` and `data_engine.views` are acting as aggregation packages.
+- `data_engine.hosts.daemon.app` is a strong assembly point for the daemon host.
+- `data_engine.ui.gui.bootstrap` and `data_engine.ui.tui.bootstrap` are real composition roots.
+- `data_engine.platform.workspace_policy` is central enough that path/layout drift shows up there quickly.
+- `data_engine.authoring.flow` remains one of the most important authoring core modules.
+## Practical Mental Model
+If you are navigating the repo, this is a good compact way to think about it:
+1. Start in `data_engine.authoring` when you are changing how flows are expressed or executed.
+2. Start in `data_engine.helpers` when you are improving operator-friendly flow utilities like the DuckDB helpers.
+3. Start in `data_engine.runtime` and `data_engine.hosts` when the problem is about daemon behavior, state publication, logging, leasing, or checkpoints.
+4. Start in `data_engine.application` when the issue is host-agnostic use-case behavior rather than UI details.
+5. Start in `data_engine.ui` when the issue is interaction, rendering, presentation, or operator workflow.
+6. Start in `data_engine.platform.workspace_policy` when the issue is workspace discovery, path resolution, or local-vs-shared state layout.
+## Current Shape In One Sentence
+The package is currently a UI-heavy operator application wrapped around a fairly well-separated runtime, authoring, and workspace-control core.

data_engine/docs/sphinx_source/guides/recipes.md ADDED Viewed

@@ -0,0 +1,268 @@
+# Recipes
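+This page collects end-to-end examples. The first recipe is shown in full; shorter recipes show only the parts that differ and assume the same imports (`from data_engine import Flow` and `import polars as pl`).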
+When a recipe matches a shipped starter flow, the starter flow name is called out explicitly.
+## Recipe: Mirror every workbook
+Starter flow: `example_mirror`
+```python
+from data_engine import Flow
+import polars as pl
+def read_claims(context):
+    return pl.read_excel(context.source.path)
+def write_target(context):
+    output = context.mirror.with_suffix(".parquet")
+    context.current.write_parquet(output)
+    return output
+def build():
+    return (
+        Flow(group="Claims")
+        .watch(
+            mode="poll",
+            source="../../example_data/Input/claims_flat",
+            interval="5s",
+            extensions=[".xlsx", ".xlsm"],
+        )
+        .mirror(root="../../example_data/Output/example_mirror")
+        .step(read_claims, label="Read Excel")
+        .step(write_target, label="Write Parquet")
+    )
+```
+Why this pattern is useful:
+- poll reacts to new or changed source files
+- `mirror.with_suffix(...)` preserves source-relative output naming
+- returning the parquet path makes the output inspectable in the UI
+## Recipe: Filter rows and write a cleaned output
+Starter flow: `example_completed`
+```python
+from data_engine import Flow
+import polars as pl
+def read_claims(context):
+    return pl.read_excel(context.source.path)
+def keep_completed(context):
+    return context.current.filter(pl.col("Step TO") == "COMPLETED")
+def write_target(context):
+    output = context.mirror.with_suffix(".parquet")
+    context.current.write_parquet(output)
+    return output
+def build():
+    return (
+        Flow(group="Claims")
+        .watch(
+            mode="poll",
+            source="../../example_data/Input/claims_flat",
+            interval="5s",
+            extensions=[".xlsx", ".xlsm"],
+        )
+        .mirror(root="../../example_data/Output/example_completed")
+        .step(read_claims, save_as="raw_df")
+        .step(keep_completed, use="raw_df", save_as="clean_df")
+        .step(write_target, use="clean_df")
+    )
+```
+This is the classic "read -> filter -> write" shape, and it is a good default when you want clear previewable intermediates.
+## Recipe: Capture source metadata during processing
+Starter flow: `example_metadata`
+```python
+def read_claims(context):
+    return pl.read_excel(context.source.path)
+def capture_source_info(context):
+    metadata = context.source_metadata()
+    if metadata is not None:
+        context.metadata["source_name"] = metadata.name
+        context.metadata["source_size_bytes"] = metadata.size_bytes
+    return context.current
+```
+This is useful when you want provenance details recorded in `context.metadata` without changing the main pipeline object.
+## Recipe: Produce a stable latest snapshot
+Starter flow: `example_snapshot`
+```python
+def write_latest_snapshot(context):
+    snapshot = context.mirror.root_file("artifacts/example_snapshot.parquet")
+    context.current.write_parquet(snapshot)
+    return snapshot
+```
+Use `mirror.root_file(...)` when the result should be one stable artifact for the whole flow rather than one file per source item.
+## Recipe: Read selected worksheets from a multi-sheet workbook
+Starter flow: `example_multisheet`
+```python
+def read_selected_sheets(context):
+    return pl.read_excel(context.source.path, sheet_name=["Claims", "Summary"])
+```
+This is a good reminder that step code stays native. Data Engine does not wrap the underlying dataframe library calls.
+## Recipe: Single-file settings workflow
+Starter flows: `example_single_watch` and `example_schedule`
+```python
+def read_settings(context):
+    return pl.read_excel(context.source.path)
+def write_settings(context):
+    output = context.mirror.with_suffix(".parquet")
+    context.current.write_parquet(output)
+    return output
+def build():
+    return (
+        Flow(group="Settings")
+        .watch(
+            mode="schedule",
+            run_as="batch",
+            interval="15m",
+            source="../../example_data/Settings/single_watch.xlsx",
+        )
+        .mirror(root="../../example_data/Output/example_schedule")
+        .step(read_settings, save_as="settings_df")
+        .step(write_settings, use="settings_df", label="Write Parquet")
+    )
+```
+This is the right shape when the flow should rerun on a schedule against one well-known source file.
+## Recipe: Batch read with `map(...)` or `step_each(...)`
+Starter flow shape: `example_summary`
+```python
+from data_engine import Flow
+import polars as pl
+def read_claims(file_ref):
+    return pl.read_excel(file_ref.path)
+def combine_claims(context):
+    return pl.concat(context.current, how="vertical_relaxed")
+def build():
+    return (
+        Flow(group="Analytics")
+        .watch(mode="schedule", run_as="batch", interval="15m", source="../../example_data/Input/claims_flat")
+        .collect([".xlsx"], save_as="claim_files")
+        .map(read_claims, use="claim_files", save_as="claim_frames")
+        .step(combine_claims, use="claim_frames")
+    )
+```
+`map(...)` is the right tool when the same callable should run once per collected file, and `step_each(...)` is the equivalent alias. Both raise immediately when the batch is empty.
+## Recipe: Load into DuckDB and export a summary
+Starter flow: `example_summary`
+```python
+from data_engine import Flow
+import duckdb
+import polars as pl
+def read_claims(file_ref):
+    return pl.read_excel(file_ref.path)
+def combine_claims(context):
+    return pl.concat(context.current, how="vertical_relaxed")
+def build_summary(context):
+    conn = duckdb.connect(context.database("analytics.duckdb"))
+    try:
+        conn.register("input", context.current)
+        return conn.sql(
+            """
+            select
+                workflow,
+                count(*) as row_count
+            from input
+            group by workflow
+            order by row_count desc
+            """
+        ).pl()
+    finally:
+        conn.close()
+def write_summary(context):
+    output = context.mirror.file("workflow_summary.parquet")
+    context.current.write_parquet(output)
+    return output
+def build():
+    return (
+        Flow(group="Analytics")
+        .watch(mode="schedule", run_as="batch", interval="15m", source="../../example_data/Input/claims_flat")
+        .mirror(root="../../example_data/Output/example_summary")
+        .collect([".xlsx"], save_as="claim_files")
+        .map(read_claims, use="claim_files", save_as="claim_frames")
+        .step(combine_claims, use="claim_frames", save_as="raw_df")
+        .step(build_summary, use="raw_df", save_as="summary_df")
+        .step(write_summary, use="summary_df")
+    )
+```
+This recipe is a good place to prefer `context.database(...)`, because the DuckDB file acts as a workspace-local database asset rather than a one-off mirrored source artifact.
+## Recipe: Use TOML workspace config
+```python
+def apply_threshold(context):
+    cfg = context.config.require("claims")
+    threshold = cfg.get("filters", {}).get("minimum_amount", 0)
+    return context.current.filter(pl.col("amount") >= threshold)
+```
+This is a clean way to keep operator-tunable values out of the flow chain while still making the dependency explicit.
+## Recipe: Write several outputs for one source
+```python
+def write_outputs(context):
+    open_path = context.mirror.namespaced_file("open_claims.parquet")
+    closed_path = context.mirror.namespaced_file("closed_claims.parquet")
+    context.current.filter(pl.col("status") == "OPEN").write_parquet(open_path)
+    context.current.filter(pl.col("status") == "CLOSED").write_parquet(closed_path)
+    return open_path
+```
+Use `namespaced_file(...)` when one source item naturally produces several derived outputs.

data_engine/docs/sphinx_source/index.rst ADDED Viewed

@@ -0,0 +1,22 @@
+Data Engine documentation
+=========================
+This site combines hand-written author guides with generated API reference material.
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents
+   guides/getting-started
+   guides/core-concepts
+   guides/configuring-flows
+   guides/authoring-flow-modules
+   guides/flow-methods
+   guides/database-methods
+   guides/duckdb-helpers
+   guides/recipes
+   guides/app-runtime-and-workspaces
+   guides/flow-context
+   api
+   guides/project-map
+   guides/project-inventory

data_engine/domain/__init__.py ADDED Viewed

@@ -0,0 +1,92 @@
+"""Domain models for Data Engine."""
+from data_engine.domain.actions import OperatorActionContext, SelectedFlowState
+from data_engine.domain.catalog import FlowCatalogEntry, FlowCatalogLike, FlowCatalogState, default_flow_state, flow_category
+from data_engine.domain.diagnostics import ClassifiedProcessInfo, DoctorCheck, ProcessInfo, WorkspaceLeaseDiagnostic
+from data_engine.domain.details import (
+    FlowSummaryState,
+    FlowSummaryRow,
+    OperationDetailRow,
+    RunDetailState,
+    RunStepDetailRow,
+    SelectedFlowDetailState,
+)
+from data_engine.domain.errors import StructuredErrorField, StructuredErrorState
+from data_engine.domain.inspection import ConfigPreviewState, FlowStepOutputsState, StepOutputIndex
+from data_engine.domain.logs import (
+    FlowLogEntry,
+    LogKind,
+    RuntimeStepEvent,
+    format_log_line,
+    format_runtime_message,
+    parse_runtime_event,
+    parse_runtime_message,
+    short_source_label,
+)
+from data_engine.domain.operations import OperationFlowState, OperationRowState, OperationSessionState
+from data_engine.domain.operator import OperatorSessionState
+from data_engine.domain.runtime import (
+    DaemonLifecyclePolicy,
+    DaemonStatusState,
+    ManualRunState,
+    RuntimeSessionState,
+    WorkspaceControlState,
+)
+from data_engine.domain.runs import FlowRunState, RunKey, RunStepState
+from data_engine.domain.source_state import SourceSignature
+from data_engine.domain.support import DocumentationSessionState, WorkspaceSupportState
+from data_engine.domain.time import parse_utc_text, utcnow_text
+from data_engine.domain.workspace import WorkspaceRootState, WorkspaceSelectionState, WorkspaceSessionState
+# Public surface of ``data_engine.domain``: every name listed here is imported
+# from one of the submodules above and re-exported unchanged.
+__all__ = [
+    "OperatorActionContext",
+    "SelectedFlowState",
+    "FlowCatalogEntry",
+    "FlowCatalogLike",
+    "FlowCatalogState",
+    "default_flow_state",
+    "flow_category",
+    "ClassifiedProcessInfo",
+    "ConfigPreviewState",
+    "DoctorCheck",
+    "FlowSummaryState",
+    "FlowSummaryRow",
+    "FlowStepOutputsState",
+    "ProcessInfo",
+    "OperationDetailRow",
+    "RunDetailState",
+    "RunStepDetailRow",
+    "SelectedFlowDetailState",
+    "StructuredErrorField",
+    "StructuredErrorState",
+    "StepOutputIndex",
+    "WorkspaceLeaseDiagnostic",
+    "FlowLogEntry",
+    "LogKind",
+    "RuntimeStepEvent",
+    "format_log_line",
+    "format_runtime_message",
+    "parse_runtime_event",
+    "parse_runtime_message",
+    "short_source_label",
+    "OperationFlowState",
+    "OperationRowState",
+    "OperationSessionState",
+    "OperatorSessionState",
+    "parse_utc_text",
+    "DaemonStatusState",
+    "DaemonLifecyclePolicy",
+    "ManualRunState",
+    "RuntimeSessionState",
+    "WorkspaceControlState",
+    "FlowRunState",
+    "RunKey",
+    "RunStepState",
+    "SourceSignature",
+    "DocumentationSessionState",
+    "WorkspaceSupportState",
+    "utcnow_text",
+    "WorkspaceRootState",
+    "WorkspaceSelectionState",
+    "WorkspaceSessionState",
+]

data_engine/domain/actions.py ADDED Viewed

@@ -0,0 +1,69 @@
+"""Domain models for operator action availability and selected-flow state."""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Container, Mapping
+from data_engine.domain.catalog import FlowCatalogLike
+from data_engine.domain.runtime import RuntimeSessionState
+@dataclass(frozen=True)
+class SelectedFlowState:
+    """Resolved state for one selected flow."""
+    card: FlowCatalogLike | None
+    state: str = ""
+    has_logs: bool = False
+    group_active: bool = False
+    @property
+    def present(self) -> bool:
+        return self.card is not None
+    @property
+    def valid(self) -> bool:
+        return bool(self.card is not None and self.card.valid)
+    @property
+    def running(self) -> bool:
+        return bool(self.state)
+    @classmethod
+    def from_runtime(
+        cls,
+        *,
+        card: FlowCatalogLike | None,
+        flow_states: Mapping[str, str],
+        runtime_session: RuntimeSessionState,
+        flow_groups_by_name: Mapping[str, str],
+        active_flow_states: Container[str],
+        has_logs: bool,
+    ) -> "SelectedFlowState":
+        """Build one selected-flow state from current runtime and selection inputs."""
+        if card is None:
+            return cls(card=None)
+        state = flow_states.get(card.name, card.state)
+        return cls(
+            card=card,
+            state=state if state in active_flow_states else "",
+            has_logs=has_logs,
+            group_active=runtime_session.is_group_active(card.group, flow_groups_by_name),
+        )
+@dataclass(frozen=True)
+class OperatorActionContext:
+    """All state required to derive operator action availability."""
+    # Immutable bundle: this class holds inputs only and defines no behavior.
+    runtime_session: RuntimeSessionState
+    selected_flow: SelectedFlowState
+    # NOTE(review): presumably mirrors FlowCatalogState.has_automated_flows — confirm against the caller.
+    has_automated_flows: bool
+    # Defaults to True, so callers that do not pass it get an available workspace.
+    workspace_available: bool = True
+    # Defaults to False.
+    selected_run_group_present: bool = False
+# Both dataclasses defined in this module are public.
+__all__ = [
+    "OperatorActionContext",
+    "SelectedFlowState",
+]

data_engine/domain/catalog.py ADDED Viewed

@@ -0,0 +1,128 @@
+"""Domain models for discovered flow catalog state."""
+from __future__ import annotations
+from dataclasses import dataclass, replace
+from typing import Iterable, Protocol
+@dataclass(frozen=True)
+class FlowCatalogEntry:
+    """Service/domain representation of one discovered flow."""
+    # Internal flow name; FlowCatalogState uses it as the lookup key.
+    name: str
+    # Group label, or None when the entry carries no group.
+    group: str | None
+    # NOTE(review): title/description look like display text — confirm where they are rendered.
+    title: str
+    description: str
+    # NOTE(review): presumably the source and output roots rendered as text — confirm.
+    source_root: str
+    target_root: str
+    # "poll" and "schedule" are treated as automated modes; any other value counts as manual.
+    mode: str
+    interval: str
+    # NOTE(review): presumably a summary string and the individual operation labels — confirm.
+    operations: str
+    operation_items: tuple[str, ...]
+    # State label; FlowCatalogState.with_entries uses it as the initial state for valid entries.
+    state: str
+    # Invalid entries are given the state "invalid" by FlowCatalogState.with_entries.
+    valid: bool
+    # NOTE(review): presumably the result of flow_category(mode) — verify against the code that builds entries.
+    category: str
+    # Defaults to "" so callers may omit it.
+    error: str = ""
+class FlowCatalogLike(Protocol):
+    """Structural flow metadata contract shared by domain and presentation layers."""
+    # The attribute names and types below mirror FlowCatalogEntry one-for-one,
+    # so FlowCatalogEntry satisfies this protocol structurally.
+    name: str
+    group: str | None
+    title: str
+    description: str
+    source_root: str
+    target_root: str
+    mode: str
+    interval: str
+    operations: str
+    operation_items: tuple[str, ...]
+    state: str
+    valid: bool
+    category: str
+    # Unlike FlowCatalogEntry.error, no default is declared here.
+    error: str
+def flow_category(mode: str) -> str:
+    """Return the top-level category for one flow mode."""
+    return "automated" if mode in {"poll", "schedule"} else "manual"
+def default_flow_state(mode: str | None) -> str:
+    """Return the default idle state label for one flow mode."""
+    if mode == "poll":
+        return "poll ready"
+    if mode == "schedule":
+        return "schedule ready"
+    return "manual"
+@dataclass(frozen=True)
+class FlowCatalogState:
+    """Surface-agnostic state for discovered flows and current selection."""
+    entries: tuple[FlowCatalogEntry, ...] = ()
+    flow_states: dict[str, str] | None = None
+    selected_flow_name: str | None = None
+    empty_message: str = ""
+    @classmethod
+    def empty(cls, *, empty_message: str = "") -> "FlowCatalogState":
+        """Return the empty flow-catalog state."""
+        return cls(entries=(), flow_states={}, selected_flow_name=None, empty_message=empty_message)
+    @property
+    def entries_by_name(self) -> dict[str, FlowCatalogEntry]:
+        """Return discovered entries keyed by internal flow name."""
+        return {entry.name: entry for entry in self.entries}
+    @property
+    def valid_entries(self) -> tuple[FlowCatalogEntry, ...]:
+        """Return only valid discovered flow entries."""
+        return tuple(entry for entry in self.entries if entry.valid)
+    @property
+    def has_automated_flows(self) -> bool:
+        """Return whether the catalog contains any valid automated flows."""
+        return any(entry.valid and entry.mode in {"poll", "schedule"} for entry in self.entries)
+    @property
+    def selected_entry(self) -> FlowCatalogEntry | None:
+        """Return the currently selected entry, if it still exists."""
+        if self.selected_flow_name is None:
+            return None
+        return self.entries_by_name.get(self.selected_flow_name)
+    def with_entries(self, entries: Iterable[FlowCatalogEntry]) -> "FlowCatalogState":
+        """Return a copy with entries replaced and selection normalized."""
+        entry_tuple = tuple(entries)
+        entry_names = {entry.name for entry in entry_tuple}
+        selected = self.selected_flow_name if self.selected_flow_name in entry_names else (entry_tuple[0].name if entry_tuple else None)
+        flow_states = {
+            entry.name: (self.flow_states or {}).get(entry.name, entry.state if entry.valid else "invalid")
+            for entry in entry_tuple
+        }
+        return replace(self, entries=entry_tuple, flow_states=flow_states, selected_flow_name=selected)
+    def with_selected_flow_name(self, flow_name: str | None) -> "FlowCatalogState":
+        """Return a copy with the selected flow name replaced."""
+        return replace(self, selected_flow_name=flow_name)
+    def with_flow_states(self, flow_states: dict[str, str]) -> "FlowCatalogState":
+        """Return a copy with flow states replaced."""
+        return replace(self, flow_states=dict(flow_states))
+    def with_empty_message(self, message: str) -> "FlowCatalogState":
+        """Return a copy with the empty/error message replaced."""
+        return replace(self, empty_message=message)
+# Every class and function defined in this module is exported.
+__all__ = [
+    "FlowCatalogEntry",
+    "FlowCatalogLike",
+    "FlowCatalogState",
+    "default_flow_state",
+    "flow_category",
+]