aptdata-0.0.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. aptdata/__init__.py +3 -0
  2. aptdata/cli/__init__.py +5 -0
  3. aptdata/cli/app.py +247 -0
  4. aptdata/cli/commands/__init__.py +9 -0
  5. aptdata/cli/commands/config_cmd.py +128 -0
  6. aptdata/cli/commands/mesh_cmd.py +435 -0
  7. aptdata/cli/commands/plugin_cmd.py +107 -0
  8. aptdata/cli/commands/system_cmd.py +90 -0
  9. aptdata/cli/commands/telemetry_cmd.py +57 -0
  10. aptdata/cli/completions.py +56 -0
  11. aptdata/cli/interactive.py +269 -0
  12. aptdata/cli/rendering/__init__.py +31 -0
  13. aptdata/cli/rendering/console.py +119 -0
  14. aptdata/cli/rendering/logger.py +26 -0
  15. aptdata/cli/rendering/panels.py +87 -0
  16. aptdata/cli/rendering/tables.py +81 -0
  17. aptdata/cli/scaffold.py +1089 -0
  18. aptdata/config/__init__.py +13 -0
  19. aptdata/config/parser.py +136 -0
  20. aptdata/config/schema.py +27 -0
  21. aptdata/config/secrets.py +60 -0
  22. aptdata/core/__init__.py +46 -0
  23. aptdata/core/context.py +31 -0
  24. aptdata/core/dataset.py +39 -0
  25. aptdata/core/lineage.py +213 -0
  26. aptdata/core/state.py +27 -0
  27. aptdata/core/system.py +317 -0
  28. aptdata/core/workflow.py +372 -0
  29. aptdata/mcp/__init__.py +5 -0
  30. aptdata/mcp/server.py +198 -0
  31. aptdata/plugins/__init__.py +77 -0
  32. aptdata/plugins/ai/__init__.py +6 -0
  33. aptdata/plugins/ai/chunking.py +66 -0
  34. aptdata/plugins/ai/embeddings.py +56 -0
  35. aptdata/plugins/base.py +57 -0
  36. aptdata/plugins/dataset.py +62 -0
  37. aptdata/plugins/governance/__init__.py +32 -0
  38. aptdata/plugins/governance/catalog.py +115 -0
  39. aptdata/plugins/governance/classification.py +44 -0
  40. aptdata/plugins/governance/lineage_store.py +49 -0
  41. aptdata/plugins/governance/rules.py +180 -0
  42. aptdata/plugins/local_fs.py +241 -0
  43. aptdata/plugins/manager.py +142 -0
  44. aptdata/plugins/postgres.py +113 -0
  45. aptdata/plugins/quality/__init__.py +39 -0
  46. aptdata/plugins/quality/contract.py +128 -0
  47. aptdata/plugins/quality/expectations.py +310 -0
  48. aptdata/plugins/quality/report.py +94 -0
  49. aptdata/plugins/quality/validator.py +139 -0
  50. aptdata/plugins/rest.py +135 -0
  51. aptdata/plugins/transform/__init__.py +14 -0
  52. aptdata/plugins/transform/pandas.py +129 -0
  53. aptdata/plugins/transform/spark.py +134 -0
  54. aptdata/plugins/vector/__init__.py +6 -0
  55. aptdata/plugins/vector/base.py +19 -0
  56. aptdata/plugins/vector/qdrant.py +41 -0
  57. aptdata/telemetry/__init__.py +5 -0
  58. aptdata/telemetry/instrumentation.py +164 -0
  59. aptdata/tui/__init__.py +5 -0
  60. aptdata/tui/monitor.py +279 -0
  61. aptdata-0.0.2.dist-info/METADATA +330 -0
  62. aptdata-0.0.2.dist-info/RECORD +65 -0
  63. aptdata-0.0.2.dist-info/WHEEL +4 -0
  64. aptdata-0.0.2.dist-info/entry_points.txt +3 -0
  65. aptdata-0.0.2.dist-info/licenses/LICENSE +21 -0
aptdata/config/__init__.py ADDED
@@ -0,0 +1,13 @@
+ """Declarative configuration helpers for aptdata."""
+
+ from aptdata.config.parser import ParsedConfig, YamlConfigParser
+ from aptdata.config.schema import export_domain_schema, write_domain_schema
+ from aptdata.config.secrets import SecretManager
+
+ __all__ = [
+     "ParsedConfig",
+     "YamlConfigParser",
+     "export_domain_schema",
+     "write_domain_schema",
+     "SecretManager",
+ ]
aptdata/config/parser.py ADDED
@@ -0,0 +1,136 @@
+ """YAML parser for declarative aptdata system definitions."""
+
+ from __future__ import annotations
+
+ from dataclasses import field
+ from pathlib import Path
+ from typing import Any
+
+ import yaml
+ from pydantic import TypeAdapter
+ from pydantic.dataclasses import dataclass as pydantic_dataclass
+
+ from aptdata.config.secrets import SecretManager
+ from aptdata.core.dataset import IDataset
+ from aptdata.core.system import BaseComponent, BaseFlow, BaseSystem, IFlow
+
+
+ @pydantic_dataclass
+ class ConfigEdge:
+     """Serializable flow edge for declarative YAML files."""
+
+     source_id: str
+     target_id: str
+     condition: str = ""
+
+
+ @pydantic_dataclass
+ class ConfigComponent(BaseComponent):
+     """Concrete component used by configuration hydration."""
+
+     def validate_inputs(self, inputs: list[IDataset]) -> bool:  # noqa: ARG002
+         return True
+
+     def execute(self, inputs: list[IDataset]) -> list[IDataset]:
+         return inputs
+
+
+ @pydantic_dataclass
+ class ConfigFlow(BaseFlow):
+     """Concrete flow used by configuration hydration."""
+
+     components: list[ConfigComponent] = field(default_factory=list)
+     edges: list[ConfigEdge] = field(default_factory=list)
+
+     def add_component(self, component: BaseComponent) -> None:
+         if not isinstance(component, ConfigComponent):
+             raise TypeError("ConfigFlow only accepts ConfigComponent instances.")
+         self.components.append(component)
+
+     def connect(
+         self,
+         source_id: str,
+         target_id: str,
+         condition: str | None = None,
+     ) -> None:
+         self.edges.append(
+             ConfigEdge(
+                 source_id=source_id,
+                 target_id=target_id,
+                 condition=condition or "",
+             )
+         )
+
+     def compile(self) -> None:
+         pass
+
+     def run(self, initial_inputs: list[IDataset]) -> list[IDataset]:
+         return initial_inputs
+
+
+ @pydantic_dataclass
+ class ConfigSystem(BaseSystem):
+     """Concrete system used by configuration hydration."""
+
+     flows: list[ConfigFlow] = field(default_factory=list)
+
+     def register_flow(self, flow: IFlow) -> None:
+         if not isinstance(flow, ConfigFlow):
+             raise TypeError("ConfigSystem only accepts ConfigFlow instances.")
+         self.flows.append(flow)
+
+     def run(self) -> None:
+         pass
+
+
+ @pydantic_dataclass
+ class ParsedConfig:
+     """Hydrated config document with metadata and validated domain objects."""
+
+     metadata: dict[str, Any] = field(default_factory=dict)
+     system: ConfigSystem = field(
+         default_factory=lambda: ConfigSystem(system_id="default")
+     )
+
+
+ class YamlConfigParser:
+     """Parse YAML files into validated aptdata domain objects."""
+
+     _component_adapter = TypeAdapter(ConfigComponent)
+     _flow_adapter = TypeAdapter(ConfigFlow)
+     _edge_adapter = TypeAdapter(ConfigEdge)
+     _system_adapter = TypeAdapter(ConfigSystem)
+
+     def __init__(self, secret_manager: SecretManager | None = None) -> None:
+         self._secret_manager = secret_manager or SecretManager()
+
+     def parse_file(self, path: str | Path) -> ParsedConfig:
+         """Read and parse a YAML config file."""
+         config_path = Path(path)
+         raw = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {}
+         if not isinstance(raw, dict):
+             raise ValueError("YAML root must be a mapping/object.")
+         return self.parse_data(raw)
+
+     def parse_data(self, payload: dict[str, Any]) -> ParsedConfig:
+         """Parse a loaded YAML dictionary."""
+         payload = self._secret_manager.resolve(payload)
+         metadata = payload.get("metadata", {})
+         system_payload = dict(payload.get("system", {}))
+         flow_payloads = system_payload.pop("flows", payload.get("flows", []))
+
+         system = self._system_adapter.validate_python(system_payload)
+         for flow_payload in flow_payloads:
+             flow_data = dict(flow_payload)
+             component_payloads = flow_data.pop("components", [])
+             edge_payloads = flow_data.pop("edges", [])
+
+             flow = self._flow_adapter.validate_python(flow_data)
+             for component_payload in component_payloads:
+                 component = self._component_adapter.validate_python(component_payload)
+                 flow.add_component(component)
+             for edge_payload in edge_payloads:
+                 flow.edges.append(self._edge_adapter.validate_python(edge_payload))
+             system.register_flow(flow)
+
+         return ParsedConfig(metadata=metadata, system=system)
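
A minimal usage sketch of the parser above. "system_id" mirrors the ConfigSystem default in ParsedConfig; "flow_id" is a guess at a BaseFlow field and may differ in the real schema:

    from aptdata.config.parser import YamlConfigParser

    payload = {
        "metadata": {"owner": "data-team"},
        "system": {
            "system_id": "demo",
            "flows": [
                {
                    "flow_id": "ingest",  # hypothetical BaseFlow field
                    "components": [],
                    "edges": [{"source_id": "a", "target_id": "b"}],
                }
            ],
        },
    }

    parsed = YamlConfigParser().parse_data(payload)
    print(parsed.system.flows[0].edges[0].target_id)  # "b"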
aptdata/config/schema.py ADDED
@@ -0,0 +1,27 @@
+ """JSON Schema utilities for declarative aptdata configs."""
+
+ from __future__ import annotations
+
+ import json
+ from pathlib import Path
+ from typing import Any
+
+ from pydantic import TypeAdapter
+
+ from aptdata.config.parser import ParsedConfig
+
+
+ def export_domain_schema() -> dict[str, Any]:
+     """Export JSON Schema for the full declarative config domain."""
+     return TypeAdapter(ParsedConfig).json_schema()
+
+
+ def write_domain_schema(output: str | Path) -> Path:
+     """Write the domain JSON Schema to *output*."""
+     output_path = Path(output)
+     output_path.parent.mkdir(parents=True, exist_ok=True)
+     output_path.write_text(
+         json.dumps(export_domain_schema(), indent=2, ensure_ascii=False),
+         encoding="utf-8",
+     )
+     return output_path
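
A short sketch of exporting the schema; the output path is illustrative:

    from aptdata.config.schema import export_domain_schema, write_domain_schema

    schema = export_domain_schema()              # JSON Schema as a plain dict
    print(sorted(schema.get("properties", {})))  # expected: ['metadata', 'system']

    path = write_domain_schema("schemas/aptdata-config.schema.json")
    print(path)  # schemas/aptdata-config.schema.json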
aptdata/config/secrets.py ADDED
@@ -0,0 +1,60 @@
+ """Secret resolution helpers for aptdata configuration and plugins."""
+
+ from __future__ import annotations
+
+ import os
+ import re
+ from typing import Any
+
+ from aptdata.telemetry.instrumentation import register_secret
+
+ _ENV_PATTERN = re.compile(r"\$\{([A-Za-z0-9_]+)\}")
+
+
+ class SecretManager:
+     """Resolve `${ENV_VAR}` placeholders using environment variables."""
+
+     def __init__(self, *, load_dotenv_file: bool = True) -> None:
+         if load_dotenv_file:
+             try:
+                 from dotenv import load_dotenv  # noqa: WPS433
+             except ImportError:
+                 load_dotenv = None
+             if load_dotenv is not None:
+                 load_dotenv(dotenv_path=".env")
+         self._injected_keys: set[str] = set()
+
+     def get(self, key: str, default: str | None = None) -> str:
+         """Return environment value for *key* and register it as a secret."""
+         value = os.getenv(key, default)
+         if value is None:
+             raise KeyError(f"Missing required secret: {key}")
+         self._injected_keys.add(key)
+         register_secret(key, value)
+         return value
+
+     def resolve(self, value: Any) -> Any:
+         """Recursively resolve `${ENV_VAR}` placeholders in nested structures."""
+         if isinstance(value, str):
+             return self._resolve_string(value)
+         if isinstance(value, dict):
+             return {k: self.resolve(v) for k, v in value.items()}
+         if isinstance(value, list):
+             return [self.resolve(item) for item in value]
+         if isinstance(value, tuple):
+             return tuple(self.resolve(item) for item in value)
+         return value
+
+     def injected_keys(self) -> list[str]:
+         """Return sorted secret names injected in this manager session."""
+         return sorted(self._injected_keys)
+
+     def _resolve_string(self, value: str) -> str:
+         matches = _ENV_PATTERN.findall(value)
+         if not matches:
+             return value
+         resolved = value
+         for key in matches:
+             resolved_secret = self.get(key)
+             resolved = resolved.replace(f"${{{key}}}", resolved_secret)
+         return resolved
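
A round-trip sketch of placeholder resolution; DB_PASSWORD is an illustrative variable name:

    import os

    from aptdata.config.secrets import SecretManager

    os.environ["DB_PASSWORD"] = "s3cret"
    manager = SecretManager(load_dotenv_file=False)  # skip .env loading for the demo

    resolved = manager.resolve({"dsn": "postgres://app:${DB_PASSWORD}@db/main"})
    print(resolved["dsn"])          # postgres://app:s3cret@db/main
    print(manager.injected_keys())  # ['DB_PASSWORD']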
aptdata/core/__init__.py ADDED
@@ -0,0 +1,46 @@
+ """Core interfaces and base classes for aptdata."""
+
+ from aptdata.core.context import ExecutionContext
+ from aptdata.core.dataset import BaseDataset, IDataset
+ from aptdata.core.state import StateBackend
+ from aptdata.core.system import (
+     BaseComponent,
+     BaseFlow,
+     BaseSystem,
+     ComponentKind,
+     ComponentMeta,
+     FlowEdge,
+     FlowNode,
+     IComponent,
+     IFlow,
+     ISystem,
+ )
+ from aptdata.core.workflow import (
+     BaseWorkflow,
+     IWorkflow,
+     Workflow,
+     WorkflowEdge,
+     WorkflowNode,
+ )
+
+ __all__ = [
+     "IDataset",
+     "BaseDataset",
+     "ExecutionContext",
+     "ComponentKind",
+     "ComponentMeta",
+     "IComponent",
+     "BaseComponent",
+     "FlowEdge",
+     "FlowNode",
+     "IFlow",
+     "BaseFlow",
+     "ISystem",
+     "BaseSystem",
+     "WorkflowEdge",
+     "WorkflowNode",
+     "IWorkflow",
+     "BaseWorkflow",
+     "Workflow",
+     "StateBackend",
+ ]
aptdata/core/context.py ADDED
@@ -0,0 +1,31 @@
+ """Injectable execution context for state sharing across runs."""
+
+ from __future__ import annotations
+
+ from dataclasses import field
+ from typing import Any
+
+ from pydantic.dataclasses import dataclass as pydantic_dataclass
+
+
+ @pydantic_dataclass
+ class ExecutionContext:
+     """Simple in-memory key/value state container."""
+
+     memory: dict[str, Any] = field(default_factory=dict)
+
+     def get(self, key: str, default: Any = None) -> Any:
+         """Return value for *key* or *default* if not present."""
+         return self.memory.get(key, default)
+
+     def set(self, key: str, value: Any) -> None:
+         """Store *value* under *key*."""
+         self.memory[key] = value
+
+     def update(self, values: dict[str, Any]) -> None:
+         """Merge mapping into memory state."""
+         self.memory.update(values)
+
+     def clear(self) -> None:
+         """Remove all state."""
+         self.memory.clear()
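
A quick sketch of the context in use:

    from aptdata.core.context import ExecutionContext

    ctx = ExecutionContext()
    ctx.set("run_id", "run-001")
    ctx.update({"rows_seen": 1000})
    print(ctx.get("rows_seen"))            # 1000
    print(ctx.get("missing", "fallback"))  # fallback
    ctx.clear()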
aptdata/core/dataset.py ADDED
@@ -0,0 +1,39 @@
+ """Dataset interface and base class."""
+
+ from abc import ABC, abstractmethod
+ from dataclasses import dataclass, field
+ from typing import Any
+
+ from pydantic.dataclasses import dataclass as pydantic_dataclass
+
+
+ @dataclass
+ class IDataset(ABC):
+     """Dataclass interface for dataset types.
+
+     All dataset contracts must implement :meth:`read` and :meth:`write`.
+     No concrete fields are defined here – field declarations live in
+     :class:`BaseDataset` and its subclasses.
+     """
+
+     @abstractmethod
+     def read(self) -> Any:
+         """Read and return data from the dataset."""
+
+     @abstractmethod
+     def write(self, data: Any) -> None:
+         """Write data to the dataset."""
+
+
+ @pydantic_dataclass
+ class BaseDataset(IDataset):
+     """Base dataset with Pydantic-validated fields.
+
+     Provides the canonical ``uri`` and ``schema_metadata`` fields.
+     Concrete dataset implementations must inherit from this class and
+     implement the :meth:`read` and :meth:`write` abstract methods
+     inherited from :class:`IDataset`.
+     """
+
+     uri: str
+     schema_metadata: dict[str, Any] = field(default_factory=dict)
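
A sketch of a concrete implementation; InMemoryDataset is hypothetical and not shipped in this wheel:

    from typing import Any

    from pydantic.dataclasses import dataclass as pydantic_dataclass

    from aptdata.core.dataset import BaseDataset

    @pydantic_dataclass
    class InMemoryDataset(BaseDataset):
        """Hypothetical dataset that keeps its payload in memory."""

        payload: Any = None

        def read(self) -> Any:
            return self.payload

        def write(self, data: Any) -> None:
            self.payload = data

    ds = InMemoryDataset(uri="mem://example")
    ds.write([1, 2, 3])
    print(ds.read())  # [1, 2, 3]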
aptdata/core/lineage.py ADDED
@@ -0,0 +1,213 @@
+ """Data lineage graph for tracking dataset transformations and provenance.
+
+ Provides :class:`LineageGraph` which is built up during a workflow run and
+ stores :class:`LineageNode` instances that capture every read, transform,
+ quality-check, and write event.
+ """
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass, field
+ from datetime import datetime, timezone
+ from enum import Enum
+ from typing import Any
+ from uuid import uuid4
+
+
+ class LineageEventType(str, Enum):
+     """Enumeration of lineage event types."""
+
+     READ = "READ"
+     TRANSFORM = "TRANSFORM"
+     QUALITY_CHECK = "QUALITY_CHECK"
+     BUSINESS_RULE = "BUSINESS_RULE"
+     WRITE = "WRITE"
+     SCHEMA_CHANGE = "SCHEMA_CHANGE"
+
+
+ @dataclass
+ class ColumnLineage:
+     """Column-level lineage mapping a target column to its source columns.
+
+     Parameters
+     ----------
+     target_column:
+         Name of the output column.
+     source_columns:
+         Names of the input columns that contributed to *target_column*.
+     transformation:
+         Human-readable description of the transformation applied.
+     source_dataset_uri:
+         URI of the dataset that contains the source columns.
+     """
+
+     target_column: str
+     source_columns: list[str] = field(default_factory=list)
+     transformation: str = ""
+     source_dataset_uri: str = ""
+
+
+ @dataclass
+ class LineageNode:
+     """A single event node in the lineage graph.
+
+     Parameters
+     ----------
+     dataset_uri:
+         Logical URI of the dataset involved in this event.
+     event_type:
+         The kind of lineage event (see :class:`LineageEventType`).
+     workflow_name:
+         Name of the workflow that produced this node.
+     step_name:
+         Name of the individual step within the workflow.
+     transformer_name:
+         Name of the transformer (for TRANSFORM events).
+     engine:
+         Processing engine (e.g. ``"pandas"`` or ``"pyspark"``).
+     rows_in:
+         Number of input rows.
+     rows_out:
+         Number of output rows.
+     columns_in:
+         List of input column names.
+     columns_out:
+         List of output column names.
+     column_lineage:
+         Column-level lineage mappings.
+     business_rules_applied:
+         IDs of business rules applied during this event.
+     quality_checks:
+         Names of quality checks executed.
+     quality_status:
+         Overall quality check outcome (e.g. ``"PASSED"`` or ``"FAILED"``).
+     trace_id:
+         OpenTelemetry trace identifier.
+     span_id:
+         OpenTelemetry span identifier.
+     parent_node_ids:
+         IDs of upstream :class:`LineageNode` objects.
+     metadata:
+         Arbitrary extra metadata.
+     node_id:
+         Auto-generated unique identifier (UUID hex).
+     timestamp:
+         UTC ISO-8601 timestamp of the event.
+     """
+
+     dataset_uri: str
+     event_type: LineageEventType
+     workflow_name: str = ""
+     step_name: str = ""
+     transformer_name: str = ""
+     engine: str = ""
+     rows_in: int = 0
+     rows_out: int = 0
+     columns_in: list[str] = field(default_factory=list)
+     columns_out: list[str] = field(default_factory=list)
+     column_lineage: list[ColumnLineage] = field(default_factory=list)
+     business_rules_applied: list[str] = field(default_factory=list)
+     quality_checks: list[str] = field(default_factory=list)
+     quality_status: str = ""
+     trace_id: str = ""
+     span_id: str = ""
+     parent_node_ids: list[str] = field(default_factory=list)
+     metadata: dict[str, Any] = field(default_factory=dict)
+     node_id: str = field(default_factory=lambda: uuid4().hex)
+     timestamp: str = field(
+         default_factory=lambda: datetime.now(timezone.utc).isoformat()
+     )
+
+     def to_dict(self) -> dict[str, Any]:
+         """Serialize this node to a plain dictionary."""
+         return {
+             "node_id": self.node_id,
+             "dataset_uri": self.dataset_uri,
+             "event_type": self.event_type.value,
+             "workflow_name": self.workflow_name,
+             "step_name": self.step_name,
+             "transformer_name": self.transformer_name,
+             "engine": self.engine,
+             "rows_in": self.rows_in,
+             "rows_out": self.rows_out,
+             "columns_in": self.columns_in,
+             "columns_out": self.columns_out,
+             "column_lineage": [
+                 {
+                     "target_column": cl.target_column,
+                     "source_columns": cl.source_columns,
+                     "transformation": cl.transformation,
+                     "source_dataset_uri": cl.source_dataset_uri,
+                 }
+                 for cl in self.column_lineage
+             ],
+             "business_rules_applied": self.business_rules_applied,
+             "quality_checks": self.quality_checks,
+             "quality_status": self.quality_status,
+             "trace_id": self.trace_id,
+             "span_id": self.span_id,
+             "parent_node_ids": self.parent_node_ids,
+             "metadata": self.metadata,
+             "timestamp": self.timestamp,
+         }
+
+
+ @dataclass
+ class LineageGraph:
+     """A complete lineage graph for a single workflow run.
+
+     Parameters
+     ----------
+     run_id:
+         Unique identifier for the workflow run.
+     workflow_name:
+         Human-readable name of the workflow.
+     nodes:
+         Ordered list of :class:`LineageNode` objects.
+     started_at:
+         UTC ISO-8601 timestamp when the run started.
+     """
+
+     run_id: str
+     workflow_name: str
+     nodes: list[LineageNode] = field(default_factory=list)
+     started_at: str = field(
+         default_factory=lambda: datetime.now(timezone.utc).isoformat()
+     )
+
+     def add_node(self, node: LineageNode) -> None:
+         """Append *node* to the graph."""
+         self.nodes.append(node)
+
+     def get_upstream(self, node_id: str) -> list[LineageNode]:
+         """Return all nodes that are direct parents of *node_id*.
+
+         Nodes are considered parents when their ``node_id`` appears in
+         the target node's :attr:`~LineageNode.parent_node_ids` list.
+         """
+         target = next((n for n in self.nodes if n.node_id == node_id), None)
+         if target is None:
+             return []
+         parent_ids = set(target.parent_node_ids)
+         return [n for n in self.nodes if n.node_id in parent_ids]
+
+     def get_downstream(self, node_id: str) -> list[LineageNode]:
+         """Return all nodes that list *node_id* as a parent."""
+         return [n for n in self.nodes if node_id in n.parent_node_ids]
+
+     def to_dict(self) -> dict[str, Any]:
+         """Serialize the full graph to a plain dictionary."""
+         return {
+             "run_id": self.run_id,
+             "workflow_name": self.workflow_name,
+             "started_at": self.started_at,
+             "nodes": [n.to_dict() for n in self.nodes],
+         }
+
+
+ __all__ = [
+     "LineageEventType",
+     "ColumnLineage",
+     "LineageNode",
+     "LineageGraph",
+ ]
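
A sketch of building and querying a run graph; the URIs and names are illustrative:

    from aptdata.core.lineage import LineageEventType, LineageGraph, LineageNode

    graph = LineageGraph(run_id="run-001", workflow_name="daily_orders")

    read = LineageNode(
        dataset_uri="s3://raw/orders",
        event_type=LineageEventType.READ,
    )
    transform = LineageNode(
        dataset_uri="s3://clean/orders",
        event_type=LineageEventType.TRANSFORM,
        parent_node_ids=[read.node_id],  # link back to the READ event
    )
    graph.add_node(read)
    graph.add_node(transform)

    print([n.dataset_uri for n in graph.get_upstream(transform.node_id)])
    # ['s3://raw/orders']
    print([n.dataset_uri for n in graph.get_downstream(read.node_id)])
    # ['s3://clean/orders']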
aptdata/core/state.py ADDED
@@ -0,0 +1,27 @@
+ """Persistent workflow execution state backend."""
+
+ from __future__ import annotations
+
+ import json
+ from pathlib import Path
+ from typing import Any
+
+
+ class StateBackend:
+     """Simple JSON-on-disk backend used for checkpointing workflow state."""
+
+     def __init__(self, base_dir: str | Path = ".aptdata_state") -> None:
+         self.base_dir = Path(base_dir)
+         self.base_dir.mkdir(parents=True, exist_ok=True)
+
+     def save(self, run_id: str, state: dict[str, Any]) -> None:
+         """Persist *state* for *run_id*."""
+         path = self.base_dir / f"{run_id}.json"
+         path.write_text(
+             json.dumps(state, ensure_ascii=False, default=str), encoding="utf-8"
+         )
+
+     def load(self, run_id: str) -> dict[str, Any]:
+         """Load state for *run_id*."""
+         path = self.base_dir / f"{run_id}.json"
+         return json.loads(path.read_text(encoding="utf-8"))
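
A checkpoint round trip; the state payload is illustrative:

    from aptdata.core.state import StateBackend

    backend = StateBackend(base_dir=".aptdata_state")
    backend.save("run-001", {"completed_steps": ["read", "transform"], "rows": 1000})
    print(backend.load("run-001"))
    # {'completed_steps': ['read', 'transform'], 'rows': 1000}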