aptdata 0.0.2__tar.gz → 0.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. {aptdata-0.0.2 → aptdata-0.0.3}/PKG-INFO +21 -2
  2. {aptdata-0.0.2 → aptdata-0.0.3}/README.md +18 -0
  3. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/__init__.py +1 -1
  4. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/cli/commands/mesh_cmd.py +21 -27
  5. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/core/__init__.py +10 -2
  6. aptdata-0.0.3/aptdata/core/context.py +95 -0
  7. aptdata-0.0.3/aptdata/core/dataset.py +121 -0
  8. aptdata-0.0.3/aptdata/core/decorators.py +140 -0
  9. aptdata-0.0.3/aptdata/core/events.py +104 -0
  10. aptdata-0.0.3/aptdata/core/registry.py +31 -0
  11. aptdata-0.0.3/aptdata/core/system.py +568 -0
  12. aptdata-0.0.3/aptdata/core/yaml_builder.py +126 -0
  13. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/mcp/server.py +123 -9
  14. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/dataset.py +10 -4
  15. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/transform/pandas.py +57 -1
  16. aptdata-0.0.3/aptdata/telemetry/__init__.py +6 -0
  17. aptdata-0.0.3/aptdata/telemetry/provider.py +28 -0
  18. {aptdata-0.0.2 → aptdata-0.0.3}/pyproject.toml +10 -4
  19. aptdata-0.0.2/aptdata/core/context.py +0 -31
  20. aptdata-0.0.2/aptdata/core/dataset.py +0 -39
  21. aptdata-0.0.2/aptdata/core/system.py +0 -317
  22. aptdata-0.0.2/aptdata/telemetry/__init__.py +0 -5
  23. {aptdata-0.0.2 → aptdata-0.0.3}/LICENSE +0 -0
  24. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/cli/__init__.py +0 -0
  25. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/cli/app.py +0 -0
  26. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/cli/commands/__init__.py +0 -0
  27. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/cli/commands/config_cmd.py +0 -0
  28. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/cli/commands/plugin_cmd.py +0 -0
  29. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/cli/commands/system_cmd.py +0 -0
  30. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/cli/commands/telemetry_cmd.py +0 -0
  31. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/cli/completions.py +0 -0
  32. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/cli/interactive.py +0 -0
  33. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/cli/rendering/__init__.py +0 -0
  34. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/cli/rendering/console.py +0 -0
  35. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/cli/rendering/logger.py +0 -0
  36. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/cli/rendering/panels.py +0 -0
  37. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/cli/rendering/tables.py +0 -0
  38. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/cli/scaffold.py +0 -0
  39. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/config/__init__.py +0 -0
  40. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/config/parser.py +0 -0
  41. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/config/schema.py +0 -0
  42. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/config/secrets.py +0 -0
  43. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/core/lineage.py +0 -0
  44. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/core/state.py +0 -0
  45. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/core/workflow.py +0 -0
  46. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/mcp/__init__.py +0 -0
  47. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/__init__.py +0 -0
  48. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/ai/__init__.py +0 -0
  49. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/ai/chunking.py +0 -0
  50. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/ai/embeddings.py +0 -0
  51. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/base.py +0 -0
  52. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/governance/__init__.py +0 -0
  53. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/governance/catalog.py +0 -0
  54. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/governance/classification.py +0 -0
  55. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/governance/lineage_store.py +0 -0
  56. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/governance/rules.py +0 -0
  57. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/local_fs.py +0 -0
  58. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/manager.py +0 -0
  59. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/postgres.py +0 -0
  60. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/quality/__init__.py +0 -0
  61. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/quality/contract.py +0 -0
  62. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/quality/expectations.py +0 -0
  63. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/quality/report.py +0 -0
  64. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/quality/validator.py +0 -0
  65. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/rest.py +0 -0
  66. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/transform/__init__.py +0 -0
  67. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/transform/spark.py +0 -0
  68. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/vector/__init__.py +0 -0
  69. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/vector/base.py +0 -0
  70. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/plugins/vector/qdrant.py +0 -0
  71. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/telemetry/instrumentation.py +0 -0
  72. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/tui/__init__.py +0 -0
  73. {aptdata-0.0.2 → aptdata-0.0.3}/aptdata/tui/monitor.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: aptdata
- Version: 0.0.2
+ Version: 0.0.3
  Summary: A declarative, extensible framework for building smart data pipelines in Python
  License: MIT
  License-File: LICENSE
@@ -17,12 +17,13 @@ Classifier: Programming Language :: Python :: 3.12
  Classifier: Programming Language :: Python :: 3.13
  Classifier: Programming Language :: Python :: 3.14
  Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
+ Provides-Extra: ai
  Provides-Extra: all
  Provides-Extra: pandas
  Provides-Extra: plugins
  Provides-Extra: spark
  Requires-Dist: httpx (>=0.27,<0.28) ; extra == "plugins" or extra == "all"
- Requires-Dist: mcp (>=1.26.0,<2.0.0)
+ Requires-Dist: mcp (>=1.26.0,<2.0.0) ; extra == "ai" or extra == "all"
  Requires-Dist: opentelemetry-api (>=1.40.0,<2.0.0)
  Requires-Dist: opentelemetry-sdk (>=1.40.0,<2.0.0)
  Requires-Dist: pandas (>=2.2,<3.0) ; extra == "pandas" or extra == "all"
@@ -95,6 +96,7 @@ pip install aptdata
  pip install aptdata[pandas] # pandas support
  pip install aptdata[spark] # PySpark support
  pip install aptdata[plugins] # REST, PostgreSQL, Parquet I/O
+ pip install aptdata[ai] # MCP server for AI agents
  pip install aptdata[all] # everything
  ```
  
@@ -273,6 +275,23 @@ See [Governance docs](docs/governance.md) for the full API.
  
  ---
  
+ ## AI Agents & MCP Server
+
+ aptdata ships with a built-in [Model Context Protocol](https://modelcontextprotocol.io/) server (`mcp-start`). This transforms AI assistants (like Claude, Copilot, or Devin) into autonomous data engineers with direct access to:
+
+ - **Pipeline Execution:** Trigger and monitor data flows (`run_flow`).
+ - **Data Quality:** Audit the latest quality test results (`quality://reports/...`).
+ - **Data Governance:** Read business rules to prevent violations (`governance://rules`).
+ - **Lineage:** Trace upstream dependencies and column-level provenance (`get_pipeline_lineage`).
+
+ ```bash
+ aptdata mcp-start --transport stdio
+ ```
+
+ See the [MCP Documentation](docs/mcp.md) for setup instructions.
+
+ ---
+
  ## Release process
  
  Releases are automated via the [Release workflow](.github/workflows/release.yml).
@@ -51,6 +51,7 @@ pip install aptdata
  pip install aptdata[pandas] # pandas support
  pip install aptdata[spark] # PySpark support
  pip install aptdata[plugins] # REST, PostgreSQL, Parquet I/O
+ pip install aptdata[ai] # MCP server for AI agents
  pip install aptdata[all] # everything
  ```
  
@@ -229,6 +230,23 @@ See [Governance docs](docs/governance.md) for the full API.
  
  ---
  
+ ## AI Agents & MCP Server
+
+ aptdata ships with a built-in [Model Context Protocol](https://modelcontextprotocol.io/) server (`mcp-start`). This transforms AI assistants (like Claude, Copilot, or Devin) into autonomous data engineers with direct access to:
+
+ - **Pipeline Execution:** Trigger and monitor data flows (`run_flow`).
+ - **Data Quality:** Audit the latest quality test results (`quality://reports/...`).
+ - **Data Governance:** Read business rules to prevent violations (`governance://rules`).
+ - **Lineage:** Trace upstream dependencies and column-level provenance (`get_pipeline_lineage`).
+
+ ```bash
+ aptdata mcp-start --transport stdio
+ ```
+
+ See the [MCP Documentation](docs/mcp.md) for setup instructions.
+
+ ---
+
  ## Release process
  
  Releases are automated via the [Release workflow](.github/workflows/release.yml).
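For readers wondering what sits on the other side of `mcp-start`: a rough sketch of connecting to the server over stdio with the official `mcp` Python SDK. The SDK calls shown follow the standard MCP client pattern and are not part of this package, so treat them as an assumption rather than documented usage.

```python
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

# Launch the packaged MCP server as a subprocess and talk to it over stdio.
server = StdioServerParameters(command="aptdata", args=["mcp-start", "--transport", "stdio"])


async def main() -> None:
    async with stdio_client(server) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            tools = await session.list_tools()  # e.g. run_flow, get_pipeline_lineage
            print([tool.name for tool in tools.tools])


asyncio.run(main())
```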
@@ -1,3 +1,3 @@
  """aptdata: A framework for smart data pipelines."""
  
- __version__ = "0.0.2"
+ __version__ = "0.0.3"
@@ -47,6 +47,25 @@ def _find_mesh_yaml(directory: Path) -> Path | None:  # noqa: UP007
      return candidate if candidate.exists() else None
  
  
+ def _resolve_mesh_file(root: Path, component: str) -> Path | None:  # noqa: UP007
+     """Find mesh.yaml for the given component (by name or direct path)."""
+     component_path = root / component
+     if component_path.is_dir():
+         mesh_file = _find_mesh_yaml(component_path)
+         if mesh_file:
+             return mesh_file
+
+     for candidate in root.rglob(_MESH_FILE):
+         try:
+             data = _load_mesh(candidate)
+             if data.get("component") == component:
+                 return candidate
+         except Exception:  # noqa: BLE001
+             continue
+
+     return None
+
+
  @mesh_app.command("list")
  def mesh_list(
      directory: Path = typer.Option(
@@ -152,20 +171,7 @@ def mesh_run(
      console = SmartConsole(json_mode=json_mode)
      root = directory.resolve()
  
-     # Find mesh.yaml for the given component (by name or direct path)
-     mesh_file: Path | None = None  # noqa: UP007
-     component_path = root / component
-     if component_path.is_dir():
-         mesh_file = _find_mesh_yaml(component_path)
-     if mesh_file is None:
-         for candidate in root.rglob(_MESH_FILE):
-             try:
-                 data = _load_mesh(candidate)
-                 if data.get("component") == component:
-                     mesh_file = candidate
-                     break
-             except Exception:  # noqa: BLE001
-                 continue
+     mesh_file = _resolve_mesh_file(root, component)
  
      if mesh_file is None:
          msg = f"Component '{component}' not found. No mesh.yaml located under '{root}'."
@@ -271,19 +277,7 @@ def mesh_build(
      console = SmartConsole(json_mode=json_mode)
      root = directory.resolve()
  
-     mesh_file: Path | None = None  # noqa: UP007
-     component_path = root / component
-     if component_path.is_dir():
-         mesh_file = _find_mesh_yaml(component_path)
-     if mesh_file is None:
-         for candidate in root.rglob(_MESH_FILE):
-             try:
-                 data = _load_mesh(candidate)
-                 if data.get("component") == component:
-                     mesh_file = candidate
-                     break
-             except Exception:  # noqa: BLE001
-                 continue
+     mesh_file = _resolve_mesh_file(root, component)
  
      if mesh_file is None:
          msg = f"Component '{component}' not found. No mesh.yaml located under '{root}'."
@@ -1,7 +1,12 @@
  """Core interfaces and base classes for aptdata."""
  
- from aptdata.core.context import ExecutionContext
- from aptdata.core.dataset import BaseDataset, IDataset
+ from aptdata.core.context import ExecutionContext, IContext
+ from aptdata.core.dataset import (
+     BaseDataset,
+     DataContractError,
+     IDataset,
+     PydanticDataset,
+ )
  from aptdata.core.state import StateBackend
  from aptdata.core.system import (
      BaseComponent,
@@ -26,7 +31,10 @@ from aptdata.core.workflow import (
  __all__ = [
      "IDataset",
      "BaseDataset",
+     "PydanticDataset",
+     "DataContractError",
      "ExecutionContext",
+     "IContext",
      "ComponentKind",
      "ComponentMeta",
      "IComponent",
@@ -0,0 +1,95 @@
+ """Injectable execution context for state sharing across runs."""
+
+ from __future__ import annotations
+
+ import logging
+ from abc import ABC, abstractmethod
+ from dataclasses import field
+ from typing import Any
+
+ from pydantic.dataclasses import dataclass as pydantic_dataclass
+
+ from aptdata.core.events import EventBus, IEventBus
+ from aptdata.telemetry import TelemetryProvider
+
+
+ class IContext(ABC):
+     """Interface for an execution context with logging and telemetry."""
+
+     @property
+     @abstractmethod
+     def logger(self) -> logging.Logger:
+         """Structured logger for this context."""
+
+     @property
+     @abstractmethod
+     def telemetry(self) -> TelemetryProvider:
+         """Telemetry provider for this context."""
+
+     @property
+     @abstractmethod
+     def event_bus(self) -> IEventBus:
+         """Event bus for decoupled communication and observability."""
+
+     @abstractmethod
+     def get(self, key: str, default: Any = None) -> Any:
+         """Return value for *key* or *default* if not present."""
+
+     @abstractmethod
+     def set(self, key: str, value: Any) -> None:
+         """Store *value* under *key*."""
+
+     @abstractmethod
+     def update(self, values: dict[str, Any]) -> None:
+         """Merge mapping into memory state."""
+
+     @abstractmethod
+     def clear(self) -> None:
+         """Remove all state."""
+
+
+ from pydantic import ConfigDict  # noqa: E402
+
+
+ @pydantic_dataclass(config=ConfigDict(arbitrary_types_allowed=True))
+ class ExecutionContext(IContext):
+     """Simple in-memory key/value state container."""
+
+     memory: dict[str, Any] = field(default_factory=dict)
+     _logger: logging.Logger | None = field(default=None, init=False, repr=False)
+     _telemetry: TelemetryProvider | None = field(default=None, init=False, repr=False)
+     _event_bus: IEventBus | None = field(default=None, init=False, repr=False)
+
+     @property
+     def logger(self) -> logging.Logger:
+         if self._logger is None:
+             self._logger = logging.getLogger("aptdata.context")
+         return self._logger
+
+     @property
+     def telemetry(self) -> TelemetryProvider:
+         if self._telemetry is None:
+             self._telemetry = TelemetryProvider.get_instance()
+         return self._telemetry
+
+     @property
+     def event_bus(self) -> IEventBus:
+         if self._event_bus is None:
+             self._event_bus = EventBus()
+         return self._event_bus
+
+     @event_bus.setter
+     def event_bus(self, bus: IEventBus) -> None:
+         self._event_bus = bus
+
+     def get(self, key: str, default: Any = None) -> Any:
+         return self.memory.get(key, default)
+
+     def set(self, key: str, value: Any) -> None:
+         self.memory[key] = value
+
+     def update(self, values: dict[str, Any]) -> None:
+         self.memory.update(values)
+
+     def clear(self) -> None:
+         self.memory.clear()
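The new `ExecutionContext` is a plain key/value container with lazily created logger, telemetry, and event-bus handles. A minimal usage sketch based only on the code in the hunk above (pipeline wiring and the telemetry backend are out of scope here):

```python
from aptdata.core.context import ExecutionContext

# Create a context and share small bits of state between components/runs.
ctx = ExecutionContext()
ctx.set("run_id", "2024-06-01T12:00:00Z")
ctx.update({"rows_in": 1000, "rows_out": 950})

ctx.logger.info("rows kept: %s", ctx.get("rows_out"))  # lazily created stdlib logger
print(ctx.get("missing", default=0))                    # -> 0
ctx.clear()                                             # drop all in-memory state
```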
@@ -0,0 +1,121 @@
+ """Dataset interface and base class."""
+
+ from abc import ABC, abstractmethod
+ from dataclasses import dataclass, field
+ from typing import Any, Generic, TypeVar
+
+ from pydantic import BaseModel
+ from pydantic.dataclasses import dataclass as pydantic_dataclass
+
+
+ class DataContractError(Exception):
+     """Exception raised when dataset data does not conform to the expected Pydantic
+     contract."""
+
+     pass
+
+
+ @dataclass
+ class IDataset(ABC):
+     """Dataclass interface for dataset types.
+
+     All dataset contracts must implement :meth:`read` and :meth:`write`.
+     No concrete fields are defined here – field declarations live in
+     :class:`BaseDataset` and its subclasses.
+     """
+
+     @abstractmethod
+     def read(self) -> Any:
+         """Read and return data from the dataset."""
+
+     @abstractmethod
+     def write(self, data: Any) -> None:
+         """Write data to the dataset."""
+
+
+ @pydantic_dataclass
+ class BaseDataset(IDataset):
+     """Base dataset with Pydantic-validated fields.
+
+     Provides the canonical ``uri`` and ``schema_metadata`` fields.
+     Concrete dataset implementations must inherit from this class and
+     implement the :meth:`read` and :meth:`write` abstract methods
+     inherited from :class:`IDataset`.
+     """
+
+     uri: str
+     schema_metadata: dict[str, Any] = field(default_factory=dict)
+
+
+ T = TypeVar("T", bound=BaseModel)
+
+
+ @pydantic_dataclass
+ class PydanticDataset(BaseDataset, Generic[T]):
+     """A dataset implementation that enforces a Pydantic model contract.
+
+     Data validation is performed when data is written to or read from the dataset.
+     This implementation is optimized for Pandas dataframes by converting the Pydantic
+     schema into pandas dtypes, ensuring fail-fast execution without row-by-row
+     iteration.
+     """
+
+     contract: type[T] | None = field(default=None)
+     _data: Any = field(default=None, init=False, repr=False)
+
+     def read(self) -> Any:
+         return self._data
+
+     def write(self, data: Any) -> None:
+         if self.contract is not None:
+             self._validate(data)
+         self._data = data
+
+     def _validate(self, data: Any) -> None:
+         """Validates the input data against the configured Pydantic contract."""
+         if self.contract is None:
+             return
+
+         try:
+             import pandas as pd
+         except ImportError:
+             # Fallback to pure pydantic if pandas isn't installed.
+             # We assume data is a list of dicts.
+             if isinstance(data, list):
+                 for row in data:
+                     try:
+                         self.contract.model_validate(row)
+                     except Exception as e:
+                         raise DataContractError(f"Validation failed for row {row}: {e}")
+             return
+
+         if isinstance(data, pd.DataFrame):
+             # Optimised pandas validation
+             schema_fields = self.contract.model_fields
+             actual_columns = set(data.columns)
+
+             # Check for missing columns
+             required_columns = {k for k, v in schema_fields.items() if v.is_required()}
+             missing_required = required_columns - actual_columns
+             if missing_required:
+                 raise DataContractError(
+                     f"DataFrame is missing required columns: {missing_required}"
+                 )
+
+             # Optionally check types (fail-fast type checking without row-by-row)
+             # This is a basic conversion check
+             for col, field_info in schema_fields.items():
+                 if col in data.columns:
+                     # In a real-world scenario, we would map pydantic types to
+                     # numpy/pandas dtypes and ensure the types match perfectly.
+                     # For now we rely on missing columns and basic null checks.
+                     if field_info.is_required() and data[col].isnull().any():
+                         raise DataContractError(
+                             f"Column '{col}' contains null values but is required."
+                         )
+         elif isinstance(data, list):
+             for row in data:
+                 try:
+                     self.contract.model_validate(row)
+                 except Exception as e:
+                     raise DataContractError(f"Validation failed for row {row}: {e}")
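To illustrate the contract enforcement added in `PydanticDataset`, here is a small sketch. The `Order` model and values are made up; the expected behaviour follows the `_validate` logic in the hunk above (required-column and null checks for DataFrames).

```python
import pandas as pd
from pydantic import BaseModel

from aptdata.core.dataset import DataContractError, PydanticDataset


class Order(BaseModel):  # hypothetical contract model
    order_id: int
    amount: float


ds = PydanticDataset(uri="memory://orders", contract=Order)

# A frame with all required columns passes the fail-fast checks.
ds.write(pd.DataFrame({"order_id": [1, 2], "amount": [9.5, 12.0]}))
print(ds.read().shape)  # (2, 2)

# A frame missing a required column raises DataContractError.
try:
    ds.write(pd.DataFrame({"order_id": [3]}))
except DataContractError as exc:
    print(exc)  # DataFrame is missing required columns: {'amount'}
```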
@@ -0,0 +1,140 @@
+ import inspect
+ from collections.abc import Callable
+ from typing import Any
+
+ from aptdata.core.context import IContext
+ from aptdata.core.dataset import IDataset
+ from aptdata.core.registry import ComponentRegistry
+ from aptdata.core.system import BaseComponent
+
+
+ class FunctionComponentAdapter(BaseComponent):
+     """Adapter to make a simple python function behave like a BaseComponent."""
+
+     def __init__(self, func: Callable, name: str, **kwargs: Any):
+         # Determine component_id. User might have passed it in kwargs via yaml builder.
+         # Otherwise fallback to the decorator's name
+         comp_id = kwargs.pop("component_id", name)
+         super().__init__(component_id=comp_id, **kwargs)
+         self._func = func
+
+     def validate_inputs(self, inputs: list[IDataset]) -> bool:
+         """Default validation passes everything."""
+         return True
+
+     def execute(self, inputs: list[IDataset]) -> list[IDataset]:
+         return self._execute(inputs)
+
+     def _execute(self, inputs: list[IDataset]) -> list[IDataset]:
+         # For simple functional components, we assume the signature can be:
+         # func(inputs: list[IDataset], context: IContext) -> list[IDataset]
+         # or just func(inputs: list[IDataset]) -> list[IDataset]
+
+         sig = inspect.signature(self._func)
+         kwargs = {}
+
+         # Determine if the function expects context
+         has_context_param = False
+         for param_name, param in sig.parameters.items():
+             if param.annotation == IContext or param_name == "context":
+                 kwargs[param_name] = self.context
+                 has_context_param = True
+             elif param_name == "inputs":
+                 kwargs[param_name] = inputs
+
+         # If the parameter isn't explicitly named "inputs", we'll just pass inputs
+         # as the first arg if it takes positional args
+         if "inputs" not in kwargs and len(sig.parameters) > 0:
+             first_param = list(sig.parameters.keys())[0]
+             if first_param != "context" or not has_context_param:
+                 kwargs[first_param] = inputs
+
+         return self._func(**kwargs)
+
+
+ def component(name: str | None = None) -> Callable:
+     """Decorator to register a component class or a function in the global
+     ComponentRegistry.
+
+     If used on a function, it wraps it in an adapter that implements BaseComponent.
+     """
+
+     def decorator(
+         target: type[BaseComponent] | Callable,
+     ) -> type[BaseComponent] | Callable:
+         # Determine registry name
+         registry_name = name or target.__name__
+
+         if isinstance(target, type) and issubclass(target, BaseComponent):
+             # Target is a component class
+             ComponentRegistry.register(registry_name, target)
+             return target
+         else:
+             # Target is a function
+             # We must create a dynamically generated subclass to
+             # easily instantiate later.
+             class DynamicFunctionComponent(FunctionComponentAdapter):
+                 def __init__(self, **kwargs):
+                     super().__init__(func=target, name=registry_name, **kwargs)
+
+             # Change the __name__ to match
+             DynamicFunctionComponent.__name__ = target.__name__ + "Component"
+             ComponentRegistry.register(registry_name, DynamicFunctionComponent)
+             return target
+
+     return decorator
+
+
+ def pandas_component(name: str | None = None) -> Callable:
+     """Decorator to register a pandas-specific function in the global
+     ComponentRegistry.
+
+     The decorated function should take a pd.DataFrame and optionally an IContext,
+     and return a pd.DataFrame. The adapter will handle unwrapping/wrapping IDataset.
+     """
+
+     def decorator(target: Callable) -> Callable:
+         registry_name = name or target.__name__
+
+         class DynamicPandasComponent(FunctionComponentAdapter):
+             def __init__(self, **kwargs):
+                 super().__init__(func=target, name=registry_name, **kwargs)
+
+             def _execute(self, inputs: list[IDataset]) -> list[IDataset]:
+                 from aptdata.plugins.dataset import InMemoryDataset
+
+                 if not inputs:
+                     raise ValueError(
+                         f"Pandas component '{self.component_id}' "
+                         "requires at least one input dataset."
+                     )
+
+                 # Unwrap the first input dataset to a pandas DataFrame
+                 df = inputs[0].read()
+
+                 sig = inspect.signature(self._func)
+                 kwargs = {}
+
+                 for param_name, param in sig.parameters.items():
+                     if param.annotation == IContext or param_name == "context":
+                         kwargs[param_name] = self.context
+                     else:
+                         kwargs[param_name] = df
+
+                 if len(kwargs) == 0 and len(sig.parameters) > 0:
+                     first_param = list(sig.parameters.keys())[0]
+                     kwargs[first_param] = df
+
+                 # Execute user function
+                 result_df = self._func(**kwargs)
+
+                 # Wrap the result back into an IDataset
+                 out_ds = InMemoryDataset(uri=f"memory://{self.component_id}_out")
+                 out_ds.write(result_df)
+                 return [out_ds]
+
+         DynamicPandasComponent.__name__ = target.__name__ + "PandasComponent"
+         ComponentRegistry.register(registry_name, DynamicPandasComponent)
+         return target
+
+     return decorator
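A sketch of how the new decorators are meant to be used, inferred from the adapter code above. The function names are examples; how the generated `BaseComponent` subclasses are instantiated by the YAML builder is not shown in this diff.

```python
import pandas as pd

from aptdata.core.decorators import component, pandas_component
from aptdata.core.registry import ComponentRegistry


@component("collect_orders")
def collect_orders(inputs):  # plain function registered as a component
    return inputs            # pass datasets through unchanged


@pandas_component("drop_nulls")
def drop_nulls(df: pd.DataFrame) -> pd.DataFrame:
    # Receives the first input dataset already unwrapped to a DataFrame;
    # the returned frame is wrapped back into an InMemoryDataset.
    return df.dropna()


# Both names now resolve to dynamically generated BaseComponent subclasses.
print(ComponentRegistry.get("collect_orders").__name__)  # collect_ordersComponent
print(ComponentRegistry.get("drop_nulls").__name__)      # drop_nullsPandasComponent
```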
@@ -0,0 +1,104 @@
+ """Event Bus and Lifecycle Hooks for observing system execution."""
+
+ from __future__ import annotations
+
+ import logging
+ import queue
+ import threading
+ from abc import ABC, abstractmethod
+ from collections.abc import Callable
+ from datetime import datetime, timezone
+
+ from pydantic import BaseModel, ConfigDict, Field
+
+ logger = logging.getLogger(__name__)
+
+
+ class EventPayload(BaseModel):
+     """Base class for all events emitted by the framework.
+     All events must be serializable to JSON Lines for TUI/MCP."""
+
+     model_config = ConfigDict(extra="allow")
+
+     event_type: str = Field(..., description="The type/topic of the event.")
+     timestamp: datetime = Field(
+         default_factory=lambda: datetime.now(timezone.utc),
+         description="When the event occurred.",
+     )
+
+
+ class ComponentExecutionEvent(EventPayload):
+     """Event emitted during a component's lifecycle."""
+
+     component_id: str
+     status: str
+     execution_time: float | None = None
+     io_uris: list[str] = Field(default_factory=list)
+     error_message: str | None = None
+
+
+ class IEventBus(ABC):
+     """Interface for an Event Bus."""
+
+     @abstractmethod
+     def subscribe(
+         self, event_type: str, listener: Callable[[EventPayload], None]
+     ) -> None:
+         """Register a listener for a specific event type."""
+         pass
+
+     @abstractmethod
+     def dispatch(self, event: EventPayload) -> None:
+         """Dispatch an event asynchronously or non-blocking."""
+         pass
+
+
+ class EventBus(IEventBus):
+     """An asynchronous, non-blocking event bus for the core system.
+     Dispatches events using a background thread and a thread-safe queue.
+     Exceptions in listeners are caught and logged as warnings to prevent
+     blocking data processing or the background thread."""
+
+     def __init__(self) -> None:
+         self._listeners: dict[str, list[Callable[[EventPayload], None]]] = {}
+         self._queue: queue.Queue[EventPayload] = queue.Queue()
+         self._stop_event = threading.Event()
+         self._worker_thread = threading.Thread(target=self._worker, daemon=True)
+         self._worker_thread.start()
+
+     def subscribe(
+         self, event_type: str, listener: Callable[[EventPayload], None]
+     ) -> None:
+         if event_type not in self._listeners:
+             self._listeners[event_type] = []
+         self._listeners[event_type].append(listener)
+
+     def dispatch(self, event: EventPayload) -> None:
+         """Enqueue event for background dispatch. Non-blocking."""
+         self._queue.put(event)
+
+     def _worker(self) -> None:
+         """Background worker to process events from the queue."""
+         while not self._stop_event.is_set():
+             try:
+                 # Use a timeout to allow checking _stop_event periodically
+                 event = self._queue.get(timeout=0.1)
+             except queue.Empty:
+                 continue
+
+             listeners = self._listeners.get(event.event_type, [])
+             for listener in listeners:
+                 try:
+                     listener(event)
+                 except Exception as e:
+                     logger.warning(
+                         f"Listener {getattr(listener, '__name__', str(listener))} "
+                         f"failed on event {event.event_type}: {e}"
+                     )
+             self._queue.task_done()
+
+     def shutdown(self, timeout: float | None = None) -> None:
+         """Wait for all events to be processed and shut down the worker thread."""
+         self._queue.join()
+         self._stop_event.set()
+         self._worker_thread.join(timeout=timeout)
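The event bus above dispatches on a daemon thread, so listeners never block the pipeline. A small sketch of the intended flow, using only the classes shown in the hunk (the event type and IDs are illustrative):

```python
from aptdata.core.events import ComponentExecutionEvent, EventBus

bus = EventBus()

# Listeners are keyed by event_type and run on the background worker thread.
bus.subscribe("component.finished", lambda e: print(e.component_id, e.status))

bus.dispatch(
    ComponentExecutionEvent(
        event_type="component.finished",
        component_id="drop_nulls",
        status="success",
        execution_time=0.42,
    )
)

# Drain the queue and stop the worker before exiting.
bus.shutdown(timeout=1.0)
```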
@@ -0,0 +1,31 @@
+ import logging
+
+ from aptdata.core.system import BaseComponent
+
+ logger = logging.getLogger(__name__)
+
+
+ class ComponentRegistry:
+     """Global registry for dynamically registering and resolving components by name."""
+
+     _components: dict[str, type[BaseComponent]] = {}
+
+     @classmethod
+     def register(cls, name: str, component_class: type[BaseComponent]) -> None:
+         """Register a component class with a specific name."""
+         if name in cls._components:
+             logger.warning(f"Component '{name}' is already registered. Overwriting.")
+         cls._components[name] = component_class
+         logger.debug(f"Registered component '{name}' -> {component_class.__name__}")
+
+     @classmethod
+     def get(cls, name: str) -> type[BaseComponent]:
+         """Retrieve a component class by name."""
+         if name not in cls._components:
+             raise KeyError(f"Component '{name}' is not registered.")
+         return cls._components[name]
+
+     @classmethod
+     def clear(cls) -> None:
+         """Clear all registered components."""
+         cls._components.clear()
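Registry semantics in short, per the code above: re-registering a name overwrites with a warning, unknown names raise `KeyError`, and `clear()` resets the global state. A brief sketch (the component name is illustrative):

```python
from aptdata.core.registry import ComponentRegistry

try:
    ComponentRegistry.get("does_not_exist")
except KeyError as exc:
    print(exc)  # Component 'does_not_exist' is not registered.

# Typically used in test teardown so registrations don't leak between tests.
ComponentRegistry.clear()
```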