aptdata 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. aptdata/__init__.py +3 -0
  2. aptdata/cli/__init__.py +5 -0
  3. aptdata/cli/app.py +247 -0
  4. aptdata/cli/commands/__init__.py +9 -0
  5. aptdata/cli/commands/config_cmd.py +128 -0
  6. aptdata/cli/commands/mesh_cmd.py +435 -0
  7. aptdata/cli/commands/plugin_cmd.py +107 -0
  8. aptdata/cli/commands/system_cmd.py +90 -0
  9. aptdata/cli/commands/telemetry_cmd.py +57 -0
  10. aptdata/cli/completions.py +56 -0
  11. aptdata/cli/interactive.py +269 -0
  12. aptdata/cli/rendering/__init__.py +31 -0
  13. aptdata/cli/rendering/console.py +119 -0
  14. aptdata/cli/rendering/logger.py +26 -0
  15. aptdata/cli/rendering/panels.py +87 -0
  16. aptdata/cli/rendering/tables.py +81 -0
  17. aptdata/cli/scaffold.py +1089 -0
  18. aptdata/config/__init__.py +13 -0
  19. aptdata/config/parser.py +136 -0
  20. aptdata/config/schema.py +27 -0
  21. aptdata/config/secrets.py +60 -0
  22. aptdata/core/__init__.py +46 -0
  23. aptdata/core/context.py +31 -0
  24. aptdata/core/dataset.py +39 -0
  25. aptdata/core/lineage.py +213 -0
  26. aptdata/core/state.py +27 -0
  27. aptdata/core/system.py +317 -0
  28. aptdata/core/workflow.py +372 -0
  29. aptdata/mcp/__init__.py +5 -0
  30. aptdata/mcp/server.py +198 -0
  31. aptdata/plugins/__init__.py +77 -0
  32. aptdata/plugins/ai/__init__.py +6 -0
  33. aptdata/plugins/ai/chunking.py +66 -0
  34. aptdata/plugins/ai/embeddings.py +56 -0
  35. aptdata/plugins/base.py +57 -0
  36. aptdata/plugins/dataset.py +62 -0
  37. aptdata/plugins/governance/__init__.py +32 -0
  38. aptdata/plugins/governance/catalog.py +115 -0
  39. aptdata/plugins/governance/classification.py +44 -0
  40. aptdata/plugins/governance/lineage_store.py +49 -0
  41. aptdata/plugins/governance/rules.py +180 -0
  42. aptdata/plugins/local_fs.py +241 -0
  43. aptdata/plugins/manager.py +142 -0
  44. aptdata/plugins/postgres.py +113 -0
  45. aptdata/plugins/quality/__init__.py +39 -0
  46. aptdata/plugins/quality/contract.py +128 -0
  47. aptdata/plugins/quality/expectations.py +310 -0
  48. aptdata/plugins/quality/report.py +94 -0
  49. aptdata/plugins/quality/validator.py +139 -0
  50. aptdata/plugins/rest.py +135 -0
  51. aptdata/plugins/transform/__init__.py +14 -0
  52. aptdata/plugins/transform/pandas.py +129 -0
  53. aptdata/plugins/transform/spark.py +134 -0
  54. aptdata/plugins/vector/__init__.py +6 -0
  55. aptdata/plugins/vector/base.py +19 -0
  56. aptdata/plugins/vector/qdrant.py +41 -0
  57. aptdata/telemetry/__init__.py +5 -0
  58. aptdata/telemetry/instrumentation.py +164 -0
  59. aptdata/tui/__init__.py +5 -0
  60. aptdata/tui/monitor.py +279 -0
  61. aptdata-0.0.2.dist-info/METADATA +330 -0
  62. aptdata-0.0.2.dist-info/RECORD +65 -0
  63. aptdata-0.0.2.dist-info/WHEEL +4 -0
  64. aptdata-0.0.2.dist-info/entry_points.txt +3 -0
  65. aptdata-0.0.2.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,44 @@
1
+ """Data classification convenience re-exports and policy definitions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Any
7
+
8
+ # Re-export for convenience so callers can do:
9
+ # from aptdata.plugins.governance.classification import ColumnClassification
10
+ from aptdata.plugins.quality.contract import ColumnClassification
11
+
12
+
13
@dataclass
class DataClassificationPolicy:
    """Describe how data carrying a given classification must be handled.

    Attributes:
        name: Policy name.
        description: Human-readable description of the policy.
        pii_columns: Column names that contain personally identifiable
            information.
        retention_days: Required data retention period in days.
        encryption_required: Whether data at rest must be encrypted.
        access_roles: Roles permitted to access data governed by this policy.
        metadata: Arbitrary extra metadata.
    """

    # The only required field; everything else has a safe default.
    name: str
    description: str = ""
    # Mutable defaults go through default_factory so instances never share state.
    pii_columns: list[str] = field(default_factory=list)
    retention_days: int = 0
    encryption_required: bool = False
    access_roles: list[str] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)
42
+
43
+
44
+ __all__ = ["ColumnClassification", "DataClassificationPolicy"]
@@ -0,0 +1,49 @@
1
+ """In-memory lineage graph store for persisting and querying workflow runs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from aptdata.core.lineage import LineageGraph
6
+
7
+
8
class LineageStore:
    """Keep :class:`~aptdata.core.lineage.LineageGraph` objects in memory.

    A workflow run yields exactly one :class:`LineageGraph`, stored under
    its :attr:`~LineageGraph.run_id`.

    Examples
    --------
    ::

        store = LineageStore()
        store.save(graph)
        loaded = store.load(run_id)
        runs = store.list_runs()
        graphs = store.query_by_dataset("s3://bucket/data.parquet")
    """

    def __init__(self) -> None:
        # run_id -> graph; a plain dict is enough for an in-memory store.
        self._graphs: dict[str, LineageGraph] = {}

    def save(self, graph: LineageGraph) -> None:
        """Persist *graph* under its :attr:`~LineageGraph.run_id`."""
        self._graphs[graph.run_id] = graph

    def load(self, run_id: str) -> LineageGraph | None:
        """Return the graph for *run_id*, or ``None`` if not found."""
        return self._graphs.get(run_id)

    def list_runs(self) -> list[str]:
        """Return a sorted list of all stored run IDs."""
        return sorted(self._graphs.keys())

    def query_by_dataset(self, uri: str) -> list[LineageGraph]:
        """Return all graphs that contain at least one node referencing *uri*."""
        matches: list[LineageGraph] = []
        for graph in self._graphs.values():
            if any(node.dataset_uri == uri for node in graph.nodes):
                matches.append(graph)
        return matches
47
+
48
+
49
+ __all__ = ["LineageStore"]
@@ -0,0 +1,180 @@
1
+ """Business rules registry with audit logging."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from datetime import datetime, timezone
7
+ from enum import Enum
8
+ from typing import Any
9
+
10
+
11
class RuleStatus(str, Enum):
    """Outcome of a single business-rule application."""

    # String values mirror the member names so the status serializes cleanly
    # (the str mixin lets members be used anywhere a plain string is expected).
    APPLIED = "APPLIED"
    SKIPPED = "SKIPPED"
    FAILED = "FAILED"
17
+
18
+
19
@dataclass
class BusinessRule:
    """Declaration of a single business rule.

    Attributes:
        rule_id: Unique identifier (e.g. ``"BR-001"``).
        name: Human-readable rule name.
        version: Semantic version string (e.g. ``"1.0.0"``).
        owner: Team or person responsible for this rule.
        description: Detailed description of what the rule enforces.
        expression: Human-readable expression or pseudo-code representing
            the rule logic.
        tags: Free-form classification tags.
        effective_from: ISO-8601 date/time from which the rule is effective.
        effective_until: ISO-8601 date/time after which the rule expires
            (empty = no expiry).
        metadata: Arbitrary extra metadata.
    """

    rule_id: str
    name: str
    version: str = "1.0.0"
    owner: str = ""
    description: str = ""
    expression: str = ""
    tags: list[str] = field(default_factory=list)
    # Defaults to "now" in UTC, captured at instantiation time (not import time).
    effective_from: str = field(
        default_factory=lambda: datetime.now(timezone.utc).isoformat()
    )
    effective_until: str = ""
    metadata: dict[str, Any] = field(default_factory=dict)
59
+
60
+
61
@dataclass
class RuleAuditEntry:
    """Audit record for a single rule application.

    Attributes:
        rule_id: ID of the rule that was applied.
        rule_version: Version of the rule that was applied.
        status: Outcome of the rule execution.
        workflow_name: Workflow in which the rule was executed.
        step_name: Step within the workflow.
        trace_id: OpenTelemetry trace identifier.
        timestamp: UTC ISO-8601 timestamp of the audit entry.
        rows_affected: Number of rows affected by the rule.
        details: Human-readable summary of what the rule did.
        metadata: Arbitrary extra metadata.
    """

    rule_id: str
    rule_version: str = "1.0.0"
    status: RuleStatus = RuleStatus.APPLIED
    workflow_name: str = ""
    step_name: str = ""
    trace_id: str = ""
    # Stamped at instantiation time (not import time), always UTC.
    timestamp: str = field(
        default_factory=lambda: datetime.now(timezone.utc).isoformat()
    )
    rows_affected: int = 0
    details: str = ""
    metadata: dict[str, Any] = field(default_factory=dict)
101
+
102
+
103
class RuleRegistry:
    """In-memory registry for :class:`BusinessRule` definitions and audit logs.

    Examples
    --------
    ::

        registry = RuleRegistry()
        registry.register(BusinessRule(rule_id="BR-001", name="Age must be positive"))
        rule = registry.get("BR-001")
        rules = registry.list_rules(tag="finance")
    """

    def __init__(self) -> None:
        # rule_id -> rule declaration.
        self._rules: dict[str, BusinessRule] = {}
        # Append-only log of rule applications.
        self._audit_log: list[RuleAuditEntry] = []

    def register(self, rule: BusinessRule) -> None:
        """Register *rule* under its :attr:`~BusinessRule.rule_id`."""
        self._rules[rule.rule_id] = rule

    def get(self, rule_id: str) -> BusinessRule | None:
        """Return the rule with *rule_id*, or ``None`` if not found."""
        return self._rules.get(rule_id)

    def list_rules(
        self,
        owner: str | None = None,
        tag: str | None = None,
    ) -> list[BusinessRule]:
        """Return all registered rules, optionally filtered by *owner* or *tag*.

        Parameters
        ----------
        owner:
            If provided, only rules owned by this owner are returned.
        tag:
            If provided, only rules with this tag are returned.
        """

        def matches(rule: BusinessRule) -> bool:
            # A rule survives only if it passes every active filter.
            if owner is not None and rule.owner != owner:
                return False
            if tag is not None and tag not in rule.tags:
                return False
            return True

        return [rule for rule in self._rules.values() if matches(rule)]

    def record_audit(self, entry: RuleAuditEntry) -> None:
        """Append *entry* to the audit log."""
        self._audit_log.append(entry)

    def get_audit_log(
        self,
        rule_id: str | None = None,
        trace_id: str | None = None,
    ) -> list[RuleAuditEntry]:
        """Return audit entries, optionally filtered by *rule_id* or *trace_id*.

        Parameters
        ----------
        rule_id:
            If provided, only entries for this rule are returned.
        trace_id:
            If provided, only entries with this trace ID are returned.
        """

        def matches(entry: RuleAuditEntry) -> bool:
            if rule_id is not None and entry.rule_id != rule_id:
                return False
            if trace_id is not None and entry.trace_id != trace_id:
                return False
            return True

        # Always return a fresh list so callers cannot mutate the log.
        return [entry for entry in self._audit_log if matches(entry)]
173
+
174
+
175
+ __all__ = [
176
+ "RuleStatus",
177
+ "BusinessRule",
178
+ "RuleAuditEntry",
179
+ "RuleRegistry",
180
+ ]
@@ -0,0 +1,241 @@
1
+ """Local filesystem plugin — CSV, JSON, and Parquet readers / writers.
2
+
3
+ All classes produce or consume :class:`~aptdata.plugins.dataset.InMemoryDataset`
4
+ instances backed by lists of dictionaries (records).
5
+
6
+ CSV and JSON support is built-in (stdlib ``csv`` / ``json``).
7
+ Parquet support requires the optional ``pyarrow`` package; a friendly
8
+ :class:`~aptdata.plugins.manager.PluginDependencyError` is raised if
9
+ it is not installed.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import csv
15
+ import json
16
+ from pathlib import Path
17
+ from typing import Any
18
+
19
+ from aptdata.core.dataset import BaseDataset
20
+ from aptdata.plugins.base import BaseReader, BaseWriter
21
+ from aptdata.plugins.dataset import InMemoryDataset
22
+ from aptdata.plugins.manager import PluginDependencyError
23
+
24
+ # ---------------------------------------------------------------------------
25
+ # CSV
26
+ # ---------------------------------------------------------------------------
27
+
28
+
29
class CSVReader(BaseReader):
    """Read a CSV file into an :class:`InMemoryDataset`.

    Args:
        filepath: Path to the CSV file on the local filesystem.
        encoding: File encoding (default ``"utf-8"``).
        delimiter: Column delimiter (default ``","``).
    """

    def __init__(
        self,
        filepath: str,
        *,
        encoding: str = "utf-8",
        delimiter: str = ",",
    ) -> None:
        self.filepath = Path(filepath)
        self.encoding = encoding
        self.delimiter = delimiter

    def read(self, **kwargs: Any) -> InMemoryDataset:
        """Parse the file and return its rows as a new in-memory dataset."""
        # newline="" is required by the csv module for correct quoting.
        with open(self.filepath, newline="", encoding=self.encoding) as handle:
            parser = csv.DictReader(handle, delimiter=self.delimiter)
            rows = [row for row in parser]
        dataset = InMemoryDataset(uri=str(self.filepath))
        dataset.write(rows)
        return dataset
60
+
61
+
62
class CSVWriter(BaseWriter):
    """Write an :class:`InMemoryDataset` to a CSV file.

    Parameters
    ----------
    filepath:
        Destination path on the local filesystem.
    encoding:
        File encoding (default ``"utf-8"``).
    delimiter:
        Column delimiter (default ``","``).
    """

    def __init__(
        self,
        filepath: str,
        *,
        encoding: str = "utf-8",
        delimiter: str = ",",
    ) -> None:
        self.filepath = Path(filepath)
        self.encoding = encoding
        self.delimiter = delimiter

    def write(self, dataset: BaseDataset, **kwargs: Any) -> None:
        """Serialize the dataset records to CSV, creating parent dirs as needed.

        An empty dataset produces an empty file.  Column order follows the
        first appearance of each key across *all* records; records missing a
        column are padded with the empty string.
        """
        records: list[dict[str, Any]] = dataset.read()
        self.filepath.parent.mkdir(parents=True, exist_ok=True)
        if not records:
            self.filepath.write_text("", encoding=self.encoding)
            return

        # Take the union of keys over all records, not just records[0]:
        # with only the first record's keys, a later row carrying an extra
        # key makes DictWriter raise ValueError.  A dict preserves insertion
        # order, giving stable first-seen column ordering.
        fieldnames: dict[str, None] = {}
        for record in records:
            for key in record:
                fieldnames.setdefault(key, None)

        with open(self.filepath, "w", newline="", encoding=self.encoding) as fh:
            writer = csv.DictWriter(
                fh,
                fieldnames=list(fieldnames),
                delimiter=self.delimiter,
                restval="",  # fill value for records missing a column
            )
            writer.writeheader()
            writer.writerows(records)
99
+
100
+
101
+ # ---------------------------------------------------------------------------
102
+ # JSON
103
+ # ---------------------------------------------------------------------------
104
+
105
+
106
class JSONReader(BaseReader):
    """Read a JSON file (array of objects) into an :class:`InMemoryDataset`.

    Args:
        filepath: Path to the JSON file.
        encoding: File encoding (default ``"utf-8"``).
    """

    def __init__(self, filepath: str, *, encoding: str = "utf-8") -> None:
        self.filepath = Path(filepath)
        self.encoding = encoding

    def read(self, **kwargs: Any) -> InMemoryDataset:
        """Load the file and return its records as a new in-memory dataset."""
        with open(self.filepath, encoding=self.encoding) as handle:
            payload = json.load(handle)
        # Only a top-level JSON array maps onto a record-oriented dataset.
        if not isinstance(payload, list):
            raise ValueError("JSON file must contain an array of objects.")
        dataset = InMemoryDataset(uri=str(self.filepath))
        dataset.write(payload)
        return dataset
129
+
130
+
131
class JSONWriter(BaseWriter):
    """Write an :class:`InMemoryDataset` to a JSON file (array of objects).

    Args:
        filepath: Destination path.
        encoding: File encoding (default ``"utf-8"``).
        indent: JSON indentation level (default ``2``).
    """

    def __init__(
        self,
        filepath: str,
        *,
        encoding: str = "utf-8",
        indent: int = 2,
    ) -> None:
        self.filepath = Path(filepath)
        self.encoding = encoding
        self.indent = indent

    def write(self, dataset: BaseDataset, **kwargs: Any) -> None:
        """Dump the dataset records as a JSON array, creating parent dirs."""
        payload: list[dict[str, Any]] = dataset.read()
        self.filepath.parent.mkdir(parents=True, exist_ok=True)
        with open(self.filepath, "w", encoding=self.encoding) as handle:
            # ensure_ascii=False keeps non-ASCII text readable in the output.
            json.dump(payload, handle, indent=self.indent, ensure_ascii=False)
160
+
161
+
162
+ # ---------------------------------------------------------------------------
163
+ # Parquet (optional dependency: pyarrow)
164
+ # ---------------------------------------------------------------------------
165
+
166
+
167
class ParquetReader(BaseReader):
    """Read a Parquet file into an :class:`InMemoryDataset`.

    Requires the ``pyarrow`` package.

    Parameters
    ----------
    filepath:
        Path to the ``.parquet`` file.
    """

    def __init__(self, filepath: str) -> None:
        self.filepath = Path(filepath)

    def read(self, **kwargs: Any) -> InMemoryDataset:
        """Load the Parquet file and return its rows as an in-memory dataset.

        Raises
        ------
        PluginDependencyError
            If ``pyarrow`` is not installed.
        """
        try:
            import pyarrow.parquet as pq  # noqa: WPS433
        except ImportError:
            raise PluginDependencyError("ParquetReader", "pyarrow") from None

        table = pq.read_table(str(self.filepath))
        # Table.to_pylist() yields one dict per row directly; the previous
        # code used to_pydict() (a *columnar* dict, mis-annotated as a list
        # of dicts) and pivoted it to rows by hand.
        rows: list[dict[str, Any]] = table.to_pylist()

        ds = InMemoryDataset(uri=str(self.filepath))
        ds.write(rows)
        return ds
197
+
198
+
199
class ParquetWriter(BaseWriter):
    """Write an :class:`InMemoryDataset` to a Parquet file.

    Requires the ``pyarrow`` package.

    Parameters
    ----------
    filepath:
        Destination ``.parquet`` path.
    """

    def __init__(self, filepath: str) -> None:
        self.filepath = Path(filepath)

    def write(self, dataset: BaseDataset, **kwargs: Any) -> None:
        """Serialize the dataset to Parquet, creating parent dirs as needed.

        Raises
        ------
        PluginDependencyError
            If ``pyarrow`` is not installed.
        """
        try:
            import pyarrow as pa  # noqa: WPS433
            import pyarrow.parquet as pq  # noqa: WPS433
        except ImportError:
            raise PluginDependencyError("ParquetWriter", "pyarrow") from None

        records: list[dict[str, Any]] = dataset.read()
        if not records:
            # Empty dataset -> empty parquet file with no columns.
            table = pa.table({})
        else:
            # Use the union of keys across all records (first-seen order).
            # Previously only records[0] was consulted: later rows with extra
            # keys silently lost data, and rows missing a key raised KeyError.
            # dict.get turns missing values into Parquet nulls.
            keys: dict[str, None] = {}
            for record in records:
                for key in record:
                    keys.setdefault(key, None)
            columnar = {k: [record.get(k) for record in records] for k in keys}
            table = pa.table(columnar)

        self.filepath.parent.mkdir(parents=True, exist_ok=True)
        pq.write_table(table, str(self.filepath))
232
+
233
+
234
+ __all__ = [
235
+ "CSVReader",
236
+ "CSVWriter",
237
+ "JSONReader",
238
+ "JSONWriter",
239
+ "ParquetReader",
240
+ "ParquetWriter",
241
+ ]
@@ -0,0 +1,142 @@
1
+ """Dynamic plugin manager for aptdata.
2
+
3
+ Provides :class:`PluginManager` which can discover, register, and
4
+ instantiate reader / writer plugins. If a plugin requires an
5
+ optional third-party library that is not installed, a friendly
6
+ :class:`PluginDependencyError` is raised.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import importlib
12
+ import inspect
13
+ from typing import Any
14
+
15
+ from aptdata.plugins.base import BaseReader, BaseWriter
16
+
17
+ # ---------------------------------------------------------------------------
18
+ # Custom errors
19
+ # ---------------------------------------------------------------------------
20
+
21
+
22
class PluginDependencyError(ImportError):
    """Raised when a plugin requires a library that is not installed."""

    def __init__(self, plugin_name: str, package: str) -> None:
        # Keep the offending names around for programmatic inspection.
        self.plugin_name = plugin_name
        self.package = package
        message = (
            f"Plugin '{plugin_name}' requires the '{package}' package. "
            f"Install it with: pip install {package}"
        )
        super().__init__(message)
32
+
33
+
34
+ # ---------------------------------------------------------------------------
35
+ # Plugin Manager
36
+ # ---------------------------------------------------------------------------
37
+
38
+
39
class PluginManager:
    """Registry for reader / writer plugin classes.

    Plugins are registered under a unique *name* and can be retrieved or
    listed later.  The manager can also import a plugin module by dotted
    Python path, enabling dynamic / entry-point-style discovery.
    """

    def __init__(self) -> None:
        self._readers: dict[str, type[BaseReader]] = {}
        self._writers: dict[str, type[BaseWriter]] = {}

    # -- registration -------------------------------------------------------

    def register_reader(self, name: str, reader_cls: type[BaseReader]) -> None:
        """Register *reader_cls* under *name*."""
        self._readers[name] = reader_cls

    def register_writer(self, name: str, writer_cls: type[BaseWriter]) -> None:
        """Register *writer_cls* under *name*."""
        self._writers[name] = writer_cls

    # -- lookup -------------------------------------------------------------

    def get_reader(self, name: str) -> type[BaseReader] | None:
        """Return the reader class registered under *name*, or ``None``."""
        return self._readers.get(name)

    def get_writer(self, name: str) -> type[BaseWriter] | None:
        """Return the writer class registered under *name*, or ``None``."""
        return self._writers.get(name)

    # -- listing ------------------------------------------------------------

    def list_readers(self) -> list[str]:
        """Return a sorted list of registered reader names."""
        return sorted(self._readers)

    def list_writers(self) -> list[str]:
        """Return a sorted list of registered writer names."""
        return sorted(self._writers)

    def list_plugins(self) -> dict[str, list[str]]:
        """Return all registered plugins grouped by kind."""
        return {"readers": self.list_readers(), "writers": self.list_writers()}

    def get_plugin_schema(self, name: str) -> dict[str, Any]:
        """Return constructor argument schema for a reader/writer plugin.

        Raises
        ------
        KeyError
            If *name* is not registered as a reader or writer.
        """
        # Look up once; readers take precedence when a name is registered
        # as both (matches the original lookup order).
        reader_cls = self.get_reader(name)
        plugin_cls: type[Any] | None = (
            reader_cls if reader_cls is not None else self.get_writer(name)
        )
        if plugin_cls is None:
            raise KeyError(f"Plugin '{name}' is not registered.")

        empty = inspect.Parameter.empty
        parameters = inspect.signature(plugin_cls.__init__).parameters
        arguments = [
            {
                "name": param_name,
                "required": param.default is empty,
                "default": None if param.default is empty else param.default,
            }
            for param_name, param in parameters.items()
            if param_name != "self"
        ]

        kind = "reader" if reader_cls is not None else "writer"
        return {"name": name, "type": kind, "arguments": arguments}

    def preview_dataset(self, plugin_name: str, **kwargs: Any) -> list[dict[str, Any]]:
        """Run a reader plugin and return the first five records."""
        reader_cls = self.get_reader(plugin_name)
        if reader_cls is None:
            raise KeyError(f"Reader plugin '{plugin_name}' is not registered.")
        dataset = reader_cls(**kwargs).read()
        return dataset.read()[:5]

    # -- dynamic loading ----------------------------------------------------

    def load_module(self, module_path: str) -> Any:
        """Import *module_path* and return the module object.

        Useful for loading plugin modules dynamically, e.g. from an
        entry-point or a configuration file.

        Raises
        ------
        ModuleNotFoundError
            If the module cannot be imported.
        """
        return importlib.import_module(module_path)
137
+
138
+
139
+ #: Global singleton – import this in plugin modules and application code.
140
+ plugin_manager = PluginManager()
141
+
142
+ __all__ = ["PluginManager", "PluginDependencyError", "plugin_manager"]