aptdata 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. aptdata/__init__.py +3 -0
  2. aptdata/cli/__init__.py +5 -0
  3. aptdata/cli/app.py +247 -0
  4. aptdata/cli/commands/__init__.py +9 -0
  5. aptdata/cli/commands/config_cmd.py +128 -0
  6. aptdata/cli/commands/mesh_cmd.py +435 -0
  7. aptdata/cli/commands/plugin_cmd.py +107 -0
  8. aptdata/cli/commands/system_cmd.py +90 -0
  9. aptdata/cli/commands/telemetry_cmd.py +57 -0
  10. aptdata/cli/completions.py +56 -0
  11. aptdata/cli/interactive.py +269 -0
  12. aptdata/cli/rendering/__init__.py +31 -0
  13. aptdata/cli/rendering/console.py +119 -0
  14. aptdata/cli/rendering/logger.py +26 -0
  15. aptdata/cli/rendering/panels.py +87 -0
  16. aptdata/cli/rendering/tables.py +81 -0
  17. aptdata/cli/scaffold.py +1089 -0
  18. aptdata/config/__init__.py +13 -0
  19. aptdata/config/parser.py +136 -0
  20. aptdata/config/schema.py +27 -0
  21. aptdata/config/secrets.py +60 -0
  22. aptdata/core/__init__.py +46 -0
  23. aptdata/core/context.py +31 -0
  24. aptdata/core/dataset.py +39 -0
  25. aptdata/core/lineage.py +213 -0
  26. aptdata/core/state.py +27 -0
  27. aptdata/core/system.py +317 -0
  28. aptdata/core/workflow.py +372 -0
  29. aptdata/mcp/__init__.py +5 -0
  30. aptdata/mcp/server.py +198 -0
  31. aptdata/plugins/__init__.py +77 -0
  32. aptdata/plugins/ai/__init__.py +6 -0
  33. aptdata/plugins/ai/chunking.py +66 -0
  34. aptdata/plugins/ai/embeddings.py +56 -0
  35. aptdata/plugins/base.py +57 -0
  36. aptdata/plugins/dataset.py +62 -0
  37. aptdata/plugins/governance/__init__.py +32 -0
  38. aptdata/plugins/governance/catalog.py +115 -0
  39. aptdata/plugins/governance/classification.py +44 -0
  40. aptdata/plugins/governance/lineage_store.py +49 -0
  41. aptdata/plugins/governance/rules.py +180 -0
  42. aptdata/plugins/local_fs.py +241 -0
  43. aptdata/plugins/manager.py +142 -0
  44. aptdata/plugins/postgres.py +113 -0
  45. aptdata/plugins/quality/__init__.py +39 -0
  46. aptdata/plugins/quality/contract.py +128 -0
  47. aptdata/plugins/quality/expectations.py +310 -0
  48. aptdata/plugins/quality/report.py +94 -0
  49. aptdata/plugins/quality/validator.py +139 -0
  50. aptdata/plugins/rest.py +135 -0
  51. aptdata/plugins/transform/__init__.py +14 -0
  52. aptdata/plugins/transform/pandas.py +129 -0
  53. aptdata/plugins/transform/spark.py +134 -0
  54. aptdata/plugins/vector/__init__.py +6 -0
  55. aptdata/plugins/vector/base.py +19 -0
  56. aptdata/plugins/vector/qdrant.py +41 -0
  57. aptdata/telemetry/__init__.py +5 -0
  58. aptdata/telemetry/instrumentation.py +164 -0
  59. aptdata/tui/__init__.py +5 -0
  60. aptdata/tui/monitor.py +279 -0
  61. aptdata-0.0.2.dist-info/METADATA +330 -0
  62. aptdata-0.0.2.dist-info/RECORD +65 -0
  63. aptdata-0.0.2.dist-info/WHEEL +4 -0
  64. aptdata-0.0.2.dist-info/entry_points.txt +3 -0
  65. aptdata-0.0.2.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,164 @@
1
+ """OpenTelemetry bootstrap helpers for aptdata."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Mapping
6
+ from dataclasses import dataclass
7
+ from threading import Lock
8
+ from time import perf_counter
9
+
10
+ from opentelemetry import metrics, trace
11
+ from opentelemetry.sdk.metrics import MeterProvider
12
+ from opentelemetry.sdk.metrics.export import MetricReader
13
+ from opentelemetry.sdk.resources import Resource
14
+ from opentelemetry.sdk.trace import TracerProvider
15
+ from opentelemetry.sdk.trace.export import SimpleSpanProcessor, SpanExporter
16
+ from opentelemetry.trace import Tracer
17
+
18
+ _SENSITIVE_KEYS = ("password", "secret", "token", "authorization", "api_key")
19
+ _REGISTERED_SECRETS: dict[str, str] = {}
20
+ _METRICS_LOCK = Lock()
21
+ _TOKEN_COUNTER = None
22
+
23
+
24
@dataclass
class IngestionMetrics:
    """Runtime ingestion metrics exposed to telemetry and the TUI monitor."""

    # Expected number of documents for the current run (0 = unknown/unset).
    documents_total: int = 0
    # Documents fully processed so far.
    documents_processed: int = 0
    # Chunks processed so far (a single document may yield many chunks).
    chunks_processed: int = 0
    # Cumulative LLM tokens consumed, fed by record_llm_tokens_used().
    tokens_used: int = 0
    # perf_counter() timestamp of the run start; basis for throughput math.
    started_at: float = 0.0


# Module-level singleton; mutated only under _METRICS_LOCK by the helpers below.
_INGESTION_METRICS = IngestionMetrics()
36
+
37
+
38
def register_secret(name: str, value: str) -> None:
    """Record a secret under *name* so telemetry payloads can be masked."""
    _REGISTERED_SECRETS.update({name: value})
41
+
42
+
43
def get_registered_secret_names() -> list[str]:
    """Return the names of all registered secrets, sorted for stable display."""
    names = list(_REGISTERED_SECRETS.keys())
    names.sort()
    return names
46
+
47
+
48
def mask_telemetry_value(value: object, *, key: str | None = None) -> object:
    """Scrub sensitive data from *value* before export to telemetry/logs.

    A value is replaced with ``"****"`` when its *key* looks sensitive;
    strings have registered secret substrings redacted; mappings, lists
    and tuples are traversed recursively.
    """
    if value is None:
        return value
    # A sensitive-looking key masks the whole value, regardless of its type.
    if key is not None:
        lowered = key.lower()
        if any(marker in lowered for marker in _SENSITIVE_KEYS):
            return "****"
    if isinstance(value, str):
        if not _REGISTERED_SECRETS:
            return value
        redacted = value
        for secret in _REGISTERED_SECRETS.values():
            if secret and secret in redacted:
                redacted = redacted.replace(secret, "****")
        return redacted
    if isinstance(value, Mapping):
        return {
            name: mask_telemetry_value(item, key=str(name))
            for name, item in value.items()
        }
    if isinstance(value, list):
        return [mask_telemetry_value(item) for item in value]
    if isinstance(value, tuple):
        return tuple(mask_telemetry_value(item) for item in value)
    return value
69
+
70
+
71
def configure_telemetry(
    *,
    service_name: str = "aptdata",
    span_exporter: SpanExporter | None = None,
    metric_reader: MetricReader | None = None,
) -> tuple[TracerProvider, MeterProvider]:
    """Build and install the global tracer and meter providers.

    Both providers share one resource tagged with ``service.name``.  The
    optional exporter/reader are attached only when supplied, so tests can
    run without any export backend configured.
    """
    resource = Resource.create({"service.name": service_name})

    tracer_provider = TracerProvider(resource=resource)
    if span_exporter is not None:
        processor = SimpleSpanProcessor(span_exporter)
        tracer_provider.add_span_processor(processor)
    trace.set_tracer_provider(tracer_provider)

    readers: list[MetricReader] = []
    if metric_reader is not None:
        readers.append(metric_reader)
    meter_provider = MeterProvider(resource=resource, metric_readers=readers)
    metrics.set_meter_provider(meter_provider)

    return tracer_provider, meter_provider
88
+
89
+
90
def get_tracer(name: str = "aptdata.component") -> Tracer:
    """Fetch a tracer from the globally configured tracer provider."""
    tracer = trace.get_tracer(name)
    return tracer
93
+
94
+
95
def get_meter(name: str = "aptdata.component"):
    """Fetch a meter from the globally configured meter provider."""
    meter = metrics.get_meter(name)
    return meter
98
+
99
+
100
def reset_ingestion_metrics() -> None:
    """Zero every in-memory ingestion counter and restart the run timer."""
    with _METRICS_LOCK:
        _INGESTION_METRICS.started_at = perf_counter()
        for counter in (
            "documents_total",
            "documents_processed",
            "chunks_processed",
            "tokens_used",
        ):
            setattr(_INGESTION_METRICS, counter, 0)
108
+
109
+
110
def set_ingestion_total_documents(total: int) -> None:
    """Record the expected document count (negative values clamp to zero)."""
    clamped = total if total > 0 else 0
    with _METRICS_LOCK:
        _INGESTION_METRICS.documents_total = clamped
114
+
115
+
116
def record_processed_documents(count: int) -> None:
    """Add *count* (clamped at zero) to the processed-document tally."""
    increment = max(count, 0)
    with _METRICS_LOCK:
        _INGESTION_METRICS.documents_processed += increment
120
+
121
+
122
def record_processed_chunks(count: int) -> None:
    """Add *count* (clamped at zero) to the processed-chunk tally."""
    increment = max(count, 0)
    with _METRICS_LOCK:
        _INGESTION_METRICS.chunks_processed += increment
126
+
127
+
128
def record_llm_tokens_used(tokens: int) -> None:
    """Track consumed LLM tokens in memory and on an OpenTelemetry counter."""
    global _TOKEN_COUNTER

    # Non-positive amounts carry no information; ignore them early.
    if tokens <= 0:
        return

    with _METRICS_LOCK:
        _INGESTION_METRICS.tokens_used += tokens
        if _TOKEN_COUNTER is None:
            # Create the OTel counter lazily so importing this module never
            # touches the global meter provider.
            meter = get_meter("aptdata.ingestion")
            _TOKEN_COUNTER = meter.create_counter(
                "llm.tokens.used",
                description="Total LLM tokens consumed by embedding/LLM plugins.",
                unit="1",
            )
        _TOKEN_COUNTER.add(tokens)
142
+
143
+
144
def get_ingestion_metrics() -> dict[str, float | int]:
    """Snapshot the ingestion counters plus derived throughput and progress."""
    with _METRICS_LOCK:
        elapsed = perf_counter() - _INGESTION_METRICS.started_at
        if elapsed < 0.0:
            elapsed = 0.0
        docs = _INGESTION_METRICS.documents_processed
        total = _INGESTION_METRICS.documents_total

        if total:
            ratio = docs / total
        elif docs > 0:
            # No declared total but work has happened: report as complete.
            ratio = 1.0
        else:
            ratio = 0.0
        rate = docs / elapsed if elapsed > 0 else 0.0

        snapshot: dict[str, float | int] = {
            "documents_total": total,
            "documents_processed": docs,
            "chunks_processed": _INGESTION_METRICS.chunks_processed,
            "tokens_used": _INGESTION_METRICS.tokens_used,
            "throughput_docs_per_sec": rate,
            "progress_ratio": min(ratio, 1.0),
        }
    return snapshot
162
+
163
+
164
+ reset_ingestion_metrics()
@@ -0,0 +1,5 @@
1
+ """Interactive TUI monitoring panel for aptdata."""
2
+
3
+ from aptdata.tui.monitor import MonitorApp
4
+
5
+ __all__ = ["MonitorApp"]
aptdata/tui/monitor.py ADDED
@@ -0,0 +1,279 @@
1
+ """Textual-based interactive monitoring dashboard.
2
+
3
+ Displays the pipeline DAG, memory usage, task status and agent trace in
4
+ real time via a tabbed interface.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import ClassVar
10
+
11
+ from textual.app import App, ComposeResult
12
+ from textual.binding import Binding
13
+ from textual.containers import Vertical
14
+ from textual.widgets import (
15
+ DataTable,
16
+ Footer,
17
+ Header,
18
+ RichLog,
19
+ Static,
20
+ TabbedContent,
21
+ TabPane,
22
+ )
23
+
24
+ from aptdata.mcp.server import get_mcp_status
25
+ from aptdata.telemetry.instrumentation import (
26
+ get_ingestion_metrics,
27
+ get_registered_secret_names,
28
+ )
29
+
30
+
31
class _DAGPanel(Static):
    """Simple ASCII DAG visualisation panel."""

    # Widget-scoped styling; $success is a Textual theme variable.
    DEFAULT_CSS = """
    _DAGPanel {
        border: solid $success;
        height: 1fr;
        padding: 1 2;
    }
    """

    def on_mount(self) -> None:
        # No live pipeline wiring exists yet; always show the placeholder.
        self.update(_placeholder_dag())
44
+
45
+
46
+ def _placeholder_dag() -> str:
47
+ """Return a placeholder ASCII DAG when no pipeline is loaded."""
48
+ return (
49
+ "[bold green]Pipeline DAG[/bold green]\n\n"
50
+ " [cyan]● step_1[/cyan]\n"
51
+ " │\n"
52
+ " [cyan]● step_2[/cyan]\n"
53
+ " │\n"
54
+ " [cyan]● step_3[/cyan]\n\n"
55
+ "[dim]No pipeline loaded – showing placeholder.[/dim]"
56
+ )
57
+
58
+
59
class _StatusTable(DataTable):
    """Table showing per-task status and memory usage."""

    DEFAULT_CSS = """
    _StatusTable {
        border: solid $primary;
        height: 1fr;
    }
    """

    # Placeholder rows shown until a running pipeline supplies real data.
    _PLACEHOLDER_ROWS = (
        ("step_1", "✅ done", "128", "0.42"),
        ("step_2", "⏳ running", "256", "1.07"),
        ("step_3", "⌛ pending", "—", "—"),
    )

    def on_mount(self) -> None:
        self.add_columns("Step", "Status", "Memory (MB)", "Elapsed (s)")
        self.populate()

    def populate(self) -> None:
        """Clear the table and re-add the placeholder data set."""
        self.clear()
        for row in self._PLACEHOLDER_ROWS:
            self.add_row(*row)
82
+
83
+
84
class _MemoryBar(Static):
    """Simple memory usage indicator.

    Prefers psutil for accurate numbers, falls back to parsing
    ``/proc/meminfo`` (Linux only), and finally shows "unavailable" rather
    than letting the monitor crash.
    """

    DEFAULT_CSS = """
    _MemoryBar {
        height: 3;
        padding: 0 2;
        background: $surface;
    }
    """

    def on_mount(self) -> None:
        self.refresh_memory()

    @staticmethod
    def _bar(pct: float) -> str:
        """Render a 20-cell block bar for a 0-100 percentage."""
        filled = int(pct / 5)
        return "█" * filled + "░" * (20 - filled)

    def refresh_memory(self) -> None:
        """Re-read system memory usage and redraw the widget text."""
        try:
            import psutil  # optional dependency

            mem = psutil.virtual_memory()
            pct = mem.percent
            used_gb = mem.used / 1_073_741_824
            total_gb = mem.total / 1_073_741_824
            self.update(
                f"[bold]Memory:[/bold] [{self._bar(pct)}] {pct:.1f}% "
                f"({used_gb:.2f} / {total_gb:.2f} GB)"
            )
        except ImportError:
            # psutil not installed – show basic info from /proc/meminfo
            try:
                with open("/proc/meminfo") as f:
                    # maxsplit=1 keeps the unpacking safe even if a value
                    # ever contains an extra ':'.
                    lines = {
                        k: int(v.split()[0])
                        for k, v in (
                            line.strip().split(":", 1) for line in f if ":" in line
                        )
                    }
                total = lines.get("MemTotal", 0)
                avail = lines.get("MemAvailable", 0)
                used = total - avail
                pct = (used / total * 100) if total else 0
                self.update(
                    f"[bold]Memory:[/bold] [{self._bar(pct)}] {pct:.1f}% "
                    "(install psutil for detailed metrics)"
                )
            except Exception:  # noqa: BLE001
                # Deliberate best-effort: metrics must never crash the TUI.
                self.update("[bold]Memory:[/bold] unavailable")
132
+
133
+
134
class _IngestionMetricsPanel(Static):
    """Panel with live ingestion throughput/cost/progress metrics."""

    DEFAULT_CSS = """
    _IngestionMetricsPanel {
        border: solid $success;
        height: 7;
        padding: 1 2;
    }
    """

    def on_mount(self) -> None:
        self.refresh_metrics()

    def refresh_metrics(self) -> None:
        """Pull a fresh snapshot from telemetry and redraw the panel."""
        snapshot = get_ingestion_metrics()
        ratio = float(snapshot["progress_ratio"])
        width = 24
        cells = int(ratio * width)
        gauge = "█" * cells + "░" * (width - cells)
        rate = float(snapshot["throughput_docs_per_sec"])
        lines = [
            "[bold green]Ingestion Metrics[/bold green]",
            f"Throughput: {rate:.2f} docs/s",
            f"Chunks: {snapshot['chunks_processed']}"
            f" | Tokens: {snapshot['tokens_used']}",
            f"Progress: [{gauge}] {ratio * 100:.1f}%",
        ]
        self.update("\n".join(lines))
161
+
162
+
163
class _AgentTraceLog(RichLog):
    """Real-time log viewer for agent events and dynamic routing traces."""

    DEFAULT_CSS = """
    _AgentTraceLog {
        border: solid $warning;
        height: 1fr;
        padding: 1 2;
    }
    """

    def on_mount(self) -> None:
        # Seed the log with a header; further lines arrive at runtime via
        # MonitorApp.log_agent_event().
        self.write("[bold yellow]Agent Trace[/bold yellow]")
        self.write("[dim]Listening for branch_on / routing events…[/dim]")
177
+
178
+
179
class _MCPStatusPanel(Static):
    """Panel showing MCP server status and secret injection metadata."""

    DEFAULT_CSS = """
    _MCPStatusPanel {
        border: solid $accent;
        height: 1fr;
        padding: 1 2;
    }
    """

    def on_mount(self) -> None:
        self.refresh_status()

    def refresh_status(self) -> None:
        """Re-query MCP server state and redraw the panel."""
        status = get_mcp_status()
        names = get_registered_secret_names()
        # Secret values are never displayed, only their registered names.
        formatted_secrets = (
            "\n".join(f"- {name}: ****" for name in names) if names else "- (none)"
        )
        active = "yes" if status["active"] else "no"
        self.update(
            "[bold]MCP Status[/bold]\n"
            f"- Active: {active}\n"
            f"- Requests: {status['request_count']}\n\n"
            "[bold]Injected Secrets[/bold]\n"
            f"{formatted_secrets}"
        )
207
+
208
+
209
class MonitorApp(App):
    """Interactive monitoring dashboard for aptdata pipelines.

    The dashboard is divided into four tabs:

    1. **DAG View** – ASCII topology of the current pipeline.
    2. **Metrics** – Resource usage table and memory bar.
    3. **Agent Trace** – Real-time log of agent and routing events.
    4. **MCP Status** – MCP server activity and injected secret names.

    Parameters
    ----------
    refresh_interval:
        How often (in seconds) the dashboard auto-refreshes.
    """

    TITLE = "aptdata monitor"
    SUB_TITLE = "Pipeline DAG & Task Status"

    BINDINGS: ClassVar[list[Binding]] = [
        Binding("q", "quit", "Quit", show=True),
        Binding("r", "refresh", "Refresh", show=True),
    ]

    CSS = """
    Screen {
        layout: vertical;
    }
    """

    def __init__(self, refresh_interval: float = 1.0, **kwargs: object) -> None:
        super().__init__(**kwargs)
        # Seconds between automatic action_refresh() ticks (see on_mount).
        self._refresh_interval = refresh_interval

    def compose(self) -> ComposeResult:
        yield Header()
        # NOTE(review): tab titles are supplied twice — positionally to
        # TabbedContent and on each TabPane; presumably one set is redundant.
        # Verify against the Textual version in use before removing either.
        with TabbedContent("DAG View", "Metrics", "Agent Trace", "MCP Status"):
            with TabPane("DAG View", id="dag-tab"):
                yield _DAGPanel(id="dag-panel")
            with TabPane("Metrics", id="metrics-tab"):
                with Vertical():
                    yield _MemoryBar()
                    yield _IngestionMetricsPanel()
                    yield _StatusTable()
            with TabPane("Agent Trace", id="agent-trace-tab"):
                yield _AgentTraceLog(id="agent-trace-log")
            with TabPane("MCP Status", id="mcp-status-tab"):
                yield _MCPStatusPanel(id="mcp-status-panel")
        yield Footer()

    def on_mount(self) -> None:
        # Periodic auto-refresh; the "r" binding triggers the same action.
        self.set_interval(self._refresh_interval, self.action_refresh)

    def action_refresh(self) -> None:
        """Refresh all panels."""
        memory_bar = self.query_one(_MemoryBar)
        memory_bar.refresh_memory()

        table = self.query_one(_StatusTable)
        table.populate()

        ingestion_panel = self.query_one(_IngestionMetricsPanel)
        ingestion_panel.refresh_metrics()

        mcp_panel = self.query_one(_MCPStatusPanel)
        mcp_panel.refresh_status()

    def log_agent_event(self, message: str) -> None:
        """Append *message* to the Agent Trace log tab."""
        trace_log = self.query_one(_AgentTraceLog)
        trace_log.write(message)