aptdata 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. aptdata/__init__.py +3 -0
  2. aptdata/cli/__init__.py +5 -0
  3. aptdata/cli/app.py +247 -0
  4. aptdata/cli/commands/__init__.py +9 -0
  5. aptdata/cli/commands/config_cmd.py +128 -0
  6. aptdata/cli/commands/mesh_cmd.py +435 -0
  7. aptdata/cli/commands/plugin_cmd.py +107 -0
  8. aptdata/cli/commands/system_cmd.py +90 -0
  9. aptdata/cli/commands/telemetry_cmd.py +57 -0
  10. aptdata/cli/completions.py +56 -0
  11. aptdata/cli/interactive.py +269 -0
  12. aptdata/cli/rendering/__init__.py +31 -0
  13. aptdata/cli/rendering/console.py +119 -0
  14. aptdata/cli/rendering/logger.py +26 -0
  15. aptdata/cli/rendering/panels.py +87 -0
  16. aptdata/cli/rendering/tables.py +81 -0
  17. aptdata/cli/scaffold.py +1089 -0
  18. aptdata/config/__init__.py +13 -0
  19. aptdata/config/parser.py +136 -0
  20. aptdata/config/schema.py +27 -0
  21. aptdata/config/secrets.py +60 -0
  22. aptdata/core/__init__.py +46 -0
  23. aptdata/core/context.py +31 -0
  24. aptdata/core/dataset.py +39 -0
  25. aptdata/core/lineage.py +213 -0
  26. aptdata/core/state.py +27 -0
  27. aptdata/core/system.py +317 -0
  28. aptdata/core/workflow.py +372 -0
  29. aptdata/mcp/__init__.py +5 -0
  30. aptdata/mcp/server.py +198 -0
  31. aptdata/plugins/__init__.py +77 -0
  32. aptdata/plugins/ai/__init__.py +6 -0
  33. aptdata/plugins/ai/chunking.py +66 -0
  34. aptdata/plugins/ai/embeddings.py +56 -0
  35. aptdata/plugins/base.py +57 -0
  36. aptdata/plugins/dataset.py +62 -0
  37. aptdata/plugins/governance/__init__.py +32 -0
  38. aptdata/plugins/governance/catalog.py +115 -0
  39. aptdata/plugins/governance/classification.py +44 -0
  40. aptdata/plugins/governance/lineage_store.py +49 -0
  41. aptdata/plugins/governance/rules.py +180 -0
  42. aptdata/plugins/local_fs.py +241 -0
  43. aptdata/plugins/manager.py +142 -0
  44. aptdata/plugins/postgres.py +113 -0
  45. aptdata/plugins/quality/__init__.py +39 -0
  46. aptdata/plugins/quality/contract.py +128 -0
  47. aptdata/plugins/quality/expectations.py +310 -0
  48. aptdata/plugins/quality/report.py +94 -0
  49. aptdata/plugins/quality/validator.py +139 -0
  50. aptdata/plugins/rest.py +135 -0
  51. aptdata/plugins/transform/__init__.py +14 -0
  52. aptdata/plugins/transform/pandas.py +129 -0
  53. aptdata/plugins/transform/spark.py +134 -0
  54. aptdata/plugins/vector/__init__.py +6 -0
  55. aptdata/plugins/vector/base.py +19 -0
  56. aptdata/plugins/vector/qdrant.py +41 -0
  57. aptdata/telemetry/__init__.py +5 -0
  58. aptdata/telemetry/instrumentation.py +164 -0
  59. aptdata/tui/__init__.py +5 -0
  60. aptdata/tui/monitor.py +279 -0
  61. aptdata-0.0.2.dist-info/METADATA +330 -0
  62. aptdata-0.0.2.dist-info/RECORD +65 -0
  63. aptdata-0.0.2.dist-info/WHEEL +4 -0
  64. aptdata-0.0.2.dist-info/entry_points.txt +3 -0
  65. aptdata-0.0.2.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,164 @@
1
+ """OpenTelemetry bootstrap helpers for aptdata."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Mapping
6
+ from dataclasses import dataclass
7
+ from threading import Lock
8
+ from time import perf_counter
9
+
10
+ from opentelemetry import metrics, trace
11
+ from opentelemetry.sdk.metrics import MeterProvider
12
+ from opentelemetry.sdk.metrics.export import MetricReader
13
+ from opentelemetry.sdk.resources import Resource
14
+ from opentelemetry.sdk.trace import TracerProvider
15
+ from opentelemetry.sdk.trace.export import SimpleSpanProcessor, SpanExporter
16
+ from opentelemetry.trace import Tracer
17
+
18
+ _SENSITIVE_KEYS = ("password", "secret", "token", "authorization", "api_key")
19
+ _REGISTERED_SECRETS: dict[str, str] = {}
20
+ _METRICS_LOCK = Lock()
21
+ _TOKEN_COUNTER = None
22
+
23
+
24
@dataclass
class IngestionMetrics:
    """Runtime ingestion metrics exposed to telemetry and the TUI monitor."""

    # Expected number of documents for the current run (0 = unknown/unset).
    documents_total: int = 0
    # Documents fully processed so far.
    documents_processed: int = 0
    # Chunks processed so far (a single document may yield many chunks).
    chunks_processed: int = 0
    # Cumulative LLM tokens consumed, fed by record_llm_tokens_used().
    tokens_used: int = 0
    # perf_counter() timestamp of the run start; basis for throughput math.
    started_at: float = 0.0


# Module-level singleton; mutated only under _METRICS_LOCK by the helpers below.
_INGESTION_METRICS = IngestionMetrics()
36
+
37
+
38
def register_secret(name: str, value: str) -> None:
    """Record a secret under *name* so telemetry payloads can be masked."""
    _REGISTERED_SECRETS.update({name: value})
41
+
42
+
43
def get_registered_secret_names() -> list[str]:
    """Return the names of all registered secrets, sorted for stable display."""
    names = list(_REGISTERED_SECRETS.keys())
    names.sort()
    return names
46
+
47
+
48
def mask_telemetry_value(value: object, *, key: str | None = None) -> object:
    """Scrub sensitive data from *value* before export to telemetry/logs.

    A value is replaced with ``"****"`` when its *key* looks sensitive;
    strings have registered secret substrings redacted; mappings, lists
    and tuples are traversed recursively.
    """
    if value is None:
        return value
    # A sensitive-looking key masks the whole value, regardless of its type.
    if key is not None:
        lowered = key.lower()
        if any(marker in lowered for marker in _SENSITIVE_KEYS):
            return "****"
    if isinstance(value, str):
        if not _REGISTERED_SECRETS:
            return value
        redacted = value
        for secret in _REGISTERED_SECRETS.values():
            if secret and secret in redacted:
                redacted = redacted.replace(secret, "****")
        return redacted
    if isinstance(value, Mapping):
        return {
            name: mask_telemetry_value(item, key=str(name))
            for name, item in value.items()
        }
    if isinstance(value, list):
        return [mask_telemetry_value(item) for item in value]
    if isinstance(value, tuple):
        return tuple(mask_telemetry_value(item) for item in value)
    return value
69
+
70
+
71
def configure_telemetry(
    *,
    service_name: str = "aptdata",
    span_exporter: SpanExporter | None = None,
    metric_reader: MetricReader | None = None,
) -> tuple[TracerProvider, MeterProvider]:
    """Build and install the global tracer and meter providers.

    Both providers share one resource tagged with ``service.name``.  The
    optional exporter/reader are attached only when supplied, so tests can
    run without any export backend configured.
    """
    resource = Resource.create({"service.name": service_name})

    tracer_provider = TracerProvider(resource=resource)
    if span_exporter is not None:
        processor = SimpleSpanProcessor(span_exporter)
        tracer_provider.add_span_processor(processor)
    trace.set_tracer_provider(tracer_provider)

    readers: list[MetricReader] = []
    if metric_reader is not None:
        readers.append(metric_reader)
    meter_provider = MeterProvider(resource=resource, metric_readers=readers)
    metrics.set_meter_provider(meter_provider)

    return tracer_provider, meter_provider
88
+
89
+
90
def get_tracer(name: str = "aptdata.component") -> Tracer:
    """Fetch a tracer from the globally configured tracer provider."""
    tracer = trace.get_tracer(name)
    return tracer
93
+
94
+
95
def get_meter(name: str = "aptdata.component"):
    """Fetch a meter from the globally configured meter provider."""
    meter = metrics.get_meter(name)
    return meter
98
+
99
+
100
def reset_ingestion_metrics() -> None:
    """Zero every in-memory ingestion counter and restart the run timer."""
    with _METRICS_LOCK:
        _INGESTION_METRICS.started_at = perf_counter()
        for counter in (
            "documents_total",
            "documents_processed",
            "chunks_processed",
            "tokens_used",
        ):
            setattr(_INGESTION_METRICS, counter, 0)
108
+
109
+
110
def set_ingestion_total_documents(total: int) -> None:
    """Record the expected document count (negative values clamp to zero)."""
    clamped = total if total > 0 else 0
    with _METRICS_LOCK:
        _INGESTION_METRICS.documents_total = clamped
114
+
115
+
116
def record_processed_documents(count: int) -> None:
    """Add *count* (clamped at zero) to the processed-document tally."""
    increment = max(count, 0)
    with _METRICS_LOCK:
        _INGESTION_METRICS.documents_processed += increment
120
+
121
+
122
def record_processed_chunks(count: int) -> None:
    """Add *count* (clamped at zero) to the processed-chunk tally."""
    increment = max(count, 0)
    with _METRICS_LOCK:
        _INGESTION_METRICS.chunks_processed += increment
126
+
127
+
128
def record_llm_tokens_used(tokens: int) -> None:
    """Track consumed LLM tokens in memory and on an OpenTelemetry counter."""
    global _TOKEN_COUNTER

    # Non-positive amounts carry no information; ignore them early.
    if tokens <= 0:
        return

    with _METRICS_LOCK:
        _INGESTION_METRICS.tokens_used += tokens
        if _TOKEN_COUNTER is None:
            # Create the OTel counter lazily so importing this module never
            # touches the global meter provider.
            meter = get_meter("aptdata.ingestion")
            _TOKEN_COUNTER = meter.create_counter(
                "llm.tokens.used",
                description="Total LLM tokens consumed by embedding/LLM plugins.",
                unit="1",
            )
        _TOKEN_COUNTER.add(tokens)
142
+
143
+
144
def get_ingestion_metrics() -> dict[str, float | int]:
    """Snapshot the ingestion counters plus derived throughput and progress."""
    with _METRICS_LOCK:
        elapsed = perf_counter() - _INGESTION_METRICS.started_at
        if elapsed < 0.0:
            elapsed = 0.0
        docs = _INGESTION_METRICS.documents_processed
        total = _INGESTION_METRICS.documents_total

        if total:
            ratio = docs / total
        elif docs > 0:
            # No declared total but work has happened: report as complete.
            ratio = 1.0
        else:
            ratio = 0.0
        rate = docs / elapsed if elapsed > 0 else 0.0

        snapshot: dict[str, float | int] = {
            "documents_total": total,
            "documents_processed": docs,
            "chunks_processed": _INGESTION_METRICS.chunks_processed,
            "tokens_used": _INGESTION_METRICS.tokens_used,
            "throughput_docs_per_sec": rate,
            "progress_ratio": min(ratio, 1.0),
        }
    return snapshot
162
+
163
+
164
+ reset_ingestion_metrics()
@@ -0,0 +1,5 @@
1
+ """Interactive TUI monitoring panel for aptdata."""
2
+
3
+ from aptdata.tui.monitor import MonitorApp
4
+
5
+ __all__ = ["MonitorApp"]
aptdata/tui/monitor.py ADDED
@@ -0,0 +1,279 @@
1
+ """Textual-based interactive monitoring dashboard.
2
+
3
+ Displays the pipeline DAG, memory usage, task status and agent trace in
4
+ real time via a tabbed interface.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import ClassVar
10
+
11
+ from textual.app import App, ComposeResult
12
+ from textual.binding import Binding
13
+ from textual.containers import Vertical
14
+ from textual.widgets import (
15
+ DataTable,
16
+ Footer,
17
+ Header,
18
+ RichLog,
19
+ Static,
20
+ TabbedContent,
21
+ TabPane,
22
+ )
23
+
24
+ from aptdata.mcp.server import get_mcp_status
25
+ from aptdata.telemetry.instrumentation import (
26
+ get_ingestion_metrics,
27
+ get_registered_secret_names,
28
+ )
29
+
30
+
31
class _DAGPanel(Static):
    """Simple ASCII DAG visualisation panel."""

    # Widget-scoped styling; $success is a Textual theme variable.
    DEFAULT_CSS = """
    _DAGPanel {
        border: solid $success;
        height: 1fr;
        padding: 1 2;
    }
    """

    def on_mount(self) -> None:
        # No live pipeline wiring exists yet; always show the placeholder.
        self.update(_placeholder_dag())
44
+
45
+
46
+ def _placeholder_dag() -> str:
47
+ """Return a placeholder ASCII DAG when no pipeline is loaded."""
48
+ return (
49
+ "[bold green]Pipeline DAG[/bold green]\n\n"
50
+ " [cyan]● step_1[/cyan]\n"
51
+ " │\n"
52
+ " [cyan]● step_2[/cyan]\n"
53
+ " │\n"
54
+ " [cyan]● step_3[/cyan]\n\n"
55
+ "[dim]No pipeline loaded – showing placeholder.[/dim]"
56
+ )
57
+
58
+
59
class _StatusTable(DataTable):
    """Table showing per-task status and memory usage."""

    DEFAULT_CSS = """
    _StatusTable {
        border: solid $primary;
        height: 1fr;
    }
    """

    # Placeholder rows shown until a running pipeline supplies real data.
    _PLACEHOLDER_ROWS = (
        ("step_1", "✅ done", "128", "0.42"),
        ("step_2", "⏳ running", "256", "1.07"),
        ("step_3", "⌛ pending", "—", "—"),
    )

    def on_mount(self) -> None:
        self.add_columns("Step", "Status", "Memory (MB)", "Elapsed (s)")
        self.populate()

    def populate(self) -> None:
        """Clear the table and re-add the placeholder data set."""
        self.clear()
        for row in self._PLACEHOLDER_ROWS:
            self.add_row(*row)
82
+
83
+
84
class _MemoryBar(Static):
    """Simple memory usage indicator.

    Prefers psutil for accurate numbers, falls back to parsing
    ``/proc/meminfo`` (Linux only), and finally shows "unavailable" rather
    than letting the monitor crash.
    """

    DEFAULT_CSS = """
    _MemoryBar {
        height: 3;
        padding: 0 2;
        background: $surface;
    }
    """

    def on_mount(self) -> None:
        self.refresh_memory()

    @staticmethod
    def _bar(pct: float) -> str:
        """Render a 20-cell block bar for a 0-100 percentage."""
        filled = int(pct / 5)
        return "█" * filled + "░" * (20 - filled)

    def refresh_memory(self) -> None:
        """Re-read system memory usage and redraw the widget text."""
        try:
            import psutil  # optional dependency

            mem = psutil.virtual_memory()
            pct = mem.percent
            used_gb = mem.used / 1_073_741_824
            total_gb = mem.total / 1_073_741_824
            self.update(
                f"[bold]Memory:[/bold] [{self._bar(pct)}] {pct:.1f}% "
                f"({used_gb:.2f} / {total_gb:.2f} GB)"
            )
        except ImportError:
            # psutil not installed – show basic info from /proc/meminfo
            try:
                with open("/proc/meminfo") as f:
                    # maxsplit=1 keeps the unpacking safe even if a value
                    # ever contains an extra ':'.
                    lines = {
                        k: int(v.split()[0])
                        for k, v in (
                            line.strip().split(":", 1) for line in f if ":" in line
                        )
                    }
                total = lines.get("MemTotal", 0)
                avail = lines.get("MemAvailable", 0)
                used = total - avail
                pct = (used / total * 100) if total else 0
                self.update(
                    f"[bold]Memory:[/bold] [{self._bar(pct)}] {pct:.1f}% "
                    "(install psutil for detailed metrics)"
                )
            except Exception:  # noqa: BLE001
                # Deliberate best-effort: metrics must never crash the TUI.
                self.update("[bold]Memory:[/bold] unavailable")
132
+
133
+
134
class _IngestionMetricsPanel(Static):
    """Panel with live ingestion throughput/cost/progress metrics."""

    DEFAULT_CSS = """
    _IngestionMetricsPanel {
        border: solid $success;
        height: 7;
        padding: 1 2;
    }
    """

    def on_mount(self) -> None:
        self.refresh_metrics()

    def refresh_metrics(self) -> None:
        """Pull a fresh snapshot from telemetry and redraw the panel."""
        snapshot = get_ingestion_metrics()
        ratio = float(snapshot["progress_ratio"])
        width = 24
        cells = int(ratio * width)
        gauge = "█" * cells + "░" * (width - cells)
        rate = float(snapshot["throughput_docs_per_sec"])
        lines = [
            "[bold green]Ingestion Metrics[/bold green]",
            f"Throughput: {rate:.2f} docs/s",
            f"Chunks: {snapshot['chunks_processed']}"
            f" | Tokens: {snapshot['tokens_used']}",
            f"Progress: [{gauge}] {ratio * 100:.1f}%",
        ]
        self.update("\n".join(lines))
161
+
162
+
163
class _AgentTraceLog(RichLog):
    """Real-time log viewer for agent events and dynamic routing traces."""

    DEFAULT_CSS = """
    _AgentTraceLog {
        border: solid $warning;
        height: 1fr;
        padding: 1 2;
    }
    """

    def on_mount(self) -> None:
        # Seed the log with a header; further lines arrive at runtime via
        # MonitorApp.log_agent_event().
        self.write("[bold yellow]Agent Trace[/bold yellow]")
        self.write("[dim]Listening for branch_on / routing events…[/dim]")
177
+
178
+
179
class _MCPStatusPanel(Static):
    """Panel showing MCP server status and secret injection metadata."""

    DEFAULT_CSS = """
    _MCPStatusPanel {
        border: solid $accent;
        height: 1fr;
        padding: 1 2;
    }
    """

    def on_mount(self) -> None:
        self.refresh_status()

    def refresh_status(self) -> None:
        """Re-query MCP server state and redraw the panel."""
        status = get_mcp_status()
        names = get_registered_secret_names()
        # Secret values are never displayed, only their registered names.
        formatted_secrets = (
            "\n".join(f"- {name}: ****" for name in names) if names else "- (none)"
        )
        active = "yes" if status["active"] else "no"
        self.update(
            "[bold]MCP Status[/bold]\n"
            f"- Active: {active}\n"
            f"- Requests: {status['request_count']}\n\n"
            "[bold]Injected Secrets[/bold]\n"
            f"{formatted_secrets}"
        )
207
+
208
+
209
class MonitorApp(App):
    """Interactive monitoring dashboard for aptdata pipelines.

    The dashboard is divided into four tabs:

    1. **DAG View** – ASCII topology of the current pipeline.
    2. **Metrics** – Resource usage table and memory bar.
    3. **Agent Trace** – Real-time log of agent and routing events.
    4. **MCP Status** – MCP server activity and injected secret names.

    Parameters
    ----------
    refresh_interval:
        How often (in seconds) the dashboard auto-refreshes.
    """

    TITLE = "aptdata monitor"
    SUB_TITLE = "Pipeline DAG & Task Status"

    BINDINGS: ClassVar[list[Binding]] = [
        Binding("q", "quit", "Quit", show=True),
        Binding("r", "refresh", "Refresh", show=True),
    ]

    CSS = """
    Screen {
        layout: vertical;
    }
    """

    def __init__(self, refresh_interval: float = 1.0, **kwargs: object) -> None:
        super().__init__(**kwargs)
        # Seconds between automatic action_refresh() ticks (see on_mount).
        self._refresh_interval = refresh_interval

    def compose(self) -> ComposeResult:
        yield Header()
        # NOTE(review): tab titles are supplied twice — positionally to
        # TabbedContent and on each TabPane; presumably one set is redundant.
        # Verify against the Textual version in use before removing either.
        with TabbedContent("DAG View", "Metrics", "Agent Trace", "MCP Status"):
            with TabPane("DAG View", id="dag-tab"):
                yield _DAGPanel(id="dag-panel")
            with TabPane("Metrics", id="metrics-tab"):
                with Vertical():
                    yield _MemoryBar()
                    yield _IngestionMetricsPanel()
                    yield _StatusTable()
            with TabPane("Agent Trace", id="agent-trace-tab"):
                yield _AgentTraceLog(id="agent-trace-log")
            with TabPane("MCP Status", id="mcp-status-tab"):
                yield _MCPStatusPanel(id="mcp-status-panel")
        yield Footer()

    def on_mount(self) -> None:
        # Periodic auto-refresh; the "r" binding triggers the same action.
        self.set_interval(self._refresh_interval, self.action_refresh)

    def action_refresh(self) -> None:
        """Refresh all panels."""
        memory_bar = self.query_one(_MemoryBar)
        memory_bar.refresh_memory()

        table = self.query_one(_StatusTable)
        table.populate()

        ingestion_panel = self.query_one(_IngestionMetricsPanel)
        ingestion_panel.refresh_metrics()

        mcp_panel = self.query_one(_MCPStatusPanel)
        mcp_panel.refresh_status()

    def log_agent_event(self, message: str) -> None:
        """Append *message* to the Agent Trace log tab."""
        trace_log = self.query_one(_AgentTraceLog)
        trace_log.write(message)