PyPI - qcp-cli - Versions diffs - 0.1.5__py3-none-any.whl - Mend

qcp-cli 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

qcp/__init__.py +8 -0
qcp/agent.py +165 -0
qcp/cli.py +191 -0
qcp/config.py +85 -0
qcp/db.py +176 -0
qcp/errors.py +61 -0
qcp/llm.py +61 -0
qcp/memory.py +94 -0
qcp/models.py +125 -0
qcp/output.py +119 -0
qcp/tools.py +168 -0
qcp_cli-0.1.5.dist-info/METADATA +207 -0
qcp_cli-0.1.5.dist-info/RECORD +16 -0
qcp_cli-0.1.5.dist-info/WHEEL +4 -0
qcp_cli-0.1.5.dist-info/entry_points.txt +2 -0
qcp_cli-0.1.5.dist-info/licenses/LICENSE +21 -0

qcp/llm.py ADDED Viewed

@@ -0,0 +1,61 @@
+"""LangChain model construction and Gemini credential validation."""
+from __future__ import annotations
+import os
+from abc import ABC, abstractmethod
+from langchain_core.language_models import BaseChatModel
+from langchain_google_genai import ChatGoogleGenerativeAI
+from qcp import config as cfg
+from qcp.errors import NoApiKeyConfiguredError
+# Model name get_model() falls back to when neither GEMINI_MODEL nor the config provides one.
+DEFAULT_GEMINI_MODEL = "gemini-2.5-flash"
+def get_model() -> str:
+    """Pick the Gemini model name: GEMINI_MODEL env var, then config, then the default."""
+    # Each source is consulted only when the previous one was empty or missing.
+    selected = os.environ.get("GEMINI_MODEL")
+    if not selected:
+        selected = cfg.get("gemini_model")
+    if not selected:
+        selected = DEFAULT_GEMINI_MODEL
+    return str(selected)
+def require_api_key() -> str:
+    """Fetch the configured Gemini API key, failing loudly when there is none.
+    Raises:
+        NoApiKeyConfiguredError: If the configured key is missing or empty.
+    """
+    if api_key := cfg.get_gemini_api_key():
+        return api_key
+    raise NoApiKeyConfiguredError("gemini")
+class ChatModelFactory(ABC):
+    """Abstract factory for the LangChain chat model used by QCP agents.
+    Subclasses decide which provider, model and credentials to use.
+    """
+    @abstractmethod
+    def create(self) -> BaseChatModel:
+        """Build and return a ready-to-use LangChain chat model."""
+class GeminiChatModelFactory(ChatModelFactory):
+    """Build Gemini chat models through LangChain's Google GenAI integration.
+    The model name defaults to whatever ``get_model()`` resolves.
+    """
+    def __init__(self, api_key: str, model: str | None = None) -> None:
+        """Remember the API key and the model name to use.
+        Args:
+            api_key: Gemini API key handed to the LangChain client.
+            model: Explicit model name; when empty or ``None`` the name is
+                resolved with ``get_model()``.
+        """
+        self._api_key = api_key
+        self._model = model if model else get_model()
+    def create(self) -> ChatGoogleGenerativeAI:
+        """Return a low-temperature (0.1) Gemini chat model for the stored key and model."""
+        settings = {
+            "model": self._model,
+            "google_api_key": self._api_key,
+            "temperature": 0.1,
+        }
+        return ChatGoogleGenerativeAI(**settings)
+def validate_api_key(api_key: str) -> tuple[bool, str]:
+    """Probe a Gemini key with one small request through the regular model factory.
+    Returns:
+        ``(True, "")`` when the request succeeds, otherwise ``(False, message)``
+        where ``message`` is the error text cut to at most 300 characters.
+    """
+    try:
+        chat_model = GeminiChatModelFactory(api_key).create()
+        chat_model.invoke("Reply with the single word pong.")
+    except Exception as error:  # any failure means the key could not be validated
+        failure_text = str(error)
+        return False, failure_text[:300]
+    else:
+        return True, ""

qcp/memory.py ADDED Viewed

@@ -0,0 +1,94 @@
+"""Local, credential-free storage for database schema snapshots."""
+from __future__ import annotations
+import json
+import os
+import stat
+from abc import ABC, abstractmethod
+from contextlib import suppress
+from datetime import UTC, datetime, timedelta
+from pathlib import Path
+from typing import Any
+from pydantic import ValidationError
+from qcp import config as cfg
+from qcp.models import SchemaSnapshot
+# Default maximum age of a stored snapshot before recall() treats it as stale.
+SCHEMA_CACHE_TTL = timedelta(hours=24)
+# Expected ``format_version`` of stored snapshots; recall() ignores entries with any other value.
+SCHEMA_CACHE_VERSION = 2
+class SchemaMemoryStore(ABC):
+    """Contract for persisting schema metadata across CLI runs.
+    Snapshots are addressed by ``database_id``.
+    """
+    @abstractmethod
+    def recall(self, database_id: str) -> SchemaSnapshot | None:
+        """Return a fresh snapshot for a database, or ``None`` if there is none to use."""
+    @abstractmethod
+    def store(self, snapshot: SchemaSnapshot) -> None:
+        """Persist a schema snapshot under its own ``database_id``."""
+    @abstractmethod
+    def invalidate(self, database_id: str) -> None:
+        """Remove the cached schema for one database."""
+class JsonSchemaMemoryStore(SchemaMemoryStore):
+    """Store isolated schema snapshots in ``~/.qcp/schema.json``.
+    The file holds a single JSON object keyed by database id. A missing,
+    unreadable or malformed file is treated as an empty cache, never as an error.
+    """
+    def __init__(self, path: Path | None = None, ttl: timedelta = SCHEMA_CACHE_TTL) -> None:
+        """Initialize the JSON store and its freshness policy.
+        Args:
+            path: Explicit cache file; when ``None`` the file is ``schema.json``
+                inside ``cfg.CONFIG_DIR``, looked up on every access.
+            ttl: Maximum snapshot age accepted by ``recall``.
+        """
+        self._path = path
+        self._ttl = ttl
+    @property
+    def path(self) -> Path:
+        """Return the current cache path, respecting test-time QCP_HOME overrides."""
+        return self._path or cfg.CONFIG_DIR / "schema.json"
+    def _load_all(self) -> dict[str, Any]:
+        """Return every stored snapshot keyed by database id, or ``{}`` if unusable."""
+        try:
+            value = json.loads(self.path.read_text(encoding="utf-8"))
+        # Parenthesized so the module also parses before Python 3.14; a missing file
+        # raises an OSError subclass, and invalid UTF-8 raises UnicodeDecodeError.
+        except (json.JSONDecodeError, UnicodeDecodeError, OSError):
+            return {}
+        return value if isinstance(value, dict) else {}
+    def _write_all(self, snapshots: dict[str, Any]) -> None:
+        """Rewrite the cache file and restrict it to owner read/write (best effort)."""
+        self.path.write_text(json.dumps(snapshots, indent=2), encoding="utf-8")
+        with suppress(OSError):
+            os.chmod(self.path, stat.S_IRUSR | stat.S_IWUSR)
+    def recall(self, database_id: str) -> SchemaSnapshot | None:
+        """Return a fresh, valid snapshot without exposing other databases.
+        ``None`` is returned when the entry is absent, has a different
+        ``format_version``, fails model validation, or is older than the TTL.
+        """
+        raw_snapshot = self._load_all().get(database_id)
+        if not isinstance(raw_snapshot, dict) or raw_snapshot.get("format_version") != SCHEMA_CACHE_VERSION:
+            return None
+        try:
+            snapshot = SchemaSnapshot.model_validate(raw_snapshot)
+        except ValidationError:
+            return None
+        captured_at = snapshot.captured_at
+        # Naive timestamps are interpreted as UTC so they can be compared with an aware "now".
+        if captured_at.tzinfo is None:
+            captured_at = captured_at.replace(tzinfo=UTC)
+        if datetime.now(UTC) - captured_at > self._ttl:
+            return None
+        return snapshot
+    def store(self, snapshot: SchemaSnapshot) -> None:
+        """Persist one validated snapshot with owner-only permissions."""
+        directory = self.path.parent
+        directory.mkdir(parents=True, exist_ok=True)
+        # Permission tightening is best effort; a chmod failure is ignored.
+        with suppress(OSError):
+            os.chmod(directory, stat.S_IRWXU)
+        snapshots = self._load_all()
+        snapshots[snapshot.database_id] = snapshot.model_dump(mode="json")
+        self._write_all(snapshots)
+    def invalidate(self, database_id: str) -> None:
+        """Remove one database's snapshot while preserving all others."""
+        snapshots = self._load_all()
+        if database_id not in snapshots:
+            return
+        del snapshots[database_id]
+        self._write_all(snapshots)

qcp/models.py ADDED Viewed

@@ -0,0 +1,125 @@
+"""Typed domain models used by the QCP agent and its tools."""
+from __future__ import annotations
+from datetime import datetime
+from typing import Any, Literal
+from pydantic import BaseModel, ConfigDict, Field, SecretStr, field_serializer
+class QcpConfig(BaseModel):
+    """Validated representation of the persisted QCP configuration."""
+    # extra="ignore": keys that are not declared below are dropped instead of rejected.
+    model_config = ConfigDict(extra="ignore")
+    database_url: str | None = None
+    # Held as SecretStr so the key is masked in repr/str output.
+    gemini_api_key: SecretStr | None = None
+    # "gemini" is the only provider value the model accepts.
+    provider: Literal["gemini"] = "gemini"
+    gemini_model: str = "gemini-2.5-flash"
+    # Applies to JSON-mode serialization only (when_used="json").
+    @field_serializer("gemini_api_key", when_used="json")
+    def serialize_api_key(self, value: SecretStr | None) -> str | None:
+        """Persist the actual API key while retaining redacted representations."""
+        return value.get_secret_value() if value is not None else None
+class SchemaColumn(BaseModel):
+    """A PostgreSQL column exposed to the database agent."""
+    name: str
+    # Rendered verbatim next to the column name by SchemaSnapshot.summary().
+    data_type: str
+    nullable: bool
+class SchemaTable(BaseModel):
+    """A PostgreSQL table and its columns."""
+    # Defaults to "public" when the stored data does not name a schema.
+    schema_name: str = "public"
+    name: str
+    columns: list[SchemaColumn]
+class SchemaSnapshot(BaseModel):
+    """A schema snapshot persisted in local QCP memory."""
+    format_version: int = 2
+    database_id: str
+    captured_at: datetime
+    tables: list[SchemaTable]
+    def summary(self, max_tables: int = 50) -> str:
+        """Render up to ``max_tables`` tables as one bullet line each for the language model.
+        A trailing line reports how many tables were left out; a snapshot with
+        no tables yields a fixed placeholder sentence.
+        """
+        rendered = [
+            f"- {table.schema_name}.{table.name}("
+            + ", ".join(f"{column.name} {column.data_type}" for column in table.columns)
+            + ")"
+            for table in self.tables[:max_tables]
+        ]
+        omitted = len(self.tables) - max_tables
+        if omitted > 0:
+            rendered.append(f"... and {omitted} more tables")
+        if not rendered:
+            return "(no tables found in 'public' schema)"
+        return "\n".join(rendered)
+class QueryResult(BaseModel):
+    """The exact SQL and rows returned by the read-query tool."""
+    sql: str
+    columns: list[str] = Field(default_factory=list)
+    rows: list[list[Any]] = Field(default_factory=list)
+    truncated: bool = False
+    # The execute_read_query tool sets this to False for dry runs, where the SQL is not run.
+    executed: bool = True
+class InsightContext(BaseModel):
+    """Grounded facts supplied to the model for insight generation."""
+    # One plain-text statement per entry; assembled by the analyze_insights tool.
+    facts: list[str]
+class QueryNarrative(BaseModel):
+    """Structured natural-language response from the query agent."""
+    # The Field description is part of the generated schema, not just documentation.
+    answer: str = Field(description="A concise answer grounded only in the executed query result.")
+class InsightsNarrative(BaseModel):
+    """Structured natural-language response from the insights agent."""
+    # Validation requires between three and six insights, inclusive.
+    insights: list[str] = Field(min_length=3, max_length=6)
+class AgentQueryResponse(BaseModel):
+    """Application-level response returned to the CLI query command."""
+    # Both fields are required: the query result and the narrative answer text.
+    query_result: QueryResult
+    answer: str
+class AgentInsightsResponse(BaseModel):
+    """Application-level response returned to the CLI insights command."""
+    insights: list[str]
+    # Optional: insights can be returned without an accompanying query result.
+    query_result: QueryResult | None = None
+class LookupSchemaInput(BaseModel):
+    """Input for the schema lookup tool."""
+    # NOTE(review): the lookup_schema tool in qcp/tools.py currently discards this flag
+    # and always queries the database.
+    force_refresh: bool = Field(default=False, description="Ignore cached state and query PostgreSQL again.")
+class SchemaMemoryInput(BaseModel):
+    """Input for the local schema-memory tool."""
+    # "recall" reads a snapshot from local memory; "store" persists the snapshot held in agent state.
+    operation: Literal["recall", "store"]
+class ExecuteReadQueryInput(BaseModel):
+    """Input for the read-only query execution tool."""
+    # This model only requires a string; the SELECT/WITH restriction is not enforced here.
+    sql: str = Field(description="One PostgreSQL SELECT or WITH query without multiple statements.")
+class AnalyzeInsightsInput(BaseModel):
+    """Input for the grounded insights tool."""
+    # When provided, the analyze_insights tool adds the focus text to its list of facts.
+    focus: str | None = Field(default=None, description="Optional analytical focus supplied by the user.")

qcp/output.py ADDED Viewed

@@ -0,0 +1,119 @@
+"""Small terminal output helpers kept independent from the agent layer."""
+from __future__ import annotations
+import shutil
+import textwrap
+from collections.abc import Sequence
+from typing import Any
+# Width assumed by format_table when the terminal size cannot be determined.
+DEFAULT_TERMINAL_WIDTH = 120
+# Lower bound applied to the detected or requested width.
+MIN_TERMINAL_WIDTH = 40
+# Placed between cells of the horizontal table layout.
+COLUMN_SEPARATOR = "  "
+# Placed between the label and the value in the expanded record layout.
+EXPANDED_SEPARATOR = " | "
+def format_table(
+    columns: Sequence[str],
+    rows: Sequence[Sequence[Any]],
+    max_width: int | None = None,
+) -> str:
+    """Format query results for the current terminal width.
+    Compact results use a conventional horizontal table. Results that cannot
+    fit the terminal, or whose values or column labels span several lines,
+    switch to an expanded record layout so long values remain readable
+    without truncation.
+    Args:
+        columns: Column names in result order.
+        rows: Query result rows.
+        max_width: Optional deterministic width override, primarily for tests.
+    Returns:
+        A terminal-friendly representation including the result row count.
+    """
+    if not columns:
+        return "(no output)"
+    if not rows:
+        return "(0 rows)"
+    terminal_width = max(
+        MIN_TERMINAL_WIDTH,
+        max_width or shutil.get_terminal_size(fallback=(DEFAULT_TERMINAL_WIDTH, 24)).columns,
+    )
+    # Labels get the same line-ending normalization as values so a carriage return
+    # or newline inside a column alias cannot corrupt the horizontal layout.
+    labels = [_stringify(column) for column in columns]
+    str_rows = [[_stringify(value) for value in row] for row in rows]
+    widths = [len(label) for label in labels]
+    for row in str_rows:
+        # Values beyond the declared columns are ignored for sizing and rendering.
+        for index, value in enumerate(row[: len(widths)]):
+            widths[index] = max(widths[index], len(value))
+    table_width = sum(widths) + len(COLUMN_SEPARATOR) * (len(widths) - 1)
+    contains_multiline_text = any("\n" in label for label in labels) or any(
+        "\n" in value for row in str_rows for value in row
+    )
+    if table_width > terminal_width or contains_multiline_text:
+        return _format_expanded(labels, str_rows, terminal_width)
+    def fmt_row(vals: list[str]) -> str:
+        # Short rows are padded with empty cells; long rows are cut to the column count.
+        padded_values = [*vals[: len(widths)], *([""] * max(0, len(widths) - len(vals)))]
+        return COLUMN_SEPARATOR.join(value.ljust(widths[index]) for index, value in enumerate(padded_values))
+    separator = COLUMN_SEPARATOR.join("-" * width for width in widths)
+    lines = [fmt_row(labels), separator]
+    lines.extend(fmt_row(row) for row in str_rows)
+    lines.append(f"\n{_row_count(len(rows))}")
+    return "\n".join(lines)
+def _stringify(value: Any) -> str:
+    """Render a cell as text: ``None`` becomes empty, CRLF and lone CR become LF."""
+    if value is None:
+        return ""
+    text = str(value)
+    # CRLF pairs are collapsed first so the lone-CR pass cannot double them.
+    text = text.replace("\r\n", "\n")
+    return text.replace("\r", "\n")
+def _format_expanded(columns: Sequence[str], rows: Sequence[Sequence[str]], max_width: int) -> str:
+    """Lay each row out as a vertical record with wrapped labels and values.
+    Labels take at most a third of ``max_width``; values use what remains
+    after the separator. The row-count footer closes the output.
+    """
+    longest_label = max(len(column) for column in columns)
+    label_width = min(longest_label, max_width // 3)
+    value_width = max(1, max_width - label_width - len(EXPANDED_SEPARATOR))
+    column_count = len(columns)
+    output: list[str] = []
+    for record_number, record in enumerate(rows, start=1):
+        # A blank line separates consecutive records.
+        if record_number > 1:
+            output.append("")
+        output.append(f"-[ RECORD {record_number} ]".ljust(max_width, "-"))
+        # Cut or pad the record so it has exactly one cell per column.
+        cells = list(record[:column_count])
+        cells.extend([""] * (column_count - len(cells)))
+        for label, cell in zip(columns, cells, strict=True):
+            label_parts = _wrap_value(label, label_width)
+            cell_parts = _wrap_value(cell, value_width)
+            height = max(len(label_parts), len(cell_parts))
+            # Pad the shorter side so label and value fragments pair up line by line.
+            label_parts = label_parts + [""] * (height - len(label_parts))
+            cell_parts = cell_parts + [""] * (height - len(cell_parts))
+            output.extend(
+                f"{label_part.ljust(label_width)}{EXPANDED_SEPARATOR}{cell_part}"
+                for label_part, cell_part in zip(label_parts, cell_parts)
+            )
+    output.append(f"\n{_row_count(len(rows))}")
+    return "\n".join(output)
+def _wrap_value(value: str, width: int) -> list[str]:
+    """Split text into display lines no wider than ``width`` without losing content.
+    Existing newlines are kept as hard breaks and an empty logical line
+    yields one empty display line.
+    """
+    if not value:
+        return [""]
+    wrapper = textwrap.TextWrapper(
+        width=width,
+        replace_whitespace=False,
+        drop_whitespace=False,
+        break_long_words=True,
+        break_on_hyphens=False,
+    )
+    display_lines: list[str] = []
+    for logical_line in value.split("\n"):
+        fragments = wrapper.wrap(logical_line)
+        # wrap() returns nothing for an empty line; keep it as a blank row.
+        display_lines.extend(fragments if fragments else [""])
+    return display_lines
+def _row_count(row_count: int) -> str:
+    """Build the ``(N rows)`` footer, using the singular form only for exactly one row."""
+    noun = "row" if row_count == 1 else "rows"
+    return f"({row_count} {noun})"

qcp/tools.py ADDED Viewed

@@ -0,0 +1,168 @@
+"""LangChain tools exposed to the QCP database agent."""
+import json
+from typing import Any, NotRequired
+from langchain.agents import AgentState
+from langchain.messages import ToolMessage
+from langchain.tools import ToolRuntime, tool
+from langchain_core.tools import BaseTool
+from langgraph.types import Command
+from qcp.db import DatabaseClient, normalize_read_query
+from qcp.errors import SchemaChangedError
+from qcp.memory import SchemaMemoryStore
+from qcp.models import (
+    AnalyzeInsightsInput,
+    ExecuteReadQueryInput,
+    InsightContext,
+    LookupSchemaInput,
+    QueryResult,
+    SchemaMemoryInput,
+    SchemaSnapshot,
+)
+class QcpAgentState(AgentState):
+    """Agent state containing only validated, application-owned artifacts."""
+    # JSON dump of the current SchemaSnapshot; reset to None when the schema is found stale.
+    schema_snapshot: NotRequired[dict[str, Any] | None]
+    # JSON dump of the latest QueryResult written by the execute_read_query tool.
+    query_result: NotRequired[dict[str, Any] | None]
+    # JSON dump of the InsightContext written by the analyze_insights tool.
+    insight_context: NotRequired[dict[str, Any] | None]
+    # Set to 1 after the first stale-schema recovery so a second SchemaChangedError is re-raised.
+    schema_retry_count: NotRequired[int]
+    # NOTE(review): not written by any tool in this module — presumably maintained elsewhere; confirm.
+    query_execution_retry_count: NotRequired[int]
+class DatabaseToolkit:
+    """Build dependency-injected LangChain tools for one CLI invocation."""
+    def __init__(
+        self,
+        database: DatabaseClient,
+        memory: SchemaMemoryStore,
+        *,
+        dry_run: bool = False,
+    ) -> None:
+        """Initialize database and memory dependencies for the tools."""
+        self._database = database
+        self._memory = memory
+        self._dry_run = dry_run
+    def build(self) -> list[BaseTool]:
+        """Create the schema, memory, query, and insights tools."""
+        # Bound to locals so the nested tool functions close over the dependencies, not over self.
+        database = self._database
+        memory = self._memory
+        dry_run = self._dry_run
+        # NOTE: the docstrings of the @tool functions below are left untouched on purpose;
+        # LangChain uses them as the tool descriptions, so they are runtime text.
+        @tool("lookup_schema", args_schema=LookupSchemaInput)
+        def lookup_schema(force_refresh: bool, runtime: ToolRuntime[None, QcpAgentState]) -> Command:
+            """Read the current public PostgreSQL schema when cache is absent or stale."""
+            # The flag is part of the tool's input schema but is not consulted:
+            # every call queries the database.
+            del force_refresh
+            snapshot = database.lookup_schema()
+            return _state_command(
+                runtime,
+                content=f"Current PostgreSQL schema:\n{snapshot.summary()}",
+                schema_snapshot=snapshot.model_dump(mode="json"),
+            )
+        @tool("schema_memory", args_schema=SchemaMemoryInput)
+        def schema_memory(operation: str, runtime: ToolRuntime[None, QcpAgentState]) -> Command:
+            """Recall a fresh schema snapshot from local memory or store a looked-up snapshot."""
+            if operation == "recall":
+                snapshot = memory.recall(database.database_id)
+                if snapshot is None:
+                    return _state_command(runtime, content="Schema memory is missing or older than 24 hours.")
+                return _state_command(
+                    runtime,
+                    content=f"Fresh schema recalled from local memory:\n{snapshot.summary()}",
+                    schema_snapshot=snapshot.model_dump(mode="json"),
+                )
+            # Any operation other than "recall" is handled as "store".
+            raw_snapshot = runtime.state.get("schema_snapshot")
+            if raw_snapshot is None:
+                return _state_command(runtime, content="No looked-up schema is available to store.")
+            # Re-validate the state payload before it is written to disk.
+            snapshot = SchemaSnapshot.model_validate(raw_snapshot)
+            memory.store(snapshot)
+            return _state_command(runtime, content="Schema snapshot stored in local memory.")
+        @tool("execute_read_query", args_schema=ExecuteReadQueryInput)
+        def execute_read_query(sql: str, runtime: ToolRuntime[None, QcpAgentState]) -> Command:
+            """Execute one PostgreSQL SELECT or WITH query in a read-only transaction."""
+            # A schema snapshot must already be in agent state before any query is accepted.
+            if runtime.state.get("schema_snapshot") is None:
+                return _state_command(runtime, content="Schema is required before query execution.")
+            if dry_run:
+                # Dry run: the SQL is normalized and reported back but never sent to the database.
+                query_result = QueryResult(sql=normalize_read_query(sql), executed=False)
+                return _state_command(
+                    runtime,
+                    content=json.dumps(query_result.model_dump(mode="json")),
+                    query_result=query_result.model_dump(mode="json"),
+                )
+            try:
+                query_result = database.execute_read_query(sql)
+            except SchemaChangedError:
+                # Only one refresh-and-retry cycle is allowed; a repeat failure propagates.
+                retry_count = runtime.state.get("schema_retry_count", 0)
+                if retry_count >= 1:
+                    raise
+                memory.invalidate(database.database_id)
+                # Clear the stale snapshot and result from state and record that the retry was used.
+                return _state_command(
+                    runtime,
+                    content=(
+                        "The cached schema is stale. Call lookup_schema with force_refresh=true, "
+                        "store it with schema_memory, then retry this query once."
+                    ),
+                    schema_snapshot=None,
+                    query_result=None,
+                    schema_retry_count=1,
+                )
+            # The same JSON payload is shown to the model and stored in agent state.
+            payload = query_result.model_dump(mode="json")
+            return _state_command(
+                runtime,
+                content=json.dumps(payload),
+                query_result=payload,
+            )
+        @tool("analyze_insights", args_schema=AnalyzeInsightsInput)
+        def analyze_insights(focus: str | None, runtime: ToolRuntime[None, QcpAgentState]) -> Command:
+            """Build grounded facts from schema and optional query results for insight generation."""
+            raw_snapshot = runtime.state.get("schema_snapshot")
+            if raw_snapshot is None:
+                return _state_command(runtime, content="Schema is required before analyzing insights.")
+            snapshot = SchemaSnapshot.model_validate(raw_snapshot)
+            # Facts always start with the table count and the qualified table names.
+            facts = [
+                f"The database snapshot contains {len(snapshot.tables)} tables.",
+                "Available tables: " + ", ".join(f"{table.schema_name}.{table.name}" for table in snapshot.tables),
+            ]
+            if focus:
+                facts.append(f"The user's requested analytical focus is: {focus}")
+            raw_result = runtime.state.get("query_result")
+            if raw_result is not None:
+                query_result = QueryResult.model_validate(raw_result)
+                # Only the first 20 rows are included in the sample.
+                facts.extend(
+                    [
+                        f"The executed query returned {len(query_result.rows)} rows.",
+                        "Result columns: " + ", ".join(query_result.columns),
+                        "Result sample: " + json.dumps(query_result.model_dump(mode="json")["rows"][:20]),
+                    ]
+                )
+            context = InsightContext(facts=facts)
+            payload = context.model_dump(mode="json")
+            return _state_command(
+                runtime,
+                content=json.dumps(payload),
+                insight_context=payload,
+            )
+        return [lookup_schema, schema_memory, execute_read_query, analyze_insights]
+def _state_command(runtime: ToolRuntime, content: str, **updates: Any) -> Command:
+    """Wrap state updates in a Command together with the tool's reply message.
+    The reply carries ``runtime.tool_call_id`` so it is matched to the
+    originating tool call; it replaces any ``messages`` entry in ``updates``.
+    """
+    reply = ToolMessage(content=content, tool_call_id=runtime.tool_call_id)
+    state_update = dict(updates)
+    state_update["messages"] = [reply]
+    return Command(update=state_update)