qcp-cli 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qcp/__init__.py +8 -0
- qcp/agent.py +165 -0
- qcp/cli.py +191 -0
- qcp/config.py +85 -0
- qcp/db.py +176 -0
- qcp/errors.py +61 -0
- qcp/llm.py +61 -0
- qcp/memory.py +94 -0
- qcp/models.py +125 -0
- qcp/output.py +119 -0
- qcp/tools.py +168 -0
- qcp_cli-0.1.5.dist-info/METADATA +207 -0
- qcp_cli-0.1.5.dist-info/RECORD +16 -0
- qcp_cli-0.1.5.dist-info/WHEEL +4 -0
- qcp_cli-0.1.5.dist-info/entry_points.txt +2 -0
- qcp_cli-0.1.5.dist-info/licenses/LICENSE +21 -0
qcp/__init__.py
ADDED
qcp/agent.py
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""LangChain database-agent orchestration for QCP commands."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Mapping
|
|
6
|
+
from typing import Any, Protocol, TypeVar
|
|
7
|
+
|
|
8
|
+
from langchain.agents import create_agent
|
|
9
|
+
from langchain.agents.middleware import after_model
|
|
10
|
+
from langchain.messages import HumanMessage
|
|
11
|
+
from langgraph.runtime import Runtime
|
|
12
|
+
from pydantic import BaseModel, ValidationError
|
|
13
|
+
|
|
14
|
+
from qcp.db import DatabaseClient
|
|
15
|
+
from qcp.errors import LLMError, QcpError
|
|
16
|
+
from qcp.llm import ChatModelFactory
|
|
17
|
+
from qcp.memory import SchemaMemoryStore
|
|
18
|
+
from qcp.models import (
|
|
19
|
+
AgentInsightsResponse,
|
|
20
|
+
AgentQueryResponse,
|
|
21
|
+
InsightsNarrative,
|
|
22
|
+
QueryNarrative,
|
|
23
|
+
QueryResult,
|
|
24
|
+
)
|
|
25
|
+
from qcp.tools import DatabaseToolkit, QcpAgentState
|
|
26
|
+
|
|
27
|
+
# System prompt passed as ``system_prompt`` to every agent built in this module.
SYSTEM_PROMPT = """You are QCP, a PostgreSQL data analyst for non-developers.
Use tools sequentially and ground every claim in tool output.
Always call schema_memory with operation=recall first. If it misses, call lookup_schema,
then schema_memory with operation=store. Never invent tables, columns, SQL results, or insights.
Only execute one SELECT or WITH query. Never request or attempt writes, DDL, or multiple statements.
If execute_read_query reports stale schema, refresh, store, and retry exactly once.
Keep final answers concise and understandable to a non-developer.
"""

# Corrective human message appended by ``_query_execution_retry_update`` when the
# model produced a final answer without any ``query_result`` in the agent state.
QUERY_EXECUTION_RETRY_PROMPT = (
    "Your previous response did not execute a query. QCP requires SQL and exact rows for every query command. "
    "Use the schema already loaded, call execute_read_query now, and only then return the concise answer."
)

# Type variable for the pydantic model class handed to ``_validate_narrative``;
# the same class is the declared return type.
NarrativeModel = TypeVar("NarrativeModel", bound=BaseModel)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class AgentInvoker(Protocol):
    """Minimal invocation interface implemented by compiled LangChain graphs.

    ``DatabaseAgent._invoke`` depends only on this one method, so any object
    exposing a compatible ``invoke`` can stand in for a real agent.
    """

    def invoke(self, value: dict[str, Any]) -> Mapping[str, Any]:
        """Invoke the compiled graph with an agent-state update.

        Args:
            value: Initial agent state (messages plus the QCP bookkeeping keys).

        Returns:
            A mapping holding the final agent state.
        """
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _query_execution_retry_update(state: QcpAgentState) -> dict[str, Any] | None:
    """Build the one-off corrective update used when the model skipped execution.

    Args:
        state: Current agent state; read through ``.get`` only.

    Returns:
        ``None`` when no correction applies (a query result already exists, the
        retry was already spent, there are no messages, or the last message
        still carries tool calls). Otherwise a state update that appends the
        retry prompt, marks the retry as used, and jumps back to the model.
    """
    # A recorded query result means the model did its job.
    if state.get("query_result") is not None:
        return None
    # The correction is offered at most once per invocation.
    if state.get("query_execution_retry_count", 0) >= 1:
        return None

    history = state.get("messages", [])
    if not history:
        return None
    # Pending tool calls mean the model is still working, not answering.
    if getattr(history[-1], "tool_calls", None):
        return None

    return {
        "messages": [HumanMessage(content=QUERY_EXECUTION_RETRY_PROMPT)],
        "query_execution_retry_count": 1,
        "jump_to": "model",
    }
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@after_model(state_schema=QcpAgentState, can_jump_to=["model"])
def require_query_execution(state: QcpAgentState, runtime: Runtime) -> dict[str, Any] | None:
    """Give Gemini one corrective turn when it answers without executing SQL.

    Args:
        state: Agent state after the model step.
        runtime: Accepted as part of the hook signature; unused here.

    Returns:
        Whatever ``_query_execution_retry_update`` decides for ``state``.
    """
    del runtime  # only the state matters for this decision
    retry_update = _query_execution_retry_update(state)
    return retry_update
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class DatabaseAgent:
    """Run LangChain agents for QCP commands and hand back exact tool artifacts.

    Every public method builds a fresh agent, sends it a single prompt, and
    converts the final agent state into a typed response model.
    """

    def __init__(
        self,
        database: DatabaseClient,
        memory: SchemaMemoryStore,
        model_factory: ChatModelFactory,
    ) -> None:
        """Keep references to the database client, schema memory, and model factory."""
        self._database = database
        self._memory = memory
        self._model_factory = model_factory

    def query(self, question: str, *, dry_run: bool = False) -> AgentQueryResponse:
        """Answer a natural-language question through one read-only SQL query.

        Args:
            question: Text interpolated into the agent prompt.
            dry_run: Forwarded to ``DatabaseToolkit`` and announced in the prompt.

        Returns:
            The validated query result paired with the narrative answer.

        Raises:
            LLMError: If the agent fails, ends without a ``query_result``, or
                returns a structured response that is not a ``QueryNarrative``.
        """
        chat_model = self._model_factory.create()
        toolkit = DatabaseToolkit(self._database, self._memory, dry_run=dry_run)
        agent = create_agent(
            model=chat_model,
            tools=toolkit.build(),
            system_prompt=SYSTEM_PROMPT,
            state_schema=QcpAgentState,
            response_format=QueryNarrative,
            middleware=[require_query_execution],
        )

        prompt_parts = [
            f"Question: {question}\n",
            "Find the schema, produce one PostgreSQL query, call execute_read_query, ",
            "then return a concise answer.",
        ]
        if dry_run:
            prompt_parts.append(
                " This is a dry run: validate and record the SQL with the tool, but do not claim any data result."
            )

        final_state = self._invoke(agent, "".join(prompt_parts))
        raw_query_result = final_state.get("query_result")
        if raw_query_result is None:
            raise LLMError("the agent finished without producing a SQL query result")

        # Validate the narrative first, then the query artifact.
        narrative = self._validate_narrative(QueryNarrative, final_state.get("structured_response"))
        validated_result = QueryResult.model_validate(raw_query_result)
        return AgentQueryResponse(query_result=validated_result, answer=narrative.answer)

    def insights(self, from_question: str | None = None) -> AgentInsightsResponse:
        """Generate schema-grounded or query-grounded analytical suggestions.

        Args:
            from_question: Optional focus; when truthy the prompt asks the agent
                to execute one relevant read query before analysing.

        Returns:
            The narrative insights plus the query result when the state has one.

        Raises:
            LLMError: If the agent fails or its structured response is not an
                ``InsightsNarrative``.
        """
        chat_model = self._model_factory.create()
        toolkit = DatabaseToolkit(self._database, self._memory)
        agent = create_agent(
            model=chat_model,
            tools=toolkit.build(),
            system_prompt=SYSTEM_PROMPT,
            state_schema=QcpAgentState,
            response_format=InsightsNarrative,
        )

        focused_prompt = (
            f"Generate insights for this focus: {from_question}\n"
            "Load the schema, create and execute one relevant read query, call analyze_insights, "
            "then return 3-6 grounded insights."
        )
        schema_only_prompt = (
            "Load the schema, call analyze_insights without executing a query, "
            "then return 3-6 concrete analyses the user could run next."
        )
        prompt = focused_prompt if from_question else schema_only_prompt

        final_state = self._invoke(agent, prompt)
        narrative = self._validate_narrative(InsightsNarrative, final_state.get("structured_response"))

        raw_query_result = final_state.get("query_result")
        if raw_query_result is None:
            query_result = None
        else:
            query_result = QueryResult.model_validate(raw_query_result)
        return AgentInsightsResponse(insights=narrative.insights, query_result=query_result)

    @staticmethod
    def _invoke(agent: AgentInvoker, prompt: str) -> dict[str, Any]:
        """Invoke the agent with a fresh state and normalize failures.

        ``QcpError`` instances propagate unchanged; any other exception is
        re-raised as ``LLMError`` carrying the original message.
        """
        initial_state = {
            "messages": [{"role": "user", "content": prompt}],
            "schema_snapshot": None,
            "query_result": None,
            "insight_context": None,
            "schema_retry_count": 0,
            "query_execution_retry_count": 0,
        }
        try:
            final_state = agent.invoke(initial_state)
        except QcpError:
            raise
        except Exception as error:
            raise LLMError(str(error)) from error
        return dict(final_state)

    @staticmethod
    def _validate_narrative(model: type[NarrativeModel], value: object) -> NarrativeModel:
        """Validate ``value`` against ``model``, mapping failures to ``LLMError``."""
        try:
            parsed = model.model_validate(value)
        except ValidationError as error:
            raise LLMError("the agent returned an invalid structured response") from error
        return parsed
|
qcp/cli.py
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
"""qcp - Query Companion: a CLI to query Postgres in natural language."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
|
|
7
|
+
import click
|
|
8
|
+
|
|
9
|
+
from qcp import __version__, db, llm
|
|
10
|
+
from qcp import config as cfg
|
|
11
|
+
from qcp.agent import DatabaseAgent
|
|
12
|
+
from qcp.db import PostgresDatabaseClient
|
|
13
|
+
from qcp.errors import QcpError
|
|
14
|
+
from qcp.llm import GeminiChatModelFactory
|
|
15
|
+
from qcp.memory import JsonSchemaMemoryStore
|
|
16
|
+
from qcp.output import format_table
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _print_err(msg: str) -> None:
    """Write ``msg`` to stderr, coloured red."""
    click.secho(msg, err=True, fg="red")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Root command group: the subcommands below attach via ``@main.command()``.
# ``--version`` reports the package's ``__version__`` under the name "qcp".
@click.group()
@click.version_option(__version__, prog_name="qcp")
def main() -> None:
    """QCP - your CLI companion for querying Postgres in plain English."""
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@main.command()
@click.option("--database-url", "-d", default=None, help="Postgres connection string. Prompted if omitted.")
@click.option("--force", is_flag=True, help="Overwrite existing configuration without asking.")
def init(database_url: str | None, force: bool) -> None:
    """Connect QCP to a Postgres database."""
    # Ask before replacing an existing configuration unless --force was given.
    existing = cfg.get_db_url()
    if existing and not force:
        wants_replace = click.confirm(f"A database is already configured ({_mask(existing)}). Replace it?")
        if not wants_replace:
            click.echo("Keeping existing configuration.")
            return

    # Prompt (input hidden) when no URL came in on the command line.
    if not database_url:
        click.echo("Enter your Postgres connection string, e.g.")
        click.echo(" postgresql://user:password@host:5432/dbname")
        database_url = click.prompt("Database URL", hide_input=True)

    # Persist only after the connection test returned without raising.
    click.echo("Testing connection...")
    db.test_connection(database_url)
    cfg.set_key("database_url", database_url)
    click.secho("Connected and saved.", fg="green")

    has_api_key = bool(cfg.get_gemini_api_key())
    if not has_api_key:
        click.echo("\nTip: run `qcp auth` next to add your Gemini API key.")
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@main.command()
@click.option("--key", "-k", default=None, help="Gemini API key. Prompted if omitted.")
@click.option("--remove", is_flag=True, help="Remove the stored API key.")
@click.option("--skip-validate", is_flag=True, help="Skip validating the key against Gemini.")
@click.option("--model", default=None, help="Override the Gemini model (e.g. gemini-2.5-flash).")
def auth(key: str | None, remove: bool, skip_validate: bool, model: str | None) -> None:
    """Add (or remove) your Gemini API key."""
    # --remove short-circuits everything else.
    if remove:
        cfg.unset_key("gemini_api_key")
        click.secho("Removed stored Gemini API key.", fg="green")
        return

    # The model override is stored before validation runs.
    if model:
        cfg.set_key("gemini_model", model)
        click.echo(f"Using model: {model}")

    if not key:
        click.echo("Get a free Gemini API key at https://aistudio.google.com/apikey")
        key = click.prompt("Gemini API key", hide_input=True)

    should_validate = not skip_validate
    if should_validate:
        click.echo("Validating key...")
        is_valid, detail = llm.validate_api_key(key)
        if not is_valid:
            failure_message = (
                "That Gemini API key didn't validate "
                f"(model: {llm.get_model()}).\n"
                f"Reason: {detail}\n"
                "Double-check the key, or re-run with --skip-validate to store it anyway."
            )
            raise QcpError(failure_message)

    cfg.set_key("gemini_api_key", key)
    cfg.set_key("provider", "gemini")
    click.secho("Gemini API key saved.", fg="green")
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@main.command()
@click.argument("question", nargs=-1, required=True)
@click.option("--show-sql/--no-show-sql", default=True, help="Print the generated SQL before running it.")
@click.option("--dry-run", is_flag=True, help="Only generate and print SQL, don't execute it.")
def query(question: tuple[str, ...], show_sql: bool, dry_run: bool) -> None:
    """Ask a question about your data in plain English.

    Example: qcp query "what were the top 5 products by revenue last month?"
    """
    # The question arrives as separate argv words; join them back into one sentence.
    question_text = " ".join(question)
    db_url = db.require_db_url()

    click.echo("Reading schema...")
    click.echo("Running database agent...")
    result = _create_agent(db_url).query(question_text, dry_run=dry_run)
    query_result = result.query_result

    # A dry run always shows the SQL, regardless of --no-show-sql.
    if show_sql or dry_run:
        click.secho("\n" + query_result.sql + "\n", fg="cyan")

    if dry_run:
        return

    click.echo(format_table(query_result.columns, query_result.rows))
    if query_result.truncated:
        # Report the number of rows actually returned instead of a hard-coded
        # figure, so the notice stays correct if the row cap ever changes.
        click.echo(f"\n(Result limited to {len(query_result.rows)} rows.)")
    click.echo("\n" + result.answer)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
@main.command()
@click.option(
    "--from-question", default=None, help="Base insights on the results of this question instead of just the schema."
)
def insights(from_question: str | None) -> None:
    """Get AI-generated analytics and insights about your database."""
    database_agent = _create_agent(db.require_db_url())
    if from_question:
        click.echo("Running a read query for your question...")

    click.echo("Generating insights...")
    result = database_agent.insights(from_question)

    # Show the SQL and its rows only when the agent actually ran a query.
    executed = result.query_result
    if executed is not None:
        click.secho("\n" + executed.sql + "\n", fg="cyan")
        click.echo(format_table(executed.columns, executed.rows) + "\n")

    bullet_lines = [f"- {insight}" for insight in result.insights]
    click.echo("\n" + "\n".join(bullet_lines))
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
@main.command()
def status() -> None:
    """Show current QCP configuration."""
    db_url = cfg.get_db_url()
    api_key = cfg.get_gemini_api_key()
    provider = cfg.get_provider()

    click.echo(f"Config file: {cfg.config_path()}")

    # Credentials are never printed: the URL is masked and the key is a yes/no.
    if db_url:
        database_label = _mask(db_url)
    else:
        database_label = "not configured (run `qcp init`)"
    click.echo(f"Database: {database_label}")

    click.echo(f"AI provider: {provider}")
    click.echo(f"Model: {llm.get_model()}")

    if api_key:
        key_label = "configured"
    else:
        key_label = "not configured (run `qcp auth`)"
    click.echo(f"API key: {key_label}")
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _mask(url: str) -> str:
    """Hide credentials in a connection string for display purposes.

    Two kinds of secret are replaced with ``***``:

    * everything between the scheme and the last ``@`` (the userinfo part);
    * the value of any ``password=`` setting, as found in URL query strings
      and keyword-style connection strings.

    Args:
        url: Connection string exactly as configured.

    Returns:
        A copy that is safe to print. A string containing neither ``@`` nor
        ``password=`` is returned unchanged.
    """
    import re  # local import so the fix does not depend on file-level imports

    masked = re.sub(r"(?i)(password\s*=\s*)('[^']*'|[^&\s]*)", r"\1***", url)
    if "@" not in masked:
        return masked

    # Split on the LAST "@" so passwords that themselves contain "@" stay hidden.
    credentials_part, host_part = masked.rsplit("@", 1)
    scheme, separator, _ = credentials_part.partition("://")
    if not separator:
        # No scheme present: the whole prefix is credentials, so never echo it.
        return f"***@{host_part}"
    return f"{scheme}://***@{host_part}"
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _create_agent(database_url: str) -> DatabaseAgent:
    """Wire up the agent used by CLI commands from its concrete dependencies.

    Uses a ``PostgresDatabaseClient`` for ``database_url``, a Gemini model
    factory built from the required API key, and a JSON schema memory store.
    """
    database = PostgresDatabaseClient(database_url)
    api_key = llm.require_api_key()
    model_factory = GeminiChatModelFactory(api_key)
    memory = JsonSchemaMemoryStore()
    return DatabaseAgent(database, memory, model_factory)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def run() -> None:
    """Entry point used by the packaged console script.

    Runs the click group with ``standalone_mode=False`` and maps failures to
    exit codes here: 1 for an abort or a ``QcpError``, the exception's own
    ``exit_code`` for click errors, and 130 for Ctrl-C.
    """
    exit_code: int | None = None
    try:
        main(standalone_mode=False)
    except click.exceptions.Abort:
        click.echo("\nAborted.")
        exit_code = 1
    except click.ClickException as click_error:
        click_error.show()
        exit_code = click_error.exit_code
    except QcpError as app_error:
        _print_err(f"Error: {app_error}")
        exit_code = 1
    except KeyboardInterrupt:
        click.echo("\nAborted.")
        exit_code = 130

    # A clean run falls through without calling sys.exit at all.
    if exit_code is not None:
        sys.exit(exit_code)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
# Allow running this module directly; goes through ``run`` so error handling
# matches the installed console script.
if __name__ == "__main__":
    run()
|
qcp/config.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""Validated configuration storage for QCP."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import stat
|
|
8
|
+
from contextlib import suppress
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from pydantic import ValidationError
|
|
13
|
+
|
|
14
|
+
from qcp.models import QcpConfig
|
|
15
|
+
|
|
16
|
+
# Configuration directory: ``$QCP_HOME`` when set, otherwise ``~/.qcp``.
# Resolved once, at import time.
CONFIG_DIR = Path(os.environ.get("QCP_HOME", Path.home() / ".qcp"))
# JSON file holding the persisted settings.
CONFIG_FILE = CONFIG_DIR / "config.json"
# Provider reported by ``get_provider`` when none is stored.
DEFAULT_PROVIDER = "gemini"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _ensure_dir() -> None:
    """Create the configuration directory and restrict it to the owner.

    The permission change is best effort: an ``OSError`` from ``chmod`` is ignored.
    """
    CONFIG_DIR.mkdir(exist_ok=True, parents=True)
    try:
        os.chmod(CONFIG_DIR, stat.S_IRWXU)
    except OSError:
        pass
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def load() -> dict[str, Any]:
    """Load validated configuration, treating corrupt files as empty.

    Returns:
        The stored settings as a plain dict with ``None`` values dropped, or
        ``{}`` when the file is missing, unreadable, not valid UTF-8 JSON, or
        fails ``QcpConfig`` validation.
    """
    if not CONFIG_FILE.exists():
        return {}
    try:
        raw_data = json.loads(CONFIG_FILE.read_text(encoding="utf-8"))
        return QcpConfig.model_validate(raw_data).model_dump(mode="json", exclude_none=True)
    # Parenthesized tuple: the bare ``except A, B:`` form only parses on
    # Python 3.14+. ``ValueError`` covers both ``json.JSONDecodeError`` and the
    # ``UnicodeDecodeError`` raised for a file that is not valid UTF-8.
    except (OSError, ValueError, ValidationError):
        return {}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def save(data: dict[str, Any]) -> None:
    """Validate and persist configuration with owner-only permissions.

    Args:
        data: Settings to store; validated through ``QcpConfig`` first, so
            invalid data raises before anything is written.

    The file can contain a database password and an API key, so it is created
    with mode 0600 from the start rather than written with default permissions
    and tightened afterwards.
    """
    validated = QcpConfig.model_validate(data)
    serialized = validated.model_dump(mode="json", exclude_none=True)
    payload = json.dumps(serialized, indent=2)
    _ensure_dir()

    owner_only = stat.S_IRUSR | stat.S_IWUSR
    descriptor = os.open(CONFIG_FILE, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, owner_only)
    with os.fdopen(descriptor, "w", encoding="utf-8") as handle:
        handle.write(payload)

    # The creation mode does not apply to a file that already existed, so
    # still tighten permissions afterwards (best effort, as before).
    with suppress(OSError):
        os.chmod(CONFIG_FILE, owner_only)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def get(key: str, default: Any = None) -> Any:
    """Look up one persisted setting, falling back to ``default`` when absent."""
    settings = load()
    return settings.get(key, default)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def set_key(key: str, value: Any) -> None:
    """Store ``value`` under ``key`` and persist the validated result."""
    settings = load()
    settings.update({key: value})
    save(settings)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def unset_key(key: str) -> None:
    """Drop ``key`` from the stored configuration if it is there."""
    settings = load()
    if key not in settings:
        return
    settings.pop(key)
    save(settings)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def get_db_url() -> str | None:
    """Resolve the database URL, preferring ``QCP_DATABASE_URL``.

    An unset or empty environment variable falls back to the stored
    ``database_url`` setting.
    """
    from_environment = os.environ.get("QCP_DATABASE_URL")
    if from_environment:
        return from_environment
    return get("database_url")
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def get_gemini_api_key() -> str | None:
    """Resolve the Gemini key, preferring ``GEMINI_API_KEY``.

    An unset or empty environment variable falls back to the stored
    ``gemini_api_key`` setting.
    """
    from_environment = os.environ.get("GEMINI_API_KEY")
    if from_environment:
        return from_environment
    return get("gemini_api_key")
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def get_provider() -> str:
    """Return the stored provider name, or ``DEFAULT_PROVIDER`` when unset."""
    provider = get("provider", DEFAULT_PROVIDER)
    return str(provider)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def config_path() -> str:
    """Return the configuration file location as a string, for status output."""
    return f"{CONFIG_FILE}"
|
qcp/db.py
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"""PostgreSQL access with schema introspection and read-only enforcement."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import re
|
|
7
|
+
from abc import ABC, abstractmethod
|
|
8
|
+
from collections.abc import Sequence
|
|
9
|
+
from datetime import UTC, datetime
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
import psycopg
|
|
13
|
+
from psycopg import Connection
|
|
14
|
+
from psycopg.errors import InvalidSchemaName, UndefinedColumn, UndefinedTable
|
|
15
|
+
|
|
16
|
+
from qcp import config as cfg
|
|
17
|
+
from qcp.errors import (
|
|
18
|
+
DatabaseConnectionError,
|
|
19
|
+
NoDatabaseConfiguredError,
|
|
20
|
+
SchemaChangedError,
|
|
21
|
+
UnsafeQueryError,
|
|
22
|
+
)
|
|
23
|
+
from qcp.models import QueryResult, SchemaColumn, SchemaSnapshot, SchemaTable
|
|
24
|
+
|
|
25
|
+
# Default cap on the number of rows returned by ``execute_read_query``.
MAX_QUERY_ROWS = 200
# Matches statements whose first keyword is SELECT or WITH (case-insensitive,
# leading whitespace allowed); used by ``normalize_read_query``.
_READ_QUERY_PATTERN = re.compile(r"^\s*(select|with)\b", re.IGNORECASE)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class DatabaseClient(ABC):
    """Contract used by the database agent's PostgreSQL tools.

    Concrete clients supply connection handling; callers rely only on the
    four members declared here.
    """

    @property
    @abstractmethod
    def database_id(self) -> str:
        """Return a non-secret stable identifier for schema isolation."""

    @abstractmethod
    def test_connection(self) -> None:
        """Raise an application error when the database cannot be reached."""

    @abstractmethod
    def lookup_schema(self) -> SchemaSnapshot:
        """Read the database's tables and columns into a ``SchemaSnapshot``.

        The PostgreSQL implementation in this module covers every non-system
        schema, not only ``public``.
        """

    @abstractmethod
    def execute_read_query(self, sql: str, limit: int = MAX_QUERY_ROWS) -> QueryResult:
        """Execute one read-only query and return at most ``limit`` rows."""
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class PostgresDatabaseClient(DatabaseClient):
    """Psycopg 3 implementation of the QCP database contract.

    A new connection is opened for every operation and closed before the
    method returns.
    """

    def __init__(self, database_url: str) -> None:
        """Initialize the client with a PostgreSQL connection string."""
        self._database_url = database_url

    @property
    def database_id(self) -> str:
        """Return a credential-safe identifier for the configured database.

        The identifier is the first 16 hex characters of the SHA-256 digest of
        the connection string, so the string itself is never exposed.
        """
        return hashlib.sha256(self._database_url.encode("utf-8")).hexdigest()[:16]

    def _connect(self) -> Connection[Any]:
        """Open a connection (10 s connect timeout).

        Raises:
            DatabaseConnectionError: Wrapping whatever psycopg raised.
        """
        try:
            return psycopg.connect(self._database_url, connect_timeout=10)
        except Exception as error:
            raise DatabaseConnectionError(str(error)) from error

    def test_connection(self) -> None:
        """Verify that PostgreSQL accepts the configured connection string."""
        connection = self._connect()
        connection.close()

    def lookup_schema(self) -> SchemaSnapshot:
        """Return tables and columns from every non-system schema.

        ``pg_catalog``, ``information_schema`` and ``pg_toast*`` schemas are
        excluded; everything else visible in ``information_schema.columns`` is
        included, in schema/table/ordinal order.

        Raises:
            DatabaseConnectionError: If connecting or reading the catalog fails.
        """
        sql = """
            SELECT table_schema, table_name, column_name, data_type, is_nullable
            FROM information_schema.columns
            WHERE table_schema NOT IN ('pg_catalog', 'information_schema')
            AND table_schema NOT LIKE 'pg_toast%'
            ORDER BY table_schema, table_name, ordinal_position
        """
        connection = self._connect()
        try:
            with connection.cursor() as cursor:
                cursor.execute(sql)
                rows = cursor.fetchall()
        except Exception as error:
            raise DatabaseConnectionError(str(error)) from error
        finally:
            connection.close()

        # Group columns per (schema, table); dict insertion order keeps the
        # ordering produced by the query.
        columns_by_table: dict[tuple[str, str], list[SchemaColumn]] = {}
        for schema_name, table_name, column_name, data_type, is_nullable in rows:
            table_key = (str(schema_name), str(table_name))
            columns_by_table.setdefault(table_key, []).append(
                SchemaColumn(name=str(column_name), data_type=str(data_type), nullable=is_nullable == "YES")
            )
        tables = [
            SchemaTable(schema_name=schema_name, name=table_name, columns=columns)
            for (schema_name, table_name), columns in columns_by_table.items()
        ]
        return SchemaSnapshot(database_id=self.database_id, captured_at=datetime.now(UTC), tables=tables)

    def execute_read_query(self, sql: str, limit: int = MAX_QUERY_ROWS) -> QueryResult:
        """Execute a single SELECT/CTE on a connection marked read-only.

        Args:
            sql: Statement to run; validated by ``normalize_read_query`` first.
            limit: Maximum number of rows to return.

        Returns:
            The normalized SQL, column names, up to ``limit`` rows, and whether
            more rows were available.

        Raises:
            UnsafeQueryError: If validation fails or the statement returns no
                result set.
            SchemaChangedError: If PostgreSQL reports an undefined table,
                column, or schema.
            DatabaseConnectionError: For any other connection or execution failure.
        """
        normalized_sql = normalize_read_query(sql)
        connection = self._connect()
        try:
            connection.read_only = True
            with connection.cursor() as cursor:
                cursor.execute(normalized_sql.encode("utf-8"))
                if cursor.description is None:
                    raise UnsafeQueryError(normalized_sql)
                columns = [_column_name(item) for item in cursor.description]
                # Fetch one extra row so truncation can be detected.
                fetched_rows: Sequence[Sequence[Any]] = cursor.fetchmany(limit + 1)
        except (UndefinedTable, UndefinedColumn, InvalidSchemaName) as error:
            raise SchemaChangedError(str(error)) from error
        # Parenthesized so the module also parses on interpreters older than
        # Python 3.14; these must pass through before the generic handler.
        except (UnsafeQueryError, SchemaChangedError):
            raise
        except Exception as error:
            raise DatabaseConnectionError(str(error)) from error
        finally:
            connection.close()

        truncated = len(fetched_rows) > limit
        rows = [list(row) for row in fetched_rows[:limit]]
        return QueryResult(sql=normalized_sql, columns=columns, rows=rows, truncated=truncated)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _column_name(description: Any) -> str:
    """Extract a column name from a cursor description entry.

    Uses the ``name`` attribute when the entry has one; otherwise falls back
    to the first element, as in a plain DB-API description tuple.
    """
    if hasattr(description, "name"):
        raw_name = description.name
    else:
        raw_name = description[0]
    return str(raw_name)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def normalize_read_query(sql: str) -> str:
    """Validate and normalize a single PostgreSQL SELECT or WITH statement.

    Surrounding whitespace and one trailing semicolon are removed. The
    statement is rejected when it is empty, still contains a semicolon, or
    does not begin (after any leading ``(``) with SELECT or WITH.

    Raises:
        UnsafeQueryError: Carrying the original ``sql`` when validation fails.
    """
    statement = sql.strip().removesuffix(";").rstrip()
    if not statement or ";" in statement:
        raise UnsafeQueryError(sql)

    # Parenthesised queries such as "(SELECT ...)" are accepted.
    leading = statement.lstrip("(").lstrip()
    if _READ_QUERY_PATTERN.match(leading) is None:
        raise UnsafeQueryError(sql)
    return statement
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def is_read_only(sql: str) -> bool:
    """Report whether ``sql`` passes ``normalize_read_query`` validation."""
    try:
        normalize_read_query(sql)
    except UnsafeQueryError:
        accepted = False
    else:
        accepted = True
    return accepted
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def require_db_url() -> str:
    """Return the configured database URL.

    Raises:
        NoDatabaseConfiguredError: When no URL is configured (or it is empty).
    """
    database_url = cfg.get_db_url()
    if database_url:
        return database_url
    raise NoDatabaseConfiguredError()
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def test_connection(database_url: str) -> None:
    """Check connectivity for ``database_url`` via :class:`PostgresDatabaseClient`."""
    client = PostgresDatabaseClient(database_url)
    client.test_connection()
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def get_schema_summary(database_url: str, max_tables: int = 50) -> str:
    """Return the snapshot's ``summary`` string for ``database_url``."""
    client = PostgresDatabaseClient(database_url)
    snapshot = client.lookup_schema()
    return snapshot.summary(max_tables=max_tables)
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def run_query(database_url: str, sql: str, limit: int = MAX_QUERY_ROWS) -> tuple[list[str], list[tuple[Any, ...]]]:
    """Run one read query and return ``(columns, rows)`` with rows as tuples."""
    client = PostgresDatabaseClient(database_url)
    result = client.execute_read_query(sql, limit=limit)
    rows_as_tuples = [tuple(row) for row in result.rows]
    return result.columns, rows_as_tuples
|
qcp/errors.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""Application exceptions converted to clean messages at the CLI boundary."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class QcpError(Exception):
    """Base error for all QCP CLI failures.

    The CLI entry point catches this type, prints ``Error: <message>`` in red
    to stderr, and exits with status 1.
    """
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class NoDatabaseConfiguredError(QcpError):
    """Signals that a command needs a database but none is configured."""

    def __init__(self) -> None:
        """Build the fixed message telling the user how to configure one."""
        message = (
            "No database is configured.\n"
            "Run `qcp init` to connect a Postgres database, "
            "or set the QCP_DATABASE_URL environment variable."
        )
        super().__init__(message)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class NoApiKeyConfiguredError(QcpError):
    """Signals that a command needs Gemini but no key is configured."""

    def __init__(self, provider: str = "gemini") -> None:
        """Build the message for ``provider``, pointing at ``qcp auth``."""
        headline = f"No API key configured for provider '{provider}'.\n"
        remedy = (
            f"Run `qcp auth` to add your {provider.title()} API key, "
            "or set the GEMINI_API_KEY environment variable."
        )
        super().__init__(headline + remedy)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class DatabaseConnectionError(QcpError):
    """Signals a PostgreSQL connection or execution failure."""

    def __init__(self, detail: str) -> None:
        """Prefix the driver-provided ``detail`` with a fixed explanation."""
        message = f"Could not connect to the database: {detail}"
        super().__init__(message)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class LLMError(QcpError):
    """Signals a failure in Gemini or the LangChain agent."""

    def __init__(self, detail: str) -> None:
        """Prefix ``detail`` with the AI-provider label."""
        message = f"AI provider error: {detail}"
        super().__init__(message)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class UnsafeQueryError(QcpError):
    """Signals that the SQL is not a single read-only statement."""

    def __init__(self, statement: str) -> None:
        """Embed the rejected ``statement`` in the refusal message."""
        message = (
            "Refusing to run a non-read-only statement generated from your "
            f"question:\n {statement}\n"
            "qcp only executes one SELECT or WITH query."
        )
        super().__init__(message)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class SchemaChangedError(QcpError):
    """Raised when cached schema metadata no longer matches PostgreSQL.

    ``PostgresDatabaseClient.execute_read_query`` raises it when PostgreSQL
    reports an undefined table, column, or schema.
    """
|