quality-studio 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. quality_studio/__init__.py +1 -0
  2. quality_studio/backend/__init__.py +15 -0
  3. quality_studio/backend/__main__.py +6 -0
  4. quality_studio/backend/api/__init__.py +6 -0
  5. quality_studio/backend/api/_deps.py +77 -0
  6. quality_studio/backend/api/catalog.py +160 -0
  7. quality_studio/backend/api/changesets.py +103 -0
  8. quality_studio/backend/api/coverage.py +103 -0
  9. quality_studio/backend/api/data.py +148 -0
  10. quality_studio/backend/api/generation.py +126 -0
  11. quality_studio/backend/api/generation_stream.py +225 -0
  12. quality_studio/backend/api/generation_trigger.py +413 -0
  13. quality_studio/backend/api/health.py +110 -0
  14. quality_studio/backend/api/history.py +51 -0
  15. quality_studio/backend/api/issues.py +44 -0
  16. quality_studio/backend/api/journeys.py +186 -0
  17. quality_studio/backend/api/runs.py +210 -0
  18. quality_studio/backend/api/status.py +142 -0
  19. quality_studio/backend/api/steps.py +152 -0
  20. quality_studio/backend/api/target_config.py +547 -0
  21. quality_studio/backend/api/target_toml.py +449 -0
  22. quality_studio/backend/api/targets.py +44 -0
  23. quality_studio/backend/main.py +374 -0
  24. quality_studio/backend/state.py +141 -0
  25. quality_studio/cli.py +1703 -0
  26. quality_studio/config.py +320 -0
  27. quality_studio/coverage.py +225 -0
  28. quality_studio/customization.py +100 -0
  29. quality_studio/db.py +432 -0
  30. quality_studio/definitions.py +290 -0
  31. quality_studio/generation/__init__.py +60 -0
  32. quality_studio/generation/anthropic_client.py +395 -0
  33. quality_studio/generation/cli_driver.py +324 -0
  34. quality_studio/generation/context.py +267 -0
  35. quality_studio/generation/driver_resolver.py +127 -0
  36. quality_studio/generation/pr_diff.py +357 -0
  37. quality_studio/generation/progress_log.py +308 -0
  38. quality_studio/generation/prompts/_voice.md +57 -0
  39. quality_studio/generation/prompts/data_author/component.md +33 -0
  40. quality_studio/generation/prompts/data_author/coverage.md +33 -0
  41. quality_studio/generation/prompts/data_author/live.md +33 -0
  42. quality_studio/generation/prompts/data_author/process.md +33 -0
  43. quality_studio/generation/prompts/journey_author/component.md +36 -0
  44. quality_studio/generation/prompts/journey_author/coverage.md +36 -0
  45. quality_studio/generation/prompts/journey_author/live.md +36 -0
  46. quality_studio/generation/prompts/journey_author/process.md +36 -0
  47. quality_studio/generation/prompts/step_author/component.md +56 -0
  48. quality_studio/generation/prompts/step_author/coverage.md +39 -0
  49. quality_studio/generation/prompts/step_author/live.md +39 -0
  50. quality_studio/generation/prompts/step_author/process.md +39 -0
  51. quality_studio/generation/registry.py +185 -0
  52. quality_studio/generation/runner.py +647 -0
  53. quality_studio/generation/validators.py +174 -0
  54. quality_studio/generation/version_guard.py +122 -0
  55. quality_studio/github.py +107 -0
  56. quality_studio/history.py +142 -0
  57. quality_studio/issues.py +485 -0
  58. quality_studio/manage_compose.py +160 -0
  59. quality_studio/migrations/001_initial.sql +319 -0
  60. quality_studio/migrations/002_generation_driver.sql +13 -0
  61. quality_studio/migrations/003_quality_meta.sql +99 -0
  62. quality_studio/migrations/005_user_stories.sql +75 -0
  63. quality_studio/migrations/006_target_config_audit.sql +27 -0
  64. quality_studio/migrations/007_host_repo_path.sql +17 -0
  65. quality_studio/migrations/008_generation_progress_log.sql +26 -0
  66. quality_studio/migrations/009_generation_run_open_state.sql +38 -0
  67. quality_studio/migrations/010_journey_composition_spine.sql +225 -0
  68. quality_studio/migrations/__init__.py +0 -0
  69. quality_studio/runs.py +784 -0
  70. quality_studio/services.py +915 -0
  71. quality_studio/skills.py +160 -0
  72. quality_studio/skills_data/qs-author/SKILL.md +70 -0
  73. quality_studio/skills_data/qs-runs/SKILL.md +63 -0
  74. quality_studio/target_repo.py +78 -0
  75. quality_studio/target_toml_writer.py +262 -0
  76. quality_studio/targets.py +391 -0
  77. quality_studio/validate.py +508 -0
  78. quality_studio/yaml_io.py +207 -0
  79. quality_studio-0.0.1.dist-info/METADATA +38 -0
  80. quality_studio-0.0.1.dist-info/RECORD +82 -0
  81. quality_studio-0.0.1.dist-info/WHEEL +4 -0
  82. quality_studio-0.0.1.dist-info/entry_points.txt +2 -0
@@ -0,0 +1 @@
1
+ __version__ = "0.0.1"
@@ -0,0 +1,15 @@
1
+ """FastAPI backend (FR-10.1 / FR-10.2 / FR-10.3).
2
+
3
+ `main.create_app()` returns the FastAPI ASGI app. `python -m
4
+ quality_studio.backend` launches uvicorn bound to 127.0.0.1:9100
5
+ (NFR-5.1) — the same entry point docker-compose uses.
6
+
7
+ Startup config (target-repo path, DB DSN, Anthropic key) is resolved
8
+ from environment variables at app-factory time; an invalid target-repo
9
+ path fails fast at startup (FR-11.4 implementation-defined mechanism
10
+ documented in `state.py`).
11
+ """
12
+
13
+ from quality_studio.backend.main import create_app
14
+
15
+ __all__ = ["create_app"]
@@ -0,0 +1,6 @@
1
+ """`python -m quality_studio.backend` entry — launches uvicorn."""
2
+
3
+ from quality_studio.backend.main import main
4
+
5
+ if __name__ == "__main__":
6
+ main()
@@ -0,0 +1,6 @@
1
+ """API routers (FR-10.2, FR-10.3).
2
+
3
+ Each module under `quality_studio.backend.api` exports a single
4
+ `router: APIRouter` mounted by `backend.main.create_app` under `/api`.
5
+ Routers are read-only except `runs.py`'s `POST /api/runs`.
6
+ """
@@ -0,0 +1,77 @@
1
+ """Shared FastAPI dependencies for the api routers.
2
+
3
+ v2.1 multi-target (#101/#102): pools live in `app.state.registry` keyed by
4
+ target_id. `get_pool` is path-aware — when the matched route includes a
5
+ `{target_id}` path parameter, it resolves that target's pool from the
6
+ registry; when the legacy unprefixed route matches, it proxies to the
7
+ first registered target. Handlers don't need to know which mount they're
8
+ serving.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import asyncpg
14
+ from fastapi import HTTPException, Request
15
+
16
+
17
def get_pool(request: Request) -> asyncpg.Pool:
    """Pick the asyncpg pool that should serve the current request.

    Resolution order:
      1. `app.state` has no `registry` attribute (or it is None): return the
         legacy singleton `app.state.pool`.
      2. The matched route supplied a non-empty `target_id` path parameter:
         return that target's pool, or raise 404 when the registry does not
         know the id.
      3. Otherwise (unprefixed legacy route): return the pool of the first
         registered target, or raise 503 when the registry has none.
    """
    requested = request.path_params.get("target_id")
    registry = getattr(request.app.state, "registry", None)
    if registry is None:
        # No registry on the app — serve from the single legacy pool.
        return request.app.state.pool

    if not requested:
        # Legacy unprefixed mount: proxy to whichever target registered first.
        fallback = registry.first()
        if fallback is None:
            raise HTTPException(
                status_code=503,
                detail="no targets registered — run `quality init` in a target repo",
            )
        return fallback.pool

    entry = registry.get(requested)
    if entry is None:
        raise HTTPException(
            status_code=404,
            detail=f"target_id {requested!r} not registered",
        )
    return entry.pool
51
+
52
+
53
def get_pool_for_target(target_id: str, request: Request) -> asyncpg.Pool:
    """Return the pool registered for an explicitly named target.

    Unlike `get_pool`, the target id arrives as an argument rather than being
    read from the request's path parameters, and there is no legacy fallback:
    a missing registry raises 503, an unknown `target_id` raises 404.
    """
    registry = getattr(request.app.state, "registry", None)
    if registry is None:
        raise HTTPException(status_code=503, detail="registry not initialised")

    entry = registry.get(target_id)
    if entry is not None:
        return entry.pool
    raise HTTPException(
        status_code=404,
        detail=f"target_id {target_id!r} not registered",
    )
70
+
71
+
72
def get_meta_pool(request: Request) -> asyncpg.Pool:
    """Return the registry's `meta_pool`; raise 503 when no registry is set on the app."""
    registry = getattr(request.app.state, "registry", None)
    if registry is not None:
        return registry.meta_pool
    raise HTTPException(status_code=503, detail="registry not initialised")
@@ -0,0 +1,160 @@
1
+ """GET /api/catalog (FR-10.2, v3.0 journey-composition model).
2
+
3
+ Returns the domain catalog in the two-primitive model: domains → **steps**
4
+ (atomic leaves carrying validations[] + risk_level + supported_levels[]) and
5
+ **journeys** (recursive composition; each journey lists its ordered elements,
6
+ where an element references either a step or a child journey). The shape is a
7
+ single nested document the read-only UI renders without follow-up requests.
8
+
9
+ Replaces the pre-v3 case/flow/story catalog (no alias shim — Vocabulary
10
+ decision in the locked journey-composition-model doc).
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ from typing import Any
17
+
18
+ import asyncpg
19
+ from fastapi import APIRouter, Depends, Query
20
+
21
+ from quality_studio.backend.api._deps import get_pool
22
+
23
+ router = APIRouter(tags=["catalog"])
24
+
25
+
26
+ def _maybe_json(value: Any) -> Any:
27
+ if isinstance(value, (str, bytes)):
28
+ try:
29
+ return json.loads(value)
30
+ except json.JSONDecodeError:
31
+ return value
32
+ return value
33
+
34
+
35
+ def _summarize_validations(validations: Any) -> dict[str, int]:
36
+ """Count validations[] by category — flat shape the UI renders as chips."""
37
+ if not isinstance(validations, list):
38
+ return {}
39
+ out: dict[str, int] = {}
40
+ for v in validations:
41
+ if isinstance(v, dict) and "category" in v:
42
+ out[v["category"]] = out.get(v["category"], 0) + 1
43
+ return out
44
+
45
+
46
+ async def _load_catalog(
47
+ pool: asyncpg.Pool,
48
+ *,
49
+ domain: str | None,
50
+ risk_level: str | None,
51
+ ) -> list[dict[str, Any]]:
52
+ """Return the catalog as a list of per-domain dicts. The `risk_level`
53
+ filter applies at the step level — a domain still appears even if every
54
+ step is filtered out, so the UI shows the empty-state rather than mistaking
55
+ a filtered domain for a missing one."""
56
+ domain_args: list[Any] = []
57
+ domain_where = ""
58
+ if domain is not None:
59
+ domain_args.append(domain)
60
+ domain_where = "WHERE domain_id = $1"
61
+
62
+ async with pool.acquire() as conn:
63
+ domains = await conn.fetch(
64
+ f"SELECT domain_id, name, owner_team FROM quality_domains {domain_where} "
65
+ "ORDER BY domain_id",
66
+ *domain_args,
67
+ )
68
+ step_args: list[Any] = []
69
+ step_clauses: list[str] = []
70
+ if domain is not None:
71
+ step_args.append(domain)
72
+ step_clauses.append(f"domain_id = ${len(step_args)}")
73
+ if risk_level is not None:
74
+ step_args.append(risk_level)
75
+ step_clauses.append(f"risk_level = ${len(step_args)}")
76
+ step_where = "WHERE " + " AND ".join(step_clauses) if step_clauses else ""
77
+ steps = await conn.fetch(
78
+ f"""
79
+ SELECT step_id, domain_id, intent, risk_level, validations,
80
+ supported_levels, metadata
81
+ FROM quality_steps {step_where} ORDER BY step_id
82
+ """,
83
+ *step_args,
84
+ )
85
+ journey_args: list[Any] = []
86
+ journey_where = ""
87
+ if domain is not None:
88
+ journey_args.append(domain)
89
+ journey_where = "WHERE domain_id = $1"
90
+ journeys = await conn.fetch(
91
+ f"""
92
+ SELECT journey_id, domain_id, title, kind, description
93
+ FROM quality_journeys {journey_where} ORDER BY journey_id
94
+ """,
95
+ *journey_args,
96
+ )
97
+ elements = await conn.fetch(
98
+ "SELECT journey_id, position, step_id, child_journey_id "
99
+ "FROM quality_journey_elements ORDER BY journey_id, position"
100
+ )
101
+
102
+ by_domain: dict[str, dict[str, Any]] = {
103
+ r["domain_id"]: {
104
+ "domain_id": r["domain_id"],
105
+ "name": r["name"],
106
+ "owner_team": r["owner_team"],
107
+ "steps": [],
108
+ "journeys": [],
109
+ }
110
+ for r in domains
111
+ }
112
+ for s in steps:
113
+ if s["domain_id"] not in by_domain:
114
+ continue
115
+ meta = _maybe_json(s["metadata"]) or {}
116
+ by_domain[s["domain_id"]]["steps"].append(
117
+ {
118
+ "step_id": s["step_id"],
119
+ "intent": s["intent"],
120
+ "risk_level": s["risk_level"],
121
+ "supported_levels": list(s["supported_levels"])
122
+ if s["supported_levels"] is not None
123
+ else [],
124
+ "outcome": meta.get("outcome") if isinstance(meta, dict) else None,
125
+ "validation_summary": _summarize_validations(_maybe_json(s["validations"])),
126
+ }
127
+ )
128
+
129
+ elements_by_journey: dict[str, list[dict[str, Any]]] = {}
130
+ for e in elements:
131
+ elements_by_journey.setdefault(e["journey_id"], []).append(
132
+ {
133
+ "position": e["position"],
134
+ "step_id": e["step_id"],
135
+ "child_journey_id": e["child_journey_id"],
136
+ }
137
+ )
138
+ for j in journeys:
139
+ if j["domain_id"] not in by_domain:
140
+ continue
141
+ by_domain[j["domain_id"]]["journeys"].append(
142
+ {
143
+ "journey_id": j["journey_id"],
144
+ "title": j["title"],
145
+ "kind": j["kind"],
146
+ "description": j["description"],
147
+ "elements": elements_by_journey.get(j["journey_id"], []),
148
+ }
149
+ )
150
+ return list(by_domain.values())
151
+
152
+
153
@router.get("/catalog")
async def get_catalog(
    domain: str | None = Query(None),
    risk_level: str | None = Query(None),
    pool: asyncpg.Pool = Depends(get_pool),
) -> dict[str, Any]:
    """Serve the catalog as `{"domains": [...]}`, forwarding both optional
    query filters to `_load_catalog`."""
    return {"domains": await _load_catalog(pool, domain=domain, risk_level=risk_level)}
@@ -0,0 +1,103 @@
1
+ """GET /api/changesets (FR-10.2) + GET /api/changesets/{change_set_id}."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import datetime
6
+ from typing import Any
7
+ from uuid import UUID
8
+
9
+ import asyncpg
10
+ from fastapi import APIRouter, Depends, HTTPException, Query
11
+
12
+ from quality_studio.backend.api._deps import get_pool
13
+
14
+ router = APIRouter(tags=["changesets"])
15
+
16
+
17
@router.get("/changesets")
async def list_changesets(
    issue: str | None = Query(None, description="filter by github_issue_url"),
    pr: str | None = Query(None, description="filter by github_pr_url"),
    since: datetime | None = Query(None, description="ISO-8601 lower bound on created_at"),
    limit: int = Query(50, ge=1, le=500),
    pool: asyncpg.Pool = Depends(get_pool),
) -> dict[str, Any]:
    """List change sets, newest first, as `{"changesets": [...]}`.

    Each supplied filter becomes one AND-ed, positionally parameterised
    condition; `limit` is always bound as the last parameter.
    """
    # (column, operator, value) in the order the placeholders are numbered.
    candidate_filters = (
        ("github_issue_url", "=", issue),
        ("github_pr_url", "=", pr),
        ("created_at", ">=", since),
    )
    params: list[Any] = []
    conditions: list[str] = []
    for column, operator, wanted in candidate_filters:
        if wanted is None:
            continue
        params.append(wanted)
        conditions.append(f"{column} {operator} ${len(params)}")
    where = "WHERE " + " AND ".join(conditions) if conditions else ""

    params.append(limit)
    limit_pos = len(params)

    async with pool.acquire() as conn:
        records = await conn.fetch(
            f"""
            SELECT change_set_id, github_issue_url, github_pr_url, commit_sha,
                   author, summary, model, prompt_version, generation_run_id,
                   created_at
            FROM quality_change_sets
            {where}
            ORDER BY created_at DESC
            LIMIT ${limit_pos}
            """,
            *params,
        )
    return {"changesets": [_row_to_dict(record) for record in records]}
54
+
55
+
56
@router.get("/changesets/{change_set_id}")
async def get_changeset(
    change_set_id: str,
    pool: asyncpg.Pool = Depends(get_pool),
) -> dict[str, Any]:
    """Fetch one change set by id.

    The path value is parsed as a UUID before querying. Both an unparseable id
    and an id with no matching row produce a 404 whose `detail` is a dict with
    `error` and `value` keys.
    """
    try:
        parsed_id = UUID(change_set_id)
    except ValueError as exc:
        # Malformed ids are reported as "not found" rather than as a 422.
        raise HTTPException(
            status_code=404,
            detail={"error": "invalid change_set_id", "value": change_set_id},
        ) from exc

    async with pool.acquire() as conn:
        record = await conn.fetchrow(
            """
            SELECT change_set_id, github_issue_url, github_pr_url, commit_sha,
                   author, summary, model, prompt_version, generation_run_id,
                   created_at
            FROM quality_change_sets
            WHERE change_set_id = $1
            """,
            parsed_id,
        )

    if record is not None:
        return _row_to_dict(record)
    raise HTTPException(
        status_code=404,
        detail={"error": "change_set not found", "value": change_set_id},
    )
89
+
90
+
91
+ def _row_to_dict(r: asyncpg.Record) -> dict[str, Any]:
92
+ return {
93
+ "change_set_id": str(r["change_set_id"]),
94
+ "github_issue_url": r["github_issue_url"],
95
+ "github_pr_url": r["github_pr_url"],
96
+ "commit_sha": r["commit_sha"],
97
+ "author": r["author"],
98
+ "summary": r["summary"],
99
+ "model": r["model"],
100
+ "prompt_version": r["prompt_version"],
101
+ "generation_run_id": (str(r["generation_run_id"]) if r["generation_run_id"] else None),
102
+ "created_at": r["created_at"].isoformat(),
103
+ }
@@ -0,0 +1,103 @@
1
+ """GET /api/coverage + /api/coverage/live (FR-10.2, FR-7.4, v3.0).
2
+
3
+ `/api/coverage` returns the per-step coverage matrix (step_id × domain × risk ×
4
+ supported_levels × outcome) read from `quality_steps`. `/api/coverage/live`
5
+ overlays the latest per-step result status via `runs.live_coverage` (FR-7.4) —
6
+ status is the newest per-step result; the authoritative above-unit signal is
7
+ the journey-run verdict (`quality_runs`).
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import json
13
+ from typing import Any
14
+
15
+ import asyncpg
16
+ from fastapi import APIRouter, Depends, Query
17
+
18
+ from quality_studio.backend.api._deps import get_pool
19
+ from quality_studio.runs import live_coverage
20
+
21
+ router = APIRouter(tags=["coverage"])
22
+
23
+
24
+ def _outcome(metadata: Any) -> str | None:
25
+ if isinstance(metadata, (str, bytes)):
26
+ try:
27
+ metadata = json.loads(metadata)
28
+ except json.JSONDecodeError:
29
+ return None
30
+ return metadata.get("outcome") if isinstance(metadata, dict) else None
31
+
32
+
33
@router.get("/coverage")
async def get_coverage(
    domain: str | None = Query(None),
    environment: str | None = Query(  # noqa: ARG001
        None,
        description="environment filter — currently passthrough to /live; the "
        "static matrix is environment-agnostic by design",
    ),
    pool: asyncpg.Pool = Depends(get_pool),
) -> dict[str, Any]:
    """Return the static per-step coverage matrix as `{"rows": [...]}`.

    Rows come from `quality_steps`, optionally restricted to one `domain`.
    `environment` is accepted but not used by this handler.
    """
    params: list[Any] = [] if domain is None else [domain]
    where = "WHERE domain_id = $1" if domain is not None else ""

    async with pool.acquire() as conn:
        records = await conn.fetch(
            f"""
            SELECT domain_id, step_id, risk_level, supported_levels, metadata
            FROM quality_steps {where} ORDER BY step_id
            """,
            *params,
        )

    matrix: list[dict[str, Any]] = []
    for record in records:
        levels = record["supported_levels"]
        matrix.append(
            {
                "domain_id": record["domain_id"],
                "step_id": record["step_id"],
                "risk_level": record["risk_level"],
                # A NULL column is reported as an empty list.
                "supported_levels": [] if levels is None else list(levels),
                "outcome": _outcome(record["metadata"]),
            }
        )
    return {"rows": matrix}
70
+
71
+
72
@router.get("/coverage/live")
async def get_coverage_live(
    domain: str = Query(..., description="domain_id is required for live coverage"),
    window: str | None = Query(
        None,
        description="window like '30d'; only `<n>d` is parsed in v1",
        pattern=r"^\d+d$",
    ),
    environment: str | None = Query(None),
    pool: asyncpg.Pool = Depends(get_pool),
) -> dict[str, Any]:
    """Return the latest per-step run status for one domain.

    `window` (validated against `<digits>d`) is converted to an integer day
    count and passed, with `environment`, to `live_coverage`. The response
    echoes the filters and lists one entry per row that `live_coverage` returns.
    """
    if window:
        window_days = int(window.rstrip("d"))
    else:
        window_days = None

    results = await live_coverage(
        pool, domain_id=domain, window_days=window_days, environment=environment
    )

    payload_rows: list[dict[str, Any]] = []
    for item in results:
        ran_at = item.last_run_at
        payload_rows.append(
            {
                "step_id": item.step_id,
                "domain_id": item.domain_id,
                "risk_level": item.risk_level,
                "last_run_status": item.last_run_status,
                "last_run_at": ran_at.isoformat() if ran_at else None,
                "last_environment": item.last_environment,
                "last_level": item.last_level,
            }
        )
    return {
        "domain_id": domain,
        "window_days": window_days,
        "environment": environment,
        "rows": payload_rows,
    }
@@ -0,0 +1,148 @@
1
+ """GET /api/data[?domain=][/{name}] (FR-10.3 / FR-11.4, v3.0).
2
+
3
+ Data list calls the target's `data_introspect_cmd` from `[data_generation]` in
4
+ quality.toml and returns the parsed JSON. Data detail reads source via
5
+ `target_repo.py.read_text` (FR-2.1 boundary) — the backend NEVER imports a data
6
+ module. The detail handler resolves source path with this precedence:
7
+
8
+ 1. `file` (or, failing that, `path`) field in the introspection metadata (if
9
+ the target's `data_introspect_cmd` populates it);
10
+ 2. fallback: `{output_dir_template}/{name}.py` from `[data_generation]` in
11
+ quality.toml, with `{domain}` substituted.
12
+
13
+ This is the v3.0 rename of the pre-v3 `[seed_generation]` / `/api/seeds` surface
14
+ (data = per-level test data bound to steps).
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import json
20
+ import shlex
21
+ import subprocess
22
+ from typing import Any
23
+
24
+ from fastapi import APIRouter, HTTPException, Query, Request, status
25
+
26
+ from quality_studio.config import load_target_config
27
+ from quality_studio.target_repo import TargetPathError
28
+
29
+ router = APIRouter(tags=["data"])
30
+
31
+ _INTROSPECT_TIMEOUT_SECONDS = 30.0
32
+
33
+
34
def _run_introspect(cmd: str, *, cwd) -> list[dict[str, Any]]:
    """Execute the configured introspection command and parse its JSON output.

    Args:
        cmd: Command line from `quality.toml`; split with `shlex.split` and run
            without a shell.
        cwd: Working directory for the child process (stringified before use).

    Returns:
        The list stored under the top-level `"data"` key of the command's
        JSON stdout.

    Raises:
        HTTPException: 500 in every failure mode — unparseable or empty command
            line, launch failure, timeout, undecodable output, non-zero exit,
            non-JSON stdout, or JSON without a top-level `"data"` list.
    """
    try:
        argv = shlex.split(cmd)
    except ValueError as exc:
        # e.g. unbalanced quotes in the configured command line.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"data_introspect_cmd execution failed: {exc!s}",
        ) from exc
    if not argv:
        # A whitespace-only command would otherwise fail inside subprocess
        # with an unhelpful IndexError.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="data_introspect_cmd execution failed: command is empty",
        )

    try:
        completed = subprocess.run(  # noqa: S603 - cmd comes from operator-controlled toml
            argv,
            cwd=str(cwd),
            capture_output=True,
            text=True,
            timeout=_INTROSPECT_TIMEOUT_SECONDS,
        )
    except (subprocess.TimeoutExpired, OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError: text=True decoding of the child's output failed.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"data_introspect_cmd execution failed: {exc!s}",
        ) from exc

    if completed.returncode != 0:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"data_introspect_cmd exited {completed.returncode}: {completed.stderr.strip()}",
        )

    try:
        # Empty stdout is treated as an empty object, which then fails the
        # "missing 'data' list" check below.
        parsed = json.loads(completed.stdout or "{}")
    except json.JSONDecodeError as exc:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"data_introspect_cmd output is not JSON: {exc!s}",
        ) from exc

    # Valid JSON need not be an object (a bare list or scalar has no `.get`),
    # so check the container type before reading the key.
    items = parsed.get("data") if isinstance(parsed, dict) else None
    if not isinstance(items, list):
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="data_introspect_cmd output missing top-level 'data' list",
        )
    return items
68
+
69
+
70
+ def _resolve_source_relpath(
71
+ *, data_meta: dict[str, Any], name: str, domain: str, data_generation: dict[str, Any]
72
+ ) -> str:
73
+ """Resolve the relative source path with the precedence documented above."""
74
+ explicit = data_meta.get("file") or data_meta.get("path")
75
+ if explicit:
76
+ return str(explicit)
77
+ template = data_generation.get("output_dir_template", "data/{domain}/")
78
+ base = template.format(domain=domain).rstrip("/")
79
+ return f"{base}/{name}.py"
80
+
81
+
82
@router.get("/data")
async def list_data(
    request: Request,
    domain: str = Query(..., description="domain_id; required for the introspect command lookup"),
) -> dict[str, Any]:
    """List the target's data modules as reported by its introspection command.

    Reads `data_introspect_cmd` from the target's `[data_generation]` config,
    runs it in the target root, and returns `{"domain": ..., "data": [...]}`.

    Raises:
        HTTPException: 400 when `data_introspect_cmd` is not configured; any
            error raised by `_run_introspect` propagates unchanged.
    """
    import asyncio  # function-scope import: only this handler needs it

    target_repo = request.app.state.backend.target_repo
    config = load_target_config(target_repo)
    cmd = config.data_generation.get("data_introspect_cmd")
    if not cmd:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="quality.toml [data_generation].data_introspect_cmd is not set",
        )
    # `_run_introspect` blocks on a child process (up to its timeout). Run it
    # in a worker thread so this coroutine does not stall the event loop.
    items = await asyncio.to_thread(_run_introspect, cmd, cwd=target_repo.root)
    # Tag each entry with domain so the UI doesn't have to thread it. Entries
    # that are not JSON objects are passed through untouched instead of
    # crashing on `.setdefault`.
    for entry in items:
        if isinstance(entry, dict):
            entry.setdefault("domain", domain)
    return {"domain": domain, "data": items}
100
+
101
+
102
@router.get("/data/{name}")
async def get_data_detail(
    request: Request,
    name: str,
    domain: str = Query(..., description="domain_id; required for source-path resolution"),
) -> dict[str, Any]:
    """Return one data module's introspection metadata together with its source text.

    The module is located by `name` in the introspection output; its source
    path comes from `_resolve_source_relpath` and is read through
    `target_repo.read_text`.

    Raises:
        HTTPException: 400 when `data_introspect_cmd` is not configured or the
            resolved path is rejected with `TargetPathError`; 404 when no
            entry has the requested name or the source file does not exist.
            Errors raised by `_run_introspect` propagate unchanged.
    """
    import asyncio  # function-scope import: only this handler needs it

    target_repo = request.app.state.backend.target_repo
    config = load_target_config(target_repo)
    cmd = config.data_generation.get("data_introspect_cmd")
    if not cmd:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="quality.toml [data_generation].data_introspect_cmd is not set",
        )
    # `_run_introspect` blocks on a child process (up to its timeout). Run it
    # in a worker thread so this coroutine does not stall the event loop.
    items = await asyncio.to_thread(_run_introspect, cmd, cwd=target_repo.root)
    # Skip entries that are not JSON objects rather than failing on `.get`.
    match = next(
        (d for d in items if isinstance(d, dict) and d.get("name") == name),
        None,
    )
    if match is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"data module {name!r} not found in domain {domain!r}",
        )
    relpath = _resolve_source_relpath(
        data_meta=match,
        name=name,
        domain=domain,
        data_generation=config.data_generation,
    )
    try:
        source = target_repo.read_text(relpath)
    except TargetPathError as exc:
        # Path outside the target root, or unsafe glob — refuse, never silently fall back.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"data source path rejected: {exc!s}",
        ) from exc
    except FileNotFoundError as exc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"data source file not found at {relpath!r}",
        ) from exc
    return {
        "domain": domain,
        "name": name,
        "source_path": relpath,
        "source": source,
        "metadata": match,
    }