semcodes 0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
routers/audit.py ADDED
@@ -0,0 +1,333 @@
1
+ """Audit endpoints and analysis pipeline."""
2
+
3
+ import asyncio
4
+ import hashlib
5
+ import re
6
+ import shutil
7
+ import tempfile
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from datetime import timezone
11
+
12
+ from fastapi import APIRouter, Depends, HTTPException, Request
13
+
14
+ from config import APP_URL
15
+ from services.pipeline import run_pipeline, run_pipeline_local
16
+ from services.scan_service import (
17
+ save_scan,
18
+ get_recent_scans,
19
+ save_audit_result,
20
+ get_audit_result,
21
+ save_badge_cache,
22
+ )
23
+ from routers.auth import get_current_user
24
+
25
+ router = APIRouter()
26
+
27
+
28
def _utc_now_iso() -> str:
    """Return the current UTC time as a timezone-aware ISO 8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
30
+
31
+
32
# Strong references to in-flight background tasks. The event loop itself only
# keeps weak references, so a task nobody else holds may be garbage-collected
# before it finishes.
_BACKGROUND_TASKS = set()


def _schedule_background_task(coroutine):
    """Schedule *coroutine* on the running event loop as a fire-and-forget task.

    When no event loop is running, the coroutine is closed (so Python does not
    warn about a coroutine that was never awaited) and nothing is scheduled.

    Returns:
        None in both cases; callers poll the stored audit record instead of
        awaiting the task.
    """
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        coroutine.close()
        return
    task = loop.create_task(coroutine)
    # Hold the task until it completes, then drop the reference automatically.
    _BACKGROUND_TASKS.add(task)
    task.add_done_callback(_BACKGROUND_TASKS.discard)
39
+
40
+
41
@router.post("/api/audit")
async def run_audit(request: Request, user: dict = Depends(get_current_user)):
    """Run one-click audit on a repo. Requires authentication.

    The JSON body must contain ``repo``. The optional benchmark fields
    (``case_id``, ``source_type``, ``change_type``, ``baseline_detected``,
    ``benchmark_mode``, ``ticket_id``, ``pr_reference``) are stored with the
    audit record and echoed back when they are not ``None``.

    Returns:
        A dict with the new ``audit_id``, ``status`` set to ``"running"`` and
        the non-``None`` benchmark fields. The pipeline itself is scheduled as
        a background task; the result is read back via the stored audit record.

    Raises:
        HTTPException: 400 when the body is not valid JSON, is not a JSON
            object, or ``repo`` is missing or not a non-empty string.
    """
    try:
        body = await request.json()
    except ValueError:
        # JSON decoding errors are ValueError subclasses; report a client error
        # instead of letting them surface as a 500.
        raise HTTPException(400, "Invalid JSON body") from None
    if not isinstance(body, dict):
        raise HTTPException(400, "JSON object body required")

    repo = body.get("repo")
    if not isinstance(repo, str) or not repo:
        raise HTTPException(400, "repo required")

    token = user["github_token"]
    # Short id derived from the repo name and the current timestamp.
    audit_id = hashlib.sha256(f"{repo}-{_utc_now_iso()}".encode()).hexdigest()[:12]

    benchmark_meta = {
        "case_id": body.get("case_id"),
        "source_type": body.get("source_type"),
        "change_type": body.get("change_type"),
        "baseline_detected": body.get("baseline_detected"),
        "benchmark_mode": body.get("benchmark_mode", False),
        "ticket_id": body.get("ticket_id"),
        "pr_reference": body.get("pr_reference"),
    }
    # Only fields the caller actually supplied are persisted and echoed.
    supplied_meta = {k: v for k, v in benchmark_meta.items() if v is not None}

    # Save initial audit status to database
    save_audit_result(
        audit_id,
        {
            "status": "running",
            "repo": repo,
            "started": _utc_now_iso(),
            **supplied_meta,
        },
    )

    _schedule_background_task(_run_audit_pipeline(audit_id, repo, token))
    return {
        "audit_id": audit_id,
        "status": "running",
        **supplied_meta,
    }
76
+
77
+
78
@router.get("/api/audit/{audit_id}")
async def get_audit_result_endpoint(audit_id: str):
    """Poll audit status and results.

    Returns the stored record for *audit_id*; responds with 404 when the
    lookup yields nothing (or an empty record).
    """
    stored = get_audit_result(audit_id)
    if stored:
        return stored
    raise HTTPException(404, "Audit not found")
85
+
86
+
87
@router.get("/api/scans/recent")
async def get_recent_scans_api(limit: int = 100):
    """Get list of recent scans with metrics.

    Args:
        limit: Maximum number of scans requested from ``get_recent_scans``.

    Returns:
        A dict with ``scans`` (the list returned by the scan store) and
        ``total`` (its length). When the store lookup fails, the failure is
        logged and an empty result is returned.
    """
    import logging

    try:
        scans = get_recent_scans(limit)
        total = len(scans)
    except Exception:
        # The previous fallback read an in-memory `scan_history` list that is
        # not defined in this module, so it raised NameError. Degrade to an
        # empty listing and keep the failure visible in the logs.
        logging.getLogger(__name__).exception("Failed to load recent scans")
        scans = []
        total = 0

    return {
        "scans": scans,
        "total": total,
    }
102
+
103
+
104
@router.post("/api/analyze")
async def analyze_repo(request: Request):
    """Analyze any public repository by URL (sandbox mode). Supports file:// for local repos.

    Request JSON: ``repo_url`` (required), an optional ``sandbox`` flag and the
    optional benchmark fields (``case_id``, ``source_type``, ``change_type``,
    ``baseline_detected``, ``benchmark_mode``, ``ticket_id``, ``pr_reference``).

    Three URL forms are recognised:

    * ``local:/<name>`` - mapped onto the mounted ``/local-repos/`` directory.
    * ``file://<path>`` - a path handed to the clone step as-is.
    * anything else - parsed for ``owner/repo`` (github.com, gitlab.com,
      bitbucket.org, or an ``scp``-style ``host:owner/repo`` address).

    Returns:
        A dict with the new ``audit_id``, ``status`` ``"running"``,
        ``sandbox`` ``True`` and the non-``None`` benchmark fields. The
        analysis itself is scheduled as a background task.

    Raises:
        HTTPException: 400 when the body is not a JSON object, ``repo_url`` is
            missing, a ``local:/`` path escapes ``/local-repos/``, or no
            owner/repo can be parsed from the URL.
    """
    import posixpath

    try:
        body = await request.json()
    except ValueError:
        # JSON decoding errors are ValueError subclasses.
        raise HTTPException(400, "Invalid JSON body") from None
    if not isinstance(body, dict):
        raise HTTPException(400, "JSON object body required")

    repo_url = body.get("repo_url", "")
    sandbox = body.get("sandbox", False)

    if not repo_url or not isinstance(repo_url, str):
        raise HTTPException(400, "repo_url required")

    # Initialize actual_repo_url (defaults to repo_url)
    actual_repo_url = repo_url

    # Support local:/ paths for mounted volume repositories
    if repo_url.startswith("local:/"):
        # Rewrite only the leading scheme, then normalise so that ".."
        # segments cannot point outside the mounted volume.
        path = posixpath.normpath("/local-repos/" + repo_url[len("local:/"):])
        if not path.startswith("/local-repos/"):
            raise HTTPException(400, "Invalid local repository path")
        owner = "local"
        repo = posixpath.basename(path)
        # Use the actual mounted path for the copy step
        actual_repo_url = path
    elif repo_url.startswith("file://"):
        # NOTE(review): this lets an unauthenticated caller point the clone
        # step at any path readable by the service - confirm that is intended.
        path = repo_url[len("file://"):]
        # Drop a trailing ".git" *suffix*. str.rstrip("/.git") would strip any
        # run of those characters and turn e.g. "digit" into "d".
        trimmed = path.rstrip("/")
        if trimmed.endswith(".git"):
            trimmed = trimmed[: -len(".git")]
        owner = "local"
        repo = posixpath.basename(trimmed.rstrip("/"))
        actual_repo_url = path
    else:
        # owner / repo, where repo keeps inner dots ("socket.io") but loses a
        # trailing ".git" and anything after the next "/", "?" or "#".
        tail = r"([^/]+)/([^/?#]+?)(?:\.git)?(?:[/?#]|$)"

        # Parse owner/repo from URL
        match = (
            re.search(r"github\.com/" + tail, repo_url)
            or re.search(r"gitlab\.com/" + tail, repo_url)
            or re.search(r"bitbucket\.org/" + tail, repo_url)
        )

        if not match:
            # scp-style address such as git@host:owner/repo.git
            match = re.search(r":" + tail, repo_url)

        if not match:
            raise HTTPException(400, "Could not parse owner/repo from URL")

        owner, repo = match.group(1), match.group(2)

    # Short id derived from the repo label and the current timestamp.
    audit_id = hashlib.sha256(
        f"{owner}/{repo}-{_utc_now_iso()}".encode()
    ).hexdigest()[:12]

    benchmark_meta = {
        "case_id": body.get("case_id"),
        "source_type": body.get("source_type"),
        "change_type": body.get("change_type"),
        "baseline_detected": body.get("baseline_detected"),
        "benchmark_mode": body.get("benchmark_mode", False),
        "ticket_id": body.get("ticket_id"),
        "pr_reference": body.get("pr_reference"),
    }
    # Only fields the caller actually supplied are persisted and echoed.
    supplied_meta = {k: v for k, v in benchmark_meta.items() if v is not None}

    # Save initial audit status to database
    save_audit_result(
        audit_id,
        {
            "status": "running",
            "repo": f"{owner}/{repo}",
            "sandbox": sandbox,
            "started": _utc_now_iso(),
            **supplied_meta,
        },
    )

    # Use actual_repo_url for local repos, otherwise repo_url
    _schedule_background_task(
        _run_sandbox_analysis(audit_id, actual_repo_url, f"{owner}/{repo}")
    )
    return {
        "audit_id": audit_id,
        "status": "running",
        "sandbox": True,
        **supplied_meta,
    }
196
+
197
+
198
async def _run_audit_pipeline(audit_id: str, repo: str, token: str):
    """Background pipeline: clone → code2llm → redup → pyqual → report.

    Runs ``run_pipeline`` for *repo* with *token*, stores the resulting report
    under *audit_id*, refreshes the badge cache and records a scan entry.

    Any exception raised along the way is logged and replaces the stored audit
    record with an ``"error"`` status; nothing is re-raised because this runs
    as a detached background task. Recording the scan entry is best-effort: a
    failure there is logged and does not affect the stored report.
    """
    import logging

    log = logging.getLogger(__name__)

    try:
        result = await run_pipeline(repo, token, include_code2llm_files=True)
        badge_url = f"{APP_URL}/badge/{repo.replace('/', '-')}.svg"

        report = {
            "status": "complete",
            "repo": repo,
            "completed": _utc_now_iso(),
            "stats": result.stats,
            "health_score": result.health_score,
            "grade": result.grade,
            "metrics": {
                "complexity": result.complexity,
                "duplication": result.duplication,
                "quality": result.quality,
            },
            "recommendations": result.recommendations,
            "badge_url": badge_url,
            "files": result.code2llm_files.get("files", []),
        }

        save_audit_result(audit_id, report)

        # Count only the recommendations flagged high or medium priority.
        weekly_issues = sum(
            1 for r in result.recommendations if r.get("priority") in ("high", "medium")
        )
        save_badge_cache(
            repo,
            {
                "score": result.health_score,
                "grade": result.grade,
                "updated": _utc_now_iso(),
                "weekly_issues": weekly_issues,
            },
        )

        scan_entry = {
            "repo": repo,
            "health_score": result.health_score,
            "grade": result.grade,
            "stats": result.stats,
            "completed": _utc_now_iso(),
            "badge_url": badge_url,
        }
        try:
            save_scan(scan_entry)
        except Exception:
            # Best-effort: keep the completed audit, but leave a trace.
            log.exception("Failed to record scan entry for %s", repo)

    except Exception as e:
        log.exception("Audit pipeline failed for %s (audit %s)", repo, audit_id)
        save_audit_result(audit_id, {"status": "error", "repo": repo, "error": str(e)})
250
+
251
+
252
async def _run_sandbox_analysis(audit_id: str, repo_url: str, repo: str):
    """Background analysis for sandbox mode (public repos only).

    Copies (for paths under ``/local-repos/``) or shallow-clones *repo_url*
    into a fresh temporary directory, runs ``run_pipeline_local`` on it and
    stores the outcome under *audit_id*, labelled with *repo*.

    Failures are written to the audit record as an ``"error"`` status and
    logged; nothing is re-raised because this runs as a detached background
    task. The temporary directory is removed in every case.
    """
    import logging

    log = logging.getLogger(__name__)
    workdir = Path(tempfile.mkdtemp(prefix="semcod-sandbox-"))
    checkout = workdir / "repo"

    try:
        # Check if this is a local path (starts with /local-repos/)
        if repo_url.startswith("/local-repos/"):
            source_path = Path(repo_url)
            if not source_path.exists():
                save_audit_result(
                    audit_id,
                    {
                        "status": "error",
                        "error": f"Local repository not found: {repo_url}",
                        "repo": repo,
                    },
                )
                return
            shutil.copytree(source_path, checkout)
        else:
            # Use git clone for remote repos
            proc = await asyncio.create_subprocess_exec(
                "git",
                "clone",
                "--depth=1",
                # End of options: a caller-supplied URL starting with "-" must
                # not be interpreted by git as a command-line flag.
                "--",
                repo_url,
                str(checkout),
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            # communicate() drains both pipes; wait() alone can block forever
            # once the child fills an OS pipe buffer.
            await proc.communicate()
            if proc.returncode != 0:
                save_audit_result(
                    audit_id,
                    {
                        "status": "error",
                        "error": "Failed to clone repository. Ensure it's public.",
                        "repo": repo,
                    },
                )
                return

        result = await run_pipeline_local(checkout, include_code2llm_files=True)

        report = {
            "status": "complete",
            "repo": repo,
            "sandbox": True,
            "completed": _utc_now_iso(),
            "stats": result.stats,
            "health_score": result.health_score,
            "grade": result.grade,
            "metrics": {
                "complexity": result.complexity,
                "duplication": result.duplication,
                "quality": result.quality,
            },
            "recommendations": result.recommendations,
            "files": result.code2llm_files.get("files", []),
        }

        save_audit_result(audit_id, report)

        scan_entry = {
            "repo": repo,
            "health_score": result.health_score,
            "grade": result.grade,
            "stats": result.stats,
            "completed": _utc_now_iso(),
            "sandbox": True,
            "badge_url": f"{APP_URL}/badge/{repo.replace('/', '-')}.svg",
        }
        try:
            save_scan(scan_entry)
        except Exception:
            # Best-effort: keep the completed audit, but leave a trace.
            log.exception("Failed to record sandbox scan entry for %s", repo)

    except Exception as e:
        log.exception("Sandbox analysis failed for %s (audit %s)", repo, audit_id)
        save_audit_result(audit_id, {"status": "error", "error": str(e), "repo": repo})
    finally:
        shutil.rmtree(workdir, ignore_errors=True)
routers/auth.py ADDED
@@ -0,0 +1,268 @@
1
+ import httpx
2
+ import jwt
3
+ from datetime import datetime, timedelta, timezone
4
+ from fastapi import APIRouter, Depends, HTTPException
5
+ from fastapi.responses import RedirectResponse
6
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
7
+
8
+ from config import (
9
+ APP_URL,
10
+ FRONTEND_URL,
11
+ GITHUB_CLIENT_ID,
12
+ GITHUB_CLIENT_SECRET,
13
+ GITHUB_OAUTH_SCOPE,
14
+ SECRET_KEY,
15
+ SESSION_EXPIRE_HOURS,
16
+ REPOS_PER_PAGE,
17
+ GITHUB_OAUTH_AUTHORIZE_URL,
18
+ GITHUB_OAUTH_TOKEN_URL,
19
+ GITHUB_API_BASE_URL,
20
+ GITEA_CLIENT_ID,
21
+ GITEA_CLIENT_SECRET,
22
+ GITEA_OAUTH_AUTHORIZE_URL,
23
+ GITEA_OAUTH_TOKEN_URL,
24
+ GITEA_API_BASE_URL,
25
+ )
26
+ from database import upsert_user, get_user_by_id
27
+
28
+ router = APIRouter()
29
+ security = HTTPBearer(auto_error=False)
30
+
31
+
32
def create_session_token(user_id: int) -> str:
    """Issue an HS256-signed session JWT for *user_id*.

    The token carries ``sub`` (the user id as a string), ``exp`` (now plus
    ``SESSION_EXPIRE_HOURS``) and ``iat`` (issue time), and is signed with
    ``SECRET_KEY``.
    """
    expires_at = datetime.now(timezone.utc) + timedelta(hours=SESSION_EXPIRE_HOURS)
    issued_at = datetime.now(timezone.utc)
    claims = {"sub": str(user_id), "exp": expires_at, "iat": issued_at}
    return jwt.encode(claims, SECRET_KEY, algorithm="HS256")
39
+
40
+
41
def decode_session_token(token: str) -> dict:
    """Verify *token* against ``SECRET_KEY`` (HS256 only) and return its claims.

    Raises:
        HTTPException: 401 ``"Session expired"`` for an expired token, and
            401 ``"Invalid token"`` for any other invalid token.
    """
    try:
        claims = jwt.decode(token, SECRET_KEY, algorithms=["HS256"])
    except jwt.ExpiredSignatureError:
        # Must be handled before the more general InvalidTokenError below.
        raise HTTPException(401, "Session expired")
    except jwt.InvalidTokenError:
        raise HTTPException(401, "Invalid token")
    else:
        return claims
48
+
49
+
50
async def get_current_user(
    credentials: HTTPAuthorizationCredentials = Depends(security),
) -> dict:
    """Resolve the bearer session token on the request to a stored user.

    Returns:
        The user record returned by ``get_user_by_id`` for the token's ``sub``
        claim.

    Raises:
        HTTPException: 401 when no credentials were sent, the token is expired
            or invalid, the ``sub`` claim is missing or not an integer, or no
            user with that id exists.
    """
    if not credentials:
        raise HTTPException(401, "Not authenticated")
    payload = decode_session_token(credentials.credentials)
    user_id = payload.get("sub")
    if not user_id:
        raise HTTPException(401, "Invalid token payload")
    try:
        numeric_id = int(user_id)
    except (TypeError, ValueError):
        # A correctly signed token with a non-numeric subject is still a bad
        # credential; answer 401 rather than failing with a 500.
        raise HTTPException(401, "Invalid token payload") from None
    user = get_user_by_id(numeric_id)
    if not user:
        raise HTTPException(401, "User not found")
    return user
63
+
64
+
65
@router.post("/auth/gh-token")
async def auth_via_github_token(token: str):
    """Exchange a GitHub personal access token (e.g. from `gh auth token`) for a Semcod session JWT.

    This allows CLI tools using `gh` to authenticate without browser-based OAuth.

    The token is validated by fetching the GitHub ``/user`` profile with it;
    the profile is then upserted (together with the token) and a session JWT is
    issued for the resulting user.

    NOTE(review): ``token`` is a plain ``str`` parameter, which FastAPI
    presumably reads from the query string - confirm that sending the secret
    in the URL is acceptable.

    Raises:
        HTTPException: 401 when GitHub does not answer 200 for the token, 400
            when the profile carries no ``id``.
    """
    request_headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/vnd.github+json",
    }
    async with httpx.AsyncClient() as client:
        profile_resp = await client.get(
            f"{GITHUB_API_BASE_URL}/user",
            headers=request_headers,
        )

    if profile_resp.status_code != 200:
        raise HTTPException(401, "Invalid GitHub token")
    profile = profile_resp.json()

    github_id = profile.get("id")
    if not github_id:
        raise HTTPException(400, "Failed to fetch GitHub profile")

    login = profile.get("login", "")
    user = upsert_user(
        github_id=github_id,
        login=login,
        # Fall back to the login when the display name is missing or empty.
        name=profile.get("name", "") or login,
        avatar_url=profile.get("avatar_url", ""),
        github_token=token,
    )

    session_token = create_session_token(user["id"])
    public_user = {"id": user["id"], "login": user["login"]}
    return {
        "session_token": session_token,
        "user": public_user,
    }
100
+
101
+
102
@router.get("/auth/github")
async def github_oauth_start():
    """Step 1: Redirect user to GitHub OAuth.

    Builds the authorize URL from ``GITHUB_OAUTH_AUTHORIZE_URL`` with the
    client id, the configured scope and this app's ``/auth/callback`` as the
    redirect URI, and answers with a redirect to it.
    """
    from urllib.parse import urlencode

    # urlencode percent-escapes the redirect URI and scope, which raw string
    # interpolation left unescaped.
    # NOTE(review): no `state` parameter is sent, so the callback cannot tie a
    # response to the request that started it - consider adding one.
    query = urlencode(
        {
            "client_id": GITHUB_CLIENT_ID,
            "scope": GITHUB_OAUTH_SCOPE,
            "redirect_uri": f"{APP_URL}/auth/callback",
        }
    )
    return RedirectResponse(f"{GITHUB_OAUTH_AUTHORIZE_URL}?{query}")
113
+
114
+
115
@router.get("/auth/callback")
async def github_oauth_callback(code: str):
    """Step 2: Exchange code for token, fetch profile, create user, issue JWT.

    Args:
        code: The authorization code GitHub appended to the callback URL.

    Returns:
        A redirect to ``{FRONTEND_URL}/audit`` carrying the new session token
        in the ``session`` query parameter.

    Raises:
        HTTPException: 400 when the token exchange yields no access token, or
            when the profile request fails or returns no ``id``.
    """
    async with httpx.AsyncClient() as client:
        token_resp = await client.post(
            GITHUB_OAUTH_TOKEN_URL,
            json={
                "client_id": GITHUB_CLIENT_ID,
                "client_secret": GITHUB_CLIENT_SECRET,
                "code": code,
            },
            headers={"Accept": "application/json"},
        )
        token_data = token_resp.json()

    github_token = token_data.get("access_token")
    if not github_token:
        raise HTTPException(400, "OAuth failed")

    async with httpx.AsyncClient() as client:
        profile_resp = await client.get(
            f"{GITHUB_API_BASE_URL}/user",
            headers={
                "Authorization": f"Bearer {github_token}",
                "Accept": "application/vnd.github+json",
            },
        )

    # Check the status before parsing, as the token-based login endpoint does;
    # an error response is not guaranteed to be a JSON object.
    if profile_resp.status_code != 200:
        raise HTTPException(400, "Failed to fetch GitHub profile")
    profile = profile_resp.json()

    github_id = profile.get("id")
    if not github_id:
        raise HTTPException(400, "Failed to fetch GitHub profile")

    user = upsert_user(
        github_id=github_id,
        login=profile.get("login", ""),
        # Fall back to the login when the display name is missing or empty.
        name=profile.get("name", "") or profile.get("login", ""),
        avatar_url=profile.get("avatar_url", ""),
        github_token=github_token,
    )

    session_token = create_session_token(user["id"])
    return RedirectResponse(f"{FRONTEND_URL}/audit?session={session_token}")
158
+
159
+
160
@router.get("/auth/gitea")
async def gitea_oauth_start():
    """Step 1: Redirect user to Gitea OAuth.

    Responds with 501 when the Gitea client id or authorize URL is not
    configured; otherwise redirects to the authorize URL with an encoded
    query string.
    """
    if not (GITEA_CLIENT_ID and GITEA_OAUTH_AUTHORIZE_URL):
        raise HTTPException(501, "Gitea OAuth not configured")
    from urllib.parse import urlencode

    query_fields = {
        "client_id": GITEA_CLIENT_ID,
        "redirect_uri": f"{APP_URL}/auth/callback/gitea",
        "response_type": "code",
        "scope": "repo",
    }
    target = f"{GITEA_OAUTH_AUTHORIZE_URL}?{urlencode(query_fields)}"
    return RedirectResponse(target)
176
+
177
+
178
@router.get("/auth/callback/gitea")
async def gitea_oauth_callback(code: str):
    """Step 2: Exchange code for token, fetch profile, create user, issue JWT.

    Args:
        code: The authorization code Gitea appended to the callback URL.

    Returns:
        A redirect to ``{FRONTEND_URL}/audit`` carrying the new session token
        in the ``session`` query parameter.

    Raises:
        HTTPException: 501 when Gitea OAuth is not configured; 400 when the
            token exchange yields no access token, or when the profile request
            fails or returns no ``id``.
    """
    if not GITEA_CLIENT_ID or not GITEA_OAUTH_TOKEN_URL:
        raise HTTPException(501, "Gitea OAuth not configured")

    async with httpx.AsyncClient() as client:
        token_resp = await client.post(
            GITEA_OAUTH_TOKEN_URL,
            json={
                "client_id": GITEA_CLIENT_ID,
                "client_secret": GITEA_CLIENT_SECRET,
                "code": code,
                "grant_type": "authorization_code",
                "redirect_uri": f"{APP_URL}/auth/callback/gitea",
            },
            headers={"Accept": "application/json"},
        )
        token_data = token_resp.json()

    gitea_token = token_data.get("access_token")
    if not gitea_token:
        raise HTTPException(400, "Gitea OAuth failed")

    async with httpx.AsyncClient() as client:
        profile_resp = await client.get(
            f"{GITEA_API_BASE_URL}/api/v1/user",
            headers={"Authorization": f"token {gitea_token}"},
        )

    # Check the status before parsing; an error response is not guaranteed to
    # be a JSON object.
    if profile_resp.status_code != 200:
        raise HTTPException(400, "Failed to fetch Gitea profile")
    profile = profile_resp.json()

    gitea_id = profile.get("id")
    if not gitea_id:
        raise HTTPException(400, "Failed to fetch Gitea profile")

    # NOTE(review): the Gitea id and token are stored in the `github_id` /
    # `github_token` fields. If `upsert_user` keys on `github_id`, a Gitea
    # account and a GitHub account with the same numeric id would share one
    # user record - verify against the database layer.
    user = upsert_user(
        github_id=gitea_id,
        login=profile.get("login", ""),
        # Fall back to the login when the full name is missing or empty.
        name=profile.get("full_name", "") or profile.get("login", ""),
        avatar_url=profile.get("avatar_url", ""),
        github_token=gitea_token,
    )

    session_token = create_session_token(user["id"])
    return RedirectResponse(f"{FRONTEND_URL}/audit?session={session_token}")
223
+
224
+
225
@router.get("/api/me")
async def get_me(user: dict = Depends(get_current_user)):
    """Return the public profile fields of the authenticated user.

    Only ``id``, ``login``, ``name`` and ``avatar_url`` are exposed; other
    fields of the user record are not included in the response.
    """
    exposed_fields = ("id", "login", "name", "avatar_url")
    return {field: user[field] for field in exposed_fields}
233
+
234
+
235
@router.post("/api/logout")
async def logout():
    """Acknowledge a logout request.

    The handler only returns a confirmation message; it performs no other
    work here.
    """
    acknowledgement = {"message": "Logged out"}
    return acknowledgement
238
+
239
+
240
@router.get("/api/repos")
async def list_repos(user: dict = Depends(get_current_user)):
    """List user's repos for audit selection.

    Queries ``/user/repos`` on the configured GitHub API with the user's
    stored token (owner-type repos, sorted by last update, ``REPOS_PER_PAGE``
    per page) and returns a trimmed summary of each entry.

    Raises:
        HTTPException: 401 when the user record has no GitHub token.
    """
    github_token = user.get("github_token")
    if not github_token:
        raise HTTPException(status_code=401, detail="GitHub token required")

    query = {"sort": "updated", "per_page": REPOS_PER_PAGE, "type": "owner"}
    request_headers = {
        "Authorization": f"Bearer {github_token}",
        "Accept": "application/vnd.github+json",
    }
    async with httpx.AsyncClient() as client:
        resp = await client.get(
            f"{GITHUB_API_BASE_URL}/user/repos",
            params=query,
            headers=request_headers,
        )
        repos = resp.json()

    summaries = []
    for entry in repos:
        # Skip anything that is not a repo object; iterating an error payload
        # (a JSON object) yields its string keys, which are dropped here.
        if not isinstance(entry, dict):
            continue
        summaries.append(
            {
                "full_name": entry["full_name"],
                "name": entry["name"],
                "language": entry.get("language"),
                "stars": entry.get("stargazers_count", 0),
                "size_kb": entry.get("size", 0),
                "private": entry.get("private", False),
                "default_branch": entry.get("default_branch", "main"),
            }
        )
    return summaries