codemap-python 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. analysis/__init__.py +1 -0
  2. analysis/architecture/__init__.py +1 -0
  3. analysis/architecture/architecture_engine.py +155 -0
  4. analysis/architecture/dependency_cycles.py +103 -0
  5. analysis/architecture/risk_radar.py +220 -0
  6. analysis/call_graph/__init__.py +1 -0
  7. analysis/call_graph/call_extractor.py +91 -0
  8. analysis/call_graph/call_graph_builder.py +1 -0
  9. analysis/call_graph/call_resolver.py +56 -0
  10. analysis/call_graph/context_models.py +1 -0
  11. analysis/call_graph/cross_file_resolver.py +122 -0
  12. analysis/call_graph/execution_tracker.py +1 -0
  13. analysis/call_graph/flow_builder.py +1 -0
  14. analysis/call_graph/models.py +1 -0
  15. analysis/core/__init__.py +1 -0
  16. analysis/core/ast_context.py +1 -0
  17. analysis/core/ast_parser.py +8 -0
  18. analysis/core/class_extractor.py +35 -0
  19. analysis/core/function_extractor.py +16 -0
  20. analysis/core/import_extractor.py +43 -0
  21. analysis/explain/__init__.py +1 -0
  22. analysis/explain/docstring_extractor.py +45 -0
  23. analysis/explain/explain_runner.py +177 -0
  24. analysis/explain/repo_summary_generator.py +138 -0
  25. analysis/explain/return_analyzer.py +114 -0
  26. analysis/explain/risk_flags.py +1 -0
  27. analysis/explain/signature_extractor.py +104 -0
  28. analysis/explain/summary_generator.py +282 -0
  29. analysis/graph/__init__.py +1 -0
  30. analysis/graph/callgraph_index.py +117 -0
  31. analysis/graph/entrypoint_detector.py +1 -0
  32. analysis/graph/impact_analyzer.py +210 -0
  33. analysis/indexing/__init__.py +1 -0
  34. analysis/indexing/import_resolver.py +156 -0
  35. analysis/indexing/symbol_index.py +150 -0
  36. analysis/runners/__init__.py +1 -0
  37. analysis/runners/phase4_runner.py +137 -0
  38. analysis/utils/__init__.py +1 -0
  39. analysis/utils/ast_helpers.py +1 -0
  40. analysis/utils/cache_manager.py +659 -0
  41. analysis/utils/path_resolver.py +1 -0
  42. analysis/utils/repo_fetcher.py +469 -0
  43. cli.py +1728 -0
  44. codemap_cli.py +11 -0
  45. codemap_python-0.1.0.dist-info/METADATA +399 -0
  46. codemap_python-0.1.0.dist-info/RECORD +58 -0
  47. codemap_python-0.1.0.dist-info/WHEEL +5 -0
  48. codemap_python-0.1.0.dist-info/entry_points.txt +2 -0
  49. codemap_python-0.1.0.dist-info/top_level.txt +5 -0
  50. security_utils.py +51 -0
  51. ui/__init__.py +1 -0
  52. ui/app.py +2160 -0
  53. ui/device_id.py +27 -0
  54. ui/static/app.js +2703 -0
  55. ui/static/styles.css +1268 -0
  56. ui/templates/index.html +231 -0
  57. ui/utils/__init__.py +1 -0
  58. ui/utils/registry_manager.py +190 -0
@@ -0,0 +1,469 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import os
5
+ import re
6
+ import shutil
7
+ import subprocess
8
+ import tempfile
9
+ import zipfile
10
+ from typing import Any, Dict, Optional
11
+ from urllib import error as urllib_error
12
+ from urllib import parse as urllib_parse
13
+ from urllib import request as urllib_request
14
+
15
+ from security_utils import redact_secrets
16
+
17
# Hosts that parse_github_url accepts; every other netloc is rejected.
_ALLOWED_HOSTS = {"www.github.com", "github.com"}
# Owner and repository path segments must match this pattern.
_RE_SAFE = re.compile(r"^[A-Za-z0-9._-]+$")
# Largest uncompressed size allowed for one zip member (20 MiB).
_MAX_FILE_SIZE = 20 * 1024 ** 2
# Largest number of entries allowed in one zip archive.
_MAX_FILES = 30_000
21
+
22
+
23
def _project_root() -> str:
    """Return the absolute path two directory levels above this module's directory."""
    module_dir = os.path.dirname(__file__)
    two_levels_up = os.path.join(module_dir, "..", "..")
    return os.path.abspath(two_levels_up)
25
+
26
+
27
def safe_workspace_root() -> str:
    """Return ``<project root>/.codemap_cache/workspaces``, creating it if needed."""
    segments = (_project_root(), ".codemap_cache", "workspaces")
    workspaces = os.path.join(*segments)
    # exist_ok makes repeated calls harmless once the directory is present.
    os.makedirs(workspaces, exist_ok=True)
    return workspaces
31
+
32
+
33
def parse_github_url(url: str) -> Dict[str, str]:
    """Split a GitHub repository URL into host, owner and repository name.

    Args:
        url: A URL of the form ``https://github.com/<owner>/<repo>``. A
            trailing ``.git`` on the path is removed, and path segments after
            the repository name are ignored.

    Returns:
        A dict with the keys ``"host"``, ``"owner"`` and ``"repo"``.

    Raises:
        ValueError: If the URL is empty, is not ``https``, names a host
            outside ``_ALLOWED_HOSTS``, has fewer than two path segments, or
            has an owner/repo segment that is not a safe name.
    """
    raw = str(url or "").strip()
    if not raw:
        raise ValueError("GitHub URL is required")
    parsed = urllib_parse.urlparse(raw)
    if parsed.scheme.lower() != "https":
        raise ValueError("Only https://github.com URLs are supported")
    host = parsed.netloc.lower().strip()
    if host not in _ALLOWED_HOSTS:
        raise ValueError("Only github.com is supported")

    path = parsed.path.strip().strip("/")
    if path.endswith(".git"):
        path = path[:-4]
    parts = [p for p in path.split("/") if p]
    if len(parts) < 2:
        raise ValueError("GitHub URL must look like https://github.com/<owner>/<repo>")
    owner, repo = parts[0], parts[1]
    for name in (owner, repo):
        # fullmatch: a "$"-anchored match() would still accept a trailing newline.
        # All-dot names ("." / "..") pass the character class but act as
        # path-traversal segments when the repo name is joined onto a directory.
        if not _RE_SAFE.fullmatch(name) or not name.strip("."):
            raise ValueError("Owner/repo contains unsupported characters")
    return {"host": host, "owner": owner, "repo": repo}
54
+
55
+
56
def normalize_github_url(url: str) -> str:
    """Return the canonical ``https://github.com/<owner>/<repo>`` form of *url*.

    Validation is delegated to ``parse_github_url``; its ``ValueError``
    propagates unchanged.
    """
    info = parse_github_url(url)
    owner, repo = info["owner"], info["repo"]
    return f"https://github.com/{owner}/{repo}"
59
+
60
+
61
def _workspace_id(normalized_url: str, ref: Optional[str], mode: str) -> str:
    """Derive a 16-hex-character workspace identifier.

    The identifier is the first 16 hex digits of the SHA-256 of
    ``"<normalized_url>|<ref, stripped, empty when falsy>|<mode>"``.
    """
    ref_part = str(ref or "").strip()
    material = "{}|{}|{}".format(normalized_url, ref_part, mode)
    digest = hashlib.sha256(material.encode("utf-8"))
    return digest.hexdigest()[:16]
64
+
65
+
66
def resolve_workspace_paths(url: str, ref: Optional[str], mode: str = "git") -> Dict[str, str]:
    """Compute the on-disk workspace layout for a repository URL.

    Args:
        url: GitHub repository URL; validated by ``parse_github_url``.
        ref: Optional branch/tag name; part of the workspace identifier.
        mode: Fetch mode label (for example ``"git"`` or ``"zip"``); also part
            of the workspace identifier.

    Returns:
        A dict with the keys ``normalized_url``, ``workspace_id``,
        ``workspace_dir``, ``repo_dir``, ``repo_name`` and ``owner``.

    Raises:
        ValueError: If the URL is invalid, if the workspace directory does not
            resolve inside the workspace root, or if the repository directory
            does not resolve strictly inside the workspace directory.
    """
    normalized = normalize_github_url(url)
    info = parse_github_url(url)
    workspace_id = _workspace_id(normalized, ref, mode)
    ws_root = safe_workspace_root()
    workspace_dir = os.path.join(ws_root, workspace_id)
    repo_dir = os.path.join(workspace_dir, info["repo"])

    root_real = os.path.realpath(ws_root)
    ws_real = os.path.realpath(workspace_dir)
    repo_real = os.path.realpath(repo_dir)
    try:
        workspace_ok = os.path.commonpath([root_real, ws_real]) == root_real
        # The repo directory must be a proper descendant of the workspace: a
        # repo segment such as "." or ".." would otherwise alias the workspace
        # itself or the shared workspace root.
        repo_ok = (
            repo_real != ws_real
            and os.path.commonpath([ws_real, repo_real]) == ws_real
        )
    except ValueError as exc:
        # commonpath raises ValueError for paths that cannot be compared.
        raise ValueError("Unsafe workspace path") from exc
    if not (workspace_ok and repo_ok):
        raise ValueError("Unsafe workspace path")

    return {
        "normalized_url": normalized,
        "workspace_id": workspace_id,
        "workspace_dir": workspace_dir,
        "repo_dir": repo_dir,
        "repo_name": info["repo"],
        "owner": info["owner"],
    }
90
+
91
+
92
def _safe_rmtree(path: str, allowed_root: str) -> bool:
    """Delete *path*, but only when it resolves to a location under *allowed_root*.

    Args:
        path: File or directory to delete.
        allowed_root: Directory the resolved target must be located within.

    Returns:
        ``False`` when *path* is empty, does not exist, or resolves outside
        *allowed_root*. ``True`` once a removal has been attempted; individual
        deletion failures are ignored (best effort).
    """
    if not path or not os.path.exists(path):
        return False
    real_target = os.path.realpath(path)
    real_root = os.path.realpath(allowed_root)
    try:
        if os.path.commonpath([real_root, real_target]) != real_root:
            return False
    except ValueError:
        # commonpath raises ValueError for paths that cannot be compared.
        return False

    def _onerror(func, p, _exc):
        # Make the entry writable and retry the failed operation once.
        try:
            os.chmod(p, 0o700)
        except OSError:
            pass
        try:
            func(p)
        except Exception:
            pass

    if os.path.isdir(real_target):
        shutil.rmtree(real_target, onerror=_onerror)
    else:
        # shutil.rmtree only removes directories; without this branch a
        # regular file would be left in place while True was still returned.
        try:
            os.remove(real_target)
        except OSError:
            try:
                os.chmod(real_target, 0o700)
                os.remove(real_target)
            except OSError:
                pass
    return True
115
+
116
+
117
def _git_branch(repo_dir: str) -> Optional[str]:
    """Return the output of ``git rev-parse --abbrev-ref HEAD`` for *repo_dir*.

    Returns ``None`` when git cannot be run, when the call raises, or when it
    prints nothing on stdout.
    """
    command = ["git", "-C", repo_dir, "rev-parse", "--abbrev-ref", "HEAD"]
    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=15,
            check=False,
        )
    except Exception:
        return None
    name = str(completed.stdout or "").strip()
    return name if name else None
130
+
131
+
132
def _git_available() -> bool:
    """Report whether ``git --version`` could be launched.

    Only the ability to start the process is checked; its exit status is not
    inspected.
    """
    probe = ["git", "--version"]
    try:
        subprocess.run(probe, capture_output=True, text=True, timeout=10, check=False)
    except Exception:
        return False
    else:
        return True
138
+
139
+
140
def fetch_public_repo(
    url: str,
    ref: Optional[str] = None,
    refresh: bool = False,
    token: Optional[str] = None,
    auth: str = "none",
) -> Dict[str, Any]:
    """Shallow-clone a GitHub repository into its cached workspace.

    Args:
        url: GitHub repository URL.
        ref: Optional branch or tag passed to ``git clone --branch``.
        refresh: When true, the existing workspace is deleted first.
        token: Optional token sent to git as an ``AUTHORIZATION: bearer`` header.
        auth: Opaque label echoed back in the result under ``"auth"``.

    Returns:
        A result dict. ``"ok"`` tells whether a usable clone exists in
        ``"repo_dir"``; ``"fetched"`` is true only when this call cloned it.
        Failures carry ``"error"`` and ``"error_code"``, one of
        ``INVALID_GITHUB_URL``, ``GIT_NOT_FOUND``, ``GITHUB_FETCH_FAILED`` or
        ``GITHUB_AUTH_REQUIRED``. Errors are reported through the dict, not raised.
    """
    try:
        resolved = resolve_workspace_paths(url, ref, mode="git")
    except Exception as e:
        return {"ok": False, "error": str(e), "error_code": "INVALID_GITHUB_URL", "auth": auth, "mode": "git"}

    ws_root = safe_workspace_root()
    workspace_dir = resolved["workspace_dir"]
    repo_dir = resolved["repo_dir"]
    normalized_url = resolved["normalized_url"]

    def _result(ok, fetched, error=None, error_code=None, ref_value=ref):
        # Single place that shapes the result, so every exit has the same keys.
        payload: Dict[str, Any] = {
            "ok": ok,
            "workspace_dir": workspace_dir,
            "repo_dir": repo_dir,
            "normalized_url": normalized_url,
            "ref": ref_value,
            "fetched": fetched,
            "refreshed": bool(refresh),
            "mode": "git",
            "auth": auth,
            "error": error,
        }
        if error_code is not None:
            payload["error_code"] = error_code
        return payload

    if refresh:
        _safe_rmtree(workspace_dir, ws_root)

    os.makedirs(workspace_dir, exist_ok=True)

    # Reuse an existing clone instead of hitting the network again.
    if os.path.isdir(repo_dir) and os.path.exists(os.path.join(repo_dir, ".git")):
        return _result(True, False, ref_value=ref or _git_branch(repo_dir))

    if not _git_available():
        return _result(False, False, "git not found", "GIT_NOT_FOUND")

    remote = f"{normalized_url}.git"
    cmd = ["git"]
    if token:
        # NOTE(review): the token is part of argv and therefore visible in the
        # process list while the clone runs; consider a credential helper.
        cmd += ["-c", f"http.extraheader=AUTHORIZATION: bearer {token}"]
    cmd += ["clone", "--depth", "1", "--no-tags", "--single-branch"]
    if ref:
        cmd += ["--branch", str(ref)]
    cmd += [remote, repo_dir]

    # Fail fast instead of waiting on an interactive credential prompt until
    # the timeout expires.
    env = dict(os.environ, GIT_TERMINAL_PROMPT="0")

    try:
        proc = subprocess.run(
            cmd,
            cwd=workspace_dir,
            capture_output=True,
            text=True,
            timeout=120,
            check=False,
            env=env,
        )
    except subprocess.TimeoutExpired:
        # A killed clone can leave a partial repo_dir/.git behind, which the
        # cache check above would accept as a finished clone on the next call.
        _safe_rmtree(repo_dir, workspace_dir)
        return _result(False, False, "Git clone timed out", "GITHUB_FETCH_FAILED")

    stderr = redact_secrets(str(proc.stderr or ""), extra_secrets=[token] if token else None)
    if proc.returncode != 0:
        # Same reasoning as the timeout path: never keep a half-written clone.
        _safe_rmtree(repo_dir, workspace_dir)
        msg = stderr.strip() or "Git clone failed"
        code = "GITHUB_FETCH_FAILED"
        lowered = msg.lower()
        auth_hints = (
            "404",
            "401",
            "403",
            "terminal prompts disabled",
            "could not read username",
            "authentication failed",
        )
        if not token and any(hint in lowered for hint in auth_hints):
            code = "GITHUB_AUTH_REQUIRED"
            msg = "Repo may be private. Provide --token or set GITHUB_TOKEN."
        return _result(False, False, msg, code)

    return _result(True, True, ref_value=ref or _git_branch(repo_dir))
257
+
258
+
259
def _download_zip(url: str, token: Optional[str]) -> bytes:
    """GET *url* and return the complete response body.

    A ``User-Agent`` header is always sent; an ``Authorization: Bearer`` header
    is added when *token* is truthy. Errors raised by ``urlopen`` propagate.
    """
    request = urllib_request.Request(url, method="GET")
    request.add_header("User-Agent", "codemap-ai")
    if token:
        request.add_header("Authorization", f"Bearer {token}")
    response = urllib_request.urlopen(request, timeout=90)
    with response:
        return response.read()
266
+
267
+
268
def _safe_extract_zip(zip_path: str, workspace_dir: str) -> str:
    """Extract the archive at *zip_path* into *workspace_dir* with safety checks.

    Directory entries are skipped. Each file entry must not exceed
    ``_MAX_FILE_SIZE`` and must resolve to a path inside *workspace_dir*.

    Returns:
        The directory named after the first path component of the first
        extracted file, when that directory exists. Otherwise the only
        sub-directory of *workspace_dir* if there is exactly one, else
        *workspace_dir* itself.

    Raises:
        RuntimeError: If the archive has more than ``_MAX_FILES`` entries,
            contains an oversized file, or has an entry that resolves outside
            *workspace_dir*.
    """
    with zipfile.ZipFile(zip_path, "r") as archive:
        entries = archive.infolist()
        if len(entries) > _MAX_FILES:
            raise RuntimeError("Archive has too many files")

        top_level: Optional[str] = None
        processed = 0
        for processed, entry in enumerate(entries, start=1):
            if entry.is_dir():
                continue
            if entry.file_size > _MAX_FILE_SIZE:
                raise RuntimeError("Archive contains oversized file")
            destination = os.path.realpath(os.path.join(workspace_dir, entry.filename))
            base = os.path.realpath(workspace_dir)
            try:
                inside = os.path.commonpath([base, destination]) == base
            except ValueError:
                raise RuntimeError("Unsafe zip entry path")
            if not inside:
                raise RuntimeError("Unsafe zip entry path")
            archive.extract(entry, workspace_dir)
            if top_level is None:
                # Normalise separators so the first component is found either way.
                top_level = entry.filename.replace("\\", "/").split("/")[0]

        if processed > _MAX_FILES:
            raise RuntimeError("Archive has too many files")

    if top_level:
        candidate = os.path.join(workspace_dir, top_level)
        if os.path.isdir(candidate):
            return candidate

    subdirs = [
        name
        for name in os.listdir(workspace_dir)
        if os.path.isdir(os.path.join(workspace_dir, name))
    ]
    if len(subdirs) == 1:
        return os.path.join(workspace_dir, subdirs[0])
    return workspace_dir
307
+
308
+
309
def fetch_public_repo_zip(
    url: str,
    ref: Optional[str],
    refresh: bool = False,
    token: Optional[str] = None,
    auth: str = "none",
) -> Dict[str, Any]:
    """Download a GitHub archive zip for *ref* and unpack it into the cached workspace.

    Args:
        url: GitHub repository URL.
        ref: Branch or tag name; ``"main"`` is used when empty.
        refresh: When true, the existing workspace is deleted first.
        token: Optional token sent as an ``Authorization: Bearer`` header.
        auth: Opaque label echoed back in the result under ``"auth"``.

    Returns:
        A result dict. ``"ok"`` tells whether ``"repo_dir"`` holds the sources;
        ``"downloaded"`` and ``"fetched"`` are true only when this call
        downloaded and extracted them. Failures carry ``"error"`` and
        ``"error_code"``, one of ``INVALID_GITHUB_URL``,
        ``GITHUB_AUTH_REQUIRED`` or ``GITHUB_FETCH_FAILED``. Errors are
        reported through the dict, not raised.
    """
    ref_value = str(ref or "").strip() or "main"
    try:
        resolved = resolve_workspace_paths(url, ref_value, mode="zip")
    except Exception as e:
        return {"ok": False, "error": str(e), "error_code": "INVALID_GITHUB_URL", "auth": auth, "mode": "zip"}

    ws_root = safe_workspace_root()
    workspace_dir = resolved["workspace_dir"]
    repo_dir = resolved["repo_dir"]
    normalized_url = resolved["normalized_url"]
    secrets = [token] if token else None

    def _result(ok, fetched, zip_url, error=None, error_code=None):
        # Single place that shapes the result. "downloaded" and "fetched"
        # always carry the same value.
        payload: Dict[str, Any] = {
            "ok": ok,
            "workspace_dir": workspace_dir,
            "repo_dir": repo_dir,
            "normalized_url": normalized_url,
            "ref": ref_value,
            "downloaded": fetched,
            "zip_url": zip_url,
            "fetched": fetched,
            "refreshed": bool(refresh),
            "mode": "zip",
            "auth": auth,
            "error": error,
        }
        if error_code is not None:
            payload["error_code"] = error_code
        return payload

    if refresh:
        _safe_rmtree(workspace_dir, ws_root)

    os.makedirs(workspace_dir, exist_ok=True)

    # Reuse a previously extracted tree instead of downloading again.
    if os.path.isdir(repo_dir):
        return _result(True, False, "")

    info = parse_github_url(url)
    base = f"https://github.com/{info['owner']}/{info['repo']}/archive/refs"
    # The ref may be a branch or a tag, so both archive locations are tried.
    urls = [
        f"{base}/heads/{urllib_parse.quote(ref_value)}.zip",
        f"{base}/tags/{urllib_parse.quote(ref_value)}.zip",
    ]

    zip_data: Optional[bytes] = None
    zip_url = ""
    last_error = ""
    only_not_found = True  # stays true while every failure was an HTTP 404

    for candidate in urls:
        try:
            zip_data = _download_zip(candidate, token=token)
            zip_url = candidate
            break
        except urllib_error.HTTPError as e:
            status = int(getattr(e, "code", 0) or 0)
            last_error = f"HTTP {getattr(e, 'code', '')}"
            if status == 404:
                # Not found under this prefix; try the next candidate.
                continue
            if status in {401, 403} and not token:
                return _result(
                    False,
                    False,
                    candidate,
                    "Repo may be private. Provide --token or set GITHUB_TOKEN.",
                    "GITHUB_AUTH_REQUIRED",
                )
            return _result(
                False,
                False,
                candidate,
                redact_secrets(str(e), extra_secrets=secrets),
                "GITHUB_FETCH_FAILED",
            )
        except Exception as e:
            only_not_found = False
            last_error = redact_secrets(str(e), extra_secrets=secrets)
            continue

    if zip_data is None:
        if only_not_found and not token:
            # Every candidate returned 404 and no token was supplied; that is
            # treated as "auth may be required" rather than a bare "HTTP 404".
            return _result(
                False,
                False,
                zip_url,
                f"Repo may be private or ref '{ref_value}' does not exist. "
                "Provide --token or set GITHUB_TOKEN.",
                "GITHUB_AUTH_REQUIRED",
            )
        return _result(False, False, zip_url, last_error or "Zip download failed", "GITHUB_FETCH_FAILED")

    fd, tmp_zip = tempfile.mkstemp(prefix="codemap_", suffix=".zip", dir=workspace_dir)
    try:
        # Write through the descriptor mkstemp returned rather than reopening by name.
        with os.fdopen(fd, "wb") as f:
            f.write(zip_data)
        extracted = _safe_extract_zip(tmp_zip, workspace_dir)
        # Move the extracted tree to repo_dir unless extraction already
        # produced a directory at repo_dir.
        if extracted != repo_dir and not os.path.isdir(repo_dir) and os.path.isdir(extracted):
            if os.path.exists(repo_dir):
                _safe_rmtree(repo_dir, workspace_dir)
            os.replace(extracted, repo_dir)
    except Exception as e:
        return _result(
            False,
            False,
            zip_url,
            redact_secrets(str(e), extra_secrets=secrets),
            "GITHUB_FETCH_FAILED",
        )
    finally:
        if os.path.exists(tmp_zip):
            try:
                os.remove(tmp_zip)
            except OSError:
                pass

    return _result(True, True, zip_url)