gwc_pybundle-2.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of gwc-pybundle might be problematic.

Files changed (82)
  1. gwc_pybundle-2.1.2.dist-info/METADATA +903 -0
  2. gwc_pybundle-2.1.2.dist-info/RECORD +82 -0
  3. gwc_pybundle-2.1.2.dist-info/WHEEL +5 -0
  4. gwc_pybundle-2.1.2.dist-info/entry_points.txt +2 -0
  5. gwc_pybundle-2.1.2.dist-info/licenses/LICENSE.md +25 -0
  6. gwc_pybundle-2.1.2.dist-info/top_level.txt +1 -0
  7. pybundle/__init__.py +0 -0
  8. pybundle/__main__.py +4 -0
  9. pybundle/cli.py +546 -0
  10. pybundle/context.py +404 -0
  11. pybundle/doctor.py +148 -0
  12. pybundle/filters.py +228 -0
  13. pybundle/manifest.py +77 -0
  14. pybundle/packaging.py +45 -0
  15. pybundle/policy.py +132 -0
  16. pybundle/profiles.py +454 -0
  17. pybundle/roadmap_model.py +42 -0
  18. pybundle/roadmap_scan.py +328 -0
  19. pybundle/root_detect.py +14 -0
  20. pybundle/runner.py +180 -0
  21. pybundle/steps/__init__.py +26 -0
  22. pybundle/steps/ai_context.py +791 -0
  23. pybundle/steps/api_docs.py +219 -0
  24. pybundle/steps/asyncio_analysis.py +358 -0
  25. pybundle/steps/bandit.py +72 -0
  26. pybundle/steps/base.py +20 -0
  27. pybundle/steps/blocking_call_detection.py +291 -0
  28. pybundle/steps/call_graph.py +219 -0
  29. pybundle/steps/compileall.py +76 -0
  30. pybundle/steps/config_docs.py +319 -0
  31. pybundle/steps/config_validation.py +302 -0
  32. pybundle/steps/container_image.py +294 -0
  33. pybundle/steps/context_expand.py +272 -0
  34. pybundle/steps/copy_pack.py +293 -0
  35. pybundle/steps/coverage.py +101 -0
  36. pybundle/steps/cprofile_step.py +166 -0
  37. pybundle/steps/dependency_sizes.py +136 -0
  38. pybundle/steps/django_checks.py +214 -0
  39. pybundle/steps/dockerfile_lint.py +282 -0
  40. pybundle/steps/dockerignore.py +311 -0
  41. pybundle/steps/duplication.py +103 -0
  42. pybundle/steps/env_completeness.py +269 -0
  43. pybundle/steps/env_var_usage.py +253 -0
  44. pybundle/steps/error_refs.py +204 -0
  45. pybundle/steps/event_loop_patterns.py +280 -0
  46. pybundle/steps/exception_patterns.py +190 -0
  47. pybundle/steps/fastapi_integration.py +250 -0
  48. pybundle/steps/flask_debugging.py +312 -0
  49. pybundle/steps/git_analytics.py +315 -0
  50. pybundle/steps/handoff_md.py +176 -0
  51. pybundle/steps/import_time.py +175 -0
  52. pybundle/steps/interrogate.py +106 -0
  53. pybundle/steps/license_scan.py +96 -0
  54. pybundle/steps/line_profiler.py +117 -0
  55. pybundle/steps/link_validation.py +287 -0
  56. pybundle/steps/logging_analysis.py +233 -0
  57. pybundle/steps/memory_profile.py +176 -0
  58. pybundle/steps/migration_history.py +336 -0
  59. pybundle/steps/mutation_testing.py +141 -0
  60. pybundle/steps/mypy.py +103 -0
  61. pybundle/steps/orm_optimization.py +316 -0
  62. pybundle/steps/pip_audit.py +45 -0
  63. pybundle/steps/pipdeptree.py +62 -0
  64. pybundle/steps/pylance.py +562 -0
  65. pybundle/steps/pytest.py +66 -0
  66. pybundle/steps/query_pattern_analysis.py +334 -0
  67. pybundle/steps/radon.py +161 -0
  68. pybundle/steps/repro_md.py +161 -0
  69. pybundle/steps/rg_scans.py +78 -0
  70. pybundle/steps/roadmap.py +153 -0
  71. pybundle/steps/ruff.py +117 -0
  72. pybundle/steps/secrets_detection.py +235 -0
  73. pybundle/steps/security_headers.py +309 -0
  74. pybundle/steps/shell.py +74 -0
  75. pybundle/steps/slow_tests.py +178 -0
  76. pybundle/steps/sqlalchemy_validation.py +269 -0
  77. pybundle/steps/test_flakiness.py +184 -0
  78. pybundle/steps/tree.py +116 -0
  79. pybundle/steps/type_coverage.py +277 -0
  80. pybundle/steps/unused_deps.py +211 -0
  81. pybundle/steps/vulture.py +167 -0
  82. pybundle/tools.py +63 -0
@@ -0,0 +1,315 @@
+"""Advanced git analytics step - v1.5.1
+
+Provides deep git history insights:
+- Blame-based contributor analysis
+- Branch health metrics
+- Commit message quality assessment
+- Code ownership tracking
+"""
+
+from __future__ import annotations
+
+import re
+import subprocess
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Dict, List, Tuple
+
+from .base import StepResult
+from ..context import BundleContext
+
+
+@dataclass
+class GitAnalyticsStep:
+    """Step that performs advanced git analytics."""
+
+    name: str = "git-analytics"
+    outfile: str = "meta/100_git_analytics.txt"
+    blame_depth: int = 100  # Number of commits to analyze in blame
+
+    def run(self, ctx: BundleContext) -> StepResult:
+        """Perform comprehensive git analytics."""
+        start = time.time()
+
+        # Check if we're in a git repo
+        git_check = subprocess.run(
+            ["git", "rev-parse", "--git-dir"],
+            cwd=ctx.root,
+            capture_output=True,
+            text=True,
+        )
+
+        if git_check.returncode != 0:
+            elapsed = int((time.time() - start) * 1000)
+            return StepResult(self.name, "SKIP", elapsed, "not a git repository")
+
+        # Gather analytics
+        blame_stats = self._analyze_blame(ctx.root)
+        branch_stats = self._analyze_branches(ctx.root)
+        commit_quality = self._analyze_commit_messages(ctx.root)
+        codeowners_coverage = self._analyze_codeowners(ctx.root)
+
+        # Write report
+        out_path = ctx.workdir / self.outfile
+        out_path.parent.mkdir(parents=True, exist_ok=True)
+
+        with open(out_path, "w") as f:
+            f.write("=" * 80 + "\n")
+            f.write("GIT ANALYTICS REPORT\n")
+            f.write("=" * 80 + "\n\n")
+
+            # Blame-based analysis
+            if blame_stats:
+                f.write("CONTRIBUTOR ANALYSIS (by line count)\n")
+                f.write("-" * 80 + "\n")
+                f.write(f"Analyzed last {self.blame_depth} commits\n\n")
+                for i, (author, lines, files) in enumerate(blame_stats[:10], 1):
+                    f.write(f"{i:2d}. {author:<40} {lines:>6} lines {files:>3} files\n")
+                f.write("\n")
+
+            # Branch health
+            if branch_stats:
+                f.write("BRANCH HEALTH METRICS\n")
+                f.write("-" * 80 + "\n")
+                current, ahead, behind, uncommitted = branch_stats
+                f.write(f"Current branch: {current}\n")
+                f.write(f"Commits ahead of main: {ahead}\n")
+                f.write(f"Commits behind main: {behind}\n")
+                f.write(f"Uncommitted changes: {uncommitted} files\n\n")
+
+            # Commit message quality
+            if commit_quality:
+                f.write("COMMIT MESSAGE QUALITY\n")
+                f.write("-" * 80 + "\n")
+                conventional_pct, avg_length, total = commit_quality
+                f.write(f"Total commits analyzed: {total}\n")
+                f.write(f"Conventional commits: {conventional_pct:.1f}%\n")
+                f.write(f"Average message length: {avg_length:.0f} chars\n")
+                f.write("(Conventional format: type(scope): description)\n\n")
+
+            # CODEOWNERS coverage
+            if codeowners_coverage:
+                f.write("CODE OWNERSHIP TRACKING\n")
+                f.write("-" * 80 + "\n")
+                if codeowners_coverage[0]:
+                    codeowners_exists, coverage_pct, rules_count = codeowners_coverage
+                    f.write(f"CODEOWNERS file: {'Found' if codeowners_exists else 'Not found'}\n")
+                    f.write(f"Ownership rules: {rules_count}\n")
+                    f.write(f"Files covered: {coverage_pct:.1f}%\n\n")
+                else:
+                    f.write("CODEOWNERS file not found\n\n")
+
+            f.write("=" * 80 + "\n")
+            f.write("Git analytics complete\n")
+            f.write("=" * 80 + "\n")
+
+        elapsed = int((time.time() - start) * 1000)
+        return StepResult(self.name, "OK", elapsed, "git analytics complete")
+
+    def _analyze_blame(self, root: Path) -> List[Tuple[str, int, int]]:
+        """Analyze git blame to find top contributors by line count.
+
+        Returns list of (author, line_count, file_count) tuples sorted by lines.
+        """
+        try:
+            # Get the authors of the most recent commits
+            result = subprocess.run(
+                ["git", "log", f"--max-count={self.blame_depth}", "--format=%an"],
+                cwd=root,
+                capture_output=True,
+                text=True,
+                timeout=30,
+            )
+
+            if result.returncode != 0:
+                return []
+
+            # Per-author stats: line count plus the set of files touched
+            authors: Dict[str, Dict[str, Any]] = {}
+            for commit_author in result.stdout.strip().split("\n"):
+                if commit_author and commit_author not in authors:
+                    authors[commit_author] = {"lines": 0, "files": set()}
+
+            # Blame each Python file once and attribute lines to known authors
+            for py_file in root.rglob("*.py"):
+                # Skip artifacts and venv
+                if any(
+                    x in py_file.parts
+                    for x in ["artifacts", ".git", "venv", ".venv", "__pycache__"]
+                ):
+                    continue
+
+                try:
+                    blame_result = subprocess.run(
+                        ["git", "blame", "--line-porcelain", str(py_file.relative_to(root))],
+                        cwd=root,
+                        capture_output=True,
+                        text=True,
+                        timeout=10,
+                    )
+
+                    if blame_result.returncode == 0:
+                        for line in blame_result.stdout.split("\n"):
+                            if line.startswith("author "):
+                                author = line[7:].strip()
+                                if author in authors:
+                                    authors[author]["lines"] += 1
+                                    authors[author]["files"].add(str(py_file.relative_to(root)))
+
+                except Exception:
+                    continue
+
+            # Convert to list and sort
+            result_list = [
+                (author, stats["lines"], len(stats["files"]))
+                for author, stats in authors.items()
+            ]
+            return sorted(result_list, key=lambda x: x[1], reverse=True)
+
+        except Exception:
+            return []
+
+    def _analyze_branches(self, root: Path) -> Tuple[str, int, int, int] | None:
+        """Analyze branch health metrics.
+
+        Returns (current_branch, commits_ahead, commits_behind, uncommitted_files).
+        """
+        try:
+            # Get current branch
+            branch_result = subprocess.run(
+                ["git", "rev-parse", "--abbrev-ref", "HEAD"],
+                cwd=root,
+                capture_output=True,
+                text=True,
+                timeout=5,
+            )
+
+            if branch_result.returncode != 0:
+                return None
+
+            current_branch = branch_result.stdout.strip()
+
+            # Count commits ahead of/behind main
+            try:
+                ahead_result = subprocess.run(
+                    ["git", "rev-list", "--count", f"main..{current_branch}"],
+                    cwd=root,
+                    capture_output=True,
+                    text=True,
+                    timeout=5,
+                )
+                ahead = int(ahead_result.stdout.strip()) if ahead_result.returncode == 0 else 0
+            except Exception:
+                ahead = 0
+
+            try:
+                behind_result = subprocess.run(
+                    ["git", "rev-list", "--count", f"{current_branch}..main"],
+                    cwd=root,
+                    capture_output=True,
+                    text=True,
+                    timeout=5,
+                )
+                behind = int(behind_result.stdout.strip()) if behind_result.returncode == 0 else 0
+            except Exception:
+                behind = 0
+
+            # Count uncommitted changes (splitlines() yields [] for a clean tree)
+            status_result = subprocess.run(
+                ["git", "status", "--porcelain"],
+                cwd=root,
+                capture_output=True,
+                text=True,
+                timeout=5,
+            )
+
+            uncommitted = len(status_result.stdout.splitlines()) if status_result.returncode == 0 else 0
+
+            return (current_branch, ahead, behind, uncommitted)
+
+        except Exception:
+            return None
+
+    def _analyze_commit_messages(self, root: Path) -> Tuple[float, float, int] | None:
+        """Analyze commit message quality.
+
+        Returns (conventional_commits_pct, avg_message_length, total_commits).
+        """
+        try:
+            # Get recent commits
+            log_result = subprocess.run(
+                ["git", "log", "--max-count=100", "--format=%s"],
+                cwd=root,
+                capture_output=True,
+                text=True,
+                timeout=10,
+            )
+
+            if log_result.returncode != 0:
+                return None
+
+            messages = [m for m in log_result.stdout.splitlines() if m]  # drop empties
+            if not messages:
+                return None
+
+            # Check for conventional commits format: type(scope): message or type: message
+            conventional_pattern = re.compile(r"^(feat|fix|docs|style|refactor|perf|test|chore)(\(.+\))?:")
+            conventional_count = sum(1 for msg in messages if conventional_pattern.match(msg))
+
+            total = len(messages)
+            conventional_pct = conventional_count / total * 100
+            avg_length = sum(len(msg) for msg in messages) / total
+
+            return (conventional_pct, avg_length, total)
+
+        except Exception:
+            return None
+
+    def _analyze_codeowners(self, root: Path) -> Tuple[bool, float, int] | None:
+        """Analyze CODEOWNERS file coverage.
+
+        Returns (codeowners_exists, coverage_percentage, number_of_rules).
+        """
+        try:
+            # Check for CODEOWNERS file
+            codeowners_paths = [
+                root / "CODEOWNERS",
+                root / ".github" / "CODEOWNERS",
+                root / "docs" / "CODEOWNERS",
+            ]
+
+            codeowners_file = None
+            for path in codeowners_paths:
+                if path.exists():
+                    codeowners_file = path
+                    break
+
+            if not codeowners_file:
+                return (False, 0.0, 0)
+
+            # Parse CODEOWNERS file
+            rules: List[Tuple[str, List[str]]] = []
+            with open(codeowners_file) as f:
+                for line in f:
+                    line = line.strip()
+                    if not line or line.startswith("#"):
+                        continue
+
+                    parts = line.split()
+                    if len(parts) >= 2:
+                        pattern = parts[0]
+                        owners = parts[1:]
+                        rules.append((pattern, owners))
+
+            if not rules:
+                return (True, 0.0, 0)
+
+            # Estimate coverage from rule count
+            # (simplified heuristic - no real path matching)
+            coverage_pct = min(100.0, len(rules) * 10.0)
+
+            return (True, coverage_pct, len(rules))
+
+        except Exception:
+            return None
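
For reference, the commit-quality metric in `_analyze_commit_messages` reduces to one regex test per subject line. A minimal standalone sketch of that check (the sample messages are invented for illustration):

```python
import re

# Same pattern as _analyze_commit_messages: type, optional (scope), then ":"
conventional = re.compile(r"^(feat|fix|docs|style|refactor|perf|test|chore)(\(.+\))?:")

# Invented sample subjects
messages = [
    "feat(cli): add --redact flag",      # conventional: type(scope): description
    "fix: handle empty git log output",  # conventional: type: description
    "update readme",                     # not conventional: no type prefix
]

hits = sum(1 for m in messages if conventional.match(m))
print(f"{hits}/{len(messages)} conventional ({hits / len(messages) * 100:.1f}%)")
# -> 2/3 conventional (66.7%)
```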
@@ -0,0 +1,176 @@
+from __future__ import annotations
+
+import time
+from dataclasses import asdict
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+from .base import Step, StepResult
+
+
+def _utc_now() -> str:
+    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+
+def _safe_read(path: Path) -> str:
+    if not path.exists():
+        return f"(missing: {path.as_posix()})"
+    return path.read_text(encoding="utf-8", errors="replace").strip()
+
+
+def _tool_table(tools_obj: Any) -> list[str]:
+    d = (
+        asdict(tools_obj)
+        if hasattr(tools_obj, "__dataclass_fields__")
+        else dict(tools_obj)
+    )
+    lines = ["| Tool | Status |", "|------|--------|"]
+    for k in sorted(d.keys()):
+        v = d[k]
+        if v:
+            lines.append(f"| `{k}` | ✅ `{v}` |")
+        else:
+            lines.append(f"| `{k}` | ❌ `<missing>` |")
+    return lines
+
+
+class HandoffMarkdownStep(Step):
+    name: str = "generate HANDOFF.md"
+
+    def run(self, ctx: Any) -> StepResult:
+        start = time.time()
+
+        created_utc = getattr(ctx, "created_utc", None) or _utc_now()
+        profile = getattr(ctx, "profile_name", "<unknown>")
+        root_path = Path(getattr(ctx, "root"))
+        project = root_path.name
+        root = str(root_path)
+        workdir_path = Path(getattr(ctx, "workdir"))
+        workdir = str(workdir_path)
+
+        # filenames fixed to match the repo layout
+        uname = _safe_read(workdir_path / "meta" / "21_uname.txt")
+        pyver = _safe_read(workdir_path / "meta" / "20_python_version.txt")
+
+        redact = bool(getattr(ctx, "redact", True))
+        redact_status = "enabled" if redact else "disabled"
+
+        results: list[Any] = list(getattr(ctx, "results", []))
+        pass_n = sum(1 for r in results if getattr(r, "status", "") == "PASS")
+        fail_n = sum(1 for r in results if getattr(r, "status", "") == "FAIL")
+        skip_n = sum(1 for r in results if getattr(r, "status", "") == "SKIP")
+        total_n = len(results)
+
+        overall = "FAIL" if fail_n else ("DEGRADED" if skip_n else "PASS")
+
+        # tool table
+        tools_obj = getattr(ctx, "tools", None) or getattr(ctx, "tooling", None)
+        tools_table = (
+            _tool_table(tools_obj) if tools_obj is not None else ["(no tools detected)"]
+        )
+
+        command_used = getattr(ctx, "command_used", "") or "(not captured)"
+
+        lines: list[str] = []
+        lines.append("# Bundle Handoff")
+        lines.append("")
+        lines.append("## Overview")
+        lines.append(
+            f"- **Bundle tool:** pybundle {getattr(ctx, 'version', '<unknown>')}"
+        )
+        lines.append(f"- **Profile:** {profile}")
+        lines.append(f"- **Created (UTC):** {created_utc}")
+        lines.append(f"- **Project:** {project}")
+        lines.append(f"- **Root:** {root}")
+        lines.append(f"- **Workdir:** {workdir}")
+        lines.append("")
+        lines.append("## System")
+        lines.append(f"- **OS:** {uname}")
+        lines.append(f"- **Python:** {pyver}")
+        lines.append(f"- **Redaction:** {redact_status}")
+        lines.append("")
+        # Summary lives under "At a glance" so the heading is never empty
+        lines.append("## At a glance")
+        lines.append(f"- **Overall status:** {overall}")
+        lines.append(
+            f"- **Steps:** {total_n} total — {pass_n} PASS, {fail_n} FAIL, {skip_n} SKIP"
+        )
+        lines.append("")
+
+        lines.append("## AI context summary")
+
+        copy_manifest = _safe_read(
+            workdir_path / "meta" / "50_copy_manifest.txt"
+        )
+        if copy_manifest and not copy_manifest.startswith("(missing:"):
+            lines.append("### Curated copy")
+            lines.append("```")
+            lines.append(copy_manifest)
+            lines.append("```")
+        else:
+            lines.append("- Curated copy manifest not found.")
+
+        roadmap_json = _safe_read(workdir_path / "meta" / "70_roadmap.json")
+        if roadmap_json and not roadmap_json.startswith("(missing:"):
+            try:
+                import json
+
+                rj = json.loads(roadmap_json)
+                langs = set()
+                for n in rj.get("nodes", []):
+                    if isinstance(n, dict):
+                        lang = n.get("lang")
+                        if lang:
+                            langs.add(lang)
+                eps = rj.get("entrypoints", []) or []
+                lines.append(
+                    f"- **Languages detected:** {', '.join(sorted(langs)) if langs else '(none)'}"
+                )
+                if eps:
+                    lines.append("- **Entrypoints:**")
+                    for ep in eps[:10]:
+                        node = ep.get("node") if isinstance(ep, dict) else None
+                        reason = ep.get("reason") if isinstance(ep, dict) else None
+                        conf = ep.get("confidence") if isinstance(ep, dict) else None
+                        if node:
+                            extra = ""
+                            if reason is not None and conf is not None:
+                                extra = f" — {reason} ({conf}/3)"
+                            lines.append(f"  - `{node}`{extra}")
+                else:
+                    lines.append("- **Entrypoints:** (none detected)")
+            except json.JSONDecodeError as e:
+                # Show EXACTLY why parsing failed - critical for debugging
+                lines.append("- ⚠ Roadmap JSON present but could not be parsed:")
+                lines.append(f"  - Error: {e}")
+                lines.append(f"  - Location: Line {e.lineno}, Column {e.colno}")
+                lines.append("  - Raw file included in bundle: `meta/70_roadmap.json`")
+                lines.append("  - Try: `python -m json.tool meta/70_roadmap.json` to validate")
+            except Exception as e:
+                lines.append("- ⚠ Roadmap JSON present but could not be parsed:")
+                lines.append(f"  - Error: {type(e).__name__}: {e}")
+                lines.append("  - Raw file included in bundle: `meta/70_roadmap.json`")
+        else:
+            lines.append("- Roadmap not found.")
+
+        lines.append("")
+
+        lines.append("## Tools")
+        lines.extend(tools_table)
+        lines.append("")
+        lines.append("## Command used")
+        lines.append("```bash")
+        lines.append(command_used)
+        lines.append("```")
+        lines.append("")
+        lines.append("## Reproduction")
+        lines.append("See **REPRO.md** for step-by-step reproduction instructions.")
+        lines.append("")
+
+        out_path = workdir_path / "HANDOFF.md"
+        out_path.write_text("\n".join(lines), encoding="utf-8")
+
+        secs = int(time.time() - start)
+        return StepResult(
+            name=self.name, status="PASS", seconds=secs, note="wrote HANDOFF.md"
+        )
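
The overall bundle status computed in `run` follows a simple precedence rule: any FAIL makes the bundle FAIL, otherwise any SKIP marks it DEGRADED, otherwise it passes. A standalone sketch of that rule (the sample counts are invented):

```python
def overall_status(pass_n: int, fail_n: int, skip_n: int) -> str:
    # Same precedence as HandoffMarkdownStep.run: FAIL > DEGRADED > PASS
    return "FAIL" if fail_n else ("DEGRADED" if skip_n else "PASS")

assert overall_status(12, 0, 0) == "PASS"
assert overall_status(11, 0, 1) == "DEGRADED"  # any skip degrades
assert overall_status(11, 1, 0) == "FAIL"      # any failure dominates
```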
@@ -0,0 +1,175 @@
+"""
+Import time analysis - Milestone 3 (v1.4.0)
+"""
+
+from __future__ import annotations
+
+import subprocess
+import time
+from dataclasses import dataclass
+from pathlib import Path
+
+from .base import StepResult
+from ..context import BundleContext
+
+
+@dataclass
+class ImportTimeStep:
+    """
+    Analyze Python import times using -X importtime to identify slow imports.
+
+    Outputs:
+    - logs/61_import_time.txt: Ranked list of slowest imports
+    """
+
+    name: str = "import_time"
+
+    def run(self, ctx: BundleContext) -> StepResult:
+        start = time.time()
+
+        if ctx.options.no_profile:
+            return StepResult(self.name, "SKIP", 0, "profiling disabled")
+
+        # Find entry point
+        entry_point = self._find_entry_point(ctx)
+
+        if not entry_point:
+            return StepResult(self.name, "SKIP", 0, "no suitable entry point found")
+
+        ctx.emit(f"  Analyzing import time for {entry_point.name}")
+
+        try:
+            # Run with -X importtime; repr-quote the path so quotes/backslashes can't break -c
+            result = subprocess.run(
+                [
+                    str(ctx.tools.python),
+                    "-X",
+                    "importtime",
+                    "-c",
+                    f"import runpy; runpy.run_path({str(entry_point)!r})",
+                ],
+                cwd=ctx.root,
+                capture_output=True,
+                text=True,
+                timeout=60,
+            )
+
+            # Parse and rank import times
+            self._generate_report(
+                result.stderr, ctx.workdir
+            )  # importtime outputs to stderr
+
+            elapsed = int((time.time() - start) * 1000)
+            return StepResult(self.name, "OK", elapsed)
+
+        except subprocess.TimeoutExpired:
+            elapsed = int((time.time() - start) * 1000)
+            return StepResult(self.name, "FAIL", elapsed, "timeout")
+        except Exception as e:
+            elapsed = int((time.time() - start) * 1000)
+            return StepResult(self.name, "FAIL", elapsed, str(e))
+
+    def _find_entry_point(self, ctx: BundleContext) -> Path | None:
+        """Find the best entry point to analyze"""
+        if ctx.options.profile_entry_point:
+            ep = Path(ctx.options.profile_entry_point)
+            if not ep.is_absolute():
+                ep = ctx.root / ctx.options.profile_entry_point
+            if ep.exists() and ep.is_file():
+                return ep
+
+        # Try package/__main__.py
+        pyproject = ctx.root / "pyproject.toml"
+        if pyproject.exists():
+            try:
+                import tomllib
+
+                with pyproject.open("rb") as f:
+                    data = tomllib.load(f)
+                pkg_name = data.get("project", {}).get("name", "").replace("-", "_")
+                if pkg_name:
+                    pkg_main = ctx.root / pkg_name / "__main__.py"
+                    if pkg_main.exists():
+                        return pkg_main
+            except Exception:
+                pass
+
+        # Try common entry points
+        for entry in ["__main__.py", "main.py", "app.py", "cli.py"]:
+            path = ctx.root / entry
+            if path.exists():
+                return path
+
+        return None
+
+    def _generate_report(self, importtime_output: str, workdir: Path) -> None:
+        """Parse -X importtime output and generate ranked report"""
+        output_file = workdir / "logs" / "61_import_time.txt"
+        output_file.parent.mkdir(parents=True, exist_ok=True)
+
+        # Parse import times
+        # Format: "import time: self [us] | cumulative | imported package"
+        imports: list[tuple[int, int, str]] = []
+        for line in importtime_output.splitlines():
+            if "import time:" in line:
+                parts = line.split("|")
+                if len(parts) >= 3:
+                    try:
+                        # Extract times
+                        time_part = parts[0].split(":")[-1].strip()
+                        self_time = int(time_part.split()[0])
+                        cumulative = int(parts[1].strip())
+                        module = parts[2].strip()
+                        imports.append((cumulative, self_time, module))
+                    except (ValueError, IndexError):
+                        continue
+
+        # Sort by cumulative time (descending)
+        imports.sort(reverse=True)
+
+        with output_file.open("w") as f:
+            f.write("=" * 70 + "\n")
+            f.write("IMPORT TIME ANALYSIS\n")
+            f.write("=" * 70 + "\n\n")
+
+            if not imports:
+                f.write("No import time data collected.\n")
+                f.write("\nRaw output:\n")
+                f.write(importtime_output)
+                return
+
+            # Total cost: per-module self times sum to the overall import time
+            total_time = sum(self_time for _, self_time, _ in imports)
+
+            f.write(f"Total import time: {total_time / 1000:.1f} ms\n")
+            f.write(f"Number of imports analyzed: {len(imports)}\n\n")
+
+            f.write("TOP 30 SLOWEST IMPORTS (by cumulative time):\n")
+            f.write("-" * 70 + "\n")
+            f.write(f"{'Cumulative (ms)':<18} {'Self (ms)':<15} {'Module'}\n")
+            f.write("-" * 70 + "\n")
+
+            for cumulative, self_time, module in imports[:30]:
+                f.write(
+                    f"{cumulative / 1000:>15.1f} {self_time / 1000:>12.1f} {module}\n"
+                )
+
+            # Also show slowest by self time
+            imports_by_self = sorted(imports, key=lambda x: x[1], reverse=True)
+
+            f.write("\n" + "=" * 70 + "\n")
+            f.write("TOP 20 SLOWEST IMPORTS (by self time, excluding children):\n")
+            f.write("-" * 70 + "\n")
+            f.write(f"{'Self (ms)':<15} {'Cumulative (ms)':<18} {'Module'}\n")
+            f.write("-" * 70 + "\n")
+
+            for cumulative, self_time, module in imports_by_self[:20]:
+                f.write(
+                    f"{self_time / 1000:>12.1f} {cumulative / 1000:>15.1f} {module}\n"
+                )
+
+            f.write("\n" + "=" * 70 + "\n")
+            f.write("Recommendations:\n")
+            f.write("- Consider lazy imports for modules with high cumulative times\n")
+            f.write("- Review modules with high self times for optimization\n")
+            f.write("- Use conditional imports to defer loading when possible\n")