luckyd-code 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. luckyd_code/__init__.py +54 -0
  2. luckyd_code/__main__.py +5 -0
  3. luckyd_code/_agent_loop.py +551 -0
  4. luckyd_code/_data_dir.py +73 -0
  5. luckyd_code/agent.py +38 -0
  6. luckyd_code/analytics/__init__.py +18 -0
  7. luckyd_code/analytics/reporter.py +195 -0
  8. luckyd_code/analytics/scanner.py +443 -0
  9. luckyd_code/analytics/smells.py +316 -0
  10. luckyd_code/analytics/trends.py +303 -0
  11. luckyd_code/api.py +473 -0
  12. luckyd_code/audit_daemon.py +845 -0
  13. luckyd_code/autonomous_fixer.py +473 -0
  14. luckyd_code/background.py +159 -0
  15. luckyd_code/backup.py +237 -0
  16. luckyd_code/brain/__init__.py +84 -0
  17. luckyd_code/brain/assembler.py +100 -0
  18. luckyd_code/brain/chunker.py +345 -0
  19. luckyd_code/brain/constants.py +73 -0
  20. luckyd_code/brain/embedder.py +163 -0
  21. luckyd_code/brain/graph.py +311 -0
  22. luckyd_code/brain/indexer.py +316 -0
  23. luckyd_code/brain/parser.py +140 -0
  24. luckyd_code/brain/retriever.py +234 -0
  25. luckyd_code/cli.py +894 -0
  26. luckyd_code/cli_commands/__init__.py +1 -0
  27. luckyd_code/cli_commands/audit.py +120 -0
  28. luckyd_code/cli_commands/background.py +83 -0
  29. luckyd_code/cli_commands/brain.py +87 -0
  30. luckyd_code/cli_commands/config.py +75 -0
  31. luckyd_code/cli_commands/dispatcher.py +695 -0
  32. luckyd_code/cli_commands/sessions.py +41 -0
  33. luckyd_code/cli_entry.py +147 -0
  34. luckyd_code/cli_utils.py +112 -0
  35. luckyd_code/config.py +205 -0
  36. luckyd_code/context.py +214 -0
  37. luckyd_code/cost_tracker.py +209 -0
  38. luckyd_code/error_reporter.py +508 -0
  39. luckyd_code/exceptions.py +39 -0
  40. luckyd_code/export.py +126 -0
  41. luckyd_code/feedback_analyzer.py +290 -0
  42. luckyd_code/file_watcher.py +258 -0
  43. luckyd_code/git/__init__.py +11 -0
  44. luckyd_code/git/auto_commit.py +157 -0
  45. luckyd_code/git/tools.py +85 -0
  46. luckyd_code/hooks.py +236 -0
  47. luckyd_code/indexer.py +280 -0
  48. luckyd_code/init.py +39 -0
  49. luckyd_code/keybindings.py +77 -0
  50. luckyd_code/log.py +55 -0
  51. luckyd_code/mcp/__init__.py +6 -0
  52. luckyd_code/mcp/client.py +184 -0
  53. luckyd_code/memory/__init__.py +19 -0
  54. luckyd_code/memory/manager.py +339 -0
  55. luckyd_code/metrics/__init__.py +5 -0
  56. luckyd_code/model_registry.py +131 -0
  57. luckyd_code/orchestrator.py +204 -0
  58. luckyd_code/permissions/__init__.py +1 -0
  59. luckyd_code/permissions/manager.py +103 -0
  60. luckyd_code/planner.py +361 -0
  61. luckyd_code/plugins.py +91 -0
  62. luckyd_code/py.typed +0 -0
  63. luckyd_code/retry.py +57 -0
  64. luckyd_code/router.py +417 -0
  65. luckyd_code/sandbox.py +156 -0
  66. luckyd_code/self_critique.py +2 -0
  67. luckyd_code/self_improve.py +274 -0
  68. luckyd_code/sessions.py +114 -0
  69. luckyd_code/settings.py +72 -0
  70. luckyd_code/skills/__init__.py +8 -0
  71. luckyd_code/skills/review.py +22 -0
  72. luckyd_code/skills/security.py +17 -0
  73. luckyd_code/tasks/__init__.py +1 -0
  74. luckyd_code/tasks/manager.py +102 -0
  75. luckyd_code/templates/icon-192.png +0 -0
  76. luckyd_code/templates/icon-512.png +0 -0
  77. luckyd_code/templates/index.html +1965 -0
  78. luckyd_code/templates/manifest.json +14 -0
  79. luckyd_code/templates/src/app.js +694 -0
  80. luckyd_code/templates/src/body.html +767 -0
  81. luckyd_code/templates/src/cdn.txt +2 -0
  82. luckyd_code/templates/src/style.css +474 -0
  83. luckyd_code/templates/sw.js +31 -0
  84. luckyd_code/templates/test.html +6 -0
  85. luckyd_code/themes.py +48 -0
  86. luckyd_code/tools/__init__.py +97 -0
  87. luckyd_code/tools/agent_tools.py +65 -0
  88. luckyd_code/tools/bash.py +360 -0
  89. luckyd_code/tools/brain_tools.py +137 -0
  90. luckyd_code/tools/browser.py +369 -0
  91. luckyd_code/tools/datetime_tool.py +34 -0
  92. luckyd_code/tools/dockerfile_gen.py +212 -0
  93. luckyd_code/tools/file_ops.py +381 -0
  94. luckyd_code/tools/game_gen.py +360 -0
  95. luckyd_code/tools/git_tools.py +130 -0
  96. luckyd_code/tools/git_worktree.py +63 -0
  97. luckyd_code/tools/path_validate.py +64 -0
  98. luckyd_code/tools/project_gen.py +187 -0
  99. luckyd_code/tools/readme_gen.py +227 -0
  100. luckyd_code/tools/registry.py +157 -0
  101. luckyd_code/tools/shell_detect.py +109 -0
  102. luckyd_code/tools/web.py +89 -0
  103. luckyd_code/tools/youtube.py +187 -0
  104. luckyd_code/tools_bridge.py +144 -0
  105. luckyd_code/undo.py +126 -0
  106. luckyd_code/update.py +60 -0
  107. luckyd_code/verify.py +360 -0
  108. luckyd_code/web_app.py +176 -0
  109. luckyd_code/web_routes/__init__.py +23 -0
  110. luckyd_code/web_routes/background.py +73 -0
  111. luckyd_code/web_routes/brain.py +109 -0
  112. luckyd_code/web_routes/cost.py +12 -0
  113. luckyd_code/web_routes/files.py +133 -0
  114. luckyd_code/web_routes/memories.py +94 -0
  115. luckyd_code/web_routes/misc.py +67 -0
  116. luckyd_code/web_routes/project.py +48 -0
  117. luckyd_code/web_routes/review.py +20 -0
  118. luckyd_code/web_routes/sessions.py +44 -0
  119. luckyd_code/web_routes/settings.py +43 -0
  120. luckyd_code/web_routes/static.py +70 -0
  121. luckyd_code/web_routes/update.py +19 -0
  122. luckyd_code/web_routes/ws.py +237 -0
  123. luckyd_code-1.2.2.dist-info/METADATA +297 -0
  124. luckyd_code-1.2.2.dist-info/RECORD +127 -0
  125. luckyd_code-1.2.2.dist-info/WHEEL +4 -0
  126. luckyd_code-1.2.2.dist-info/entry_points.txt +3 -0
  127. luckyd_code-1.2.2.dist-info/licenses/LICENSE +21 -0
luckyd_code/agent.py ADDED
@@ -0,0 +1,38 @@
1
+ """Sub-agent support — spawn child agents for parallel work."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ from .config import Config
8
+ from .context import ConversationContext
9
+ from .tools import get_default_registry
10
+ from ._agent_loop import run_agent_loop
11
+
12
+ __all__ = ["SubAgent"]
13
+
14
+
15
class SubAgent:
    """A lightweight agent that runs independently with its own context.

    Each instance owns a private conversation window (capped at 20
    messages) and uses the shared default tool registry unless an
    explicit tool list is supplied.
    """

    def __init__(self, config: Config, task: str,
                 tools: Optional[List[Dict[str, Any]]] = None):
        self.config = config
        self.task = task
        self.tools = tools
        self.registry = get_default_registry()
        self.context = ConversationContext(config.system_prompt, max_messages=20)

    def run(self) -> str:
        """Run the sub-agent synchronously and return its final response."""
        self.context.add_user_message(self.task)
        # An explicit tool list wins; otherwise expose everything registered.
        active_tools = self.tools if self.tools else self.registry.list_tools()
        return run_agent_loop(
            context=self.context,
            config=self.config,
            tools=active_tools,
            registry=self.registry,
            label="sub-agent",
        )
@@ -0,0 +1,18 @@
1
+ """Codebase Health & Analytics - scan, measure, report, and track code quality."""
2
+
3
+ from .scanner import CodebaseScanner, scan_project
4
+ from .reporter import ReportGenerator, generate_report
5
+ from .trends import TrendTracker, snapshot_project, get_trends
6
+ from .smells import SmellDetector, detect_smells
7
+
8
+ __all__ = [
9
+ "CodebaseScanner",
10
+ "scan_project",
11
+ "ReportGenerator",
12
+ "generate_report",
13
+ "TrendTracker",
14
+ "snapshot_project",
15
+ "get_trends",
16
+ "SmellDetector",
17
+ "detect_smells",
18
+ ]
@@ -0,0 +1,195 @@
1
+ """Report generation for codebase analytics."""
2
+
3
+ import json
4
+ import time
5
+ from dataclasses import asdict
6
+ from pathlib import Path
7
+
8
+
9
+
10
+ def _format_size(b):
11
+ for u in ["B", "KB", "MB", "GB"]:
12
+ if b < 1024:
13
+ return f"{b:.1f} {u}"
14
+ b /= 1024
15
+ return f"{b:.1f} TB"
16
+
17
+
18
+ class ReportGenerator:
19
+ """Generate reports in multiple formats from scan results."""
20
+
21
+ def __init__(self, pm, smells=None):
22
+ self.pm = pm
23
+ self.smells = smells or []
24
+
25
+ def terminal(self):
26
+ pm = self.pm
27
+ l = []
28
+ l.append("")
29
+ l.append("=== CODEBASE HEALTH REPORT ===")
30
+ l.append(f"Project: {pm.root}")
31
+ l.append(f"Health Score: {pm.health_score}/100")
32
+ l.append("")
33
+ l.append("-- Summary --")
34
+ l.append(f"Source files: {pm.source_files}")
35
+ l.append(f"Total lines: {pm.total_lines:}")
36
+ l.append(f"Code lines: {pm.total_code_lines:}")
37
+ l.append(f"Total size: {_format_size(pm.total_size_bytes)}")
38
+ l.append(f"Functions: {pm.total_functions}")
39
+ l.append(f"Classes: {pm.total_classes}")
40
+ l.append(f"TODOs: {pm.total_todos}")
41
+ l.append(f"FIXMEs: {pm.total_fixmes}")
42
+ l.append(f"Avg complexity: {pm.avg_complexity:.1f}")
43
+
44
+ if pm.files_by_language:
45
+ l.append("")
46
+ l.append("-- Languages --")
47
+ for lang, count in sorted(pm.files_by_language.items(), key=lambda x: -x[1]):
48
+ l.append(f" {lang:<12} {count:>4} files")
49
+
50
+ top = sorted(pm.complexity_breakdown.items(), key=lambda x: -x[1])[:10]
51
+ if top:
52
+ l.append("")
53
+ l.append("-- Top Complexity --")
54
+ for fp, c in top:
55
+ l.append(f" {c:>4} {fp}")
56
+
57
+ if pm.todos:
58
+ l.append("")
59
+ l.append(f"-- TODOs & FIXMEs ({len(pm.todos)}) --")
60
+ for t in pm.todos[:20]:
61
+ l.append(f" [{t['kind']}] {t['file']}:{t['line']} {t['text'][:80]}")
62
+
63
+ if self.smells:
64
+ l.append("")
65
+ l.append(f"-- Code Smells ({len(self.smells)}) --")
66
+ for s in self.smells[:30]:
67
+ l.append(f" [{s.severity}] {s.kind}: {s.file}:{s.line}")
68
+ if s.message:
69
+ l.append(f" {s.message}")
70
+ if s.suggestion:
71
+ l.append(f" -> {s.suggestion}")
72
+
73
+ # Files needing attention
74
+ issues = []
75
+ for fm in pm.file_metrics:
76
+ score = 0
77
+ if fm.lines_code > 300:
78
+ score += 1
79
+ if fm.complexity > 20:
80
+ score += 2
81
+ if fm.todo_count > 5:
82
+ score += 1
83
+ if fm.fixme_count > 2:
84
+ score += 2
85
+ if score > 0:
86
+ issues.append((fm, score))
87
+
88
+ if issues:
89
+ issues.sort(key=lambda x: -x[1])
90
+ l.append("")
91
+ l.append("-- Files Needing Attention --")
92
+ for fm, score in issues[:10]:
93
+ l.append(
94
+ f" {fm.path} "
95
+ f"(lines={fm.lines_code}, complexity={fm.complexity}, "
96
+ f"todos={fm.todo_count}, fixmes={fm.fixme_count})"
97
+ )
98
+
99
+ l.append("")
100
+ return "\n".join(l)
101
+
102
+ def markdown(self):
103
+ pm = self.pm
104
+ m = []
105
+ m.append("# Codebase Health Report\n")
106
+ m.append(f"**Project:** `{pm.root}` ")
107
+ m.append(f"**Health Score: {pm.health_score}/100**\n")
108
+
109
+ m.append("## Summary\n")
110
+ m.append("| Metric | Value |")
111
+ m.append("|--------|-------|")
112
+ m.append(f"| Source files | {pm.source_files} |")
113
+ m.append(f"| Total lines | {pm.total_lines:} |")
114
+ m.append(f"| Code lines | {pm.total_code_lines:} |")
115
+ m.append(f"| Total size | {_format_size(pm.total_size_bytes)} |")
116
+ m.append(f"| Functions | {pm.total_functions} |")
117
+ m.append(f"| Classes | {pm.total_classes} |")
118
+ m.append(f"| Avg complexity | {pm.avg_complexity:.1f} |")
119
+ m.append(f"| TODOs | {pm.total_todos} |")
120
+ m.append(f"| FIXMEs | {pm.total_fixmes} |")
121
+ m.append("")
122
+
123
+ if pm.files_by_language:
124
+ m.append("## Languages\n")
125
+ for lang, cnt in sorted(pm.files_by_language.items(), key=lambda x: -x[1]):
126
+ m.append(f"- **{lang}**: {cnt} files")
127
+ m.append("")
128
+
129
+ top = sorted(pm.complexity_breakdown.items(), key=lambda x: -x[1])[:10]
130
+ if top:
131
+ m.append("## Top Complexity\n")
132
+ for fp, c in top:
133
+ m.append(f"- `{fp}`: {c}")
134
+ m.append("")
135
+
136
+ if pm.todos:
137
+ m.append(f"## TODOs & FIXMEs ({len(pm.todos)})\n")
138
+ m.append("| Kind | File | Line | Description |")
139
+ m.append("|------|------|------|-------------|")
140
+ for t in pm.todos[:30]:
141
+ m.append(f"| {t['kind']} | `{t['file']}` | {t['line']} | {t['text'][:100]} |")
142
+ m.append("")
143
+
144
+ if self.smells:
145
+ m.append(f"## Code Smells ({len(self.smells)})\n")
146
+ for s in self.smells[:30]:
147
+ m.append(f"- **{s.kind}** `{s.file}:{s.line}` ({s.severity}): {s.message}")
148
+
149
+ m.append("\n---\n*Report generated by DeepSeek Code Analytics*")
150
+ return "\n".join(m)
151
+
152
+ def json_report(self):
153
+ return json.dumps({
154
+ "project": self.pm.to_dict(),
155
+ "smells": [asdict(s) for s in self.smells],
156
+ "generated_at": time.time(),
157
+ }, indent=2)
158
+
159
+ def html(self):
160
+ md = self.markdown()
161
+ return (
162
+ "<!DOCTYPE html><html><head><meta charset=UTF-8>"
163
+ "<title>Health Report</title>"
164
+ "<style>body{font-family:sans-serif;max-width:900px;margin:auto;padding:2rem}"
165
+ "table{border-collapse:collapse;width:100%}"
166
+ "th,td{border:1px solid #ddd;padding:8px;text-align:left}"
167
+ "th{background:#f5f5f5}code{background:#f0f0f0;padding:2px 6px}</style>"
168
+ "</head><body><pre>" + md + "</pre></body></html>"
169
+ )
170
+
171
+
172
def generate_report(pm=None, smells=None, fmt="terminal", output_path=None):
    """Generate a report from metrics. Scans if none provided.

    Args:
        pm: ProjectMetrics to report on; scans the current project when None.
        smells: optional list of code-smell records to include.
        fmt: "terminal", "markdown", "json", or "html" — any other value
            falls back to the terminal format (same as before).
        output_path: when given, write the report there and return a
            confirmation message instead of the report text.
    """
    if pm is None:
        from .scanner import scan_project
        pm = scan_project()

    if smells is None:
        smells = []

    gen = ReportGenerator(pm, smells)

    # Render only the requested format. Previously the terminal report was
    # always built first and then discarded for every non-terminal format.
    renderers = {
        "markdown": gen.markdown,
        "json": gen.json_report,
        "html": gen.html,
    }
    report = renderers.get(fmt, gen.terminal)()

    if output_path:
        Path(output_path).write_text(report, encoding="utf-8")
        return f"Report written to {output_path}"

    return report
@@ -0,0 +1,443 @@
1
+ """Codebase scanner — collects metrics across an entire project tree."""
2
+
3
+ import ast
4
+ import os
5
+ import re
6
+ import time
7
+ from dataclasses import dataclass, field, asdict
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ from ..log import get_logger
12
+
13
+ logger = get_logger()
14
+
15
# ── Constants ────────────────────────────────────────────────────────────────

# Directory names never descended into during a walk: VCS metadata,
# virtualenvs, dependency/build output, and tool/editor state.
SKIP_DIRS = {
    ".git", "__pycache__", "node_modules", ".venv", "venv", "env",
    ".tox", ".eggs", "dist", "build", ".next", ".nuxt", "target",
    "vendor", ".bundle", ".claude", ".deepseek-code", ".vscode", ".idea",
    ".mypy_cache", ".pytest_cache", ".ruff_cache", ".svn", ".hg",
    "egg-info", ".pixi",
}

# Extension sets per language; KNOWN_EXTENSIONS is the union used by
# _should_scan_file() as the scan allow-list.
PYTHON_EXTENSIONS = {".py", ".pyw", ".pyi"}
JS_EXTENSIONS = {".js", ".jsx", ".mjs", ".cjs"}
TS_EXTENSIONS = {".ts", ".tsx", ".mts", ".cts"}
GO_EXTENSIONS = {".go"}
RUST_EXTENSIONS = {".rs"}
KNOWN_EXTENSIONS = PYTHON_EXTENSIONS | JS_EXTENSIONS | TS_EXTENSIONS | GO_EXTENSIONS | RUST_EXTENSIONS

# Matches TODO/FIXME-style markers and captures the remainder of the line.
TODO_RE = re.compile(r"(?:TODO|FIXME|HACK|XXX|BUG|OPTIMIZE|NOTE)[\s:]*(.*?)(?:\n|$)", re.IGNORECASE)
# Common comment introducers (#, //, /*, <!--).
# NOTE(review): not referenced anywhere in this module's visible code — possibly legacy.
COMMENT_RE = re.compile(r"(#|//|/\*|<!--)\s*")
# Rough line-start function-definition detector across Python/JS/Go/Rust.
FUNC_RE = re.compile(
    r"^\s*(?:def |async def |fn |func |function |pub fn |pub async fn )",
    re.MULTILINE,
)
# Rough line-start class/struct/type-definition detector.
CLASS_RE = re.compile(
    r"^\s*(?:class |struct |impl |enum |interface |type )",
    re.MULTILINE,
)
# `return` keyword matcher.
# NOTE(review): not referenced anywhere in this module's visible code.
RETURN_RE = re.compile(r"\breturn\b")
+
44
+
45
+ # ── Data classes ─────────────────────────────────────────────────────────────
46
+
47
+
48
@dataclass
class FileMetrics:
    """Metrics for a single source file."""

    path: str                # file path as given to the scanner
    language: str = ""       # detected language name ("python", "go", ...)
    size_bytes: int = 0      # on-disk size from stat()
    lines_total: int = 0     # every physical line
    lines_code: int = 0      # non-blank lines (total - blank)
    lines_blank: int = 0     # whitespace-only lines
    lines_comment: int = 0   # rough count of line-start comments
    todo_count: int = 0      # occurrences of the word TODO (case-insensitive)
    fixme_count: int = 0     # occurrences of the word FIXME (case-insensitive)
    function_count: int = 0  # functions found via AST (Python) or regex
    class_count: int = 0     # classes/structs found via AST or regex
    complexity: int = 0      # rough cyclomatic complexity estimate
    max_indent: int = 0      # deepest leading-whitespace width, in columns
    imports_count: int = 0   # import/include statements found
+
67
+
68
@dataclass
class ProjectMetrics:
    """Aggregate metrics for an entire project."""

    root: str                                             # scanned root directory
    scanned_at: float = field(default_factory=time.time)  # epoch seconds of the scan
    total_files: int = 0        # every file seen by the walk, scanned or not
    source_files: int = 0       # files that were actually analyzed
    total_lines: int = 0        # sum of lines_total over scanned files
    total_code_lines: int = 0   # sum of lines_code over scanned files
    total_todos: int = 0        # sum of per-file TODO word counts
    total_fixmes: int = 0       # sum of per-file FIXME word counts
    total_functions: int = 0
    total_classes: int = 0
    total_complexity: int = 0
    total_size_bytes: int = 0
    file_metrics: list[FileMetrics] = field(default_factory=list)       # per-file detail
    todos: list[dict[str, Any]] = field(default_factory=list)           # extracted TODO items
    files_by_language: dict[str, int] = field(default_factory=dict)     # language -> file count
    complexity_breakdown: dict[str, int] = field(default_factory=dict)  # path -> complexity
    smells: list[dict[str, Any]] = field(default_factory=list)          # not filled by the scanner itself

    @property
    def avg_complexity(self) -> float:
        """Mean complexity per function (0.0 when no functions were found)."""
        if self.total_functions == 0:
            return 0.0
        return self.total_complexity / self.total_functions

    @property
    def todo_rate(self) -> float:
        """TODO markers per 1000 lines of code (0.0 for an empty codebase)."""
        if self.total_code_lines == 0:
            return 0.0
        return self.total_todos / (self.total_code_lines / 1000)

    @property
    def health_score(self) -> float:
        """Heuristic health score from 0 (worst) to 100 (best)."""
        score = 100.0
        # Penalize high TODO rates
        score -= min(15, self.todo_rate * 5)
        # Penalize high complexity
        score -= min(15, max(0, self.avg_complexity - 5) * 2)
        # Penalize large files
        if self.source_files > 0:
            avg_lines = self.total_lines / self.source_files
            score -= min(10, max(0, (avg_lines - 300) / 50))
        # Penalize very large codebases with no organization
        if self.source_files > 100 and len(self.files_by_language) < 2:
            score -= 5
        return max(0, round(score, 1))

    def to_dict(self) -> dict[str, Any]:
        """Serialize all fields plus the derived properties to a plain dict."""
        d = asdict(self)
        d["avg_complexity"] = self.avg_complexity
        d["todo_rate"] = self.todo_rate
        d["health_score"] = self.health_score
        return d
125
+
126
+
127
+ # ── Scanner helpers ──────────────────────────────────────────────────────────
128
+
129
+
130
def _detect_language(file_path: Path) -> str:
    """Map a file's extension to a language name ("unknown" if unrecognized)."""
    ext = file_path.suffix.lower()
    # Core languages share their extension sets with the scan allow-list.
    for ext_set, lang in (
        (PYTHON_EXTENSIONS, "python"),
        (JS_EXTENSIONS, "javascript"),
        (TS_EXTENSIONS, "typescript"),
        (GO_EXTENSIONS, "go"),
        (RUST_EXTENSIONS, "rust"),
    ):
        if ext in ext_set:
            return lang
    # Everything else is matched against a literal extension table.
    other = {
        ".c": "c", ".h": "c",
        ".cpp": "c++", ".cc": "c++", ".cxx": "c++", ".hpp": "c++", ".hxx": "c++",
        ".java": "java",
        ".rb": "ruby",
        ".php": "php",
        ".swift": "swift",
        ".kt": "kotlin", ".kts": "kotlin",
        ".sh": "shell", ".bash": "shell", ".zsh": "shell",
        ".md": "markdown", ".mdx": "markdown",
        ".json": "json",
        ".yaml": "yaml", ".yml": "yaml",
        ".toml": "toml",
        ".cfg": "config", ".ini": "config",
    }
    return other.get(ext, "unknown")
+ return "unknown"
169
+
170
+
171
+ def _count_lines(content: str) -> tuple[int, int, int]:
172
+ """Count total, code, and blank lines."""
173
+ total = 0
174
+ blank = 0
175
+ for line in content.splitlines():
176
+ total += 1
177
+ stripped = line.strip()
178
+ if not stripped:
179
+ blank += 1
180
+ code = total - blank
181
+ return total, code, blank
182
+
183
+
184
+ def _count_comment_lines(content: str, language: str) -> int:
185
+ """Rough count of comment lines."""
186
+ count = 0
187
+ in_block = False
188
+ for line in content.splitlines():
189
+ stripped = line.strip()
190
+ if language in ("python", "ruby", "shell", "yaml", "toml", "config"):
191
+ if stripped.startswith("#"):
192
+ count += 1
193
+ elif language in ("javascript", "typescript", "go", "rust", "c", "c++",
194
+ "java", "kotlin", "swift", "php"):
195
+ if in_block:
196
+ count += 1
197
+ if "*/" in stripped:
198
+ in_block = False
199
+ continue
200
+ if stripped.startswith("//"):
201
+ count += 1
202
+ elif stripped.startswith("/*"):
203
+ count += 1
204
+ if "*/" not in stripped:
205
+ in_block = True
206
+ return count
207
+
208
+
209
+ def _python_complexity(tree: ast.AST) -> int:
210
+ """Cyclomatic complexity for Python via AST."""
211
+ complexity = 1 # base path
212
+ for node in ast.walk(tree):
213
+ if isinstance(node, (ast.If, ast.While, ast.For, ast.AsyncFor,
214
+ ast.ExceptHandler, ast.With, ast.AsyncWith,
215
+ ast.Assert)):
216
+ complexity += 1
217
+ elif isinstance(node, ast.BoolOp):
218
+ complexity += len(node.values) - 1
219
+ elif isinstance(node, ast.Match):
220
+ complexity += 1 # each case adds at walk level
221
+ elif isinstance(node, ast.match_case):
222
+ complexity += 1
223
+ return complexity
224
+
225
+
226
+ def _generic_complexity(content: str) -> int:
227
+ """Regex-based complexity approximation for non-Python languages."""
228
+ complexity = 1
229
+ for pattern in [
230
+ r"\bif\b", r"\belse if\b", r"\bwhile\b", r"\bfor\b",
231
+ r"\bcatch\b", r"\bexcept\b", r"\bmatch\b", r"\bswitch\b",
232
+ r"\bcase\b", r"\b&&\b", r"\b\|\|\b", r"\?\s*[^?:]",
233
+ ]:
234
+ complexity += len(re.findall(pattern, content, re.IGNORECASE))
235
+ return complexity
236
+
237
+
238
+ def _max_indent(content: str) -> int:
239
+ """Find the maximum indentation level."""
240
+ max_indent = 0
241
+ for line in content.splitlines():
242
+ if line.strip():
243
+ indent = len(line) - len(line.lstrip())
244
+ max_indent = max(max_indent, indent)
245
+ return max_indent
246
+
247
+
248
def _scan_python(path: Path, content: str) -> FileMetrics:
    """Collect metrics for a Python file, preferring AST over regex."""
    metrics = FileMetrics(path=str(path), language="python")

    # Text-level metrics first — these never depend on parseability.
    metrics.lines_total, metrics.lines_code, metrics.lines_blank = _count_lines(content)
    metrics.lines_comment = _count_comment_lines(content, "python")
    metrics.max_indent = _max_indent(content)

    try:
        tree = ast.parse(content, filename=str(path))
    except SyntaxError:
        tree = None

    if tree is None:
        # Unparseable source: fall back to the regex heuristics.
        metrics.function_count = len(FUNC_RE.findall(content))
        metrics.class_count = len(CLASS_RE.findall(content))
        metrics.complexity = _generic_complexity(content)
        return metrics

    # Structural counts straight from the AST.
    for node in ast.walk(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            metrics.function_count += 1
        elif isinstance(node, ast.ClassDef):
            metrics.class_count += 1
        elif isinstance(node, (ast.Import, ast.ImportFrom)):
            metrics.imports_count += 1

    metrics.complexity = _python_complexity(tree)
    return metrics
286
+
287
+
288
def _scan_generic(path: Path, content: str) -> FileMetrics:
    """Collect regex-based metrics for a non-Python source file."""
    lang = _detect_language(path)
    metrics = FileMetrics(path=str(path), language=lang)

    metrics.lines_total, metrics.lines_code, metrics.lines_blank = _count_lines(content)
    metrics.lines_comment = _count_comment_lines(content, lang)
    metrics.max_indent = _max_indent(content)
    metrics.function_count = len(FUNC_RE.findall(content))
    metrics.class_count = len(CLASS_RE.findall(content))
    metrics.complexity = _generic_complexity(content)

    # Line-start import/include statements across the supported languages.
    metrics.imports_count = len(re.findall(r"^\s*(?:import|from|require|use|#include)\b",
                                           content, re.MULTILINE))
    return metrics
307
+
308
+
309
def _extract_todos(content: str, file_path: str) -> list[dict[str, Any]]:
    """Pull TODO/FIXME-style markers out of *content* with location context."""
    found: list[dict[str, Any]] = []
    for lineno, line in enumerate(content.splitlines(), start=1):
        match = TODO_RE.search(line)
        if not match:
            continue
        # The marker keyword is the first token of the match, sans colon.
        kind = match.group(0).split(":")[0].split()[0].upper().strip(":")
        if kind in ("NOTE", "OPTIMIZE"):
            continue  # less critical
        found.append({
            "file": file_path,
            "line": lineno,
            "kind": kind,
            "text": match.group(1).strip() if match.group(1) else "",
            "context": line.strip()[:120],
        })
    return found
327
+
328
+
329
def _should_skip_dir(dirname: str) -> bool:
    """True for hidden directories and anything on the SKIP_DIRS blocklist."""
    return dirname.startswith(".") or dirname in SKIP_DIRS
331
+
332
+
333
def _should_scan_file(file_path: Path) -> bool:
    """Scan only known source extensions that are statable and at most 2MB."""
    if file_path.suffix.lower() not in KNOWN_EXTENSIONS:
        return False
    try:
        size = file_path.stat().st_size
    except OSError:
        # Unreadable or vanished files are skipped rather than crashing.
        return False
    return size <= 2 * 1024 * 1024
344
+
345
+
346
+ # ── Main scanner ─────────────────────────────────────────────────────────────
347
+
348
+
349
class CodebaseScanner:
    """Scan a directory tree and produce comprehensive metrics."""

    def __init__(self, root: str | None = None):
        # Default to the current working directory when no root is given.
        self.root = Path(root) if root else Path.cwd()

    def _measure_file(self, file_path: Path, content: str) -> FileMetrics:
        """Produce FileMetrics for one file.

        Shared by scan() and scan_file() — previously this logic was
        duplicated in both methods.
        """
        # Choose scan strategy by language.
        if _detect_language(file_path) == "python":
            fm = _scan_python(file_path, content)
        else:
            fm = _scan_generic(file_path, content)

        try:
            fm.size_bytes = file_path.stat().st_size
        except OSError:
            # The file vanished/changed between read and stat; fall back to
            # the decoded length instead of aborting the whole scan (the
            # unguarded stat() previously crashed scan() mid-walk).
            fm.size_bytes = len(content)

        fm.todo_count = len(re.findall(r"\bTODO\b", content, re.IGNORECASE))
        fm.fixme_count = len(re.findall(r"\bFIXME\b", content, re.IGNORECASE))
        return fm

    def _aggregate(self, pm: ProjectMetrics, fm: FileMetrics,
                   file_path: Path, content: str) -> None:
        """Fold one file's metrics into the project-level totals."""
        pm.file_metrics.append(fm)
        pm.source_files += 1
        pm.total_lines += fm.lines_total
        pm.total_code_lines += fm.lines_code
        pm.total_todos += fm.todo_count
        pm.total_fixmes += fm.fixme_count
        pm.total_functions += fm.function_count
        pm.total_classes += fm.class_count
        pm.total_complexity += fm.complexity
        pm.total_size_bytes += fm.size_bytes

        # Language breakdown
        lang = fm.language
        pm.files_by_language[lang] = pm.files_by_language.get(lang, 0) + 1

        # Complexity breakdown
        if fm.complexity > 0:
            pm.complexity_breakdown[str(file_path)] = fm.complexity

        # Extract TODOs
        pm.todos.extend(_extract_todos(content, str(file_path)))

    def scan(self) -> ProjectMetrics:
        """Run a full scan and return aggregated metrics."""
        pm = ProjectMetrics(root=str(self.root))

        for dirpath, dirnames, filenames in os.walk(self.root):
            # Prune hidden / known-noise dirs in place so os.walk skips them.
            dirnames[:] = [d for d in dirnames if not _should_skip_dir(d)]

            for fname in sorted(filenames):
                file_path = Path(dirpath) / fname
                if not _should_scan_file(file_path):
                    continue

                try:
                    content = file_path.read_text(encoding="utf-8", errors="replace")
                except (OSError, UnicodeDecodeError):
                    continue

                fm = self._measure_file(file_path, content)
                self._aggregate(pm, fm, file_path, content)

            # total_files counts everything seen, including skipped files.
            pm.total_files += len(filenames)

        return pm

    def scan_file(self, file_path: str) -> FileMetrics | None:
        """Scan a single file and return its metrics (None if unreadable)."""
        fp = Path(file_path)
        if not fp.exists() or not fp.is_file():
            return None
        try:
            content = fp.read_text(encoding="utf-8", errors="replace")
        except (OSError, UnicodeDecodeError):
            return None

        return self._measure_file(fp, content)
435
+
436
+
437
+ # ── Convenience ──────────────────────────────────────────────────────────────
438
+
439
+
440
def scan_project(root: str | None = None) -> ProjectMetrics:
    """Convenience wrapper: scan *root* (or the CWD) and return its metrics."""
    return CodebaseScanner(root).scan()