chorus-cli 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,465 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ mapper.py — Generate a codebase map for the coder agent.
4
+
5
+ Scans a project directory and produces a compact .coder/map.md that coder
6
+ loads into its system prompt, so it starts every session already knowing
7
+ the file structure, key modules, exports, dependencies, and test status.
8
+
9
+ Usage:
10
+ mapper.py [path] # defaults to cwd
11
+ mapper.py ~/my-project
12
+ """
13
+
14
+ import json
15
+ import os
16
+ import re
17
+ import sys
18
+ from datetime import datetime
19
+ from pathlib import Path
20
+
21
+ # ── Skip rules ──────────────────────────────────────────────────────────────
22
+
23
# Directory names that are never listed or descended into.
SKIP_DIRS = {
    # version control metadata
    ".git", ".svn", ".hg",
    # dependency and virtual-environment folders
    "node_modules", "__pycache__", ".venv", "venv",
    # tool caches
    ".tox", ".mypy_cache", ".pytest_cache",
    # framework output folders
    ".next", ".nuxt", ".output",
    # generic build / report output
    "dist", "build", "out", "target", "coverage", ".turbo", ".cache",
    # editor settings and vendored code
    ".idea", ".vscode", "vendor", "bower_components", ".terraform",
}

# File names that are never listed (OS litter and lock files).
SKIP_FILES = {
    ".DS_Store",
    "Thumbs.db",
    "package-lock.json",
    "yarn.lock",
    "pnpm-lock.yaml",
    "poetry.lock",
    "Pipfile.lock",
    "composer.lock",
    "Cargo.lock",
    "go.sum",
}

# Suffixes (lower-case, with leading dot) left out of the file tree.
BINARY_EXTENSIONS = {
    # images
    ".png", ".jpg", ".jpeg", ".gif", ".ico", ".svg", ".webp", ".bmp",
    # fonts
    ".woff", ".woff2", ".ttf", ".eot", ".otf",
    # archives
    ".zip", ".tar", ".gz", ".bz2", ".7z", ".rar",
    # office documents
    ".pdf", ".doc", ".docx", ".xls", ".xlsx",
    # native binaries and object files
    ".exe", ".dll", ".so", ".dylib", ".o", ".a",
    # compiled bytecode / packaged classes
    ".pyc", ".pyo", ".class", ".jar", ".war",
    # audio and video
    ".mp3", ".mp4", ".avi", ".mov", ".wav",
    # database files
    ".sqlite", ".db", ".mdb",
}

# Suffixes (lower-case, with leading dot) collected for symbol extraction.
CODE_EXTENSIONS = {
    ".py",
    ".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs",
    ".go", ".rs", ".rb", ".java", ".kt", ".scala",
    ".c", ".cpp", ".h", ".hpp", ".cs",
    ".php", ".swift", ".dart", ".lua", ".ex", ".exs",
    ".vue", ".svelte",
}

# Limits that keep the generated map compact.
MAX_FILE_SIZE = 200 * 1024  # skip files over 200KB for signature extraction
MAX_TREE_ENTRIES = 300
MAX_SIGNATURE_FILES = 150
MAX_SYMBOLS_PER_FILE = 25
59
+
60
+ # ── Signature extraction ────────────────────────────────────────────────────
61
+
62
# Each table is a list of (compiled regex, kind label) pairs. The regexes are
# applied with ``match`` to unindented lines only, and group 1 is the symbol name.

PYTHON_PATTERNS = [
    (re.compile(r"^class\s+(\w+)"), "class"),
    (re.compile(r"^(?:async\s+)?def\s+(\w+)"), "fn"),
]

# Exported declarations come first so that they win over the bare forms below.
JS_PATTERNS = [
    (re.compile(r"^export\s+(?:default\s+)?(?:async\s+)?function\s+(\w+)"), "fn"),
    (re.compile(r"^export\s+(?:default\s+)?class\s+(\w+)"), "class"),
    (re.compile(r"^export\s+(?:const|let|var)\s+(\w+)"), "const"),
    (re.compile(r"^export\s+interface\s+(\w+)"), "interface"),
    (re.compile(r"^export\s+type\s+(\w+)"), "type"),
    (re.compile(r"^export\s+enum\s+(\w+)"), "enum"),
    (re.compile(r"^(?:async\s+)?function\s+(\w+)"), "fn"),
    (re.compile(r"^class\s+(\w+)"), "class"),
    (re.compile(r"^const\s+(\w+)\s*=\s*(?:\(|async|function|class|React)"), "const"),
]

GO_PATTERNS = [
    (re.compile(r"^func\s+(\w+)\s*\("), "fn"),
    (re.compile(r"^func\s+\(\w+\s+\*?\w+\)\s+(\w+)\s*\("), "method"),
    (re.compile(r"^type\s+(\w+)\s+struct"), "struct"),
    (re.compile(r"^type\s+(\w+)\s+interface"), "interface"),
]

# Only ``pub`` items are reported for Rust.
RUST_PATTERNS = [
    (re.compile(r"^pub\s+(?:async\s+)?fn\s+(\w+)"), "fn"),
    (re.compile(r"^pub\s+struct\s+(\w+)"), "struct"),
    (re.compile(r"^pub\s+enum\s+(\w+)"), "enum"),
    (re.compile(r"^pub\s+trait\s+(\w+)"), "trait"),
    (re.compile(r"^pub\s+type\s+(\w+)"), "type"),
]

JAVA_PATTERNS = [
    # Accept any run of static/abstract/final in any order so that common
    # declarations such as ``public final class Foo`` are recognised too.
    (re.compile(r"^(?:public|protected)\s+(?:(?:static|abstract|final)\s+)*class\s+(\w+)"), "class"),
    (re.compile(r"^(?:public|protected)\s+interface\s+(\w+)"), "interface"),
    (re.compile(r"^(?:public|protected)\s+enum\s+(\w+)"), "enum"),
]

# File suffix (lower-case) -> pattern table used for that language family.
PATTERN_MAP = {
    ".py": PYTHON_PATTERNS,
    ".js": JS_PATTERNS, ".jsx": JS_PATTERNS, ".mjs": JS_PATTERNS, ".cjs": JS_PATTERNS,
    ".ts": JS_PATTERNS, ".tsx": JS_PATTERNS,
    ".go": GO_PATTERNS,
    ".rs": RUST_PATTERNS,
    ".java": JAVA_PATTERNS, ".kt": JAVA_PATTERNS, ".scala": JAVA_PATTERNS,
}
108
+
109
+
110
def extract_signatures(filepath):
    """Extract top-level symbols from a source file.

    Args:
        filepath: ``pathlib.Path`` of the file to scan. Its lower-cased suffix
            selects the pattern table from ``PATTERN_MAP``.

    Returns:
        A list of ``(name, kind)`` tuples in file order, holding at most
        ``MAX_SYMBOLS_PER_FILE`` entries. The list is empty when the suffix has
        no pattern table, the file is larger than ``MAX_FILE_SIZE``, or the
        file cannot be inspected or read.
    """
    ext = filepath.suffix.lower()
    patterns = PATTERN_MAP.get(ext)
    if not patterns:
        return []

    try:
        # stat() is inside the try as well: the file may have vanished or
        # become unreadable since it was collected.
        if filepath.stat().st_size > MAX_FILE_SIZE:
            return []
        text = filepath.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return []

    symbols = []
    for line in text.splitlines():
        # Only unindented lines can hold top-level definitions.
        if not line or line[0] in (" ", "\t"):
            continue
        for pattern, kind in patterns:
            m = pattern.match(line)
            if m:
                name = m.group(1)
                # Single-underscore names are treated as private and skipped;
                # double-underscore names are kept.
                if name.startswith("_") and not name.startswith("__"):
                    break
                symbols.append((name, kind))
                if len(symbols) >= MAX_SYMBOLS_PER_FILE:
                    return symbols
                # First matching pattern wins for this line.
                break

    return symbols
141
+
142
+ # ── File tree ───────────────────────────────────────────────────────────────
143
+
144
def format_size(size):
    """Return a short human-readable label for a byte count.

    Args:
        size: Number of bytes (non-negative integer).

    Returns:
        ``"<n>b"`` below 1024 bytes, otherwise the value in ``kb`` or ``mb``
        with one decimal place.
    """
    if size < 1024:
        return f"{size}b"

    kb_text = f"{size / 1024:.1f}"
    # Decide the unit from the *rounded* figure, so a size just under 1 MiB is
    # shown as "1.0mb" instead of the misleading "1024.0kb".
    if float(kb_text) < 1024:
        return f"{kb_text}kb"
    return f"{size / (1024 * 1024):.1f}mb"
151
+
152
+
153
def should_skip(name, is_dir=False):
    """Tell whether an entry name is excluded from the map.

    Directories are excluded when hidden (leading dot) or listed in
    ``SKIP_DIRS``; files are excluded when they end in ``.map`` or are listed
    in ``SKIP_FILES``.
    """
    if not is_dir:
        return name.endswith(".map") or name in SKIP_FILES
    hidden = name.startswith(".")
    return hidden or name in SKIP_DIRS
157
+
158
+
159
def build_tree(root, base_root, prefix="", depth=0, max_depth=5, counter=None):
    """Build indented file tree lines.

    Args:
        root: Directory (``pathlib.Path``) whose entries are listed.
        base_root: Passed through unchanged to recursive calls; not otherwise
            used by this function.
        prefix: Text prepended to every line produced at this level.
        depth: Current recursion depth.
        max_depth: Directories at this depth are listed but not descended into.
        counter: One-element list holding the number of entries emitted so far,
            shared across the recursion. Created on the first call.

    Returns:
        A list of strings: ``name/`` for directories and ``name (size)`` for
        files, directories first, each group sorted case-insensitively. A
        ``... (truncated)`` line is added when ``MAX_TREE_ENTRIES`` is reached
        while entries remain at the current level.
    """
    if counter is None:
        counter = [0]

    if counter[0] >= MAX_TREE_ENTRIES:
        return []

    try:
        entries = sorted(root.iterdir(), key=lambda p: (p.is_file(), p.name.lower()))
    except OSError:
        # Unreadable or vanished directory (PermissionError is an OSError).
        return []

    lines = []
    for entry in entries:
        if counter[0] >= MAX_TREE_ENTRIES:
            lines.append(f"{prefix}... (truncated)")
            break

        entry_is_dir = entry.is_dir()
        if should_skip(entry.name, entry_is_dir):
            continue

        if entry_is_dir:
            lines.append(f"{prefix}{entry.name}/")
            counter[0] += 1
            if depth < max_depth:
                lines.extend(build_tree(entry, base_root, prefix + " ", depth + 1, max_depth, counter))
        elif entry.is_file():
            if entry.suffix.lower() in BINARY_EXTENSIONS:
                continue
            try:
                size = format_size(entry.stat().st_size)
            except OSError:
                # The file disappeared or became unreadable after listing;
                # leave it out rather than abort the whole map.
                continue
            lines.append(f"{prefix}{entry.name} ({size})")
            counter[0] += 1

    return lines
195
+
196
+ # ── Project detection ───────────────────────────────────────────────────────
197
+
198
def detect_project(root):
    """Detect project type, name, entry points, dependencies.

    Looks for ``package.json``, ``requirements.txt``, ``pyproject.toml``,
    ``go.mod`` and ``Cargo.toml`` directly under *root*. Manifests that are
    unreadable or malformed are ignored instead of raising.

    Args:
        root: Project directory as a ``pathlib.Path``.

    Returns:
        A dict holding whichever of these keys could be determined:
        ``framework``, ``name``, ``description``, ``entry``, ``type``,
        ``scripts``, ``deps``, ``dev_deps``. Empty when no manifest is found.
        When both ``package.json`` and ``requirements.txt`` exist, ``deps``
        holds the requirements.txt names.
    """
    info = {}

    def _key_list(section):
        # Manifest sections may be missing, null, or not objects at all.
        return list(section) if isinstance(section, dict) else []

    # package.json
    pkg_path = root / "package.json"
    if pkg_path.exists():
        try:
            data = json.loads(pkg_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers both invalid JSON and undecodable bytes.
            data = None
        if isinstance(data, dict):
            info["type"] = data.get("type", "commonjs")
            info["name"] = data.get("name", "")
            info["description"] = data.get("description", "")
            info["entry"] = data.get("main", "")
            info["scripts"] = _key_list(data.get("scripts"))
            info["deps"] = _key_list(data.get("dependencies"))
            info["dev_deps"] = _key_list(data.get("devDependencies"))
            info["framework"] = "Node.js"

            # Detect framework; the first match in this order wins.
            all_deps = info["deps"] + info["dev_deps"]
            known_frameworks = (
                ("next", "Next.js"),
                ("react", "React"),
                ("vue", "Vue"),
                ("svelte", "Svelte"),
                ("express", "Express"),
                ("fastify", "Fastify"),
            )
            for package, label in known_frameworks:
                if package in all_deps:
                    info["framework"] = label
                    break
            else:
                # Angular ships as scoped packages such as "@angular/core",
                # which the last-path-segment check alone never matches.
                if any(
                    d.startswith("@angular/") or d.split("/")[-1] == "angular"
                    for d in all_deps
                ):
                    info["framework"] = "Angular"

    # requirements.txt / pyproject.toml
    req_path = root / "requirements.txt"
    pyp_path = root / "pyproject.toml"

    if req_path.exists():
        info.setdefault("framework", "Python")
        try:
            req_lines = req_path.read_text(encoding="utf-8", errors="replace").splitlines()
        except OSError:
            req_lines = None
        if req_lines is not None:
            deps = []
            for raw in req_lines:
                entry = raw.strip()
                # Skip blanks, comments, and option lines such as "-r other.txt".
                if not entry or entry.startswith(("#", "-")):
                    continue
                # Cut at the first version specifier, extras bracket,
                # environment marker, inline comment, or whitespace.
                name = re.split(r"[=<>!~;\[#\s]", entry, maxsplit=1)[0]
                if name:
                    deps.append(name)
            info["deps"] = deps
            lowered = {d.lower() for d in deps}
            if "django" in lowered:
                info["framework"] = "Django"
            elif "flask" in lowered:
                info["framework"] = "Flask"
            elif "fastapi" in lowered:
                info["framework"] = "FastAPI"

    if pyp_path.exists():
        info.setdefault("framework", "Python")

    # Go
    go_mod = root / "go.mod"
    if go_mod.exists():
        info.setdefault("framework", "Go")
        try:
            mod_text = go_mod.read_text(encoding="utf-8", errors="replace")
        except OSError:
            mod_text = ""
        mod_match = re.search(r"^module\s+(.+)$", mod_text, re.MULTILINE)
        if mod_match:
            info["name"] = mod_match.group(1).strip()

    # Rust
    if (root / "Cargo.toml").exists():
        info.setdefault("framework", "Rust")

    return info
276
+
277
+
278
def detect_tests(root):
    """Classify how many test files the project appears to contain.

    Searches *root* recursively for files whose names match common test-file
    naming patterns, ignoring anything located under a ``SKIP_DIRS`` directory.

    Args:
        root: Project directory as a ``pathlib.Path``.

    Returns:
        A ``(status, files)`` tuple. ``status`` is ``"none"`` (no matches),
        ``"minimal"`` (one to three matches) or ``"substantive"`` (more).
        ``files`` lists the matching paths relative to *root* as strings, each
        path once, and the search stops as soon as more than 50 were found.
    """
    test_file_patterns = [
        "test_*.py", "*_test.py", "*_test.go",
        "*.test.js", "*.test.ts", "*.test.jsx", "*.test.tsx",
        "*.spec.js", "*.spec.ts", "*.spec.jsx", "*.spec.tsx",
        "*_spec.rb",
    ]
    cap = 50  # stop searching once more than this many files were found

    test_files = []
    seen = set()
    for pattern in test_file_patterns:
        for match in root.rglob(pattern):
            try:
                rel = match.relative_to(root)
            except ValueError:
                continue
            if any(part in SKIP_DIRS for part in rel.parts):
                continue
            rel_text = str(rel)
            # A name like "test_x_test.py" matches two patterns; list it once.
            if rel_text in seen:
                continue
            seen.add(rel_text)
            test_files.append(rel_text)
            if len(test_files) > cap:
                break
        # Enforce the cap across patterns too, not only within one pattern.
        if len(test_files) > cap:
            break

    if not test_files:
        return "none", []
    elif len(test_files) <= 3:
        return "minimal", test_files
    else:
        return "substantive", test_files
309
+
310
+ # ── Map generation ──────────────────────────────────────────────────────────
311
+
312
def collect_code_files(root):
    """Gather the source files whose symbols will be extracted.

    Walks everything under *root* in sorted path order and keeps regular files
    with a suffix in ``CODE_EXTENSIONS`` that do not sit under a ``SKIP_DIRS``
    name, stopping once ``MAX_SIGNATURE_FILES`` paths have been gathered.
    """
    collected = []
    for candidate in sorted(root.rglob("*")):
        if not candidate.is_file():
            continue
        try:
            parts = candidate.relative_to(root).parts
        except ValueError:
            continue
        # Any path component matching a skipped name disqualifies the file.
        if not SKIP_DIRS.isdisjoint(parts):
            continue
        if candidate.suffix.lower() not in CODE_EXTENSIONS:
            continue
        collected.append(candidate)
        if len(collected) >= MAX_SIGNATURE_FILES:
            break
    return collected
329
+
330
+
331
def _join_with_overflow(names, limit):
    """Comma-join the first *limit* names and note how many were left out."""
    text = ", ".join(names[:limit])
    if len(names) > limit:
        text += f" (+{len(names) - limit} more)"
    return text


def _wrap_symbol_labels(labels, width=90):
    """Pack symbol labels into comma-joined rows of about *width* characters."""
    single_row = ", ".join(labels)
    if len(single_row) <= width:
        return [single_row]

    rows = []
    pending = []
    used = 0
    for label in labels:
        # Flush the current row before it would grow past the width.
        if pending and used + len(label) + 2 > width:
            rows.append(", ".join(pending))
            pending = []
            used = 0
        pending.append(label)
        used += len(label) + 2
    if pending:
        rows.append(", ".join(pending))
    return rows


def generate_map(root):
    """Render the complete codebase map for *root* as one string.

    The text consists of a header followed by the PROJECT (only when a
    manifest was detected), TESTS, STRUCTURE and SYMBOLS (only when code files
    were collected) sections, joined with newlines.
    """
    root = root.resolve()
    generated = datetime.now().strftime('%Y-%m-%d %H:%M')
    out = ["CODEBASE MAP", f"Generated: {generated}", f"Root: {root}", ""]

    # Project info
    info = detect_project(root)
    if info:
        out.append("PROJECT")
        simple_fields = (
            ("Framework", "framework"),
            ("Name", "name"),
            ("Description", "description"),
            ("Entry", "entry"),
        )
        for label, key in simple_fields:
            value = info.get(key)
            if value:
                out.append(f" {label}: {value}")
        if info.get("scripts"):
            out.append(f" Scripts: {', '.join(info['scripts'])}")
        if info.get("deps"):
            out.append(f" Dependencies: {_join_with_overflow(info['deps'], 30)}")
        if info.get("dev_deps"):
            out.append(f" Dev dependencies: {_join_with_overflow(info['dev_deps'], 20)}")
        out.append("")

    # Tests
    test_status, test_files = detect_tests(root)
    out.append("TESTS")
    if test_status == "none":
        out.append(" No tests found.")
    else:
        is_minimal = test_status == "minimal"
        heading = "Minimal" if is_minimal else "Substantive"
        out.append(f" {heading} ({len(test_files)} test file(s))")
        # Minimal projects list every file; larger ones only the first ten.
        shown = test_files if is_minimal else test_files[:10]
        out.extend(f" {tf}" for tf in shown)
        if not is_minimal and len(test_files) > 10:
            out.append(f" ... and {len(test_files) - 10} more")
    out.append("")

    # File tree
    out.append("STRUCTURE")
    out.extend(f" {tree_line}" for tree_line in build_tree(root, root))
    out.append("")

    # Signatures
    code_files = collect_code_files(root)
    if code_files:
        out.append("SYMBOLS")
        for path in code_files:
            sigs = extract_signatures(path)
            if not sigs:
                continue
            try:
                shown_path = str(path.relative_to(root))
            except ValueError:
                shown_path = str(path)
            out.append(f" {shown_path}")
            labels = [f"{name} ({kind})" for name, kind in sigs]
            out.extend(f" {row}" for row in _wrap_symbol_labels(labels))
        out.append("")

    return "\n".join(out)
425
+
426
+ # ── CLI ─────────────────────────────────────────────────────────────────────
427
+
428
def main():
    """Generate the map for the target directory and write it to .coder/map.md.

    The target is ``sys.argv[1]`` when given, otherwise the current working
    directory. The map is written to ``<root>/.coder/map.md`` and also printed
    to stdout; status messages go to stderr. ``.coder/`` is added to an
    existing ``.gitignore``, or a new ``.gitignore`` is created when the
    directory contains a ``.git`` folder. Exits with status 1 when the target
    is not a directory.
    """
    root = Path(sys.argv[1]).resolve() if len(sys.argv) > 1 else Path.cwd()

    if not root.is_dir():
        print(f"Error: {root} is not a directory", file=sys.stderr)
        sys.exit(1)

    map_content = generate_map(root)

    out_dir = root / ".coder"
    out_dir.mkdir(exist_ok=True)
    out_file = out_dir / "map.md"
    out_file.write_text(map_content, encoding="utf-8")

    # Add .coder to .gitignore if not already there
    gitignore = root / ".gitignore"
    if gitignore.exists():
        gi_text = gitignore.read_text(encoding="utf-8", errors="replace")
        # Compare whole entries: a substring test would treat unrelated lines
        # such as ".coderabbit.yaml" as already ignoring the directory.
        already_ignored = any(
            entry.strip().strip("/") == ".coder" for entry in gi_text.splitlines()
        )
        if not already_ignored:
            with open(gitignore, "a", encoding="utf-8") as f:
                f.write("\n.coder/\n")
            print("Added .coder/ to .gitignore", file=sys.stderr)
    elif (root / ".git").is_dir():
        gitignore.write_text(".coder/\n", encoding="utf-8")
        print("Created .gitignore with .coder/", file=sys.stderr)

    # Stats
    line_count = map_content.count("\n")
    size = len(map_content)
    est_tokens = size // 4  # rough estimate: about four characters per token
    print(f"Map: {out_file} ({line_count} lines, ~{est_tokens} tokens)", file=sys.stderr)

    # Also print to stdout for piping
    print(map_content)


if __name__ == "__main__":
    main()