repo-notes 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. repo_notes/__init__.py +3 -0
  2. repo_notes/__main__.py +11 -0
  3. repo_notes/agents_generator.py +326 -0
  4. repo_notes/cache.py +157 -0
  5. repo_notes/cli.py +431 -0
  6. repo_notes/config.py +168 -0
  7. repo_notes/detectors/__init__.py +6 -0
  8. repo_notes/detectors/base.py +41 -0
  9. repo_notes/detectors/c_cpp.py +14 -0
  10. repo_notes/detectors/docker.py +29 -0
  11. repo_notes/detectors/go.py +14 -0
  12. repo_notes/detectors/java.py +14 -0
  13. repo_notes/detectors/javascript.py +25 -0
  14. repo_notes/detectors/kotlin.py +14 -0
  15. repo_notes/detectors/php.py +14 -0
  16. repo_notes/detectors/python.py +14 -0
  17. repo_notes/detectors/r_lang.py +14 -0
  18. repo_notes/detectors/registry.py +90 -0
  19. repo_notes/detectors/ruby.py +14 -0
  20. repo_notes/detectors/rust.py +14 -0
  21. repo_notes/detectors/shell.py +14 -0
  22. repo_notes/detectors/sql.py +14 -0
  23. repo_notes/detectors/swift.py +14 -0
  24. repo_notes/extractors/__init__.py +56 -0
  25. repo_notes/extractors/api_endpoints.py +113 -0
  26. repo_notes/extractors/architecture.py +492 -0
  27. repo_notes/extractors/cicd.py +95 -0
  28. repo_notes/extractors/complexity.py +160 -0
  29. repo_notes/extractors/database.py +68 -0
  30. repo_notes/extractors/dependencies.py +116 -0
  31. repo_notes/extractors/duplicates.py +68 -0
  32. repo_notes/extractors/env_vars.py +54 -0
  33. repo_notes/extractors/git.py +74 -0
  34. repo_notes/extractors/project_intelligence.py +460 -0
  35. repo_notes/extractors/readme_data.py +188 -0
  36. repo_notes/extractors/scripts.py +88 -0
  37. repo_notes/extractors/security.py +162 -0
  38. repo_notes/extractors/stats.py +61 -0
  39. repo_notes/extractors/structure.py +68 -0
  40. repo_notes/extractors/todos.py +169 -0
  41. repo_notes/extractors/type_coverage.py +74 -0
  42. repo_notes/file_cache.py +16 -0
  43. repo_notes/generator.py +793 -0
  44. repo_notes/html_generator.py +639 -0
  45. repo_notes/html_templates.py +204 -0
  46. repo_notes/readme_generator.py +176 -0
  47. repo_notes/scanner.py +171 -0
  48. repo_notes-0.3.0.dist-info/METADATA +319 -0
  49. repo_notes-0.3.0.dist-info/RECORD +53 -0
  50. repo_notes-0.3.0.dist-info/WHEEL +5 -0
  51. repo_notes-0.3.0.dist-info/entry_points.txt +2 -0
  52. repo_notes-0.3.0.dist-info/licenses/LICENSE +21 -0
  53. repo_notes-0.3.0.dist-info/top_level.txt +1 -0
repo_notes/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """repo-notes: Scan a code repository and generate useful project notes."""
2
+
3
# Package version string exposed as ``repo_notes.__version__``.
__version__ = "0.3.0"
repo_notes/__main__.py ADDED
@@ -0,0 +1,11 @@
1
+ """Entry point for repo-notes."""
2
+
3
+ from repo_notes.cli import cli
4
+
5
+
6
def main() -> None:
    """Invoke the ``cli`` callable imported from ``repo_notes.cli``."""
    cli()


# Only run the CLI when this module is executed as a script
# (for example via ``python -m repo_notes``), not when it is imported.
if __name__ == "__main__":
    main()
repo_notes/agents_generator.py ADDED
@@ -0,0 +1,326 @@
1
+ """AGENTS.md generator — compact repo overview for coding agents."""
2
+
3
+ from pathlib import Path
4
+
5
+ from repo_notes.extractors.architecture import ArchitectureResult
6
+ from repo_notes.extractors.readme_data import ReadmeData
7
+ from repo_notes.extractors.scripts import ScriptsResult
8
+ from repo_notes.extractors.stats import StatsResult
9
+ from repo_notes.extractors.structure import StructureResult
10
+
11
# Short human-readable descriptions for well-known top-level files, keyed by
# exact file name. AgentsGenerator._render_repo_map consults this table for the
# non-directory entries of the repository tree; names that are not listed are
# rendered without a description.
_FILE_DESCRIPTIONS: dict[str, str] = {
    "README.md": "Project readme",
    "CONTRIBUTING.md": "Contribution guide",
    "LICENSE": "License",
    "pyproject.toml": "Python project configuration",
    "Cargo.toml": "Rust project configuration",
    "go.mod": "Go module definition",
    "package.json": "Node.js project manifest",
    "Dockerfile": "Docker image definition",
    "docker-compose.yml": "Docker Compose configuration",
    "Makefile": "Build automation",
    "justfile": "Build recipes",
    ".env.example": "Environment variable template",
    ".gitignore": "Git ignore rules",
    # NOTE(review): this key ends with "/", but _render_repo_map only looks up
    # non-directory names in this table (and strips trailing slashes first),
    # so this entry looks unreachable — confirm whether it was meant to be a
    # ".github" entry in _DIRECTORY_DESCRIPTIONS.
    ".github/": "CI/CD workflows",
}
27
+
28
# Short human-readable descriptions for conventional top-level directories,
# keyed by directory name WITHOUT a trailing slash (that is the form
# AgentsGenerator._render_repo_map uses for the lookup). Directories that are
# not listed fall back to a generic "<name>/ directory" label.
_DIRECTORY_DESCRIPTIONS: dict[str, str] = {
    "src": "Source code",
    "source": "Source code",
    "lib": "Library code",
    "app": "Application code",
    "cmd": "Command-line entry points",
    "pkg": "Library packages",
    "internal": "Internal packages",
    "api": "API definitions",
    "tests": "Test suite",
    "test": "Test suite",
    "spec": "Test suite",
    "docs": "Documentation",
    "doc": "Documentation",
    "config": "Configuration",
    "cfg": "Configuration",
    "scripts": "Build and utility scripts",
    "bin": "Executables",
    "dist": "Build output",
    "build": "Build artifacts",
    "assets": "Static assets",
    "public": "Static assets",
    "static": "Static assets",
    "migrations": "Database migrations",
    "db": "Database schemas and migrations",
    "docker": "Docker configuration",
    "examples": "Usage examples",
    "benchmarks": "Performance benchmarks",
    "benchmark": "Performance benchmarks",
    "deploy": "Deployment configuration",
    "infra": "Infrastructure as code",
    "proto": "Protobuf definitions",
    "graphql": "GraphQL schema",
    "web": "Web frontend",
    "ui": "UI components",
    "mobile": "Mobile app code",
    # The ".github" directory is looked up here (directories are never looked
    # up in the file table), so its description has to live in this mapping
    # under the slash-less name to ever be shown.
    ".github": "CI/CD workflows",
}
65
+
66
+
67
def _format_language_name(language: str) -> str:
    """Return a display form of a language name.

    Empty names and names that already contain at least one uppercase
    character are returned untouched; every other name is title-cased.
    """
    keep_as_is = not language or any(map(str.isupper, language))
    return language if keep_as_is else language.title()
73
+
74
+
75
def _visible_languages(by_language: dict[str, dict]) -> list[str]:
    """Return display names of the languages, largest line count first.

    Each value of ``by_language`` is indexed with ``"lines"`` to obtain the
    sort key. Empty names and the ``"unknown"`` bucket (compared
    case-insensitively) are left out of the result.
    """

    def negated_line_count(lang: str):
        # Negating the count makes the ascending sort yield descending counts.
        return -by_language[lang]["lines"]

    visible: list[str] = []
    for lang in sorted(by_language.keys(), key=negated_line_count):
        if not lang or lang.lower() == "unknown":
            continue
        visible.append(_format_language_name(lang))
    return visible
82
+
83
+
84
class AgentsGenerator:
    """Generates a compact AGENTS.md focused on agent-oriented repo understanding.

    Every ``_render_*`` method builds one Markdown section. A renderer returns
    an empty string to signal that its section should be omitted; ``generate``
    joins the remaining sections.
    """

    # Maximum number of runtime dependencies listed by name in the tech stack.
    _MAX_LISTED_DEPS = 12
    # Maximum number of entries shown for each "top N" architecture list.
    _TOP_N = 3

    def __init__(self, root: Path):
        # Repository root; its directory name is the fallback project title.
        self.root = root

    def generate(
        self,
        readme_data: ReadmeData | None = None,
        structure: StructureResult | None = None,
        stats: StatsResult | None = None,
        scripts: ScriptsResult | None = None,
        arch: ArchitectureResult | None = None,
    ) -> str:
        """Render the full document from whichever extractor results are given.

        Sections that come back empty (or whitespace-only) are dropped; the
        rest are right-stripped, separated by one blank line, and the result
        ends with a single newline.
        """
        sections = [
            self._render_header(readme_data),
            self._render_tech_stack(stats, readme_data),
            self._render_structure(structure),
            self._render_repo_map(structure),
            self._render_architecture(arch),
            self._render_commands(readme_data, scripts),
            self._render_howto(stats, readme_data),
        ]
        return "\n\n".join(s.rstrip() for s in sections if s and s.strip()) + "\n"

    def _render_header(self, data: ReadmeData | None) -> str:
        """Return the title line, followed by the description when there is one."""
        name = data.name if data and data.name else self.root.name
        desc = data.description if data and data.description else ""
        header = f"# {name}"
        if desc:
            header += f"\n\n{desc}"
        return header

    def _render_tech_stack(self, stats: StatsResult | None, data: ReadmeData | None) -> str:
        """Return the "Tech Stack" bullet list, or "" when nothing is known."""
        bullets: list[str] = []

        if stats and stats.by_language:
            langs = _visible_languages(stats.by_language)
            if langs:
                bullets.append(f"- **Languages**: {', '.join(langs)}")

        if data:
            if data.python_requires:
                bullets.append(f"- **Python**: {data.python_requires}")
            if data.runtime_deps:
                limit = self._MAX_LISTED_DEPS
                label = ", ".join(data.runtime_deps[:limit])
                hidden = len(data.runtime_deps) - limit
                if hidden > 0:
                    label += f" and {hidden} more"
                bullets.append(f"- **Runtime deps**: {label}")

        if not bullets:
            return ""
        return "\n".join(["## Tech Stack", "", *bullets, ""])

    def _render_structure(self, structure: StructureResult | None) -> str:
        """Return the directory tree in a fenced block plus file/dir counts."""
        if not structure or not structure.tree:
            return ""
        lines = [
            "## Project Structure",
            "",
            "```text",
            structure.tree,
            "```",
            "",
            f"*{structure.file_count} files, {structure.dir_count} directories*",
        ]
        return "\n".join(lines)

    def _render_repo_map(self, structure: StructureResult | None) -> str:
        """Return one described bullet per top-level entry of the tree."""
        if not structure or not structure.tree:
            return ""
        children = self._parse_tree_children(structure.tree)
        if not children:
            return ""
        lines = ["## Repository Map", ""]
        for child in sorted(children):
            # A trailing slash in the tree text marks a directory.
            is_dir = child.endswith("/")
            name = child.rstrip("/")
            if is_dir:
                desc = _DIRECTORY_DESCRIPTIONS.get(name, f"{name}/ directory")
                lines.append(f"- **{name}/** — {desc}")
            else:
                desc = _FILE_DESCRIPTIONS.get(name)
                if desc:
                    lines.append(f"- **{name}** — {desc}")
                else:
                    lines.append(f"- **{name}**")
        lines.append("")
        return "\n".join(lines)

    @staticmethod
    def _parse_tree_children(tree_text: str) -> list[str]:
        """Return the stripped, unindented entries that follow the root line.

        The first non-blank line is treated as the root (for example
        "repo-name/") and skipped; of the remaining non-blank lines, only
        those that do not start with a space are returned.
        """
        non_empty_lines = [line for line in tree_text.split("\n") if line.strip()]
        return [
            line.strip()
            for line in non_empty_lines[1:]
            if not line.startswith(" ")
        ]

    def _render_architecture(self, arch: ArchitectureResult | None) -> str:
        """Return the "Architecture" bullet list, or "" when there are no findings."""
        if not arch:
            return ""
        bullets: list[str] = []

        if arch.import_graph:
            module_count = len(arch.import_graph)
            import_count = sum(len(v) for v in arch.import_graph.values())
            m_label = "module" if module_count == 1 else "modules"
            v_label = "other module" if import_count == 1 else "other modules"
            v_verb = "imports" if module_count == 1 else "import"
            bullets.append(f"- **{module_count}** {m_label} {v_verb} **{import_count}** {v_label}")

        if arch.coupling_hotspots:
            for h in arch.coupling_hotspots[: self._TOP_N]:
                bullets.append(f"- `{h.file}` — {h.total} connections ({h.incoming} in, {h.outgoing} out)")

        if arch.layers:
            layer_labels = ", ".join(sorted(arch.layers.keys()))
            bullets.append(f"- **{len(arch.layers)}** layers detected: {layer_labels}")

        if arch.entry_points:
            eps = ", ".join(str(f) for f in arch.entry_points)
            bullets.append(f"- Entry points: {eps}")

        if arch.circular_deps:
            cycle_count = len(arch.circular_deps)
            suffix = "y" if cycle_count == 1 else "ies"
            bullets.append(f"- **{cycle_count}** circular dependenc{suffix} detected")
            for cycle in arch.circular_deps:
                arrow_chain = " → ".join(cycle)
                bullets.append(f"  - `{arrow_chain}`")

        if arch.missing_test_candidates:
            for c in arch.missing_test_candidates[: self._TOP_N]:
                bullets.append(f"- Missing-test: `{c.file}` — {c.reason}")

        if arch.dead_code_candidates:
            for c in arch.dead_code_candidates[: self._TOP_N]:
                bullets.append(f"- Low-reachability: `{c.file}` — {c.reason}")

        # Without any finding there is nothing to put under the heading, so
        # omit the section instead of emitting a bare "## Architecture" title
        # (same convention as _render_tech_stack).
        if not bullets:
            return ""
        return "\n".join(["## Architecture", "", *bullets, ""])

    @staticmethod
    def _bash_block(title: str, command: str) -> list[str]:
        """Return the lines of a fenced bash block with a ``# title`` comment."""
        return ["```bash", f"# {title}\n{command}", "```"]

    @staticmethod
    def _collect_commands(scripts: ScriptsResult | None) -> tuple[list[str], list[str]]:
        """Split discovered scripts into ``(test_commands, build_commands)``.

        A script is classified by its lower-cased name: names containing
        "test" are test commands, otherwise names containing "build" are build
        commands, and everything else is ignored. Sources are visited in the
        order package.json, Makefile, justfile, pyproject scripts.
        """
        test_cmds: list[str] = []
        build_cmds: list[str] = []
        if not scripts:
            return test_cmds, build_cmds

        def bucket(name: str, command: str) -> None:
            lowered = name.lower()
            if "test" in lowered:
                test_cmds.append(command)
            elif "build" in lowered:
                build_cmds.append(command)

        for name, cmd in (scripts.package_json or {}).items():
            bucket(name, cmd)
        for target in scripts.makefile_targets or []:
            bucket(target, f"make {target}")
        for recipe in scripts.justfile_recipes or []:
            bucket(recipe, f"just {recipe}")
        for name, cmd in (scripts.pyproject_scripts or {}).items():
            bucket(name, cmd)
        return test_cmds, build_cmds

    def _render_commands(self, data: ReadmeData | None, scripts: ScriptsResult | None) -> str:
        """Return the "Key Commands" section (install, test and build snippets).

        Only the first discovered test command and the first discovered build
        command are shown. When no test command was discovered, a default
        ``pytest`` / ``ruff check .`` snippet is emitted instead.
        """
        lines = ["## Key Commands", ""]

        if data and data.install_cmd:
            lines.extend(self._bash_block("Install", data.install_cmd))

        if data and data.dev_install_cmd:
            lines.extend(self._bash_block("Dev install", data.dev_install_cmd))

        test_cmds, build_cmds = self._collect_commands(scripts)

        if test_cmds:
            lines.extend(self._bash_block("Test", test_cmds[0]))

        if build_cmds:
            lines.extend(self._bash_block("Build", build_cmds[0]))

        if not test_cmds:
            # Nothing test-like was discovered: fall back to the defaults.
            lines.extend(["```bash", "# Test", "pytest", "", "# Lint", "ruff check .", "```"])

        lines.append("")
        return "\n".join(lines)

    def _render_howto(self, stats: StatsResult | None, data: ReadmeData | None) -> str:
        """Return the closing "How to Work on This Project" section."""
        lines = ["## How to Work on This Project", ""]

        primaries: list[str] = []
        if stats and stats.by_language:
            # Name at most the three languages with the most lines.
            primaries = _visible_languages(stats.by_language)[:3]

        if primaries:
            label = ", ".join(primaries)
            version = f" (requires Python {data.python_requires})" if data and data.python_requires else ""
            lines.append(f"This is a **{label}**{version} project.")
        elif data and data.python_requires:
            lines.append(f"This is a Python {data.python_requires} project.")
        else:
            lines.append("This is a project.")
        lines.append("")
        lines.append("Before committing changes, run tests and lint to verify nothing is broken.")
        lines.append("")

        if data and data.install_cmd:
            lines.append("Generate project notes with `repo-notes .`.")
            lines.append("")

        lines.append("_Generated by repo-notes_")

        return "\n".join(lines)
repo_notes/cache.py ADDED
@@ -0,0 +1,157 @@
1
+ """Incremental cache to skip re-scanning unchanged projects.
2
+
3
+ Persists file metadata (mtime, size, content hash) in .repo-notes-cache.json.
4
+ On subsequent runs, if no files changed and no config changed, skip the scan.
5
+ """
6
+
7
+ import hashlib
8
+ import json
9
+ from dataclasses import dataclass
10
+ from pathlib import Path
11
+
12
+ from repo_notes.config import Config
13
+ from repo_notes.scanner import FileInfo, scan_directory
14
+
15
# Schema version written into the cache file. CacheManager.load treats a cache
# whose stored "version" differs from this value as empty.
CACHE_VERSION = 1
# File name of the cache; CacheManager places it directly under the resolved
# repository root.
CACHE_FILENAME = ".repo-notes-cache.json"
17
+
18
+
19
@dataclass(frozen=True)
class FileState:
    """Immutable snapshot of one file's change-detection metadata."""

    # Modification time in integer nanoseconds: every producer in this module
    # fills it from ``stat().st_mtime_ns``, so the field is an int, not a float.
    mtime: int
    # File size in bytes (``stat().st_size``).
    size: int
    hash: str  # sha256 hex digest
24
+
25
+
26
def _content_hash(path: Path) -> str:
    """Return the SHA-256 hex digest of the file at ``path``.

    The file is read in 64 KiB chunks. If it cannot be opened or read
    (``OSError``), an empty string is returned instead of raising.
    """
    try:
        digest = hashlib.sha256()
        with path.open("rb") as stream:
            while True:
                block = stream.read(65536)
                if block == b"":
                    break
                digest.update(block)
        return digest.hexdigest()
    except OSError:
        return ""
35
+
36
+
37
def _config_hash(cfg: Config) -> str:
    """Return a SHA-256 hex digest of the config fields that affect a scan.

    The fields are serialized as one structured JSON document before hashing.
    Feeding the raw strings into the hash back to back would make distinct
    configurations collide (for example ``min_file_size=1`` with the pattern
    ``"0x"`` versus ``min_file_size=10`` with the pattern ``"x"``), and a
    collision makes a stale cache look valid. Exclude patterns and enabled
    detector names are sorted, so their order does not influence the digest.
    """
    ext = cfg.extractors
    payload = [
        str(cfg.include_hidden),
        str(cfg.min_file_size),
        sorted(cfg.exclude_patterns),
        sorted(cfg.detectors.enabled),
        [
            str(flag)
            for flag in (
                ext.structure,
                ext.project_intelligence,
                ext.stats,
                ext.dependencies,
                ext.git,
                ext.architecture,
                ext.security,
            )
        ],
        str(cfg.security.entropy_threshold),
        str(cfg.structure.max_depth),
    ]
    # json.dumps keeps list boundaries and quotes every string, so each field
    # stays unambiguous in the hashed byte stream.
    return hashlib.sha256(json.dumps(payload).encode("utf-8")).hexdigest()
54
+
55
+
56
class CacheManager:
    """Manages the incremental scan cache (.repo-notes-cache.json).

    The cache file stores a schema version, a hash of the scan-relevant
    configuration, and per-file metadata (mtime, size, content hash) keyed by
    the file's POSIX-style path relative to the root.
    """

    def __init__(self, root: Path, cfg: Config):
        self.root = root.resolve()
        self.cache_path = self.root / CACHE_FILENAME
        self.cfg = cfg
        # Parsed cache contents; None until load() or a successful save().
        self._data: dict | None = None

    def load(self) -> dict:
        """Return the parsed cache, or ``{}`` when it is missing or unusable.

        A cache is unusable when the file cannot be read or decoded, is not
        valid JSON, is not a JSON object, or carries a different schema
        version. The result is memoized on the instance.
        """
        if self._data is not None:
            return self._data
        try:
            parsed = json.loads(self.cache_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            parsed = None
        if not isinstance(parsed, dict) or parsed.get("version") != CACHE_VERSION:
            parsed = {}
        self._data = parsed
        return self._data

    def save(self, file_states: dict[str, FileState]) -> None:
        """Write ``file_states`` and the current config hash to the cache file.

        The payload is written to a sibling temp file and then moved over the
        cache file. Write failures are non-fatal: they are swallowed and the
        in-memory cache is left as it was.
        """
        data = {
            "version": CACHE_VERSION,
            "config_hash": _config_hash(self.cfg),
            "files": {
                path: {
                    "mtime": fs.mtime,
                    "size": fs.size,
                    "hash": fs.hash,
                }
                for path, fs in sorted(file_states.items())
            },
        }
        tmp = self.cache_path.with_suffix(".tmp")
        try:
            tmp.write_text(json.dumps(data, indent=2), encoding="utf-8")
            tmp.replace(self.cache_path)
        except OSError:
            # cache write failures are non-fatal; just avoid leaving a
            # half-written temp file behind.
            try:
                tmp.unlink()
            except OSError:
                pass
            return
        # Keep the memoized view in sync with what is now on disk, so later
        # load()/is_valid()/has_changes() calls do not see pre-save data.
        self._data = data

    def is_valid(self) -> bool:
        """Return True when a cache exists and was built with the same config."""
        data = self.load()
        return bool(data) and data.get("config_hash") == _config_hash(self.cfg)

    @staticmethod
    def _file_state(fi: FileInfo) -> FileState | None:
        """Return the current state of ``fi``, or None if it cannot be stat'ed."""
        try:
            stat = fi.path.stat()
        except OSError:
            return None
        return FileState(
            mtime=stat.st_mtime_ns,
            size=stat.st_size,
            hash=_content_hash(fi.path),
        )

    def _collect_states(self, files) -> dict[str, FileState]:
        """Map each file's relative POSIX path to its state, skipping unreadable ones."""
        states: dict[str, FileState] = {}
        for fi in files:
            state = self._file_state(fi)
            if state is not None:
                states[fi.relative_path.as_posix()] = state
        return states

    def compute_current_states(self) -> dict[str, FileState]:
        """Scan the root with the configured filters and return the file states."""
        return self._collect_states(
            scan_directory(
                self.root,
                include_hidden=self.cfg.include_hidden,
                extra_excludes=self.cfg.exclude_patterns,
                min_file_size=self.cfg.min_file_size,
            )
        )

    def has_changes(self, current: dict[str, FileState]) -> bool:
        """Return True when ``current`` differs from the cached file states.

        A difference is a changed set of paths, or a changed mtime, size or
        content hash for any path. A malformed cache section or entry is
        treated as a change rather than an error.
        """
        cached = self.load().get("files", {})
        if not isinstance(cached, dict) or current.keys() != cached.keys():
            return True
        for key, fs in current.items():
            entry = cached[key]
            if not isinstance(entry, dict):
                return True
            recorded = (entry.get("mtime"), entry.get("size"), entry.get("hash"))
            # The hash is compared as well: it is already computed and stored,
            # and it catches edits that keep both mtime and size unchanged.
            if recorded != (fs.mtime, fs.size, fs.hash):
                return True
        return False

    def save_from_file_infos(self, files: list[FileInfo]) -> None:
        """Compute the state of every given file and persist the result."""
        self.save(self._collect_states(files))

    def clear(self) -> None:
        """Forget the memoized cache and delete the cache file if it exists."""
        self._data = None
        try:
            self.cache_path.unlink()
        except FileNotFoundError:
            pass