patchwork-conventions 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,417 @@
1
+ """
2
+ ConventionReport — the aggregated result of a scan.
3
+ Can render to:
4
+ - Markdown (CONVENTIONS.md)
5
+ - AGENTS.md format
6
+ - JSON (for MCP/programmatic use)
7
+ - Rich terminal summary
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ from dataclasses import dataclass, field
13
+ from datetime import datetime, timezone
14
+ from pathlib import Path
15
+ from typing import TYPE_CHECKING
16
+
17
+ if TYPE_CHECKING:
18
+ from patchwork.miners.config_detector import ProjectConfig
19
+ from patchwork.miners.naming import NamingResult
20
+ from patchwork.miners.imports import ImportResult
21
+ from patchwork.miners.structure import StructureResult
22
+ from patchwork.miners.error_handling import ErrorResult
23
+ from patchwork.miners.testing import TestingResult
24
+ from patchwork.miners.api_patterns import APIResult
25
+ from patchwork.miners.git_patterns import GitResult
26
+
27
+
28
@dataclass
class ConventionReport:
    """Aggregated result of a scan, renderable as Markdown or JSON.

    Only ``root`` is required.  Every other field defaults to "nothing found"
    (``None``, an empty dict, or zero), and the renderers skip any section
    whose finding is empty.
    """

    root: Path
    config: "ProjectConfig | None" = None
    file_count: int = 0
    by_lang: dict[str, int] = field(default_factory=dict)
    naming: dict[str, "NamingResult"] = field(default_factory=dict)
    imports: dict[str, "ImportResult"] = field(default_factory=dict)
    structure: "StructureResult | None" = None
    errors: dict[str, "ErrorResult"] = field(default_factory=dict)
    testing: dict[str, "TestingResult"] = field(default_factory=dict)
    api: dict[str, "APIResult"] = field(default_factory=dict)
    git: "GitResult | None" = None
    elapsed: float = 0.0

    # ── Formatting helpers ────────────────────────────────────────────────────

    @staticmethod
    def _pct(ratio: float) -> int:
        """Return *ratio* (0.0–1.0) as a whole percentage, truncated.

        ``int(0.57 * 100)`` is 56 because the product is 56.99999999999999.
        Rounding away that float noise first keeps the truncating behaviour
        (0.999 still reports 99, never 100) without under-reporting.
        """
        return int(round(ratio * 100, 6))

    @staticmethod
    def _code_list(items) -> str:
        """Join *items* as comma-separated Markdown code spans."""
        return ", ".join(f"`{item}`" for item in items)

    @staticmethod
    def _cell(text) -> str:
        """Escape ``|`` so *text* cannot split a Markdown table row."""
        return str(text).replace("|", "\\|")

    # ── Rendering ─────────────────────────────────────────────────────────────

    def to_markdown(self, *, agents_md: bool = False) -> str:
        """Render the full CONVENTIONS.md (or AGENTS.md) content.

        Args:
            agents_md: When true the top-level heading reads ``AGENTS.md``
                instead of ``CONVENTIONS.md``; the rest is identical.

        Returns:
            The document as a single newline-joined string.
        """
        filename = "AGENTS.md" if agents_md else "CONVENTIONS.md"
        ts = datetime.now(timezone.utc).strftime("%Y-%m-%d")

        lines: list[str] = [
            f"# {filename}",
            f"> Auto-generated by [patchwork](https://github.com/yourusername/patchwork) on {ts} ",
            f"> Scanned {self.file_count} files in {self.elapsed:.1f}s",
            "> **Do not edit manually** — run `patchwork update` to refresh",
            "",
        ]

        # (finding, builder) pairs in document order; a section is rendered
        # only when its finding is truthy.
        sections = (
            (self.config, self._section_stack),
            (self.structure, self._section_structure),
            (self.naming, self._section_naming),
            (self.imports, self._section_imports),
            (self.errors, self._section_errors),
            (self.testing, self._section_testing),
            (self.api, self._section_api),
            (self.git, self._section_git),
        )
        for finding, build in sections:
            if finding:
                lines += build()

        # Quick reference card (AI-optimised); empty when there is nothing to list.
        lines += self._section_quick_ref()

        return "\n".join(lines)

    def to_json(self) -> str:
        """Return all findings serialised as an indented JSON string.

        Values that ``_to_dict`` leaves unconverted are stringified by
        ``default=str`` rather than raising.
        """
        return json.dumps(self._to_dict(), indent=2, default=str)

    def _to_dict(self) -> dict:
        """Return the report as a plain dict of JSON-friendly values.

        ``scanned_at`` is the time of this call (UTC, ISO 8601), not the time
        the scan ran.
        """
        # Local import: the module header only imports `dataclass` and `field`.
        from dataclasses import fields, is_dataclass

        def _conv(obj):
            if is_dataclass(obj) and not isinstance(obj, type):
                # Slotted dataclasses have no __dict__; fall back to declared fields.
                if hasattr(obj, "__dict__"):
                    attrs = vars(obj)
                else:
                    attrs = {f.name: getattr(obj, f.name) for f in fields(obj)}
                return {k: _conv(v) for k, v in attrs.items()}
            if isinstance(obj, dict):
                return {k: _conv(v) for k, v in obj.items()}
            if isinstance(obj, (list, tuple)):
                return [_conv(v) for v in obj]
            if isinstance(obj, (set, frozenset)):
                # JSON has no set type; emit a list in a stable order.
                return sorted((_conv(v) for v in obj), key=str)
            if isinstance(obj, Path):
                return str(obj)
            return obj

        return {
            "root": str(self.root),
            "scanned_at": datetime.now(timezone.utc).isoformat(),
            "file_count": self.file_count,
            "by_lang": self.by_lang,
            "elapsed_s": round(self.elapsed, 3),
            "config": _conv(self.config),
            "naming": _conv(self.naming),
            "imports": _conv(self.imports),
            "structure": _conv(self.structure),
            "errors": _conv(self.errors),
            "testing": _conv(self.testing),
            "api": _conv(self.api),
            "git": _conv(self.git),
        }

    # ── Section builders ──────────────────────────────────────────────────────

    def _section_stack(self) -> list[str]:
        """Build the "Tech Stack" section from ``self.config``."""
        cfg = self.config
        if not cfg:
            return []
        lines = ["## Tech Stack", ""]
        if cfg.name:
            version = f" v{cfg.version}" if cfg.version else ""
            lines.append(f"**Project:** `{cfg.name}`{version}")
        if cfg.language:
            lines.append(f"**Language:** {cfg.language}")
        if cfg.runtime:
            lines.append(f"**Runtime:** {cfg.runtime}")
        if cfg.package_manager:
            lines.append(f"**Package Manager:** {cfg.package_manager}")
        if cfg.frameworks:
            lines.append(f"**Frameworks:** {', '.join(cfg.frameworks)}")
        if cfg.linters:
            lines.append(f"**Linters:** {', '.join(cfg.linters)}")
        if cfg.formatters:
            lines.append(f"**Formatters:** {', '.join(cfg.formatters)}")
        if cfg.type_checker:
            lines.append(f"**Type Checker:** {cfg.type_checker}")
        if cfg.build_tool:
            lines.append(f"**Build Tool:** {cfg.build_tool}")
        if cfg.has_docker:
            lines.append("**Docker:** yes")
        if cfg.has_ci and cfg.ci_platform:
            lines.append(f"**CI:** {cfg.ci_platform}")
        if cfg.scripts:
            lines.append("")
            lines.append("**Key Scripts:**")
            lines.append("```")
            lines.extend(f"{name}: {cmd}" for name, cmd in cfg.scripts.items())
            lines.append("```")
        lines.append("")
        return lines

    def _section_structure(self) -> list[str]:
        """Build the "Project Structure" section from ``self.structure``."""
        s = self.structure
        if not s:
            return []
        lines = ["## Project Structure", ""]
        if s.is_monorepo:
            lines.append(f"**Layout:** Monorepo ({len(s.monorepo_packages)} packages)")
            # Only the first eight packages are listed to keep the section short.
            lines.extend(f" - `{pkg}/`" for pkg in s.monorepo_packages[:8])
        elif s.source_root:
            lines.append(f"**Source root:** `{s.source_root}/`")
        if s.organisation:
            lines.append(f"**Organisation:** {s.organisation}-based")
        if s.test_layout:
            if s.test_dirs:
                test_dirs = ", ".join(f"`{d}/`" for d in s.test_dirs)
            else:
                test_dirs = "co-located"
            lines.append(f"**Tests:** {s.test_layout} ({test_dirs})")
        if s.key_dirs:
            lines.append("")
            lines.append("**Key directories:**")
            lines.extend(f" - `{d}/` — {role}" for d, role in s.key_dirs.items())
        if s.notes:
            lines.extend(f"> {note}" for note in s.notes)
        lines.append("")
        return lines

    def _section_naming(self) -> list[str]:
        """Build the "Naming Conventions" section, one sub-section per language.

        A language is skipped when it has no function, class or variable
        finding.  Returns ``[]`` (no heading) when every language is skipped.
        """
        lines = ["## Naming Conventions", ""]
        rendered = False
        for lang, nr in self.naming.items():
            if not (nr.functions or nr.classes or nr.variables):
                continue
            rendered = True
            lines.append(f"### {lang.capitalize()}")
            lines.append("")

            if nr.functions:
                fn = nr.functions
                lines.append(
                    f"- **Functions:** `{fn.style}` ({self._pct(fn.confidence)}% consistent)"
                )
                if fn.examples:
                    lines.append(f" - Examples: {self._code_list(fn.examples[:4])}")
                # Counter-examples are only worth showing when the style is not near-unanimous.
                if fn.counter_examples and fn.confidence < 0.9:
                    lines.append(f" - Exceptions: {self._code_list(fn.counter_examples[:2])}")

            if nr.classes:
                cls = nr.classes
                lines.append(
                    f"- **Classes:** `{cls.style}` ({self._pct(cls.confidence)}% consistent)"
                )
                if cls.examples:
                    lines.append(f" - Examples: {self._code_list(cls.examples[:4])}")

            if nr.variables:
                lines.append(f"- **Variables:** `{nr.variables.style}`")

            if nr.constants and nr.constants.examples:
                lines.append(f"- **Constants:** `{nr.constants.style}`")
                lines.append(f" - Examples: {self._code_list(nr.constants.examples[:3])}")

            if nr.files:
                lines.append(f"- **Files:** `{nr.files.style}`")

            if nr.private_prefix:
                lines.append(f"- **Private prefix:** `{nr.private_prefix}`")

            if nr.test_prefix:
                lines.append(f"- **Test functions:** prefix `{nr.test_prefix}`")

            lines.extend(f"> ⚠️ {note}" for note in nr.notes)
            lines.append("")

        return lines if rendered else []

    def _section_imports(self) -> list[str]:
        """Build the "Import Conventions" section, one sub-section per language."""
        lines = ["## Import Conventions", ""]
        for lang, ir in self.imports.items():
            lines.append(f"### {lang.capitalize()}")
            lines.append("")
            lines.append(f"- **Style:** {ir.style} imports")
            if ir.aliases_used:
                lines.append(f"- **Path aliases:** {self._code_list(ir.aliases_used)}")
            if ir.destructuring:
                lines.append(f"- **Import syntax:** {ir.destructuring}")
            if ir.barrel_files:
                lines.append("- **Barrel files:** `index.ts` re-exports are used")
            if ir.common_third_party:
                lines.append(
                    f"- **Key dependencies:** {self._code_list(ir.common_third_party[:6])}"
                )
            lines.append("")
        return lines

    def _section_errors(self) -> list[str]:
        """Build the "Error Handling" section, one sub-section per language."""
        lines = ["## Error Handling", ""]
        for lang, er in self.errors.items():
            lines.append(f"### {lang.capitalize()}")
            lines.append("")
            lines.append(f"- **Pattern:** {er.primary_pattern}")
            if er.propagation_style:
                lines.append(f"- **Propagation:** {er.propagation_style}")
            if er.logging_framework:
                lines.append(f"- **Logging:** `{er.logging_framework}`")
            if er.exception_naming:
                lines.append(f"- **Custom exception naming:** {er.exception_naming}")
            if er.custom_exceptions:
                lines.append(
                    f"- **Custom exceptions:** {self._code_list(er.custom_exceptions[:6])}"
                )
            lines.extend(f"> {note}" for note in er.notes)
            lines.append("")
        return lines

    def _section_testing(self) -> list[str]:
        """Build the "Testing Conventions" section, one sub-section per language.

        A language with no test files and no detected framework is skipped.
        Returns ``[]`` (no heading) when every language is skipped.
        """
        lines = ["## Testing Conventions", ""]
        rendered = False
        for lang, tr in self.testing.items():
            if tr.test_file_count == 0 and not tr.framework:
                continue
            rendered = True
            lines.append(f"### {lang.capitalize()}")
            lines.append("")
            if tr.framework:
                lines.append(f"- **Framework:** {tr.framework}")
            lines.append(
                f"- **Coverage:** {tr.test_file_count} test files / "
                f"{tr.source_file_count} source files "
                f"({self._pct(tr.test_ratio)}% ratio)"
            )
            if tr.organisation:
                lines.append(f"- **Organisation:** {tr.organisation}")
            if tr.assertion_style:
                lines.append(f"- **Assertions:** `{tr.assertion_style}(...)`")
            if tr.has_coverage and tr.coverage_tool:
                lines.append(f"- **Coverage tool:** `{tr.coverage_tool}`")
            if tr.has_mocking and tr.mock_library:
                lines.append(f"- **Mocking:** `{tr.mock_library}`")
            flags = []
            if tr.has_fixtures:
                flags.append("fixtures")
            if tr.has_factories:
                flags.append("factories")
            if flags:
                lines.append(f"- **Patterns:** {', '.join(flags)}")
            lines.append("")
        return lines if rendered else []

    def _section_api(self) -> list[str]:
        """Build the "API Patterns" section, one sub-section per language."""
        lines = ["## API Patterns", ""]
        for lang, ar in self.api.items():
            lines.append(f"### {lang.capitalize()}")
            lines.append("")
            if ar.api_frameworks:
                lines.append(f"- **Framework:** {', '.join(ar.api_frameworks)}")
            if ar.async_pattern:
                lines.append(f"- **Async style:** {ar.async_pattern}")
            if ar.response_shape:
                lines.append(f"- **Response shape:** `{ar.response_shape}`")
            if ar.route_param_style:
                lines.append(f"- **Route params:** {ar.route_param_style}")
            if ar.orm:
                lines.append(f"- **ORM/Query layer:** {ar.orm}")
            if ar.http_client:
                lines.append(f"- **HTTP client:** `{ar.http_client}`")
            if ar.has_graphql:
                lines.append("- **GraphQL:** yes")
            if ar.has_grpc:
                lines.append("- **gRPC/protobuf:** yes")
            lines.append("")
        return lines

    def _section_git(self) -> list[str]:
        """Build the "Git Conventions" section from ``self.git``."""
        g = self.git
        if not g:
            return []
        lines = ["## Git Conventions", ""]
        if g.commit_style:
            lines.append(f"- **Commit style:** {g.commit_style}")
        if g.commit_examples:
            lines.append("- **Examples:**")
            lines.extend(f" - `{ex}`" for ex in g.commit_examples[:3])
        if g.branch_style:
            lines.append(f"- **Branch naming:** {g.branch_style}")
        if g.avg_files_per_commit > 0:
            lines.append(f"- **Avg files/commit:** {g.avg_files_per_commit}")
        lines.extend(f"> {note}" for note in g.notes)
        if g.cochange_pairs:
            lines.append("")
            lines.append("**Files that change together:**")
            for a, b, count in g.cochange_pairs[:3]:
                lines.append(f" - `{a}` ↔ `{b}` ({count}x)")
        lines.append("")
        return lines

    def _section_quick_ref(self) -> list[str]:
        """Build the AI-optimised quick-reference table.

        Returns ``[]`` when there is nothing to list, so an empty report does
        not end in a header-only table.  Cell text has ``|`` escaped so a
        value containing a pipe cannot break the row.
        """
        rows: list[tuple[str, str]] = []

        for lang, nr in self.naming.items():
            if nr.functions:
                rows.append((
                    f"{lang} functions",
                    f"`{nr.functions.style}` "
                    f"({self._pct(nr.functions.confidence)}% consistent)",
                ))
            if nr.classes:
                rows.append((f"{lang} classes", f"`{nr.classes.style}`"))

        if self.structure:
            s = self.structure
            if s.source_root:
                rows.append(("Source root", f"`{s.source_root}/`"))
            if s.test_layout:
                rows.append(("Test layout", f"{s.test_layout}"))
            if s.organisation:
                rows.append(("Structure", f"{s.organisation}-based"))

        for lang, er in self.errors.items():
            rows.append((f"{lang} errors", f"{er.primary_pattern}"))

        for lang, tr in self.testing.items():
            if tr.framework:
                rows.append((f"{lang} test framework", f"{tr.framework}"))

        if self.git and self.git.commit_style:
            rows.append(("Commit style", f"{self.git.commit_style}"))

        if self.config:
            if self.config.package_manager:
                rows.append(("Package manager", f"{self.config.package_manager}"))
            if self.config.linters:
                rows.append(("Linters", ", ".join(self.config.linters)))
            if self.config.formatters:
                rows.append(("Formatters", ", ".join(self.config.formatters)))

        if not rows:
            return []

        lines = ["## Quick Reference", "", "<!-- AI agents: read this section first -->", ""]
        lines.append("| Convention | Rule |")
        lines.append("|---|---|")
        lines.extend(f"| {self._cell(label)} | {self._cell(rule)} |" for label, rule in rows)
        lines.append("")
        return lines
patchwork/scanner.py ADDED
@@ -0,0 +1,162 @@
1
+ """
2
+ Core scanner: discovers files, dispatches language miners, aggregates results.
3
+ """
4
+ from __future__ import annotations
5
+
6
+ import os
7
+ import time
8
+ from dataclasses import dataclass, field
9
+ from pathlib import Path
10
+ from typing import Iterator
11
+
12
+ import pathspec
13
+
14
+ from patchwork.miners.naming import NamingMiner
15
+ from patchwork.miners.imports import ImportMiner
16
+ from patchwork.miners.structure import StructureMiner
17
+ from patchwork.miners.error_handling import ErrorHandlingMiner
18
+ from patchwork.miners.testing import TestingMiner
19
+ from patchwork.miners.api_patterns import APIPatternMiner
20
+ from patchwork.miners.git_patterns import GitPatternMiner
21
+ from patchwork.miners.config_detector import ConfigDetector
22
+ from patchwork.output.report import ConventionReport # noqa: E402 — keep at top
23
+
24
# File extensions → language tags.
# Keys are lowercase suffixes including the leading dot; `_iter_source_files`
# looks files up with `fpath.suffix.lower()`, and any suffix that is not a key
# here is not scanned.
LANGUAGE_MAP: dict[str, str] = {
    ".py": "python",
    ".js": "javascript",
    ".mjs": "javascript",
    ".cjs": "javascript",
    ".jsx": "javascript",
    ".ts": "typescript",
    ".tsx": "typescript",
    ".go": "go",
    ".rs": "rust",
    ".java": "java",
    ".rb": "ruby",
    ".php": "php",
    ".cs": "csharp",
    ".cpp": "cpp",
    ".cc": "cpp",
    ".c": "c",
    ".h": "c",  # headers are always tagged "c", even in a C++ project
    ".swift": "swift",
    ".kt": "kotlin",
    ".scala": "scala",
}
47
+
48
# Ignore patterns applied to every scan.  `_build_ignore_spec` puts these
# first, then the caller's extra patterns, then the lines of `<root>/.gitignore`.
# A trailing "/" marks a directory pattern.
DEFAULT_IGNORE_PATTERNS = [
    "node_modules/", ".git/", "__pycache__/", ".venv/", "venv/",
    "dist/", "build/", ".next/", ".nuxt/", "target/",
    "*.min.js", "*.min.css", "*.bundle.js",
    "*.lock", "package-lock.json", "yarn.lock",
    ".mypy_cache/", ".pytest_cache/", ".ruff_cache/",
    "*.egg-info/", "site-packages/",
    "vendor/", "third_party/",
    "*.pb.go", "*.generated.*", "*_gen.*",
]
58
+
59
+
60
@dataclass
class ScanOptions:
    """Options for one `scan` run."""

    # Directory to scan; `scan` calls `.resolve()` on it before use.
    root: Path
    # File discovery stops once this many source files have been yielded.
    max_files: int = 500
    # Files whose size exceeds this many KiB (value * 1024 bytes) are skipped.
    max_file_size_kb: int = 500
    # When false, `scan` does not run GitPatternMiner and the report's `git` is None.
    include_git: bool = True
    languages: list[str] = field(default_factory=list)  # empty = all
    # Extra ignore patterns, added after DEFAULT_IGNORE_PATTERNS.
    extra_ignore: list[str] = field(default_factory=list)
    # Not read by any code visible in this module.
    verbose: bool = False
69
+
70
+
71
def _build_ignore_spec(root: Path, extra: list[str]) -> pathspec.PathSpec:
    """Compile the ignore rules for a scan rooted at *root*.

    Patterns are combined in this order: ``DEFAULT_IGNORE_PATTERNS``, then
    *extra*, then the lines of ``root/.gitignore`` when that file exists.
    Only the top-level ``.gitignore`` is read.

    Args:
        root: Project root directory.
        extra: Additional gitignore-style patterns supplied by the caller.

    Returns:
        A ``PathSpec`` built from the combined patterns.
    """
    patterns = [*DEFAULT_IGNORE_PATTERNS, *extra]
    gitignore = root / ".gitignore"
    # is_file() rather than exists(): a directory named ".gitignore" cannot be read.
    if gitignore.is_file():
        # Decode explicitly instead of using the locale codec, which fails or
        # produces mojibake on non-UTF-8 platforms.  "utf-8-sig" also drops a
        # leading BOM that would otherwise corrupt the first pattern, and
        # errors="replace" keeps one stray byte from aborting the scan.
        text = gitignore.read_text(encoding="utf-8-sig", errors="replace")
        patterns.extend(text.splitlines())
    return pathspec.PathSpec.from_lines("gitignore", patterns)
78
+
79
+
80
def _iter_source_files(
    root: Path,
    spec: pathspec.PathSpec,
    languages: list[str],
    max_files: int,
    max_file_size_kb: int,
) -> Iterator[tuple[Path, str]]:
    """Yield ``(path, language)`` for every scannable source file under *root*.

    Directories and files are visited in sorted order, so the set of files
    returned under the *max_files* cap is the same on every run.

    Args:
        root: Directory to walk.
        spec: Ignore rules; matching directories are pruned and matching
            files are skipped.
        languages: Language tags to keep; empty means all languages.
        max_files: Upper bound on the number of files yielded.  Zero or a
            negative value yields nothing.
        max_file_size_kb: Files larger than this many KiB are skipped.

    Yields:
        ``(absolute-or-root-relative path as produced by os.walk, language tag)``.
    """
    if max_files <= 0:
        return

    size_limit = max_file_size_kb * 1024
    wanted = frozenset(languages or ())
    count = 0

    for dirpath, dirnames, filenames in os.walk(root):
        base = Path(dirpath)
        rel_dir = base.relative_to(root)
        # Prune ignored directories in place so os.walk never descends into them.
        dirnames[:] = sorted(
            d for d in dirnames
            if not spec.match_file(str(rel_dir / d) + "/")
        )
        for fname in sorted(filenames):
            if spec.match_file(str(rel_dir / fname)):
                continue
            fpath = base / fname
            lang = LANGUAGE_MAP.get(fpath.suffix.lower())
            if lang is None:
                continue
            if wanted and lang not in wanted:
                continue
            try:
                size = fpath.stat().st_size
            except OSError:
                # Dangling symlink, or the file vanished or became unreadable
                # mid-walk: skip it rather than abort the whole scan.
                continue
            if size > size_limit:
                continue
            yield fpath, lang
            count += 1
            if count >= max_files:
                return
112
+
113
+
114
def scan(opts: ScanOptions) -> ConventionReport:
    """Run the whole pipeline and return the aggregated report.

    Steps: detect the project config, discover source files under the
    resolved root, group them by language, run each miner, and assemble a
    ``ConventionReport``.  When no source file is found the report carries
    only the root, the config and the elapsed time.
    """
    started = time.perf_counter()
    project_root = opts.root.resolve()

    # Config/stack detection comes first (no AST needed).
    detected_config = ConfigDetector(project_root).detect()

    ignore_spec = _build_ignore_spec(project_root, opts.extra_ignore)
    discovered: list[tuple[Path, str]] = list(
        _iter_source_files(
            project_root,
            ignore_spec,
            opts.languages,
            opts.max_files,
            opts.max_file_size_kb,
        )
    )

    if not discovered:
        return ConventionReport(
            root=project_root,
            config=detected_config,
            elapsed=time.perf_counter() - started,
        )

    # Group paths by language so each miner gets one list per language.
    paths_by_lang: dict[str, list[Path]] = {}
    for path, language in discovered:
        paths_by_lang.setdefault(language, []).append(path)

    # Miners run in this fixed order; dict literals evaluate top to bottom.
    findings = {
        "naming": NamingMiner().mine(paths_by_lang),
        "imports": ImportMiner().mine(paths_by_lang),
        "structure": StructureMiner(project_root).mine(discovered),
        "errors": ErrorHandlingMiner().mine(paths_by_lang),
        "testing": TestingMiner(project_root).mine(paths_by_lang),
        "api": APIPatternMiner().mine(paths_by_lang),
        "git": GitPatternMiner(project_root).mine() if opts.include_git else None,
    }

    duration = time.perf_counter() - started

    return ConventionReport(
        root=project_root,
        config=detected_config,
        file_count=len(discovered),
        by_lang={language: len(paths) for language, paths in paths_by_lang.items()},
        elapsed=duration,
        **findings,
    )