patchwork-conventions 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- patchwork/__init__.py +10 -0
- patchwork/cli.py +336 -0
- patchwork/mcp/__init__.py +1 -0
- patchwork/mcp/server.py +442 -0
- patchwork/miners/__init__.py +1 -0
- patchwork/miners/api_patterns.py +204 -0
- patchwork/miners/ast_base.py +113 -0
- patchwork/miners/config_detector.py +273 -0
- patchwork/miners/error_handling.py +207 -0
- patchwork/miners/git_patterns.py +169 -0
- patchwork/miners/imports.py +158 -0
- patchwork/miners/naming.py +277 -0
- patchwork/miners/structure.py +204 -0
- patchwork/miners/testing.py +204 -0
- patchwork/output/__init__.py +1 -0
- patchwork/output/report.py +417 -0
- patchwork/scanner.py +162 -0
- patchwork_conventions-0.1.0.dist-info/METADATA +393 -0
- patchwork_conventions-0.1.0.dist-info/RECORD +23 -0
- patchwork_conventions-0.1.0.dist-info/WHEEL +5 -0
- patchwork_conventions-0.1.0.dist-info/entry_points.txt +2 -0
- patchwork_conventions-0.1.0.dist-info/licenses/LICENSE +21 -0
- patchwork_conventions-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,417 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ConventionReport — the aggregated result of a scan.
|
|
3
|
+
Can render to:
|
|
4
|
+
- Markdown (CONVENTIONS.md)
|
|
5
|
+
- AGENTS.md format
|
|
6
|
+
- JSON (for MCP/programmatic use)
|
|
7
|
+
- Rich terminal summary
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from datetime import datetime, timezone
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import TYPE_CHECKING
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from patchwork.miners.config_detector import ProjectConfig
|
|
19
|
+
from patchwork.miners.naming import NamingResult
|
|
20
|
+
from patchwork.miners.imports import ImportResult
|
|
21
|
+
from patchwork.miners.structure import StructureResult
|
|
22
|
+
from patchwork.miners.error_handling import ErrorResult
|
|
23
|
+
from patchwork.miners.testing import TestingResult
|
|
24
|
+
from patchwork.miners.api_patterns import APIResult
|
|
25
|
+
from patchwork.miners.git_patterns import GitResult
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class ConventionReport:
    """Aggregated findings of one scan, renderable as Markdown or JSON.

    The per-language findings (``naming``, ``imports``, ``errors``,
    ``testing``, ``api``) are dicts keyed by language tag. ``config``,
    ``structure`` and ``git`` are single project-wide results and may be
    ``None``, in which case their sections are omitted.
    """

    root: Path  # directory the report describes
    config: "ProjectConfig | None" = None
    file_count: int = 0  # shown in the Markdown header and in the JSON output
    by_lang: dict[str, int] = field(default_factory=dict)  # language tag -> count
    naming: dict[str, "NamingResult"] = field(default_factory=dict)
    imports: dict[str, "ImportResult"] = field(default_factory=dict)
    structure: "StructureResult | None" = None
    errors: dict[str, "ErrorResult"] = field(default_factory=dict)
    testing: dict[str, "TestingResult"] = field(default_factory=dict)
    api: dict[str, "APIResult"] = field(default_factory=dict)
    git: "GitResult | None" = None
    elapsed: float = 0.0  # rendered as seconds

    # ── Small formatting helpers ──────────────────────────────────────────────

    @staticmethod
    def _pct(fraction: float) -> int:
        """Convert a 0-1 fraction to a whole percentage, rounding down.

        The product is rounded to 9 decimals before truncation so binary
        floating-point noise (``0.29 * 100 == 28.999999999999996``) cannot
        knock a value down to the previous integer.
        """
        return int(round(fraction * 100, 9))

    @staticmethod
    def _code_list(items, limit: "int | None" = None) -> str:
        """Join *items* as comma-separated inline-code spans.

        When *limit* is given only the first *limit* items are used (the
        sequence is sliced); without a limit any iterable is accepted.
        """
        chosen = items if limit is None else items[:limit]
        return ", ".join(f"`{item}`" for item in chosen)

    # ── Rendering ─────────────────────────────────────────────────────────────

    def to_markdown(self, *, agents_md: bool = False) -> str:
        """Render the full CONVENTIONS.md (or AGENTS.md) content.

        Args:
            agents_md: Use ``AGENTS.md`` as the document title instead of
                ``CONVENTIONS.md``. Nothing else changes.

        Returns:
            The document as one string: a generated header, each section
            whose data is present, then the quick-reference table.
        """
        title = "AGENTS.md" if agents_md else "CONVENTIONS.md"
        stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d")

        doc: list[str] = [
            f"# {title}",
            f"> Auto-generated by [patchwork](https://github.com/yourusername/patchwork) on {stamp} ",
            f"> Scanned {self.file_count} files in {self.elapsed:.1f}s",
            "> **Do not edit manually** — run `patchwork update` to refresh",
            "",
        ]

        # (data, builder) pairs in document order; a section is emitted only
        # when its data is truthy.
        optional_sections = (
            (self.config, self._section_stack),
            (self.structure, self._section_structure),
            (self.naming, self._section_naming),
            (self.imports, self._section_imports),
            (self.errors, self._section_errors),
            (self.testing, self._section_testing),
            (self.api, self._section_api),
            (self.git, self._section_git),
        )
        for data, build in optional_sections:
            if data:
                doc += build()

        # Quick reference card (AI-optimised) is always appended.
        doc += self._section_quick_ref()
        return "\n".join(doc)

    def to_json(self) -> str:
        """Return all findings serialised as an indented JSON string.

        Values that ``_to_dict`` leaves in a non-JSON type are stringified
        via ``default=str``.
        """
        return json.dumps(self._to_dict(), indent=2, default=str)

    def _to_dict(self) -> dict:
        """Return the findings as a plain dict ready for ``json.dumps``.

        Dataclass instances become dicts of their attributes, ``Path`` values
        become strings, and lists, tuples and sets become lists (sets are
        ordered by ``str`` so the output is stable between runs).
        ``scanned_at`` is the current UTC time in ISO format.
        """

        def _conv(obj):
            if hasattr(obj, "__dataclass_fields__") and not isinstance(obj, type):
                return {k: _conv(v) for k, v in obj.__dict__.items()}
            if isinstance(obj, dict):
                return {k: _conv(v) for k, v in obj.items()}
            if isinstance(obj, (list, tuple)):
                return [_conv(v) for v in obj]
            if isinstance(obj, (set, frozenset)):
                return [_conv(v) for v in sorted(obj, key=str)]
            if isinstance(obj, Path):
                return str(obj)
            return obj

        return {
            "root": str(self.root),
            "scanned_at": datetime.now(timezone.utc).isoformat(),
            "file_count": self.file_count,
            "by_lang": self.by_lang,
            "elapsed_s": round(self.elapsed, 3),
            "config": _conv(self.config),
            "naming": _conv(self.naming),
            "imports": _conv(self.imports),
            "structure": _conv(self.structure),
            "errors": _conv(self.errors),
            "testing": _conv(self.testing),
            "api": _conv(self.api),
            "git": _conv(self.git),
        }

    # ── Section builders ──────────────────────────────────────────────────────
    # Each builder returns the Markdown lines of one section, ending with a
    # blank line so sections can simply be concatenated.

    def _section_stack(self) -> list[str]:
        """Build the "Tech Stack" section from ``self.config``."""
        cfg = self.config
        lines = ["## Tech Stack", ""]
        if cfg.name:
            version = f" v{cfg.version}" if cfg.version else ""
            lines.append(f"**Project:** `{cfg.name}`" + version)
        if cfg.language:
            lines.append(f"**Language:** {cfg.language}")
        if cfg.runtime:
            lines.append(f"**Runtime:** {cfg.runtime}")
        if cfg.package_manager:
            lines.append(f"**Package Manager:** {cfg.package_manager}")
        if cfg.frameworks:
            lines.append(f"**Frameworks:** {', '.join(cfg.frameworks)}")
        if cfg.linters:
            lines.append(f"**Linters:** {', '.join(cfg.linters)}")
        if cfg.formatters:
            lines.append(f"**Formatters:** {', '.join(cfg.formatters)}")
        if cfg.type_checker:
            lines.append(f"**Type Checker:** {cfg.type_checker}")
        if cfg.build_tool:
            lines.append(f"**Build Tool:** {cfg.build_tool}")
        if cfg.has_docker:
            lines.append("**Docker:** yes")
        if cfg.has_ci and cfg.ci_platform:
            lines.append(f"**CI:** {cfg.ci_platform}")
        if cfg.scripts:
            lines += ["", "**Key Scripts:**", "```"]
            lines += [f"{name}: {cmd}" for name, cmd in cfg.scripts.items()]
            lines.append("```")
        lines.append("")
        return lines

    def _section_structure(self) -> list[str]:
        """Build the "Project Structure" section from ``self.structure``."""
        s = self.structure
        lines = ["## Project Structure", ""]
        if s.is_monorepo:
            lines.append(f"**Layout:** Monorepo ({len(s.monorepo_packages)} packages)")
            lines += [f" - `{pkg}/`" for pkg in s.monorepo_packages[:8]]
        else:
            if s.source_root:
                lines.append(f"**Source root:** `{s.source_root}/`")
        # NOTE(review): the flattened source does not show whether the checks
        # below sat inside the ``else`` above; they are treated as applying
        # to monorepos too — confirm against the packaged file.
        if s.organisation:
            lines.append(f"**Organisation:** {s.organisation}-based")
        if s.test_layout:
            if s.test_dirs:
                test_dirs = ", ".join(f"`{d}/`" for d in s.test_dirs)
            else:
                test_dirs = "co-located"
            lines.append(f"**Tests:** {s.test_layout} ({test_dirs})")
        if s.key_dirs:
            lines += ["", "**Key directories:**"]
            lines += [f" - `{d}/` — {role}" for d, role in s.key_dirs.items()]
        if s.notes:
            lines += [f"> {note}" for note in s.notes]
        lines.append("")
        return lines

    def _section_naming(self) -> list[str]:
        """Build the "Naming Conventions" section, one subsection per language.

        Languages with no function, class or variable finding are skipped.
        Returns an empty list when every language is skipped, so the document
        never carries a heading with nothing under it.
        """
        body: list[str] = []
        for lang, nr in self.naming.items():
            if not (nr.functions or nr.classes or nr.variables):
                continue
            body += [f"### {lang.capitalize()}", ""]

            if nr.functions:
                fn = nr.functions
                body.append(
                    f"- **Functions:** `{fn.style}` "
                    f"({self._pct(fn.confidence)}% consistent)"
                )
                if fn.examples:
                    body.append(f" - Examples: {self._code_list(fn.examples, 4)}")
                # Counter-examples are only listed when consistency is below 90%.
                if fn.counter_examples and fn.confidence < 0.9:
                    body.append(f" - Exceptions: {self._code_list(fn.counter_examples, 2)}")

            if nr.classes:
                cls = nr.classes
                body.append(
                    f"- **Classes:** `{cls.style}` ({self._pct(cls.confidence)}% consistent)"
                )
                if cls.examples:
                    body.append(f" - Examples: {self._code_list(cls.examples, 4)}")

            if nr.variables:
                body.append(f"- **Variables:** `{nr.variables.style}`")

            if nr.constants and nr.constants.examples:
                body.append(f"- **Constants:** `{nr.constants.style}`")
                body.append(f" - Examples: {self._code_list(nr.constants.examples, 3)}")

            if nr.files:
                body.append(f"- **Files:** `{nr.files.style}`")

            if nr.private_prefix:
                body.append(f"- **Private prefix:** `{nr.private_prefix}`")

            if nr.test_prefix:
                body.append(f"- **Test functions:** prefix `{nr.test_prefix}`")

            body += [f"> ⚠️ {note}" for note in nr.notes]
            body.append("")

        if not body:
            return []
        return ["## Naming Conventions", "", *body]

    def _section_imports(self) -> list[str]:
        """Build the "Import Conventions" section, one subsection per language."""
        lines = ["## Import Conventions", ""]
        for lang, ir in self.imports.items():
            lines += [f"### {lang.capitalize()}", ""]
            lines.append(f"- **Style:** {ir.style} imports")
            if ir.aliases_used:
                lines.append(f"- **Path aliases:** {self._code_list(ir.aliases_used)}")
            if ir.destructuring:
                lines.append(f"- **Import syntax:** {ir.destructuring}")
            if ir.barrel_files:
                lines.append("- **Barrel files:** `index.ts` re-exports are used")
            if ir.common_third_party:
                lines.append(
                    "- **Key dependencies:** "
                    f"{self._code_list(ir.common_third_party, 6)}"
                )
            lines.append("")
        return lines

    def _section_errors(self) -> list[str]:
        """Build the "Error Handling" section, one subsection per language."""
        lines = ["## Error Handling", ""]
        for lang, er in self.errors.items():
            lines += [f"### {lang.capitalize()}", ""]
            lines.append(f"- **Pattern:** {er.primary_pattern}")
            if er.propagation_style:
                lines.append(f"- **Propagation:** {er.propagation_style}")
            if er.logging_framework:
                lines.append(f"- **Logging:** `{er.logging_framework}`")
            if er.exception_naming:
                lines.append(f"- **Custom exception naming:** {er.exception_naming}")
            if er.custom_exceptions:
                lines.append(
                    "- **Custom exceptions:** "
                    f"{self._code_list(er.custom_exceptions, 6)}"
                )
            lines += [f"> {note}" for note in er.notes]
            lines.append("")
        return lines

    def _section_testing(self) -> list[str]:
        """Build the "Testing Conventions" section, one subsection per language.

        A language with no test files and no detected framework is skipped.
        Returns an empty list when every language is skipped.
        """
        body: list[str] = []
        for lang, tr in self.testing.items():
            if tr.test_file_count == 0 and not tr.framework:
                continue
            body += [f"### {lang.capitalize()}", ""]
            if tr.framework:
                body.append(f"- **Framework:** {tr.framework}")
            body.append(
                f"- **Coverage:** {tr.test_file_count} test files / "
                f"{tr.source_file_count} source files "
                f"({self._pct(tr.test_ratio)}% ratio)"
            )
            if tr.organisation:
                body.append(f"- **Organisation:** {tr.organisation}")
            if tr.assertion_style:
                body.append(f"- **Assertions:** `{tr.assertion_style}(...)`")
            if tr.has_coverage and tr.coverage_tool:
                body.append(f"- **Coverage tool:** `{tr.coverage_tool}`")
            if tr.has_mocking and tr.mock_library:
                body.append(f"- **Mocking:** `{tr.mock_library}`")
            flags = []
            if tr.has_fixtures:
                flags.append("fixtures")
            if tr.has_factories:
                flags.append("factories")
            if flags:
                body.append(f"- **Patterns:** {', '.join(flags)}")
            body.append("")

        if not body:
            return []
        return ["## Testing Conventions", "", *body]

    def _section_api(self) -> list[str]:
        """Build the "API Patterns" section, one subsection per language."""
        lines = ["## API Patterns", ""]
        for lang, ar in self.api.items():
            lines += [f"### {lang.capitalize()}", ""]
            if ar.api_frameworks:
                lines.append(f"- **Framework:** {', '.join(ar.api_frameworks)}")
            if ar.async_pattern:
                lines.append(f"- **Async style:** {ar.async_pattern}")
            if ar.response_shape:
                lines.append(f"- **Response shape:** `{ar.response_shape}`")
            if ar.route_param_style:
                lines.append(f"- **Route params:** {ar.route_param_style}")
            if ar.orm:
                lines.append(f"- **ORM/Query layer:** {ar.orm}")
            if ar.http_client:
                lines.append(f"- **HTTP client:** `{ar.http_client}`")
            if ar.has_graphql:
                lines.append("- **GraphQL:** yes")
            if ar.has_grpc:
                lines.append("- **gRPC/protobuf:** yes")
            lines.append("")
        return lines

    def _section_git(self) -> list[str]:
        """Build the "Git Conventions" section; empty list when there is no git data."""
        if not self.git:
            return []
        g = self.git
        lines = ["## Git Conventions", ""]
        if g.commit_style:
            lines.append(f"- **Commit style:** {g.commit_style}")
        # NOTE(review): the flattened source does not show whether the examples
        # were nested under the commit-style check; treated as independent.
        if g.commit_examples:
            lines.append("- **Examples:**")
            lines += [f" - `{ex}`" for ex in g.commit_examples[:3]]
        if g.branch_style:
            lines.append(f"- **Branch naming:** {g.branch_style}")
        if g.avg_files_per_commit > 0:
            lines.append(f"- **Avg files/commit:** {g.avg_files_per_commit}")
        lines += [f"> {note}" for note in g.notes]
        if g.cochange_pairs:
            lines += ["", "**Files that change together:**"]
            lines += [
                f" - `{a}` ↔ `{b}` ({count}x)" for a, b, count in g.cochange_pairs[:3]
            ]
        lines.append("")
        return lines

    def _section_quick_ref(self) -> list[str]:
        """AI-optimised quick reference — most useful for agents.

        Emits a two-column Markdown table summarising the headline rule from
        each kind of finding that is present.
        """
        lines = ["## Quick Reference", "", "<!-- AI agents: read this section first -->", ""]
        lines.append("| Convention | Rule |")
        lines.append("|---|---|")

        for lang, nr in self.naming.items():
            if nr.functions:
                lines.append(
                    f"| {lang} functions | `{nr.functions.style}` "
                    f"({self._pct(nr.functions.confidence)}% consistent) |"
                )
            if nr.classes:
                lines.append(f"| {lang} classes | `{nr.classes.style}` |")

        if self.structure:
            s = self.structure
            if s.source_root:
                lines.append(f"| Source root | `{s.source_root}/` |")
            if s.test_layout:
                lines.append(f"| Test layout | {s.test_layout} |")
            if s.organisation:
                lines.append(f"| Structure | {s.organisation}-based |")

        for lang, er in self.errors.items():
            lines.append(f"| {lang} errors | {er.primary_pattern} |")

        for lang, tr in self.testing.items():
            if tr.framework:
                lines.append(f"| {lang} test framework | {tr.framework} |")

        if self.git and self.git.commit_style:
            lines.append(f"| Commit style | {self.git.commit_style} |")

        if self.config:
            if self.config.package_manager:
                lines.append(f"| Package manager | {self.config.package_manager} |")
            if self.config.linters:
                lines.append(f"| Linters | {', '.join(self.config.linters)} |")
            if self.config.formatters:
                lines.append(f"| Formatters | {', '.join(self.config.formatters)} |")

        lines.append("")
        return lines
|
patchwork/scanner.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core scanner: discovers files, dispatches language miners, aggregates results.
|
|
3
|
+
"""
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
import time
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Iterator
|
|
11
|
+
|
|
12
|
+
import pathspec
|
|
13
|
+
|
|
14
|
+
from patchwork.miners.naming import NamingMiner
|
|
15
|
+
from patchwork.miners.imports import ImportMiner
|
|
16
|
+
from patchwork.miners.structure import StructureMiner
|
|
17
|
+
from patchwork.miners.error_handling import ErrorHandlingMiner
|
|
18
|
+
from patchwork.miners.testing import TestingMiner
|
|
19
|
+
from patchwork.miners.api_patterns import APIPatternMiner
|
|
20
|
+
from patchwork.miners.git_patterns import GitPatternMiner
|
|
21
|
+
from patchwork.miners.config_detector import ConfigDetector
|
|
22
|
+
from patchwork.output.report import ConventionReport # noqa: E402 — keep at top
|
|
23
|
+
|
|
24
|
+
# File extensions → language tags
|
|
25
|
+
LANGUAGE_MAP: dict[str, str] = {
    # Python
    ".py": "python",
    # JavaScript and its module/JSX variants
    ".js": "javascript", ".mjs": "javascript", ".cjs": "javascript", ".jsx": "javascript",
    # TypeScript
    ".ts": "typescript", ".tsx": "typescript",
    # Other languages, one tag per family
    ".go": "go",
    ".rs": "rust",
    ".java": "java",
    ".rb": "ruby",
    ".php": "php",
    ".cs": "csharp",
    ".cpp": "cpp", ".cc": "cpp",
    # Plain .h headers are tagged as C
    ".c": "c", ".h": "c",
    ".swift": "swift",
    ".kt": "kotlin",
    ".scala": "scala",
}
|
|
47
|
+
|
|
48
|
+
# gitignore-style patterns that are always excluded from a scan.
DEFAULT_IGNORE_PATTERNS = [
    # Dependency, VCS and virtual-environment directories
    "node_modules/",
    ".git/",
    "__pycache__/",
    ".venv/",
    "venv/",
    # Build output
    "dist/",
    "build/",
    ".next/",
    ".nuxt/",
    "target/",
    # Minified / bundled assets
    "*.min.js",
    "*.min.css",
    "*.bundle.js",
    # Lock files
    "*.lock",
    "package-lock.json",
    "yarn.lock",
    # Tool caches
    ".mypy_cache/",
    ".pytest_cache/",
    ".ruff_cache/",
    # Installed packages
    "*.egg-info/",
    "site-packages/",
    # Vendored code
    "vendor/",
    "third_party/",
    # Generated sources
    "*.pb.go",
    "*.generated.*",
    "*_gen.*",
]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass
class ScanOptions:
    """Settings that control a single ``scan`` run."""

    # Directory to scan; ``scan`` resolves it before use.
    root: Path
    # Upper bound on how many source files are collected.
    max_files: int = 500
    # Files larger than this many KiB are skipped.
    max_file_size_kb: int = 500
    # When false the git-history miner is not run.
    include_git: bool = True
    # Language tags to keep; an empty list keeps every known language.
    languages: list[str] = field(default_factory=list)
    # Extra ignore patterns added to the built-in ones.
    extra_ignore: list[str] = field(default_factory=list)
    # Not referenced by the code visible in this module.
    verbose: bool = False
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _build_ignore_spec(root: Path, extra: list[str]) -> pathspec.PathSpec:
    """Build the ignore matcher used while walking *root*.

    Patterns are combined in this order: ``DEFAULT_IGNORE_PATTERNS``, the
    caller's *extra* patterns, then every line of ``root/.gitignore`` when
    that file exists.

    Args:
        root: Project root whose ``.gitignore`` (if any) is honoured.
        extra: Additional gitignore-style patterns.

    Returns:
        A ``pathspec.PathSpec`` compiled from the combined patterns.
    """
    patterns = [*DEFAULT_IGNORE_PATTERNS, *extra]
    gitignore = root / ".gitignore"
    # is_file() rather than exists(): a directory named .gitignore must not
    # be opened as text.
    if gitignore.is_file():
        # Decode explicitly instead of relying on the platform default
        # encoding; "utf-8-sig" also drops a leading BOM, and undecodable
        # bytes are replaced rather than aborting the scan.
        text = gitignore.read_text(encoding="utf-8-sig", errors="replace")
        patterns.extend(text.splitlines())
    return pathspec.PathSpec.from_lines("gitignore", patterns)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _iter_source_files(
    root: Path,
    spec: pathspec.PathSpec,
    languages: list[str],
    max_files: int,
    max_file_size_kb: int,
) -> Iterator[tuple[Path, str]]:
    """Yield (path, language) for every scannable source file.

    Walks *root*, pruning directories matched by *spec*. Files matched by
    *spec*, files whose suffix is not in ``LANGUAGE_MAP``, and files larger
    than *max_file_size_kb* KiB are skipped.

    Args:
        root: Directory to walk.
        spec: Ignore matcher, applied to paths relative to *root*.
        languages: Language tags to keep; empty keeps every known language.
        max_files: Stop after yielding this many files; zero or a negative
            value yields nothing.
        max_file_size_kb: Size cap per file, in KiB.

    Yields:
        ``(absolute_or_root_relative_path, language_tag)`` pairs, where the
        path is ``dirpath / filename`` as produced by ``os.walk(root)``.
    """
    if max_files <= 0:
        return

    size_limit = max_file_size_kb * 1024
    count = 0
    for dirpath, dirnames, filenames in os.walk(root):
        here = Path(dirpath)
        rel_dir = here.relative_to(root)
        # Prune ignored directories in-place so os.walk never descends into
        # them; the trailing "/" lets directory-only patterns match.
        dirnames[:] = [
            d for d in dirnames
            if not spec.match_file(str(rel_dir / d) + "/")
        ]
        for fname in filenames:
            fpath = here / fname
            if spec.match_file(str(fpath.relative_to(root))):
                continue
            lang = LANGUAGE_MAP.get(fpath.suffix.lower())
            if lang is None:
                continue
            if languages and lang not in languages:
                continue
            try:
                size = fpath.stat().st_size
            except OSError:
                # Broken symlink, file removed mid-walk, or no permission:
                # skip this entry instead of aborting the whole scan.
                continue
            if size > size_limit:
                continue
            yield fpath, lang
            count += 1
            if count >= max_files:
                return
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def scan(opts: ScanOptions) -> ConventionReport:
    """Run the whole pipeline and return the aggregated report.

    Steps: detect the project configuration, collect source files under the
    resolved root, group them by language, run each miner, and bundle the
    results together with the elapsed wall-clock time. When no source file
    is found the report carries only the root, the config and the timing.
    """
    started = time.perf_counter()
    root = opts.root.resolve()

    # Project config/stack detection comes first (no AST needed).
    config = ConfigDetector(root).detect()

    # Collect every scannable source file.
    ignore_spec = _build_ignore_spec(root, opts.extra_ignore)
    files: list[tuple[Path, str]] = list(
        _iter_source_files(
            root, ignore_spec, opts.languages, opts.max_files, opts.max_file_size_kb
        )
    )
    if not files:
        return ConventionReport(
            root=root, config=config, elapsed=time.perf_counter() - started
        )

    # Bucket paths by language so each miner gets them pre-grouped.
    by_lang: dict[str, list[Path]] = {}
    for path, language in files:
        if language not in by_lang:
            by_lang[language] = []
        by_lang[language].append(path)

    # Miners run in this fixed order; dict literals evaluate top to bottom.
    findings = {
        "naming": NamingMiner().mine(by_lang),
        "imports": ImportMiner().mine(by_lang),
        "structure": StructureMiner(root).mine(files),
        "errors": ErrorHandlingMiner().mine(by_lang),
        "testing": TestingMiner(root).mine(by_lang),
        "api": APIPatternMiner().mine(by_lang),
        "git": GitPatternMiner(root).mine() if opts.include_git else None,
    }
    elapsed = time.perf_counter() - started

    return ConventionReport(
        root=root,
        config=config,
        file_count=len(files),
        by_lang={language: len(paths) for language, paths in by_lang.items()},
        elapsed=elapsed,
        **findings,
    )
|