patchwork-conventions 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- patchwork/__init__.py +10 -0
- patchwork/cli.py +336 -0
- patchwork/mcp/__init__.py +1 -0
- patchwork/mcp/server.py +442 -0
- patchwork/miners/__init__.py +1 -0
- patchwork/miners/api_patterns.py +204 -0
- patchwork/miners/ast_base.py +113 -0
- patchwork/miners/config_detector.py +273 -0
- patchwork/miners/error_handling.py +207 -0
- patchwork/miners/git_patterns.py +169 -0
- patchwork/miners/imports.py +158 -0
- patchwork/miners/naming.py +277 -0
- patchwork/miners/structure.py +204 -0
- patchwork/miners/testing.py +204 -0
- patchwork/output/__init__.py +1 -0
- patchwork/output/report.py +417 -0
- patchwork/scanner.py +162 -0
- patchwork_conventions-0.1.0.dist-info/METADATA +393 -0
- patchwork_conventions-0.1.0.dist-info/RECORD +23 -0
- patchwork_conventions-0.1.0.dist-info/WHEEL +5 -0
- patchwork_conventions-0.1.0.dist-info/entry_points.txt +2 -0
- patchwork_conventions-0.1.0.dist-info/licenses/LICENSE +21 -0
- patchwork_conventions-0.1.0.dist-info/top_level.txt +1 -0
patchwork/mcp/server.py
ADDED
|
@@ -0,0 +1,442 @@
|
|
|
1
|
+
"""
|
|
2
|
+
patchwork MCP server — exposes convention mining as MCP tools.
|
|
3
|
+
|
|
4
|
+
Tools:
|
|
5
|
+
patchwork_scan Full scan → returns CONVENTIONS.md text
|
|
6
|
+
patchwork_naming Naming conventions for a specific language
|
|
7
|
+
patchwork_structure Project structure summary
|
|
8
|
+
patchwork_errors Error handling conventions
|
|
9
|
+
patchwork_testing Testing conventions
|
|
10
|
+
patchwork_stack Tech stack detection
|
|
11
|
+
patchwork_git Git workflow conventions
|
|
12
|
+
patchwork_check Check a symbol/path against detected conventions
|
|
13
|
+
"""
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import json
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
try:
|
|
20
|
+
from mcp.server import Server
|
|
21
|
+
from mcp.server.stdio import stdio_server
|
|
22
|
+
import mcp.types as types
|
|
23
|
+
MCP_AVAILABLE = True
|
|
24
|
+
except ImportError:
|
|
25
|
+
MCP_AVAILABLE = False
|
|
26
|
+
|
|
27
|
+
from patchwork.scanner import scan as do_scan, ScanOptions
|
|
28
|
+
from patchwork.output.report import ConventionReport
|
|
29
|
+
|
|
30
|
+
# Simple in-process cache keyed by the stringified scan root.
# Entries are not checked against any file mtime: once stored, a report is
# reused until _invalidate() drops it (e.g. when a tool call sets `refresh`).
_CACHE: dict[str, ConventionReport] = {}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _get_or_scan(root: Path) -> ConventionReport:
    """Return the convention report for *root*, scanning only on a cache miss.

    The report is looked up in ``_CACHE`` under ``str(root)``. When absent, a
    scan limited to 300 files is run and its result is stored before being
    returned.
    """
    cache_key = str(root)
    report = _CACHE.get(cache_key)
    if report is None:
        report = do_scan(ScanOptions(root=root, max_files=300))
        _CACHE[cache_key] = report
    return report
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _invalidate(root: Path) -> None:
    """Forget the cached report for *root*; does nothing when none is cached."""
    cache_key = str(root)
    if cache_key in _CACHE:
        del _CACHE[cache_key]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
async def run_server(
    root: Path,
    port: int = 3742,
    stdio: bool = True,
    host: str = "0.0.0.0",
) -> None:
    """Serve the patchwork convention tools over MCP.

    Registers the tool catalogue and the tool dispatcher on a fresh MCP
    ``Server`` and then runs it on the selected transport until it stops.

    Args:
        root: Directory scanned when a tool call does not supply ``path``.
        port: TCP port for the SSE/HTTP transport; unused in stdio mode.
        stdio: Serve over stdin/stdout when true, otherwise over SSE/HTTP.
        host: Interface the SSE/HTTP transport binds to; unused in stdio mode.

    Raises:
        RuntimeError: If the optional ``mcp`` package could not be imported.
    """
    if not MCP_AVAILABLE:
        raise RuntimeError(
            "MCP package not installed. Run: pip install 'patchwork-conventions[mcp]'"
        )

    server = Server("patchwork")

    @server.list_tools()
    async def list_tools() -> list[types.Tool]:
        """Describe every tool this server exposes, with its JSON input schema."""
        return [
            types.Tool(
                name="patchwork_scan",
                description=(
                    "Scan the codebase and return full CONVENTIONS.md content. "
                    "Use this when you need a complete picture of project conventions."
                ),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "path": {
                            "type": "string",
                            "description": "Absolute path to scan (default: current project root)",
                        },
                        "format": {
                            "type": "string",
                            "enum": ["markdown", "json"],
                            "description": "Output format",
                            "default": "markdown",
                        },
                        "refresh": {
                            "type": "boolean",
                            "description": "Force re-scan even if cached",
                            "default": False,
                        },
                    },
                },
            ),
            types.Tool(
                name="patchwork_naming",
                description=(
                    "Return naming conventions for a specific language detected in this project. "
                    "Use before writing new functions, classes, or variables."
                ),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "path": {"type": "string"},
                        "language": {
                            "type": "string",
                            "description": "Language to query (python, typescript, go, etc.)",
                        },
                    },
                    "required": ["language"],
                },
            ),
            types.Tool(
                name="patchwork_structure",
                description=(
                    "Return project structure conventions: source root, test layout, "
                    "organisation style, key directories. Use before creating new files."
                ),
                inputSchema={
                    "type": "object",
                    "properties": {"path": {"type": "string"}},
                },
            ),
            types.Tool(
                name="patchwork_stack",
                description=(
                    "Return the detected tech stack: frameworks, package manager, "
                    "linters, formatters, build tools, scripts."
                ),
                inputSchema={
                    "type": "object",
                    "properties": {"path": {"type": "string"}},
                },
            ),
            types.Tool(
                name="patchwork_errors",
                description=(
                    "Return error-handling conventions for this project. "
                    "Use before writing new error handling code."
                ),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "path": {"type": "string"},
                        "language": {"type": "string"},
                    },
                },
            ),
            types.Tool(
                name="patchwork_testing",
                description=(
                    "Return testing conventions: framework, assertion style, "
                    "test layout, mocking approach."
                ),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "path": {"type": "string"},
                        "language": {"type": "string"},
                    },
                },
            ),
            types.Tool(
                name="patchwork_git",
                description=(
                    "Return git workflow conventions: commit message style, "
                    "branch naming, co-change file pairs."
                ),
                inputSchema={
                    "type": "object",
                    "properties": {"path": {"type": "string"}},
                },
            ),
            types.Tool(
                name="patchwork_check",
                description=(
                    "Check whether a proposed symbol name or file path follows "
                    "this project's conventions. Returns 'ok' or a specific violation."
                ),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "path": {"type": "string"},
                        "name": {
                            "type": "string",
                            "description": "Symbol or file name to check",
                        },
                        "kind": {
                            "type": "string",
                            "enum": ["function", "class", "variable", "constant", "file"],
                            "description": "What kind of name to check",
                        },
                        "language": {"type": "string"},
                    },
                    "required": ["name", "kind", "language"],
                },
            ),
        ]

    @server.call_tool()
    async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
        """Run the tool called *name* and return its answer as text content."""
        arguments = arguments or {}
        # A missing, null or empty "path" all mean "use the server's root";
        # Path(None) would otherwise raise TypeError.
        scan_root = Path(arguments.get("path") or str(root)).resolve()

        if arguments.get("refresh"):
            _invalidate(scan_root)

        report = _get_or_scan(scan_root)

        if name == "patchwork_scan":
            fmt = arguments.get("format", "markdown")
            text = report.to_json() if fmt == "json" else report.to_markdown()
            return [types.TextContent(type="text", text=text)]

        elif name == "patchwork_naming":
            lang = arguments.get("language", "")
            nr = (report.naming or {}).get(lang)
            if nr is None:
                return [types.TextContent(
                    type="text",
                    text=f"No naming data for '{lang}'. Available: {list(report.naming or {})}"
                )]
            lines = [f"Naming conventions for {lang}:"]
            if nr.functions:
                lines.append(
                    f"- functions: {nr.functions.style} "
                    f"({int(nr.functions.confidence * 100)}% consistent)"
                )
                if nr.functions.examples:
                    lines.append(f"  examples: {', '.join(nr.functions.examples[:4])}")
            if nr.classes:
                lines.append(f"- classes: {nr.classes.style}")
                if nr.classes.examples:
                    lines.append(f"  examples: {', '.join(nr.classes.examples[:4])}")
            if nr.variables:
                lines.append(f"- variables: {nr.variables.style}")
            if nr.constants and nr.constants.examples:
                lines.append(f"- constants: {nr.constants.style}")
            if nr.files:
                lines.append(f"- files: {nr.files.style}")
            if nr.private_prefix:
                lines.append(f"- private prefix: {nr.private_prefix}")
            for note in nr.notes:
                lines.append(f"note: {note}")
            return [types.TextContent(type="text", text="\n".join(lines))]

        elif name == "patchwork_structure":
            s = report.structure
            if s is None:
                return [types.TextContent(type="text", text="No structure data available.")]
            lines = ["Project structure:"]
            if s.source_root:
                lines.append(f"- source root: {s.source_root}/")
            if s.test_layout:
                lines.append(f"- test layout: {s.test_layout}")
            if s.test_dirs:
                lines.append(f"  directories: {', '.join(s.test_dirs)}")
            if s.organisation:
                lines.append(f"- organisation: {s.organisation}")
            if s.is_monorepo:
                lines.append(f"- monorepo: {len(s.monorepo_packages)} packages")
                for pkg in s.monorepo_packages[:5]:
                    lines.append(f"  - {pkg}/")
            if s.key_dirs:
                lines.append("- key directories:")
                for d, role in s.key_dirs.items():
                    lines.append(f"  {d}/ = {role}")
            return [types.TextContent(type="text", text="\n".join(lines))]

        elif name == "patchwork_stack":
            cfg = report.config
            if cfg is None:
                return [types.TextContent(type="text", text="No config detected.")]
            lines = ["Tech stack:"]
            if cfg.name:
                lines.append(f"- project: {cfg.name}")
            if cfg.language:
                lines.append(f"- language: {cfg.language}")
            if cfg.runtime:
                lines.append(f"- runtime: {cfg.runtime}")
            if cfg.package_manager:
                lines.append(f"- package manager: {cfg.package_manager}")
            if cfg.frameworks:
                lines.append(f"- frameworks: {', '.join(cfg.frameworks)}")
            if cfg.linters:
                lines.append(f"- linters: {', '.join(cfg.linters)}")
            if cfg.formatters:
                lines.append(f"- formatters: {', '.join(cfg.formatters)}")
            if cfg.type_checker:
                lines.append(f"- type checker: {cfg.type_checker}")
            if cfg.scripts:
                lines.append("- scripts:")
                for k, v in cfg.scripts.items():
                    lines.append(f"  {k}: {v}")
            return [types.TextContent(type="text", text="\n".join(lines))]

        elif name == "patchwork_errors":
            lang = arguments.get("language")
            errors = report.errors or {}
            if lang:
                er = errors.get(lang)
                if er is None:
                    return [types.TextContent(
                        type="text",
                        text=f"No error data for '{lang}'. Available: {list(errors)}"
                    )]
                data = {lang: er}
            else:
                # No language filter: report every language that has data.
                data = errors
            lines = []
            for lng, er in data.items():
                lines.append(f"{lng}: {er.primary_pattern}")
                if er.logging_framework:
                    lines.append(f"  logging: {er.logging_framework}")
                if er.propagation_style:
                    lines.append(f"  propagation: {er.propagation_style}")
                for note in er.notes:
                    lines.append(f"  note: {note}")
            return [types.TextContent(type="text", text="\n".join(lines) or "No error patterns found.")]

        elif name == "patchwork_testing":
            lang = arguments.get("language")
            testing = report.testing or {}
            if lang:
                tr = testing.get(lang)
                if tr is None:
                    return [types.TextContent(
                        type="text",
                        text=f"No testing data for '{lang}'."
                    )]
                data = {lang: tr}
            else:
                # No language filter: report every language that has data.
                data = testing
            lines = []
            for lng, tr in data.items():
                lines.append(f"{lng}:")
                if tr.framework:
                    lines.append(f"  framework: {tr.framework}")
                lines.append(f"  test files: {tr.test_file_count}")
                if tr.organisation:
                    lines.append(f"  organisation: {tr.organisation}")
                if tr.assertion_style:
                    lines.append(f"  assertions: {tr.assertion_style}(...)")
                if tr.mock_library:
                    lines.append(f"  mocking: {tr.mock_library}")
            return [types.TextContent(type="text", text="\n".join(lines) or "No testing data.")]

        elif name == "patchwork_git":
            g = report.git
            if g is None:
                return [types.TextContent(type="text", text="No git data (not a git repo or no commits).")]
            lines = [f"Git conventions ({g.total_commits_sampled} commits sampled):"]
            if g.commit_style:
                lines.append(f"- commit style: {g.commit_style}")
            if g.commit_examples:
                lines.append("- examples:")
                for ex in g.commit_examples[:3]:
                    lines.append(f"    {ex}")
            if g.branch_style:
                lines.append(f"- branch naming: {g.branch_style}")
            if g.avg_files_per_commit:
                lines.append(f"- avg files/commit: {g.avg_files_per_commit}")
            if g.cochange_pairs:
                lines.append("- files that change together:")
                for a, b, count in g.cochange_pairs[:3]:
                    lines.append(f"    {a} <-> {b} ({count}x)")
            for note in g.notes:
                lines.append(f"note: {note}")
            return [types.TextContent(type="text", text="\n".join(lines))]

        elif name == "patchwork_check":
            sym = arguments["name"]
            kind = arguments["kind"]
            lang = arguments.get("language", "")
            return [types.TextContent(
                type="text",
                text=_check_convention(sym, kind, lang, report),
            )]

        return [types.TextContent(type="text", text=f"Unknown tool: {name}")]

    # Transport
    if stdio:
        async with stdio_server() as (read_stream, write_stream):
            await server.run(read_stream, write_stream, server.create_initialization_options())
    else:
        # SSE transport for HTTP mode. These imports stay local so that stdio
        # mode does not require starlette/uvicorn to be installed.
        from mcp.server.sse import SseServerTransport
        from starlette.applications import Starlette
        from starlette.responses import Response
        from starlette.routing import Mount, Route
        import uvicorn

        sse = SseServerTransport("/messages")

        async def handle_sse(request):
            """Hold one SSE session open and run the MCP server over it."""
            async with sse.connect_sse(request.scope, request.receive, request._send) as streams:
                await server.run(streams[0], streams[1], server.create_initialization_options())
            # Starlette calls whatever the endpoint returns as the response;
            # returning None would fail with "'NoneType' object is not callable"
            # once the client disconnects.
            return Response()

        app = Starlette(routes=[
            Route("/sse", endpoint=handle_sse),
            Mount("/messages", app=sse.handle_post_message),
        ])
        # uvicorn.run() starts its own event loop via asyncio.run(), which
        # raises RuntimeError inside this already-running coroutine. Drive the
        # server on the current loop instead.
        # NOTE(security): the default host "0.0.0.0" listens on every interface
        # and the tools will scan any path a client sends; pass
        # host="127.0.0.1" unless remote access is intended.
        config = uvicorn.Config(app, host=host, port=port)
        await uvicorn.Server(config).serve()
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def _check_convention(name: str, kind: str, lang: str, report: ConventionReport) -> str:
|
|
398
|
+
"""Check if a name follows detected conventions. Returns descriptive verdict."""
|
|
399
|
+
import re
|
|
400
|
+
|
|
401
|
+
naming = (report.naming or {}).get(lang)
|
|
402
|
+
if naming is None:
|
|
403
|
+
return f"ok (no convention data for {lang})"
|
|
404
|
+
|
|
405
|
+
convention_map = {
|
|
406
|
+
"function": naming.functions,
|
|
407
|
+
"class": naming.classes,
|
|
408
|
+
"variable": naming.variables,
|
|
409
|
+
"constant": naming.constants,
|
|
410
|
+
"file": naming.files,
|
|
411
|
+
}
|
|
412
|
+
conv = convention_map.get(kind)
|
|
413
|
+
if conv is None or conv.confidence < 0.6:
|
|
414
|
+
return f"ok (no strong convention detected for {lang} {kind}s)"
|
|
415
|
+
|
|
416
|
+
expected = conv.style
|
|
417
|
+
actual = _classify_name(name)
|
|
418
|
+
|
|
419
|
+
if actual == expected:
|
|
420
|
+
return f"✓ ok — `{name}` follows `{expected}` convention for {lang} {kind}s"
|
|
421
|
+
else:
|
|
422
|
+
example = conv.examples[0] if conv.examples else "N/A"
|
|
423
|
+
return (
|
|
424
|
+
f"⚠ violation — `{name}` looks like `{actual}` "
|
|
425
|
+
f"but this project uses `{expected}` for {lang} {kind}s. "
|
|
426
|
+
f"Example: `{example}`"
|
|
427
|
+
)
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
def _classify_name(name: str) -> str:
|
|
431
|
+
import re
|
|
432
|
+
if re.match(r'^[A-Z][A-Z0-9]*(_[A-Z0-9]+)*$', name):
|
|
433
|
+
return "SCREAMING_SNAKE"
|
|
434
|
+
if re.match(r'^[a-z][a-z0-9]*(_[a-z0-9]+)*$', name):
|
|
435
|
+
return "snake_case"
|
|
436
|
+
if re.match(r'^[a-z][a-zA-Z0-9]*$', name):
|
|
437
|
+
return "camelCase"
|
|
438
|
+
if re.match(r'^[A-Z][a-zA-Z0-9]*$', name):
|
|
439
|
+
return "PascalCase"
|
|
440
|
+
if re.match(r'^[a-z][a-z0-9]*(-[a-z0-9]+)*$', name):
|
|
441
|
+
return "kebab-case"
|
|
442
|
+
return "mixed"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# miners package
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
"""
|
|
2
|
+
APIPatternMiner — Detects API / data-layer conventions:
|
|
3
|
+
- REST response shape: {data, error} | {result} | {success, data} | raw
|
|
4
|
+
- HTTP method naming pattern
|
|
5
|
+
- Route parameter style: :id vs {id} vs <id>
|
|
6
|
+
- ORM in use and query style
|
|
7
|
+
- Async patterns: async/await vs callbacks vs coroutines
|
|
8
|
+
- GraphQL presence
|
|
9
|
+
- gRPC presence
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import re
|
|
14
|
+
from collections import Counter
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class APIResult:
    """API / data-layer conventions detected for one language's source files.

    Instances are built by ``_detect_apis``; a field is ``None`` (or empty or
    ``False``) when nothing was detected for it.
    """

    response_shape: str | None  # '{success, data}' | '{data, error}' | '{result}'
    route_param_style: str | None  # ':id (Express style)' | '{id} (FastAPI style)' | '<id> (Flask style)'
    async_pattern: str | None  # a key of _ASYNC_SIGNALS[lang], e.g. 'async/await' | 'promises' | 'goroutines'
    orm: str | None  # most frequently signalled key of _ORM_SIGNALS
    has_graphql: bool
    has_grpc: bool
    api_frameworks: list[str]  # up to three keys of _FRAMEWORK_SIGNALS, most frequent first
    http_client: str | None  # most frequently seen entry of _HTTP_CLIENTS[lang]
    notes: list[str] = field(default_factory=list)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Response shape patterns
|
|
33
|
+
_RESP_DATA_ERROR = re.compile(r'["\'](?:data|error)["\']', re.IGNORECASE)
|
|
34
|
+
_RESP_SUCCESS_DATA = re.compile(r'["\']success["\'].*["\']data["\']', re.DOTALL | re.IGNORECASE)
|
|
35
|
+
_RESP_RESULT = re.compile(r'["\']result["\']', re.IGNORECASE)
|
|
36
|
+
|
|
37
|
+
# Route parameter styles
|
|
38
|
+
_ROUTE_COLON = re.compile(r'(?:app|router)\.\w+\(["\'][^"\']*:\w+') # Express :id
|
|
39
|
+
_ROUTE_BRACE = re.compile(r'(?:path|url)\s*=\s*["\'][^"\']*\{[a-zA-Z_]+\}') # FastAPI {id}
|
|
40
|
+
_ROUTE_ANGLE = re.compile(r'(?:app|blueprint)\.\w+\(["\'][^"\']*<[a-zA-Z_:]+>') # Flask <id>
|
|
41
|
+
|
|
42
|
+
# ORM signals: ORM name -> regex sources, any one of which marks a file as
# using that ORM. _detect_apis counts a file at most once per ORM.
_ORM_SIGNALS = {
    "SQLAlchemy": [r"\bsessionmaker\b", r"Column\(", r"declarative_base", r"db\.session"],
    "Prisma": [r"prisma\.\w+\.find", r"prisma\.\w+\.create", r"from ['\"]@prisma"],
    "Sequelize": [r"Sequelize\b", r"\.define\(", r"sequelize\.query"],
    "TypeORM": [r"@Entity\(\)", r"getRepository\(", r"createQueryBuilder"],
    "Django ORM": [r"models\.Model\b", r"\.objects\.filter\(", r"\.objects\.get\("],
    "GORM": [r"\bgorm\b.*\.Find\(", r"db\.Where\(", r"AutoMigrate\("],
    "Mongoose": [r"mongoose\.model\(", r"new Schema\(", r"\.populate\("],
    "Drizzle": [r"from ['\"]drizzle-orm", r"drizzle\("],
    "Hibernate": [r"@Entity\b.*@Table\b", r"SessionFactory\b"],
}

# Web framework signals: framework name -> regex sources, any one of which
# marks a file as using that framework. These are checked for every file
# regardless of the language being mined.
_FRAMEWORK_SIGNALS = {
    "FastAPI": [r"from fastapi import", r"@app\.get\(", r"@router\."],
    "Flask": [r"from flask import", r"@app\.route\(", r"Blueprint\("],
    "Django": [r"from django", r"urlpatterns\s*=", r"HttpResponse"],
    "Express": [r"require\(['\"]express['\"]", r"app\.use\(", r"router\.get\("],
    "Fastify": [r"require\(['\"]fastify['\"]", r"fastify\.register"],
    "Hono": [r"from ['\"]hono['\"]", r"new Hono\("],
    "Gin": [r"\bgin\b.*\bDefault\(\)", r"r\.GET\(", r"c\.JSON\("],
    "Echo": [r"\becho\b.*\bNew\(\)", r"e\.GET\("],
    "Actix": [r"use actix_web", r"HttpServer::new"],
    "Axum": [r"use axum::", r"Router::new\(\)"],
    "NestJS": [r"@Controller\(", r"@Injectable\(\)", r"@Module\("],
    "Spring": [r"@RestController\b", r"@GetMapping", r"@SpringBootApplication"],
}

# Async style signals: language -> style name -> regex sources. Only the table
# for the language being mined is consulted; languages missing here yield no
# async pattern.
_ASYNC_SIGNALS = {
    "python": {
        "async/await": [r"\basync def\b", r"\bawait\b"],
        "coroutines": [r"asyncio\.run\(", r"@asyncio\.coroutine"],
        "callbacks": [r"\.add_done_callback\(", r"concurrent\.futures"],
    },
    "javascript": {
        "async/await": [r"\basync\s+function\b", r"\bawait\b"],
        "promises": [r"\.then\(", r"new Promise\(", r"Promise\.all"],
        "callbacks": [r"callback\s*\)", r"cb\s*\)"],
    },
    "typescript": {
        "async/await": [r"\basync\s+function\b", r"\bawait\b"],
        "promises": [r"Promise<", r"\.then\("],
    },
    "go": {
        "goroutines": [r"\bgo\s+\w+\(", r"\bchan\b"],
    },
    "rust": {
        "async/await": [r"\basync\s+fn\b", r"\.await\b"],
        "futures": [r"Box<dyn Future", r"impl Future"],
    },
}

# HTTP client names looked for per language (plain names, not regexes).
_HTTP_CLIENTS = {
    "python": ["requests", "httpx", "aiohttp", "urllib3", "pycurl"],
    "javascript": ["axios", "fetch", "got", "node-fetch", "superagent", "ky"],
    "typescript": ["axios", "fetch", "got", "ky", "ofetch"],
    "go": ["net/http", "resty", "fasthttp"],
    "rust": ["reqwest", "hyper", "ureq"],
}
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
# gRPC / protobuf markers. "proto" only counts as a whole word ("proto",
# "proto3") or a ".proto" file reference, so that "prototype" and "protocol"
# no longer flag a project as using gRPC.
_GRPC_SIGNAL = re.compile(r"grpc|protobuf|\.proto\b|\bproto[23]?\b", re.IGNORECASE)


def _detect_apis(paths: list[Path], lang: str) -> APIResult:
    """Infer API and data-layer conventions from the text of source files.

    Args:
        paths: Candidate source files; only the first 200 are read, and files
            that cannot be read are skipped.
        lang: Language key selecting the async-style and HTTP-client tables.

    Returns:
        An ``APIResult`` whose fields are ``None``/empty/``False`` wherever no
        signal was found.
    """
    data_error = success_data = result_shape = 0
    colon_routes = brace_routes = angle_routes = 0
    orm_counts: Counter[str] = Counter()
    fw_counts: Counter[str] = Counter()
    async_counts: Counter[str] = Counter()
    http_client_counts: Counter[str] = Counter()
    has_graphql = False
    has_grpc = False

    # Per-language lookups are loop-invariant, so resolve them once.
    async_signals = _ASYNC_SIGNALS.get(lang, {})
    # Match client names as whole tokens: a bare substring test counted "got"
    # inside "forgot" and "fetch" inside "node-fetch".
    client_patterns = {
        client: re.compile(rf"(?<![\w-]){re.escape(client)}(?![\w-])")
        for client in _HTTP_CLIENTS.get(lang, [])
    }

    for path in paths[:200]:
        try:
            # Decode explicitly as UTF-8 so results do not depend on the
            # platform's default encoding; undecodable bytes are replaced.
            text = path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            continue

        data_error += len(_RESP_DATA_ERROR.findall(text))
        success_data += len(_RESP_SUCCESS_DATA.findall(text))
        result_shape += len(_RESP_RESULT.findall(text))

        colon_routes += len(_ROUTE_COLON.findall(text))
        brace_routes += len(_ROUTE_BRACE.findall(text))
        angle_routes += len(_ROUTE_ANGLE.findall(text))

        # Each file contributes at most one vote per ORM / framework / style.
        for orm, patterns in _ORM_SIGNALS.items():
            if any(re.search(pat, text) for pat in patterns):
                orm_counts[orm] += 1

        for fw, patterns in _FRAMEWORK_SIGNALS.items():
            if any(re.search(pat, text) for pat in patterns):
                fw_counts[fw] += 1

        for style, patterns in async_signals.items():
            if any(re.search(pat, text) for pat in patterns):
                async_counts[style] += 1

        for client, client_re in client_patterns.items():
            if client_re.search(text):
                http_client_counts[client] += 1

        if "graphql" in text.lower() or "gql`" in text:
            has_graphql = True
        if _GRPC_SIGNAL.search(text):
            has_grpc = True

    # Response shape: the first envelope seen more than three times wins, in
    # this fixed priority order.
    response_shape = None
    if success_data > 3:
        response_shape = "{success, data}"
    elif data_error > 3:
        response_shape = "{data, error}"
    elif result_shape > 3:
        response_shape = "{result}"

    # Route param style: the most frequent style; max() returns the first
    # maximum, so ties resolve in the listed order (colon, brace, angle).
    route_counts = [
        (":id (Express style)", colon_routes),
        ("{id} (FastAPI style)", brace_routes),
        ("<id> (Flask style)", angle_routes),
    ]
    route_style = None
    if any(count for _, count in route_counts):
        route_style = max(route_counts, key=lambda item: item[1])[0]

    async_pattern = async_counts.most_common(1)[0][0] if async_counts else None
    orm = orm_counts.most_common(1)[0][0] if orm_counts else None
    http_client = http_client_counts.most_common(1)[0][0] if http_client_counts else None
    api_frameworks = [fw for fw, _ in fw_counts.most_common(3)]

    return APIResult(
        response_shape=response_shape,
        route_param_style=route_style,
        async_pattern=async_pattern,
        orm=orm,
        has_graphql=has_graphql,
        has_grpc=has_grpc,
        api_frameworks=api_frameworks,
        http_client=http_client,
    )
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
class APIPatternMiner:
    """Runs API-pattern detection over each language's file list."""

    def mine(self, by_lang: dict[str, list[Path]]) -> dict[str, APIResult]:
        """Return detection results keyed by language.

        A language is kept only when detection found at least one API
        framework, an ORM, or an async pattern.
        """
        detected = (
            (lang, _detect_apis(paths, lang)) for lang, paths in by_lang.items()
        )
        return {
            lang: found
            for lang, found in detected
            if found.api_frameworks or found.orm or found.async_pattern
        }
|