patchwork-conventions 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- patchwork/__init__.py +10 -0
- patchwork/cli.py +336 -0
- patchwork/mcp/__init__.py +1 -0
- patchwork/mcp/server.py +442 -0
- patchwork/miners/__init__.py +1 -0
- patchwork/miners/api_patterns.py +204 -0
- patchwork/miners/ast_base.py +113 -0
- patchwork/miners/config_detector.py +273 -0
- patchwork/miners/error_handling.py +207 -0
- patchwork/miners/git_patterns.py +169 -0
- patchwork/miners/imports.py +158 -0
- patchwork/miners/naming.py +277 -0
- patchwork/miners/structure.py +204 -0
- patchwork/miners/testing.py +204 -0
- patchwork/output/__init__.py +1 -0
- patchwork/output/report.py +417 -0
- patchwork/scanner.py +162 -0
- patchwork_conventions-0.1.0.dist-info/METADATA +393 -0
- patchwork_conventions-0.1.0.dist-info/RECORD +23 -0
- patchwork_conventions-0.1.0.dist-info/WHEEL +5 -0
- patchwork_conventions-0.1.0.dist-info/entry_points.txt +2 -0
- patchwork_conventions-0.1.0.dist-info/licenses/LICENSE +21 -0
- patchwork_conventions-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tree-sitter parser cache + helpers shared across all miners.
|
|
3
|
+
Lazy-loads language parsers on first use.
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
from functools import lru_cache
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Iterator
|
|
11
|
+
|
|
12
|
+
try:
|
|
13
|
+
from tree_sitter import Language, Parser, Node
|
|
14
|
+
TS_AVAILABLE = True
|
|
15
|
+
except ImportError:
|
|
16
|
+
TS_AVAILABLE = False
|
|
17
|
+
|
|
18
|
+
# Maps a language tag to the name of the importable tree-sitter grammar
# module for that language; get_parser() imports the module on demand.
_LANGUAGE_LOADERS: dict[str, str] = {
    tag: f"tree_sitter_{tag}"
    for tag in ("python", "javascript", "typescript", "go", "rust", "java")
}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@lru_cache(maxsize=None)
def get_parser(lang: str) -> "Parser | None":
    """Return a cached tree-sitter Parser for the given language, or None.

    None is returned when tree-sitter itself is not importable, when ``lang``
    has no entry in ``_LANGUAGE_LOADERS``, or when the grammar module cannot be
    imported or turned into a parser.  Results (including None) are memoised
    per ``lang`` by ``lru_cache``.
    """
    if not TS_AVAILABLE:
        return None
    module_name = _LANGUAGE_LOADERS.get(lang)
    if module_name is None:
        return None

    import importlib

    try:
        mod = importlib.import_module(module_name)
        # Most grammar packages expose ``language()``; multi-grammar packages
        # such as tree_sitter_typescript expose ``language_<name>()`` instead.
        loader = getattr(mod, "language", None) or getattr(mod, f"language_{lang}", None)
        if loader is None:
            return None
        return Parser(Language(loader()))
    except Exception:
        # Best-effort: a missing or incompatible grammar just disables AST
        # parsing for this language; callers fall back to regex heuristics.
        return None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def parse_file(path: Path, lang: str) -> "Node | None":
    """Read ``path`` as bytes, parse it as ``lang`` and return the root node.

    Returns None when no parser is available for ``lang`` or when reading or
    parsing the file raises.
    """
    ts_parser = get_parser(lang)
    if ts_parser is not None:
        try:
            return ts_parser.parse(path.read_bytes()).root_node
        except Exception:
            # Unreadable or unparseable files are reported as "no tree".
            return None
    return None
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def parse_bytes(source: bytes, lang: str) -> "Node | None":
    """Parse in-memory source bytes as ``lang`` and return the root node.

    Returns None when no parser is available for ``lang`` or when parsing
    raises.
    """
    ts_parser = get_parser(lang)
    if ts_parser is not None:
        try:
            return ts_parser.parse(source).root_node
        except Exception:
            return None
    return None
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def walk(node: "Node") -> Iterator["Node"]:
    """Yield ``node`` and every descendant in depth-first, pre-order sequence.

    Traversal uses the cursor returned by ``node.walk()`` and stops once the
    cursor can no longer move to a parent.
    """
    cursor = node.walk()
    while True:
        yield cursor.node

        # Descend first, otherwise move sideways.
        if cursor.goto_first_child() or cursor.goto_next_sibling():
            continue

        # Dead end: climb until some ancestor has an unvisited sibling.
        while True:
            if not cursor.goto_parent():
                return
            if cursor.goto_next_sibling():
                break
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def nodes_of_type(root: "Node", *types: str) -> list["Node"]:
    """Return every node yielded by ``walk(root)`` whose ``type`` is in ``types``."""
    matches = []
    for candidate in walk(root):
        if candidate.type in types:
            matches.append(candidate)
    return matches
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def node_text(node: "Node", source: bytes) -> str:
    """Return the slice of ``source`` covered by ``node``, decoded as UTF-8.

    Undecodable bytes are substituted with the replacement character rather
    than raising.
    """
    span = source[node.start_byte:node.end_byte]
    return str(span, encoding="utf-8", errors="replace")
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# ── Regex-based fallbacks (used when tree-sitter is unavailable or for langs
|
|
101
|
+
# without a grammar installed) ──────────────────────────────────────────────
|
|
102
|
+
|
|
103
|
+
def regex_extract_identifiers(source: str, lang: str) -> list[str]:
    """Very rough identifier extraction via regex (fallback path).

    For Python, JavaScript/TypeScript and Go the names following a declaring
    keyword are returned; for any other ``lang`` every word of three or more
    characters is returned.  Matches are returned in source order and may
    contain duplicates.  Comments and string literals are not excluded.
    """
    if lang == "python":
        # The leading \b keeps "subclass Foo" / "undef x" from matching.
        pattern = r"\b(?:def|class)\s+([A-Za-z_][A-Za-z0-9_]*)"
    elif lang in ("javascript", "typescript"):
        # "$" is an identifier character in JS, so it is excluded explicitly
        # in addition to ordinary word characters.
        pattern = r"(?<![\w$])(?:function|class|const|let|var)\s+([A-Za-z_$][A-Za-z0-9_$]*)"
    elif lang == "go":
        # Go identifiers may start with or contain underscores.
        pattern = r"\b(?:func|type|var|const)\s+([A-Za-z_][A-Za-z0-9_]*)"
    else:
        pattern = r"\b([A-Za-z_][A-Za-z0-9_]{2,})\b"
    return re.findall(pattern, source)
|
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ConfigDetector — Reads config files to detect tech stack without AST parsing.
|
|
3
|
+
Inspects: package.json, pyproject.toml, go.mod, Cargo.toml, Makefile, etc.
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import re
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
try:
|
|
13
|
+
import tomllib # Python 3.11+
|
|
14
|
+
except ImportError:
|
|
15
|
+
try:
|
|
16
|
+
import tomli as tomllib # type: ignore
|
|
17
|
+
except ImportError:
|
|
18
|
+
tomllib = None # type: ignore
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class ProjectConfig:
    """Tech-stack facts collected from a project's configuration files.

    Every field has a default, so an instance can be created empty and filled
    in incrementally.
    """

    # Identity
    name: str | None = None
    version: str | None = None

    # Toolchain
    language: str | None = None         # primary language
    runtime: str | None = None          # e.g. 'Node.js 20', 'Python 3.11'
    package_manager: str | None = None  # npm/yarn/pnpm/pip/uv/cargo/go

    # Detected tooling (names as they appear in the dependency lists)
    frameworks: list[str] = field(default_factory=list)
    linters: list[str] = field(default_factory=list)
    formatters: list[str] = field(default_factory=list)
    type_checker: str | None = None
    build_tool: str | None = None

    # Script name -> command/entry point
    scripts: dict[str, str] = field(default_factory=dict)

    # Infrastructure markers
    has_docker: bool = False
    has_ci: bool = False
    ci_platform: str | None = None

    notes: list[str] = field(default_factory=list)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# npm package names looked up in package.json dependencies.  Several projects
# are only published under a scope or a suffixed name (Angular as
# "@angular/core", SvelteKit as "@sveltejs/kit", Solid as "solid-js"), so those
# names are listed next to the short aliases.
_JS_FRAMEWORKS = [
    "react", "next", "vue", "nuxt", "svelte", "sveltekit", "@sveltejs/kit",
    "angular", "@angular/core", "solid", "solid-js", "remix", "astro", "qwik",
    "hono", "express", "fastify", "nestjs", "@nestjs/core", "koa", "elysia",
]
# Biome is both a linter and a formatter; its npm package is "@biomejs/biome".
_JS_LINTERS = ["eslint", "oxlint", "biome", "@biomejs/biome"]
_JS_FORMATTERS = ["prettier", "biome", "@biomejs/biome"]
_JS_TYPE_CHECKERS = ["typescript", "flow"]
_JS_BUILD = ["vite", "webpack", "turbopack", "rollup", "esbuild", "bun", "parcel", "rspack"]

# Distribution names looked up in pyproject.toml dependencies and, for the
# tooling lists, also as [tool.<name>] table names.
_PY_FRAMEWORKS = [
    "fastapi", "flask", "django", "starlette", "litestar", "tornado",
    "aiohttp", "sanic", "falcon",
]
_PY_LINTERS = ["ruff", "flake8", "pylint", "pyflakes"]
_PY_FORMATTERS = ["black", "ruff", "autopep8", "yapf"]
_PY_TYPE = ["mypy", "pyright", "pytype"]

# Path (relative to the project root) -> CI platform display name.
_CI_PLATFORMS = {
    ".github/workflows": "GitHub Actions",
    ".gitlab-ci.yml": "GitLab CI",
    ".circleci": "CircleCI",
    "Jenkinsfile": "Jenkins",
    ".travis.yml": "Travis CI",
    "azure-pipelines.yml": "Azure Pipelines",
    ".drone.yml": "Drone CI",
    "bitbucket-pipelines.yml": "Bitbucket Pipelines",
}
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class ConfigDetector:
|
|
71
|
+
def __init__(self, root: Path):
|
|
72
|
+
self.root = root
|
|
73
|
+
|
|
74
|
+
def detect(self) -> ProjectConfig:
|
|
75
|
+
cfg = ProjectConfig()
|
|
76
|
+
|
|
77
|
+
# Detect CI
|
|
78
|
+
for ci_path, ci_name in _CI_PLATFORMS.items():
|
|
79
|
+
if (self.root / ci_path).exists():
|
|
80
|
+
cfg.has_ci = True
|
|
81
|
+
cfg.ci_platform = ci_name
|
|
82
|
+
break
|
|
83
|
+
|
|
84
|
+
# Docker
|
|
85
|
+
if (self.root / "Dockerfile").exists() or (self.root / "docker-compose.yml").exists():
|
|
86
|
+
cfg.has_docker = True
|
|
87
|
+
|
|
88
|
+
# Node.js
|
|
89
|
+
pkg_json = self.root / "package.json"
|
|
90
|
+
if pkg_json.exists():
|
|
91
|
+
self._read_package_json(pkg_json, cfg)
|
|
92
|
+
|
|
93
|
+
# Python
|
|
94
|
+
pyproject = self.root / "pyproject.toml"
|
|
95
|
+
if pyproject.exists():
|
|
96
|
+
self._read_pyproject(pyproject, cfg)
|
|
97
|
+
elif (self.root / "setup.py").exists() or (self.root / "setup.cfg").exists():
|
|
98
|
+
cfg.language = "python"
|
|
99
|
+
cfg.package_manager = "pip"
|
|
100
|
+
|
|
101
|
+
# Go
|
|
102
|
+
go_mod = self.root / "go.mod"
|
|
103
|
+
if go_mod.exists():
|
|
104
|
+
self._read_go_mod(go_mod, cfg)
|
|
105
|
+
|
|
106
|
+
# Rust
|
|
107
|
+
cargo = self.root / "Cargo.toml"
|
|
108
|
+
if cargo.exists():
|
|
109
|
+
self._read_cargo(cargo, cfg)
|
|
110
|
+
|
|
111
|
+
# Package manager detection (node)
|
|
112
|
+
if (self.root / "pnpm-lock.yaml").exists():
|
|
113
|
+
cfg.package_manager = "pnpm"
|
|
114
|
+
elif (self.root / "yarn.lock").exists():
|
|
115
|
+
cfg.package_manager = "yarn"
|
|
116
|
+
elif (self.root / "bun.lockb").exists() or (self.root / "bun.lock").exists():
|
|
117
|
+
cfg.package_manager = "bun"
|
|
118
|
+
elif (self.root / "package-lock.json").exists() and cfg.package_manager is None:
|
|
119
|
+
cfg.package_manager = "npm"
|
|
120
|
+
|
|
121
|
+
# Python package manager
|
|
122
|
+
if (self.root / "uv.lock").exists():
|
|
123
|
+
cfg.package_manager = "uv"
|
|
124
|
+
elif (self.root / "poetry.lock").exists():
|
|
125
|
+
cfg.package_manager = "poetry"
|
|
126
|
+
elif (self.root / "Pipfile").exists():
|
|
127
|
+
cfg.package_manager = "pipenv"
|
|
128
|
+
|
|
129
|
+
return cfg
|
|
130
|
+
|
|
131
|
+
def _read_package_json(self, path: Path, cfg: ProjectConfig) -> None:
|
|
132
|
+
try:
|
|
133
|
+
data = json.loads(path.read_text())
|
|
134
|
+
except (json.JSONDecodeError, OSError):
|
|
135
|
+
return
|
|
136
|
+
|
|
137
|
+
cfg.language = "javascript/typescript"
|
|
138
|
+
cfg.name = data.get("name")
|
|
139
|
+
cfg.version = data.get("version")
|
|
140
|
+
|
|
141
|
+
all_deps = {
|
|
142
|
+
**data.get("dependencies", {}),
|
|
143
|
+
**data.get("devDependencies", {}),
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
for fw in _JS_FRAMEWORKS:
|
|
147
|
+
if fw in all_deps:
|
|
148
|
+
cfg.frameworks.append(fw)
|
|
149
|
+
|
|
150
|
+
for linter in _JS_LINTERS:
|
|
151
|
+
if linter in all_deps:
|
|
152
|
+
cfg.linters.append(linter)
|
|
153
|
+
|
|
154
|
+
for fmt in _JS_FORMATTERS:
|
|
155
|
+
if fmt in all_deps:
|
|
156
|
+
if fmt not in cfg.formatters:
|
|
157
|
+
cfg.formatters.append(fmt)
|
|
158
|
+
|
|
159
|
+
for tc in _JS_TYPE_CHECKERS:
|
|
160
|
+
if tc in all_deps:
|
|
161
|
+
cfg.type_checker = tc
|
|
162
|
+
|
|
163
|
+
for bt in _JS_BUILD:
|
|
164
|
+
if bt in all_deps:
|
|
165
|
+
cfg.build_tool = bt
|
|
166
|
+
break
|
|
167
|
+
|
|
168
|
+
scripts = data.get("scripts", {})
|
|
169
|
+
important = {k: v for k, v in scripts.items()
|
|
170
|
+
if k in ("dev", "start", "build", "test", "lint", "format", "typecheck")}
|
|
171
|
+
cfg.scripts.update(important)
|
|
172
|
+
|
|
173
|
+
# Runtime from engines field
|
|
174
|
+
engines = data.get("engines", {})
|
|
175
|
+
if "node" in engines:
|
|
176
|
+
cfg.runtime = f"Node.js {engines['node']}"
|
|
177
|
+
|
|
178
|
+
def _read_pyproject(self, path: Path, cfg: ProjectConfig) -> None:
|
|
179
|
+
cfg.language = "python"
|
|
180
|
+
if tomllib is None:
|
|
181
|
+
return
|
|
182
|
+
try:
|
|
183
|
+
data = tomllib.loads(path.read_text())
|
|
184
|
+
except Exception:
|
|
185
|
+
return
|
|
186
|
+
|
|
187
|
+
project = data.get("project", {})
|
|
188
|
+
tool = data.get("tool", {})
|
|
189
|
+
|
|
190
|
+
cfg.name = project.get("name")
|
|
191
|
+
cfg.version = project.get("version")
|
|
192
|
+
|
|
193
|
+
requires_python = project.get("requires-python", "")
|
|
194
|
+
if requires_python:
|
|
195
|
+
cfg.runtime = f"Python {requires_python}"
|
|
196
|
+
|
|
197
|
+
all_deps = list(project.get("dependencies", [])) + [
|
|
198
|
+
str(dep) for group in project.get("optional-dependencies", {}).values()
|
|
199
|
+
for dep in group
|
|
200
|
+
]
|
|
201
|
+
dep_names = [re.split(r"[>=<!;[\s]", d)[0].lower() for d in all_deps]
|
|
202
|
+
|
|
203
|
+
for fw in _PY_FRAMEWORKS:
|
|
204
|
+
if fw in dep_names:
|
|
205
|
+
cfg.frameworks.append(fw)
|
|
206
|
+
|
|
207
|
+
for linter in _PY_LINTERS:
|
|
208
|
+
if linter in dep_names or linter in tool:
|
|
209
|
+
if linter not in cfg.linters:
|
|
210
|
+
cfg.linters.append(linter)
|
|
211
|
+
|
|
212
|
+
for fmt in _PY_FORMATTERS:
|
|
213
|
+
if fmt in dep_names or fmt in tool:
|
|
214
|
+
if fmt not in cfg.formatters:
|
|
215
|
+
cfg.formatters.append(fmt)
|
|
216
|
+
|
|
217
|
+
for tc in _PY_TYPE:
|
|
218
|
+
if tc in dep_names or tc in tool:
|
|
219
|
+
cfg.type_checker = tc
|
|
220
|
+
break
|
|
221
|
+
|
|
222
|
+
scripts_section = project.get("scripts", {})
|
|
223
|
+
cfg.scripts.update(scripts_section)
|
|
224
|
+
|
|
225
|
+
def _read_go_mod(self, path: Path, cfg: ProjectConfig) -> None:
|
|
226
|
+
cfg.language = "go"
|
|
227
|
+
cfg.package_manager = "go"
|
|
228
|
+
try:
|
|
229
|
+
text = path.read_text()
|
|
230
|
+
except OSError:
|
|
231
|
+
return
|
|
232
|
+
m = re.search(r'^module\s+(\S+)', text, re.MULTILINE)
|
|
233
|
+
if m:
|
|
234
|
+
cfg.name = m.group(1)
|
|
235
|
+
m2 = re.search(r'^go\s+([\d.]+)', text, re.MULTILINE)
|
|
236
|
+
if m2:
|
|
237
|
+
cfg.runtime = f"Go {m2.group(1)}"
|
|
238
|
+
|
|
239
|
+
# Detect popular Go frameworks from dependencies
|
|
240
|
+
go_frameworks = {
|
|
241
|
+
"gin-gonic/gin": "gin",
|
|
242
|
+
"labstack/echo": "echo",
|
|
243
|
+
"gofiber/fiber": "fiber",
|
|
244
|
+
"go-chi/chi": "chi",
|
|
245
|
+
"gorilla/mux": "gorilla/mux",
|
|
246
|
+
}
|
|
247
|
+
for dep, name in go_frameworks.items():
|
|
248
|
+
if dep in text:
|
|
249
|
+
cfg.frameworks.append(name)
|
|
250
|
+
|
|
251
|
+
def _read_cargo(self, path: Path, cfg: ProjectConfig) -> None:
|
|
252
|
+
cfg.language = "rust"
|
|
253
|
+
cfg.package_manager = "cargo"
|
|
254
|
+
if tomllib is None:
|
|
255
|
+
return
|
|
256
|
+
try:
|
|
257
|
+
data = tomllib.loads(path.read_text())
|
|
258
|
+
except Exception:
|
|
259
|
+
return
|
|
260
|
+
pkg = data.get("package", {})
|
|
261
|
+
cfg.name = pkg.get("name")
|
|
262
|
+
cfg.version = pkg.get("version")
|
|
263
|
+
cfg.runtime = f"Rust {pkg.get('edition', '2021')}"
|
|
264
|
+
|
|
265
|
+
# Detect web frameworks
|
|
266
|
+
rust_frameworks = {
|
|
267
|
+
"axum": "axum", "actix-web": "actix-web", "warp": "warp",
|
|
268
|
+
"rocket": "rocket", "tide": "tide",
|
|
269
|
+
}
|
|
270
|
+
all_deps = {**data.get("dependencies", {}), **data.get("dev-dependencies", {})}
|
|
271
|
+
for dep, name in rust_frameworks.items():
|
|
272
|
+
if dep in all_deps:
|
|
273
|
+
cfg.frameworks.append(name)
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ErrorHandlingMiner — Detects error-handling conventions:
|
|
3
|
+
- try/except vs Result types vs callbacks vs promises
|
|
4
|
+
- Custom exception class naming
|
|
5
|
+
- Error propagation style (raise vs return vs log)
|
|
6
|
+
- Logging framework used
|
|
7
|
+
- Panic/recover (Go), ? operator (Rust), etc.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import re
|
|
12
|
+
from collections import Counter
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
from patchwork.miners.ast_base import parse_bytes, walk, node_text
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class ErrorResult:
    """Error-handling conventions detected for one language."""

    # Dominant mechanism:
    # 'try/except' | 'Result<T>' | 'callbacks' | 'async/await' | 'Either'
    primary_pattern: str
    # Suffix convention of custom exception classes:
    # 'Error' suffix | 'Exception' suffix | 'mixed'
    exception_naming: str | None
    logging_framework: str | None
    custom_exceptions: list[str]
    # How errors travel: 'raise' | 'return' | 'log-and-continue' | 'mixed'
    propagation_style: str | None
    notes: list[str] = field(default_factory=list)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Python
_PY_RAISE = re.compile(r'\braise\b')
_PY_TRY = re.compile(r'\btry\s*:')
_PY_EXCEPT = re.compile(r'\bexcept\s+\w')
_PY_CUSTOM_EX = re.compile(r'class\s+(\w+(?:Error|Exception))\s*\(')
_PY_LOGGING = re.compile(r'\b(logging|structlog|loguru|logbook)\b')
_PY_LOGGER_VAR = re.compile(r'logger\s*=\s*(logging|structlog|loguru)')

# JavaScript / TypeScript
_JS_TRY = re.compile(r'\btry\s*\{')
_JS_CATCH = re.compile(r'\bcatch\s*\(')
_JS_THROW = re.compile(r'\bthrow\s+new\s+\w+')
_JS_PROMISE_CATCH = re.compile(r'\.catch\(')
_JS_ASYNC_AWAIT = re.compile(r'\bawait\b')
_JS_CUSTOM_ERROR = re.compile(r'class\s+(\w+Error)\s+extends\s+\w*Error')

# Go
_GO_ERR = re.compile(r',\s*err\s*:?=')
_GO_ERR_NIL = re.compile(r'if\s+err\s*!=\s*nil')
_GO_CUSTOM_ERR = re.compile(r'type\s+(\w+Error)\s+struct')

# Rust
_RUST_RESULT = re.compile(r'Result<[^,\n]+,')
# The `?` operator ends an expression, so it is followed (after optional
# whitespace) by `;`, a method-chain `.`, or a closing `)`, `]`, `}` or `,`.
# The lookahead keeps unrelated uses such as the `?Sized` bound from matching.
_RUST_QUESTION = re.compile(r'\?(?=\s*[;.)\],}])')
_RUST_UNWRAP = re.compile(r'\.unwrap\(\)')
_RUST_EXPECT = re.compile(r'\.expect\(')

# Language tag -> names of logging libraries to look for in source text.
_LOGGING_FRAMEWORKS = {
    "python": ["logging", "structlog", "loguru", "logbook"],
    "javascript": ["winston", "pino", "bunyan", "loglevel", "debug", "console"],
    "typescript": ["winston", "pino", "bunyan", "tslog", "pino-pretty"],
    "go": ["log", "zap", "zerolog", "logrus", "slog"],
    "rust": ["log", "tracing", "env_logger", "slog"],
}
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _detect_py_errors(paths: list[Path]) -> ErrorResult:
    """Summarise Python error-handling conventions from raw source text.

    At most the first 150 paths are read; unreadable files are skipped.
    Counts are regex-based, so matches inside comments and strings are
    included.
    """
    try_count = 0
    raise_count = 0
    custom_excs: list[str] = []
    logging_counts: Counter[str] = Counter()

    # Whole-word matchers, built once, so a framework name is not counted when
    # it merely occurs inside a longer word.
    framework_res = [
        (fw, re.compile(rf"\b{re.escape(fw)}\b"))
        for fw in _LOGGING_FRAMEWORKS.get("python", [])
    ]

    for path in paths[:150]:
        try:
            text = path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            continue
        try_count += len(_PY_TRY.findall(text))
        raise_count += len(_PY_RAISE.findall(text))
        custom_excs.extend(_PY_CUSTOM_EX.findall(text))
        # Each framework is counted once per file that mentions it.
        for fw, fw_re in framework_res:
            if fw_re.search(text):
                logging_counts[fw] += 1

    pattern = "try/except"
    exc_naming = None
    if custom_excs:
        error_suffix = sum(1 for e in custom_excs if e.endswith("Error"))
        exc_suffix = sum(1 for e in custom_excs if e.endswith("Exception"))
        # Ties are reported as the 'Error' convention.
        exc_naming = "Error suffix" if error_suffix >= exc_suffix else "Exception suffix"

    # Heuristic: more than one `raise` per two `try` blocks counts as a
    # raise-oriented code base.
    prop = "raise" if raise_count > try_count * 0.5 else "log-and-continue"
    logging_fw = logging_counts.most_common(1)[0][0] if logging_counts else None

    return ErrorResult(
        primary_pattern=pattern,
        exception_naming=exc_naming,
        logging_framework=logging_fw,
        # De-duplicate while keeping first-seen order; report at most 8.
        custom_exceptions=list(dict.fromkeys(custom_excs))[:8],
        propagation_style=prop,
    )
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _detect_js_errors(paths: list[Path], lang: str) -> ErrorResult:
    """Summarise JavaScript/TypeScript error-handling conventions from raw source text.

    ``lang`` selects which logging-framework list is searched.  At most the
    first 150 paths are read; unreadable files are skipped.
    """
    throw_count = 0
    promise_catch = 0
    async_await = 0
    custom_errors: list[str] = []
    logging_counts: Counter[str] = Counter()

    # Whole-word matchers, built once, so e.g. "pino" is not counted inside
    # "opinion" or "debug" inside "debugger".
    framework_res = [
        (fw, re.compile(rf"\b{re.escape(fw)}\b"))
        for fw in _LOGGING_FRAMEWORKS.get(lang, [])
    ]

    for path in paths[:150]:
        try:
            text = path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            continue
        throw_count += len(_JS_THROW.findall(text))
        promise_catch += len(_JS_PROMISE_CATCH.findall(text))
        async_await += len(_JS_ASYNC_AWAIT.findall(text))
        custom_errors.extend(_JS_CUSTOM_ERROR.findall(text))
        for fw, fw_re in framework_res:
            if fw_re.search(text):
                logging_counts[fw] += 1

    if async_await == 0 and promise_catch == 0:
        # No asynchronous error handling seen at all: plain try/catch.  (This
        # also covers code that has try blocks but neither `await` nor
        # `.catch(`, which must not be labelled as promise chains.)
        pattern = "try/catch"
    elif async_await > promise_catch:
        pattern = "async/await + try/catch"
    else:
        pattern = "Promise chains"

    logging_fw = logging_counts.most_common(1)[0][0] if logging_counts else None

    return ErrorResult(
        primary_pattern=pattern,
        # _JS_CUSTOM_ERROR only captures class names ending in "Error".
        exception_naming="Error suffix" if custom_errors else None,
        logging_framework=logging_fw,
        custom_exceptions=list(dict.fromkeys(custom_errors))[:8],
        propagation_style="throw" if throw_count > 5 else "return",
    )
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _detect_go_errors(paths: list[Path]) -> ErrorResult:
    """Summarise Go error-handling conventions from raw source text.

    Counts ``if err != nil`` checks and collects ``type XxxError struct``
    names from at most the first 150 paths; unreadable files are skipped.
    """
    nil_checks = 0
    error_types: list[str] = []

    for source_file in paths[:150]:
        try:
            content = source_file.read_text(errors="replace")
        except OSError:
            continue
        nil_checks += sum(1 for _ in _GO_ERR_NIL.finditer(content))
        error_types += _GO_CUSTOM_ERR.findall(content)

    # First-seen order, duplicates dropped.
    unique_types: list[str] = []
    for type_name in error_types:
        if type_name not in unique_types:
            unique_types.append(type_name)

    if nil_checks > 5:
        notes = ["Idiomatic Go error handling: check `err != nil` after each call"]
    else:
        notes = []

    return ErrorResult(
        primary_pattern="if err != nil",
        exception_naming=None,
        logging_framework=None,
        custom_exceptions=unique_types[:5],
        propagation_style="return",
        notes=notes,
    )
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _detect_rust_errors(paths: list[Path]) -> ErrorResult:
    """Summarise Rust error-handling conventions from raw source text.

    Compares how often the ``?`` operator is used against how often results
    are force-unwrapped with ``.unwrap()`` or ``.expect(...)``.  At most the
    first 150 paths are read; unreadable files are skipped.
    """
    question_count = 0
    unwrap_count = 0

    for path in paths[:150]:
        try:
            text = path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            continue
        question_count += len(_RUST_QUESTION.findall(text))
        # The reported label is "unwrap/expect", so both calls are counted.
        unwrap_count += len(_RUST_UNWRAP.findall(text))
        unwrap_count += len(_RUST_EXPECT.findall(text))

    pattern = "Result<T,E> + ? operator" if question_count > unwrap_count else "Result<T,E> + unwrap/expect"
    notes = []
    if unwrap_count > question_count and question_count > 0:
        notes.append("Mix of ? operator and .unwrap() — prefer ? in production code")

    return ErrorResult(
        primary_pattern=pattern,
        exception_naming=None,
        logging_framework=None,
        custom_exceptions=[],
        propagation_style="return",
        notes=notes,
    )
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
class ErrorHandlingMiner:
    """Run the per-language error-handling detectors over grouped source files."""

    def mine(self, by_lang: dict[str, list[Path]]) -> dict[str, ErrorResult]:
        """Return one ``ErrorResult`` per supported language key in ``by_lang``.

        Languages without a detector are left out of the result.
        """
        results: dict[str, ErrorResult] = {}
        for lang, paths in by_lang.items():
            detected = self._detect(lang, paths)
            if detected is not None:
                results[lang] = detected
        return results

    @staticmethod
    def _detect(lang: str, paths: list[Path]) -> "ErrorResult | None":
        """Dispatch to the detector for ``lang``; None when there is none."""
        if lang == "python":
            return _detect_py_errors(paths)
        if lang in ("javascript", "typescript"):
            return _detect_js_errors(paths, lang)
        if lang == "go":
            return _detect_go_errors(paths)
        if lang == "rust":
            return _detect_rust_errors(paths)
        return None
|