commit-defender 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- commit_defender/__init__.py +3 -0
- commit_defender/ai_agent.py +336 -0
- commit_defender/config.py +99 -0
- commit_defender/diff_extractor.py +85 -0
- commit_defender/entrypoint.py +126 -0
- commit_defender/exit_resolver.py +28 -0
- commit_defender/json_renderer.py +53 -0
- commit_defender/linters/__init__.py +28 -0
- commit_defender/linters/base.py +34 -0
- commit_defender/linters/js_linter.py +74 -0
- commit_defender/linters/markdown_linter.py +63 -0
- commit_defender/linters/python_linter.py +65 -0
- commit_defender/linters/shell_linter.py +39 -0
- commit_defender/models.py +63 -0
- commit_defender/renderer.py +91 -0
- commit_defender/settings.py +123 -0
- commit_defender/staged_files.py +128 -0
- commit_defender-0.1.0.dist-info/METADATA +374 -0
- commit_defender-0.1.0.dist-info/RECORD +22 -0
- commit_defender-0.1.0.dist-info/WHEEL +4 -0
- commit_defender-0.1.0.dist-info/entry_points.txt +2 -0
- commit_defender-0.1.0.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,336 @@
|
|
|
1
|
+
"""AI review agent using the Azure OpenAI SDK."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from .config import AIReviewConfig, Config
|
|
10
|
+
from .models import FileComment, LintFinding, ReviewResult
|
|
11
|
+
from .settings import Settings, load_settings
|
|
12
|
+
|
|
13
|
+
# ── Skill loader ──────────────────────────────────────────────────────────────
|
|
14
|
+
|
|
15
|
+
def _load_skills(repo_path: Path) -> str:
|
|
16
|
+
"""Read all SKILL.md files from <repo>/.commit-defender/*/SKILL.md.
|
|
17
|
+
|
|
18
|
+
Returns a formatted string ready to embed in the system prompt, or an
|
|
19
|
+
empty string if no skill directory exists.
|
|
20
|
+
"""
|
|
21
|
+
skill_dir = repo_path / ".commit-defender"
|
|
22
|
+
if not skill_dir.is_dir():
|
|
23
|
+
return ""
|
|
24
|
+
|
|
25
|
+
sections: list[str] = []
|
|
26
|
+
for skill_md in sorted(skill_dir.glob("*/SKILL.md")):
|
|
27
|
+
category = skill_md.parent.name
|
|
28
|
+
content = skill_md.read_text(encoding="utf-8").strip()
|
|
29
|
+
sections.append(f"### [{category}]\n\n{content}")
|
|
30
|
+
|
|
31
|
+
if not sections:
|
|
32
|
+
return ""
|
|
33
|
+
|
|
34
|
+
return "## Active Review Skills\n\n" + "\n\n---\n\n".join(sections)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# ── Behavior modifiers ────────────────────────────────────────────────────────
|
|
38
|
+
|
|
39
|
+
# Instruction snippet embedded in the system prompt for each severity level
# (how aggressively the reviewer flags issues). Keys mirror the SeverityLevel
# literal in config.py; unknown keys fall back to "moderate" at build time.
_SEVERITY_PROMPTS: dict[str, str] = {
    "severe": (
        "Apply the absolute strictest possible review. Flag every deviation from "
        "best practice, every style inconsistency, every potential issue — no matter "
        "how minor. Zero tolerance."
    ),
    "rigorous": (
        "Apply a strict review. Flag most issues including minor style and "
        "best-practice deviations. Err on the side of raising concerns."
    ),
    "moderate": (
        "Apply a balanced review. Flag meaningful issues and genuine best-practice "
        "violations, but do not nitpick trivial style details."
    ),
    "generous": (
        "Apply a lenient review. Only flag significant issues that carry clear risk "
        "or that deviate substantially from convention. Allow minor imperfections."
    ),
    "lean": (
        "Apply a minimal review. Only flag critical issues: those that will break "
        "functionality, introduce security vulnerabilities, or cause data loss."
    ),
}

# Instruction snippet for each verbosity ("richness") level; also caps the
# summary word count. Keys mirror the RichnessLevel literal in config.py.
_RICHNESS_PROMPTS: dict[str, str] = {
    "colorful": (
        "For each finding, provide an elaborate explanation: describe the problem in "
        "depth, give a concrete example of the fix, explain the reasoning, and mention "
        "any trade-offs. The summary may be up to 600 words."
    ),
    "chatty": (
        "For each finding, provide helpful context and a suggested fix. "
        "The summary should be thorough but focused, up to 400 words."
    ),
    "moderate": (
        "Provide clear, concise explanations for each finding. "
        "Keep the summary under 300 words."
    ),
    "simple": (
        "Be brief. One or two sentences per finding. "
        "Keep the summary under 150 words."
    ),
    "silent": (
        "Output one-line descriptions only. No elaboration, no examples, no context. "
        "Keep the summary under 60 words."
    ),
}

# Output-language instruction per supported locale (Locale literal in config.py).
_LOCALE_PROMPTS: dict[str, str] = {
    "en": "Write all output in English.",
    "ko": "모든 출력을 한국어로 작성하세요.",
}
|
|
91
|
+
|
|
92
|
+
_BASE_SYSTEM_PROMPT = """\
|
|
93
|
+
You are commit-defender, an AI code reviewer integrated into a git pre-commit hook.
|
|
94
|
+
|
|
95
|
+
Your job is to review the provided git diff and static analysis findings, then produce a
|
|
96
|
+
concise, actionable review that helps the developer understand what needs to be fixed
|
|
97
|
+
before committing.
|
|
98
|
+
|
|
99
|
+
## Core guidelines
|
|
100
|
+
- Be direct and specific. Reference file names and line numbers.
|
|
101
|
+
- Group related issues together.
|
|
102
|
+
- Distinguish between must-fix issues (errors) and suggestions (warnings/style).
|
|
103
|
+
- Do not repeat every lint finding verbatim — synthesize patterns and highlight the most important ones.
|
|
104
|
+
- If the code looks good overall, say so clearly.
|
|
105
|
+
|
|
106
|
+
## Output format
|
|
107
|
+
Respond ONLY with a valid JSON object matching this schema:
|
|
108
|
+
{
|
|
109
|
+
"summary": "<narrative review, markdown allowed>",
|
|
110
|
+
"blocking": <true if the code should not be committed as-is, false otherwise>,
|
|
111
|
+
"file_comments": [
|
|
112
|
+
{
|
|
113
|
+
"file": "<path relative to repo root, e.g. src/main.py>",
|
|
114
|
+
"line": <1-based line number from the diff; 0 for a file-level comment>,
|
|
115
|
+
"comment": "<actionable suggestion, markdown allowed>"
|
|
116
|
+
}
|
|
117
|
+
]
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
Rules for file_comments:
|
|
121
|
+
- Only reference lines that appear in the provided diff.
|
|
122
|
+
- Limit to at most 15 comments total.
|
|
123
|
+
- Omit the array (or use []) if there is nothing specific to annotate.
|
|
124
|
+
- Do not include anything outside the JSON object.
|
|
125
|
+
"""
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _build_system_prompt(
    settings: Settings,
    config: Config,
    skills_text: str,
    project_suffix: str,
) -> str:
    """Assemble the full system prompt.

    Layout: base prompt, then (optionally) skill guidelines, then the review
    behavior modifiers (severity / detail / language), then (optionally) the
    project-specific suffix — all joined with blank lines.
    """
    defaults = config.review_settings

    # Priority: env var (set by VS Code / hook) > .commit-defender/settings.json
    # > built-in default.
    def _resolve(env_value: str, fallback: str) -> str:
        return env_value.strip().lower() or fallback

    severity_key = _resolve(settings.cd_severity_level, defaults.severityLevel)
    richness_key = _resolve(settings.cd_richness_level, defaults.richnessLevel)
    locale_key = _resolve(settings.cd_locale, defaults.locale)

    # Unknown keys fall back to the "moderate" / English prompts.
    behavior = "\n".join([
        f"- Severity: {_SEVERITY_PROMPTS.get(severity_key, _SEVERITY_PROMPTS['moderate'])}",
        f"- Detail level: {_RICHNESS_PROMPTS.get(richness_key, _RICHNESS_PROMPTS['moderate'])}",
        f"- Language: {_LOCALE_PROMPTS.get(locale_key, _LOCALE_PROMPTS['en'])}",
    ])

    sections = [_BASE_SYSTEM_PROMPT]
    if skills_text:
        sections.append(skills_text)
    sections.append("## Review behavior\n\n" + behavior)
    if project_suffix:
        sections.append(f"## Project-specific context\n\n{project_suffix}")

    return "\n\n".join(sections)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
# ── JSON parsing ─────────────────────────────────────────────────────────────
|
|
159
|
+
|
|
160
|
+
def _parse_json(raw: str) -> dict:
|
|
161
|
+
"""Parse JSON from the model response, stripping markdown fences if present."""
|
|
162
|
+
# Direct parse — works when response_format=json_object was honoured
|
|
163
|
+
try:
|
|
164
|
+
return json.loads(raw)
|
|
165
|
+
except json.JSONDecodeError:
|
|
166
|
+
pass
|
|
167
|
+
|
|
168
|
+
# Extract the first {...} block (handles ```json ... ``` wrapping)
|
|
169
|
+
match = re.search(r'\{[\s\S]*\}', raw)
|
|
170
|
+
if match:
|
|
171
|
+
try:
|
|
172
|
+
return json.loads(match.group())
|
|
173
|
+
except json.JSONDecodeError:
|
|
174
|
+
pass
|
|
175
|
+
|
|
176
|
+
raise json.JSONDecodeError("No valid JSON found in response", raw, 0)
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
# ── Agent ─────────────────────────────────────────────────────────────────────
|
|
180
|
+
|
|
181
|
+
class AIReviewAgent:
    """Sends the staged diff and lint findings to Azure OpenAI for review.

    Every failure mode (missing credentials, SDK absent, network/API errors,
    unparseable model output) is converted into ReviewResult.error(...) rather
    than raised, so the pre-commit pipeline can degrade gracefully.
    """

    def __init__(self, config: AIReviewConfig, full_config: Config | None = None) -> None:
        self.config = config
        # Full Config kept for the review_settings fallback in review().
        self._full_config = full_config

    def review(
        self,
        diff: str,
        lint_findings: list[LintFinding],
        repo_path: Path | None = None,
    ) -> ReviewResult:
        """Run the AI review and return a ReviewResult (never raises).

        Args:
            diff: unified diff of the changes under review.
            lint_findings: static-analysis findings given to the model as context.
            repo_path: repo root, used to load .commit-defender skill files.
        """
        settings = load_settings()

        # Respect both the config switch and the user's skip flag.
        if not self.config.enabled or settings.skip_ai:
            return ReviewResult.skipped()

        missing = settings.missing_azure_fields()
        if missing:
            return ReviewResult.error(
                f"Missing credentials in ~/.commit-defender.env: {', '.join(missing)}"
            )

        # Imported lazily so the rule-based pipeline works without the SDK.
        try:
            from openai import (
                AzureOpenAI,
                APIConnectionError,
                APIStatusError,
                APITimeoutError,
                AuthenticationError,
                RateLimitError,
            )
        except ImportError:
            return ReviewResult.error("openai package not installed")

        client = AzureOpenAI(
            api_key=settings.azure_openai_api_key,
            azure_endpoint=settings.azure_openai_endpoint,
            api_version=settings.azure_openai_api_version,
        )

        # The deployment name from the env file wins over the configured model.
        deployment = settings.azure_openai_deployment or self.config.model

        # Load skill guidelines from the repo's .commit-defender directory
        skills_text = _load_skills(repo_path) if repo_path else ""

        # Use the full Config for review_settings fallback; build a default if absent
        from .config import Config as _Config
        full_cfg = self._full_config or _Config()

        system_content = _build_system_prompt(
            settings,
            full_cfg,
            skills_text,
            self.config.system_prompt_suffix,
        )

        findings_text = "\n".join(str(f) for f in lint_findings) if lint_findings else "None"

        user_message = f"""\
## Staged diff

```diff
{diff or '(no diff available)'}
```

## Static analysis findings

```
{findings_text}
```

Please review the above and respond with the JSON object as instructed.
"""

        # Initialized before the try block so the JSONDecodeError handler can
        # always reference it (the previous `'raw' in dir()` check was fragile).
        raw = ""

        try:
            # Try with structured JSON mode first; fall back to plain text if the
            # deployment does not support response_format (older API versions, fine-tuned
            # models, or deployments with content-filtering restrictions).
            try:
                response = client.chat.completions.create(
                    model=deployment,
                    max_completion_tokens=self.config.max_tokens,
                    response_format={"type": "json_object"},
                    messages=[
                        {"role": "system", "content": system_content},
                        {"role": "user", "content": user_message},
                    ],
                )
            except Exception as fmt_err:
                # response_format not supported — retry without it
                _fmt_msg = str(fmt_err).lower()
                if not any(k in _fmt_msg for k in ("response_format", "json_object", "unsupported")):
                    raise  # unrelated error — re-raise
                response = client.chat.completions.create(
                    model=deployment,
                    max_completion_tokens=self.config.max_tokens,
                    messages=[
                        {"role": "system", "content": system_content},
                        {"role": "user", "content": user_message},
                    ],
                )

            # message.content can be None (e.g. fully content-filtered replies);
            # guard before calling .strip() so we return a clear error instead
            # of an AttributeError swallowed by the generic handler below.
            content = response.choices[0].message.content
            if not content:
                return ReviewResult.error("Azure OpenAI returned an empty response.")
            raw = content.strip()

            # Extract JSON even when the model wraps it in markdown fences
            data = _parse_json(raw)

            # Only keep comments that carry both required keys; "line" defaults
            # to 0 (file-level comment) per the prompt's schema.
            file_comments = [
                FileComment(
                    file=fc["file"],
                    line=int(fc.get("line", 0)),
                    comment=fc["comment"],
                )
                for fc in data.get("file_comments", [])
                if "file" in fc and "comment" in fc
            ]
            return ReviewResult(
                summary=data.get("summary", "(no summary)"),
                blocking=bool(data.get("blocking", False)),
                raw_response=raw,
                file_comments=file_comments,
            )

        except AuthenticationError as e:
            return ReviewResult.error(
                f"Authentication failed — check AZURE_OPENAI_API_KEY and AZURE_OPENAI_ENDPOINT.\n"
                f" Detail: {e.message}"
            )
        except APIConnectionError as e:
            return ReviewResult.error(
                f"Could not reach Azure OpenAI endpoint '{settings.azure_openai_endpoint}'.\n"
                f" Check your network and AZURE_OPENAI_ENDPOINT value.\n"
                f" Detail: {e}"
            )
        except APITimeoutError:
            return ReviewResult.error(
                "Request to Azure OpenAI timed out. "
                "Check network connectivity or increase max_tokens."
            )
        except RateLimitError as e:
            return ReviewResult.error(
                f"Azure OpenAI rate limit exceeded — try again in a moment.\n"
                f" Detail: {e.message}"
            )
        except APIStatusError as e:
            return ReviewResult.error(
                f"Azure OpenAI returned HTTP {e.status_code}.\n"
                f" Detail: {e.message}"
            )
        except json.JSONDecodeError:
            return ReviewResult.error(
                f"Could not parse AI response as JSON.\n"
                f" Raw response (first 300 chars): {raw[:300] if raw else '(none)'}"
            )
        except Exception as e:
            # Last-resort catch so a pre-commit hook never crashes outright.
            return ReviewResult.error(f"Unexpected error: {type(e).__name__}: {e}")
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""Configuration loader for commit-defender."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any, Literal
|
|
9
|
+
|
|
10
|
+
import yaml
|
|
11
|
+
from pydantic import BaseModel, Field
|
|
12
|
+
|
|
13
|
+
# Which pipeline stages run: both linters and AI, AI only, or linters only.
AnalysisMode = Literal["hybrid", "ai-powered", "rule-based"]
# How strict the AI reviewer is, ordered from most to least demanding.
SeverityLevel = Literal["severe", "rigorous", "moderate", "generous", "lean"]
# How verbose the AI reviewer's explanations are, from most to least.
RichnessLevel = Literal["colorful", "chatty", "moderate", "simple", "silent"]
# Output language of the review text.
Locale = Literal["en", "ko"]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class LinterConfig(BaseModel):
    """Per-language linter settings (one entry of LinterMap)."""

    enabled: bool = True  # set False to skip this language entirely
    tool: str = ""  # linter executable name, e.g. "ruff" or "eslint"
    args: list[str] = Field(default_factory=list)  # extra CLI arguments
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class AIReviewConfig(BaseModel):
    """Settings for the AI review step (`ai_review` block of the YAML config)."""

    enabled: bool = True  # master switch for the AI review
    model: str = "gpt-5.1"  # model name; the env-configured deployment takes priority
    max_tokens: int = 1024  # completion-token budget per review request
    blocking: bool = False  # when True, a blocking AI verdict (or AI error) fails the commit
    system_prompt_suffix: str = ""  # extra project-specific text appended to the system prompt
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class LinterMap(BaseModel):
    """Default linter configuration per supported language."""

    python: LinterConfig = Field(default_factory=lambda: LinterConfig(tool="ruff"))
    javascript: LinterConfig = Field(default_factory=lambda: LinterConfig(tool="eslint"))
    typescript: LinterConfig = Field(default_factory=lambda: LinterConfig(tool="eslint"))
    shell: LinterConfig = Field(default_factory=lambda: LinterConfig(tool="shellcheck"))
    # Markdown linting is disabled by default.
    markdown: LinterConfig = Field(
        default_factory=lambda: LinterConfig(enabled=False, tool="markdownlint")
    )
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class ReviewSettings(BaseModel):
    """Loaded from .commit-defender/settings.json; overridden by env vars.

    Field names are camelCase to mirror the JSON keys on disk.
    """
    analysisMode: AnalysisMode = "hybrid"  # which pipeline stages run
    severityLevel: SeverityLevel = "moderate"  # AI review strictness
    richnessLevel: RichnessLevel = "moderate"  # AI review verbosity
    locale: Locale = "en"  # output language of the review
    excludePatterns: list[str] = Field(default_factory=list)  # extra glob patterns to skip
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class Config(BaseModel):
    """Top-level configuration, parsed from commit-defender.yaml."""

    version: int = 1  # config schema version
    blocking_severity: str = "error"  # lint severity threshold that blocks the commit
    linters: LinterMap = Field(default_factory=LinterMap)  # per-language linter settings
    ai_review: AIReviewConfig = Field(default_factory=AIReviewConfig)  # AI review settings
    # Glob patterns for files excluded from analysis.
    exclude: list[str] = Field(
        default_factory=lambda: ["*.lock", "dist/**", "node_modules/**", "*.min.js"]
    )
    # Loaded from .commit-defender/settings.json; env vars take priority at runtime
    review_settings: ReviewSettings = Field(default_factory=ReviewSettings)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _load_review_settings(repo_path: Path) -> ReviewSettings:
    """Load review settings from <repo>/.commit-defender/settings.json.

    Any problem — missing file, invalid JSON, schema mismatch — yields the
    built-in defaults rather than failing the hook.
    """
    path = repo_path / ".commit-defender" / "settings.json"
    if not path.exists():
        return ReviewSettings()

    try:
        payload: dict[str, Any] = json.loads(path.read_text(encoding="utf-8"))
        return ReviewSettings.model_validate(payload)
    except Exception:
        # Best-effort: fall back to defaults on any read/parse/validation error.
        return ReviewSettings()
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def load_config(repo_path: Path | None = None) -> Config:
    """Load config from commit-defender.yaml and .commit-defender/settings.json.

    YAML search order: $CD_CONFIG_PATH, <repo>/commit-defender.yaml, then the
    container default /repo/commit-defender.yaml. The first existing candidate
    wins; when none exists, built-in defaults are used. The review_settings
    field is then overlaid from .commit-defender/settings.json.
    """
    config_path_env = os.environ.get("CD_CONFIG_PATH")

    candidates: list[Path] = []
    if config_path_env:
        candidates.append(Path(config_path_env))
    if repo_path:
        candidates.append(repo_path / "commit-defender.yaml")
    candidates.append(Path("/repo/commit-defender.yaml"))

    cfg_dict: dict[str, Any] = {}
    for candidate in candidates:
        if candidate.exists():
            # Explicit UTF-8, consistent with the other reads in this module
            # (the platform default encoding is not reliable everywhere).
            cfg_dict = yaml.safe_load(candidate.read_text(encoding="utf-8")) or {}
            break

    config = Config.model_validate(cfg_dict)

    # Overlay review_settings from .commit-defender/settings.json
    resolved_repo = repo_path or Path("/repo")
    config.review_settings = _load_review_settings(resolved_repo)

    return config
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""Extract git diffs for staged files."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import subprocess
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
# Limit diff size sent to AI to avoid token overflow (~80K chars ≈ ~20K tokens)
MAX_DIFF_CHARS = 80_000
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DiffExtractor:
    """Produces unified diffs for staged / working-tree files via git."""

    def __init__(self, repo_path: Path) -> None:
        self.repo_path = repo_path

    def get_full_diff(self, files: list[Path]) -> str:
        """Return combined unified diff for all staged files ("" for no files)."""
        if not files:
            return ""

        rel_paths = [str(f.relative_to(self.repo_path)) for f in files]

        # Handle initial commit: no HEAD yet
        try:
            result = self._run_git(["diff", "--cached", "--diff-filter=d", "--"] + rel_paths)
        except subprocess.CalledProcessError:
            # Fallback: diff against git's well-known empty-tree object (first commit)
            empty_tree = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
            result = self._run_git(
                ["diff", "--cached", "--diff-filter=d", empty_tree, "--"] + rel_paths
            )

        return self._truncate(result.stdout)

    def get_file_diff(self, file: Path) -> str:
        """Return unified diff for a single staged file ("" on git failure)."""
        rel = str(file.relative_to(self.repo_path))
        try:
            result = self._run_git(["diff", "--cached", "--", rel])
            return result.stdout
        except subprocess.CalledProcessError:
            return ""

    def get_working_diff(self, files: list[Path]) -> str:
        """Return diff of working-tree files vs HEAD (for on-demand analysis).

        Falls back to full file content for untracked files.
        """
        if not files:
            return ""

        parts: list[str] = []
        for f in files:
            rel = str(f.relative_to(self.repo_path))
            # Try staged+unstaged combined (HEAD vs working tree)
            try:
                result = self._run_git(["diff", "HEAD", "--diff-filter=d", "--", rel])
                if result.stdout.strip():
                    parts.append(result.stdout)
                    continue
            except subprocess.CalledProcessError:
                pass

            # Untracked / new file — show full content as an addition diff
            try:
                content = f.read_text(encoding="utf-8", errors="replace")
                added = "\n".join(f"+{line}" for line in content.splitlines())
                header = f"diff --git a/{rel} b/{rel}\nnew file mode 100644\n--- /dev/null\n+++ b/{rel}\n"
                parts.append(header + added)
            except OSError:
                # Unreadable file — skip it rather than fail the whole diff.
                pass

        return self._truncate("\n".join(parts))

    @staticmethod
    def _truncate(diff: str) -> str:
        """Cap *diff* at MAX_DIFF_CHARS, appending a marker when cut.

        Shared by get_full_diff and get_working_diff (previously duplicated).
        """
        if len(diff) > MAX_DIFF_CHARS:
            return diff[:MAX_DIFF_CHARS] + "\n\n[... diff truncated for token limit ...]"
        return diff

    def _run_git(self, args: list[str]) -> subprocess.CompletedProcess[str]:
        """Run git rooted at the repo; raises CalledProcessError on non-zero exit."""
        return subprocess.run(
            ["git", "-C", str(self.repo_path)] + args,
            capture_output=True,
            text=True,
            check=True,
        )
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"""Main entrypoint for commit-defender container."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
import time
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def run() -> int:
    """Execute the full pre-commit validation pipeline. Returns exit code.

    Pipeline: print early diagnostics -> load settings/config -> collect
    target files -> optionally lint -> extract the diff -> optionally run the
    AI review -> render the report (ANSI to stderr, JSON to stdout when
    requested) and resolve the exit code (0 = pass, 1 = block).
    """

    # ── Early diagnostics — printed before any heavy imports ──────────────────
    # These appear in the VS Code output channel so users can debug path issues.
    cd_repo = os.environ.get("CD_REPO_PATH", "(not set — will use cwd)")
    cd_files = os.environ.get("CD_TARGET_FILES") or os.environ.get("CD_STAGED_FILES") or "(not set)"
    cd_json = os.environ.get("CD_JSON", "0")
    # One file per line in the env var; count non-blank lines only.
    file_count = len([l for l in cd_files.splitlines() if l.strip()]) if cd_files != "(not set)" else 0
    print(
        f"[commit-defender] repo={cd_repo} files={file_count} json={cd_json}",
        file=sys.stderr, flush=True,
    )

    # Deferred imports keep the diagnostics above cheap and immediate.
    from .ai_agent import AIReviewAgent
    from .config import load_config
    from .diff_extractor import DiffExtractor
    from .exit_resolver import ExitCodeResolver
    from .linters import build_linters
    from .models import Report
    from .settings import load_settings
    from .staged_files import StagedFilesReader

    settings = load_settings()
    repo_path = Path(settings.repo_path)
    dry_run = settings.dry_run

    config = load_config(repo_path)

    reader = StagedFilesReader(repo_path, config, settings)
    staged = reader.read()

    # Nothing to analyze: report that fact and exit cleanly (never block).
    if not staged:
        target_src = "CD_TARGET_FILES" if settings.cd_target_files else "CD_STAGED_FILES"
        print(
            f"[commit-defender] No files matched for analysis "
            f"(repo={repo_path}, source={target_src}).",
            file=sys.stderr, flush=True,
        )
        if settings.json_mode:
            from .json_renderer import JsonRenderer
            from .models import ReviewResult
            # Emit a well-formed empty report so JSON consumers always get output.
            empty_report = Report(
                staged_files=[],
                lint_findings=[],
                review=ReviewResult(summary="No files matched for analysis."),
                duration_ms=0,
            )
            JsonRenderer().render(empty_report, exit_code=0, repo_path=str(repo_path))
        return 0

    # Resolve analysis mode: env var > settings.json > default
    mode = (settings.cd_analysis_mode.strip() or config.review_settings.analysisMode)
    run_linters = mode in ("hybrid", "rule-based")
    run_ai = mode in ("hybrid", "ai-powered")

    start = time.monotonic()

    # Static analysis (skipped in ai-powered mode)
    lint_findings = []
    if run_linters:
        by_lang = reader.by_language(staged)
        for lang, files in by_lang.items():
            # A language without a configured linter (or with it disabled) is skipped.
            linter_cfg = getattr(config.linters, lang, None)
            if linter_cfg is None or not linter_cfg.enabled:
                continue
            linters = build_linters(lang, linter_cfg)
            for linter in linters:
                lint_findings.extend(linter.run(files))

    # Diff extraction — use working-tree diff when files were passed explicitly
    diff_extractor = DiffExtractor(repo_path)
    if settings.cd_target_files:
        full_diff = diff_extractor.get_working_diff(staged)
    else:
        full_diff = diff_extractor.get_full_diff(staged)

    # AI review (skipped in rule-based mode)
    if run_ai:
        ai_agent = AIReviewAgent(config.ai_review, full_config=config)
        review = ai_agent.review(full_diff, lint_findings, repo_path=repo_path)
    else:
        from .models import ReviewResult
        review = ReviewResult(summary="AI review disabled (rule-based mode).", blocking=False)

    duration_ms = int((time.monotonic() - start) * 1000)

    report = Report(
        staged_files=[str(f.relative_to(repo_path)) for f in staged],
        lint_findings=lint_findings,
        review=review,
        duration_ms=duration_ms,
    )

    resolver = ExitCodeResolver(config)
    # Dry-run mode always exits 0, regardless of findings.
    exit_code = resolver.resolve(report) if not dry_run else 0

    # ANSI report always goes to stderr (visible in terminal and hook output)
    from .renderer import ReportRenderer
    ReportRenderer().render(report, blocked=(exit_code == 1))

    # JSON output goes to stdout when requested (consumed by VS Code extension / CI)
    if settings.json_mode:
        from .json_renderer import JsonRenderer
        JsonRenderer().render(report, exit_code, repo_path=str(repo_path))

    return exit_code
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def cli() -> None:
    """CLI entry point (used by installer for direct invocation).

    Runs the pipeline and exits the process with its exit code.
    """
    raise SystemExit(run())


if __name__ == "__main__":
    cli()
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""Resolve the final exit code from a Report."""
|
|
2
|
+
|
|
3
|
+
from .config import Config
|
|
4
|
+
from .models import Report
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ExitCodeResolver:
    """Maps a finished Report onto the hook's exit status."""

    def __init__(self, config: Config) -> None:
        self.config = config

    def resolve(self, report: Report) -> int:
        """Return 0 (pass) or 1 (block)."""
        ai_is_gate = self.config.ai_review.blocking

        # API / infrastructure errors only block when ai_review is a hard gate.
        # When ai_review.blocking=false the review is advisory — a network
        # hiccup must not prevent a commit from landing.
        if ai_is_gate and report.review.is_error:
            return 1

        # Lint findings at or above the configured threshold always block.
        if report.findings_at_or_above(self.config.blocking_severity):
            return 1

        # AI review blocks only when both the config gate and the AI say so.
        if ai_is_gate and report.review.blocking:
            return 1

        return 0
|