cognify-code 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_code_assistant/__init__.py +14 -0
- ai_code_assistant/agent/__init__.py +63 -0
- ai_code_assistant/agent/code_agent.py +461 -0
- ai_code_assistant/agent/code_generator.py +388 -0
- ai_code_assistant/agent/code_reviewer.py +365 -0
- ai_code_assistant/agent/diff_engine.py +308 -0
- ai_code_assistant/agent/file_manager.py +300 -0
- ai_code_assistant/agent/intent_classifier.py +284 -0
- ai_code_assistant/chat/__init__.py +11 -0
- ai_code_assistant/chat/agent_session.py +156 -0
- ai_code_assistant/chat/session.py +165 -0
- ai_code_assistant/cli.py +1571 -0
- ai_code_assistant/config.py +149 -0
- ai_code_assistant/editor/__init__.py +8 -0
- ai_code_assistant/editor/diff_handler.py +270 -0
- ai_code_assistant/editor/file_editor.py +350 -0
- ai_code_assistant/editor/prompts.py +146 -0
- ai_code_assistant/generator/__init__.py +7 -0
- ai_code_assistant/generator/code_gen.py +265 -0
- ai_code_assistant/generator/prompts.py +114 -0
- ai_code_assistant/git/__init__.py +6 -0
- ai_code_assistant/git/commit_generator.py +130 -0
- ai_code_assistant/git/manager.py +203 -0
- ai_code_assistant/llm.py +111 -0
- ai_code_assistant/providers/__init__.py +23 -0
- ai_code_assistant/providers/base.py +124 -0
- ai_code_assistant/providers/cerebras.py +97 -0
- ai_code_assistant/providers/factory.py +148 -0
- ai_code_assistant/providers/google.py +103 -0
- ai_code_assistant/providers/groq.py +111 -0
- ai_code_assistant/providers/ollama.py +86 -0
- ai_code_assistant/providers/openai.py +114 -0
- ai_code_assistant/providers/openrouter.py +130 -0
- ai_code_assistant/py.typed +0 -0
- ai_code_assistant/refactor/__init__.py +20 -0
- ai_code_assistant/refactor/analyzer.py +189 -0
- ai_code_assistant/refactor/change_plan.py +172 -0
- ai_code_assistant/refactor/multi_file_editor.py +346 -0
- ai_code_assistant/refactor/prompts.py +175 -0
- ai_code_assistant/retrieval/__init__.py +19 -0
- ai_code_assistant/retrieval/chunker.py +215 -0
- ai_code_assistant/retrieval/indexer.py +236 -0
- ai_code_assistant/retrieval/search.py +239 -0
- ai_code_assistant/reviewer/__init__.py +7 -0
- ai_code_assistant/reviewer/analyzer.py +278 -0
- ai_code_assistant/reviewer/prompts.py +113 -0
- ai_code_assistant/utils/__init__.py +18 -0
- ai_code_assistant/utils/file_handler.py +155 -0
- ai_code_assistant/utils/formatters.py +259 -0
- cognify_code-0.2.0.dist-info/METADATA +383 -0
- cognify_code-0.2.0.dist-info/RECORD +55 -0
- cognify_code-0.2.0.dist-info/WHEEL +5 -0
- cognify_code-0.2.0.dist-info/entry_points.txt +3 -0
- cognify_code-0.2.0.dist-info/licenses/LICENSE +22 -0
- cognify_code-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
"""Code analyzer for reviewing code files."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Dict, List, Optional
|
|
8
|
+
|
|
9
|
+
from ai_code_assistant.config import Config, get_language_by_extension
|
|
10
|
+
from ai_code_assistant.llm import LLMManager
|
|
11
|
+
from ai_code_assistant.reviewer.prompts import REVIEW_PROMPTS
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Retry prompt for when JSON parsing fails
|
|
15
|
+
RETRY_JSON_PROMPT = """Your previous response was not valid JSON. Please provide ONLY valid JSON output.
|
|
16
|
+
|
|
17
|
+
Here is the code to review again:
|
|
18
|
+
```{language}
|
|
19
|
+
{code}
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Return ONLY a JSON object with this structure:
|
|
23
|
+
{{
|
|
24
|
+
"summary": "Brief summary",
|
|
25
|
+
"issues": [
|
|
26
|
+
{{
|
|
27
|
+
"line_start": 1,
|
|
28
|
+
"line_end": 1,
|
|
29
|
+
"category": "bugs|security|performance|style|best_practices",
|
|
30
|
+
"severity": "critical|warning|suggestion",
|
|
31
|
+
"title": "Short title",
|
|
32
|
+
"description": "What's wrong",
|
|
33
|
+
"suggestion": "How to fix it",
|
|
34
|
+
"confidence": 0.8
|
|
35
|
+
}}
|
|
36
|
+
],
|
|
37
|
+
"overall_quality": "good|acceptable|needs_improvement|poor"
|
|
38
|
+
}}
|
|
39
|
+
|
|
40
|
+
Return ONLY the JSON, no other text."""
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class ReviewIssue:
    """A single finding produced by a code review.

    The location, classification and explanation fields are required; the
    snippet, fix and confidence fields are optional and default to empty
    strings and ``0.0``.
    """

    # Line span the finding refers to.
    line_start: int
    line_end: int

    # Classification of the finding.
    category: str
    severity: str

    # Human-readable explanation and proposed remedy.
    title: str
    description: str
    suggestion: str

    # Optional extras.
    code_snippet: str = ""
    fixed_code: str = ""
    confidence: float = 0.0
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass
class ReviewResult:
    """Everything a review of one file or snippet produced.

    ``issues`` and ``metrics`` start out empty, ``overall_quality`` defaults
    to ``"unknown"``, ``raw_response`` to an empty string and ``error`` to
    ``None``.
    """

    filename: str
    language: str
    summary: str
    issues: List[ReviewIssue] = field(default_factory=list)
    metrics: Dict = field(default_factory=dict)
    overall_quality: str = "unknown"
    raw_response: str = ""
    error: Optional[str] = None

    def _with_severity(self, level: str) -> List[ReviewIssue]:
        """Return the issues whose ``severity`` equals ``level``, in order."""
        return list(filter(lambda issue: issue.severity == level, self.issues))

    @property
    def critical_issues(self) -> List[ReviewIssue]:
        """Issues whose severity is ``"critical"``."""
        return self._with_severity("critical")

    @property
    def warnings(self) -> List[ReviewIssue]:
        """Issues whose severity is ``"warning"``."""
        return self._with_severity("warning")

    @property
    def suggestions(self) -> List[ReviewIssue]:
        """Issues whose severity is ``"suggestion"``."""
        return self._with_severity("suggestion")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class CodeAnalyzer:
    """Analyzes code files for issues and improvements.

    Code is sent to the LLM through one of the ``REVIEW_PROMPTS`` templates
    and the JSON reply is converted into a ``ReviewResult``.  Failures are
    reported through ``ReviewResult.error`` rather than raised.
    """

    # Fenced block; an optional ``json`` tag after the opening fence is dropped.
    _FENCE_RE = re.compile(r"```(?:json\b)?\s*(.*?)\s*```", re.DOTALL)
    # Widest ``{ ... }`` span in the text.
    _OBJECT_RE = re.compile(r"\{.*\}", re.DOTALL)
    _TRAILING_COMMA_RE = re.compile(r",\s*([}\]])")
    _BARE_KEY_RE = re.compile(r"(\{|,)\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:")

    # (key holding the findings, default severity, default category).
    # "issues" is what the full and retry prompts request, "critical_issues"
    # the quick prompt, and "vulnerabilities" the security prompt.
    _ISSUE_LISTS = (
        ("issues", "suggestion", "unknown"),
        ("critical_issues", "critical", "unknown"),
        ("vulnerabilities", "suggestion", "security"),
    )

    def __init__(self, config: Config, llm_manager: LLMManager):
        self.config = config
        self.llm = llm_manager

    def review_file(
        self,
        file_path: Path,
        review_type: str = "full",
        categories: Optional[List[str]] = None,
    ) -> ReviewResult:
        """Review a single code file.

        Args:
            file_path: File to review.
            review_type: Key into ``REVIEW_PROMPTS``; unknown keys fall back
                to the ``"full"`` prompt.
            categories: Categories to analyze; defaults to
                ``config.review.categories``.

        Returns:
            The review result.  ``error`` is set (and no LLM call is made)
            when the file is missing, is not a regular file, exceeds
            ``config.review.max_file_size_kb`` or cannot be read as UTF-8.
        """
        if not file_path.exists():
            return self._failure(str(file_path), "unknown", f"File not found: {file_path}")

        # ``exists()`` is also true for directories, which cannot be read.
        if not file_path.is_file():
            return self._failure(str(file_path), "unknown", f"Not a file: {file_path}")

        # Check file size
        file_size_kb = file_path.stat().st_size / 1024
        max_size_kb = self.config.review.max_file_size_kb
        if file_size_kb > max_size_kb:
            return self._failure(
                str(file_path),
                "unknown",
                f"File too large: {file_size_kb:.1f}KB (max: {max_size_kb}KB)",
            )

        # Detect language
        language = get_language_by_extension(self.config, file_path) or "unknown"

        # Read code.  Decode explicitly as UTF-8 so the result does not depend
        # on the platform's default encoding, and report unreadable files the
        # same way as the other early failures instead of raising.
        try:
            code = file_path.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            return self._failure(
                str(file_path),
                language,
                f"Cannot read file (binary or encoding issue): {file_path}",
            )
        except OSError as exc:
            return self._failure(str(file_path), language, f"Error reading file: {exc}")

        # Use configured categories if not specified
        if categories is None:
            categories = self.config.review.categories

        return self._analyze_code(
            code=code,
            filename=str(file_path),
            language=language,
            review_type=review_type,
            categories=categories,
        )

    def review_code(
        self,
        code: str,
        language: str = "python",
        filename: str = "code_snippet",
        review_type: str = "full",
        categories: Optional[List[str]] = None,
    ) -> ReviewResult:
        """Review a code string directly.

        ``categories`` defaults to ``config.review.categories``.
        """
        if categories is None:
            categories = self.config.review.categories

        return self._analyze_code(
            code=code,
            filename=filename,
            language=language,
            review_type=review_type,
            categories=categories,
        )

    def _analyze_code(
        self,
        code: str,
        filename: str,
        language: str,
        review_type: str,
        categories: List[str],
        max_retries: int = 2,
    ) -> ReviewResult:
        """Run the review prompt and parse the reply, retrying on bad JSON.

        When the reply cannot be parsed, ``RETRY_JSON_PROMPT`` is sent up to
        ``max_retries`` times.  Any exception raised while talking to the LLM
        is converted into a result whose ``error`` starts with
        ``"Analysis failed"``.
        """
        prompt_template = REVIEW_PROMPTS.get(review_type, REVIEW_PROMPTS["full"])

        try:
            response = self.llm.invoke_with_template(
                prompt_template,
                code=code,
                language=language,
                filename=filename,
                categories=", ".join(categories),
            )
            result = self._parse_review_response(response, filename, language)

            # Retry if parsing failed
            retries = 0
            while result.error and "Parse error" in result.error and retries < max_retries:
                retries += 1
                retry_prompt = RETRY_JSON_PROMPT.format(language=language, code=code)
                response = self.llm.invoke(retry_prompt)
                result = self._parse_review_response(response, filename, language)

            return result
        except Exception as e:
            # Boundary handler: whatever the LLM call raises is reported
            # through the result so callers never have to catch it.
            return self._failure(filename, language, f"Analysis failed: {str(e)}")

    def _extract_json(self, response: str) -> str:
        """Extract the JSON object text from an LLM response.

        Preference order: the first fenced block whose content starts with
        ``{``; the widest ``{...}`` span anywhere in the response; the first
        fenced block of any kind; the response unchanged.
        """
        fenced = [match.group(1) for match in self._FENCE_RE.finditer(response)]

        # A reply may quote the reviewed code in a fence before the JSON, so
        # skip fenced blocks that cannot be a JSON object.
        for body in fenced:
            if body.startswith("{"):
                return body

        # Try to find JSON object directly
        match = self._OBJECT_RE.search(response)
        if match:
            return match.group(0)

        if fenced:
            return fenced[0]
        return response

    def _repair_json(self, json_str: str) -> str:
        """Attempt to repair common JSON issues.

        Handles trailing commas, unquoted keys, single-quoted strings and
        ``//`` comments.  The text is scanned once so that every fix applies
        only outside string literals: apostrophes, commas, colons and ``//``
        inside a string value (for example a URL) are left untouched.
        """
        repaired: List[str] = []
        outside: List[str] = []  # characters seen since the last string literal

        def flush_outside() -> None:
            text = "".join(outside)
            outside.clear()
            # Remove trailing commas before } or ]
            text = self._TRAILING_COMMA_RE.sub(r"\1", text)
            # Fix unquoted keys (simple cases)
            text = self._BARE_KEY_RE.sub(r'\1"\2":', text)
            repaired.append(text)

        pos, end = 0, len(json_str)
        while pos < end:
            char = json_str[pos]
            if char in "\"'":
                flush_outside()
                pos = self._copy_string(json_str, pos, repaired)
            elif json_str.startswith("//", pos):
                # Remove comments: skip to (not past) the end of the line.
                newline = json_str.find("\n", pos)
                pos = end if newline == -1 else newline
            else:
                outside.append(char)
                pos += 1
        flush_outside()

        return "".join(repaired)

    @staticmethod
    def _copy_string(text: str, start: int, out: List[str]) -> int:
        """Append the string literal starting at ``start`` to ``out``, double-quoted.

        ``text[start]`` is the opening quote (single or double).  Returns the
        index just past the closing quote; an unterminated literal is closed
        at the end of ``text``.
        """
        quote = text[start]
        chars: List[str] = []
        pos = start + 1
        while pos < len(text) and text[pos] != quote:
            char = text[pos]
            if char == "\\" and pos + 1 < len(text):
                escaped = text[pos + 1]
                # ``\'`` is not a valid JSON escape; keep the bare apostrophe.
                chars.append("'" if escaped == "'" else char + escaped)
                pos += 2
            else:
                # A raw double quote inside a single-quoted literal must be
                # escaped once the literal becomes double-quoted.
                chars.append('\\"' if char == '"' else char)
                pos += 1
        out.append('"' + "".join(chars) + '"')
        return pos + 1

    @staticmethod
    def _issue_fields(raw: Dict, default_severity: str, default_category: str) -> Dict:
        """Map one raw finding onto ``ReviewIssue`` keyword arguments.

        Accepts the field names of all three review prompts: ``line`` for
        both line numbers, ``issue`` for the description, and ``fix`` or
        ``remediation`` for the suggestion.
        """
        line = raw.get("line", 0)
        return {
            "line_start": raw.get("line_start", line),
            "line_end": raw.get("line_end", line),
            "category": raw.get("category", default_category),
            "severity": raw.get("severity", default_severity),
            "title": raw.get("title", "Issue"),
            "description": raw.get("description", raw.get("issue", "")),
            "suggestion": raw.get(
                "suggestion", raw.get("fix", raw.get("remediation", ""))
            ),
            "code_snippet": raw.get("code_snippet", ""),
            "fixed_code": raw.get("fixed_code", ""),
            "confidence": float(raw.get("confidence", 0.5)),
        }

    @staticmethod
    def _failure(
        filename: str,
        language: str,
        error: str,
        summary: str = "",
        raw_response: str = "",
    ) -> ReviewResult:
        """Build a ``ReviewResult`` that carries only an error."""
        return ReviewResult(
            filename=filename,
            language=language,
            summary=summary,
            raw_response=raw_response,
            error=error,
        )

    def _parse_review_response(
        self, response: str, filename: str, language: str
    ) -> ReviewResult:
        """Parse LLM response into ReviewResult.

        The JSON is parsed as-is first and after ``_repair_json`` if that
        fails.  Any malformed reply (invalid JSON, a non-object top level, a
        finding that is not an object, a non-numeric confidence) yields a
        result whose ``error`` starts with ``"Parse error"`` so the caller's
        retry loop can ask the model again.
        """
        try:
            json_str = self._extract_json(response)

            try:
                data = json.loads(json_str.strip())
            except json.JSONDecodeError:
                # Try to repair JSON
                data = json.loads(self._repair_json(json_str).strip())

            if not isinstance(data, dict):
                raise ValueError(
                    f"expected a JSON object, got {type(data).__name__}"
                )

            # Use the first findings key present in the reply.
            raw_issues: List = []
            default_severity, default_category = "suggestion", "unknown"
            for key, severity, category in self._ISSUE_LISTS:
                if key in data:
                    raw_issues = data[key] or []
                    default_severity, default_category = severity, category
                    break

            issues = [
                ReviewIssue(
                    **self._issue_fields(raw, default_severity, default_category)
                )
                for raw in raw_issues
            ]

            return ReviewResult(
                filename=filename,
                language=language,
                summary=data.get("summary", "Review complete"),
                issues=issues,
                metrics=data.get("metrics") or {},
                overall_quality=data.get("overall_quality", "unknown"),
                raw_response=response,
            )
        except (ValueError, TypeError, AttributeError, KeyError) as e:
            # json.JSONDecodeError is a ValueError subclass.
            return self._failure(
                filename,
                language,
                f"Parse error: {str(e)}",
                summary="Failed to parse review response",
                raw_response=response,
            )
|
|
278
|
+
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""Prompt templates for code review."""
|
|
2
|
+
|
|
3
|
+
from langchain_core.prompts import ChatPromptTemplate
|
|
4
|
+
|
|
5
|
+
# System prompt for code review
|
|
6
|
+
REVIEW_SYSTEM_PROMPT = """You are an expert code reviewer with deep knowledge of software engineering best practices, security vulnerabilities, and performance optimization.
|
|
7
|
+
|
|
8
|
+
Your task is to analyze code and provide detailed, actionable feedback. For each issue found:
|
|
9
|
+
1. Identify the specific line number(s)
|
|
10
|
+
2. Categorize the issue (bugs, security, performance, style, best_practices)
|
|
11
|
+
3. Assign a severity level (critical, warning, suggestion)
|
|
12
|
+
4. Explain the problem clearly
|
|
13
|
+
5. Provide a specific fix or improvement
|
|
14
|
+
6. Give a confidence score (0.0-1.0)
|
|
15
|
+
|
|
16
|
+
Be thorough but practical. Focus on real issues, not pedantic nitpicks.
|
|
17
|
+
Format your response as structured JSON."""
|
|
18
|
+
|
|
19
|
+
# Main review prompt template
|
|
20
|
+
REVIEW_PROMPT = ChatPromptTemplate.from_messages([
|
|
21
|
+
("system", REVIEW_SYSTEM_PROMPT),
|
|
22
|
+
("human", """Review the following {language} code:
|
|
23
|
+
|
|
24
|
+
```{language}
|
|
25
|
+
{code}
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
File: {filename}
|
|
29
|
+
|
|
30
|
+
Analyze for the following categories: {categories}
|
|
31
|
+
|
|
32
|
+
Respond with a JSON object in this exact format:
|
|
33
|
+
{{
|
|
34
|
+
"summary": "Brief overall assessment",
|
|
35
|
+
"issues": [
|
|
36
|
+
{{
|
|
37
|
+
"line_start": 10,
|
|
38
|
+
"line_end": 12,
|
|
39
|
+
"category": "security",
|
|
40
|
+
"severity": "critical",
|
|
41
|
+
"title": "SQL Injection Vulnerability",
|
|
42
|
+
"description": "User input is directly concatenated into SQL query",
|
|
43
|
+
"suggestion": "Use parameterized queries instead",
|
|
44
|
+
"code_snippet": "the problematic code",
|
|
45
|
+
"fixed_code": "the corrected code",
|
|
46
|
+
"confidence": 0.95
|
|
47
|
+
}}
|
|
48
|
+
],
|
|
49
|
+
"metrics": {{
|
|
50
|
+
"total_lines": 100,
|
|
51
|
+
"issues_count": 5,
|
|
52
|
+
"critical_count": 1,
|
|
53
|
+
"warning_count": 2,
|
|
54
|
+
"suggestion_count": 2
|
|
55
|
+
}},
|
|
56
|
+
"overall_quality": "good|acceptable|needs_improvement|poor"
|
|
57
|
+
}}""")
|
|
58
|
+
])
|
|
59
|
+
|
|
60
|
+
# Quick review prompt (faster, less detailed)
|
|
61
|
+
QUICK_REVIEW_PROMPT = ChatPromptTemplate.from_messages([
|
|
62
|
+
("system", "You are a code reviewer. Provide a brief review focusing on critical issues only."),
|
|
63
|
+
("human", """Quickly review this {language} code for critical bugs and security issues:
|
|
64
|
+
|
|
65
|
+
```{language}
|
|
66
|
+
{code}
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
List only critical issues in this JSON format:
|
|
70
|
+
{{
|
|
71
|
+
"critical_issues": [
|
|
72
|
+
{{"line": 10, "issue": "description", "fix": "suggested fix"}}
|
|
73
|
+
],
|
|
74
|
+
"safe_to_use": true/false
|
|
75
|
+
}}""")
|
|
76
|
+
])
|
|
77
|
+
|
|
78
|
+
# Security-focused review prompt
|
|
79
|
+
SECURITY_REVIEW_PROMPT = ChatPromptTemplate.from_messages([
|
|
80
|
+
("system", """You are a security expert reviewing code for vulnerabilities.
|
|
81
|
+
Focus on: SQL injection, XSS, CSRF, authentication issues, sensitive data exposure,
|
|
82
|
+
insecure dependencies, improper error handling, and other OWASP Top 10 risks."""),
|
|
83
|
+
("human", """Perform a security audit on this {language} code:
|
|
84
|
+
|
|
85
|
+
```{language}
|
|
86
|
+
{code}
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Return JSON with security findings:
|
|
90
|
+
{{
|
|
91
|
+
"vulnerabilities": [
|
|
92
|
+
{{
|
|
93
|
+
"line": 10,
|
|
94
|
+
"cwe_id": "CWE-89",
|
|
95
|
+
"severity": "critical|high|medium|low",
|
|
96
|
+
"title": "SQL Injection",
|
|
97
|
+
"description": "detailed description",
|
|
98
|
+
"remediation": "how to fix",
|
|
99
|
+
"confidence": 0.9
|
|
100
|
+
}}
|
|
101
|
+
],
|
|
102
|
+
"security_score": 0-100,
|
|
103
|
+
"recommendations": ["list of general security improvements"]
|
|
104
|
+
}}""")
|
|
105
|
+
])
|
|
106
|
+
|
|
107
|
+
# Collect all prompts
|
|
108
|
+
REVIEW_PROMPTS = {
|
|
109
|
+
"full": REVIEW_PROMPT,
|
|
110
|
+
"quick": QUICK_REVIEW_PROMPT,
|
|
111
|
+
"security": SECURITY_REVIEW_PROMPT,
|
|
112
|
+
}
|
|
113
|
+
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Utilities module for AI Code Assistant."""
|
|
2
|
+
|
|
3
|
+
from ai_code_assistant.utils.file_handler import FileHandler
|
|
4
|
+
from ai_code_assistant.utils.formatters import (
|
|
5
|
+
ConsoleFormatter,
|
|
6
|
+
MarkdownFormatter,
|
|
7
|
+
JsonFormatter,
|
|
8
|
+
get_formatter,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"FileHandler",
|
|
13
|
+
"ConsoleFormatter",
|
|
14
|
+
"MarkdownFormatter",
|
|
15
|
+
"JsonFormatter",
|
|
16
|
+
"get_formatter",
|
|
17
|
+
]
|
|
18
|
+
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
"""File handling utilities for AI Code Assistant."""
|
|
2
|
+
|
|
3
|
+
import fnmatch
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Dict, Iterator, List, Optional, Tuple
|
|
6
|
+
|
|
7
|
+
from ai_code_assistant.config import Config, get_language_by_extension
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class FileHandler:
    """Handles file system operations for code files."""

    # Common patterns to ignore
    DEFAULT_IGNORE_PATTERNS = [
        "*.pyc", "__pycache__", ".git", ".venv", "venv",
        "node_modules", ".idea", ".vscode", "*.egg-info",
        "dist", "build", ".pytest_cache", ".mypy_cache",
        "*.min.js", "*.min.css", "*.map", ".env",
    ]

    def __init__(self, config: Config):
        self.config = config
        # Per-instance copy so add_ignore_pattern never alters the class default.
        self.ignore_patterns = self.DEFAULT_IGNORE_PATTERNS.copy()

    def add_ignore_pattern(self, pattern: str) -> None:
        """Add a pattern to ignore list."""
        self.ignore_patterns.append(pattern)

    def is_supported_file(self, file_path: Path) -> bool:
        """Check if file is a supported code file."""
        return get_language_by_extension(self.config, file_path) is not None

    def should_ignore(self, path: Path) -> bool:
        """Check if path should be ignored.

        A path is ignored when any of its components (the file name or any
        directory in it) matches an ignore pattern, or when the whole path
        matches ``*/<pattern>/*``.  Checking every component is what makes a
        relative path such as ``node_modules/pkg/index.js`` match, since it
        has nothing in front of the ignored directory.
        """
        path_str = str(path)
        parts = path.parts

        for pattern in self.ignore_patterns:
            if any(fnmatch.fnmatch(part, pattern) for part in parts):
                return True
            # Whole-path check, kept for patterns that contain a separator.
            if fnmatch.fnmatch(path_str, f"*/{pattern}/*"):
                return True
        return False

    def find_code_files(
        self,
        directory: Path,
        recursive: bool = True,
        extensions: Optional[List[str]] = None,
    ) -> Iterator[Path]:
        """Find all code files in a directory.

        Args:
            directory: Directory to search; nothing is yielded if it is not
                a directory.
            recursive: Search subdirectories as well.
            extensions: If given and non-empty, only files with one of these
                suffixes are yielded.  Entries are case-insensitive and the
                leading dot is optional.  Otherwise files are filtered with
                ``is_supported_file``.

        Yields:
            Paths of matching files that are not ignored.
        """
        if not directory.is_dir():
            return

        wanted = None
        if extensions:
            wanted = {
                ext.lower() if ext.startswith(".") else f".{ext.lower()}"
                for ext in extensions
            }

        pattern = "**/*" if recursive else "*"

        for file_path in directory.glob(pattern):
            if not file_path.is_file():
                continue

            # Test only the part below ``directory`` so that a search root
            # which itself lives under e.g. ``build/`` is not wholly ignored.
            if self.should_ignore(file_path.relative_to(directory)):
                continue

            if wanted is not None:
                if file_path.suffix.lower() not in wanted:
                    continue
            elif not self.is_supported_file(file_path):
                continue

            yield file_path

    def read_file(self, file_path: Path) -> Tuple[str, Optional[str]]:
        """Read file content as UTF-8. Returns (content, error).

        ``error`` is ``None`` on success; on failure ``content`` is ``""``.
        Files larger than ``config.review.max_file_size_kb`` are rejected.
        """
        try:
            if not file_path.exists():
                return "", f"File not found: {file_path}"

            if not file_path.is_file():
                return "", f"Not a file: {file_path}"

            # Check file size
            file_size_kb = file_path.stat().st_size / 1024
            max_size = self.config.review.max_file_size_kb
            if file_size_kb > max_size:
                return "", f"File too large: {file_size_kb:.1f}KB (max: {max_size}KB)"

            content = file_path.read_text(encoding="utf-8")
            return content, None

        except UnicodeDecodeError:
            return "", f"Cannot read file (binary or encoding issue): {file_path}"
        except Exception as e:
            # The contract is to report failures through the return value.
            return "", f"Error reading file: {e}"

    def write_file(
        self,
        file_path: Path,
        content: str,
        create_dirs: bool = True,
        backup: bool = False,
    ) -> Optional[str]:
        """Write content to file as UTF-8. Returns error message or None on success.

        Args:
            file_path: Destination file.
            content: Text to write.
            create_dirs: Create missing parent directories first.
            backup: If the file already exists, copy it to
                ``<name><suffix>.bak`` before overwriting.
        """
        try:
            if create_dirs:
                file_path.parent.mkdir(parents=True, exist_ok=True)

            if backup and file_path.exists():
                backup_path = file_path.with_suffix(file_path.suffix + ".bak")
                # Copy bytes, not text: the backup must be exact whatever the
                # old file's encoding is.
                backup_path.write_bytes(file_path.read_bytes())

            file_path.write_text(content, encoding="utf-8")
            return None

        except Exception as e:
            # The contract is to report failures through the return value.
            return f"Error writing file: {e}"

    def get_file_info(self, file_path: Path) -> dict:
        """Get information about a file.

        Returns ``{"exists": False}`` for a missing path; otherwise size,
        detected language, extension, name and absolute path.
        """
        if not file_path.exists():
            return {"exists": False}

        stat = file_path.stat()
        return {
            "exists": True,
            "size_bytes": stat.st_size,
            "size_kb": stat.st_size / 1024,
            "language": get_language_by_extension(self.config, file_path),
            "extension": file_path.suffix,
            "name": file_path.name,
            "path": str(file_path.absolute()),
        }

    def batch_read(
        self,
        file_paths: List[Path],
    ) -> Dict[Path, Tuple[str, Optional[str]]]:
        """Read multiple files. Returns dict of path -> (content, error)."""
        return {path: self.read_file(path) for path in file_paths}

    def get_output_path(
        self,
        original_path: Path,
        suffix: str = "",
        output_dir: Optional[Path] = None,
    ) -> Path:
        """Generate output path for a file.

        The result is ``output_dir / (stem + suffix + extension)``.
        ``output_dir`` defaults to ``config.output.output_dir`` and is
        created if it does not exist.
        """
        if output_dir is None:
            output_dir = Path(self.config.output.output_dir)

        output_dir.mkdir(parents=True, exist_ok=True)

        new_name = original_path.stem + suffix + original_path.suffix
        return output_dir / new_name
|
|
155
|
+
|