secure-review 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- secure_review-1.0.0/PKG-INFO +3 -0
- secure_review-1.0.0/README.md +97 -0
- secure_review-1.0.0/backend/__init__.py +1 -0
- secure_review-1.0.0/backend/app/__init__.py +1 -0
- secure_review-1.0.0/backend/app/agents/__init__.py +1 -0
- secure_review-1.0.0/backend/app/agents/architecture.py +80 -0
- secure_review-1.0.0/backend/app/agents/base.py +35 -0
- secure_review-1.0.0/backend/app/agents/bug.py +80 -0
- secure_review-1.0.0/backend/app/agents/dependency.py +80 -0
- secure_review-1.0.0/backend/app/agents/documentation.py +118 -0
- secure_review-1.0.0/backend/app/agents/planner.py +66 -0
- secure_review-1.0.0/backend/app/agents/security.py +105 -0
- secure_review-1.0.0/backend/app/agents/summary.py +86 -0
- secure_review-1.0.0/backend/app/cli.py +486 -0
- secure_review-1.0.0/backend/app/config/__init__.py +1 -0
- secure_review-1.0.0/backend/app/config/config.py +31 -0
- secure_review-1.0.0/backend/app/dist/assets/index-BpXbLGUq.js +224 -0
- secure_review-1.0.0/backend/app/dist/assets/index-ah8QWuf1.css +1 -0
- secure_review-1.0.0/backend/app/dist/favicon.svg +1 -0
- secure_review-1.0.0/backend/app/dist/icons.svg +24 -0
- secure_review-1.0.0/backend/app/dist/index.html +14 -0
- secure_review-1.0.0/backend/app/graph/__init__.py +1 -0
- secure_review-1.0.0/backend/app/graph/workflow.py +190 -0
- secure_review-1.0.0/backend/app/main.py +294 -0
- secure_review-1.0.0/backend/app/models/__init__.py +1 -0
- secure_review-1.0.0/backend/app/models/models.py +46 -0
- secure_review-1.0.0/backend/app/routers/__init__.py +1 -0
- secure_review-1.0.0/backend/app/services/__init__.py +1 -0
- secure_review-1.0.0/backend/app/tools/__init__.py +1 -0
- secure_review-1.0.0/backend/app/tools/sast_tools.py +182 -0
- secure_review-1.0.0/backend/app/utils/__init__.py +1 -0
- secure_review-1.0.0/backend/app/utils/progress.py +32 -0
- secure_review-1.0.0/backend/app/utils/repo_scanner.py +138 -0
- secure_review-1.0.0/secure_review.egg-info/PKG-INFO +3 -0
- secure_review-1.0.0/secure_review.egg-info/SOURCES.txt +38 -0
- secure_review-1.0.0/secure_review.egg-info/dependency_links.txt +1 -0
- secure_review-1.0.0/secure_review.egg-info/entry_points.txt +2 -0
- secure_review-1.0.0/secure_review.egg-info/top_level.txt +1 -0
- secure_review-1.0.0/setup.cfg +4 -0
- secure_review-1.0.0/setup.py +20 -0
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# AI Secure Review 🛡️
|
|
2
|
+
|
|
3
|
+
AI Secure Review is a powerful, open-source, multi-agent codebase auditor and security scanner. Using a **LangGraph-driven orchestration engine**, it distributes analysis workloads across specialized AI agents to scan local files for vulnerabilities, logical flaws, manifest dependencies, architectural patterns, and documentation gaps.
|
|
4
|
+
|
|
5
|
+
It includes an offline SAST engine (via Bandit) and launches a glowing, interactive React dashboard served directly from a built-in FastAPI backend.
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## 🚀 Key Features
|
|
10
|
+
|
|
11
|
+
* **Multi-Agent Orchestration**: Distributes codebase audits to specialized, parallel agents:
|
|
12
|
+
* 🔒 **Security Agent**: Scans for OWASP Top 10 vulnerabilities, hardcoded secrets, and injection flaws.
|
|
13
|
+
* 🐛 **Bug Agent**: Reviews code for logical errors, memory leaks, unclosed resource handles, and race conditions.
|
|
14
|
+
* 📦 **Dependency Agent**: Audits project manifest files (like `requirements.txt`, `package.json`) for outdated or insecure libraries.
|
|
15
|
+
* 🏛️ **Architecture Agent**: Evaluates structural coupling, cohesion, SOLID violations, and design patterns.
|
|
16
|
+
* 📝 **Documentation Agent**: Inspects docstrings, comments, and verifies repo `README.md` health.
|
|
17
|
+
* **Hybrid Analysis**: Combines lightning-fast offline SAST heuristics with deep LLM-based logical inspections.
|
|
18
|
+
* **Built-in React Dashboard**: Launches a modern, dark-themed UI served locally out of the Python package.
|
|
19
|
+
* **Descriptive Telemetry**: Displays which agent is currently running and exactly which files it is auditing.
|
|
20
|
+
* **Multi-Provider Support**: Compatible with Groq (GroqCloud), OpenAI, Google Gemini, Anthropic (Claude), Ollama (Local), and LM Studio.
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## 📦 Installation
|
|
25
|
+
|
|
26
|
+
Install AI Secure Review via pip from your compiled wheel package:
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
pip install secure_review-1.0.0-py3-none-any.whl
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
### For Developers (Build from Source)
|
|
33
|
+
Clone the repository and compile the package automatically using our build script:
|
|
34
|
+
```bash
|
|
35
|
+
# Clone the repository
|
|
36
|
+
git clone https://github.com/your-username/security-cli.git
|
|
37
|
+
cd security-cli
|
|
38
|
+
|
|
39
|
+
# Run one-click compilation (requires Python build dependencies & npm)
|
|
40
|
+
.\build.bat
|
|
41
|
+
|
|
42
|
+
# Install the generated package
|
|
43
|
+
pip install dist\secure_review-1.0.0-py3-none-any.whl
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## 🛠️ Commands Guide
|
|
49
|
+
|
|
50
|
+
AI Secure Review installs the global console command `secure-review`.
|
|
51
|
+
|
|
52
|
+
### 1. Doctor (System Diagnostics)
|
|
53
|
+
Verify your local environment dependencies (Git, Bandit) and confirm that your configured LLM provider has an active internet connection, correct credentials, and sufficient API limits:
|
|
54
|
+
```bash
|
|
55
|
+
secure-review doctor
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### 2. Interactive Shell
|
|
59
|
+
Boot the core application:
|
|
60
|
+
```bash
|
|
61
|
+
secure-review
|
|
62
|
+
```
|
|
63
|
+
Executing this command automatically:
|
|
64
|
+
1. Starts the FastAPI backend daemon on port `8000`.
|
|
65
|
+
2. Launches your default web browser pointing to [http://localhost:8000/](http://localhost:8000/).
|
|
66
|
+
3. Enters an **interactive command console** in your terminal.
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## 💻 Interactive Shell Commands
|
|
71
|
+
|
|
72
|
+
While inside the CLI shell, you can type `/help` to see all active console hooks:
|
|
73
|
+
|
|
74
|
+
| Command | Description |
|
|
75
|
+
| :--- | :--- |
|
|
76
|
+
| `/review [path]` | Scan a local codebase folder (default is current folder `.`) |
|
|
77
|
+
| `/dashboard` | Re-open the React web dashboard in your default browser |
|
|
78
|
+
| `/config` | Interactively configure active LLM providers and API keys |
|
|
79
|
+
| `/providers` | Query the current active LLM configurations and check connections |
|
|
80
|
+
| `/doctor` | Run system tools availability checks and connection tests |
|
|
81
|
+
| `/clear` | Clear the console screen |
|
|
82
|
+
| `/help` | Print out shell documentation |
|
|
83
|
+
| `/exit` or `/quit` | Terminate the interactive session and backend daemon |
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
## 💻 Web Dashboard Actions
|
|
88
|
+
When a scan is running, the React dashboard showcases:
|
|
89
|
+
* **Workflow Progression**: Live node graphs syncing with backend LangGraph runs.
|
|
90
|
+
* **Agent Telemetry**: Dynamic details explaining which agent is currently reading which file.
|
|
91
|
+
* **One-Click Rescan**: Retries scans with one click after you update local files or manifest dependencies.
|
|
92
|
+
* **Remediation Recommendation**: Technical explanations and code-fix suggestions displayed inside IDE syntax-themed blocks.
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
## 📄 License
|
|
97
|
+
Distributed under the MIT License. See `LICENSE` for details.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# backend package
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# AI Secure Review package
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Agents module
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import uuid
|
|
3
|
+
from typing import List, Dict, Any
|
|
4
|
+
from backend.app.agents.base import get_llm
|
|
5
|
+
from backend.app.utils.repo_scanner import get_file_content
|
|
6
|
+
from backend.app.models.models import Issue
|
|
7
|
+
|
|
8
|
+
def run_architecture_agent(repo_path: str, files_to_review: List[str]) -> List[Issue]:
    """Review each listed file for architectural problems using the configured LLM.

    Args:
        repo_path: Repository root, passed through to ``get_file_content``.
        files_to_review: File paths (as accepted by ``get_file_content``) to analyse.

    Returns:
        A list of ``Issue`` objects with category ``"Architecture"``. Files for
        which ``get_file_content`` returns nothing are skipped. A failure while
        querying the model or parsing its reply is printed and only affects
        that file; a single unusable finding is printed and only affects
        that finding.
    """
    if not files_to_review:
        return []

    llm = get_llm()
    issues: List[Issue] = []

    for file_rel in files_to_review:
        file_content = get_file_content(repo_path, file_rel)
        if not file_content:
            continue

        prompt = f"""You are an expert Software Architect.
Analyze the following source code file '{file_rel}' for architectural anti-patterns, design pattern violations, violation of separation of concerns, high coupling, low cohesion, or overly large and complex modules.

File Content:
```
{file_content}
```

Return your findings in JSON format inside a markdown JSON code block. If no architectural issues are found, return an empty array [].
Each finding must fit the following JSON schema:
[
{{
"line": 1, // usually 1 for architectural suggestions, or specific line
"severity": "MEDIUM", // CRITICAL, HIGH, MEDIUM, LOW
"title": "Architectural Issue: [Title]",
"description": "Short explanation of the architectural or structural flaw",
"explanation": "Detailed explanation of why this architectural choice scales poorly, affects maintainability, or violates standards.",
"suggested_fix": "Refactoring plan or structural suggestion.",
"confidence": "HIGH" // HIGH, MEDIUM, LOW
}}
]

Make sure to return valid JSON inside a ```json ``` block. Focus on structural cleanups and class/module design.
JSON Output:
"""
        try:
            response = llm.invoke(prompt)
            content = response.content

            # Extract JSON block: prefer a ```json fence, then any fence,
            # then fall back to treating the whole reply as JSON.
            if "```json" in content:
                json_str = content.split("```json")[1].split("```")[0].strip()
            elif "```" in content:
                json_str = content.split("```")[1].split("```")[0].strip()
            else:
                json_str = content.strip()

            findings = json.loads(json_str)
        except Exception as e:
            print(f"[Architecture Agent] Error analyzing {file_rel}: {e}")
            continue

        if not isinstance(findings, list):
            continue

        for f in findings:
            # Model output is untrusted: ignore entries that are not JSON objects.
            if not isinstance(f, dict):
                continue

            # Isolate each finding so one bad entry does not discard the
            # remaining findings for this file.
            try:
                line = f.get("line", 1)
                title = f.get("title", "Architecture Finding")

                issue_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, f"architecture-{file_rel}-{line}-{title}"))
                issues.append(Issue(
                    id=issue_id,
                    file=file_rel,
                    line=line,
                    # A JSON null or non-string severity falls back to MEDIUM
                    # instead of raising on ``.upper()``.
                    severity=str(f.get("severity") or "MEDIUM").upper(),
                    category="Architecture",
                    title=title,
                    description=f.get("description", ""),
                    explanation=f.get("explanation", ""),
                    suggested_fix=f.get("suggested_fix", ""),
                    confidence=f.get("confidence", "MEDIUM")
                ))
            except Exception as e:
                print(f"[Architecture Agent] Error analyzing {file_rel}: {e}")

    return issues
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import Any
|
|
3
|
+
from langchain_openai import ChatOpenAI
|
|
4
|
+
from langchain_anthropic import ChatAnthropic
|
|
5
|
+
from backend.app.config.config import load_config
|
|
6
|
+
|
|
7
|
+
def get_llm() -> Any:
    """Build the chat-model client for the provider named in the loaded config.

    Returns:
        A ``ChatAnthropic`` instance when the configured provider is
        ``"anthropic"``; otherwise a ``ChatOpenAI`` instance pointed at the
        configured base URL. For the ``openai``, ``groq`` and ``gemini``
        providers, a missing base URL or one containing ``"localhost"`` is
        replaced by that provider's hosted endpoint.
    """
    settings = load_config()
    provider_name = settings.provider.lower()
    # Placeholder key is used when the config carries no API key.
    key = settings.api_key or "no-key-required"

    if provider_name == "anthropic":
        # Anthropic standard Claude models
        return ChatAnthropic(
            api_key=key,
            model=settings.model,
            temperature=settings.temperature,
        )

    # ChatOpenAI is used for every other provider; these three get a hosted
    # endpoint when the configured base URL is unset or points at localhost.
    hosted_endpoints = {
        "openai": "https://api.openai.com/v1",
        "groq": "https://api.groq.com/openai/v1",
        "gemini": "https://generativelanguage.googleapis.com/v1beta/openai/",
    }

    endpoint = settings.base_url
    if provider_name in hosted_endpoints and (not endpoint or "localhost" in endpoint):
        endpoint = hosted_endpoints[provider_name]

    return ChatOpenAI(
        api_key=key,
        base_url=endpoint,
        model=settings.model,
        temperature=settings.temperature,
    )
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import uuid
|
|
3
|
+
from typing import List, Dict, Any
|
|
4
|
+
from backend.app.agents.base import get_llm
|
|
5
|
+
from backend.app.utils.repo_scanner import get_file_content
|
|
6
|
+
from backend.app.models.models import Issue
|
|
7
|
+
|
|
8
|
+
def run_bug_agent(repo_path: str, files_to_review: List[str]) -> List[Issue]:
    """Review each listed file for bugs and code-quality problems using the configured LLM.

    Args:
        repo_path: Repository root, passed through to ``get_file_content``.
        files_to_review: File paths (as accepted by ``get_file_content``) to analyse.

    Returns:
        A list of ``Issue`` objects with category ``"Bug"``. Files for which
        ``get_file_content`` returns nothing are skipped. A failure while
        querying the model or parsing its reply is printed and only affects
        that file; a single unusable finding is printed and only affects
        that finding.
    """
    if not files_to_review:
        return []

    llm = get_llm()
    issues: List[Issue] = []

    for file_rel in files_to_review:
        file_content = get_file_content(repo_path, file_rel)
        if not file_content:
            continue

        prompt = f"""You are an expert Senior Code Quality Engineer.
Analyze the following source code file '{file_rel}' for bugs, logic issues, dead code, resource leaks (e.g. unclosed files, DB connections), exception handling anti-patterns (e.g. bare excepts, swallowed errors), race conditions, and performance bottlenecks.

File Content:
```
{file_content}
```

Return your findings in JSON format inside a markdown JSON code block. If no quality/logic issues are found, return an empty array [].
Each finding must fit the following JSON schema:
[
{{
"line": 42,
"severity": "HIGH", // CRITICAL, HIGH, MEDIUM, LOW
"title": "Bug/Quality Issue: [Title]",
"description": "Short explanation of the bug or code quality violation",
"explanation": "Detailed explanation of why this code is problematic and what runtime issues it could cause.",
"suggested_fix": "Refactored code snippet or exact steps to fix the issue.",
"confidence": "HIGH" // HIGH, MEDIUM, LOW
}}
]

Make sure to return valid JSON inside a ```json ``` block. Focus on code that is active, confusing, or logically incorrect.
JSON Output:
"""
        try:
            response = llm.invoke(prompt)
            content = response.content

            # Extract JSON block: prefer a ```json fence, then any fence,
            # then fall back to treating the whole reply as JSON.
            if "```json" in content:
                json_str = content.split("```json")[1].split("```")[0].strip()
            elif "```" in content:
                json_str = content.split("```")[1].split("```")[0].strip()
            else:
                json_str = content.strip()

            findings = json.loads(json_str)
        except Exception as e:
            print(f"[Bug Agent] Error analyzing {file_rel}: {e}")
            continue

        if not isinstance(findings, list):
            continue

        for f in findings:
            # Model output is untrusted: ignore entries that are not JSON objects.
            if not isinstance(f, dict):
                continue

            # Isolate each finding so one bad entry does not discard the
            # remaining findings for this file.
            try:
                line = f.get("line", 1)
                title = f.get("title", "Code Quality Finding")

                issue_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, f"bug-{file_rel}-{line}-{title}"))
                issues.append(Issue(
                    id=issue_id,
                    file=file_rel,
                    line=line,
                    # A JSON null or non-string severity falls back to MEDIUM
                    # instead of raising on ``.upper()``.
                    severity=str(f.get("severity") or "MEDIUM").upper(),
                    category="Bug",
                    title=title,
                    description=f.get("description", ""),
                    explanation=f.get("explanation", ""),
                    suggested_fix=f.get("suggested_fix", ""),
                    confidence=f.get("confidence", "MEDIUM")
                ))
            except Exception as e:
                print(f"[Bug Agent] Error analyzing {file_rel}: {e}")

    return issues
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import uuid
|
|
3
|
+
from typing import List, Dict, Any
|
|
4
|
+
from backend.app.agents.base import get_llm
|
|
5
|
+
from backend.app.utils.repo_scanner import get_file_content
|
|
6
|
+
from backend.app.models.models import Issue
|
|
7
|
+
|
|
8
|
+
def run_dependency_agent(repo_path: str, files_to_review: List[str]) -> List[Issue]:
    """Review each listed dependency manifest for risky packages using the configured LLM.

    Args:
        repo_path: Repository root, passed through to ``get_file_content``.
        files_to_review: Manifest paths (as accepted by ``get_file_content``) to analyse.

    Returns:
        A list of ``Issue`` objects with category ``"Dependency"``. Files for
        which ``get_file_content`` returns nothing are skipped. A failure while
        querying the model or parsing its reply is printed and only affects
        that file; a single unusable finding is printed and only affects
        that finding.
    """
    if not files_to_review:
        return []

    llm = get_llm()
    issues: List[Issue] = []

    for file_rel in files_to_review:
        file_content = get_file_content(repo_path, file_rel)
        if not file_content:
            continue

        prompt = f"""You are an expert Dependency Security Engineer.
Analyze the following dependency manifest '{file_rel}' for security vulnerabilities (outdated packages, packages with known vulnerabilities/CVEs, malicious/typosquatted packages, deprecated packages).

File Content:
```
{file_content}
```

Return your findings in JSON format inside a markdown JSON code block. If no vulnerable dependencies are found, return an empty array [].
Each finding must fit the following JSON schema:
[
{{
"line": 12, // approximate line of the package in the file, or 1 if not clear
"severity": "HIGH", // CRITICAL, HIGH, MEDIUM, LOW
"title": "Vulnerable library: [package-name]",
"description": "Short explanation of the CVE/vulnerability or deprecation in this package",
"explanation": "Detailed explanation of why this library version is insecure, the CVE number if known, and the impact.",
"suggested_fix": "Command to upgrade the package or target version (e.g. 'Upgrade package-name to v1.2.3')",
"confidence": "HIGH" // HIGH, MEDIUM, LOW
}}
]

Make sure to return valid JSON inside a ```json ``` block. Focus on known public CVEs for packages (e.g. log4j, lodash, flask, django, axios, request).
JSON Output:
"""
        try:
            response = llm.invoke(prompt)
            content = response.content

            # Extract JSON block: prefer a ```json fence, then any fence,
            # then fall back to treating the whole reply as JSON.
            if "```json" in content:
                json_str = content.split("```json")[1].split("```")[0].strip()
            elif "```" in content:
                json_str = content.split("```")[1].split("```")[0].strip()
            else:
                json_str = content.strip()

            findings = json.loads(json_str)
        except Exception as e:
            print(f"[Dependency Agent] Error analyzing {file_rel}: {e}")
            continue

        if not isinstance(findings, list):
            continue

        for f in findings:
            # Model output is untrusted: ignore entries that are not JSON objects.
            if not isinstance(f, dict):
                continue

            # Isolate each finding so one bad entry does not discard the
            # remaining findings for this file.
            try:
                line = f.get("line", 1)
                title = f.get("title", "Vulnerable Dependency")

                issue_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, f"dependency-{file_rel}-{line}-{title}"))
                issues.append(Issue(
                    id=issue_id,
                    file=file_rel,
                    line=line,
                    # A JSON null or non-string severity falls back to MEDIUM
                    # instead of raising on ``.upper()``.
                    severity=str(f.get("severity") or "MEDIUM").upper(),
                    category="Dependency",
                    title=title,
                    description=f.get("description", ""),
                    explanation=f.get("explanation", ""),
                    suggested_fix=f.get("suggested_fix", ""),
                    confidence=f.get("confidence", "MEDIUM")
                ))
            except Exception as e:
                print(f"[Dependency Agent] Error analyzing {file_rel}: {e}")

    return issues
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import uuid
|
|
3
|
+
from typing import List, Dict, Any
|
|
4
|
+
from backend.app.agents.base import get_llm
|
|
5
|
+
from backend.app.utils.repo_scanner import get_file_content
|
|
6
|
+
from backend.app.models.models import Issue
|
|
7
|
+
|
|
8
|
+
def run_documentation_agent(repo_path: str, files_to_review: List[str]) -> List[Issue]:
    """Check for a README and review documentation quality using the configured LLM.

    The repository tree is walked once (skipping common tooling directories)
    to find the first file named ``README.md`` (case-insensitive). If none is
    found, a HIGH severity "missing README" issue is recorded. When
    ``files_to_review`` is empty, the discovered README (if any) is reviewed
    instead; with no README and no files the function returns early.

    Args:
        repo_path: Repository root to walk; also passed to ``get_file_content``.
        files_to_review: File paths (as accepted by ``get_file_content``) to analyse.

    Returns:
        A list of ``Issue`` objects with category ``"Documentation"``. A
        failure while querying the model or parsing its reply is printed and
        only affects that file; a single unusable finding is printed and only
        affects that finding.
    """
    import os

    issues: List[Issue] = []

    # Locate the README once; the same walk serves both the existence check
    # and the fallback review target.
    ignored_dirs = (".git", "venv", "node_modules", ".pytest_cache", "__pycache__")
    readme_rel = None
    for root, dirs, files in os.walk(repo_path):
        # Prune in place so os.walk does not descend into ignored directories
        dirs[:] = [d for d in dirs if d not in ignored_dirs]
        readme_name = next((name for name in files if name.lower() == "readme.md"), None)
        if readme_name is not None:
            readme_rel = os.path.relpath(os.path.join(root, readme_name), repo_path).replace('\\', '/')
            break

    if readme_rel is None:
        issue_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, f"documentation-missing-readme-{repo_path}"))
        issues.append(Issue(
            id=issue_id,
            file="README.md",
            line=1,
            severity="HIGH",
            category="Documentation",
            title="Missing Repository Documentation (README.md)",
            description="The repository does not contain a README.md file in the root or directories.",
            explanation="A README.md file is critical for onboarding developers, detailing project setup guidelines, specifying dependencies, and explaining utility usages. Lacking it makes code maintenance difficult.",
            suggested_fix="Create a README.md file in the root directory outlining the project name, description, setup commands, and architecture overview.",
            confidence="HIGH"
        ))

    if not files_to_review:
        if readme_rel is None:
            return issues
        # Nothing was routed here, so review the README that was found.
        files_to_review = [readme_rel]

    llm = get_llm()

    for file_rel in files_to_review:
        file_content = get_file_content(repo_path, file_rel)
        if not file_content:
            continue

        prompt = f"""You are an expert technical writer and code auditor.
Analyze the following document or code file '{file_rel}' for documentation completeness, clarity, accuracy, missing setup guides, missing docstrings, or obsolete comments.

File Content:
```
{file_content}
```

Return your findings in JSON format inside a markdown JSON code block. If no documentation issues are found, return an empty array [].
Each finding must fit the following JSON schema:
[
{{
"line": 1,
"severity": "LOW", // CRITICAL, HIGH, MEDIUM, LOW
"title": "Documentation Issue: [Title]",
"description": "Short explanation of the missing or incomplete documentation item",
"explanation": "Detailed explanation of why having this documentation matters for developers, users, or contributors.",
"suggested_fix": "Write down the recommended text or docstring to add, or outline a README structure.",
"confidence": "HIGH" // HIGH, MEDIUM, LOW
}}
]

Make sure to return valid JSON inside a ```json ``` block. Focus on usability and developer onboarding.
JSON Output:
"""
        try:
            response = llm.invoke(prompt)
            content = response.content

            # Extract JSON block: prefer a ```json fence, then any fence,
            # then fall back to treating the whole reply as JSON.
            if "```json" in content:
                json_str = content.split("```json")[1].split("```")[0].strip()
            elif "```" in content:
                json_str = content.split("```")[1].split("```")[0].strip()
            else:
                json_str = content.strip()

            findings = json.loads(json_str)
        except Exception as e:
            print(f"[Documentation Agent] Error analyzing {file_rel}: {e}")
            continue

        if not isinstance(findings, list):
            continue

        for f in findings:
            # Model output is untrusted: ignore entries that are not JSON objects.
            if not isinstance(f, dict):
                continue

            # Isolate each finding so one bad entry does not discard the
            # remaining findings for this file.
            try:
                line = f.get("line", 1)
                title = f.get("title", "Documentation Finding")

                issue_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, f"documentation-{file_rel}-{line}-{title}"))
                issues.append(Issue(
                    id=issue_id,
                    file=file_rel,
                    line=line,
                    # A JSON null or non-string severity falls back to LOW
                    # instead of raising on ``.upper()``.
                    severity=str(f.get("severity") or "LOW").upper(),
                    category="Documentation",
                    title=title,
                    description=f.get("description", ""),
                    explanation=f.get("explanation", ""),
                    suggested_fix=f.get("suggested_fix", ""),
                    confidence=f.get("confidence", "MEDIUM")
                ))
            except Exception as e:
                print(f"[Documentation Agent] Error analyzing {file_rel}: {e}")

    return issues
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
from typing import Dict, Any, List
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
# Deterministic routing based on file types
|
|
5
|
+
def run_planner(scan_result: Dict[str, Any]) -> Dict[str, List[str]]:
    """Route scanned files to the review agents using file-name heuristics.

    Each file is classified once, in this order: dependency manifest (by file
    name), documentation (by extension, or by living under a directory named
    ``docs`` or ``doc``), then source code (by extension). Source files are
    offered to the security, bug and architecture lists; files whose stem
    contains a priority keyword are always included, other files only until
    the per-list cap is reached.

    Args:
        scan_result: Mapping whose ``"files"`` entry is a list of file paths.
            A missing or ``None`` entry is treated as no files.

    Returns:
        A dict with the keys ``"security"``, ``"dependency"``, ``"bug"``,
        ``"architecture"`` and ``"documentation"``, each mapping to a list of
        file paths in scan order.
    """
    files = scan_result.get("files") or []

    # Filter files into categories
    security_candidates: List[str] = []
    bug_candidates: List[str] = []
    arch_candidates: List[str] = []
    dep_candidates: List[str] = []
    doc_candidates: List[str] = []

    # Important file markers to prioritize
    priority_keywords = {"main", "app", "server", "index", "route", "controller", "db", "auth", "config", "util"}

    for f in files:
        f_lower = f.lower()
        path = Path(f)
        ext = path.suffix.lower()
        # Compare whole directory names so that e.g. "autodoc/tool.py" is not
        # mistaken for documentation just because the path contains "doc/".
        parent_dirs = f_lower.replace("\\", "/").split("/")[:-1]

        # Dependencies
        if path.name.lower() in ("package.json", "requirements.txt", "go.mod", "cargo.toml", "pipfile", "pyproject.toml"):
            dep_candidates.append(f)

        # Documentation
        elif ext in (".md", ".txt", ".rst") or "docs" in parent_dirs or "doc" in parent_dirs:
            doc_candidates.append(f)

        # Code files
        elif ext in (".py", ".js", ".ts", ".tsx", ".jsx", ".go", ".rs", ".java", ".cs", ".php", ".rb", ".sh"):
            is_priority = any(k in path.stem.lower() for k in priority_keywords)

            # Security targets authentication, db, routes, or priority files
            if is_priority or any(x in f_lower for x in ("auth", "db", "sql", "api", "route", "controller")):
                security_candidates.append(f)
            elif len(security_candidates) < 10:
                # General code files are only added while the list holds
                # fewer than 10 entries, to keep the scan fast.
                security_candidates.append(f)

            # Bug / quality targets general code files
            if is_priority or len(bug_candidates) < 10:
                bug_candidates.append(f)

            # Architecture looks at main structures, imports, and config files
            if is_priority or len(arch_candidates) < 8:
                arch_candidates.append(f)

    # Fallbacks if a list is empty but files exist: take the first five
    # Python/JS/TS files regardless of how they were classified above.
    if files:
        fallback = [f for f in files if Path(f).suffix.lower() in (".py", ".js", ".ts", ".tsx")][:5]
        if not security_candidates:
            security_candidates = list(fallback)
        if not bug_candidates:
            bug_candidates = list(fallback)
        if not arch_candidates:
            arch_candidates = list(fallback)

    return {
        "security": security_candidates,
        "dependency": dep_candidates,
        "bug": bug_candidates,
        "architecture": arch_candidates,
        "documentation": doc_candidates
    }
|