argus-appsec 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- argus/__init__.py +28 -0
- argus/__main__.py +6 -0
- argus/agents/__init__.py +21 -0
- argus/agents/base.py +37 -0
- argus/agents/enrichment.py +111 -0
- argus/agents/exploit.py +173 -0
- argus/agents/patch.py +82 -0
- argus/ai/__init__.py +16 -0
- argus/ai/anthropic_provider.py +47 -0
- argus/ai/base.py +62 -0
- argus/ai/factory.py +47 -0
- argus/ai/heuristic.py +99 -0
- argus/ai/ollama_provider.py +51 -0
- argus/ai/openai_provider.py +43 -0
- argus/analysis/__init__.py +5 -0
- argus/analysis/languages.py +54 -0
- argus/analysis/repository.py +217 -0
- argus/cli/__init__.py +5 -0
- argus/cli/main.py +441 -0
- argus/core/__init__.py +1 -0
- argus/core/config.py +101 -0
- argus/core/engine.py +131 -0
- argus/core/models.py +231 -0
- argus/core/plugin.py +180 -0
- argus/core/project.py +157 -0
- argus/plugins.py +27 -0
- argus/py.typed +0 -0
- argus/remediation/__init__.py +10 -0
- argus/remediation/applier.py +140 -0
- argus/remediation/git_ops.py +131 -0
- argus/remediation/hosting.py +140 -0
- argus/remediation/pullrequest.py +213 -0
- argus/remediation/rewrites.py +82 -0
- argus/reporting/__init__.py +10 -0
- argus/reporting/html.py +224 -0
- argus/reporting/json_reporter.py +59 -0
- argus/reporting/markdown.py +146 -0
- argus/reporting/sarif.py +122 -0
- argus/scanners/__init__.py +10 -0
- argus/scanners/data/__init__.py +1 -0
- argus/scanners/data/advisories.json +77 -0
- argus/scanners/dependencies.py +169 -0
- argus/scanners/iac.py +213 -0
- argus/scanners/patterns.py +291 -0
- argus/scanners/secrets.py +170 -0
- argus/targets.py +122 -0
- argus_appsec-0.1.0.dist-info/METADATA +246 -0
- argus_appsec-0.1.0.dist-info/RECORD +51 -0
- argus_appsec-0.1.0.dist-info/WHEEL +4 -0
- argus_appsec-0.1.0.dist-info/entry_points.txt +5 -0
- argus_appsec-0.1.0.dist-info/licenses/LICENSE +19 -0
argus/__init__.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""Argus — an open-source AI Security Engineer.
|
|
2
|
+
|
|
3
|
+
Argus maps an application, runs layered security analysis, explains each finding
|
|
4
|
+
in terms a developer can act on, and — where possible — proposes and verifies a fix.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from argus.core.models import (
|
|
8
|
+
Confidence,
|
|
9
|
+
Finding,
|
|
10
|
+
Location,
|
|
11
|
+
Remediation,
|
|
12
|
+
ScanResult,
|
|
13
|
+
Severity,
|
|
14
|
+
)
|
|
15
|
+
from argus.core.project import Project
|
|
16
|
+
|
|
17
|
+
__version__ = "0.1.0"
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"Confidence",
|
|
21
|
+
"Finding",
|
|
22
|
+
"Location",
|
|
23
|
+
"Project",
|
|
24
|
+
"Remediation",
|
|
25
|
+
"ScanResult",
|
|
26
|
+
"Severity",
|
|
27
|
+
"__version__",
|
|
28
|
+
]
|
argus/__main__.py
ADDED
argus/agents/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Specialized agents.
|
|
2
|
+
|
|
3
|
+
Argus splits the "AI Security Engineer" role into focused agents, each with one
|
|
4
|
+
job. They operate on findings after scanning: enriching the narrative, simulating
|
|
5
|
+
attacks, and proposing fixes. Every agent degrades gracefully — with the
|
|
6
|
+
heuristic provider it uses templated reasoning; with a real model it produces
|
|
7
|
+
richer, context-aware analysis.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from argus.agents.base import Agent, AgentContext
|
|
11
|
+
from argus.agents.enrichment import EnrichmentAgent
|
|
12
|
+
from argus.agents.exploit import AttackSimulationAgent
|
|
13
|
+
from argus.agents.patch import PatchAgent
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"Agent",
|
|
17
|
+
"AgentContext",
|
|
18
|
+
"AttackSimulationAgent",
|
|
19
|
+
"EnrichmentAgent",
|
|
20
|
+
"PatchAgent",
|
|
21
|
+
]
|
argus/agents/base.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""Agent base class and shared context."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import abc
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
from argus.ai.base import AIProvider
|
|
9
|
+
from argus.core.config import Config
|
|
10
|
+
from argus.core.models import Finding
|
|
11
|
+
from argus.core.project import Project
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class AgentContext:
    """Bundle of shared objects handed to every agent call."""

    # The project the findings belong to (agents read its name, languages
    # and frameworks when building prompts).
    project: Project
    # Effective configuration for the run.
    config: Config
    # Model backend used for completions; its ``name`` tells agents whether
    # a real model or the offline heuristic provider is in use.
    ai: AIProvider
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Agent(abc.ABC):
    """Abstract base for agents that post-process a single finding.

    ``process`` receives a finding and hands it back (typically after mutating
    it in place). Returning the finding explicitly is what allows the engine to
    chain several agents one after another.
    """

    name: str = ""

    @abc.abstractmethod
    def process(self, finding: Finding, ctx: AgentContext) -> Finding:
        """Handle ``finding`` using the shared ``ctx`` and return it."""
        raise NotImplementedError

    @staticmethod
    def _uses_real_model(ctx: AgentContext) -> bool:
        """Report whether the provider in ``ctx`` is anything but the heuristic one."""
        provider_name = ctx.ai.name
        return provider_name != "heuristic"
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""Enrichment agent: fill the reasoning fields of a finding.
|
|
2
|
+
|
|
3
|
+
Ensures every finding answers "why is this a vulnerability", "how would an
|
|
4
|
+
attacker exploit it", and "what is the business impact". Scanners already provide
|
|
5
|
+
these for their built-in rules; this agent fills gaps and, when a real model is
|
|
6
|
+
configured, rewrites them with project-specific context.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from argus.agents.base import Agent, AgentContext
|
|
12
|
+
from argus.ai.heuristic import HeuristicProvider
|
|
13
|
+
from argus.core.models import Finding
|
|
14
|
+
|
|
15
|
+
_SYSTEM = (
|
|
16
|
+
"You are a senior application security engineer. Given a vulnerability finding, "
|
|
17
|
+
"explain it precisely and without exaggeration. Be concrete and actionable. "
|
|
18
|
+
"Respond in three short sections labelled exactly: WHY, ATTACK, IMPACT."
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class EnrichmentAgent(Agent):
    """Fill the reasoning fields of a finding.

    The fields are ``why_vulnerable``, ``attacker_perspective`` and
    ``business_impact``. With the heuristic provider, gaps are filled from CWE
    templates. With a real model, the model's answer is parsed into the three
    fields and whatever is still missing afterwards is filled heuristically.
    """

    name = "enrichment"

    def process(self, finding: Finding, ctx: AgentContext) -> Finding:
        """Enrich ``finding`` in place and return it.

        Args:
            finding: The finding whose reasoning fields should be completed.
            ctx: Shared agent context (project, config and AI provider).

        Returns:
            The same ``finding`` object.
        """
        has_reasoning = self._has_full_reasoning(finding)

        if self._uses_real_model(ctx):
            # Scanner-supplied reasoning is only rewritten when AI is enabled;
            # missing reasoning is always worth a model call.
            if not has_reasoning or ctx.config.ai.enabled:
                self._enrich_with_model(finding, ctx)
            return finding

        # Heuristic path: fill any missing fields from CWE templates.
        if not has_reasoning:
            self._enrich_heuristic(finding)
        return finding

    @staticmethod
    def _has_full_reasoning(finding: Finding) -> bool:
        """True when all three reasoning fields of ``finding`` are non-empty."""
        return all([finding.why_vulnerable, finding.attacker_perspective,
                    finding.business_impact])

    def _enrich_heuristic(self, finding: Finding) -> None:
        """Fill empty reasoning fields without a model.

        The first CWE of the finding that has a template wins. Fields that
        already hold a value are never overwritten.
        """
        for cwe in finding.cwe:
            notes = HeuristicProvider.notes_for_cwe(cwe)
            if notes:
                finding.why_vulnerable = finding.why_vulnerable or notes["why"]
                finding.attacker_perspective = (
                    finding.attacker_perspective or notes["attack"])
                finding.business_impact = finding.business_impact or notes["impact"]
                return
        # Generic fallback when no CWE template matches.
        finding.why_vulnerable = finding.why_vulnerable or finding.description
        finding.attacker_perspective = finding.attacker_perspective or (
            "An attacker who reaches this code path can supply crafted input to "
            "trigger the weakness described above.")
        finding.business_impact = finding.business_impact or (
            "Potential loss of confidentiality, integrity, or availability "
            "depending on the data and privileges involved.")

    def _enrich_with_model(self, finding: Finding, ctx: AgentContext) -> None:
        """Ask the provider for WHY/ATTACK/IMPACT text and apply it.

        Any provider error is recorded under ``finding.metadata`` (assumed to
        be a mutable mapping) and the heuristic path is used instead, so
        enrichment never aborts a scan.
        """
        prompt = self._build_prompt(finding, ctx)
        try:
            answer = ctx.ai.complete(_SYSTEM, prompt)
        except Exception as exc:  # never let enrichment abort a scan
            finding.metadata["enrichment_error"] = str(exc)
            self._enrich_heuristic(finding)
            return

        # Guard against a provider that returns None instead of text.
        sections = self._parse_sections(answer or "")
        if sections["why"]:
            finding.why_vulnerable = sections["why"]
        if sections["attack"]:
            finding.attacker_perspective = sections["attack"]
        if sections["impact"]:
            finding.business_impact = sections["impact"]

        # A partial (or unparseable) answer must not leave gaps behind. The
        # heuristic pass only touches fields that are still empty.
        if not self._has_full_reasoning(finding):
            self._enrich_heuristic(finding)

    @staticmethod
    def _build_prompt(finding: Finding, ctx: AgentContext) -> str:
        """Render the user prompt describing the project and the finding."""
        loc = finding.location
        return (
            f"Project: {ctx.project.name}\n"
            f"Languages: {', '.join(ctx.project.languages) or 'unknown'}\n"
            f"Frameworks: {', '.join(ctx.project.frameworks) or 'none detected'}\n\n"
            f"Finding: {finding.title}\n"
            f"Rule: {finding.rule_id}\n"
            f"CWE: {', '.join(finding.cwe) or 'n/a'}  "
            f"OWASP: {', '.join(finding.owasp) or 'n/a'}\n"
            f"Location: {loc.as_ref()}\n"
            f"Code:\n{loc.snippet or '(not available)'}\n\n"
            "Explain this finding for the developer who owns this code."
        )

    @staticmethod
    def _parse_sections(text: str) -> dict[str, str]:
        """Split a model answer into its WHY / ATTACK / IMPACT sections.

        A line counts as a section header only when its label is exactly one
        of the three keywords (case-insensitive, markdown decoration such as
        ``#`` or ``**`` ignored). The label must either stand alone on the line
        or be followed by a colon. Ordinary sentences that merely begin with
        "Why", "Attackers" or "Impact" therefore stay part of the current
        section instead of being mistaken for a header and truncated.

        Returns:
            A dict with the keys ``why``, ``attack`` and ``impact``. Sections
            that were not found map to an empty string.
        """
        out = {"why": "", "attack": "", "impact": ""}
        current = None
        for line in text.splitlines():
            stripped = line.strip()
            if not stripped:
                continue
            label, sep, rest = stripped.partition(":")
            key = label.strip(" \t#*_").lower()
            if key in out:
                current = key
                # Keep only the text after the colon; drop closing markdown
                # emphasis left over from headers like "**WHY:** ...".
                stripped = rest.strip().lstrip("*_").strip() if sep else ""
            if current and stripped:
                out[current] = (out[current] + " " + stripped).strip()
        return out
|
argus/agents/exploit.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
"""Attack Simulation agent.
|
|
2
|
+
|
|
3
|
+
This is Argus's distinguishing feature. Rather than stating that an endpoint is
|
|
4
|
+
vulnerable, it produces a safe, self-contained demonstration of the weakness:
|
|
5
|
+
|
|
6
|
+
* how an attacker would discover it,
|
|
7
|
+
* a step-by-step (read-only) exploit walkthrough,
|
|
8
|
+
* what data would be exposed,
|
|
9
|
+
* the business impact,
|
|
10
|
+
* how the proposed fix blocks the attack, and
|
|
11
|
+
* a before/after comparison.
|
|
12
|
+
|
|
13
|
+
Safety: the simulation is *descriptive*. It never sends traffic to a live target
|
|
14
|
+
and never executes generated exploit code. When a real model is used, the prompt
|
|
15
|
+
constrains it to an educational, non-weaponized walkthrough. ``sandbox_ok`` marks
|
|
16
|
+
that the demonstration was produced in this isolated, non-executing context.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
from argus.agents.base import Agent, AgentContext
|
|
22
|
+
from argus.core.models import ExploitScenario, Finding, Severity
|
|
23
|
+
|
|
24
|
+
_SYSTEM = (
|
|
25
|
+
"You are a penetration tester writing an educational, defensive walkthrough for "
|
|
26
|
+
"the developer who owns the vulnerable code. Never produce a weaponized or "
|
|
27
|
+
"copy-paste-runnable exploit against live systems; illustrate the technique at a "
|
|
28
|
+
"level that teaches the risk and the fix. Use these exact section headers: "
|
|
29
|
+
"DISCOVERY, WALKTHROUGH, DATA_AT_RISK, BUSINESS_IMPACT, FIX_BLOCKS, BEFORE_AFTER."
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
# Templated scenarios keyed by primary CWE, used with the heuristic provider.
|
|
33
|
+
_TEMPLATES: dict[str, dict[str, str]] = {
|
|
34
|
+
"CWE-89": {
|
|
35
|
+
"discovery": "Fuzz the parameter with a single quote. A resulting SQL error "
|
|
36
|
+
"or changed response reveals the input reaches the query.",
|
|
37
|
+
"walk": "1. Observe a normal request. 2. Send `' OR '1'='1` in the parameter. "
|
|
38
|
+
"3. The WHERE clause becomes always-true, returning rows the user "
|
|
39
|
+
"should not see. 4. Escalate with UNION SELECT to read other tables.",
|
|
40
|
+
"data": "Any data reachable by the database user — often user records, "
|
|
41
|
+
"password hashes, tokens, and PII.",
|
|
42
|
+
"fix": "Parameterized queries bind input as data, so `' OR '1'='1` is treated "
|
|
43
|
+
"as a literal string and the query structure can no longer change.",
|
|
44
|
+
"ba": "Before: query text is built from input, so injected operators execute. "
|
|
45
|
+
"After: input is a bound parameter and cannot alter the statement.",
|
|
46
|
+
},
|
|
47
|
+
"CWE-78": {
|
|
48
|
+
"discovery": "Probe the input with a benign separator like `; id`. Command "
|
|
49
|
+
"output appearing in the response confirms injection.",
|
|
50
|
+
"walk": "1. Identify the parameter that flows into a shell command. 2. Append "
|
|
51
|
+
"`; whoami`. 3. The extra command runs. 4. Escalate to fetch files or "
|
|
52
|
+
"open a reverse shell.",
|
|
53
|
+
"data": "Everything the application process can read, plus the ability to run "
|
|
54
|
+
"commands on the host.",
|
|
55
|
+
"fix": "Passing arguments as a list (no shell) removes the shell parser, so "
|
|
56
|
+
"metacharacters are treated as literal arguments.",
|
|
57
|
+
"ba": "Before: input is concatenated into a shell string. After: input is a "
|
|
58
|
+
"single argument value with no shell interpretation.",
|
|
59
|
+
},
|
|
60
|
+
"CWE-798": {
|
|
61
|
+
"discovery": "Grep the repository (including history) for key-like strings; the "
|
|
62
|
+
"committed secret is found directly.",
|
|
63
|
+
"walk": "1. Clone the repo. 2. Search history for the credential. 3. Use it "
|
|
64
|
+
"against the corresponding service — no exploitation of the app needed.",
|
|
65
|
+
"data": "Whatever the credential unlocks: cloud account, database, payment or "
|
|
66
|
+
"email provider.",
|
|
67
|
+
"fix": "Rotating the secret invalidates the leaked value; loading it from a "
|
|
68
|
+
"secret manager keeps it out of source entirely.",
|
|
69
|
+
"ba": "Before: the live secret sits in version control. After: the code reads "
|
|
70
|
+
"it from the environment and the old value is revoked.",
|
|
71
|
+
},
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class AttackSimulationAgent(Agent):
    """Attach a descriptive exploit scenario to sufficiently severe findings.

    The scenario is stored on ``finding.exploit``. It comes either from a
    template keyed by CWE (heuristic provider) or from a model answer that is
    parsed into sections (real provider). Nothing is ever executed.
    """

    name = "attack-simulation"

    # Only simulate meaningful findings; noise doesn't warrant a walkthrough.
    MIN_SEVERITY = Severity.MEDIUM

    def process(self, finding: Finding, ctx: AgentContext) -> Finding:
        """Populate ``finding.exploit`` when the severity threshold is met.

        Args:
            finding: The finding to simulate an attack for.
            ctx: Shared agent context (project, config and AI provider).

        Returns:
            The same ``finding`` object, unchanged when below ``MIN_SEVERITY``.
        """
        if finding.severity < self.MIN_SEVERITY:
            return finding
        if self._uses_real_model(ctx):
            self._simulate_with_model(finding, ctx)
        else:
            self._simulate_heuristic(finding)
        return finding

    def _simulate_heuristic(self, finding: Finding) -> None:
        """Build the scenario from a CWE template, or a generic one if none fits."""
        # The first CWE of the finding that has a template wins.
        tmpl = next((_TEMPLATES[cwe] for cwe in finding.cwe if cwe in _TEMPLATES), None)

        if tmpl is None:
            finding.exploit = ExploitScenario(
                discovery=f"Identify the weak point described by {finding.rule_id} at "
                          f"{finding.location.as_ref()}.",
                exploit_walkthrough=finding.attacker_perspective or
                "Craft input that triggers the described weakness and observe the "
                "effect.",
                data_at_risk=finding.business_impact or "Depends on the affected asset.",
                business_impact=finding.business_impact,
                fix_blocks_attack=(finding.remediation.summary
                                   if finding.remediation else
                                   "Applying the recommended fix removes the weakness."),
                before_after="Before: the weakness is present. After: the fix removes "
                             "the exploitable condition.",
                sandbox_ok=True,
            )
            return

        finding.exploit = ExploitScenario(
            discovery=tmpl["discovery"],
            exploit_walkthrough=tmpl["walk"],
            data_at_risk=tmpl["data"],
            business_impact=finding.business_impact or
            "Direct impact on confidentiality and integrity of affected data.",
            fix_blocks_attack=tmpl["fix"],
            before_after=tmpl["ba"],
            sandbox_ok=True,
        )

    def _simulate_with_model(self, finding: Finding, ctx: AgentContext) -> None:
        """Ask the provider for a walkthrough and parse it into a scenario.

        Provider errors are recorded under ``finding.metadata`` (assumed to be
        a mutable mapping) and the heuristic scenario is used instead. The
        heuristic scenario is also used when the answer yields neither a
        discovery nor a walkthrough section.
        """
        prompt = (
            f"Vulnerability: {finding.title}\n"
            f"CWE: {', '.join(finding.cwe)}\n"
            f"Location: {finding.location.as_ref()}\n"
            f"Code:\n{finding.location.snippet or '(unavailable)'}\n\n"
            "Write the educational attack simulation."
        )
        try:
            answer = ctx.ai.complete(_SYSTEM, prompt)
        except Exception as exc:
            finding.metadata["simulation_error"] = str(exc)
            self._simulate_heuristic(finding)
            return

        # Guard against a provider that returns None instead of text.
        s = self._parse(answer or "")
        finding.exploit = ExploitScenario(
            discovery=s.get("discovery", ""),
            exploit_walkthrough=s.get("walkthrough", ""),
            data_at_risk=s.get("data_at_risk", ""),
            # ``or`` (not a .get default) so an empty BUSINESS_IMPACT section
            # still falls back to the finding's own impact text.
            business_impact=s.get("business_impact") or finding.business_impact,
            fix_blocks_attack=s.get("fix_blocks", ""),
            before_after=s.get("before_after", ""),
            sandbox_ok=True,
        )
        if not any([finding.exploit.discovery, finding.exploit.exploit_walkthrough]):
            self._simulate_heuristic(finding)

    @staticmethod
    def _parse(text: str) -> dict[str, str]:
        """Split a model answer into scenario sections.

        A line is a header only when its label is exactly one of the known
        headers. The comparison ignores case and markdown decoration, and
        treats spaces, hyphens and slashes like underscores, so
        "Data at risk:" matches ``DATA_AT_RISK``. The label must stand alone on
        the line or be followed by a colon. Body lines that merely begin with a
        header word stay in the current section, and text already collected for
        a section is appended to rather than overwritten.

        Returns:
            A dict mapping section keys (``discovery``, ``walkthrough``,
            ``data_at_risk``, ``business_impact``, ``fix_blocks``,
            ``before_after``) to their text. Only sections whose header was
            seen are present.
        """
        headers = {
            "DISCOVERY": "discovery",
            "WALKTHROUGH": "walkthrough",
            "DATA_AT_RISK": "data_at_risk",
            "BUSINESS_IMPACT": "business_impact",
            "FIX_BLOCKS": "fix_blocks",
            "BEFORE_AFTER": "before_after",
        }
        out: dict[str, str] = {}
        current = None
        for line in text.splitlines():
            stripped = line.strip()
            if not stripped:
                continue
            label, sep, rest = stripped.partition(":")
            words = (label.strip(" \t#*_").upper()
                     .replace("_", " ").replace("-", " ").replace("/", " ").split())
            matched = headers.get("_".join(words))
            if matched:
                current = matched
                out.setdefault(current, "")
                # Keep only the text after the colon; drop closing markdown
                # emphasis left over from headers like "**DISCOVERY:** ...".
                stripped = rest.strip().lstrip("*_").strip() if sep else ""
            if current and stripped:
                out[current] = (out[current] + " " + stripped).strip()
        return out
|
argus/agents/patch.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""Patch generation agent.
|
|
2
|
+
|
|
3
|
+
Proposes a concrete fix as a unified diff and, where possible, verifies it. Two
|
|
4
|
+
paths:
|
|
5
|
+
|
|
6
|
+
* **Deterministic rewrites** for a set of well-understood rules (e.g. unsafe
|
|
7
|
+
``yaml.load`` → ``yaml.safe_load``). These are self-verifying: Argus re-runs the
|
|
8
|
+
triggering pattern against the rewritten line and only marks the patch
|
|
9
|
+
``verified`` if the detection no longer fires — a fast, local proxy for "the fix
|
|
10
|
+
resolves the issue without obviously breaking the line".
|
|
11
|
+
* **Model-generated patches** when a real provider is configured, for findings
|
|
12
|
+
without a deterministic rewrite. These are proposed but left unverified unless a
|
|
13
|
+
re-scan confirms them.
|
|
14
|
+
|
|
15
|
+
The agent only proposes changes; it never writes to the working tree. Applying a
|
|
16
|
+
patch and opening a pull request is an explicit, separate action.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import re
|
|
22
|
+
|
|
23
|
+
from argus.agents.base import Agent, AgentContext
|
|
24
|
+
from argus.core.models import Finding, Remediation
|
|
25
|
+
from argus.remediation.rewrites import fix_line, verify_line_fixed
|
|
26
|
+
|
|
27
|
+
_SYSTEM = (
|
|
28
|
+
"You are a secure-coding assistant. Given a vulnerable code snippet and the "
|
|
29
|
+
"issue, return ONLY a minimal corrected version of the snippet that resolves "
|
|
30
|
+
"the vulnerability while preserving behavior. Do not add commentary."
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class PatchAgent(Agent):
    """Propose a fix for a finding as a unified diff.

    A deterministic rewrite (``fix_line``) is tried first and checked with
    ``verify_line_fixed``. When it produces no change and a real model is
    configured, the model is asked for a corrected snippet; such patches are
    left unverified. The agent only records the patch on
    ``finding.remediation``; it does not write to any file itself.
    """

    name = "patch"

    def process(self, finding: Finding, ctx: AgentContext) -> Finding:
        """Attach a proposed patch to ``finding.remediation`` when one exists.

        Args:
            finding: The finding to patch; its snippet is the text rewritten.
            ctx: Shared agent context (project, config and AI provider).

        Returns:
            The same ``finding`` object.
        """
        if finding.remediation is None:
            finding.remediation = Remediation(summary="See remediation guidance.")

        original = finding.location.snippet or ""
        fixed = fix_line(finding.rule_id, original)
        verified = False

        if fixed and fixed != original:
            verified = verify_line_fixed(finding.rule_id, fixed)
        elif self._uses_real_model(ctx) and original:
            fixed = self._model_fix(finding, ctx, original)

        if fixed and fixed != original:
            finding.remediation.patch = self._unified_diff(
                finding.location.path, original, fixed,
                start_line=finding.location.start_line or 1)
            finding.remediation.verified = verified
        return finding

    def _model_fix(self, finding: Finding, ctx: AgentContext, original: str) -> str | None:
        """Ask the provider for a corrected snippet.

        Returns:
            The corrected snippet with any surrounding code fence removed, or
            ``None`` when the provider failed or returned nothing. Failures
            are recorded under ``finding.metadata["patch_error"]``.
        """
        prompt = (
            f"Issue: {finding.title} ({', '.join(finding.cwe)})\n"
            f"File: {finding.location.path}\n"
            f"Vulnerable snippet:\n{original}\n\n"
            "Return the corrected snippet only."
        )
        try:
            out = ctx.ai.complete(_SYSTEM, prompt).strip()
        except Exception as exc:
            finding.metadata["patch_error"] = str(exc)
            return None
        # Strip code fences if the model added them. The language tag is only
        # consumed when it is followed by a newline, so tags such as
        # "python3" or "c++" are removed whole and a single-line answer like
        # "```x = 1```" keeps its code.
        out = re.sub(r"^```(?:[\w+#.-]*\n)?|\n?```$", "", out).strip()
        return out or None

    @staticmethod
    def _unified_diff(path: str, before: str, after: str, start_line: int) -> str:
        """Render a compact unified diff replacing ``before`` with ``after``.

        Every line of a multi-line snippet gets its own ``-`` / ``+`` prefix
        and the hunk header carries line counts. For a single-line change the
        count is omitted, which the unified format permits.

        Args:
            path: File path used in the ``a/`` and ``b/`` header lines.
            before: Original snippet text.
            after: Replacement snippet text.
            start_line: 1-based line at which the snippet starts.
        """
        old_lines = before.splitlines() or [""]
        new_lines = after.splitlines() or [""]

        old_span = (f"{start_line}" if len(old_lines) == 1
                    else f"{start_line},{len(old_lines)}")
        new_span = (f"{start_line}" if len(new_lines) == 1
                    else f"{start_line},{len(new_lines)}")

        hunk = [f"-{text}" for text in old_lines] + [f"+{text}" for text in new_lines]
        return (
            f"--- a/{path}\n"
            f"+++ b/{path}\n"
            f"@@ -{old_span} +{new_span} @@\n"
            + "\n".join(hunk) + "\n"
        )
|
argus/ai/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""AI provider abstraction.
|
|
2
|
+
|
|
3
|
+
Argus supports multiple model backends behind one interface so organizations can
|
|
4
|
+
choose where their code goes:
|
|
5
|
+
|
|
6
|
+
* ``heuristic`` — no network, no keys; template-based enrichment. The default.
|
|
7
|
+
* ``anthropic`` / ``openai`` — cloud-hosted models.
|
|
8
|
+
* ``ollama`` — local models, keeping source inside your own environment.
|
|
9
|
+
|
|
10
|
+
Providers are plugins like everything else and register on import.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from argus.ai.base import AIProvider, ChatMessage
|
|
14
|
+
from argus.ai.factory import build_provider
|
|
15
|
+
|
|
16
|
+
__all__ = ["AIProvider", "ChatMessage", "build_provider"]
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Anthropic (Claude) provider.
|
|
2
|
+
|
|
3
|
+
Uses the official ``anthropic`` SDK if installed and ``ANTHROPIC_API_KEY`` is set.
|
|
4
|
+
Import of the SDK is deferred to :meth:`complete` so the module loads even when
|
|
5
|
+
the optional dependency is absent — availability is reported by ``is_available``.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
|
|
12
|
+
from argus.ai.base import AIProvider
|
|
13
|
+
from argus.core.plugin import ai_provider
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@ai_provider
class AnthropicProvider(AIProvider):
    """Provider backed by the ``anthropic`` SDK's Messages API."""

    name = "anthropic"
    is_remote = True
    # A current, capable default; override via config `ai.model`.
    default_model = "claude-sonnet-5"

    @classmethod
    def is_available(cls) -> bool:
        """True when ``ANTHROPIC_API_KEY`` is set and the SDK can be imported."""
        api_key = os.environ.get("ANTHROPIC_API_KEY")
        if not api_key:
            return False
        try:
            import anthropic  # noqa: F401
        except ImportError:
            return False
        else:
            return True

    def complete(self, system: str, user: str) -> str:
        """Send one system + user exchange and return the reply text."""
        # Imported here so the module still loads without the optional SDK.
        import anthropic

        client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
        response = client.messages.create(
            model=self.model,
            max_tokens=self.max_tokens,
            temperature=self.temperature,
            system=system,
            messages=[{"role": "user", "content": user}],
        )
        # Only blocks whose type is "text" contribute to the result.
        text_parts = [
            block.text
            for block in response.content
            if getattr(block, "type", "") == "text"
        ]
        return "".join(text_parts).strip()
|
argus/ai/base.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""The AIProvider contract and shared helpers.
|
|
2
|
+
|
|
3
|
+
An AI provider is a thin, uniform wrapper around a chat-style model. Agents call
|
|
4
|
+
:meth:`AIProvider.complete` with a system prompt and a user prompt and get back
|
|
5
|
+
text. Everything provider-specific (auth, endpoints, request shape) lives in the
|
|
6
|
+
concrete subclasses.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import abc
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from typing import ClassVar
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class ChatMessage:
    """One turn of a chat-style conversation."""

    # Speaker of the turn: "system" | "user" | "assistant"
    role: str
    # Text of the turn.
    content: str
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class AIProvider(abc.ABC):
    """Common interface implemented by every model backend."""

    #: Unique provider id used in config and the `--ai-provider` flag.
    name: ClassVar[str] = ""
    #: Whether this provider sends data off the local machine. Surfaced to users
    #: so they can make an informed choice about source-code confidentiality.
    is_remote: ClassVar[bool] = True
    #: Default model id when the user doesn't specify one.
    default_model: ClassVar[str] = ""

    def __init__(self, model: str | None = None, *, temperature: float = 0.0,
                 max_tokens: int = 1500) -> None:
        """Store the sampling settings; a falsy ``model`` selects ``default_model``."""
        self.model = model if model else self.default_model
        self.temperature = temperature
        self.max_tokens = max_tokens

    @classmethod
    def is_available(cls) -> bool:
        """Tell whether the provider is usable right now.

        The base implementation always answers True. Subclasses that need an
        SDK or credentials override it so callers can fall back gracefully.
        """
        return True

    @abc.abstractmethod
    def complete(self, system: str, user: str) -> str:
        """Return the model's completion for a system + user prompt."""
        raise NotImplementedError

    def chat(self, messages: list[ChatMessage]) -> str:
        """Answer a multi-turn conversation through a single ``complete`` call.

        System messages are joined into the system prompt. Every other message
        becomes a ``ROLE: content`` paragraph of the user prompt. Providers
        may override this for efficiency.
        """
        system_parts = []
        turns = []
        for message in messages:
            if message.role == "system":
                system_parts.append(message.content)
            else:
                turns.append(f"{message.role.upper()}: {message.content}")
        return self.complete("\n\n".join(system_parts), "\n\n".join(turns))
|
argus/ai/factory.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Provider selection with graceful fallback.
|
|
2
|
+
|
|
3
|
+
``build_provider`` honors the requested provider but never hard-fails a scan just
|
|
4
|
+
because a model backend is missing: if the requested cloud/local provider is
|
|
5
|
+
unavailable (no SDK, no key, no server), it warns and falls back to the offline
|
|
6
|
+
heuristic provider so ``argus scan`` still produces a report.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import warnings
|
|
12
|
+
|
|
13
|
+
# Importing the provider modules is what registers them in the registry.
|
|
14
|
+
from argus.ai import ( # noqa: F401
|
|
15
|
+
anthropic_provider,
|
|
16
|
+
heuristic,
|
|
17
|
+
ollama_provider,
|
|
18
|
+
openai_provider,
|
|
19
|
+
)
|
|
20
|
+
from argus.ai.base import AIProvider
|
|
21
|
+
from argus.core.config import AIConfig
|
|
22
|
+
from argus.core.plugin import registry
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def build_provider(cfg: AIConfig) -> AIProvider:
    """Instantiate the provider named in ``cfg``, degrading to ``heuristic``.

    An unknown provider name, or a known provider whose ``is_available()``
    returns False, produces a warning and the heuristic provider is built
    instead. The model, temperature and max_tokens from ``cfg`` are passed to
    whichever class ends up being used.
    """
    fallback = "heuristic"
    providers = registry.ai_providers()
    chosen = cfg.provider

    if chosen not in providers:
        warnings.warn(
            f"Unknown AI provider {chosen!r}; using 'heuristic'. "
            f"Available: {sorted(providers)}",
            stacklevel=2,
        )
        chosen = fallback

    provider_cls = providers[chosen]
    if not provider_cls.is_available():
        # Only warn when the user asked for something other than the fallback.
        if chosen != fallback:
            warnings.warn(
                f"AI provider {chosen!r} is not available "
                f"(missing SDK, credentials, or server). Falling back to 'heuristic'.",
                stacklevel=2,
            )
        provider_cls = providers[fallback]

    return provider_cls(
        model=cfg.model,
        temperature=cfg.temperature,
        max_tokens=cfg.max_tokens,
    )
|