codejury 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codejury/__init__.py +8 -0
- codejury/agents/__init__.py +6 -0
- codejury/agents/base.py +21 -0
- codejury/agents/debate.py +188 -0
- codejury/agents/mock.py +38 -0
- codejury/agents/parsing.py +42 -0
- codejury/agents/verifier.py +106 -0
- codejury/assembly.py +76 -0
- codejury/cli.py +196 -0
- codejury/data/capabilities/authentication.yaml +67 -0
- codejury/data/capabilities/authorization.yaml +55 -0
- codejury/data/capabilities/business_logic.yaml +58 -0
- codejury/data/capabilities/crypto.yaml +78 -0
- codejury/data/capabilities/data_protection.yaml +57 -0
- codejury/data/capabilities/dependency_config.yaml +52 -0
- codejury/data/capabilities/error_logging.yaml +49 -0
- codejury/data/capabilities/input_validation.yaml +92 -0
- codejury/data/capabilities/output_encoding.yaml +56 -0
- codejury/data/capabilities/secrets.yaml +51 -0
- codejury/data/capabilities/session.yaml +60 -0
- codejury/data/golden/authn_bcrypt_password.yaml +5 -0
- codejury/data/golden/authn_sha256_password.yaml +5 -0
- codejury/data/golden/sqli_fstring_query.yaml +5 -0
- codejury/data/golden/sqli_parameterized_query.yaml +5 -0
- codejury/data/tasks/audit_diff_debate.yaml +4 -0
- codejury/data/tasks/quick_scan_single.yaml +4 -0
- codejury/domain/__init__.py +5 -0
- codejury/domain/artifact.py +20 -0
- codejury/domain/capability.py +123 -0
- codejury/domain/context.py +26 -0
- codejury/domain/observation.py +104 -0
- codejury/domain/result.py +19 -0
- codejury/evaluation.py +107 -0
- codejury/infrastructure/__init__.py +4 -0
- codejury/infrastructure/json_parse.py +57 -0
- codejury/orchestrators/__init__.py +6 -0
- codejury/orchestrators/base.py +19 -0
- codejury/orchestrators/debate.py +57 -0
- codejury/orchestrators/pipeline.py +32 -0
- codejury/orchestrators/reflexion.py +58 -0
- codejury/orchestrators/single.py +24 -0
- codejury/providers/__init__.py +5 -0
- codejury/providers/anthropic.py +68 -0
- codejury/providers/base.py +42 -0
- codejury/providers/litellm.py +68 -0
- codejury/providers/mock.py +32 -0
- codejury/providers/openai.py +57 -0
- codejury/providers/openai_format.py +30 -0
- codejury/providers/retry.py +48 -0
- codejury/reporting.py +114 -0
- codejury/resources.py +13 -0
- codejury/sources/__init__.py +6 -0
- codejury/sources/base.py +17 -0
- codejury/sources/chunker.py +33 -0
- codejury/sources/diff.py +69 -0
- codejury/sources/function.py +35 -0
- codejury/sources/mock.py +25 -0
- codejury/sources/repo.py +44 -0
- codejury/tasks/__init__.py +6 -0
- codejury/tasks/base.py +55 -0
- codejury/tasks/registry.py +22 -0
- codejury-0.1.0.dist-info/METADATA +110 -0
- codejury-0.1.0.dist-info/RECORD +67 -0
- codejury-0.1.0.dist-info/WHEEL +5 -0
- codejury-0.1.0.dist-info/entry_points.txt +2 -0
- codejury-0.1.0.dist-info/licenses/LICENSE +21 -0
- codejury-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
id: input_validation
|
|
2
|
+
name: Input Validation
|
|
3
|
+
asvs_chapter: V5
|
|
4
|
+
description: >-
|
|
5
|
+
The inbound trust boundary. Untrusted input must be parameterized, validated
|
|
6
|
+
against an allowlist, or escaped before it reaches an interpreter (SQL, shell,
|
|
7
|
+
filesystem, template, LDAP, ...).
|
|
8
|
+
|
|
9
|
+
sub_capabilities:
|
|
10
|
+
sql_injection:
|
|
11
|
+
correct_patterns:
|
|
12
|
+
- id: SQLI-OK-1
|
|
13
|
+
description: Use parameterized queries or ORM-bound parameters; never build SQL from input
|
|
14
|
+
signals: ["cursor.execute(", "execute(query, params", "session.query(", "text(:param)"]
|
|
15
|
+
why_ok: The driver sends data separately from the statement, so input cannot alter the query
|
|
16
|
+
|
|
17
|
+
anti_patterns:
|
|
18
|
+
- id: SQLI-BAD-1
|
|
19
|
+
cwe: CWE-89
|
|
20
|
+
severity: CRITICAL
|
|
21
|
+
description: Build SQL by string concatenation or f-string interpolation of input
|
|
22
|
+
signals: ['execute(f"', 'execute("SELECT', '" + ', "% (", ".format("]
|
|
23
|
+
why_bad: Input becomes part of the statement and can change its meaning entirely
|
|
24
|
+
example_bad: |
|
|
25
|
+
cursor.execute(f"SELECT * FROM users WHERE name = '{name}'")
|
|
26
|
+
example_good: |
|
|
27
|
+
cursor.execute("SELECT * FROM users WHERE name = %s", (name,))
|
|
28
|
+
|
|
29
|
+
- id: SQLI-BAD-2
|
|
30
|
+
cwe: CWE-89
|
|
31
|
+
severity: HIGH
|
|
32
|
+
description: Interpolate a table or column name from input without an allowlist
|
|
33
|
+
why_bad: Identifiers cannot be parameterized, so unchecked input still injects
|
|
34
|
+
|
|
35
|
+
- id: SQLI-BAD-3
|
|
36
|
+
cwe: CWE-89
|
|
37
|
+
severity: HIGH
|
|
38
|
+
description: Pass a pre-built SQL string to an ORM raw or text escape hatch
|
|
39
|
+
signals: [".raw(", "text(", "execute_sql("]
|
|
40
|
+
why_bad: Raw escape hatches bypass the ORM's parameter binding
|
|
41
|
+
|
|
42
|
+
command_injection:
|
|
43
|
+
correct_patterns:
|
|
44
|
+
- id: CMDI-OK-1
|
|
45
|
+
description: Run subprocesses with an argument list and shell=False
|
|
46
|
+
signals: ["subprocess.run([", "subprocess.Popen(["]
|
|
47
|
+
why_ok: Arguments are passed directly to execve, so the shell never parses input
|
|
48
|
+
|
|
49
|
+
anti_patterns:
|
|
50
|
+
- id: CMDI-BAD-1
|
|
51
|
+
cwe: CWE-78
|
|
52
|
+
severity: CRITICAL
|
|
53
|
+
description: Invoke a shell with an interpolated command string
|
|
54
|
+
signals: ["os.system(", "shell=True", "os.popen("]
|
|
55
|
+
why_bad: Shell metacharacters in input let an attacker run arbitrary commands
|
|
56
|
+
example_bad: |
|
|
57
|
+
os.system("ping " + host)
|
|
58
|
+
example_good: |
|
|
59
|
+
subprocess.run(["ping", "-c", "1", host], shell=False)
|
|
60
|
+
|
|
61
|
+
- id: CMDI-BAD-2
|
|
62
|
+
cwe: CWE-78
|
|
63
|
+
severity: HIGH
|
|
64
|
+
description: Build the argument list itself from an unvalidated, shell-parsed string
|
|
65
|
+
signals: ["shlex.split(", "shell=True"]
|
|
66
|
+
why_bad: Splitting an untrusted string can still smuggle extra arguments or commands
|
|
67
|
+
|
|
68
|
+
path_traversal:
|
|
69
|
+
correct_patterns:
|
|
70
|
+
- id: PATH-OK-1
|
|
71
|
+
description: Resolve the path and confirm it stays within an allowed base directory
|
|
72
|
+
signals: ["os.path.realpath", "Path(...).resolve()", "is_relative_to("]
|
|
73
|
+
why_ok: A resolved path outside the base is rejected before any file access
|
|
74
|
+
|
|
75
|
+
anti_patterns:
|
|
76
|
+
- id: PATH-BAD-1
|
|
77
|
+
cwe: CWE-22
|
|
78
|
+
severity: HIGH
|
|
79
|
+
description: Join user input into a filesystem path without containment checks
|
|
80
|
+
signals: ["os.path.join(", "open(", "Path("]
|
|
81
|
+
why_bad: Sequences like ../ let input escape the intended directory
|
|
82
|
+
example_bad: |
|
|
83
|
+
open(os.path.join(UPLOAD_DIR, filename))
|
|
84
|
+
example_good: |
|
|
85
|
+
target = (UPLOAD_DIR / filename).resolve()
|
|
86
|
+
if not target.is_relative_to(UPLOAD_DIR):
|
|
87
|
+
raise ValueError("path escapes upload dir")
|
|
88
|
+
|
|
89
|
+
trigger_signals:
|
|
90
|
+
- raw SQL strings or cursor.execute calls appear
|
|
91
|
+
- imports of os, subprocess, or shlex with process execution
|
|
92
|
+
- file paths built from request, form, or query parameters
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
id: output_encoding
|
|
2
|
+
name: Output Encoding
|
|
3
|
+
asvs_chapter: V5
|
|
4
|
+
description: The outbound trust boundary. Untrusted data must be encoded for the context it is rendered into.
|
|
5
|
+
|
|
6
|
+
sub_capabilities:
|
|
7
|
+
xss:
|
|
8
|
+
correct_patterns:
|
|
9
|
+
- id: XSS-OK-1
|
|
10
|
+
description: Rely on contextual output encoding or framework auto-escaping; render untrusted data as text, not markup
|
|
11
|
+
signals: ["escape(", "textContent", "render_template"]
|
|
12
|
+
why_ok: Data is encoded for its context, so it cannot become executable markup
|
|
13
|
+
|
|
14
|
+
anti_patterns:
|
|
15
|
+
- id: XSS-BAD-1
|
|
16
|
+
cwe: CWE-79
|
|
17
|
+
severity: HIGH
|
|
18
|
+
description: Render untrusted input into HTML through a raw sink
|
|
19
|
+
signals: ["innerHTML", "dangerouslySetInnerHTML", "|safe", "mark_safe(", "v-html"]
|
|
20
|
+
why_bad: Attacker-supplied markup runs as script in the victim's browser
|
|
21
|
+
example_bad: |
|
|
22
|
+
el.innerHTML = "Hello " + username
|
|
23
|
+
example_good: |
|
|
24
|
+
el.textContent = "Hello " + username
|
|
25
|
+
|
|
26
|
+
- id: XSS-BAD-2
|
|
27
|
+
cwe: CWE-79
|
|
28
|
+
severity: HIGH
|
|
29
|
+
description: Build HTML by string concatenation of untrusted input
|
|
30
|
+
why_bad: Same XSS sink, just assembled by hand
|
|
31
|
+
|
|
32
|
+
- id: XSS-BAD-3
|
|
33
|
+
cwe: CWE-116
|
|
34
|
+
severity: MEDIUM
|
|
35
|
+
description: Disable template auto-escaping globally
|
|
36
|
+
signals: ["autoescape=False", "| safe"]
|
|
37
|
+
why_bad: Every template output becomes a potential injection point
|
|
38
|
+
|
|
39
|
+
header_and_log:
|
|
40
|
+
anti_patterns:
|
|
41
|
+
- id: HDR-BAD-1
|
|
42
|
+
cwe: CWE-113
|
|
43
|
+
severity: MEDIUM
|
|
44
|
+
description: Place untrusted input into a response header or redirect location without sanitizing newlines
|
|
45
|
+
why_bad: CR/LF in the value splits the response or injects headers
|
|
46
|
+
|
|
47
|
+
- id: LOG-BAD-1
|
|
48
|
+
cwe: CWE-117
|
|
49
|
+
severity: LOW
|
|
50
|
+
description: Write untrusted input to logs without neutralizing newlines or control characters
|
|
51
|
+
why_bad: Forged log lines mislead investigators and can poison log processors
|
|
52
|
+
|
|
53
|
+
trigger_signals:
|
|
54
|
+
- HTML sinks like innerHTML, dangerouslySetInnerHTML, |safe, mark_safe, v-html
|
|
55
|
+
- templates rendering request data
|
|
56
|
+
- response headers or redirect targets built from input
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
id: secrets
|
|
2
|
+
name: Secrets Management
|
|
3
|
+
asvs_chapter: V6
|
|
4
|
+
description: How credentials and keys are stored, supplied, and kept out of code, logs, and version control.
|
|
5
|
+
|
|
6
|
+
sub_capabilities:
|
|
7
|
+
storage:
|
|
8
|
+
correct_patterns:
|
|
9
|
+
- id: SEC-OK-1
|
|
10
|
+
description: Load secrets at runtime from environment variables or a secret manager
|
|
11
|
+
signals: ["os.environ[", "os.getenv(", "secretsmanager", "vault"]
|
|
12
|
+
why_ok: Secrets live outside the codebase and can be rotated without a deploy
|
|
13
|
+
|
|
14
|
+
anti_patterns:
|
|
15
|
+
- id: SEC-BAD-1
|
|
16
|
+
cwe: CWE-798
|
|
17
|
+
severity: HIGH
|
|
18
|
+
description: Hardcode an API key, token, or other credential in source
|
|
19
|
+
signals: ["api_key = \"", "token = \"", "aws_secret_access_key ="]
|
|
20
|
+
why_bad: The credential leaks with the source and cannot be rotated easily
|
|
21
|
+
example_bad: |
|
|
22
|
+
STRIPE_KEY = "sk_live_4eC39Hq..."
|
|
23
|
+
example_good: |
|
|
24
|
+
STRIPE_KEY = os.environ["STRIPE_KEY"]
|
|
25
|
+
|
|
26
|
+
- id: SEC-BAD-2
|
|
27
|
+
cwe: CWE-259
|
|
28
|
+
severity: HIGH
|
|
29
|
+
description: Hardcode a password
|
|
30
|
+
signals: ["password = \"", "passwd = \""]
|
|
31
|
+
why_bad: A fixed password in code is shared, discoverable, and unchangeable
|
|
32
|
+
|
|
33
|
+
exposure:
|
|
34
|
+
anti_patterns:
|
|
35
|
+
- id: SEC-BAD-3
|
|
36
|
+
cwe: CWE-532
|
|
37
|
+
severity: MEDIUM
|
|
38
|
+
description: Write secrets or tokens to logs
|
|
39
|
+
signals: ["log.info(token", "print(password", "logger.debug(secret"]
|
|
40
|
+
why_bad: Logs are widely accessible and long-lived, so logged secrets spread
|
|
41
|
+
|
|
42
|
+
- id: SEC-BAD-4
|
|
43
|
+
cwe: CWE-540
|
|
44
|
+
severity: MEDIUM
|
|
45
|
+
description: Commit secrets in config files or a tracked .env
|
|
46
|
+
why_bad: Version history keeps the secret even after it is removed
|
|
47
|
+
|
|
48
|
+
trigger_signals:
|
|
49
|
+
- assignments named key, token, password, secret, or credential
|
|
50
|
+
- imports of a secret manager or vault client
|
|
51
|
+
- .env or config files with credential-looking keys
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
id: session
|
|
2
|
+
name: Session Management
|
|
3
|
+
asvs_chapter: V3
|
|
4
|
+
description: Establishing, protecting, and ending the context that links requests to an authenticated user.
|
|
5
|
+
|
|
6
|
+
sub_capabilities:
|
|
7
|
+
cookie_attributes:
|
|
8
|
+
correct_patterns:
|
|
9
|
+
- id: COOKIE-OK-1
|
|
10
|
+
description: Set HttpOnly, Secure, and an explicit SameSite on session cookies
|
|
11
|
+
signals: ["httponly=True", "secure=True", "samesite="]
|
|
12
|
+
why_ok: Blocks script access, plaintext transmission, and most cross-site sending
|
|
13
|
+
|
|
14
|
+
anti_patterns:
|
|
15
|
+
- id: COOKIE-BAD-1
|
|
16
|
+
cwe: CWE-1004
|
|
17
|
+
severity: MEDIUM
|
|
18
|
+
description: Session cookie set without HttpOnly
|
|
19
|
+
signals: ["set_cookie("]
|
|
20
|
+
why_bad: JavaScript can read the cookie, so an XSS turns into session theft
|
|
21
|
+
example_bad: |
|
|
22
|
+
resp.set_cookie("sid", token)
|
|
23
|
+
example_good: |
|
|
24
|
+
resp.set_cookie("sid", token, httponly=True, secure=True, samesite="Lax")
|
|
25
|
+
|
|
26
|
+
- id: COOKIE-BAD-2
|
|
27
|
+
cwe: CWE-614
|
|
28
|
+
severity: MEDIUM
|
|
29
|
+
description: Session cookie set without Secure
|
|
30
|
+
why_bad: The cookie is sent over plain HTTP and can be sniffed
|
|
31
|
+
|
|
32
|
+
- id: COOKIE-BAD-3
|
|
33
|
+
cwe: CWE-1275
|
|
34
|
+
severity: LOW
|
|
35
|
+
description: SameSite unset, or None without a documented cross-site need
|
|
36
|
+
why_bad: Widens the CSRF surface
|
|
37
|
+
|
|
38
|
+
lifecycle:
|
|
39
|
+
correct_patterns:
|
|
40
|
+
- id: SESS-OK-1
|
|
41
|
+
description: Regenerate the session id at login, invalidate it at logout, and enforce idle and absolute timeouts
|
|
42
|
+
why_ok: Limits the window in which a stolen or fixated session is useful
|
|
43
|
+
|
|
44
|
+
anti_patterns:
|
|
45
|
+
- id: SESS-BAD-1
|
|
46
|
+
cwe: CWE-384
|
|
47
|
+
severity: HIGH
|
|
48
|
+
description: Do not rotate the session id after authentication (session fixation)
|
|
49
|
+
why_bad: An attacker who plants a known session id before login rides it afterward
|
|
50
|
+
|
|
51
|
+
- id: SESS-BAD-2
|
|
52
|
+
cwe: CWE-613
|
|
53
|
+
severity: MEDIUM
|
|
54
|
+
description: No session expiry or an unbounded lifetime
|
|
55
|
+
why_bad: A leaked session stays valid indefinitely
|
|
56
|
+
|
|
57
|
+
trigger_signals:
|
|
58
|
+
- set_cookie or Set-Cookie on a session token
|
|
59
|
+
- login, logout, or session creation handlers
|
|
60
|
+
- a session store or framework session configuration
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""CodeArtifact -- the unit of code an agent analyzes.
|
|
2
|
+
|
|
3
|
+
Produced by a Source (diff hunk, file, function, repo chunk) and consumed by an
|
|
4
|
+
agent. It is cross-layer typed data, so it lives in ``domain`` rather than in
|
|
5
|
+
``sources``.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from typing import Literal
|
|
12
|
+
|
|
13
|
+
ArtifactKind = Literal["diff", "file", "function", "repo"]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass(frozen=True, kw_only=True)
|
|
17
|
+
class CodeArtifact:
|
|
18
|
+
kind: ArtifactKind
|
|
19
|
+
path: str # identifier used when building Evidence references
|
|
20
|
+
content: str # the diff/file/function text the agent analyzes
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""Capability model -- domain knowledge loaded from YAML into typed dataclasses.
|
|
2
|
+
|
|
3
|
+
A capability is the first-class unit of Application Security knowledge, one per
|
|
4
|
+
OWASP ASVS area. Its YAML is readable by the model as a checklist, by a rule
|
|
5
|
+
engine because ``signals`` can be grepped, and by a human because the ``why_*``
|
|
6
|
+
fields are teaching material.
|
|
7
|
+
|
|
8
|
+
This module only deserializes YAML into dataclasses; it holds no audit logic.
|
|
9
|
+
Unknown keys in the YAML are ignored so the schema can grow without breaking
|
|
10
|
+
older loaders.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
import yaml
|
|
20
|
+
|
|
21
|
+
from codejury.domain.observation import Severity
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(frozen=True, kw_only=True)
|
|
25
|
+
class CorrectPattern:
|
|
26
|
+
"""A safe pattern. Matching it supports a SECURE verdict."""
|
|
27
|
+
|
|
28
|
+
id: str
|
|
29
|
+
description: str = ""
|
|
30
|
+
signals: list[str] = field(default_factory=list) # code markers a rule engine can grep
|
|
31
|
+
why_ok: str = ""
|
|
32
|
+
|
|
33
|
+
@classmethod
|
|
34
|
+
def from_dict(cls, data: dict[str, Any]) -> CorrectPattern:
|
|
35
|
+
return cls(
|
|
36
|
+
id=data["id"],
|
|
37
|
+
description=data.get("description", ""),
|
|
38
|
+
signals=list(data.get("signals", [])),
|
|
39
|
+
why_ok=data.get("why_ok", ""),
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass(frozen=True, kw_only=True)
|
|
44
|
+
class AntiPattern:
|
|
45
|
+
"""An unsafe pattern. Matching it supports a VULNERABLE verdict."""
|
|
46
|
+
|
|
47
|
+
id: str
|
|
48
|
+
description: str = ""
|
|
49
|
+
signals: list[str] = field(default_factory=list)
|
|
50
|
+
cwe: str = ""
|
|
51
|
+
severity: Severity = "MEDIUM"
|
|
52
|
+
why_bad: str = ""
|
|
53
|
+
example_bad: str = ""
|
|
54
|
+
example_good: str = ""
|
|
55
|
+
|
|
56
|
+
@classmethod
|
|
57
|
+
def from_dict(cls, data: dict[str, Any]) -> AntiPattern:
|
|
58
|
+
return cls(
|
|
59
|
+
id=data["id"],
|
|
60
|
+
description=data.get("description", ""),
|
|
61
|
+
signals=list(data.get("signals", [])),
|
|
62
|
+
cwe=data.get("cwe", ""),
|
|
63
|
+
severity=data.get("severity", "MEDIUM"),
|
|
64
|
+
why_bad=data.get("why_bad", ""),
|
|
65
|
+
example_bad=data.get("example_bad", ""),
|
|
66
|
+
example_good=data.get("example_good", ""),
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@dataclass(frozen=True, kw_only=True)
|
|
71
|
+
class SubCapability:
|
|
72
|
+
"""One checkable dimension within a capability, such as password_storage."""
|
|
73
|
+
|
|
74
|
+
name: str
|
|
75
|
+
correct_patterns: list[CorrectPattern] = field(default_factory=list)
|
|
76
|
+
anti_patterns: list[AntiPattern] = field(default_factory=list)
|
|
77
|
+
|
|
78
|
+
@classmethod
|
|
79
|
+
def from_dict(cls, name: str, data: dict[str, Any]) -> SubCapability:
|
|
80
|
+
return cls(
|
|
81
|
+
name=name,
|
|
82
|
+
correct_patterns=[CorrectPattern.from_dict(p) for p in data.get("correct_patterns", [])],
|
|
83
|
+
anti_patterns=[AntiPattern.from_dict(p) for p in data.get("anti_patterns", [])],
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@dataclass(frozen=True, kw_only=True)
|
|
88
|
+
class Capability:
|
|
89
|
+
"""A first-class Application Security knowledge unit, one per OWASP ASVS area."""
|
|
90
|
+
|
|
91
|
+
id: str
|
|
92
|
+
name: str
|
|
93
|
+
asvs_chapter: str = ""
|
|
94
|
+
description: str = ""
|
|
95
|
+
sub_capabilities: dict[str, SubCapability] = field(default_factory=dict)
|
|
96
|
+
# code patterns that bring this capability into scope for a given artifact
|
|
97
|
+
trigger_signals: list[str] = field(default_factory=list)
|
|
98
|
+
|
|
99
|
+
@classmethod
|
|
100
|
+
def from_dict(cls, data: dict[str, Any]) -> Capability:
|
|
101
|
+
subs = data.get("sub_capabilities") or {}
|
|
102
|
+
return cls(
|
|
103
|
+
id=data["id"],
|
|
104
|
+
name=data["name"],
|
|
105
|
+
asvs_chapter=data.get("asvs_chapter", ""),
|
|
106
|
+
description=data.get("description", ""),
|
|
107
|
+
sub_capabilities={name: SubCapability.from_dict(name, body) for name, body in subs.items()},
|
|
108
|
+
trigger_signals=list(data.get("trigger_signals", [])),
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def load_capability(path: str | Path) -> Capability:
    """Load a single capability YAML file into a Capability.

    The file is read as UTF-8 and parsed with ``yaml.safe_load``.

    Raises:
        ValueError: if the parsed document is not a mapping at the top level
            (an empty file parses to ``None`` and is rejected this way).
    """
    with open(path, encoding="utf-8") as f:
        data = yaml.safe_load(f)
    if not isinstance(data, dict):
        raise ValueError(f"{path}: expected a YAML mapping at the top level, got {type(data).__name__}")
    return Capability.from_dict(data)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def load_capabilities(directory: str | Path) -> list[Capability]:
    """Load every ``*.yaml`` capability file in a directory, sorted by name.

    Only files directly inside ``directory`` are considered (the glob is not
    recursive). A directory with no matching files yields an empty list.
    """
    return [load_capability(p) for p in sorted(Path(directory).glob("*.yaml"))]
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""AnalysisContext -- the input an agent reads on a single run.
|
|
2
|
+
|
|
3
|
+
An orchestrator builds one of these (selecting which capabilities apply to the
|
|
4
|
+
artifact) and passes it to ``Agent.run``. Keeping capabilities inside the
|
|
5
|
+
context lets the agent signature stay ``run(ctx)``.
|
|
6
|
+
|
|
7
|
+
For multi-round orchestration (debate, reflexion) the orchestrator threads prior
|
|
8
|
+
observations through ``history`` and the current ``round_num``; single-pass
|
|
9
|
+
strategies leave them at their defaults.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
|
|
16
|
+
from codejury.domain.artifact import CodeArtifact
|
|
17
|
+
from codejury.domain.capability import Capability
|
|
18
|
+
from codejury.domain.observation import Observation
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass(frozen=True, kw_only=True)
|
|
22
|
+
class AnalysisContext:
|
|
23
|
+
artifact: CodeArtifact
|
|
24
|
+
capabilities: list[Capability]
|
|
25
|
+
history: list[Observation] = field(default_factory=list)
|
|
26
|
+
round_num: int = 0
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Observation model -- the unit agents produce and orchestrators consume.
|
|
2
|
+
|
|
3
|
+
A single ``agent.run`` yields a list of ``Observation`` values, each one a
|
|
4
|
+
``Finding``, ``Verdict``, or ``Concession``.
|
|
5
|
+
|
|
6
|
+
``Verdict`` is the important one: it is emitted whether the code matches an
|
|
7
|
+
anti-pattern (VULNERABLE) or a safe pattern (SECURE), so a report can explain
|
|
8
|
+
both "why this is wrong" and "why this is fine". A capability is a checkup
|
|
9
|
+
dimension, not just an anomaly filter.
|
|
10
|
+
|
|
11
|
+
All classes are ``kw_only`` dataclasses to avoid default-ordering problems
|
|
12
|
+
across subclass inheritance.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from dataclasses import asdict, dataclass, field
|
|
18
|
+
from typing import Any, ClassVar, Literal
|
|
19
|
+
|
|
20
|
+
Severity = Literal["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"]

VerdictStatus = Literal[
    "SECURE",  # matched a safe pattern; confirmed not vulnerable here
    "VULNERABLE",  # matched an anti-pattern; confirmed vulnerable
    "PARTIAL",  # partially in place (e.g. validation present but incomplete)
    "NOT_PRESENT",  # dimension does not apply to / does not appear in this code
    "UNKNOWN",  # insufficient evidence to decide
]

# Discriminator values stored in each Observation subclass's ``kind``.
ObservationKind = Literal["finding", "verdict", "concession"]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass(frozen=True, kw_only=True)
|
|
34
|
+
class Evidence:
|
|
35
|
+
"""A reference to a concrete code location backing a judgement."""
|
|
36
|
+
|
|
37
|
+
file: str
|
|
38
|
+
line: int | None = None
|
|
39
|
+
end_line: int | None = None
|
|
40
|
+
code: str = ""
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass(kw_only=True)
|
|
44
|
+
class Observation:
|
|
45
|
+
"""Base class carrying provenance shared by every observation."""
|
|
46
|
+
|
|
47
|
+
capability: str = "" # e.g. "authn.password_storage"
|
|
48
|
+
produced_by: str = "" # agent role that produced it, e.g. "verifier"
|
|
49
|
+
round_num: int = 0 # round index in multi-round orchestration
|
|
50
|
+
|
|
51
|
+
kind: ClassVar[ObservationKind] = "finding"
|
|
52
|
+
|
|
53
|
+
def to_dict(self) -> dict[str, Any]:
|
|
54
|
+
data = asdict(self)
|
|
55
|
+
data["kind"] = self.kind # ClassVar, so asdict() omits it; add explicitly
|
|
56
|
+
return data
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@dataclass(kw_only=True)
class Finding(Observation):
    """A vulnerability claim (produced by Finder / Challenger)."""

    title: str  # required; the only field without a default
    description: str = ""
    severity: Severity = "MEDIUM"
    cwe: str = ""
    evidence: list[Evidence] = field(default_factory=list)
    recommendation: str = ""
    matched_anti: list[str] = field(default_factory=list)  # anti_pattern ids hit
    confidence: float = 0.5

    kind: ClassVar[ObservationKind] = "finding"
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@dataclass(kw_only=True)
class Verdict(Observation):
    """A ruling on one capability over a piece of code (produced by Verifier).

    Expresses both "vulnerable here" and "fine here" -- the key to answering
    "why is this not a problem".
    """

    status: VerdictStatus  # required; the only field without a default
    reasoning: str = ""
    evidence: list[Evidence] = field(default_factory=list)
    matched_correct: list[str] = field(default_factory=list)  # correct_pattern ids hit
    matched_anti: list[str] = field(default_factory=list)  # anti_pattern ids hit
    confidence: float = 0.5

    kind: ClassVar[ObservationKind] = "verdict"
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
@dataclass(kw_only=True)
class Concession(Observation):
    """A position that an earlier claim should be withdrawn or dismissed, with reason.

    Covers both a finder conceding its own finding and a challenger or judge
    moving to dismiss one. ``target`` identifies the claim (a Finding title).
    """

    target: str  # required; the only field without a default
    reason: str = ""

    kind: ClassVar[ObservationKind] = "concession"
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""AnalysisResult -- what an orchestrator returns.
|
|
2
|
+
|
|
3
|
+
Orchestrator-agnostic: it carries the observations produced over a run, plus an
|
|
4
|
+
optional error so a partial failure can be reported without raising. Anything
|
|
5
|
+
strategy-specific (debate convergence, rounds) is added when that orchestrator
|
|
6
|
+
needs it.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
|
|
13
|
+
from codejury.domain.observation import Observation
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass(kw_only=True)
|
|
17
|
+
class AnalysisResult:
|
|
18
|
+
observations: list[Observation] = field(default_factory=list)
|
|
19
|
+
error: str | None = None
|