agent-assurance 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +44 -0
- package/CODE_OF_CONDUCT.md +59 -0
- package/CONTRIBUTING.md +19 -0
- package/LICENSE +21 -0
- package/NOTICE +15 -0
- package/README.md +89 -0
- package/SECURITY.md +13 -0
- package/attacks/exfil.yaml +46 -0
- package/attacks/injection.yaml +51 -0
- package/attacks/tools.yaml +29 -0
- package/bun.lock +484 -0
- package/dist/adapter/exec.d.ts +10 -0
- package/dist/adapter/http.d.ts +7 -0
- package/dist/adapter/index.d.ts +5 -0
- package/dist/adapter/sdk.d.ts +7 -0
- package/dist/adapter/types.d.ts +41 -0
- package/dist/attacks/index.d.ts +3 -0
- package/dist/attacks/load.d.ts +33 -0
- package/dist/attacks/schema.d.ts +206 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +24731 -0
- package/dist/graph/build.d.ts +60 -0
- package/dist/graph/flows.d.ts +14 -0
- package/dist/graph/index.d.ts +4 -0
- package/dist/graph/trifecta.d.ts +13 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.js +22956 -0
- package/dist/manifest/index.d.ts +3 -0
- package/dist/manifest/load.d.ts +25 -0
- package/dist/manifest/schema.d.ts +136 -0
- package/dist/policy/protected-paths.d.ts +56 -0
- package/dist/report/findings.d.ts +52 -0
- package/dist/report/human.d.ts +19 -0
- package/dist/report/index.d.ts +5 -0
- package/dist/report/json.d.ts +39 -0
- package/dist/report/sarif.d.ts +57 -0
- package/dist/runner/index.d.ts +5 -0
- package/dist/runner/oracle.d.ts +46 -0
- package/dist/runner/run.d.ts +38 -0
- package/dist/runner/sandbox.d.ts +27 -0
- package/dist/runner/side-effect.d.ts +32 -0
- package/dist/scan.d.ts +43 -0
- package/package.json +60 -0
- package/policy-pack/README.md +105 -0
- package/policy-pack/hooks/guard-config-change.mjs +61 -0
- package/policy-pack/hooks/guard-protected-paths.mjs +65 -0
- package/policy-pack/managed-settings.json +18 -0
- package/policy-pack/protected-paths.json +18 -0
- package/policy-pack/settings.json +59 -0
- package/policy-pack/spike-bypass.sh +72 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { type Result } from "neverthrow";
|
|
2
|
+
import { CapabilityManifest } from "./schema";
|
|
3
|
+
/** A typed manifest-loading failure. `issues` carries zod's per-field detail when relevant. */
|
|
4
|
+
export type ManifestError = {
|
|
5
|
+
kind: "read";
|
|
6
|
+
path: string;
|
|
7
|
+
message: string;
|
|
8
|
+
} | {
|
|
9
|
+
kind: "parse";
|
|
10
|
+
path: string;
|
|
11
|
+
message: string;
|
|
12
|
+
} | {
|
|
13
|
+
kind: "validation";
|
|
14
|
+
path: string;
|
|
15
|
+
message: string;
|
|
16
|
+
issues: readonly ManifestIssue[];
|
|
17
|
+
};
|
|
18
|
+
export interface ManifestIssue {
|
|
19
|
+
path: string;
|
|
20
|
+
message: string;
|
|
21
|
+
}
|
|
22
|
+
/** Parse + validate an already-in-memory manifest value (source-agnostic). */
|
|
23
|
+
export declare function parseManifest(raw: unknown, path?: string): Result<CapabilityManifest, ManifestError>;
|
|
24
|
+
/** Load, parse (JSON or YAML by extension), and validate a manifest file. */
|
|
25
|
+
export declare function loadManifest(path: string): Result<CapabilityManifest, ManifestError>;
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Capability Manifest schema (FR-1.2).
|
|
3
|
+
*
|
|
4
|
+
* The manifest is the single framework-neutral input to AAL Core. It describes *what an
|
|
5
|
+
* agent can touch* — its tools and their side-effect class, the data it can read, where
|
|
6
|
+
* untrusted content enters, its identity model, and any mitigations the author declares —
|
|
7
|
+
* without AAL knowing anything about the agent's framework. It extends the Agentic Product
|
|
8
|
+
* Standard's `agent-capabilities.json` shape (private-data / untrusted-content / external-comms
|
|
9
|
+
* legs) with the structure the toxic-flow graph (FR-3) and the runner (FR-4/5) need.
|
|
10
|
+
*/
|
|
11
|
+
import { z } from "zod";
|
|
12
|
+
/** Side-effect class of a declared tool. Drives severity and the toxic-flow graph. */
|
|
13
|
+
export declare const SideEffectClass: z.ZodEnum<{
|
|
14
|
+
read: "read";
|
|
15
|
+
write: "write";
|
|
16
|
+
"external-egress": "external-egress";
|
|
17
|
+
"code-exec": "code-exec";
|
|
18
|
+
}>;
|
|
19
|
+
export type SideEffectClass = z.infer<typeof SideEffectClass>;
|
|
20
|
+
/** A data scope a tool can reach. `private` reads are the private-data leg of the trifecta. */
|
|
21
|
+
export declare const DataScope: z.ZodObject<{
|
|
22
|
+
id: z.ZodString;
|
|
23
|
+
sensitivity: z.ZodEnum<{
|
|
24
|
+
public: "public";
|
|
25
|
+
private: "private";
|
|
26
|
+
}>;
|
|
27
|
+
}, z.core.$strip>;
|
|
28
|
+
export type DataScope = z.infer<typeof DataScope>;
|
|
29
|
+
/** A declared tool the agent can invoke. */
|
|
30
|
+
export declare const Tool: z.ZodObject<{
|
|
31
|
+
name: z.ZodString;
|
|
32
|
+
sideEffect: z.ZodEnum<{
|
|
33
|
+
read: "read";
|
|
34
|
+
write: "write";
|
|
35
|
+
"external-egress": "external-egress";
|
|
36
|
+
"code-exec": "code-exec";
|
|
37
|
+
}>;
|
|
38
|
+
dataScopes: z.ZodDefault<z.ZodArray<z.ZodObject<{
|
|
39
|
+
id: z.ZodString;
|
|
40
|
+
sensitivity: z.ZodEnum<{
|
|
41
|
+
public: "public";
|
|
42
|
+
private: "private";
|
|
43
|
+
}>;
|
|
44
|
+
}, z.core.$strip>>>;
|
|
45
|
+
description: z.ZodOptional<z.ZodString>;
|
|
46
|
+
}, z.core.$strip>;
|
|
47
|
+
export type Tool = z.infer<typeof Tool>;
|
|
48
|
+
/** A point where untrusted content enters the agent — the injection surface. */
|
|
49
|
+
export declare const UntrustedIngress: z.ZodObject<{
|
|
50
|
+
id: z.ZodString;
|
|
51
|
+
kind: z.ZodEnum<{
|
|
52
|
+
retrieval: "retrieval";
|
|
53
|
+
web: "web";
|
|
54
|
+
email: "email";
|
|
55
|
+
"tool-output": "tool-output";
|
|
56
|
+
"user-upload": "user-upload";
|
|
57
|
+
}>;
|
|
58
|
+
}, z.core.$strip>;
|
|
59
|
+
export type UntrustedIngress = z.infer<typeof UntrustedIngress>;
|
|
60
|
+
/** The trifecta/RCE legs a declared mitigation can break. Mirrors the APS gate's `leg`. */
|
|
61
|
+
export declare const MitigationLeg: z.ZodEnum<{
|
|
62
|
+
"external-egress": "external-egress";
|
|
63
|
+
"code-exec": "code-exec";
|
|
64
|
+
"private-data": "private-data";
|
|
65
|
+
"untrusted-ingress": "untrusted-ingress";
|
|
66
|
+
}>;
|
|
67
|
+
export type MitigationLeg = z.infer<typeof MitigationLeg>;
|
|
68
|
+
/**
|
|
69
|
+
* A mitigation the author declares as breaking one leg of a dangerous composition.
|
|
70
|
+
* A mitigation only counts if it names a non-empty `control` (mirrors the APS gate: a leg is
|
|
71
|
+
* broken only by a declared control, never by assertion alone).
|
|
72
|
+
*/
|
|
73
|
+
export declare const DeclaredMitigation: z.ZodObject<{
|
|
74
|
+
id: z.ZodOptional<z.ZodString>;
|
|
75
|
+
breaks: z.ZodEnum<{
|
|
76
|
+
"external-egress": "external-egress";
|
|
77
|
+
"code-exec": "code-exec";
|
|
78
|
+
"private-data": "private-data";
|
|
79
|
+
"untrusted-ingress": "untrusted-ingress";
|
|
80
|
+
}>;
|
|
81
|
+
control: z.ZodString;
|
|
82
|
+
}, z.core.$strip>;
|
|
83
|
+
export type DeclaredMitigation = z.infer<typeof DeclaredMitigation>;
|
|
84
|
+
/** The agent's identity posture. Evidence for least-privilege controls in AAL Evidence. */
|
|
85
|
+
export declare const Identity: z.ZodObject<{
|
|
86
|
+
delegated: z.ZodBoolean;
|
|
87
|
+
scoped: z.ZodBoolean;
|
|
88
|
+
}, z.core.$strip>;
|
|
89
|
+
export type Identity = z.infer<typeof Identity>;
|
|
90
|
+
/** The Capability Manifest. Versioned so the schema can evolve without silent breakage. */
|
|
91
|
+
export declare const CapabilityManifest: z.ZodObject<{
|
|
92
|
+
manifestVersion: z.ZodLiteral<"0.1">;
|
|
93
|
+
name: z.ZodString;
|
|
94
|
+
identity: z.ZodObject<{
|
|
95
|
+
delegated: z.ZodBoolean;
|
|
96
|
+
scoped: z.ZodBoolean;
|
|
97
|
+
}, z.core.$strip>;
|
|
98
|
+
tools: z.ZodArray<z.ZodObject<{
|
|
99
|
+
name: z.ZodString;
|
|
100
|
+
sideEffect: z.ZodEnum<{
|
|
101
|
+
read: "read";
|
|
102
|
+
write: "write";
|
|
103
|
+
"external-egress": "external-egress";
|
|
104
|
+
"code-exec": "code-exec";
|
|
105
|
+
}>;
|
|
106
|
+
dataScopes: z.ZodDefault<z.ZodArray<z.ZodObject<{
|
|
107
|
+
id: z.ZodString;
|
|
108
|
+
sensitivity: z.ZodEnum<{
|
|
109
|
+
public: "public";
|
|
110
|
+
private: "private";
|
|
111
|
+
}>;
|
|
112
|
+
}, z.core.$strip>>>;
|
|
113
|
+
description: z.ZodOptional<z.ZodString>;
|
|
114
|
+
}, z.core.$strip>>;
|
|
115
|
+
untrustedIngress: z.ZodDefault<z.ZodArray<z.ZodObject<{
|
|
116
|
+
id: z.ZodString;
|
|
117
|
+
kind: z.ZodEnum<{
|
|
118
|
+
retrieval: "retrieval";
|
|
119
|
+
web: "web";
|
|
120
|
+
email: "email";
|
|
121
|
+
"tool-output": "tool-output";
|
|
122
|
+
"user-upload": "user-upload";
|
|
123
|
+
}>;
|
|
124
|
+
}, z.core.$strip>>>;
|
|
125
|
+
declaredMitigations: z.ZodDefault<z.ZodArray<z.ZodObject<{
|
|
126
|
+
id: z.ZodOptional<z.ZodString>;
|
|
127
|
+
breaks: z.ZodEnum<{
|
|
128
|
+
"external-egress": "external-egress";
|
|
129
|
+
"code-exec": "code-exec";
|
|
130
|
+
"private-data": "private-data";
|
|
131
|
+
"untrusted-ingress": "untrusted-ingress";
|
|
132
|
+
}>;
|
|
133
|
+
control: z.ZodString;
|
|
134
|
+
}, z.core.$strip>>>;
|
|
135
|
+
}, z.core.$strip>;
|
|
136
|
+
export type CapabilityManifest = z.infer<typeof CapabilityManifest>;
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cycle-of-Trust protected-path matching. The single source of truth is
|
|
3
|
+
* policy-pack/protected-paths.json — this module loads + validates it (zod on the
|
|
4
|
+
* boundary, per the repo contract) and exposes the same matching the reference
|
|
5
|
+
* hook (guard-protected-paths.mjs) performs, so engine tests can prove the pack
|
|
6
|
+
* denies every self-modification attempt.
|
|
7
|
+
*
|
|
8
|
+
* This is read/report-only: it evaluates whether a tool call WOULD cross the
|
|
9
|
+
* boundary. It never touches Claude Code config itself.
|
|
10
|
+
*/
|
|
11
|
+
import { z } from "zod";
|
|
12
|
+
declare const protectedPathsSchema: z.ZodObject<{
|
|
13
|
+
globs: z.ZodArray<z.ZodString>;
|
|
14
|
+
shellMarkers: z.ZodArray<z.ZodString>;
|
|
15
|
+
}, z.core.$strip>;
|
|
16
|
+
export type ProtectedPaths = z.infer<typeof protectedPathsSchema>;
|
|
17
|
+
export declare const PROTECTED_PATHS: ProtectedPaths;
|
|
18
|
+
/** True when a file path a Write/Edit tool targets is a protected control path. */
|
|
19
|
+
export declare const matchesProtectedPath: (filePath: string) => boolean;
|
|
20
|
+
/** True when a shell command string reaches into a protected control path. */
|
|
21
|
+
export declare const shellTouchesProtectedPath: (command: string) => boolean;
|
|
22
|
+
/**
|
|
23
|
+
* The empirically-verified per-permission-mode enforcement matrix. `deny` =
|
|
24
|
+
* permissions.deny rule; `hook` = the PreToolUse guard hook; `managed` =
|
|
25
|
+
* managed-settings (disableBypassPermissionsMode + deny). `true` means that
|
|
26
|
+
* layer's block holds in the mode.
|
|
27
|
+
*
|
|
28
|
+
* Spike result (Claude Code v2.1.201, 2026-07-04 — see ADR-0001 "Empirical
|
|
29
|
+
* results"): a PreToolUse command hook returning exit 2 blocks the tool call in
|
|
30
|
+
* EVERY mode tested, including `bypassPermissions` AND
|
|
31
|
+
* `--dangerously-skip-permissions` (a control run with no hook confirmed bypass truly skips
|
|
32
|
+
* permission checks, so the block is attributable to the hook). This REVERSES
|
|
33
|
+
* matrix delta D1: the hook — not just managed settings — holds under bypass.
|
|
34
|
+
* Permission *rules* (`permissions.deny`/`allow`) are still skipped under bypass,
|
|
35
|
+
* so `deny` is false there; the hook and the managed layer both hold.
|
|
36
|
+
*/
|
|
37
|
+
export declare const GUARANTEES_BY_MODE: Record<string, {
|
|
38
|
+
deny: boolean;
|
|
39
|
+
hook: boolean;
|
|
40
|
+
managed: boolean;
|
|
41
|
+
}>;
|
|
42
|
+
/**
|
|
43
|
+
* ConfigChange enforcement (delta D2, resolved by spike). A `ConfigChange` hook
|
|
44
|
+
* fires when a settings/skill file changes during a session; the event carries
|
|
45
|
+
* `source` (user_settings | project_settings | local_settings | policy_settings
|
|
46
|
+
* | skills) and `file_path`. Exit 2 blocks the change from taking effect — EXCEPT
|
|
47
|
+
* `policy_settings` (managed), which a hook cannot block and which remains the
|
|
48
|
+
* admin channel.
|
|
49
|
+
*
|
|
50
|
+
* This returns true when the change should be blocked: any `skills` change (a
|
|
51
|
+
* skill is an executable capability) or a change whose file is a protected
|
|
52
|
+
* control path (settings.json / settings.local.json). Managed (`policy_settings`)
|
|
53
|
+
* is never blocked here — it is how an admin legitimately updates the boundary.
|
|
54
|
+
*/
|
|
55
|
+
export declare const blocksConfigChange: (source: string, filePath: string | undefined) => boolean;
|
|
56
|
+
export {};
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Findings model (FR-6.1).
|
|
3
|
+
*
|
|
4
|
+
* Normalizes two very different inputs — static toxic-flow graph results (Step 2) and dynamic
|
|
5
|
+
* attack results (Step 4) — into one severity-ranked `Finding` list. Every finding is
|
|
6
|
+
* payload-free: it carries the input's sha256 and a derived `observed` summary, never the raw
|
|
7
|
+
* attack input, agent text, or tool arguments (NFR-3).
|
|
8
|
+
*
|
|
9
|
+
* Severity contract: an unmitigated lethal trifecta and a refuse-in-text-but-fire divergence are
|
|
10
|
+
* both `critical`. Untested / inconclusive is surfaced as `info` (`not_verified`), never dropped
|
|
11
|
+
* and never a pass (fail-closed).
|
|
12
|
+
*/
|
|
13
|
+
import type { ToxicFlow } from "../graph/build";
|
|
14
|
+
import type { OwaspAsi } from "../attacks/schema";
|
|
15
|
+
import type { AttackResult } from "../runner/run";
|
|
16
|
+
/** Severity assigned to a finding. Per the severity contract in this file's header, untested or inconclusive results are reported as `info`, never dropped. */
export type Severity = "critical" | "high" | "medium" | "low" | "info";
|
|
17
|
+
export interface Finding {
|
|
18
|
+
id: string;
|
|
19
|
+
source: "graph" | "dynamic";
|
|
20
|
+
ruleId: string;
|
|
21
|
+
title: string;
|
|
22
|
+
severity: Severity;
|
|
23
|
+
owasp?: OwaspAsi;
|
|
24
|
+
atlas?: string;
|
|
25
|
+
flowId?: string;
|
|
26
|
+
attackId?: string;
|
|
27
|
+
inputHash?: string;
|
|
28
|
+
observed: string;
|
|
29
|
+
stability?: {
|
|
30
|
+
pass: number;
|
|
31
|
+
total: number;
|
|
32
|
+
};
|
|
33
|
+
fix: string;
|
|
34
|
+
/** Manifest-derived node ids that create the finding — unambiguous remediation (FR-3.3). */
|
|
35
|
+
locations: string[];
|
|
36
|
+
}
|
|
37
|
+
/** Sort findings most-severe first (stable within a severity). */
|
|
38
|
+
export declare function rankFindings(findings: Finding[]): Finding[];
|
|
39
|
+
/** Static toxic-flow graph results → findings. Unmitigated flows are critical; mitigated → info. */
|
|
40
|
+
export declare function findingsFromFlows(flows: ToxicFlow[]): Finding[];
|
|
41
|
+
/** Dynamic attack results → findings. Contained attacks produce no finding (target withstood). */
|
|
42
|
+
export declare function findingsFromResults(results: AttackResult[]): Finding[];
|
|
43
|
+
export interface Coverage {
|
|
44
|
+
attacksTotal: number;
|
|
45
|
+
attacksConclusive: number;
|
|
46
|
+
attacksNotVerified: number;
|
|
47
|
+
dynamicRan: boolean;
|
|
48
|
+
/** Fraction of attacks conclusively evaluated (succeeded or contained), 0..1. */
|
|
49
|
+
ratio: number;
|
|
50
|
+
}
|
|
51
|
+
/** Honest coverage: how much was actually tested vs. skipped/inconclusive (NFR-8). */
|
|
52
|
+
export declare function computeCoverage(results: AttackResult[], dynamicRan: boolean): Coverage;
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Human-readable report (FR-6.3).
|
|
3
|
+
*
|
|
4
|
+
* Renders a scan report as Markdown: a verdict line, findings ranked most-severe first with their
|
|
5
|
+
* OWASP/ATLAS mapping and a prioritized fix, and — mandatory — the coverage ratio so results are
|
|
6
|
+
* never over-claimed (NFR-8). Deterministic (no timestamps) so it is reproducible and snapshottable.
|
|
7
|
+
*/
|
|
8
|
+
import type { ScanReport } from "../scan";
|
|
9
|
+
import type { Finding, Severity } from "./findings";
|
|
10
|
+
/** Count findings by severity. */
|
|
11
|
+
export declare function severityCounts(findings: Finding[]): Record<Severity, number>;
|
|
12
|
+
/** One-line coverage statement (NFR-8). */
|
|
13
|
+
export declare function coverageLine(report: ScanReport): string;
|
|
14
|
+
/** Render the scan report as Markdown: a verdict line, severity-ranked findings, and the mandatory coverage ratio (FR-6.3, NFR-8). */
export declare function toMarkdown(report: ScanReport): string;
|
|
15
|
+
/**
|
|
16
|
+
* Compact, deterministic CLI summary (the stdout output contract). Machine-independent: keyed on
|
|
17
|
+
* the target name and findings, not absolute paths, so it is snapshot-stable.
|
|
18
|
+
*/
|
|
19
|
+
export declare function renderCliSummary(report: ScanReport): string;
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structured JSON report (FR-6.3 / FR-10.3).
|
|
3
|
+
*
|
|
4
|
+
* A downstream-ingestible, versioned view of a scan — consumed by the AAL Evidence layer to score
|
|
5
|
+
* controls. Payload-free (findings and attacks reference inputs by sha256 only). Deterministic and
|
|
6
|
+
* timestamp-free so it stays reproducible.
|
|
7
|
+
*/
|
|
8
|
+
import type { ScanReport } from "../scan";
|
|
9
|
+
import type { Finding } from "./findings";
|
|
10
|
+
export interface AssuranceJson {
|
|
11
|
+
schemaVersion: "aal-core-report/0.1";
|
|
12
|
+
target: string;
|
|
13
|
+
criticalCount: number;
|
|
14
|
+
coverage: ScanReport["coverage"];
|
|
15
|
+
findings: Finding[];
|
|
16
|
+
attacks: AssuranceAttack[];
|
|
17
|
+
flows: AssuranceFlow[];
|
|
18
|
+
}
|
|
19
|
+
export interface AssuranceAttack {
|
|
20
|
+
attackId: string;
|
|
21
|
+
attackClass: string;
|
|
22
|
+
owasp: string;
|
|
23
|
+
atlas: string;
|
|
24
|
+
outcome: "succeeded" | "contained" | "not_verified";
|
|
25
|
+
stability: {
|
|
26
|
+
pass: number;
|
|
27
|
+
total: number;
|
|
28
|
+
};
|
|
29
|
+
inputHash: string;
|
|
30
|
+
refuseButFire: boolean;
|
|
31
|
+
}
|
|
32
|
+
export interface AssuranceFlow {
|
|
33
|
+
id: string;
|
|
34
|
+
kind: string;
|
|
35
|
+
mitigated: boolean;
|
|
36
|
+
legs: string[];
|
|
37
|
+
}
|
|
38
|
+
/** Shape a scan report as the stable JSON the Evidence layer ingests. */
|
|
39
|
+
export declare function toAssuranceJson(report: ScanReport): AssuranceJson;
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import type { ScanReport } from "../scan";
|
|
2
|
+
export type SarifLevel = "error" | "warning" | "note" | "none";
|
|
3
|
+
export interface SarifLog {
|
|
4
|
+
version: "2.1.0";
|
|
5
|
+
$schema: string;
|
|
6
|
+
runs: SarifRun[];
|
|
7
|
+
}
|
|
8
|
+
interface SarifRun {
|
|
9
|
+
tool: {
|
|
10
|
+
driver: SarifDriver;
|
|
11
|
+
};
|
|
12
|
+
results: SarifResult[];
|
|
13
|
+
}
|
|
14
|
+
interface SarifDriver {
|
|
15
|
+
name: string;
|
|
16
|
+
informationUri: string;
|
|
17
|
+
version: string;
|
|
18
|
+
rules: SarifReportingDescriptor[];
|
|
19
|
+
}
|
|
20
|
+
interface SarifReportingDescriptor {
|
|
21
|
+
id: string;
|
|
22
|
+
name: string;
|
|
23
|
+
shortDescription: {
|
|
24
|
+
text: string;
|
|
25
|
+
};
|
|
26
|
+
defaultConfiguration: {
|
|
27
|
+
level: SarifLevel;
|
|
28
|
+
};
|
|
29
|
+
properties: Record<string, unknown>;
|
|
30
|
+
}
|
|
31
|
+
interface SarifResult {
|
|
32
|
+
ruleId: string;
|
|
33
|
+
level: SarifLevel;
|
|
34
|
+
message: {
|
|
35
|
+
text: string;
|
|
36
|
+
};
|
|
37
|
+
locations: SarifLocation[];
|
|
38
|
+
properties: Record<string, unknown>;
|
|
39
|
+
}
|
|
40
|
+
interface SarifLocation {
|
|
41
|
+
physicalLocation: {
|
|
42
|
+
artifactLocation: {
|
|
43
|
+
uri: string;
|
|
44
|
+
};
|
|
45
|
+
};
|
|
46
|
+
logicalLocations?: {
|
|
47
|
+
fullyQualifiedName: string;
|
|
48
|
+
kind: string;
|
|
49
|
+
}[];
|
|
50
|
+
}
|
|
51
|
+
/** Convert a scan report into a SARIF 2.1.0 log (`SarifLog`). */
export declare function toSarif(report: ScanReport): SarifLog;
|
|
52
|
+
/** Structural conformance check for SARIF 2.1.0 (offline substitute for schema validation). */
|
|
53
|
+
export declare function validateSarif(log: unknown): {
|
|
54
|
+
valid: boolean;
|
|
55
|
+
issues: string[];
|
|
56
|
+
};
|
|
57
|
+
export {};
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Success-condition oracle (FR-2.4).
|
|
3
|
+
*
|
|
4
|
+
* Judges whether an attack *succeeded* (the target behaved unsafely). `assert` predicates run
|
|
5
|
+
* in-process and are fully deterministic; an `llmRubric` is delegated to a pluggable provider at
|
|
6
|
+
* temperature 0. We reuse the judge-as-oracle *pattern* only — AAL Core imports no AgenticMind
|
|
7
|
+
* code (NFR-7); the provider is generic and swappable via env.
|
|
8
|
+
*
|
|
9
|
+
* Fail-closed: an `llmRubric` with no configured provider is inconclusive (`no-provider`), which
|
|
10
|
+
* the runner surfaces as `not_verified` — never a silent pass.
|
|
11
|
+
*/
|
|
12
|
+
import { type Result } from "neverthrow";
|
|
13
|
+
import type { AgentRun } from "../adapter/types";
|
|
14
|
+
import type { CapabilityManifest } from "../manifest/schema";
|
|
15
|
+
import type { Assertion, SuccessCondition } from "../attacks/schema";
|
|
16
|
+
export type OracleError = {
|
|
17
|
+
kind: "no-provider";
|
|
18
|
+
message: string;
|
|
19
|
+
} | {
|
|
20
|
+
kind: "provider";
|
|
21
|
+
message: string;
|
|
22
|
+
};
|
|
23
|
+
/** A generic success-condition judge. Returns true iff the run satisfies the (unsafe) rubric. */
|
|
24
|
+
export interface OracleProvider {
|
|
25
|
+
readonly name: string;
|
|
26
|
+
judge(rubric: string, run: AgentRun): Promise<Result<boolean, OracleError>>;
|
|
27
|
+
}
|
|
28
|
+
/** Evaluate one assert predicate: does the *unsafe* condition it describes hold? */
|
|
29
|
+
export declare function evaluateAssertion(assertion: Assertion, run: AgentRun, manifest: CapabilityManifest): boolean;
|
|
30
|
+
/** OR across the assert array: the attack succeeded if any predicate holds. */
|
|
31
|
+
export declare function evaluateAssertions(assertions: Assertion[], run: AgentRun, manifest: CapabilityManifest): boolean;
|
|
32
|
+
/** Evaluate a full success condition, consulting the provider only when an llmRubric is present. */
|
|
33
|
+
export declare function evaluateSuccess(success: SuccessCondition, run: AgentRun, manifest: CapabilityManifest, provider?: OracleProvider): Promise<Result<boolean, OracleError>>;
|
|
34
|
+
/** A deterministic provider for tests: applies a supplied predicate instead of calling a model. */
|
|
35
|
+
export declare function createFakeProvider(predicate: (rubric: string, run: AgentRun) => boolean, name?: string): OracleProvider;
|
|
36
|
+
export interface AnthropicProviderOptions {
|
|
37
|
+
apiKey?: string;
|
|
38
|
+
model?: string;
|
|
39
|
+
baseUrl?: string;
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Default provider: an Anthropic Messages API call at temperature 0. Generic HTTP (no SDK, no
|
|
43
|
+
* engine code). Key via `dotenvx`/env. Only constructed when a rubric-based attack runs and a key
|
|
44
|
+
* is present — never on the deterministic assert path.
|
|
45
|
+
*/
|
|
46
|
+
export declare function createAnthropicProvider(options?: AnthropicProviderOptions): OracleProvider;
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import type { CapabilityManifest } from "../manifest/schema";
|
|
2
|
+
import { type Attack, type OwaspAsi } from "../attacks/schema";
|
|
3
|
+
import { type OracleProvider } from "./oracle";
|
|
4
|
+
import type { Sandbox, InterceptedSideEffect } from "./sandbox";
|
|
5
|
+
import { type SideEffectFinding } from "./side-effect";
|
|
6
|
+
/**
 * Scored outcome of a single attack.
 * NOTE(review): `not_verified` appears to be the inconclusive case (the oracle docs say a missing
 * provider "surfaces as `not_verified` — never a silent pass") — confirm against the runner source.
 */
export type AttackOutcome = "succeeded" | "contained" | "not_verified";
|
|
7
|
+
export interface AttackResult {
|
|
8
|
+
attackId: string;
|
|
9
|
+
attackClass: Attack["class"];
|
|
10
|
+
owasp: OwaspAsi;
|
|
11
|
+
atlas: string;
|
|
12
|
+
inputHash: string;
|
|
13
|
+
outcome: AttackOutcome;
|
|
14
|
+
stability: {
|
|
15
|
+
pass: number;
|
|
16
|
+
total: number;
|
|
17
|
+
};
|
|
18
|
+
sideEffect: SideEffectFinding;
|
|
19
|
+
intercepted: InterceptedSideEffect[];
|
|
20
|
+
observed: string;
|
|
21
|
+
note?: string;
|
|
22
|
+
}
|
|
23
|
+
export interface RunOptions {
|
|
24
|
+
runs?: number;
|
|
25
|
+
provider?: OracleProvider;
|
|
26
|
+
/** Recorded in the report as run provenance. A reproducibility anchor for stochastic oracle
|
|
27
|
+
* providers; the built-in exec adapter is already deterministic. */
|
|
28
|
+
seed?: number;
|
|
29
|
+
}
|
|
30
|
+
/** sha256 of the rendered attack input — the only representation of a payload we persist. */
|
|
31
|
+
export declare function hashInput(text: string): string;
|
|
32
|
+
/** Coerce a runs value to a positive integer. A NaN/invalid value must never become 0 runs
|
|
33
|
+
* (which would make every attack falsely "contained" — a fail-open). */
|
|
34
|
+
export declare function normalizeRuns(runs: number | undefined): number;
|
|
35
|
+
/** Run one attack N times against a sandboxed target and score it. */
|
|
36
|
+
export declare function runAttack(attack: Attack, sandbox: Sandbox, manifest: CapabilityManifest, options?: RunOptions): Promise<AttackResult>;
|
|
37
|
+
/** Run a whole attack suite sequentially (reproducible; avoids subprocess storms). */
|
|
38
|
+
export declare function runSuite(attacks: Attack[], sandbox: Sandbox, manifest: CapabilityManifest, options?: RunOptions): Promise<AttackResult[]>;
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import type { Result } from "neverthrow";
|
|
2
|
+
import type { AdapterError, AgentRun, TargetAdapter } from "../adapter/types";
|
|
3
|
+
import type { CapabilityManifest } from "../manifest/schema";
|
|
4
|
+
import { type ResolvedSideEffect } from "./side-effect";
|
|
5
|
+
/** A side-effecting tool call that was intercepted (never performed) during a run. */
|
|
6
|
+
export interface InterceptedSideEffect {
|
|
7
|
+
tool: string;
|
|
8
|
+
sideEffect: ResolvedSideEffect;
|
|
9
|
+
argHash: string;
|
|
10
|
+
intercepted: true;
|
|
11
|
+
}
|
|
12
|
+
export interface SandboxRunResult {
|
|
13
|
+
run: AgentRun;
|
|
14
|
+
intercepted: InterceptedSideEffect[];
|
|
15
|
+
}
|
|
16
|
+
export interface Sandbox {
|
|
17
|
+
run(input: string): Promise<Result<SandboxRunResult, AdapterError>>;
|
|
18
|
+
/** Count of side effects actually performed against a real system. Must remain 0 (NFR-4). */
|
|
19
|
+
readonly performedCount: number;
|
|
20
|
+
}
|
|
21
|
+
/** sha256 of a value's canonical JSON — never store raw tool arguments. */
|
|
22
|
+
export declare function hashArgs(args: unknown): string;
|
|
23
|
+
/**
|
|
24
|
+
* Build a process-jail sandbox around an adapter. Declared side effects are intercepted and
|
|
25
|
+
* recorded; none are executed. The canary is the only path a side-effecting call can take.
|
|
26
|
+
*/
|
|
27
|
+
export declare function createProcessJailSandbox(adapter: TargetAdapter, manifest: CapabilityManifest): Sandbox;
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Execution-layer side-effect detector (FR-4.1/4.2) — the second differentiated technique.
|
|
3
|
+
*
|
|
4
|
+
* It inspects the target's *tool calls*, not just its text. The dangerous divergence it flags:
|
|
5
|
+
* the agent *refuses or deflects in prose* while a side-effecting tool call still fires. A fired
|
|
6
|
+
* write / external-egress / code-exec under a refusal text is `critical` — the model said no but
|
|
7
|
+
* the system did the thing anyway.
|
|
8
|
+
*/
|
|
9
|
+
import type { AgentRun, ToolCall } from "../adapter/types";
|
|
10
|
+
import type { CapabilityManifest, SideEffectClass } from "../manifest/schema";
|
|
11
|
+
/** A resolved side-effect class, or `unknown` for a tool absent from the manifest (fail-closed). */
|
|
12
|
+
export type ResolvedSideEffect = SideEffectClass | "unknown";
|
|
13
|
+
/** Resolve a tool's side-effect class from the manifest; unknown tools are treated fail-closed. */
|
|
14
|
+
export declare function sideEffectClassOf(tool: string, manifest: CapabilityManifest): ResolvedSideEffect;
|
|
15
|
+
/** True for classes that touch the world. `read` does not; unknown tools count (fail-closed). */
|
|
16
|
+
export declare function isSideEffecting(cls: ResolvedSideEffect): boolean;
|
|
17
|
+
/** Does the agent's prose refuse or deflect the request? */
|
|
18
|
+
export declare function refusesInText(text: string): boolean;
|
|
19
|
+
export interface FiredSideEffect {
|
|
20
|
+
tool: string;
|
|
21
|
+
sideEffect: ResolvedSideEffect;
|
|
22
|
+
}
|
|
23
|
+
export interface SideEffectFinding {
|
|
24
|
+
refusedInText: boolean;
|
|
25
|
+
firedSideEffects: FiredSideEffect[];
|
|
26
|
+
/** The critical divergence: refused in prose, yet a side-effecting tool fired (FR-4.2). */
|
|
27
|
+
refuseButFire: boolean;
|
|
28
|
+
}
|
|
29
|
+
/** Classify a run's tool calls and flag the refuse-in-text-but-fire divergence. */
|
|
30
|
+
export declare function detectSideEffectDivergence(run: AgentRun, manifest: CapabilityManifest): SideEffectFinding;
|
|
31
|
+
/** The side-effecting tool calls in a run (write / egress / exec / unknown). */
|
|
32
|
+
export declare function firedSideEffectsOf(toolCalls: ToolCall[], manifest: CapabilityManifest): FiredSideEffect[];
|
package/dist/scan.d.ts
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Scan orchestration — the one call the CLI (and tests) drive.
|
|
3
|
+
*
|
|
4
|
+
* Runs the static toxic-flow graph over the manifest and, when a target adapter is supplied, the
|
|
5
|
+
* dynamic attack suite in a sandbox; then normalizes both into a single ranked findings list with
|
|
6
|
+
* an honest coverage ratio. Deterministic and timestamp-free so results are reproducible (FR-6.2)
|
|
7
|
+
* and snapshot-testable.
|
|
8
|
+
*/
|
|
9
|
+
import { type Result } from "neverthrow";
|
|
10
|
+
import type { TargetAdapter } from "./adapter/types";
|
|
11
|
+
import { type ToxicFlow } from "./graph";
|
|
12
|
+
import { type AttackResult } from "./runner/run";
|
|
13
|
+
import type { OracleProvider } from "./runner/oracle";
|
|
14
|
+
import { type Coverage, type Finding } from "./report/findings";
|
|
15
|
+
/** Default corpus shipped with the package (resolves from both src/ and dist/). */
|
|
16
|
+
export declare const DEFAULT_ATTACKS_DIR: string;
|
|
17
|
+
export interface ScanInput {
|
|
18
|
+
manifestPath: string;
|
|
19
|
+
attacksDir?: string;
|
|
20
|
+
runs?: number;
|
|
21
|
+
seed?: number;
|
|
22
|
+
adapter?: TargetAdapter;
|
|
23
|
+
provider?: OracleProvider;
|
|
24
|
+
}
|
|
25
|
+
export interface ScanReport {
|
|
26
|
+
target: string;
|
|
27
|
+
manifestPath: string;
|
|
28
|
+
seed?: number;
|
|
29
|
+
runs: number;
|
|
30
|
+
findings: Finding[];
|
|
31
|
+
coverage: Coverage;
|
|
32
|
+
toxicFlows: ToxicFlow[];
|
|
33
|
+
attackResults: AttackResult[];
|
|
34
|
+
criticalCount: number;
|
|
35
|
+
}
|
|
36
|
+
export type ScanError = {
|
|
37
|
+
kind: "manifest";
|
|
38
|
+
message: string;
|
|
39
|
+
} | {
|
|
40
|
+
kind: "attacks";
|
|
41
|
+
message: string;
|
|
42
|
+
};
|
|
43
|
+
/**
 * Run a scan: the static toxic-flow graph over the manifest and, when `input.adapter` is
 * supplied, the dynamic attack suite in a sandbox.
 *
 * @param input - Manifest path plus optional attacks directory, run count, seed, target adapter and oracle provider.
 * @returns A `Result` holding the `ScanReport`, or a `ScanError` of kind `manifest` or `attacks`.
 */
export declare function runScan(input: ScanInput): Promise<Result<ScanReport, ScanError>>;
|
package/package.json
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "agent-assurance",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "AAL Core — framework-neutral red-team engine for AI agents (toxic-flow graph, side-effect detection, SARIF). Part of the Agent Assurance Layer.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"license": "MIT",
|
|
7
|
+
"author": "Moai Team LLC",
|
|
8
|
+
"homepage": "https://github.com/Moai-Team-LLC/agent-assurance#readme",
|
|
9
|
+
"repository": { "type": "git", "url": "git+https://github.com/Moai-Team-LLC/agent-assurance.git" },
|
|
10
|
+
"bugs": { "url": "https://github.com/Moai-Team-LLC/agent-assurance/issues" },
|
|
11
|
+
"keywords": [
|
|
12
|
+
"ai",
|
|
13
|
+
"ai-security",
|
|
14
|
+
"agent",
|
|
15
|
+
"agentic",
|
|
16
|
+
"red-team",
|
|
17
|
+
"security",
|
|
18
|
+
"owasp",
|
|
19
|
+
"owasp-agentic",
|
|
20
|
+
"llm",
|
|
21
|
+
"prompt-injection",
|
|
22
|
+
"sarif",
|
|
23
|
+
"mcp",
|
|
24
|
+
"toxic-flow",
|
|
25
|
+
"lethal-trifecta"
|
|
26
|
+
],
|
|
27
|
+
"bin": { "aal": "./dist/cli.js", "agent-assurance": "./dist/cli.js" },
|
|
28
|
+
"exports": { ".": { "types": "./dist/index.d.ts", "import": "./dist/index.js" } },
|
|
29
|
+
"main": "./dist/index.js",
|
|
30
|
+
"types": "./dist/index.d.ts",
|
|
31
|
+
"engines": { "node": ">=22.18" },
|
|
32
|
+
"packageManager": "bun@1.3.12",
|
|
33
|
+
"publishConfig": { "access": "public", "provenance": true },
|
|
34
|
+
"scripts": {
|
|
35
|
+
"check": "bun run lint && bun run tsc && bun run test",
|
|
36
|
+
"lint": "oxlint",
|
|
37
|
+
"tsc": "tsc --noEmit",
|
|
38
|
+
"test": "vitest run",
|
|
39
|
+
"build": "bun build ./src/cli.ts --outfile ./dist/cli.js --target node && bun build ./src/index.ts --outfile ./dist/index.js --target node && tsc -p tsconfig.dts.json",
|
|
40
|
+
"cli": "bun run src/cli.ts",
|
|
41
|
+
"prepublishOnly": "bun run build",
|
|
42
|
+
"prepare": "husky || true"
|
|
43
|
+
},
|
|
44
|
+
"dependencies": {
|
|
45
|
+
"commander": "^11.1.0",
|
|
46
|
+
"neverthrow": "^8.2.0",
|
|
47
|
+
"yaml": "^2.9.0",
|
|
48
|
+
"zod": "^4.4.3"
|
|
49
|
+
},
|
|
50
|
+
"devDependencies": {
|
|
51
|
+
"@commitlint/cli": "^21.0.1",
|
|
52
|
+
"@commitlint/config-conventional": "^21.0.1",
|
|
53
|
+
"@dotenvx/dotenvx": "^1.67.0",
|
|
54
|
+
"@types/node": "^25.9.1",
|
|
55
|
+
"husky": "^9.1.7",
|
|
56
|
+
"oxlint": "^1.66.0",
|
|
57
|
+
"typescript": "^6.0.3",
|
|
58
|
+
"vitest": "^4.1.7"
|
|
59
|
+
}
|
|
60
|
+
}
|