tryassay 0.6.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/pricing-enforcer.d.ts +45 -0
- package/dist/api/pricing-enforcer.js +144 -0
- package/dist/api/pricing-enforcer.js.map +1 -0
- package/dist/api/server.d.ts +28 -0
- package/dist/api/server.js +265 -0
- package/dist/api/server.js.map +1 -0
- package/dist/api/team-session.d.ts +59 -0
- package/dist/api/team-session.js +240 -0
- package/dist/api/team-session.js.map +1 -0
- package/dist/cli.js +123 -2
- package/dist/cli.js.map +1 -1
- package/dist/commands/api.d.ts +4 -0
- package/dist/commands/api.js +50 -0
- package/dist/commands/api.js.map +1 -0
- package/dist/commands/runtime.d.ts +61 -0
- package/dist/commands/runtime.js +554 -0
- package/dist/commands/runtime.js.map +1 -1
- package/dist/runtime/agent-spawner.d.ts +56 -0
- package/dist/runtime/agent-spawner.js +217 -0
- package/dist/runtime/agent-spawner.js.map +1 -0
- package/dist/runtime/agents/coordinator-agent.d.ts +20 -0
- package/dist/runtime/agents/coordinator-agent.js +182 -0
- package/dist/runtime/agents/coordinator-agent.js.map +1 -0
- package/dist/runtime/agents/ops-agent.d.ts +11 -0
- package/dist/runtime/agents/ops-agent.js +113 -0
- package/dist/runtime/agents/ops-agent.js.map +1 -0
- package/dist/runtime/agents/research-agent.d.ts +11 -0
- package/dist/runtime/agents/research-agent.js +114 -0
- package/dist/runtime/agents/research-agent.js.map +1 -0
- package/dist/runtime/agents/test-agent.d.ts +11 -0
- package/dist/runtime/agents/test-agent.js +114 -0
- package/dist/runtime/agents/test-agent.js.map +1 -0
- package/dist/runtime/capability-registry.d.ts +62 -0
- package/dist/runtime/capability-registry.js +191 -0
- package/dist/runtime/capability-registry.js.map +1 -0
- package/dist/runtime/collusion-detector.d.ts +35 -0
- package/dist/runtime/collusion-detector.js +97 -0
- package/dist/runtime/collusion-detector.js.map +1 -0
- package/dist/runtime/domain-coverage-analyzer.d.ts +24 -0
- package/dist/runtime/domain-coverage-analyzer.js +178 -0
- package/dist/runtime/domain-coverage-analyzer.js.map +1 -0
- package/dist/runtime/human-escalation.d.ts +41 -0
- package/dist/runtime/human-escalation.js +122 -0
- package/dist/runtime/human-escalation.js.map +1 -0
- package/dist/runtime/kill-switch.d.ts +51 -0
- package/dist/runtime/kill-switch.js +185 -0
- package/dist/runtime/kill-switch.js.map +1 -0
- package/dist/runtime/layer2-guardian.d.ts +81 -0
- package/dist/runtime/layer2-guardian.js +263 -0
- package/dist/runtime/layer2-guardian.js.map +1 -0
- package/dist/runtime/multi-agent-loop.d.ts +37 -0
- package/dist/runtime/multi-agent-loop.js +411 -0
- package/dist/runtime/multi-agent-loop.js.map +1 -0
- package/dist/runtime/prompt-safety-analyzer.d.ts +17 -0
- package/dist/runtime/prompt-safety-analyzer.js +230 -0
- package/dist/runtime/prompt-safety-analyzer.js.map +1 -0
- package/dist/runtime/rollback-manager.d.ts +50 -0
- package/dist/runtime/rollback-manager.js +157 -0
- package/dist/runtime/rollback-manager.js.map +1 -0
- package/dist/runtime/rule-canary-deployer.d.ts +69 -0
- package/dist/runtime/rule-canary-deployer.js +289 -0
- package/dist/runtime/rule-canary-deployer.js.map +1 -0
- package/dist/runtime/rule-conflict-detector.d.ts +48 -0
- package/dist/runtime/rule-conflict-detector.js +214 -0
- package/dist/runtime/rule-conflict-detector.js.map +1 -0
- package/dist/runtime/rule-meta-verifier.d.ts +18 -0
- package/dist/runtime/rule-meta-verifier.js +275 -0
- package/dist/runtime/rule-meta-verifier.js.map +1 -0
- package/dist/runtime/rule-proposal-manager.d.ts +95 -0
- package/dist/runtime/rule-proposal-manager.js +190 -0
- package/dist/runtime/rule-proposal-manager.js.map +1 -0
- package/dist/runtime/safety-enforcer.d.ts +35 -0
- package/dist/runtime/safety-enforcer.js +165 -0
- package/dist/runtime/safety-enforcer.js.map +1 -0
- package/dist/runtime/safety-status.d.ts +48 -0
- package/dist/runtime/safety-status.js +119 -0
- package/dist/runtime/safety-status.js.map +1 -0
- package/dist/runtime/shared-memory.d.ts +47 -0
- package/dist/runtime/shared-memory.js +151 -0
- package/dist/runtime/shared-memory.js.map +1 -0
- package/dist/runtime/specialized-agent.d.ts +5 -0
- package/dist/runtime/specialized-agent.js +37 -0
- package/dist/runtime/specialized-agent.js.map +1 -1
- package/dist/runtime/stall-detector.d.ts +13 -0
- package/dist/runtime/stall-detector.js +121 -0
- package/dist/runtime/stall-detector.js.map +1 -0
- package/dist/runtime/tool-approval.d.ts +51 -0
- package/dist/runtime/tool-approval.js +148 -0
- package/dist/runtime/tool-approval.js.map +1 -0
- package/dist/runtime/tool-sandbox.d.ts +43 -0
- package/dist/runtime/tool-sandbox.js +394 -0
- package/dist/runtime/tool-sandbox.js.map +1 -0
- package/dist/runtime/tool-verifier.d.ts +18 -0
- package/dist/runtime/tool-verifier.js +323 -0
- package/dist/runtime/tool-verifier.js.map +1 -0
- package/dist/runtime/trust-manager.d.ts +33 -3
- package/dist/runtime/trust-manager.js +128 -26
- package/dist/runtime/trust-manager.js.map +1 -1
- package/dist/runtime/types.d.ts +652 -0
- package/dist/runtime/verification-intensity.d.ts +34 -0
- package/dist/runtime/verification-intensity.js +104 -0
- package/dist/runtime/verification-intensity.js.map +1 -0
- package/package.json +1 -1
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import type { BoundaryClaim, BoundaryVerificationResult, CollusionEvent, AgentIdentity } from './types.js';
|
|
2
|
+
export declare class CollusionDetector {
    private events;
    /**
     * Inspect the claims of one verification for collusion signals.
     *
     * A collusion event is recorded for every claim that carries a
     * `formalOverride`, i.e. where the formal verifier's verdict replaced
     * the verdict the LLM-backed agents had reached. Nothing is recorded
     * when either agent has trust level `'formal'`.
     *
     * The motivating case is correlated LLM error: agents trained on
     * similar data sharing the same blind spot.
     *
     * @returns The events detected by this call (also appended to the log).
     */
    check(sourceAgent: AgentIdentity, targetAgent: AgentIdentity, claims: ReadonlyArray<BoundaryClaim>, result: BoundaryVerificationResult): CollusionEvent[];
    /**
     * Report whether an agent pair colludes too often.
     *
     * Compares (events logged for the pair, in either order) divided by
     * `totalVerifications` against `threshold` (default 0.2, i.e. 20%).
     * The comparison is strict: a rate equal to the threshold is not excessive.
     */
    isCollusionRateExcessive(agentA: string, agentB: string, totalVerifications: number, threshold?: number): boolean;
    /** Every collusion event recorded so far. */
    getAllEvents(): ReadonlyArray<CollusionEvent>;
    /** Events in which `agentId` took part, on either side of the pair. */
    getEventsForAgent(agentId: string): ReadonlyArray<CollusionEvent>;
    /** Events whose claim severity is `'critical'` or `'high'`. */
    getCriticalEvents(): ReadonlyArray<CollusionEvent>;
    /** Number of collusion events recorded so far. */
    getEventCount(): number;
    /** Distinct agent pairs (order-insensitive) with their event counts. */
    getColludingPairs(): {
        agentA: string;
        agentB: string;
        count: number;
    }[];
}
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
// ============================================================
|
|
2
|
+
// Assay Verified Agent Runtime — Collusion Detector
|
|
3
|
+
// Detects when LLM-backed agents agree on something the formal
|
|
4
|
+
// verifier disagrees with. This is the anti-collusion primitive.
|
|
5
|
+
// ============================================================
|
|
6
|
+
import { randomUUID } from 'node:crypto';
|
|
7
|
+
// ── Collusion Detector ─────────────────────────────────────
|
|
8
|
+
export class CollusionDetector {
    /** Log of every collusion event recorded by `check`, in detection order. */
    events = [];
    /**
     * Check the claims of one verification for collusion signals.
     *
     * An event is recorded for every claim that carries a `formalOverride`,
     * i.e. the formal verifier's verdict replaced the verdict reached by the
     * LLM-backed agents. This targets correlated LLM errors, where agents
     * trained on similar data share the same blind spots.
     *
     * @param sourceAgent Agent that produced the work; read for `id` and `trustLevel`.
     * @param targetAgent Agent that reviewed the work; read for `id` and `trustLevel`.
     * @param claims Claims to inspect; each may carry a `formalOverride`.
     * @param result Verification result. Kept for interface compatibility; it
     *   is not read, because every signal used here comes from the per-claim
     *   `formalOverride` entries.
     * @returns The events detected by this call. They are also appended to the log.
     */
    check(sourceAgent, targetAgent, claims, result) {
        const detected = [];
        // Only LLM-backed pairs can collude; a formal participant is not subject to this check.
        if (sourceAgent.trustLevel === 'formal' || targetAgent.trustLevel === 'formal') {
            return detected;
        }
        for (const claim of claims) {
            const override = claim.formalOverride;
            if (!override) {
                continue;
            }
            // The LLM agents said one thing, the formal verifier said another.
            const event = {
                id: randomUUID(),
                agentA: sourceAgent.id,
                agentB: targetAgent.id,
                agreedVerdict: override.originalLlmVerdict,
                formalVerdict: override.formalVerdict,
                claimId: claim.id,
                claimText: claim.text,
                severity: claim.severity,
                timestamp: new Date().toISOString(),
            };
            detected.push(event);
            this.events.push(event);
        }
        return detected;
    }
    /**
     * Check if the collusion rate exceeds a threshold for a given agent pair.
     *
     * @param agentA Id of one agent of the pair (order does not matter).
     * @param agentB Id of the other agent of the pair.
     * @param totalVerifications Number of verifications the pair took part in.
     * @param threshold Maximum tolerated ratio (default: 0.2, i.e. 20%).
     * @returns `true` when events / totalVerifications is strictly greater
     *   than `threshold`; `false` when `totalVerifications` is not a positive number.
     */
    isCollusionRateExcessive(agentA, agentB, totalVerifications, threshold = 0.2) {
        // Guard first: no meaningful rate exists without a positive denominator
        // (this also covers NaN), and it avoids scanning the log for nothing.
        if (!(totalVerifications > 0)) {
            return false;
        }
        const pairEvents = this.events.filter(e => (e.agentA === agentA && e.agentB === agentB) ||
            (e.agentA === agentB && e.agentB === agentA));
        return pairEvents.length / totalVerifications > threshold;
    }
    /** Get all collusion events. Returns the internal log itself, not a copy. */
    getAllEvents() {
        return this.events;
    }
    /** Get collusion events in which the given agent was either participant. */
    getEventsForAgent(agentId) {
        return this.events.filter(e => e.agentA === agentId || e.agentB === agentId);
    }
    /** Get collusion events whose claim severity is 'critical' or 'high'. */
    getCriticalEvents() {
        return this.events.filter(e => e.severity === 'critical' || e.severity === 'high');
    }
    /** Get collusion event count. */
    getEventCount() {
        return this.events.length;
    }
    /**
     * Get unique agent pairs involved in collusion, with their event counts.
     *
     * Pairs are order-insensitive: (a, b) and (b, a) are counted together.
     * The reported `agentA`/`agentB` are taken from the first event seen for the pair.
     */
    getColludingPairs() {
        const pairs = new Map();
        for (const event of this.events) {
            // JSON-encode the sorted ids so that ids containing the separator
            // cannot make two different pairs share one key.
            const key = JSON.stringify([event.agentA, event.agentB].sort());
            const existing = pairs.get(key);
            if (existing) {
                existing.count++;
            }
            else {
                pairs.set(key, { agentA: event.agentA, agentB: event.agentB, count: 1 });
            }
        }
        return Array.from(pairs.values());
    }
}
|
|
97
|
+
//# sourceMappingURL=collusion-detector.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"collusion-detector.js","sourceRoot":"","sources":["../../src/runtime/collusion-detector.ts"],"names":[],"mappings":"AAAA,+DAA+D;AAC/D,oDAAoD;AACpD,+DAA+D;AAC/D,iEAAiE;AACjE,+DAA+D;AAE/D,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAQzC,8DAA8D;AAE9D,MAAM,OAAO,iBAAiB;IACpB,MAAM,GAAqB,EAAE,CAAC;IAEtC;;;;;;;;;OASG;IACH,KAAK,CACH,WAA0B,EAC1B,WAA0B,EAC1B,MAAgC,EAChC,MAAkC;QAElC,MAAM,QAAQ,GAAqB,EAAE,CAAC;QAEtC,wDAAwD;QACxD,IAAI,WAAW,CAAC,UAAU,KAAK,QAAQ,IAAI,WAAW,CAAC,UAAU,KAAK,QAAQ,EAAE,CAAC;YAC/E,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,8DAA8D;QAC9D,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,IAAI,KAAK,CAAC,cAAc,EAAE,CAAC;gBACzB,mDAAmD;gBACnD,MAAM,KAAK,GAAmB;oBAC5B,EAAE,EAAE,UAAU,EAAE;oBAChB,MAAM,EAAE,WAAW,CAAC,EAAE;oBACtB,MAAM,EAAE,WAAW,CAAC,EAAE;oBACtB,aAAa,EAAE,KAAK,CAAC,cAAc,CAAC,kBAAkB;oBACtD,aAAa,EAAE,KAAK,CAAC,cAAc,CAAC,aAAa;oBACjD,OAAO,EAAE,KAAK,CAAC,EAAE;oBACjB,SAAS,EAAE,KAAK,CAAC,IAAI;oBACrB,QAAQ,EAAE,KAAK,CAAC,QAAQ;oBACxB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;iBACpC,CAAC;gBAEF,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACrB,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QAED,+DAA+D;QAC/D,4DAA4D;QAC5D,IAAI,MAAM,CAAC,WAAW,CAAC,aAAa,GAAG,CAAC,IAAI,MAAM,CAAC,WAAW,CAAC,eAAe,GAAG,CAAC,EAAE,CAAC;YACnF,0EAA0E;YAC1E,wDAAwD;QAC1D,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;;;OAIG;IACH,wBAAwB,CACtB,MAAc,EACd,MAAc,EACd,kBAA0B,EAC1B,YAAoB,GAAG;QAEvB,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CACnC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,IAAI,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC;YAC5C,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,IAAI,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,CAClD,CAAC;QACF,IAAI,kBAAkB,KAAK,CAAC;YAAE,OAAO,KAAK,CAAC;QAC3C,OAAO,UAAU,CAAC,MAAM,GAAG,kBAAkB,GAAG,SAAS,CAAC;IAC5D,CAAC;IAED,gCAAgC;IAChC,YAAY;QACV,OAAO,IAAI,CAAC,MAAM,CAAC;IACrB,CAAC;IAED,iDAAiD;IACjD,iBAAiB,CAAC,OAAe;QAC/B,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,OAAO,IAAI,CAAC,CAAC,MAAM,KAAK,OAAO,CAAC,CAAC;IAC/E,CAAC;IAED,iEAAiE;IACjE,iBAAiB;QACf,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC
,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,UAAU,IAAI,CAAC,CAAC,QAAQ,KAAK,MAAM,CAAC,CAAC;IACrF,CAAC;IAED,iCAAiC;IACjC,aAAa;QACX,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC;IAC5B,CAAC;IAED,oDAAoD;IACpD,iBAAiB;QACf,MAAM,KAAK,GAAG,IAAI,GAAG,EAA6D,CAAC;QAEnF,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,MAAM,GAAG,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC3D,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAChC,IAAI,QAAQ,EAAE,CAAC;gBACb,QAAQ,CAAC,KAAK,EAAE,CAAC;YACnB,CAAC;iBAAM,CAAC;gBACN,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;YAC3E,CAAC;QACH,CAAC;QAED,OAAO,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;IACpC,CAAC;CACF"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { AgentDefinition, ToolDefinition } from './types.js';
|
|
2
|
+
/** Result of analysing one agent definition with `DomainCoverageAnalyzer.analyze`. */
export interface CoverageAnalysis {
    /** The scope the agent claims for itself. */
    readonly claimed_scope: string;
    /** Summary of the scope the analyzer could establish. */
    readonly verified_scope: string;
    /** Human-readable findings about things the agent appears to be missing. */
    readonly gaps: ReadonlyArray<string>;
    /** Human-readable findings about things the agent claims beyond its domain. */
    readonly overreach: ReadonlyArray<string>;
    /** Classification of the agent's tool access. */
    readonly tool_access: {
        /** Tools considered justified. */
        readonly justified: ReadonlyArray<string>;
        /** Tools flagged as questionable, with a reason. */
        readonly questionable: ReadonlyArray<string>;
        /** Tool kinds the agent lacks. */
        readonly missing: ReadonlyArray<string>;
    };
}
|
|
13
|
+
export declare class DomainCoverageAnalyzer {
    /**
     * Inspect an agent definition for coverage gaps and overreach.
     *
     * The agent's declared domain is checked against known domain patterns,
     * and its requested tools against the supplied tool registry.
     *
     * @param agent The agent definition to inspect.
     * @param availableTools The tool registry to resolve the agent's tools against.
     */
    analyze(agent: AgentDefinition, availableTools: ReadonlyArray<ToolDefinition>): CoverageAnalysis;
    private matchDomain;
    private isLanguageRelated;
    private analyzeToolAccess;
    private summarizeVerifiedScope;
}
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
// ============================================================
|
|
2
|
+
// Assay Verified Agent Runtime — Domain Coverage Analyzer
|
|
3
|
+
// Analyzes agent definitions for domain coverage gaps and overreach.
|
|
4
|
+
// ============================================================
|
|
5
|
+
/**
 * Known domain patterns. Each entry lists the languages, frameworks, tool
 * types and claim categories that an agent working in that domain is
 * expected to declare. All values are lowercase.
 */
const DOMAIN_EXPECTATIONS = {
    kubernetes: {
        languages: ['yaml', 'helm'],
        frameworks: ['kubernetes', 'docker', 'helm'],
        expected_tools: ['cli_wrapper'],
        expected_claims: ['security', 'correctness', 'completeness'],
    },
    database: {
        languages: ['sql', 'typescript', 'python'],
        frameworks: ['postgresql', 'mysql', 'mongodb', 'prisma', 'drizzle'],
        expected_tools: ['cli_wrapper', 'data_transform'],
        expected_claims: ['security', 'correctness', 'performance'],
    },
    frontend: {
        languages: ['typescript', 'javascript', 'css', 'html'],
        frameworks: ['react', 'vue', 'svelte', 'next.js', 'angular'],
        expected_tools: ['cli_wrapper'],
        expected_claims: ['correctness', 'completeness', 'type-safety'],
    },
    api: {
        languages: ['typescript', 'python', 'go'],
        frameworks: ['express', 'fastify', 'flask', 'gin'],
        expected_tools: ['api_client', 'cli_wrapper'],
        expected_claims: ['security', 'correctness', 'error-handling'],
    },
    security: {
        languages: ['typescript', 'python', 'go', 'rust'],
        frameworks: [],
        expected_tools: ['verification_helper', 'cli_wrapper'],
        expected_claims: ['security', 'correctness'],
    },
    infrastructure: {
        languages: ['yaml', 'hcl', 'terraform'],
        frameworks: ['terraform', 'cloudformation', 'pulumi', 'ansible'],
        expected_tools: ['cli_wrapper'],
        expected_claims: ['security', 'correctness', 'completeness'],
    },
};
/**
 * Languages treated as close relatives of one another. Declaring a relative
 * of an expected language is not reported as overreach.
 * A Map (not a plain object) so that names such as "constructor" cannot
 * resolve to inherited Object.prototype members.
 */
const RELATED_LANGUAGES = new Map([
    ['typescript', ['javascript']],
    ['javascript', ['typescript']],
    ['yaml', ['json']],
    ['json', ['yaml']],
    ['hcl', ['terraform']],
]);
/** Description phrases that mark a domain as claiming too much. */
const OVERLY_BROAD_PHRASES = ['everything', 'all code', 'any language'];
// ── Domain Coverage Analyzer ───────────────────────────────
export class DomainCoverageAnalyzer {
    /**
     * Analyze an agent definition for coverage gaps and overreach.
     * Compares the agent's declared domain against known patterns
     * and the available tool registry.
     *
     * @param agent Agent definition; reads `domain` (description, languages,
     *   frameworks, claim_categories), `tools`, `trust_level` and `constraints`.
     * @param availableTools Tool registry used to resolve the agent's tool ids.
     * @returns The claimed scope, a verified-scope summary, gap and overreach
     *   findings, and a classification of the agent's tool access.
     */
    analyze(agent, availableTools) {
        const gaps = [];
        const overreach = [];
        // Match against known domain expectations
        const domainKey = this.matchDomain(agent.domain);
        const expectations = domainKey ? DOMAIN_EXPECTATIONS[domainKey] : null;
        if (expectations) {
            // Expectations are lowercase, so compare declared languages case-insensitively
            // (matchDomain already treats descriptions and frameworks that way).
            const declaredLanguages = new Set(agent.domain.languages.map(l => l.toLowerCase()));
            // Check language coverage
            for (const lang of expectations.languages) {
                if (!declaredLanguages.has(lang)) {
                    gaps.push(`Domain "${domainKey}" typically handles "${lang}" but agent doesn't declare it`);
                }
            }
            // Check for overreach: languages not expected for this domain
            for (const lang of agent.domain.languages) {
                const normalized = lang.toLowerCase();
                const isExpected = expectations.languages.includes(normalized);
                const isRelated = this.isLanguageRelated(normalized, expectations.languages);
                if (!isExpected && !isRelated) {
                    overreach.push(`Agent declares "${lang}" which is unusual for domain "${domainKey}"`);
                }
            }
            // Check claim category coverage
            for (const cat of expectations.expected_claims) {
                if (!agent.domain.claim_categories.includes(cat)) {
                    gaps.push(`Domain "${domainKey}" typically covers "${cat}" claims but agent doesn't declare it`);
                }
            }
        }
        // Analyze tool access
        const toolAnalysis = this.analyzeToolAccess(agent, availableTools, expectations);
        // A very short description cannot pin down a scope
        if (agent.domain.description.length < 20) {
            gaps.push('Domain description is too brief to determine scope');
        }
        // Check for overly broad domain description
        const description = agent.domain.description.toLowerCase();
        if (OVERLY_BROAD_PHRASES.some(phrase => description.includes(phrase))) {
            overreach.push('Domain description is overly broad (claims to handle "everything" or "all")');
        }
        // Check constraints consistency
        if (agent.trust_level === 'sandboxed' && agent.constraints.can_spawn_agents) {
            overreach.push('Sandboxed agents cannot spawn other agents');
        }
        if (agent.trust_level === 'standard' && agent.constraints.can_spawn_agents) {
            overreach.push('Standard trust agents cannot spawn other agents');
        }
        return {
            claimed_scope: agent.domain.description,
            verified_scope: this.summarizeVerifiedScope(agent, expectations),
            gaps,
            overreach,
            tool_access: toolAnalysis,
        };
    }
    // ── Private helpers ──────────────────────────────────────
    /**
     * Find the first known domain that the agent's domain matches.
     *
     * Domains are tried in declaration order. A domain matches when its key
     * occurs anywhere in the lowercased description, or when one of the
     * agent's lowercased frameworks is listed for it. Because both tests run
     * per domain, a framework match on an earlier domain wins over a
     * description match on a later one.
     *
     * NOTE(review): the description test is a plain substring test, so a
     * short key such as "api" also matches inside longer words.
     *
     * @returns The matching key of DOMAIN_EXPECTATIONS, or `null`.
     */
    matchDomain(domain) {
        const desc = domain.description.toLowerCase();
        const frameworks = domain.frameworks.map(f => f.toLowerCase());
        for (const [key, _exp] of Object.entries(DOMAIN_EXPECTATIONS)) {
            if (desc.includes(key))
                return key;
            if (frameworks.some(f => _exp.frameworks.includes(f)))
                return key;
        }
        return null;
    }
    /**
     * Tell whether `lang` is a known relative of any expected language.
     *
     * @param lang Lowercase language name.
     * @param expectedLangs Languages expected for the matched domain.
     */
    isLanguageRelated(lang, expectedLangs) {
        return (RELATED_LANGUAGES.get(lang) ?? []).some(r => expectedLangs.includes(r));
    }
    /**
     * Classify the agent's requested tools and list expected tool types it lacks.
     *
     * - A tool id absent from the registry is questionable.
     * - With no matched domain, every resolvable tool is assumed justified.
     * - Otherwise a tool is justified only if its type is expected for the domain.
     *
     * @param expectations Entry of DOMAIN_EXPECTATIONS, or `null` when no domain matched.
     * @returns `{ justified, questionable, missing }` string lists.
     */
    analyzeToolAccess(agent, availableTools, expectations) {
        const justified = [];
        const questionable = [];
        const missing = [];
        // Index the registry once; on duplicate ids the first entry wins,
        // which is what a linear search would return.
        const toolsById = new Map();
        for (const tool of availableTools) {
            if (!toolsById.has(tool.id)) {
                toolsById.set(tool.id, tool);
            }
        }
        // Check each requested tool
        for (const toolId of agent.tools) {
            const tool = toolsById.get(toolId);
            if (!tool) {
                questionable.push(`${toolId} (not found in registry)`);
                continue;
            }
            if (!expectations) {
                justified.push(toolId); // Can't determine, assume justified
            }
            else if (expectations.expected_tools.includes(tool.type)) {
                justified.push(toolId);
            }
            else {
                questionable.push(`${toolId} (type "${tool.type}" not typical for domain)`);
            }
        }
        // Check for missing tools
        if (expectations) {
            const grantedTypes = new Set(agent.tools.map(id => toolsById.get(id)?.type));
            for (const expectedType of expectations.expected_tools) {
                if (!grantedTypes.has(expectedType)) {
                    missing.push(`No "${expectedType}" tool — typically needed for this domain`);
                }
            }
        }
        return { justified, questionable, missing };
    }
    /**
     * Build the one-line "verified scope" summary: declared languages,
     * frameworks (when any), claim categories, and whether a known domain
     * pattern matched.
     */
    summarizeVerifiedScope(agent, expectations) {
        const parts = [];
        parts.push(`Languages: ${agent.domain.languages.join(', ')}`);
        if (agent.domain.frameworks.length > 0) {
            parts.push(`Frameworks: ${agent.domain.frameworks.join(', ')}`);
        }
        parts.push(`Claims: ${agent.domain.claim_categories.join(', ')}`);
        parts.push(expectations
            ? `Matched domain: known pattern`
            : `Domain: no known pattern match (custom)`);
        return parts.join('. ');
    }
}
|
|
178
|
+
//# sourceMappingURL=domain-coverage-analyzer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"domain-coverage-analyzer.js","sourceRoot":"","sources":["../../src/runtime/domain-coverage-analyzer.ts"],"names":[],"mappings":"AAAA,+DAA+D;AAC/D,0DAA0D;AAC1D,qEAAqE;AACrE,+DAA+D;AA8B/D,MAAM,mBAAmB,GAAsC;IAC7D,UAAU,EAAE;QACV,SAAS,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC;QAC3B,UAAU,EAAE,CAAC,YAAY,EAAE,QAAQ,EAAE,MAAM,CAAC;QAC5C,cAAc,EAAE,CAAC,aAAa,CAAC;QAC/B,eAAe,EAAE,CAAC,UAAU,EAAE,aAAa,EAAE,cAAc,CAAC;KAC7D;IACD,QAAQ,EAAE;QACR,SAAS,EAAE,CAAC,KAAK,EAAE,YAAY,EAAE,QAAQ,CAAC;QAC1C,UAAU,EAAE,CAAC,YAAY,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,SAAS,CAAC;QACnE,cAAc,EAAE,CAAC,aAAa,EAAE,gBAAgB,CAAC;QACjD,eAAe,EAAE,CAAC,UAAU,EAAE,aAAa,EAAE,aAAa,CAAC;KAC5D;IACD,QAAQ,EAAE;QACR,SAAS,EAAE,CAAC,YAAY,EAAE,YAAY,EAAE,KAAK,EAAE,MAAM,CAAC;QACtD,UAAU,EAAE,CAAC,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE,SAAS,CAAC;QAC5D,cAAc,EAAE,CAAC,aAAa,CAAC;QAC/B,eAAe,EAAE,CAAC,aAAa,EAAE,cAAc,EAAE,aAAa,CAAC;KAChE;IACD,GAAG,EAAE;QACH,SAAS,EAAE,CAAC,YAAY,EAAE,QAAQ,EAAE,IAAI,CAAC;QACzC,UAAU,EAAE,CAAC,SAAS,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,CAAC;QAClD,cAAc,EAAE,CAAC,YAAY,EAAE,aAAa,CAAC;QAC7C,eAAe,EAAE,CAAC,UAAU,EAAE,aAAa,EAAE,gBAAgB,CAAC;KAC/D;IACD,QAAQ,EAAE;QACR,SAAS,EAAE,CAAC,YAAY,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,CAAC;QACjD,UAAU,EAAE,EAAE;QACd,cAAc,EAAE,CAAC,qBAAqB,EAAE,aAAa,CAAC;QACtD,eAAe,EAAE,CAAC,UAAU,EAAE,aAAa,CAAC;KAC7C;IACD,cAAc,EAAE;QACd,SAAS,EAAE,CAAC,MAAM,EAAE,KAAK,EAAE,WAAW,CAAC;QACvC,UAAU,EAAE,CAAC,WAAW,EAAE,gBAAgB,EAAE,QAAQ,EAAE,SAAS,CAAC;QAChE,cAAc,EAAE,CAAC,aAAa,CAAC;QAC/B,eAAe,EAAE,CAAC,UAAU,EAAE,aAAa,EAAE,cAAc,CAAC;KAC7D;CACF,CAAC;AAEF,8DAA8D;AAE9D,MAAM,OAAO,sBAAsB;IACjC;;;;OAIG;IACH,OAAO,CACL,KAAsB,EACtB,cAAyC;QAEzC,MAAM,IAAI,GAAa,EAAE,CAAC;QAC1B,MAAM,SAAS,GAAa,EAAE,CAAC;QAE/B,0CAA0C;QAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QACjD,MAAM,YAAY,GAAG,SAAS,CAAC,CAAC,CAAC,mBAAmB,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QAEvE,0BAA0B;QAC1B,IAAI,YAAY,EAAE,CAAC;YACjB,KAAK,MAAM,IAAI,IAAI,YAAY,CAAC,SAAS,EAAE,CAAC;gBAC1C,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,
CAAC;oBAC3C,IAAI,CAAC,IAAI,CAAC,WAAW,SAAS,wBAAwB,IAAI,gCAAgC,CAAC,CAAC;gBAC9F,CAAC;YACH,CAAC;YAED,8DAA8D;YAC9D,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;gBAC1C,MAAM,UAAU,GAAG,YAAY,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;gBACzD,MAAM,SAAS,GAAG,IAAI,CAAC,iBAAiB,CAAC,IAAI,EAAE,YAAY,CAAC,SAAS,CAAC,CAAC;gBACvE,IAAI,CAAC,UAAU,IAAI,CAAC,SAAS,EAAE,CAAC;oBAC9B,SAAS,CAAC,IAAI,CAAC,mBAAmB,IAAI,kCAAkC,SAAS,GAAG,CAAC,CAAC;gBACxF,CAAC;YACH,CAAC;YAED,gCAAgC;YAChC,KAAK,MAAM,GAAG,IAAI,YAAY,CAAC,eAAe,EAAE,CAAC;gBAC/C,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,gBAAgB,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;oBACjD,IAAI,CAAC,IAAI,CAAC,WAAW,SAAS,uBAAuB,GAAG,uCAAuC,CAAC,CAAC;gBACnG,CAAC;YACH,CAAC;QACH,CAAC;QAED,sBAAsB;QACtB,MAAM,YAAY,GAAG,IAAI,CAAC,iBAAiB,CAAC,KAAK,EAAE,cAAc,EAAE,YAAY,CAAC,CAAC;QAEjF,4CAA4C;QAC5C,IAAI,KAAK,CAAC,MAAM,CAAC,WAAW,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;YACzC,IAAI,CAAC,IAAI,CAAC,oDAAoD,CAAC,CAAC;QAClE,CAAC;QACD,IAAI,KAAK,CAAC,MAAM,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC;YAC7D,KAAK,CAAC,MAAM,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC;YAC3D,KAAK,CAAC,MAAM,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,cAAc,CAAC,EAAE,CAAC;YACpE,SAAS,CAAC,IAAI,CAAC,6EAA6E,CAAC,CAAC;QAChG,CAAC;QAED,gCAAgC;QAChC,IAAI,KAAK,CAAC,WAAW,KAAK,WAAW,IAAI,KAAK,CAAC,WAAW,CAAC,gBAAgB,EAAE,CAAC;YAC5E,SAAS,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;QAC/D,CAAC;QACD,IAAI,KAAK,CAAC,WAAW,KAAK,UAAU,IAAI,KAAK,CAAC,WAAW,CAAC,gBAAgB,EAAE,CAAC;YAC3E,SAAS,CAAC,IAAI,CAAC,iDAAiD,CAAC,CAAC;QACpE,CAAC;QAED,OAAO;YACL,aAAa,EAAE,KAAK,CAAC,MAAM,CAAC,WAAW;YACvC,cAAc,EAAE,IAAI,CAAC,sBAAsB,CAAC,KAAK,EAAE,YAAY,CAAC;YAChE,IAAI;YACJ,SAAS;YACT,WAAW,EAAE,YAAY;SAC1B,CAAC;IACJ,CAAC;IAED,4DAA4D;IAEpD,WAAW,CAAC,MAAiC;QACnD,MAAM,IAAI,GAAG,MAAM,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC;QAC9C,MAAM,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;QAE/D,KAAK,MAAM,CAAC,GAAG,EAAE,IAAI,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,mBAAmB,CAAC,EAAE,CAAC;YAC9D,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC;gBAAE,OAAO,GAAG,CAAC;YACnC,IAAI,UAAU,CAAC,IA
AI,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;gBAAE,OAAO,GAAG,CAAC;QACpE,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAEO,iBAAiB,CAAC,IAAY,EAAE,aAAuB;QAC7D,MAAM,OAAO,GAA6B;YACxC,UAAU,EAAE,CAAC,YAAY,CAAC;YAC1B,UAAU,EAAE,CAAC,YAAY,CAAC;YAC1B,IAAI,EAAE,CAAC,MAAM,CAAC;YACd,IAAI,EAAE,CAAC,MAAM,CAAC;YACd,GAAG,EAAE,CAAC,WAAW,CAAC;SACnB,CAAC;QACF,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;IACpE,CAAC;IAEO,iBAAiB,CACvB,KAAsB,EACtB,cAAyC,EACzC,YAAsC;QAEtC,MAAM,SAAS,GAAa,EAAE,CAAC;QAC/B,MAAM,YAAY,GAAa,EAAE,CAAC;QAClC,MAAM,OAAO,GAAa,EAAE,CAAC;QAE7B,4BAA4B;QAC5B,KAAK,MAAM,MAAM,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;YACjC,MAAM,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,MAAM,CAAC,CAAC;YACvD,IAAI,CAAC,IAAI,EAAE,CAAC;gBACV,YAAY,CAAC,IAAI,CAAC,GAAG,MAAM,0BAA0B,CAAC,CAAC;gBACvD,SAAS;YACX,CAAC;YAED,iDAAiD;YACjD,IAAI,YAAY,IAAI,YAAY,CAAC,cAAc,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBACpE,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACzB,CAAC;iBAAM,IAAI,CAAC,YAAY,EAAE,CAAC;gBACzB,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,oCAAoC;YAC9D,CAAC;iBAAM,CAAC;gBACN,YAAY,CAAC,IAAI,CAAC,GAAG,MAAM,WAAW,IAAI,CAAC,IAAI,2BAA2B,CAAC,CAAC;YAC9E,CAAC;QACH,CAAC;QAED,0BAA0B;QAC1B,IAAI,YAAY,EAAE,CAAC;YACjB,KAAK,MAAM,YAAY,IAAI,YAAY,CAAC,cAAc,EAAE,CAAC;gBACvD,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE;oBACpC,MAAM,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;oBACnD,OAAO,IAAI,EAAE,IAAI,KAAK,YAAY,CAAC;gBACrC,CAAC,CAAC,CAAC;gBACH,IAAI,CAAC,OAAO,EAAE,CAAC;oBACb,OAAO,CAAC,IAAI,CAAC,OAAO,YAAY,2CAA2C,CAAC,CAAC;gBAC/E,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;IAC9C,CAAC;IAEO,sBAAsB,CAC5B,KAAsB,EACtB,YAAsC;QAEtC,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,KAAK,CAAC,IAAI,CAAC,cAAc,KAAK,CAAC,MAAM,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC9D,IAAI,KAAK,CAAC,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvC,KAAK,CAAC,IAAI,CAAC,eAAe,KA
AK,CAAC,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAClE,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,WAAW,KAAK,CAAC,MAAM,CAAC,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAClE,IAAI,YAAY,EAAE,CAAC;YACjB,KAAK,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;QAC9C,CAAC;aAAM,CAAC;YACN,KAAK,CAAC,IAAI,CAAC,yCAAyC,CAAC,CAAC;QACxD,CAAC;QACD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;CACF"}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import type { HumanEscalation, TaskGraph, AgentMessage, MemoryConflict, AuditEntry, TrustWindow, CollusionEvent, SafetyPolicy, StallReport } from './types.js';
|
|
2
|
+
export declare class HumanEscalationManager {
    private escalations;
    private policy;
    constructor(policy: SafetyPolicy);
    /**
     * Raise an escalation because a safety threshold was breached.
     *
     * @param rule The safety rule that was breached.
     * @param currentValue The observed value.
     * @param maxValue The maximum the rule allows.
     * @param context Supporting material packaged for the human reviewer.
     */
    escalateSafetyBreach(rule: string, currentValue: number, maxValue: number, context: EscalationContext): HumanEscalation;
    /**
     * Raise an escalation because collusion was detected.
     *
     * @param event The collusion event that triggered the escalation.
     * @param context Supporting material packaged for the human reviewer.
     */
    escalateCollusion(event: CollusionEvent, context: EscalationContext): HumanEscalation;
    /**
     * Raise an escalation because of a stall that could not be resolved.
     *
     * @param stalls The stall reports that triggered the escalation.
     * @param context Supporting material packaged for the human reviewer.
     */
    escalateStall(stalls: ReadonlyArray<StallReport>, context: EscalationContext): HumanEscalation;
    /**
     * Raise an escalation because of trust collapse (too many demotions).
     *
     * @param agentId The agent whose trust collapsed.
     * @param demotions Number of demotions.
     * @param context Supporting material packaged for the human reviewer.
     */
    escalateTrustCollapse(agentId: string, demotions: number, context: EscalationContext): HumanEscalation;
    /**
     * Record the human's response to a pending escalation.
     *
     * @returns A boolean; presumably whether a matching pending escalation
     *   was found — confirm against the implementation.
     */
    resolve(escalationId: string, action: string, reasoning: string): boolean;
    /** List every escalation. */
    getAllEscalations(): ReadonlyArray<HumanEscalation>;
    /** List the escalations that are still pending. */
    getPending(): ReadonlyArray<HumanEscalation>;
    /** Tell whether any escalation is still unresolved. */
    hasUnresolved(): boolean;
    private createEscalation;
}
|
|
34
|
+
/**
 * Supporting context a caller supplies when raising an escalation.
 * The manager copies these fields onto the escalation it creates.
 */
export interface EscalationContext {
|
|
35
|
+
/** Task graph attached to the escalation as-is. */
readonly taskGraph: TaskGraph;
|
|
36
|
+
/** Agent messages attached to the escalation as-is. */
readonly relevantMessages: readonly AgentMessage[];
|
|
37
|
+
/** Memory conflicts attached to the escalation as-is. */
readonly conflictingClaims: readonly MemoryConflict[];
|
|
38
|
+
/** Audit entries; only the last 20 are kept on the stored escalation. */
readonly recentAudit: readonly AuditEntry[];
|
|
39
|
+
/** Trust windows attached to the escalation as-is. */
readonly trustSnapshots: readonly TrustWindow[];
|
|
40
|
+
/** Collusion events attached to the escalation as-is. */
readonly collusionEvents: readonly CollusionEvent[];
|
|
41
|
+
}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
// ============================================================
|
|
2
|
+
// Assay Verified Agent Runtime — Human Escalation Manager
|
|
3
|
+
// Packages structured context for human review when the system
|
|
4
|
+
// cannot resolve a situation autonomously.
|
|
5
|
+
// ============================================================
|
|
6
|
+
import { randomUUID } from 'node:crypto';
|
|
7
|
+
// ── Human Escalation Manager ───────────────────────────────
|
|
8
|
+
export class HumanEscalationManager {
|
|
9
|
+
escalations = [];
|
|
10
|
+
policy;
|
|
11
|
+
constructor(policy) {
|
|
12
|
+
this.policy = policy;
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* Create an escalation from a safety threshold breach.
|
|
16
|
+
*/
|
|
17
|
+
escalateSafetyBreach(rule, currentValue, maxValue, context) {
|
|
18
|
+
return this.createEscalation({
|
|
19
|
+
type: 'safety_threshold',
|
|
20
|
+
rule,
|
|
21
|
+
value: currentValue,
|
|
22
|
+
max: maxValue,
|
|
23
|
+
}, context, [
|
|
24
|
+
`Reduce the affected agent's autonomy`,
|
|
25
|
+
`Replace the agent with a different model`,
|
|
26
|
+
`Abort the current task and start fresh`,
|
|
27
|
+
`Override the safety threshold (increases risk)`,
|
|
28
|
+
]);
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* Create an escalation from a collusion detection.
|
|
32
|
+
*/
|
|
33
|
+
escalateCollusion(event, context) {
|
|
34
|
+
return this.createEscalation({ type: 'collusion_detected', event }, context, [
|
|
35
|
+
`Switch one agent to a different LLM provider (model diversity)`,
|
|
36
|
+
`Require formal verification for all claims from these agents`,
|
|
37
|
+
`Demote both agents to untrusted`,
|
|
38
|
+
`Review the specific claim manually: "${event.claimText}"`,
|
|
39
|
+
]);
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Create an escalation from an unresolvable stall.
|
|
43
|
+
*/
|
|
44
|
+
escalateStall(stalls, context) {
|
|
45
|
+
const actions = [];
|
|
46
|
+
for (const stall of stalls) {
|
|
47
|
+
switch (stall.type) {
|
|
48
|
+
case 'dependency_deadlock':
|
|
49
|
+
actions.push(`Break circular dependency involving task ${stall.taskId}`);
|
|
50
|
+
break;
|
|
51
|
+
case 'reject_loop':
|
|
52
|
+
actions.push(`Simplify or redefine task ${stall.taskId} (rejected ${stall.attempts} times)`);
|
|
53
|
+
break;
|
|
54
|
+
case 'timeout':
|
|
55
|
+
actions.push(`Investigate why task ${stall.taskId} is taking ${stall.duration}ms`);
|
|
56
|
+
break;
|
|
57
|
+
case 'resource_contention':
|
|
58
|
+
actions.push(`Add another agent for the ${stall.taskId} specialization`);
|
|
59
|
+
break;
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
return this.createEscalation({ type: 'stall_unresolvable', stalls }, context, actions);
|
|
63
|
+
}
|
|
64
|
+
/**
|
|
65
|
+
* Create an escalation from trust collapse (too many demotions).
|
|
66
|
+
*/
|
|
67
|
+
escalateTrustCollapse(agentId, demotions, context) {
|
|
68
|
+
return this.createEscalation({ type: 'trust_collapse', agentId, demotions }, context, [
|
|
69
|
+
`Replace the agent with a different model`,
|
|
70
|
+
`Review the agent's recent verification failures`,
|
|
71
|
+
`Reset the agent's trust and retry with closer oversight`,
|
|
72
|
+
`Remove the agent from the team`,
|
|
73
|
+
]);
|
|
74
|
+
}
|
|
75
|
+
/**
|
|
76
|
+
* Record a human response to a pending escalation.
|
|
77
|
+
*/
|
|
78
|
+
resolve(escalationId, action, reasoning) {
|
|
79
|
+
const escalation = this.escalations.find(e => e.id === escalationId);
|
|
80
|
+
if (!escalation || escalation.status !== 'pending')
|
|
81
|
+
return false;
|
|
82
|
+
escalation.status = 'resolved';
|
|
83
|
+
escalation.humanResponse = {
|
|
84
|
+
action,
|
|
85
|
+
reasoning,
|
|
86
|
+
timestamp: new Date().toISOString(),
|
|
87
|
+
};
|
|
88
|
+
return true;
|
|
89
|
+
}
|
|
90
|
+
/** Get all escalations. */
|
|
91
|
+
getAllEscalations() {
|
|
92
|
+
return this.escalations;
|
|
93
|
+
}
|
|
94
|
+
/** Get pending escalations. */
|
|
95
|
+
getPending() {
|
|
96
|
+
return this.escalations.filter(e => e.status === 'pending');
|
|
97
|
+
}
|
|
98
|
+
/** Check if there are any unresolved escalations. */
|
|
99
|
+
hasUnresolved() {
|
|
100
|
+
return this.escalations.some(e => e.status === 'pending');
|
|
101
|
+
}
|
|
102
|
+
// ── Private ─────────────────────────────────────────────
|
|
103
|
+
createEscalation(trigger, context, suggestedActions) {
|
|
104
|
+
const escalation = {
|
|
105
|
+
id: randomUUID(),
|
|
106
|
+
trigger,
|
|
107
|
+
context: {
|
|
108
|
+
taskGraph: context.taskGraph,
|
|
109
|
+
relevantMessages: context.relevantMessages,
|
|
110
|
+
conflictingClaims: context.conflictingClaims,
|
|
111
|
+
recentAudit: context.recentAudit.slice(-20), // Last 20 audit entries
|
|
112
|
+
trustSnapshots: context.trustSnapshots,
|
|
113
|
+
collusionEvents: context.collusionEvents,
|
|
114
|
+
},
|
|
115
|
+
suggestedActions,
|
|
116
|
+
status: 'pending',
|
|
117
|
+
};
|
|
118
|
+
this.escalations.push(escalation);
|
|
119
|
+
return escalation;
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
//# sourceMappingURL=human-escalation.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"human-escalation.js","sourceRoot":"","sources":["../../src/runtime/human-escalation.ts"],"names":[],"mappings":"AAAA,+DAA+D;AAC/D,0DAA0D;AAC1D,+DAA+D;AAC/D,2CAA2C;AAC3C,+DAA+D;AAE/D,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAczC,8DAA8D;AAE9D,MAAM,OAAO,sBAAsB;IACzB,WAAW,GAAsB,EAAE,CAAC;IACpC,MAAM,CAAe;IAE7B,YAAY,MAAoB;QAC9B,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,oBAAoB,CAClB,IAAY,EACZ,YAAoB,EACpB,QAAgB,EAChB,OAA0B;QAE1B,OAAO,IAAI,CAAC,gBAAgB,CAC1B;YACE,IAAI,EAAE,kBAAkB;YACxB,IAAI;YACJ,KAAK,EAAE,YAAY;YACnB,GAAG,EAAE,QAAQ;SACd,EACD,OAAO,EACP;YACE,sCAAsC;YACtC,0CAA0C;YAC1C,wCAAwC;YACxC,gDAAgD;SACjD,CACF,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,iBAAiB,CACf,KAAqB,EACrB,OAA0B;QAE1B,OAAO,IAAI,CAAC,gBAAgB,CAC1B,EAAE,IAAI,EAAE,oBAAoB,EAAE,KAAK,EAAE,EACrC,OAAO,EACP;YACE,gEAAgE;YAChE,8DAA8D;YAC9D,iCAAiC;YACjC,wCAAwC,KAAK,CAAC,SAAS,GAAG;SAC3D,CACF,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,aAAa,CACX,MAA8B,EAC9B,OAA0B;QAE1B,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;gBACnB,KAAK,qBAAqB;oBACxB,OAAO,CAAC,IAAI,CAAC,4CAA4C,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;oBACzE,MAAM;gBACR,KAAK,aAAa;oBAChB,OAAO,CAAC,IAAI,CAAC,6BAA6B,KAAK,CAAC,MAAM,cAAc,KAAK,CAAC,QAAQ,SAAS,CAAC,CAAC;oBAC7F,MAAM;gBACR,KAAK,SAAS;oBACZ,OAAO,CAAC,IAAI,CAAC,wBAAwB,KAAK,CAAC,MAAM,cAAc,KAAK,CAAC,QAAQ,IAAI,CAAC,CAAC;oBACnF,MAAM;gBACR,KAAK,qBAAqB;oBACxB,OAAO,CAAC,IAAI,CAAC,6BAA6B,KAAK,CAAC,MAAM,iBAAiB,CAAC,CAAC;oBACzE,MAAM;YACV,CAAC;QACH,CAAC;QAED,OAAO,IAAI,CAAC,gBAAgB,CAC1B,EAAE,IAAI,EAAE,oBAAoB,EAAE,MAAM,EAAE,EACtC,OAAO,EACP,OAAO,CACR,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,qBAAqB,CACnB,OAAe,EACf,SAAiB,EACjB,OAA0B;QAE1B,OAAO,IAAI,CAAC,gBAAgB,CAC1B,EAAE,IAAI,EAAE,gBAAgB,EAAE,OAAO,EAAE,SAAS,EAAE,EAC9C,OAAO,EACP;YACE,0CAA0C;YAC1C,iDAAiD;YACjD,yDAAyD;YACzD,gCAAgC;SACjC,CACF,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,OAAO,CAAC,YAAoB,EAAE,MAAc,EAAE,SAAiB;QAC7D,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,YAAY,CAAC,CAAC;QACrE,IAAI,CAAC,UAAU,IAAI,UA
AU,CAAC,MAAM,KAAK,SAAS;YAAE,OAAO,KAAK,CAAC;QAEhE,UAAiC,CAAC,MAAM,GAAG,UAAU,CAAC;QACtD,UAAkE,CAAC,aAAa,GAAG;YAClF,MAAM;YACN,SAAS;YACT,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC,CAAC;QAEF,OAAO,IAAI,CAAC;IACd,CAAC;IAED,2BAA2B;IAC3B,iBAAiB;QACf,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;IAED,+BAA+B;IAC/B,UAAU;QACR,OAAO,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC;IAC9D,CAAC;IAED,qDAAqD;IACrD,aAAa;QACX,OAAO,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC;IAC5D,CAAC;IAED,2DAA2D;IAEnD,gBAAgB,CACtB,OAA+B,EAC/B,OAA0B,EAC1B,gBAA0B;QAE1B,MAAM,UAAU,GAAoB;YAClC,EAAE,EAAE,UAAU,EAAE;YAChB,OAAO;YACP,OAAO,EAAE;gBACP,SAAS,EAAE,OAAO,CAAC,SAAS;gBAC5B,gBAAgB,EAAE,OAAO,CAAC,gBAAgB;gBAC1C,iBAAiB,EAAE,OAAO,CAAC,iBAAiB;gBAC5C,WAAW,EAAE,OAAO,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,EAAE,wBAAwB;gBACrE,cAAc,EAAE,OAAO,CAAC,cAAc;gBACtC,eAAe,EAAE,OAAO,CAAC,eAAe;aACzC;YACD,gBAAgB;YAChB,MAAM,EAAE,SAAS;SAClB,CAAC;QAEF,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAClC,OAAO,UAAU,CAAC;IACpB,CAAC;CACF"}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { EventEmitter } from 'node:events';
|
|
2
|
+
import type { KillSwitchLevel, KillSwitchAction, KillSwitchEvent } from './types.js';
|
|
3
|
+
/**
 * Kill switch declared as an `EventEmitter` subclass.
 * NOTE(review): only the declaration is visible here; which events are emitted must be confirmed against the implementation.
 */
export declare class KillSwitch extends EventEmitter {
|
|
4
|
+
private eventLog;
|
|
5
|
+
private logPath;
|
|
6
|
+
private suspendedEntities;
|
|
7
|
+
private systemHalted;
|
|
8
|
+
/** @param logPath Optional path; presumably where kill switch events are logged — TODO confirm against the implementation. */
constructor(logPath?: string);
|
|
9
|
+
/**
|
|
10
|
+
* Activate the kill switch at a given level.
|
|
11
|
+
* Validates that the level has authority for the requested action.
|
|
12
|
+
* Validates that the level is not being overridden by a higher level.
* @param level Level at which the switch is activated.
* @param action Action being requested.
* @param target Entity the action applies to.
* @param triggeredBy Who or what triggered the activation.
* @param reason Why the switch is being activated.
* @returns Promise resolving to a `KillSwitchEvent` for this activation.
|
|
13
|
+
*/
|
|
14
|
+
activate(level: KillSwitchLevel, action: KillSwitchAction, target: string, triggeredBy: string, reason: string): Promise<KillSwitchEvent>;
|
|
15
|
+
/**
|
|
16
|
+
* Release a kill switch hold on a target.
|
|
17
|
+
* Only a level equal to or higher than the suspending level can release.
* @param target Entity whose hold should be released.
* @param releasedBy Who or what is requesting the release.
* @param level Level of the requester.
* @returns Promise resolving to `{ released, reason }`.
|
|
18
|
+
*/
|
|
19
|
+
release(target: string, releasedBy: string, level: KillSwitchLevel): Promise<{
|
|
20
|
+
released: boolean;
|
|
21
|
+
reason: string;
|
|
22
|
+
}>;
|
|
23
|
+
/**
|
|
24
|
+
* Check if a target is suspended.
* @param target Entity to look up.
* @returns `{ suspended, event? }`; `event` is optional (presumably the suspending event — TODO confirm).
|
|
25
|
+
*/
|
|
26
|
+
isSuspended(target: string): {
|
|
27
|
+
suspended: boolean;
|
|
28
|
+
event?: KillSwitchEvent;
|
|
29
|
+
};
|
|
30
|
+
/**
|
|
31
|
+
* Check if the system is halted (Layer 2 kill switch).
|
|
32
|
+
*/
|
|
33
|
+
isSystemHalted(): boolean;
|
|
34
|
+
/**
|
|
35
|
+
* Get all currently suspended entities.
* @returns Map from a string key (presumably the target — TODO confirm) to a `KillSwitchEvent`.
|
|
36
|
+
*/
|
|
37
|
+
getSuspended(): Map<string, KillSwitchEvent>;
|
|
38
|
+
/**
|
|
39
|
+
* Get the full event log.
|
|
40
|
+
*/
|
|
41
|
+
getEventLog(): readonly KillSwitchEvent[];
|
|
42
|
+
/**
|
|
43
|
+
* Validate that a given entity has authority to perform an action.
|
|
44
|
+
* Used before executing any operation to check kill switch status.
* @param requestingLevel Level of the entity making the request.
* @param action Action the entity wants to perform.
* @param target Entity the action would apply to.
* @returns `{ authorized, reason }`.
|
|
45
|
+
*/
|
|
46
|
+
validateAuthority(requestingLevel: KillSwitchLevel, action: KillSwitchAction, target: string): {
|
|
47
|
+
authorized: boolean;
|
|
48
|
+
reason: string;
|
|
49
|
+
};
|
|
50
|
+
private logEvent;
|
|
51
|
+
}
|