@llm-dev-ops/agentics-cli 1.5.9 → 1.5.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +478 -148
- package/dist/bundled-agents/manifest.json +1 -0
- package/dist/commands/phase4.d.ts.map +1 -1
- package/dist/commands/phase4.js +4 -2
- package/dist/commands/phase4.js.map +1 -1
- package/dist/commands/phase6.d.ts.map +1 -1
- package/dist/commands/phase6.js +4 -2
- package/dist/commands/phase6.js.map +1 -1
- package/dist/mcp/mcp-server.js +11 -10
- package/dist/mcp/mcp-server.js.map +1 -1
- package/dist/pipeline/auto-chain.d.ts +5 -0
- package/dist/pipeline/auto-chain.d.ts.map +1 -1
- package/dist/pipeline/auto-chain.js +131 -47
- package/dist/pipeline/auto-chain.js.map +1 -1
- package/dist/pipeline/enterprise/artifact-assembler.d.ts +64 -0
- package/dist/pipeline/enterprise/artifact-assembler.d.ts.map +1 -0
- package/dist/pipeline/enterprise/artifact-assembler.js +542 -0
- package/dist/pipeline/enterprise/artifact-assembler.js.map +1 -0
- package/dist/pipeline/enterprise/artifact-renderers.d.ts +42 -0
- package/dist/pipeline/enterprise/artifact-renderers.d.ts.map +1 -0
- package/dist/pipeline/enterprise/artifact-renderers.js +513 -0
- package/dist/pipeline/enterprise/artifact-renderers.js.map +1 -0
- package/dist/pipeline/enterprise/code-resolver.d.ts +43 -0
- package/dist/pipeline/enterprise/code-resolver.d.ts.map +1 -0
- package/dist/pipeline/enterprise/code-resolver.js +219 -0
- package/dist/pipeline/enterprise/code-resolver.js.map +1 -0
- package/dist/pipeline/enterprise/decision-graph-client.d.ts +171 -0
- package/dist/pipeline/enterprise/decision-graph-client.d.ts.map +1 -0
- package/dist/pipeline/enterprise/decision-graph-client.js +222 -0
- package/dist/pipeline/enterprise/decision-graph-client.js.map +1 -0
- package/dist/pipeline/enterprise/decision-graph-memory.d.ts +104 -0
- package/dist/pipeline/enterprise/decision-graph-memory.d.ts.map +1 -0
- package/dist/pipeline/enterprise/decision-graph-memory.js +292 -0
- package/dist/pipeline/enterprise/decision-graph-memory.js.map +1 -0
- package/dist/pipeline/enterprise/decision-graph.d.ts +237 -0
- package/dist/pipeline/enterprise/decision-graph.d.ts.map +1 -0
- package/dist/pipeline/enterprise/decision-graph.js +654 -0
- package/dist/pipeline/enterprise/decision-graph.js.map +1 -0
- package/dist/pipeline/enterprise/index.d.ts +40 -0
- package/dist/pipeline/enterprise/index.d.ts.map +1 -0
- package/dist/pipeline/enterprise/index.js +43 -0
- package/dist/pipeline/enterprise/index.js.map +1 -0
- package/dist/pipeline/enterprise/pass-executor.d.ts +33 -0
- package/dist/pipeline/enterprise/pass-executor.d.ts.map +1 -0
- package/dist/pipeline/enterprise/pass-executor.js +459 -0
- package/dist/pipeline/enterprise/pass-executor.js.map +1 -0
- package/dist/pipeline/enterprise/pass-registry.d.ts +19 -0
- package/dist/pipeline/enterprise/pass-registry.d.ts.map +1 -0
- package/dist/pipeline/enterprise/pass-registry.js +243 -0
- package/dist/pipeline/enterprise/pass-registry.js.map +1 -0
- package/dist/pipeline/enterprise/pass2-simulation.d.ts +130 -0
- package/dist/pipeline/enterprise/pass2-simulation.d.ts.map +1 -0
- package/dist/pipeline/enterprise/pass2-simulation.js +691 -0
- package/dist/pipeline/enterprise/pass2-simulation.js.map +1 -0
- package/dist/pipeline/enterprise/pass4-governance.d.ts +195 -0
- package/dist/pipeline/enterprise/pass4-governance.d.ts.map +1 -0
- package/dist/pipeline/enterprise/pass4-governance.js +748 -0
- package/dist/pipeline/enterprise/pass4-governance.js.map +1 -0
- package/dist/pipeline/enterprise/pass5-decision.d.ts +90 -0
- package/dist/pipeline/enterprise/pass5-decision.d.ts.map +1 -0
- package/dist/pipeline/enterprise/pass5-decision.js +487 -0
- package/dist/pipeline/enterprise/pass5-decision.js.map +1 -0
- package/dist/pipeline/enterprise/pass7-observability.d.ts +198 -0
- package/dist/pipeline/enterprise/pass7-observability.d.ts.map +1 -0
- package/dist/pipeline/enterprise/pass7-observability.js +636 -0
- package/dist/pipeline/enterprise/pass7-observability.js.map +1 -0
- package/dist/pipeline/enterprise/pipeline-orchestrator.d.ts +29 -0
- package/dist/pipeline/enterprise/pipeline-orchestrator.d.ts.map +1 -0
- package/dist/pipeline/enterprise/pipeline-orchestrator.js +283 -0
- package/dist/pipeline/enterprise/pipeline-orchestrator.js.map +1 -0
- package/dist/pipeline/enterprise/provenance-tracker.d.ts +135 -0
- package/dist/pipeline/enterprise/provenance-tracker.d.ts.map +1 -0
- package/dist/pipeline/enterprise/provenance-tracker.js +437 -0
- package/dist/pipeline/enterprise/provenance-tracker.js.map +1 -0
- package/dist/pipeline/enterprise/trace-middleware.d.ts +37 -0
- package/dist/pipeline/enterprise/trace-middleware.d.ts.map +1 -0
- package/dist/pipeline/enterprise/trace-middleware.js +188 -0
- package/dist/pipeline/enterprise/trace-middleware.js.map +1 -0
- package/dist/pipeline/enterprise/types.d.ts +199 -0
- package/dist/pipeline/enterprise/types.d.ts.map +1 -0
- package/dist/pipeline/enterprise/types.js +30 -0
- package/dist/pipeline/enterprise/types.js.map +1 -0
- package/dist/pipeline/phase2/phases/adr-generator.d.ts.map +1 -1
- package/dist/pipeline/phase2/phases/adr-generator.js +56 -8
- package/dist/pipeline/phase2/phases/adr-generator.js.map +1 -1
- package/dist/pipeline/phase3/phases/test-generator.d.ts.map +1 -1
- package/dist/pipeline/phase3/phases/test-generator.js +53 -0
- package/dist/pipeline/phase3/phases/test-generator.js.map +1 -1
- package/dist/pipeline/phase4/phases/deployment-generator.d.ts.map +1 -1
- package/dist/pipeline/phase4/phases/deployment-generator.js +147 -0
- package/dist/pipeline/phase4/phases/deployment-generator.js.map +1 -1
- package/dist/pipeline/phase4-adrs/phase4-adrs-coordinator.d.ts.map +1 -1
- package/dist/pipeline/phase4-adrs/phase4-adrs-coordinator.js +52 -1
- package/dist/pipeline/phase4-adrs/phase4-adrs-coordinator.js.map +1 -1
- package/dist/pipeline/phase6/phases/deployment-finalizer.d.ts.map +1 -1
- package/dist/pipeline/phase6/phases/deployment-finalizer.js +226 -0
- package/dist/pipeline/phase6/phases/deployment-finalizer.js.map +1 -1
- package/dist/pipeline/phase6/phases/service-registrar.d.ts +1 -1
- package/dist/pipeline/phase6/phases/service-registrar.d.ts.map +1 -1
- package/dist/pipeline/phase6/phases/service-registrar.js +47 -7
- package/dist/pipeline/phase6/phases/service-registrar.js.map +1 -1
- package/dist/pipeline/swarm-orchestrator.d.ts.map +1 -1
- package/dist/pipeline/swarm-orchestrator.js +47 -19
- package/dist/pipeline/swarm-orchestrator.js.map +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,691 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pass 2 — Simulation & Scenario Modeling (ADR-035)
|
|
3
|
+
*
|
|
4
|
+
* Executes three concurrent workstreams that converge into a unified
|
|
5
|
+
* Scenario Matrix stored in the DecisionGraph:
|
|
6
|
+
*
|
|
7
|
+
* Workstream A: Scenario Generation (LLM-Simulator)
|
|
8
|
+
* Workstream B: Risk Signal Detection (LLM-Sentinel)
|
|
9
|
+
* Workstream C: Model Reliability (LLM-Test-Bench)
|
|
10
|
+
*
|
|
11
|
+
* After all workstreams complete, their outputs are merged into a
|
|
12
|
+
* ScenarioMatrix node that cross-references scenarios with risk signals
|
|
13
|
+
* and reliability assessments.
|
|
14
|
+
*/
|
|
15
|
+
import * as crypto from 'node:crypto';
|
|
16
|
+
/**
 * Roster of Pass 2 workstreams. Each entry carries a workstream `id`, a
 * display `label`, and the `agents` ({ domain, agent, role }) that belong
 * to that workstream.
 */
export const WORKSTREAM_CONFIGS = [
    // [workstream id, label, shared agent domain, [[agent, role], ...]]
    ['scenario-generation', 'Scenario Generation (LLM-Simulator)', 'simulator', [
        ['what-if', 'What-if simulation'],
        ['scenario', 'Scenario generator'],
    ]],
    ['risk-signal-detection', 'Risk Signal Detection (LLM-Sentinel)', 'sentinel', [
        ['anomaly', 'Anomaly detection'],
        ['drift', 'Drift detection'],
        ['correlation', 'Incident correlation'],
        ['rca', 'Root cause analysis'],
    ]],
    ['model-reliability', 'Model Reliability (LLM-Test-Bench)', 'test-bench', [
        ['hallucination', 'Hallucination detection'],
        ['faithfulness', 'Faithfulness verification'],
        ['bias', 'Bias detection'],
        ['stress', 'Stress testing'],
        ['red-team', 'Red team testing'],
        ['adversarial', 'Adversarial prompt testing'],
    ]],
].map(([id, label, domain, members]) => ({
    id,
    label,
    // Every agent of a workstream lives in the same domain.
    agents: members.map(([agent, role]) => ({ domain, agent, role })),
}));
|
|
49
|
+
// ============================================================================
|
|
50
|
+
// Pass 2 Executor
|
|
51
|
+
// ============================================================================
|
|
52
|
+
/**
 * Run Pass 2 (Simulation & Scenario Modeling).
 *
 * Reads the Pass 1 nodes from the graph, invokes every resolved workstream
 * concurrently, turns the agent results into scenarios, risk signals and a
 * reliability report, combines them into a scenario matrix, and hands the
 * lot to `writeToGraph`.
 *
 * @param {object} params
 * @param {string} params.prompt - Prompt forwarded to every agent.
 * @param {object} params.graph - Graph exposing `getNodesByPass`.
 * @param {object} params.tracker - Tracker forwarded to `writeToGraph`.
 * @param {number} [params.timeoutMs] - Forwarded to the invoke function.
 * @param {boolean} [params.verbose] - Emit progress lines on stderr.
 * @param {boolean} [params.skipRedTeam] - Forwarded to `resolveWorkstreams`.
 * @param {Function} [params.invokeAgents] - Replacement agent invoker.
 * @returns {Promise<object>} Workstream results, extracted domain objects,
 *   the scenario matrix, created graph node ids and all agent results.
 */
export async function executePass2(params) {
    const { prompt, graph, tracker, timeoutMs, verbose, skipRedTeam } = params;
    const pass = 2;
    const startedMs = Date.now();

    // Pass 1 output is the shared context for every Pass 2 agent.
    const pass1Nodes = graph.getNodesByPass(1);
    const payload = {
        prompt,
        pass: 2,
        passName: 'Simulation & Scenario Modeling',
        context: buildPass1Context(pass1Nodes),
    };
    const workstreams = resolveWorkstreams(skipRedTeam);
    // Callers (tests) may inject their own invoker.
    const invoke = params.invokeAgents ?? defaultInvokeAgents;

    if (verbose) {
        console.error(` [Pass 2] Starting ${workstreams.length} concurrent workstreams`);
    }

    /** Invoke one workstream's agents and summarize the outcome. */
    const runWorkstream = async (ws) => {
        const wsStartedMs = Date.now();
        if (verbose) {
            console.error(` [Pass 2] Workstream: ${ws.label} (${ws.agents.length} agents)`);
        }
        const agentResults = await invoke(ws.agents, payload, pass, timeoutMs);
        const succeeded = agentResults.filter((r) => r.status >= 200 && r.status < 300);
        return {
            workstream: ws.id,
            label: ws.label,
            agentResults,
            successCount: succeeded.length,
            totalCount: agentResults.length,
            durationMs: Date.now() - wsStartedMs,
        };
    };

    const workstreamResults = await Promise.all(workstreams.map((ws) => runWorkstream(ws)));
    const totalAgentResults = workstreamResults.flatMap((ws) => [...ws.agentResults]);

    // Pick each workstream's result by id; a missing one yields undefined.
    const resultFor = (id) => workstreamResults.find((ws) => ws.workstream === id);
    const scenarioResult = resultFor('scenario-generation');
    const riskResult = resultFor('risk-signal-detection');
    const reliabilityResult = resultFor('model-reliability');

    const scenarios = extractScenarios(scenarioResult, prompt);
    const riskSignals = extractRiskSignals(riskResult);
    const reliabilityReport = extractReliabilityReport(reliabilityResult);

    // Converge the three outputs into the scenario matrix.
    const crossReferences = buildCrossReferences(scenarios, riskSignals);
    const overallRiskLevel = computeOverallRiskLevel(riskSignals, scenarios);
    const scenarioMatrix = { scenarios, riskSignals, reliabilityReport, crossReferences, overallRiskLevel };

    const graphNodeIds = writeToGraph(graph, scenarios, riskSignals, reliabilityReport, scenarioMatrix, pass1Nodes, tracker, pass);

    if (verbose) {
        console.error(` [Pass 2] Complete: ${scenarios.length} scenarios, ${riskSignals.length} risk signals, reliability: ${reliabilityReport.recommendation}`);
        console.error(` [Pass 2] Duration: ${Date.now() - startedMs}ms`);
    }

    return {
        workstreamResults,
        scenarios,
        riskSignals,
        reliabilityReport,
        scenarioMatrix,
        graphNodeIds,
        totalAgentResults,
    };
}
|
|
138
|
+
// ============================================================================
|
|
139
|
+
// Workstream Resolution
|
|
140
|
+
// ============================================================================
|
|
141
|
+
/**
 * Pick the workstream configs to run.
 *
 * With a falsy `skipRedTeam` the shared `WORKSTREAM_CONFIGS` array is
 * returned as-is. Otherwise a new array is returned in which the
 * 'model-reliability' workstream is copied without its 'red-team' and
 * 'adversarial' agents; the other entries are passed through untouched.
 */
function resolveWorkstreams(skipRedTeam) {
    if (skipRedTeam) {
        const isKept = ({ agent }) => agent !== 'red-team' && agent !== 'adversarial';
        return WORKSTREAM_CONFIGS.map((config) => (config.id === 'model-reliability'
            ? { ...config, agents: config.agents.filter(isKept) }
            : config));
    }
    return WORKSTREAM_CONFIGS;
}
|
|
156
|
+
// ============================================================================
|
|
157
|
+
// Pass 1 Context Builder
|
|
158
|
+
// ============================================================================
|
|
159
|
+
/**
 * Fold Pass 1 nodes into the context object sent to Pass 2 agents.
 *
 * Each recognized node type contributes `{ summary, ...content }` under a
 * camelCase key. Architecture hypotheses accumulate in an array; for the
 * other recognized types a later node replaces an earlier one. Nodes of any
 * other type are ignored.
 */
function buildPass1Context(pass1Nodes) {
    // Node types that map to exactly one context entry.
    const singleValueKeys = new Map([
        ['problem_definition', 'problemDefinition'],
        ['business_context', 'businessContext'],
        ['technical_scope', 'technicalScope'],
        ['execution_plan', 'executionPlan'],
    ]);
    const context = {};
    for (const node of pass1Nodes) {
        // Built lazily so unrecognized nodes are never read beyond `type`.
        const toEntry = () => ({ summary: node.summary, ...node.content });
        if (node.type === 'architecture_hypothesis') {
            if (!context.architectureHypotheses) {
                context.architectureHypotheses = [];
            }
            context.architectureHypotheses.push(toEntry());
            continue;
        }
        const key = singleValueKeys.get(node.type);
        if (key !== undefined) {
            context[key] = toEntry();
        }
    }
    return context;
}
|
|
188
|
+
// ============================================================================
|
|
189
|
+
// Result Extractors
|
|
190
|
+
// ============================================================================
|
|
191
|
+
/**
 * Collect scenarios from the scenario-generation workstream result.
 *
 * Only agent results with a 2xx status and a truthy response are parsed.
 * The collected list is then passed to `ensureMinimumScenarios`, which
 * receives the prompt and may append to it, before it is returned.
 *
 * @param {object|undefined} ws - Workstream result, if that workstream ran.
 * @param {string} prompt - Forwarded to `ensureMinimumScenarios`.
 */
function extractScenarios(ws, prompt) {
    const isUsable = (result) => result.status >= 200 && result.status < 300 && result.response;
    const scenarios = ws
        ? [...ws.agentResults]
            .filter(isUsable)
            .flatMap((result) => [...parseScenarioResponse(result.response, result.agent)])
        : [];
    ensureMinimumScenarios(scenarios, prompt);
    return scenarios;
}
|
|
212
|
+
/**
 * Parse one agent response into normalized scenarios.
 *
 * Candidate scenarios are looked up, in order, under `scenarios`, `data`,
 * `results` and `result`. A response that is itself an array is used as the
 * candidate list directly, so a bare list of scenarios is not collapsed
 * into a single placeholder. Array candidates contribute one scenario per
 * object item; a single object candidate contributes one scenario.
 *
 * If nothing was extracted and the response has at least one own key, the
 * whole response is normalized as a single scenario.
 *
 * @param {unknown} response - Raw agent response.
 * @param {object} agent - Agent descriptor forwarded to `normalizeScenario`.
 * @returns {object[]} Normalized scenarios; empty for non-object input.
 */
function parseScenarioResponse(response, agent) {
    if (!response || typeof response !== 'object') {
        return [];
    }
    const isRecord = (value) => typeof value === 'object' && value !== null;
    const candidates = Array.isArray(response)
        ? response
        : (response['scenarios'] ?? response['data'] ?? response['results'] ?? response['result']);

    let scenarios = [];
    if (Array.isArray(candidates)) {
        scenarios = candidates
            .filter(isRecord)
            .map((item) => normalizeScenario(item, agent));
    }
    else if (isRecord(candidates)) {
        scenarios = [normalizeScenario(candidates, agent)];
    }

    // Last resort: treat the raw response as one scenario.
    if (scenarios.length === 0 && Object.keys(response).length > 0) {
        scenarios.push(normalizeScenario(response, agent));
    }
    return scenarios;
}
|
|
236
|
+
/**
 * Shape a raw object into a Scenario record.
 *
 * Accepts camelCase and snake_case spellings plus a few short aliases
 * (`title`, `summary`, `outcome`, `mitigation`). A missing id gets a random
 * UUID and a missing name falls back to "<agent role> scenario"; missing
 * text fields become empty strings.
 */
function normalizeScenario(raw, agent) {
    const identifier = raw.id ?? crypto.randomUUID();
    const label = raw.name ?? raw.title ?? `${agent.role} scenario`;
    const narrative = raw.description ?? raw.summary ?? '';
    const outcome = raw.expectedOutcome ?? raw.expected_outcome ?? raw.outcome ?? '';
    const mitigation = raw.mitigationStrategy ?? raw.mitigation_strategy ?? raw.mitigation ?? '';
    return {
        id: String(identifier),
        type: normalizeScenarioType(raw.type),
        name: String(label),
        description: String(narrative),
        assumptions: normalizeStringArray(raw.assumptions),
        probability: normalizeProbability(raw.probability),
        impact: normalizeImpact(raw.impact),
        expectedOutcome: String(outcome),
        mitigationStrategy: String(mitigation),
        metrics: normalizeMetrics(raw.metrics),
    };
}
|
|
251
|
+
/**
 * Collect risk signals from the risk-signal-detection workstream result.
 *
 * Only agent results with a 2xx status and a truthy response are parsed;
 * an absent workstream result yields an empty list.
 */
function extractRiskSignals(ws) {
    if (!ws) {
        return [];
    }
    const isUsable = (result) => result.status >= 200 && result.status < 300 && result.response;
    return [...ws.agentResults]
        .filter(isUsable)
        .flatMap((result) => [...parseRiskSignalResponse(result.response, result.agent)]);
}
|
|
266
|
+
/**
 * Parse one agent response into normalized risk signals.
 *
 * Candidate signals are looked up, in order, under `risks`, `signals`,
 * `data`, `results` and `result`. A response that is itself an array is
 * used as the candidate list directly, so a bare list of signals is not
 * collapsed into a single placeholder. Array candidates contribute one
 * signal per object item; a single object candidate contributes one signal.
 *
 * If nothing was extracted and the response has at least one own key, the
 * whole response is normalized as a single signal.
 *
 * @param {unknown} response - Raw agent response.
 * @param {object} agent - Agent descriptor forwarded to `normalizeRiskSignal`.
 * @returns {object[]} Normalized risk signals; empty for non-object input.
 */
function parseRiskSignalResponse(response, agent) {
    if (!response || typeof response !== 'object') {
        return [];
    }
    const isRecord = (value) => typeof value === 'object' && value !== null;
    const candidates = Array.isArray(response)
        ? response
        : (response['risks'] ?? response['signals'] ?? response['data'] ?? response['results'] ?? response['result']);

    let signals = [];
    if (Array.isArray(candidates)) {
        signals = candidates
            .filter(isRecord)
            .map((item) => normalizeRiskSignal(item, agent));
    }
    else if (isRecord(candidates)) {
        signals = [normalizeRiskSignal(candidates, agent)];
    }

    // Last resort: treat the raw response as one signal.
    if (signals.length === 0 && Object.keys(response).length > 0) {
        signals.push(normalizeRiskSignal(response, agent));
    }
    return signals;
}
|
|
288
|
+
/**
 * Shape a raw object into a RiskSignal record.
 *
 * Accepts camelCase and snake_case spellings. A missing id gets a random
 * UUID and a missing source falls back to "<agent domain>/<agent name>";
 * missing text fields become empty strings.
 */
function normalizeRiskSignal(raw, agent) {
    const identifier = raw.id ?? crypto.randomUUID();
    const narrative = raw.description ?? raw.summary ?? '';
    const origin = raw.source ?? `${agent.domain}/${agent.agent}`;
    const correlated = raw.correlatedWith ?? raw.correlated_with;
    const cause = raw.rootCause ?? raw.root_cause ?? '';
    const suggestion = raw.mitigationSuggestion ?? raw.mitigation_suggestion ?? raw.mitigation ?? '';
    return {
        id: String(identifier),
        category: normalizeRiskCategory(raw.category),
        severity: normalizeSeverity(raw.severity),
        description: String(narrative),
        source: String(origin),
        correlatedWith: normalizeStringArray(correlated),
        rootCause: String(cause),
        mitigationSuggestion: String(suggestion),
    };
}
|
|
301
|
+
/**
 * Build the reliability report from the model-reliability workstream.
 *
 * Without a workstream result, or when none of its agents succeeded, an
 * all-zero report with recommendation 'caution' is returned. Otherwise the
 * hallucination, faithfulness, bias and stress agents each contribute a
 * score (via `extractScore`), and the red-team and adversarial agents
 * contribute findings taken from `findings`, `vulnerabilities` or `issues`.
 *
 * The overall score is the sum of the four scores (absent ones count as 0)
 * divided by the number of scoring agents that reported. The recommendation
 * is 'reliable' from 0.8, 'caution' from 0.5, else 'unreliable'. Returned
 * scores go through `clamp01`.
 */
function extractReliabilityReport(ws) {
    if (!ws || ws.successCount === 0) {
        return {
            overallScore: 0,
            hallucinationScore: 0,
            faithfulnessScore: 0,
            biasScore: 0,
            stressResilienceScore: 0,
            redTeamFindings: [],
            recommendation: 'caution',
        };
    }

    const scoringAgents = new Set(['hallucination', 'faithfulness', 'bias', 'stress']);
    const findingAgents = new Set(['red-team', 'adversarial']);
    const scoreByAgent = new Map();
    const redTeamFindings = [];

    for (const result of ws.agentResults) {
        // Same skip condition as the original, kept in its negative form.
        if (result.status < 200 || result.status >= 300 || !result.response) {
            continue;
        }
        const body = result.response;
        const agentName = result.agent.agent;
        const score = extractScore(body);
        if (scoringAgents.has(agentName)) {
            scoreByAgent.set(agentName, score);
        }
        else if (findingAgents.has(agentName)) {
            const reported = body['findings'] ?? body['vulnerabilities'] ?? body['issues'];
            if (Array.isArray(reported)) {
                // Non-string findings are kept as their JSON text.
                redTeamFindings.push(...reported.map((entry) => (typeof entry === 'string' ? entry : JSON.stringify(entry))));
            }
        }
    }

    const hallucinationScore = scoreByAgent.get('hallucination') ?? 0;
    const faithfulnessScore = scoreByAgent.get('faithfulness') ?? 0;
    const biasScore = scoreByAgent.get('bias') ?? 0;
    const stressResilienceScore = scoreByAgent.get('stress') ?? 0;

    const reportedCount = scoreByAgent.size;
    let overallScore = 0;
    if (reportedCount > 0) {
        overallScore = (hallucinationScore + faithfulnessScore + biasScore + stressResilienceScore) / reportedCount;
    }

    let recommendation = 'unreliable';
    if (overallScore >= 0.8) {
        recommendation = 'reliable';
    }
    else if (overallScore >= 0.5) {
        recommendation = 'caution';
    }

    return {
        overallScore: clamp01(overallScore),
        hallucinationScore: clamp01(hallucinationScore),
        faithfulnessScore: clamp01(faithfulnessScore),
        biasScore: clamp01(biasScore),
        stressResilienceScore: clamp01(stressResilienceScore),
        redTeamFindings,
        recommendation,
    };
}
|
|
368
|
+
// ============================================================================
|
|
369
|
+
// Convergence: Scenario Matrix
|
|
370
|
+
// ============================================================================
|
|
371
|
+
/**
 * Build one cross-reference entry per scenario, listing the ids of the risk
 * signals related to it.
 *
 * A signal is related to a scenario when either:
 *  - the scenario type aligns with the signal category
 *    (failure/deployment -> operational, integration -> integration,
 *    scaling -> performance), or
 *  - their descriptions share at least two distinct keywords, where a
 *    keyword is a lower-cased whitespace-separated token longer than three
 *    characters.
 *
 * Keywords are compared as sets, so a word repeated inside one description
 * counts once. Keyword sets are computed at most once per scenario and once
 * per signal.
 *
 * @param {Array<{id: string, type: string, description: string}>} scenarios
 * @param {Array<{id: string, category: string, description: string}>} riskSignals
 * @returns {Array<{scenarioId: string, relatedRiskSignals: string[], reliabilityImpact: string}>}
 */
function buildCrossReferences(scenarios, riskSignals) {
    const alignedCategoryByType = new Map([
        ['failure', 'operational'],
        ['integration', 'integration'],
        ['scaling', 'performance'],
        ['deployment', 'operational'],
    ]);
    const keywordsOf = (text) => new Set(text.toLowerCase().split(/\s+/).filter((word) => word.length > 3));

    // Signal keywords are built lazily: a signal that only ever matches by
    // category never has its description tokenized.
    const signalKeywordCache = new Map();
    const signalKeywords = (signal) => {
        let words = signalKeywordCache.get(signal);
        if (words === undefined) {
            words = keywordsOf(signal.description);
            signalKeywordCache.set(signal, words);
        }
        return words;
    };

    return scenarios.map((scenario) => {
        const alignedCategory = alignedCategoryByType.get(scenario.type);
        let scenarioWords;
        const related = riskSignals.filter((signal) => {
            if (alignedCategory !== undefined && signal.category === alignedCategory) {
                return true;
            }
            if (scenarioWords === undefined) {
                scenarioWords = keywordsOf(scenario.description);
            }
            let shared = 0;
            for (const word of signalKeywords(signal)) {
                if (scenarioWords.has(word)) {
                    shared += 1;
                }
            }
            return shared >= 2;
        });
        return {
            scenarioId: scenario.id,
            relatedRiskSignals: related.map((signal) => signal.id),
            reliabilityImpact: related.length > 0
                ? `${related.length} risk signal(s) correlated — review mitigation alignment`
                : 'No directly correlated risk signals',
        };
    });
}
|
|
402
|
+
/**
 * Derive the overall risk level from risk signals and scenarios.
 *
 * - 'critical': any critical-severity signal, or any scenario that is both
 *   critical-impact and high-probability.
 * - 'high': at least three high-severity signals, or at least two scenarios
 *   with high or critical impact.
 * - 'medium': at least one high-severity signal, or at least one scenario
 *   with high or critical impact.
 * - 'low': otherwise.
 *
 * Critical-impact scenarios that are not high-probability are counted with
 * the high-impact ones, so they never rate lower than a high-impact
 * scenario would.
 *
 * @param {Array<{severity: string}>} riskSignals
 * @param {Array<{impact: string, probability: string}>} scenarios
 * @returns {'critical'|'high'|'medium'|'low'}
 */
function computeOverallRiskLevel(riskSignals, scenarios) {
    if (riskSignals.some((signal) => signal.severity === 'critical')) {
        return 'critical';
    }
    if (scenarios.some((scenario) => scenario.impact === 'critical' && scenario.probability === 'high')) {
        return 'critical';
    }
    const highSignals = riskSignals.filter((signal) => signal.severity === 'high').length;
    const severeScenarios = scenarios.filter((scenario) => scenario.impact === 'high' || scenario.impact === 'critical').length;
    if (highSignals >= 3 || severeScenarios >= 2) {
        return 'high';
    }
    if (highSignals >= 1 || severeScenarios >= 1) {
        return 'medium';
    }
    return 'low';
}
|
|
418
|
+
// ============================================================================
|
|
419
|
+
// DecisionGraph Write
|
|
420
|
+
// ============================================================================
|
|
421
|
+
/**
 * Persist the Pass 2 outputs as graph nodes and return the created ids.
 *
 * Nodes are created in this order: one per scenario (each also recorded
 * with the tracker), one per risk signal, one reliability report node, and
 * finally one convergence node summarizing the scenario matrix. Scenario,
 * signal and report nodes derive from the first five Pass 1 node ids; the
 * convergence node derives from the first ten ids created here.
 *
 * @returns {string[]} Ids of all created nodes, in creation order.
 */
function writeToGraph(graph, scenarios, riskSignals, reliabilityReport, scenarioMatrix, pass1Nodes, tracker, pass) {
    const createdIds = [];
    const upstreamIds = pass1Nodes.map((upstream) => upstream.id).slice(0, 5);
    // A new descriptor object per use, as no two records share one.
    const scenarioAgent = () => ({ domain: 'simulator', agent: 'scenario', role: 'Scenario Generator' });

    // --- Scenarios: one node plus one tracker record each ---
    for (const scenario of scenarios) {
        const scenarioNode = graph.createNode({
            type: 'scenario',
            name: `Scenario: ${scenario.name}`,
            content: scenario,
            summary: `${scenario.type} scenario — ${scenario.description.slice(0, 100)}`,
            producedBy: scenarioAgent(),
            pass,
            derivedFrom: upstreamIds,
            confidence: probabilityToConfidence(scenario.probability),
            tags: [`scenario:${scenario.type}`, `impact:${scenario.impact}`, `probability:${scenario.probability}`],
        });
        createdIds.push(scenarioNode.id);
        tracker.recordInvocation({
            agent: scenarioAgent(),
            pass,
            passName: 'Simulation & Scenario Modeling',
            workstream: 'scenario-generation',
            startedAt: new Date().toISOString(),
            completedAt: new Date().toISOString(),
            durationMs: 0,
            inputNodeIds: upstreamIds,
            outputNodeIds: [scenarioNode.id],
            artifactNumbers: [4],
            status: 'success',
            confidence: probabilityToConfidence(scenario.probability),
        });
    }

    // --- Risk signals ---
    for (const signal of riskSignals) {
        const signalNode = graph.createNode({
            type: 'risk_signal',
            name: `Risk Signal: ${signal.description.slice(0, 60)}`,
            content: signal,
            summary: `${signal.category} risk (${signal.severity}) — ${signal.description.slice(0, 100)}`,
            // Agent name is the part of `source` after the first '/'.
            producedBy: { domain: 'sentinel', agent: signal.source.split('/')[1] ?? 'anomaly', role: 'Risk Detection' },
            pass,
            derivedFrom: upstreamIds,
            confidence: severityToConfidence(signal.severity),
            tags: [`risk:${signal.category}`, `severity:${signal.severity}`],
        });
        createdIds.push(signalNode.id);
    }

    // --- Reliability report ---
    const { recommendation, overallScore } = reliabilityReport;
    const reportNode = graph.createNode({
        type: 'reliability_report',
        name: 'Model Reliability Report',
        content: reliabilityReport,
        summary: `Model reliability: ${recommendation} (overall score: ${(overallScore * 100).toFixed(0)}%)`,
        producedBy: { domain: 'test-bench', agent: 'quality', role: 'Model Reliability Assessment' },
        pass,
        derivedFrom: upstreamIds,
        confidence: overallScore,
        tags: [`reliability:${recommendation}`],
    });
    createdIds.push(reportNode.id);

    // --- Convergence node for the scenario matrix ---
    const scenarioCount = scenarioMatrix.scenarios.length;
    const riskSignalCount = scenarioMatrix.riskSignals.length;
    const convergenceNode = graph.createNode({
        type: 'scenario',
        name: 'Scenario Matrix (Convergence)',
        content: {
            scenarioCount,
            riskSignalCount,
            reliabilityScore: scenarioMatrix.reliabilityReport.overallScore,
            overallRiskLevel: scenarioMatrix.overallRiskLevel,
            crossReferenceCount: scenarioMatrix.crossReferences.length,
        },
        summary: `Pass 2 convergence: ${scenarioCount} scenarios, ${riskSignalCount} risk signals, overall risk: ${scenarioMatrix.overallRiskLevel}`,
        producedBy: { domain: 'pipeline', agent: 'pass2-convergence', role: 'Scenario Matrix Builder' },
        pass,
        derivedFrom: createdIds.slice(0, 10),
        // A zero (or negative) overall score falls back to 0.5.
        confidence: reliabilityReport.overallScore > 0 ? reliabilityReport.overallScore : 0.5,
        tags: ['convergence', `risk-level:${scenarioMatrix.overallRiskLevel}`],
    });
    createdIds.push(convergenceNode.id);

    return createdIds;
}
|
|
503
|
+
// ============================================================================
|
|
504
|
+
// Default Agent Invoker
|
|
505
|
+
// ============================================================================
|
|
506
|
+
/**
 * Default agent invoker: calls every agent concurrently through
 * `executeAgentsInvokeCommand` (loaded on demand from the agents command
 * module) and returns one result record per agent, in input order.
 *
 * A failure while loading or invoking is not thrown; it becomes a record
 * with status 500 and the error text under `response.error`.
 *
 * @param {Array<{domain: string, agent: string}>} agents - Agents to invoke.
 * @param {object} payload - Serialized to JSON and sent to each agent.
 * @param {number} pass - Pass number stamped on every record.
 * @param {number} [_timeoutMs] - Accepted for signature parity; not used here.
 * @returns {Promise<object[]>} Records of the form
 *   { agent, status, response, durationMs, pass, timestamp }.
 */
async function defaultInvokeAgents(agents, payload, pass, _timeoutMs) {
    const invokeOne = async (agent) => {
        const startedMs = Date.now();
        const timestamp = new Date().toISOString();
        const toRecord = (status, response) => ({
            agent,
            status,
            response,
            durationMs: Date.now() - startedMs,
            pass,
            timestamp,
        });
        try {
            const { executeAgentsInvokeCommand } = await import('../../commands/agents.js');
            const result = await executeAgentsInvokeCommand(agent.domain, agent.agent, JSON.stringify(payload), { format: 'json' });
            return toRecord(result.status, result.response);
        }
        catch (err) {
            return toRecord(500, { error: err instanceof Error ? err.message : String(err) });
        }
    };

    const settled = await Promise.allSettled(agents.map((agent) => invokeOne(agent)));
    return settled.map((outcome, index) => (outcome.status === 'fulfilled'
        ? outcome.value
        : {
            agent: agents[index],
            status: 500,
            response: { error: String(outcome.reason) },
            durationMs: 0,
            pass,
            timestamp: new Date().toISOString(),
        }));
}
|
|
553
|
+
// ============================================================================
|
|
554
|
+
// Minimum Scenario Enforcement
|
|
555
|
+
// ============================================================================
|
|
556
|
+
/**
 * Ensure at least 3 scenarios exist with required types.
 *
 * Mutates `scenarios` in place: appends a default 'deployment' scenario when
 * none is present, a default 'failure' scenario when none is present, and then
 * pads with 'scaling' scenarios until the array holds at least three entries.
 *
 * @param {Array<object>} scenarios - Scenario list to top up (modified in place).
 * @param {string} prompt - Prompt text; its first 80 characters are embedded in
 *   the generated descriptions. A null, undefined or non-string value is
 *   coerced to a string (null/undefined become '') instead of throwing.
 * @returns {void}
 */
function ensureMinimumScenarios(scenarios, prompt) {
    // Coerce once so a missing prompt cannot crash the fallback generation.
    const promptExcerpt = String(prompt ?? '').slice(0, 80);
    const hasDeployment = scenarios.some((s) => s.type === 'deployment');
    const hasFailure = scenarios.some((s) => s.type === 'failure');
    if (!hasDeployment) {
        scenarios.push({
            id: crypto.randomUUID(),
            type: 'deployment',
            name: 'Standard Phased Deployment',
            description: `Phased rollout for: ${promptExcerpt}`,
            assumptions: ['Standard deployment infrastructure available', 'Team capacity sufficient for pilot'],
            probability: 'medium',
            impact: 'medium',
            expectedOutcome: 'Successful deployment with standard risk profile',
            mitigationStrategy: 'Phased rollout with rollback capability at each stage',
            metrics: { deploymentSuccessProbability: 0.75, estimatedRecoveryTime: 'PT4H' },
        });
    }
    if (!hasFailure) {
        scenarios.push({
            id: crypto.randomUUID(),
            type: 'failure',
            name: 'Integration Failure Scenario',
            description: `Integration failure during: ${promptExcerpt}`,
            assumptions: ['External system API may change without notice'],
            probability: 'medium',
            impact: 'high',
            expectedOutcome: 'Service degradation requiring manual intervention',
            mitigationStrategy: 'Circuit breaker pattern with automated failover',
            metrics: { failureProbability: 0.25, estimatedRecoveryTime: 'PT2H' },
        });
    }
    // Both required types now exist, so this only pads up to the minimum count.
    while (scenarios.length < 3) {
        scenarios.push({
            id: crypto.randomUUID(),
            type: 'scaling',
            name: 'Load Scaling Scenario',
            description: `System behavior under 10x load increase for: ${promptExcerpt}`,
            assumptions: ['Cloud auto-scaling enabled', 'Database can handle connection pooling'],
            probability: 'low',
            impact: 'medium',
            expectedOutcome: 'Graceful degradation under peak load',
            mitigationStrategy: 'Horizontal scaling with load balancing and caching',
            metrics: { reliabilityProbability: 0.85 },
        });
    }
}
|
|
603
|
+
// ============================================================================
|
|
604
|
+
// Normalization Helpers
|
|
605
|
+
// ============================================================================
|
|
606
|
+
/**
 * Map an arbitrary value onto one of the known scenario types.
 *
 * The value is stringified, lower-cased, and every whitespace character or
 * hyphen is replaced by an underscore before matching.
 *
 * @param {unknown} raw - Candidate scenario type.
 * @returns {string} The matched type, or 'deployment' when nothing matches.
 */
function normalizeScenarioType(raw) {
    const candidate = String(raw ?? '')
        .toLowerCase()
        .replace(/[\s-]/g, '_');
    switch (candidate) {
        case 'deployment':
        case 'integration':
        case 'failure':
        case 'scaling':
        case 'edge_case':
            return candidate;
        default:
            return 'deployment';
    }
}
|
|
611
|
+
/**
 * Normalize a probability label to 'high', 'medium' or 'low'.
 *
 * @param {unknown} raw - Candidate label; compared case-insensitively.
 * @returns {string} 'high' or 'low' when matched exactly, otherwise 'medium'.
 */
function normalizeProbability(raw) {
    const level = String(raw ?? '').toLowerCase();
    return level === 'high' || level === 'low' ? level : 'medium';
}
|
|
619
|
+
/**
 * Normalize an impact label to 'critical', 'high', 'medium' or 'low'.
 *
 * @param {unknown} raw - Candidate label; compared case-insensitively.
 * @returns {string} The matched label, or 'medium' for anything unrecognized.
 */
function normalizeImpact(raw) {
    const label = String(raw ?? '').toLowerCase();
    const recognized = ['critical', 'high', 'low'];
    return recognized.includes(label) ? label : 'medium';
}
|
|
629
|
+
/**
 * Map an arbitrary value onto one of the known risk categories.
 *
 * @param {unknown} raw - Candidate category; compared case-insensitively.
 * @returns {string} The matched category, or 'operational' when nothing matches.
 */
function normalizeRiskCategory(raw) {
    const category = String(raw ?? '').toLowerCase();
    const known = ['operational', 'integration', 'security', 'compliance', 'performance'];
    return known.includes(category) ? category : 'operational';
}
|
|
634
|
+
/**
 * Normalize a severity label to 'critical', 'high', 'medium' or 'low'.
 *
 * @param {unknown} raw - Candidate label; compared case-insensitively.
 * @returns {string} The matched label, or 'medium' for anything unrecognized.
 */
function normalizeSeverity(raw) {
    const severity = String(raw ?? '').toLowerCase();
    switch (severity) {
        case 'critical':
        case 'high':
        case 'low':
            return severity;
        default:
            return 'medium';
    }
}
|
|
644
|
+
/**
 * Coerce a value into an array of strings.
 *
 * @param {unknown} raw - An array (each element is stringified), a single
 *   string (wrapped in a one-element array), or anything else.
 * @returns {string[]} The normalized array; empty for unsupported inputs.
 */
function normalizeStringArray(raw) {
    if (typeof raw === 'string') {
        return [raw];
    }
    return Array.isArray(raw) ? raw.map((item) => String(item)) : [];
}
|
|
651
|
+
/**
 * Extract the recognized scenario metrics from an arbitrary value.
 *
 * The three probability fields are kept only when they are numbers and are
 * passed through `clamp01`; `estimatedRecoveryTime` is kept only when it is a
 * string. Fields that fail their type check are present with value `undefined`.
 *
 * @param {unknown} raw - Candidate metrics object.
 * @returns {object} `{}` when `raw` is not an object, otherwise the four-field record.
 */
function normalizeMetrics(raw) {
    if (raw === null || typeof raw !== 'object') {
        return {};
    }
    // Keep a probability field only when it is numeric, clamped by clamp01.
    const probabilityOf = (key) => (typeof raw[key] === 'number' ? clamp01(raw[key]) : undefined);
    const reliabilityProbability = probabilityOf('reliabilityProbability');
    const failureProbability = probabilityOf('failureProbability');
    const deploymentSuccessProbability = probabilityOf('deploymentSuccessProbability');
    let estimatedRecoveryTime;
    if (typeof raw['estimatedRecoveryTime'] === 'string') {
        estimatedRecoveryTime = raw['estimatedRecoveryTime'];
    }
    return {
        reliabilityProbability,
        failureProbability,
        deploymentSuccessProbability,
        estimatedRecoveryTime,
    };
}
|
|
662
|
+
/**
 * Pull a score out of an agent response and express it through `clamp01`.
 *
 * The first non-nullish value among `score`, `confidence`, `value` and
 * `result` is used:
 *  - a number is passed to `clamp01` as-is;
 *  - a string is parsed with parseFloat, and a parsed value above 1 is divided
 *    by 100 before being passed to `clamp01`.
 * Anything else yields 0, including a missing response, a NaN number and an
 * unparseable string.
 *
 * @param {object|null|undefined} resp - Agent response to inspect.
 * @returns {number} The extracted score, or 0 when none can be extracted.
 */
function extractScore(resp) {
    // Optional chaining keeps a null/undefined response from throwing.
    const score = resp?.['score'] ?? resp?.['confidence'] ?? resp?.['value'] ?? resp?.['result'];
    // A NaN number carries no information; let it fall through to the 0 default
    // rather than leaking NaN out of clamp01.
    if (typeof score === 'number' && !Number.isNaN(score)) {
        return clamp01(score);
    }
    if (typeof score === 'string') {
        const parsed = Number.parseFloat(score);
        if (!Number.isNaN(parsed)) {
            return clamp01(parsed > 1 ? parsed / 100 : parsed);
        }
    }
    return 0;
}
|
|
673
|
+
/**
 * Clamp a number to the closed interval [0, 1].
 *
 * @param {number} n - Value to clamp.
 * @returns {number} 0 for values below 0, 1 for values above 1, otherwise `n`
 *   (NaN is returned unchanged).
 */
function clamp01(n) {
    const atLeastZero = Math.max(0, n);
    return Math.min(1, atLeastZero);
}
|
|
676
|
+
/**
 * Translate a probability label into a confidence value.
 *
 * @param {string} p - 'high', 'medium' or 'low'.
 * @returns {number|undefined} 0.9, 0.7 or 0.5 respectively; `undefined` for
 *   any other value.
 */
function probabilityToConfidence(p) {
    const confidenceByProbability = new Map([
        ['high', 0.9],
        ['medium', 0.7],
        ['low', 0.5],
    ]);
    return confidenceByProbability.get(p);
}
|
|
683
|
+
/**
 * Translate a severity label into a confidence value.
 *
 * @param {string} s - 'critical', 'high', 'medium' or 'low'.
 * @returns {number|undefined} 0.95, 0.85, 0.7 or 0.5 respectively; `undefined`
 *   for any other value.
 */
function severityToConfidence(s) {
    if (s === 'critical') {
        return 0.95;
    }
    if (s === 'high') {
        return 0.85;
    }
    if (s === 'medium') {
        return 0.7;
    }
    if (s === 'low') {
        return 0.5;
    }
    return undefined;
}
|
|
691
|
+
//# sourceMappingURL=pass2-simulation.js.map
|