@consensus-tools/universal 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/consensus-llm.test.d.ts +2 -0
- package/dist/consensus-llm.test.d.ts.map +1 -0
- package/dist/consensus-llm.test.js +244 -0
- package/dist/consensus-llm.test.js.map +1 -0
- package/dist/defaults.d.ts +10 -0
- package/dist/defaults.d.ts.map +1 -1
- package/dist/defaults.js +63 -2
- package/dist/defaults.js.map +1 -1
- package/dist/index.d.ts +13 -11
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +130 -49
- package/dist/index.js.map +1 -1
- package/dist/persona-reviewer-factory.d.ts +22 -0
- package/dist/persona-reviewer-factory.d.ts.map +1 -0
- package/dist/persona-reviewer-factory.js +318 -0
- package/dist/persona-reviewer-factory.js.map +1 -0
- package/dist/reputation-manager.d.ts +38 -0
- package/dist/reputation-manager.d.ts.map +1 -0
- package/dist/reputation-manager.js +154 -0
- package/dist/reputation-manager.js.map +1 -0
- package/dist/reputation-manager.test.d.ts +2 -0
- package/dist/reputation-manager.test.d.ts.map +1 -0
- package/dist/reputation-manager.test.js +111 -0
- package/dist/reputation-manager.test.js.map +1 -0
- package/dist/risk-tiers.d.ts +10 -0
- package/dist/risk-tiers.d.ts.map +1 -0
- package/dist/risk-tiers.js +46 -0
- package/dist/risk-tiers.js.map +1 -0
- package/dist/risk-tiers.test.d.ts +2 -0
- package/dist/risk-tiers.test.d.ts.map +1 -0
- package/dist/risk-tiers.test.js +40 -0
- package/dist/risk-tiers.test.js.map +1 -0
- package/dist/types.d.ts +59 -6
- package/dist/types.d.ts.map +1 -1
- package/package.json +9 -9
- package/src/consensus-llm.test.ts +23 -4
- package/src/defaults.ts +10 -4
- package/src/index.ts +22 -18
- package/src/persona-reviewer-factory.ts +90 -70
- package/src/reputation-manager.ts +46 -31
- package/src/risk-tiers.test.ts +8 -0
- package/src/risk-tiers.ts +7 -5
|
@@ -17,10 +17,24 @@ import type { RiskTierMap } from "./types.js";
|
|
|
17
17
|
// 2. Risk tier check (low = fast-path regex only)
|
|
18
18
|
// 3. Parallel LLM calls per persona (with timeout + fallback)
|
|
19
19
|
// 4. Parse votes from LLM responses
|
|
20
|
-
// 5. Synthesize ConsensusInput
|
|
21
|
-
//
|
|
20
|
+
// 5. Synthesize ConsensusInput: ONE "allow" submission, all personas
|
|
21
|
+
// vote on it (YES = +1, NO = -1). resolveConsensus aggregates.
|
|
22
|
+
// 6. Determine action from consensus result
|
|
22
23
|
// 7. Return LlmDecisionResult
|
|
23
24
|
|
|
25
|
+
// ── Safe JSON Serialization ──────────────────────────────────────────
|
|
26
|
+
|
|
27
|
+
function safeStringify(obj: unknown, indent?: number): string {
|
|
28
|
+
const seen = new WeakSet();
|
|
29
|
+
return JSON.stringify(obj, (_key, value) => {
|
|
30
|
+
if (typeof value === "object" && value !== null) {
|
|
31
|
+
if (seen.has(value)) return "[Circular]";
|
|
32
|
+
seen.add(value);
|
|
33
|
+
}
|
|
34
|
+
return value;
|
|
35
|
+
}, indent);
|
|
36
|
+
}
|
|
37
|
+
|
|
24
38
|
// ── Vote Parsing ─────────────────────────────────────────────────────
|
|
25
39
|
|
|
26
40
|
interface ParsedVote {
|
|
@@ -29,18 +43,22 @@ interface ParsedVote {
|
|
|
29
43
|
rationale: string;
|
|
30
44
|
}
|
|
31
45
|
|
|
32
|
-
|
|
46
|
+
// Match YES/NO/REWRITE alone on its own line, with an optional "VOTE:" prefix (anchored to reduce injection risk)
|
|
47
|
+
const VOTE_LINE_PATTERN = /^(?:VOTE:\s*)?(YES|NO|REWRITE)\s*$/im;
|
|
48
|
+
// Fallback: match anywhere but only as a last resort
|
|
49
|
+
const VOTE_FALLBACK_PATTERN = /\b(YES|NO|REWRITE)\b/i;
|
|
33
50
|
const CONFIDENCE_PATTERN = /confidence[:\s]*([0-9]*\.?[0-9]+)/i;
|
|
34
51
|
|
|
35
52
|
function parseVoteFromLlm(response: string): ParsedVote | null {
|
|
36
|
-
|
|
53
|
+
// Prefer line-anchored match (harder to inject)
|
|
54
|
+
const lineMatch = response.match(VOTE_LINE_PATTERN);
|
|
55
|
+
const voteMatch = lineMatch ?? response.match(VOTE_FALLBACK_PATTERN);
|
|
37
56
|
if (!voteMatch) return null;
|
|
38
57
|
|
|
39
58
|
const vote = voteMatch[1]!.toUpperCase() as "YES" | "NO" | "REWRITE";
|
|
40
59
|
const confMatch = response.match(CONFIDENCE_PATTERN);
|
|
41
60
|
const confidence = confMatch?.[1] ? Math.min(1, Math.max(0, parseFloat(confMatch[1]))) : 0.5;
|
|
42
61
|
|
|
43
|
-
// Use the full response as rationale (stripped of vote/confidence lines)
|
|
44
62
|
const rationale = response
|
|
45
63
|
.replace(/^.*\b(YES|NO|REWRITE)\b.*$/im, "")
|
|
46
64
|
.replace(/^.*confidence.*$/im, "")
|
|
@@ -58,7 +76,6 @@ function buildPersonaPrompt(
|
|
|
58
76
|
args: Record<string, unknown>,
|
|
59
77
|
regexSignals: string[],
|
|
60
78
|
): ModelMessage[] {
|
|
61
|
-
// Use systemPrompt if available (EvalPersonaConfig), otherwise construct from role/bias
|
|
62
79
|
const evalPersona = persona as Partial<EvalPersonaConfig>;
|
|
63
80
|
const systemPrompt = evalPersona.systemPrompt ??
|
|
64
81
|
`You are a ${persona.role} reviewer named ${persona.name}.` +
|
|
@@ -68,18 +85,22 @@ function buildPersonaPrompt(
|
|
|
68
85
|
: "") +
|
|
69
86
|
(persona.failure_modes?.length
|
|
70
87
|
? ` Watch for: ${persona.failure_modes.join(", ")}.`
|
|
71
|
-
: "")
|
|
88
|
+
: "") +
|
|
89
|
+
` Respond ONLY with a VOTE line, CONFIDENCE line, and RATIONALE line. Do not echo the tool arguments.`;
|
|
72
90
|
|
|
73
91
|
const regexContext = regexSignals.length > 0
|
|
74
92
|
? `\n\nRegex pre-screen detected these signals:\n${regexSignals.map((s) => `- ${s}`).join("\n")}`
|
|
75
93
|
: "\n\nRegex pre-screen: no pattern matches detected.";
|
|
76
94
|
|
|
95
|
+
// Sanitize args: truncate to prevent prompt stuffing, use safe serialization
|
|
96
|
+
const argsStr = safeStringify(args, 2).slice(0, 2000);
|
|
97
|
+
|
|
77
98
|
const userPrompt =
|
|
78
99
|
`Evaluate this tool call for risks:\n\n` +
|
|
79
100
|
`Tool: ${toolName}\n` +
|
|
80
|
-
`Arguments
|
|
101
|
+
`Arguments:\n${argsStr}\n` +
|
|
81
102
|
regexContext +
|
|
82
|
-
`\n\nRespond with:\n` +
|
|
103
|
+
`\n\nRespond with exactly these three lines:\n` +
|
|
83
104
|
`VOTE: YES (safe to proceed), NO (block this action), or REWRITE (needs modification)\n` +
|
|
84
105
|
`CONFIDENCE: 0.0 to 1.0\n` +
|
|
85
106
|
`RATIONALE: Brief explanation of your decision`;
|
|
@@ -92,14 +113,21 @@ function buildPersonaPrompt(
|
|
|
92
113
|
|
|
93
114
|
// ── Regex Pre-Screen ─────────────────────────────────────────────────
|
|
94
115
|
|
|
116
|
+
// Fallback guard domains when configured guards have no matching configs
|
|
117
|
+
const FALLBACK_GUARDS = ["security", "compliance", "user-impact"];
|
|
118
|
+
|
|
95
119
|
function runRegexPreScreen(
|
|
96
120
|
toolName: string,
|
|
97
121
|
args: Record<string, unknown>,
|
|
98
122
|
guards: string[],
|
|
99
123
|
): string[] {
|
|
100
124
|
const signals: string[] = [];
|
|
125
|
+
// Use provided guards; if none of them has a GUARD_CONFIGS entry, fall back to FALLBACK_GUARDS
|
|
126
|
+
const effectiveGuards = guards.filter((g) => GUARD_CONFIGS[g]).length > 0
|
|
127
|
+
? guards
|
|
128
|
+
: FALLBACK_GUARDS;
|
|
101
129
|
|
|
102
|
-
for (const domain of
|
|
130
|
+
for (const domain of effectiveGuards) {
|
|
103
131
|
const config = GUARD_CONFIGS[domain];
|
|
104
132
|
if (!config) continue;
|
|
105
133
|
|
|
@@ -130,28 +158,25 @@ async function callLlmWithTimeout(
|
|
|
130
158
|
messages: ModelMessage[],
|
|
131
159
|
timeoutMs: number,
|
|
132
160
|
): Promise<string> {
|
|
133
|
-
|
|
134
|
-
const timer = setTimeout(() => controller.abort(), timeoutMs);
|
|
161
|
+
let timer: ReturnType<typeof setTimeout> | undefined;
|
|
135
162
|
|
|
136
163
|
try {
|
|
137
164
|
const result = await Promise.race([
|
|
138
165
|
model(messages),
|
|
139
166
|
new Promise<never>((_, reject) => {
|
|
140
|
-
|
|
141
|
-
reject(new Error("LLM call timed out")),
|
|
142
|
-
);
|
|
167
|
+
timer = setTimeout(() => reject(new Error("LLM call timed out")), timeoutMs);
|
|
143
168
|
}),
|
|
144
169
|
]);
|
|
145
170
|
return result;
|
|
146
171
|
} finally {
|
|
147
|
-
clearTimeout(timer);
|
|
172
|
+
if (timer) clearTimeout(timer);
|
|
148
173
|
}
|
|
149
174
|
}
|
|
150
175
|
|
|
151
176
|
// ── Regex Fallback Vote ──────────────────────────────────────────────
|
|
152
177
|
|
|
153
178
|
function regexFallbackVote(
|
|
154
|
-
|
|
179
|
+
_persona: PersonaConfig,
|
|
155
180
|
toolName: string,
|
|
156
181
|
args: Record<string, unknown>,
|
|
157
182
|
guards: string[],
|
|
@@ -164,10 +189,12 @@ function regexFallbackVote(
|
|
|
164
189
|
rationale: `Regex fallback: ${signals.join("; ")}`,
|
|
165
190
|
};
|
|
166
191
|
}
|
|
192
|
+
// When LLM is unavailable AND regex finds nothing, default to block for safety.
|
|
193
|
+
// This prevents fail-open when all LLMs are down.
|
|
167
194
|
return {
|
|
168
|
-
vote: "
|
|
169
|
-
confidence: 0.
|
|
170
|
-
rationale: "Regex fallback: no pattern matches
|
|
195
|
+
vote: "NO",
|
|
196
|
+
confidence: 0.3,
|
|
197
|
+
rationale: "Regex fallback: no pattern matches but LLM unavailable (fail-closed)",
|
|
171
198
|
};
|
|
172
199
|
}
|
|
173
200
|
|
|
@@ -197,7 +224,7 @@ export async function deliberate(
|
|
|
197
224
|
): Promise<LlmDecisionResult> {
|
|
198
225
|
const decisionId = `dec_${crypto.randomUUID().slice(0, 12)}`;
|
|
199
226
|
const personas = config.reputationManager.getPersonas();
|
|
200
|
-
const guards = config.guards ??
|
|
227
|
+
const guards = config.guards ?? FALLBACK_GUARDS;
|
|
201
228
|
|
|
202
229
|
// 1. Regex pre-screen
|
|
203
230
|
const regexSignals = runRegexPreScreen(toolName, args, guards);
|
|
@@ -205,7 +232,6 @@ export async function deliberate(
|
|
|
205
232
|
// 2. Risk tier check
|
|
206
233
|
const tier = classifyTool(toolName, config.riskTiers);
|
|
207
234
|
if (tier === "low") {
|
|
208
|
-
// Fast-path: regex only, no LLM calls
|
|
209
235
|
const hasRisk = regexSignals.length > 0;
|
|
210
236
|
return {
|
|
211
237
|
decisionId,
|
|
@@ -244,7 +270,6 @@ export async function deliberate(
|
|
|
244
270
|
};
|
|
245
271
|
}
|
|
246
272
|
|
|
247
|
-
// Unparseable response, fall back to regex
|
|
248
273
|
const fallback = regexFallbackVote(persona, toolName, args, guards);
|
|
249
274
|
return {
|
|
250
275
|
personaId: persona.id,
|
|
@@ -253,7 +278,6 @@ export async function deliberate(
|
|
|
253
278
|
source: "regex_fallback" as const,
|
|
254
279
|
};
|
|
255
280
|
} catch {
|
|
256
|
-
// LLM failure, fall back to regex
|
|
257
281
|
const fallback = regexFallbackVote(persona, toolName, args, guards);
|
|
258
282
|
return {
|
|
259
283
|
personaId: persona.id,
|
|
@@ -266,17 +290,21 @@ export async function deliberate(
|
|
|
266
290
|
);
|
|
267
291
|
|
|
268
292
|
// 4. Synthesize ConsensusInput for resolveConsensus()
|
|
269
|
-
//
|
|
293
|
+
//
|
|
294
|
+
// FIXED: Use a SINGLE "allow" submission. All personas vote on it.
|
|
295
|
+
// YES voters score +1, NO voters score -1, REWRITE voters score 0.
|
|
296
|
+
// This way resolveConsensus sees N votes on 1 submission, not N
|
|
297
|
+
// submissions with 1 vote each.
|
|
270
298
|
const now = new Date().toISOString();
|
|
271
299
|
const jobId = `job_facade_${decisionId}`;
|
|
300
|
+
const submissionId = `sub_${decisionId}_allow`;
|
|
272
301
|
|
|
273
|
-
// Create a minimal Job with the configured policy
|
|
274
302
|
const job = {
|
|
275
303
|
id: jobId,
|
|
276
304
|
boardId: "",
|
|
277
305
|
status: "SUBMITTED" as const,
|
|
278
306
|
title: `Deliberation: ${toolName}`,
|
|
279
|
-
description:
|
|
307
|
+
description: "",
|
|
280
308
|
createdByAgentId: "facade",
|
|
281
309
|
createdAt: now,
|
|
282
310
|
updatedAt: now,
|
|
@@ -288,33 +316,31 @@ export async function deliberate(
|
|
|
288
316
|
minParticipants: 1,
|
|
289
317
|
};
|
|
290
318
|
|
|
291
|
-
//
|
|
292
|
-
const submissions =
|
|
293
|
-
id:
|
|
319
|
+
// Single submission representing "allow this tool call"
|
|
320
|
+
const submissions = [{
|
|
321
|
+
id: submissionId,
|
|
294
322
|
jobId,
|
|
295
|
-
agentId:
|
|
323
|
+
agentId: "facade",
|
|
296
324
|
submittedAt: now,
|
|
297
|
-
summary:
|
|
298
|
-
artifacts: {
|
|
299
|
-
confidence:
|
|
325
|
+
summary: `Allow ${toolName}`,
|
|
326
|
+
artifacts: {},
|
|
327
|
+
confidence: 1.0,
|
|
300
328
|
requestedPayout: 0,
|
|
301
329
|
status: "SUBMITTED" as const,
|
|
302
|
-
}
|
|
330
|
+
}];
|
|
303
331
|
|
|
304
|
-
// Each persona votes
|
|
305
|
-
// and scores based on their confidence
|
|
332
|
+
// Each persona votes on the single submission
|
|
306
333
|
const votes = voteResults.map((v, i) => ({
|
|
307
334
|
id: `vote_${decisionId}_${i}`,
|
|
308
335
|
jobId,
|
|
309
336
|
agentId: v.personaId,
|
|
310
|
-
submissionId
|
|
337
|
+
submissionId,
|
|
311
338
|
score: v.vote === "YES" ? 1 : v.vote === "NO" ? -1 : 0,
|
|
312
339
|
weight: v.confidence,
|
|
313
340
|
rationale: v.rationale,
|
|
314
341
|
createdAt: now,
|
|
315
342
|
}));
|
|
316
343
|
|
|
317
|
-
// Reputation function from the manager
|
|
318
344
|
const reputation = (agentId: string) =>
|
|
319
345
|
config.reputationManager.getReputation(agentId);
|
|
320
346
|
|
|
@@ -326,45 +352,40 @@ export async function deliberate(
|
|
|
326
352
|
reputation,
|
|
327
353
|
};
|
|
328
354
|
|
|
329
|
-
let
|
|
355
|
+
let consensusTrace: Record<string, unknown>;
|
|
356
|
+
|
|
330
357
|
try {
|
|
331
|
-
|
|
358
|
+
const result: ConsensusResult = resolveConsensus(consensusInput);
|
|
359
|
+
consensusTrace = result.consensusTrace;
|
|
360
|
+
|
|
361
|
+
// Extract the actual weighted score from the consensus trace.
|
|
362
|
+
// resolveConsensus always returns a "winner" (the single submission),
|
|
363
|
+
// but the score may be negative (more NO than YES votes).
|
|
364
|
+
const traceScores = (consensusTrace as any)?.scores as Record<string, number> | undefined;
|
|
365
|
+
const submissionScore = traceScores?.[submissionId] ?? 0;
|
|
366
|
+
consensusTrace = { ...consensusTrace, submissionScore };
|
|
332
367
|
} catch {
|
|
333
|
-
|
|
334
|
-
const yesCount = voteResults.filter((v) => v.vote === "YES").length;
|
|
335
|
-
const majority = yesCount > voteResults.length / 2;
|
|
336
|
-
consensusResult = {
|
|
337
|
-
winners: majority ? ["allow"] : ["block"],
|
|
338
|
-
winningSubmissionIds: [],
|
|
339
|
-
consensusTrace: { policy: "fallback_majority", reason: "resolve_error" },
|
|
340
|
-
finalArtifact: null,
|
|
341
|
-
};
|
|
368
|
+
consensusTrace = { policy: "fallback_majority", reason: "resolve_error" };
|
|
342
369
|
}
|
|
343
370
|
|
|
344
|
-
// 6. Determine
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
371
|
+
// 6. Determine action from vote distribution (direct counting)
|
|
372
|
+
// resolveConsensus provides the audit trace; vote counting determines the action.
|
|
373
|
+
// This avoids the "always-a-winner" problem where resolveConsensus returns
|
|
374
|
+
// a winner even when the score is negative.
|
|
375
|
+
const yesCount = voteResults.filter((v) => v.vote === "YES").length;
|
|
376
|
+
const noCount = voteResults.filter((v) => v.vote === "NO").length;
|
|
377
|
+
const rewriteCount = voteResults.filter((v) => v.vote === "REWRITE").length;
|
|
350
378
|
|
|
351
379
|
let action: "allow" | "block" | "escalate";
|
|
352
|
-
if (
|
|
380
|
+
if (rewriteCount > voteResults.length / 2) {
|
|
381
|
+
action = "escalate";
|
|
382
|
+
} else if (yesCount > noCount) {
|
|
353
383
|
action = "allow";
|
|
354
|
-
} else if (dominantVote === "NO") {
|
|
355
|
-
action = "block";
|
|
356
384
|
} else {
|
|
357
|
-
action = "
|
|
358
|
-
}
|
|
359
|
-
|
|
360
|
-
// If no clear winner (empty winners), use simple vote counting
|
|
361
|
-
if (consensusResult.winners.length === 0) {
|
|
362
|
-
const yesCount = voteResults.filter((v) => v.vote === "YES").length;
|
|
363
|
-
const noCount = voteResults.filter((v) => v.vote === "NO").length;
|
|
364
|
-
action = yesCount >= noCount ? "allow" : "block";
|
|
385
|
+
action = "block";
|
|
365
386
|
}
|
|
366
387
|
|
|
367
|
-
// Compute aggregate score
|
|
388
|
+
// Compute aggregate score
|
|
368
389
|
const totalConfidence = voteResults.reduce((s, v) => s + v.confidence, 0);
|
|
369
390
|
const yesConfidence = voteResults
|
|
370
391
|
.filter((v) => v.vote === "YES")
|
|
@@ -376,11 +397,10 @@ export async function deliberate(
|
|
|
376
397
|
action,
|
|
377
398
|
votes: voteResults,
|
|
378
399
|
policy: config.policyType,
|
|
379
|
-
consensusTrace
|
|
400
|
+
consensusTrace,
|
|
380
401
|
aggregateScore,
|
|
381
402
|
};
|
|
382
403
|
|
|
383
|
-
// 7. Record decision for reputation tracking
|
|
384
404
|
config.reputationManager.recordDecision(result);
|
|
385
405
|
|
|
386
406
|
return result;
|
|
@@ -9,6 +9,9 @@ import type { FeedbackSignal, LlmDecisionResult } from "./types.js";
|
|
|
9
9
|
// Updates from human feedback signals (onFeedback), not self-consensus.
|
|
10
10
|
// Triggers persona respawn when reputation drops below threshold.
|
|
11
11
|
|
|
12
|
+
const MAX_DECISION_LOOKBACK = 100;
|
|
13
|
+
const MAX_FEEDBACK_LOOKBACK = 500;
|
|
14
|
+
|
|
12
15
|
export interface RespawnEvent {
|
|
13
16
|
oldPersona: PersonaConfig;
|
|
14
17
|
newPersona: PersonaConfig;
|
|
@@ -51,10 +54,16 @@ export class ReputationManager {
|
|
|
51
54
|
recordDecision(result: LlmDecisionResult): void {
|
|
52
55
|
this.decisions.set(result.decisionId, result);
|
|
53
56
|
this.decisionHistory.push(result);
|
|
54
|
-
|
|
55
|
-
|
|
57
|
+
|
|
58
|
+
// Cap both collections to prevent memory leaks
|
|
59
|
+
if (this.decisionHistory.length >= MAX_DECISION_LOOKBACK) {
|
|
56
60
|
this.decisionHistory.shift();
|
|
57
61
|
}
|
|
62
|
+
// Trim the feedback correlation map (keep most recent N entries)
|
|
63
|
+
if (this.decisions.size > MAX_FEEDBACK_LOOKBACK) {
|
|
64
|
+
const oldest = this.decisions.keys().next().value;
|
|
65
|
+
if (oldest) this.decisions.delete(oldest);
|
|
66
|
+
}
|
|
58
67
|
}
|
|
59
68
|
|
|
60
69
|
/** Process human feedback signal and update reputation. */
|
|
@@ -85,7 +94,7 @@ export class ReputationManager {
|
|
|
85
94
|
this.scores.set(change.persona_id, change.reputation_after);
|
|
86
95
|
}
|
|
87
96
|
|
|
88
|
-
// Check for respawn
|
|
97
|
+
// Check for respawn (collect respawns, then apply)
|
|
89
98
|
this.checkRespawn();
|
|
90
99
|
|
|
91
100
|
// Persist if store configured
|
|
@@ -94,39 +103,44 @@ export class ReputationManager {
|
|
|
94
103
|
return result.changes;
|
|
95
104
|
}
|
|
96
105
|
|
|
97
|
-
/** Check if any persona needs respawn. */
|
|
106
|
+
/** Check if any persona needs respawn. Collects replacements first to avoid mutation during iteration. */
|
|
98
107
|
private checkRespawn(): void {
|
|
108
|
+
const replacements: Array<{ index: number; old: PersonaConfig; rep: number }> = [];
|
|
109
|
+
|
|
110
|
+
// Collect personas that need respawn (don't mutate during scan)
|
|
99
111
|
for (let i = 0; i < this.personas.length; i++) {
|
|
100
112
|
const persona = this.personas[i]!;
|
|
101
113
|
const rep = this.scores.get(persona.id) ?? 0.55;
|
|
102
|
-
|
|
103
114
|
if (rep < this.threshold) {
|
|
104
|
-
|
|
105
|
-
const decisionRecords = this.decisionHistory.map((d) => ({
|
|
106
|
-
final_decision: d.action === "allow" ? "ALLOW" : "BLOCK",
|
|
107
|
-
votes: d.votes.map((v) => ({
|
|
108
|
-
persona_id: v.personaId,
|
|
109
|
-
vote: v.vote,
|
|
110
|
-
confidence: v.confidence,
|
|
111
|
-
})),
|
|
112
|
-
}));
|
|
113
|
-
|
|
114
|
-
const learning = buildLearningSummary(persona.id, decisionRecords);
|
|
115
|
-
const successor = mutatePersona(persona, learning);
|
|
116
|
-
|
|
117
|
-
// Replace persona
|
|
118
|
-
this.personas[i] = successor;
|
|
119
|
-
this.scores.delete(persona.id);
|
|
120
|
-
this.scores.set(successor.id, successor.reputation ?? 0.55);
|
|
121
|
-
|
|
122
|
-
this.onRespawn?.({
|
|
123
|
-
oldPersona: persona,
|
|
124
|
-
newPersona: successor,
|
|
125
|
-
reputation: rep,
|
|
126
|
-
reason: `Reputation ${rep.toFixed(3)} below threshold ${this.threshold}`,
|
|
127
|
-
});
|
|
115
|
+
replacements.push({ index: i, old: persona, rep });
|
|
128
116
|
}
|
|
129
117
|
}
|
|
118
|
+
|
|
119
|
+
// Apply replacements after scan
|
|
120
|
+
for (const { index, old, rep } of replacements) {
|
|
121
|
+
const decisionRecords = this.decisionHistory.map((d) => ({
|
|
122
|
+
final_decision: d.action === "allow" ? "ALLOW" : "BLOCK",
|
|
123
|
+
votes: d.votes.map((v) => ({
|
|
124
|
+
persona_id: v.personaId,
|
|
125
|
+
vote: v.vote,
|
|
126
|
+
confidence: v.confidence,
|
|
127
|
+
})),
|
|
128
|
+
}));
|
|
129
|
+
|
|
130
|
+
const learning = buildLearningSummary(old.id, decisionRecords);
|
|
131
|
+
const successor = mutatePersona(old, learning);
|
|
132
|
+
|
|
133
|
+
this.personas[index] = successor;
|
|
134
|
+
this.scores.delete(old.id);
|
|
135
|
+
this.scores.set(successor.id, successor.reputation ?? 0.55);
|
|
136
|
+
|
|
137
|
+
this.onRespawn?.({
|
|
138
|
+
oldPersona: old,
|
|
139
|
+
newPersona: successor,
|
|
140
|
+
reputation: rep,
|
|
141
|
+
reason: `Reputation ${rep.toFixed(3)} below threshold ${this.threshold}`,
|
|
142
|
+
});
|
|
143
|
+
}
|
|
130
144
|
}
|
|
131
145
|
|
|
132
146
|
/** Get current persona list (may include respawned successors). */
|
|
@@ -143,8 +157,9 @@ export class ReputationManager {
|
|
|
143
157
|
}
|
|
144
158
|
this.store.update((state) => {
|
|
145
159
|
(state as any).reputation = data;
|
|
146
|
-
}).catch(() => {
|
|
147
|
-
//
|
|
160
|
+
}).catch((err) => {
|
|
161
|
+
// Log persistence failures instead of silently swallowing
|
|
162
|
+
console.warn("[consensus] Reputation persistence failed:", err); // eslint-disable-line no-console
|
|
148
163
|
});
|
|
149
164
|
}
|
|
150
165
|
|
package/src/risk-tiers.test.ts
CHANGED
|
@@ -33,4 +33,12 @@ describe("classifyTool", () => {
|
|
|
33
33
|
expect(classifyTool("send_email", { send_email: "low" })).toBe("low");
|
|
34
34
|
expect(classifyTool("get_weather", { get_weather: "high" })).toBe("high");
|
|
35
35
|
});
|
|
36
|
+
|
|
37
|
+
it("prevents bypass via compound names (high-risk checked first)", () => {
|
|
38
|
+
// These start with read-like prefixes but contain destructive operations
|
|
39
|
+
expect(classifyTool("execute_and_log")).toBe("high");
|
|
40
|
+
expect(classifyTool("run_cleanup")).toBe("high");
|
|
41
|
+
expect(classifyTool("delete_then_verify")).toBe("high");
|
|
42
|
+
expect(classifyTool("send_and_check")).toBe("high");
|
|
43
|
+
});
|
|
36
44
|
});
|
package/src/risk-tiers.ts
CHANGED
|
@@ -28,7 +28,8 @@ const LOW_RISK_PATTERNS = [
|
|
|
28
28
|
/**
|
|
29
29
|
* Classify a tool name into a risk tier.
|
|
30
30
|
*
|
|
31
|
-
* Priority: user overrides >
|
|
31
|
+
* Priority: user overrides > high-risk patterns > low-risk patterns > default high.
|
|
32
|
+
* High-risk checked FIRST to prevent bypass via naming (e.g., "get_and_delete_user").
|
|
32
33
|
* Unknown tools default to high-risk (safe by default).
|
|
33
34
|
*/
|
|
34
35
|
export function classifyTool(toolName: string, overrides?: RiskTierMap): RiskTier {
|
|
@@ -36,14 +37,15 @@ export function classifyTool(toolName: string, overrides?: RiskTierMap): RiskTie
|
|
|
36
37
|
return overrides[toolName];
|
|
37
38
|
}
|
|
38
39
|
|
|
39
|
-
|
|
40
|
-
if (pattern.test(toolName)) return "low";
|
|
41
|
-
}
|
|
42
|
-
|
|
40
|
+
// Check high-risk FIRST to prevent bypass via compound names
|
|
43
41
|
for (const pattern of HIGH_RISK_PATTERNS) {
|
|
44
42
|
if (pattern.test(toolName)) return "high";
|
|
45
43
|
}
|
|
46
44
|
|
|
45
|
+
for (const pattern of LOW_RISK_PATTERNS) {
|
|
46
|
+
if (pattern.test(toolName)) return "low";
|
|
47
|
+
}
|
|
48
|
+
|
|
47
49
|
// Unknown tools default to high-risk (safe by default)
|
|
48
50
|
return "high";
|
|
49
51
|
}
|