clementine-agent 1.1.4 → 1.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/assistant.js +6 -2
- package/dist/agent/metacognition.d.ts +15 -0
- package/dist/agent/metacognition.js +28 -22
- package/dist/agent/stall-guard.d.ts +10 -2
- package/dist/agent/stall-guard.js +11 -2
- package/dist/cli/index.js +12 -23
- package/dist/gateway/notifications.js +12 -3
- package/dist/memory/store.js +14 -1
- package/dist/security/redact.d.ts +52 -0
- package/dist/security/redact.js +105 -0
- package/package.json +1 -1
package/dist/agent/assistant.js
CHANGED
|
@@ -3789,7 +3789,10 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
3789
3789
|
const cronProfile = agentSlug && agentSlug !== 'clementine'
|
|
3790
3790
|
? this.profileManager.get(agentSlug)
|
|
3791
3791
|
: null;
|
|
3792
|
-
|
|
3792
|
+
// Cron jobs deliver via side effects (sent emails, updated records, etc),
|
|
3793
|
+
// not chat text — pass mode='cron' so high_effort_low_output guard is
|
|
3794
|
+
// disabled. Loop detection and circular-reasoning checks stay active.
|
|
3795
|
+
const cronGuard = new StallGuard('cron');
|
|
3793
3796
|
const sdkOptions = this.buildOptions({
|
|
3794
3797
|
isHeartbeat: true,
|
|
3795
3798
|
cronTier: tier,
|
|
@@ -4271,7 +4274,8 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
4271
4274
|
logger.info(`Unleashed task ${jobName}: starting phase ${phase}`);
|
|
4272
4275
|
// Re-assert autonomous source — a chat message may have changed it between phases
|
|
4273
4276
|
setInteractionSource('autonomous');
|
|
4274
|
-
|
|
4277
|
+
// Unleashed phases run side-effect-heavy work; same logic as cron mode.
|
|
4278
|
+
const phaseGuard = new StallGuard('unleashed');
|
|
4275
4279
|
const sdkOptions = this.buildOptions({
|
|
4276
4280
|
isHeartbeat: true,
|
|
4277
4281
|
cronTier: tier,
|
|
@@ -28,7 +28,21 @@ export interface MetacognitiveSummary {
|
|
|
28
28
|
confidenceFinal: 'high' | 'medium' | 'low';
|
|
29
29
|
signals: string[];
|
|
30
30
|
}
|
|
31
|
+
/**
|
|
32
|
+
* Execution mode the monitor is observing. Chat sessions deliver via output
|
|
33
|
+
* text, so "many tool calls + zero output" is genuinely suspicious. Cron
|
|
34
|
+
* jobs (especially unleashed) deliver via side effects (sent emails, updated
|
|
35
|
+
* records, written files) — chat-text length is NOT the success signal, so
|
|
36
|
+
* the high_effort_low_output heuristic must be disabled or it produces
|
|
37
|
+
* 100+ false-positive interventions per run (observed 2026-04-26 on
|
|
38
|
+
* market-leader-followup which sent 17 real emails while this guard fired
|
|
39
|
+
* 169 times). Other heuristics (circular_reasoning via repeated identical
|
|
40
|
+
* tool calls, research_without_action via consecutive reads) stay active —
|
|
41
|
+
* those are real bug shapes regardless of mode.
|
|
42
|
+
*/
|
|
43
|
+
export type MetacognitiveMode = 'chat' | 'cron' | 'unleashed';
|
|
31
44
|
export declare class MetacognitiveMonitor {
|
|
45
|
+
private readonly mode;
|
|
32
46
|
private toolCalls;
|
|
33
47
|
private uniqueTools;
|
|
34
48
|
private consecutiveReads;
|
|
@@ -37,6 +51,7 @@ export declare class MetacognitiveMonitor {
|
|
|
37
51
|
private interventionCount;
|
|
38
52
|
private signals;
|
|
39
53
|
private confidence;
|
|
54
|
+
constructor(mode?: MetacognitiveMode);
|
|
40
55
|
/**
|
|
41
56
|
* Record a tool call. Returns a signal if the pattern is concerning.
|
|
42
57
|
*/
|
|
@@ -25,8 +25,8 @@ const ACTION_TOOLS = new Set([
|
|
|
25
25
|
'team_message', 'discord_channel_send', 'outlook_draft', 'outlook_send',
|
|
26
26
|
'set_timer', 'self_restart', 'feedback_log', 'teach_skill', 'create_tool',
|
|
27
27
|
]);
|
|
28
|
-
// ── MetacognitiveMonitor ────────────────────────────────────────────
|
|
29
28
|
export class MetacognitiveMonitor {
|
|
29
|
+
mode;
|
|
30
30
|
toolCalls = [];
|
|
31
31
|
uniqueTools = new Set();
|
|
32
32
|
consecutiveReads = 0;
|
|
@@ -35,6 +35,9 @@ export class MetacognitiveMonitor {
|
|
|
35
35
|
interventionCount = 0;
|
|
36
36
|
signals = [];
|
|
37
37
|
confidence = 'high';
|
|
38
|
+
constructor(mode = 'chat') {
|
|
39
|
+
this.mode = mode;
|
|
40
|
+
}
|
|
38
41
|
/**
|
|
39
42
|
* Record a tool call. Returns a signal if the pattern is concerning.
|
|
40
43
|
*/
|
|
@@ -95,31 +98,34 @@ export class MetacognitiveMonitor {
|
|
|
95
98
|
return signal;
|
|
96
99
|
}
|
|
97
100
|
// Signal: excessive tool calls with near-zero output.
|
|
98
|
-
//
|
|
99
|
-
//
|
|
100
|
-
//
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
if (
|
|
104
|
-
this.
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
reason: 'high_effort_low_output',
|
|
110
|
-
guidance: `You've made ${this.toolCalls.length} tool calls across ${this.uniqueTools.size} tools with only ${this.outputCharCount} chars of output. This is a runaway loop. Stopping now to prevent budget waste.`,
|
|
111
|
-
};
|
|
112
|
-
}
|
|
113
|
-
if (this.toolCalls.length > 20 && this.outputCharCount < 200) {
|
|
114
|
-
this.confidence = 'low';
|
|
115
|
-
if (!this.signals.includes('high_effort_low_output')) {
|
|
116
|
-
this.signals.push('high_effort_low_output');
|
|
101
|
+
// Chat scenarios deliver via output text, so this is meaningful there.
|
|
102
|
+
// Cron and unleashed scenarios deliver via side effects (emails sent,
|
|
103
|
+
// records updated, files written) — chat-text length is irrelevant.
|
|
104
|
+
// Skip entirely outside chat mode.
|
|
105
|
+
if (this.mode === 'chat') {
|
|
106
|
+
if (this.toolCalls.length >= 60 && this.outputCharCount < 200) {
|
|
107
|
+
this.confidence = 'low';
|
|
108
|
+
if (!this.signals.includes('high_effort_low_output')) {
|
|
109
|
+
this.signals.push('high_effort_low_output');
|
|
110
|
+
}
|
|
111
|
+
this.interventionCount++;
|
|
117
112
|
return {
|
|
118
|
-
type: '
|
|
113
|
+
type: 'intervene',
|
|
119
114
|
reason: 'high_effort_low_output',
|
|
120
|
-
guidance:
|
|
115
|
+
guidance: `You've made ${this.toolCalls.length} tool calls across ${this.uniqueTools.size} tools with only ${this.outputCharCount} chars of output. This is a runaway loop. Stopping now to prevent budget waste.`,
|
|
121
116
|
};
|
|
122
117
|
}
|
|
118
|
+
if (this.toolCalls.length > 20 && this.outputCharCount < 200) {
|
|
119
|
+
this.confidence = 'low';
|
|
120
|
+
if (!this.signals.includes('high_effort_low_output')) {
|
|
121
|
+
this.signals.push('high_effort_low_output');
|
|
122
|
+
return {
|
|
123
|
+
type: 'warn',
|
|
124
|
+
reason: 'high_effort_low_output',
|
|
125
|
+
guidance: 'You\'ve made 20+ tool calls with minimal output. Step back and simplify your approach.',
|
|
126
|
+
};
|
|
127
|
+
}
|
|
128
|
+
}
|
|
123
129
|
}
|
|
124
130
|
return { type: 'ok' };
|
|
125
131
|
}
|
|
@@ -11,7 +11,8 @@
|
|
|
11
11
|
* 3. recordToolCall() called for each tool_use block in the stream
|
|
12
12
|
* 4. After query: detectPromiseWithoutAction() + getSummary() for cross-query nudges
|
|
13
13
|
*/
|
|
14
|
-
import { type MetacognitiveSignal, type MetacognitiveSummary } from './metacognition.js';
|
|
14
|
+
import { type MetacognitiveMode, type MetacognitiveSignal, type MetacognitiveSummary } from './metacognition.js';
|
|
15
|
+
export type StallGuardMode = MetacognitiveMode;
|
|
15
16
|
export interface StallSummary {
|
|
16
17
|
metacognition: MetacognitiveSummary;
|
|
17
18
|
breakerActivated: boolean;
|
|
@@ -20,10 +21,17 @@ export interface StallSummary {
|
|
|
20
21
|
}
|
|
21
22
|
export declare class StallGuard {
|
|
22
23
|
private loopDetector;
|
|
23
|
-
private metacog;
|
|
24
|
+
private readonly metacog;
|
|
24
25
|
private breakerActive;
|
|
25
26
|
private breakerReason;
|
|
26
27
|
private toolCallLog;
|
|
28
|
+
/**
|
|
29
|
+
* @param mode 'chat' (default) keeps full output-text-driven heuristics.
|
|
30
|
+
* 'cron' / 'unleashed' disable the high_effort_low_output check
|
|
31
|
+
* since side effects, not chat text, are the deliverable for
|
|
32
|
+
* those execution contexts.
|
|
33
|
+
*/
|
|
34
|
+
constructor(mode?: StallGuardMode);
|
|
27
35
|
/**
|
|
28
36
|
* Check if a tool should be blocked. Called from canUseTool.
|
|
29
37
|
* When the breaker is active, denies read-only tools to force the agent
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
* 4. After query: detectPromiseWithoutAction() + getSummary() for cross-query nudges
|
|
13
13
|
*/
|
|
14
14
|
import { ToolLoopDetector } from './tool-loop-detector.js';
|
|
15
|
-
import { MetacognitiveMonitor } from './metacognition.js';
|
|
15
|
+
import { MetacognitiveMonitor, } from './metacognition.js';
|
|
16
16
|
import pino from 'pino';
|
|
17
17
|
const logger = pino({ name: 'clementine.stall-guard' });
|
|
18
18
|
// Only block SDK read tools — MCP tools (memory_read, etc.) are intentionally
|
|
@@ -21,10 +21,19 @@ const READ_ONLY_TOOLS = new Set(['Read', 'Glob', 'Grep', 'WebSearch', 'WebFetch'
|
|
|
21
21
|
// ── StallGuard ──────────────────────────────────────────────────────
|
|
22
22
|
export class StallGuard {
|
|
23
23
|
loopDetector = new ToolLoopDetector();
|
|
24
|
-
metacog
|
|
24
|
+
metacog;
|
|
25
25
|
breakerActive = false;
|
|
26
26
|
breakerReason = '';
|
|
27
27
|
toolCallLog = [];
|
|
28
|
+
/**
|
|
29
|
+
* @param mode 'chat' (default) keeps full output-text-driven heuristics.
|
|
30
|
+
* 'cron' / 'unleashed' disable the high_effort_low_output check
|
|
31
|
+
* since side effects, not chat text, are the deliverable for
|
|
32
|
+
* those execution contexts.
|
|
33
|
+
*/
|
|
34
|
+
constructor(mode = 'chat') {
|
|
35
|
+
this.metacog = new MetacognitiveMonitor(mode);
|
|
36
|
+
}
|
|
28
37
|
/**
|
|
29
38
|
* Check if a tool should be blocked. Called from canUseTool.
|
|
30
39
|
* When the breaker is active, denies read-only tools to force the agent
|
package/dist/cli/index.js
CHANGED
|
@@ -1227,19 +1227,13 @@ async function cmdConfigKeychainFixAcl(opts) {
|
|
|
1227
1227
|
const RED = '\x1b[0;31m';
|
|
1228
1228
|
const RESET = '\x1b[0m';
|
|
1229
1229
|
const entries = listClementineKeychainEntries();
|
|
1230
|
-
const ours = entries.filter(e => e.isClementine);
|
|
1231
|
-
const foreign = entries.filter(e => !e.isClementine);
|
|
1232
1230
|
console.log();
|
|
1233
|
-
console.log(` ${BOLD}Found ${entries.length} entr${entries.length === 1 ? 'y' : 'ies'}
|
|
1234
|
-
|
|
1231
|
+
console.log(` ${BOLD}Found ${entries.length} clementine-agent keychain entr${entries.length === 1 ? 'y' : 'ies'}.${RESET}`);
|
|
1232
|
+
for (const e of entries)
|
|
1233
|
+
console.log(` ${DIM}${e.account}${RESET}`);
|
|
1235
1234
|
console.log();
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
console.log(` [${tag}] ${e.service}/${e.account}`);
|
|
1239
|
-
}
|
|
1240
|
-
console.log();
|
|
1241
|
-
if (ours.length === 0) {
|
|
1242
|
-
console.log(` ${GREEN}Nothing Clementine-shaped to fix.${RESET}`);
|
|
1235
|
+
if (entries.length === 0) {
|
|
1236
|
+
console.log(` ${GREEN}Nothing to fix.${RESET}`);
|
|
1243
1237
|
console.log();
|
|
1244
1238
|
return;
|
|
1245
1239
|
}
|
|
@@ -1250,34 +1244,29 @@ async function cmdConfigKeychainFixAcl(opts) {
|
|
|
1250
1244
|
}
|
|
1251
1245
|
console.log(` ${BOLD}Fixing ACLs...${RESET}`);
|
|
1252
1246
|
console.log(` ${DIM}macOS may ask for your login keychain password (the system prompt — it DOES appear).${RESET}`);
|
|
1253
|
-
console.log(` ${DIM}
|
|
1247
|
+
console.log(` ${DIM}You may also be asked to "Always Allow" — pick that.${RESET}`);
|
|
1254
1248
|
console.log();
|
|
1255
1249
|
const results = fixAllClementineEntries();
|
|
1256
1250
|
let okCount = 0;
|
|
1257
1251
|
let failCount = 0;
|
|
1258
|
-
let skipCount = 0;
|
|
1259
1252
|
for (const r of results) {
|
|
1260
1253
|
if (r.status === 'fixed') {
|
|
1261
|
-
console.log(` ${GREEN}✓${RESET} ${r.
|
|
1254
|
+
console.log(` ${GREEN}✓${RESET} ${r.account}`);
|
|
1262
1255
|
okCount++;
|
|
1263
1256
|
}
|
|
1264
|
-
else if (r.status === 'skipped-foreign') {
|
|
1265
|
-
skipCount++;
|
|
1266
|
-
}
|
|
1267
1257
|
else {
|
|
1268
|
-
console.log(` ${RED}✗${RESET} ${r.
|
|
1258
|
+
console.log(` ${RED}✗${RESET} ${r.account} ${DIM}— ${r.error}${RESET}`);
|
|
1269
1259
|
failCount++;
|
|
1270
1260
|
}
|
|
1271
1261
|
}
|
|
1272
1262
|
console.log();
|
|
1273
1263
|
if (failCount === 0) {
|
|
1274
|
-
console.log(` ${GREEN}All ${okCount}
|
|
1275
|
-
if (skipCount > 0)
|
|
1276
|
-
console.log(` ${DIM}(${skipCount} foreign entr${skipCount === 1 ? 'y' : 'ies'} left untouched.)${RESET}`);
|
|
1264
|
+
console.log(` ${GREEN}All ${okCount} entries fixed.${RESET} ${DIM}Future reads via the security CLI succeed silently.${RESET}`);
|
|
1277
1265
|
}
|
|
1278
1266
|
else {
|
|
1279
|
-
console.log(` ${YELLOW}${okCount} fixed, ${failCount} failed
|
|
1280
|
-
console.log(` ${DIM}Failed entries can be fixed manually in Keychain Access.app
|
|
1267
|
+
console.log(` ${YELLOW}${okCount} fixed, ${failCount} failed.${RESET}`);
|
|
1268
|
+
console.log(` ${DIM}Failed entries can be fixed manually in Keychain Access.app:${RESET}`);
|
|
1269
|
+
console.log(` ${DIM} search "clementine-agent" → double-click → Access Control → Allow all applications.${RESET}`);
|
|
1281
1270
|
}
|
|
1282
1271
|
console.log();
|
|
1283
1272
|
}
|
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
*/
|
|
8
8
|
import pino from 'pino';
|
|
9
9
|
import { DeliveryQueue } from './delivery-queue.js';
|
|
10
|
+
import { redactSecrets } from '../security/redact.js';
|
|
10
11
|
const logger = pino({ name: 'clementine.notifications' });
|
|
11
12
|
/** Safety cap — prevent runaway messages, but each channel handles its own chunking/limits. */
|
|
12
13
|
const MAX_MESSAGE_LENGTH = 8000;
|
|
@@ -62,10 +63,18 @@ export class NotificationDispatcher {
|
|
|
62
63
|
logger.warn('No notification senders registered — message dropped');
|
|
63
64
|
return { delivered: false, channelErrors: { _: 'no channels registered' } };
|
|
64
65
|
}
|
|
66
|
+
// Outbound credential redaction — last-line defense against the agent
|
|
67
|
+
// accidentally (or via prompt injection) shipping a credential to a
|
|
68
|
+
// public channel. Pattern-based + known-value scan; cheap enough to
|
|
69
|
+
// run on every send. See src/security/redact.ts for the policy.
|
|
70
|
+
const { text: redacted, stats: redactionStats } = redactSecrets(text);
|
|
71
|
+
if (redactionStats.redactionCount > 0) {
|
|
72
|
+
logger.warn({ count: redactionStats.redactionCount, labels: redactionStats.labelsHit, sessionKey: context?.sessionKey }, `Redacted ${redactionStats.redactionCount} credential-shaped value(s) before delivery`);
|
|
73
|
+
}
|
|
65
74
|
// Sanity cap only — each channel sender handles its own chunking/truncation
|
|
66
|
-
const capped =
|
|
67
|
-
?
|
|
68
|
-
:
|
|
75
|
+
const capped = redacted.length > MAX_MESSAGE_LENGTH
|
|
76
|
+
? redacted.slice(0, MAX_MESSAGE_LENGTH - 20) + '\n\n_(truncated)_'
|
|
77
|
+
: redacted;
|
|
69
78
|
// If sessionKey is set, route only to the channel that owns it.
|
|
70
79
|
// Fan out to all channels only when no originating channel is known.
|
|
71
80
|
const targetChannel = context?.sessionKey ? channelForSessionKey(context.sessionKey) : null;
|
package/dist/memory/store.js
CHANGED
|
@@ -1023,8 +1023,10 @@ export class MemoryStore {
|
|
|
1023
1023
|
const tagFilters = (category || topic) ? { category, topic } : undefined;
|
|
1024
1024
|
// 1. FTS5 relevance (fetch extra to allow re-ranking after boost)
|
|
1025
1025
|
const ftsResults = this.searchFts(query, agentSlug ? limit * 2 : limit, tagFilters, agentSlug && strict ? agentSlug : undefined);
|
|
1026
|
-
// Apply
|
|
1026
|
+
// Apply boosts. Order doesn't matter (all multiplicative) but readability does.
|
|
1027
|
+
const nowMs = Date.now();
|
|
1027
1028
|
for (const r of ftsResults) {
|
|
1029
|
+
// Salience: editor-curated importance (admin tag, sticky note, etc.)
|
|
1028
1030
|
if (r.salience > 0) {
|
|
1029
1031
|
r.score *= 1.0 + r.salience;
|
|
1030
1032
|
}
|
|
@@ -1036,6 +1038,17 @@ export class MemoryStore {
|
|
|
1036
1038
|
if (outcome !== 0) {
|
|
1037
1039
|
r.score *= 1.0 + 0.3 * outcome;
|
|
1038
1040
|
}
|
|
1041
|
+
// Temporal decay — without this, a 2-year-old chunk with the same BM25
|
|
1042
|
+
// score ranks identically to one from yesterday. Half-life of 30 days
|
|
1043
|
+
// (matches TEMPORAL_DECAY_HALF_LIFE_DAYS in config). Applied to a
|
|
1044
|
+
// bounded fraction (max 60% reduction) so genuinely high-relevance
|
|
1045
|
+
// historical context still surfaces — this is a tiebreaker, not a cliff.
|
|
1046
|
+
if (r.lastUpdated) {
|
|
1047
|
+
const daysOld = Math.max(0, (nowMs - new Date(r.lastUpdated).getTime()) / 86_400_000);
|
|
1048
|
+
const decay = temporalDecay(daysOld, 30);
|
|
1049
|
+
// Clamp to [0.4, 1.0] so very old chunks lose at most 60% of their score.
|
|
1050
|
+
r.score *= Math.max(0.4, decay);
|
|
1051
|
+
}
|
|
1039
1052
|
}
|
|
1040
1053
|
// Soft-isolation: apply agent affinity boost when not strict
|
|
1041
1054
|
if (agentSlug && !strict) {
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Outbound credential redaction.
|
|
3
|
+
*
|
|
4
|
+
* Last-line defense against prompt-injection exfil: any outbound text
|
|
5
|
+
* (Discord, Slack, email, dashboard chat) gets scanned for credential
|
|
6
|
+
* shapes BEFORE delivery. Matches are replaced with [REDACTED:reason]
|
|
7
|
+
* so the recipient sees that something was stripped without seeing the
|
|
8
|
+
* value itself.
|
|
9
|
+
*
|
|
10
|
+
* Two layers:
|
|
11
|
+
* 1. Pattern-based — well-known token formats from common providers
|
|
12
|
+
* (Stripe, Anthropic, OpenAI, GitHub, Slack, AWS, Discord). These
|
|
13
|
+
* catch credentials whose values we don't know in advance — including
|
|
14
|
+
* ones the agent might have just learned about from external sources.
|
|
15
|
+
* 2. Known-value — exact-match against the live values of credential-
|
|
16
|
+
* shaped keys in process.env / .env. Caught even if the format
|
|
17
|
+
* doesn't match a known pattern (e.g. internal API keys, custom
|
|
18
|
+
* webhook secrets).
|
|
19
|
+
*
|
|
20
|
+
* Designed to be cheap (single pass over each pattern + known-value set)
|
|
21
|
+
* so we can run on every outbound message without measurable latency.
|
|
22
|
+
*
|
|
23
|
+
* Designed to err on the side of REDACTING. False positives (a chunk of
|
|
24
|
+
* text that happens to look like a Stripe key) just produce a [REDACTED]
|
|
25
|
+
* marker; the recipient knows to ask. False negatives (a real credential
|
|
26
|
+
* leaked) are the bug we're trying to prevent.
|
|
27
|
+
*/
|
|
28
|
+
export interface RedactionStats {
|
|
29
|
+
redactionCount: number;
|
|
30
|
+
/** Labels that fired, deduped. Useful for audit logging. */
|
|
31
|
+
labelsHit: string[];
|
|
32
|
+
}
|
|
33
|
+
export interface RedactionResult {
|
|
34
|
+
text: string;
|
|
35
|
+
stats: RedactionStats;
|
|
36
|
+
}
|
|
37
|
+
/**
|
|
38
|
+
* Pull credential values from process.env for any key that looks sensitive
|
|
39
|
+
* (matches isSensitiveEnvKey). Used to build the known-value redaction set
|
|
40
|
+
* lazily — re-read on each call so a freshly-set credential is covered
|
|
41
|
+
* within one tick.
|
|
42
|
+
*/
|
|
43
|
+
export declare function buildKnownValueSet(env?: NodeJS.ProcessEnv): Set<string>;
|
|
44
|
+
/**
|
|
45
|
+
* Run all redaction layers against a string. Returns the redacted text
|
|
46
|
+
* plus stats about what fired.
|
|
47
|
+
*
|
|
48
|
+
* `knownValues` defaults to a fresh process.env scan but tests pass an
|
|
49
|
+
* explicit set for hermetic coverage.
|
|
50
|
+
*/
|
|
51
|
+
export declare function redactSecrets(text: string, knownValues?: Set<string>): RedactionResult;
|
|
52
|
+
//# sourceMappingURL=redact.d.ts.map
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Outbound credential redaction.
|
|
3
|
+
*
|
|
4
|
+
* Last-line defense against prompt-injection exfil: any outbound text
|
|
5
|
+
* (Discord, Slack, email, dashboard chat) gets scanned for credential
|
|
6
|
+
* shapes BEFORE delivery. Matches are replaced with [REDACTED:reason]
|
|
7
|
+
* so the recipient sees that something was stripped without seeing the
|
|
8
|
+
* value itself.
|
|
9
|
+
*
|
|
10
|
+
* Two layers:
|
|
11
|
+
* 1. Pattern-based — well-known token formats from common providers
|
|
12
|
+
* (Stripe, Anthropic, OpenAI, GitHub, Slack, AWS, Discord). These
|
|
13
|
+
* catch credentials whose values we don't know in advance — including
|
|
14
|
+
* ones the agent might have just learned about from external sources.
|
|
15
|
+
* 2. Known-value — exact-match against the live values of credential-
|
|
16
|
+
* shaped keys in process.env / .env. Caught even if the format
|
|
17
|
+
* doesn't match a known pattern (e.g. internal API keys, custom
|
|
18
|
+
* webhook secrets).
|
|
19
|
+
*
|
|
20
|
+
* Designed to be cheap (single pass over each pattern + known-value set)
|
|
21
|
+
* so we can run on every outbound message without measurable latency.
|
|
22
|
+
*
|
|
23
|
+
* Designed to err on the side of REDACTING. False positives (a chunk of
|
|
24
|
+
* text that happens to look like a Stripe key) just produce a [REDACTED]
|
|
25
|
+
* marker; the recipient knows to ask. False negatives (a real credential
|
|
26
|
+
* leaked) are the bug we're trying to prevent.
|
|
27
|
+
*/
|
|
28
|
+
import { isSensitiveEnvKey } from '../secrets/sensitivity.js';
|
|
29
|
+
// pragma: allowlist secret (this module exists to recognize secret patterns)
|
|
30
|
+
const PATTERNS = [
|
|
31
|
+
{ label: 'stripe', re: /\bsk_(?:live|test)_[A-Za-z0-9]{16,}\b/g },
|
|
32
|
+
{ label: 'anthropic', re: /\bsk-ant-(?:api|admin)\w*-[A-Za-z0-9_-]{16,}\b/g },
|
|
33
|
+
{ label: 'openai-project', re: /\bsk-proj-[A-Za-z0-9_-]{20,}\b/g },
|
|
34
|
+
{ label: 'openai', re: /\bsk-[A-Za-z0-9]{40,}\b/g },
|
|
35
|
+
{ label: 'github', re: /\b(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9]{30,}\b/g },
|
|
36
|
+
{ label: 'slack', re: /\bxox[abpors]-[A-Za-z0-9-]{10,}\b/g },
|
|
37
|
+
{ label: 'aws-access', re: /\b(?:AKIA|ASIA)[0-9A-Z]{16}\b/g },
|
|
38
|
+
{ label: 'discord', re: /\b[A-Za-z0-9_-]{24,28}\.[A-Za-z0-9_-]{6,7}\.[A-Za-z0-9_-]{27,38}\b/g },
|
|
39
|
+
{ label: 'jwt', re: /\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b/g },
|
|
40
|
+
{ label: 'private-key', re: /-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----[\s\S]+?-----END (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----/g },
|
|
41
|
+
];
|
|
42
|
+
/**
|
|
43
|
+
* Pull credential values from process.env for any key that looks sensitive
|
|
44
|
+
* (matches isSensitiveEnvKey). Used to build the known-value redaction set
|
|
45
|
+
* lazily — re-read on each call so a freshly-set credential is covered
|
|
46
|
+
* within one tick.
|
|
47
|
+
*/
|
|
48
|
+
export function buildKnownValueSet(env = process.env) {
|
|
49
|
+
const out = new Set();
|
|
50
|
+
for (const [key, value] of Object.entries(env)) {
|
|
51
|
+
if (!value)
|
|
52
|
+
continue;
|
|
53
|
+
if (value.length < 12)
|
|
54
|
+
continue; // short values likely false positives
|
|
55
|
+
if (value.startsWith('keychain:'))
|
|
56
|
+
continue; // reference, not the secret itself
|
|
57
|
+
if (!isSensitiveEnvKey(key))
|
|
58
|
+
continue;
|
|
59
|
+
out.add(value);
|
|
60
|
+
}
|
|
61
|
+
return out;
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Run all redaction layers against a string. Returns the redacted text
|
|
65
|
+
* plus stats about what fired.
|
|
66
|
+
*
|
|
67
|
+
* `knownValues` defaults to a fresh process.env scan but tests pass an
|
|
68
|
+
* explicit set for hermetic coverage.
|
|
69
|
+
*/
|
|
70
|
+
export function redactSecrets(text, knownValues = buildKnownValueSet()) {
|
|
71
|
+
if (!text)
|
|
72
|
+
return { text, stats: { redactionCount: 0, labelsHit: [] } };
|
|
73
|
+
let working = text;
|
|
74
|
+
const labelsHit = new Set();
|
|
75
|
+
let count = 0;
|
|
76
|
+
// Pattern pass first — catches well-known formats whose values we may
|
|
77
|
+
// not know in advance.
|
|
78
|
+
for (const { label, re } of PATTERNS) {
|
|
79
|
+
working = working.replace(re, () => {
|
|
80
|
+
labelsHit.add(label);
|
|
81
|
+
count++;
|
|
82
|
+
return `[REDACTED:${label}]`;
|
|
83
|
+
});
|
|
84
|
+
}
|
|
85
|
+
// Known-value pass — exact-match every credential currently loaded into
|
|
86
|
+
// process.env. Sort by length descending so longer values get replaced
|
|
87
|
+
// first (a longer secret might contain a shorter one as substring).
|
|
88
|
+
const sortedValues = [...knownValues].sort((a, b) => b.length - a.length);
|
|
89
|
+
for (const v of sortedValues) {
|
|
90
|
+
if (!v || v.length < 12)
|
|
91
|
+
continue;
|
|
92
|
+
let idx = working.indexOf(v);
|
|
93
|
+
while (idx !== -1) {
|
|
94
|
+
working = working.slice(0, idx) + '[REDACTED:env]' + working.slice(idx + v.length);
|
|
95
|
+
labelsHit.add('env');
|
|
96
|
+
count++;
|
|
97
|
+
idx = working.indexOf(v, idx + '[REDACTED:env]'.length);
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
return {
|
|
101
|
+
text: working,
|
|
102
|
+
stats: { redactionCount: count, labelsHit: [...labelsHit] },
|
|
103
|
+
};
|
|
104
|
+
}
|
|
105
|
+
//# sourceMappingURL=redact.js.map
|