@contextfort-ai/openclaw-secure 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,849 @@
1
+ 'use strict';
2
+
3
+ const path = require('path');
4
+ const fs = require('fs');
5
+ const os = require('os');
6
+
7
+ const HOME = os.homedir();
8
+ const CACHE_FILE_NAME = '.secrets_scan_cache.json';
9
+
10
+ /**
11
+ * Secrets Guard — scans all directories OpenClaw can read for hardcoded secrets
12
+ * using TruffleHog. Runs on init and on-demand via `openclaw-secure scan`.
13
+ *
14
+ * If hardcoded secrets exist, the LLM could read them directly from files.
15
+ * This guard ensures secrets only exist as $ENV_VAR references, which the
16
+ * bash guard monitors at runtime.
17
+ */
18
+ module.exports = function createSecretsGuard({ spawnSync, baseDir, analytics }) {
19
+ const track = analytics ? analytics.track.bind(analytics) : () => {};
20
+ const CACHE_FILE = path.join(baseDir, 'monitor', CACHE_FILE_NAME);
21
+
22
+ // Cached scan results: { path: string, findings: array, scannedAt: string }[]
23
+ let lastScanResults = [];
24
+ let trufflehogAvailable = null; // null = unknown, true/false after check
25
+
/**
 * Report whether the `trufflehog` binary can be executed.
 *
 * The probe (`trufflehog --version`, 10 s timeout) runs at most once per
 * guard instance; the outcome is memoised in `trufflehogAvailable`.
 *
 * @returns {boolean} true when the probe exited with status 0.
 */
function isTrufflehogInstalled() {
  if (trufflehogAvailable === null) {
    let exitedCleanly;
    try {
      const probe = spawnSync('trufflehog', ['--version'], {
        encoding: 'utf8',
        timeout: 10000,
        stdio: ['pipe', 'pipe', 'pipe'],
      });
      exitedCleanly = probe.status === 0;
    } catch {
      // Any failure to run the probe counts as "not installed".
      exitedCleanly = false;
    }
    trufflehogAvailable = exitedCleanly;
  }
  return trufflehogAvailable;
}
43
+
/**
 * List the directories that should be scanned for hardcoded secrets.
 *
 * The project directory (when `cwd` is truthy) is always included without an
 * existence check; the OpenClaw, Claude and legacy config directories under
 * the user's home are included only when they exist on disk.
 *
 * @param {string} [cwd] - Current working directory of the project.
 * @returns {{path: string, label: string}[]} Scan targets, in scan order.
 */
function getScanTargets(cwd) {
  const projectTargets = cwd ? [{ path: cwd, label: 'Project directory' }] : [];

  const legacyTarget = (dir) => ({
    path: dir,
    label: `Legacy config (${dir.replace(HOME, '~')})`,
  });

  // Candidate config locations, in the order they are reported.
  const configCandidates = [
    { path: path.join(HOME, '.openclaw'), label: 'OpenClaw config (~/.openclaw/)' },
    { path: path.join(HOME, '.claude'), label: 'Claude config (~/.claude/)' },
    legacyTarget(path.join(HOME, '.config', 'openclaw')),
    legacyTarget(path.join(HOME, '.config', 'claude')),
  ];

  return [
    ...projectTargets,
    ...configCandidates.filter((candidate) => dirExists(candidate.path)),
  ];
}
81
+
/**
 * Tell whether `d` names an existing directory.
 *
 * @param {string} d - Path to test.
 * @returns {boolean} false when the path cannot be stat'ed or is not a directory.
 */
function dirExists(d) {
  let stats;
  try {
    stats = fs.statSync(d);
  } catch {
    return false;
  }
  return stats.isDirectory();
}
85
+
/**
 * Run `trufflehog filesystem` on a single directory and return its findings.
 *
 * Findings that trufflehog already wrote to stdout are kept even when the
 * process ended with an error (timeout, output larger than `maxBuffer`):
 * dropping them would make an interrupted scan look like a clean one. A
 * truncated final line simply fails to parse and is skipped.
 *
 * @param {string} dirPath - Directory to scan.
 * @param {boolean} [onlyVerified=true] - Pass `--only-verified` to reduce noise.
 * @returns {object[]} Normalized findings; empty when nothing was reported or
 *   the scanner could not be run.
 */
function scanDirectory(dirPath, onlyVerified = true) {
  const args = ['filesystem', dirPath, '--json'];
  if (onlyVerified) args.push('--only-verified');

  // Exclude common noisy dirs. Only reference the exclude file when it really
  // exists, so a failed write does not hand the scanner a missing path.
  const excludeFile = path.join(baseDir, 'monitor', 'secrets_guard', '.trufflehog-exclude');
  ensureExcludeFile(excludeFile);
  if (fs.existsSync(excludeFile)) {
    args.push('--exclude-paths', excludeFile);
  }

  let result;
  try {
    result = spawnSync('trufflehog', args, {
      encoding: 'utf8',
      timeout: 120000, // 2 minutes per directory
      maxBuffer: 10 * 1024 * 1024, // 10MB
      stdio: ['pipe', 'pipe', 'pipe'],
    });
  } catch {
    return [];
  }

  // When the binary could not be spawned at all, stdout is empty and we
  // return []. On timeout/overflow stdout holds the partial NDJSON stream.
  const stdout = ((result && result.stdout) || '').trim();
  if (!stdout) return [];

  // TruffleHog outputs one JSON object per line (NDJSON).
  const findings = [];
  for (const line of stdout.split('\n')) {
    const trimmed = line.trim();
    if (!trimmed) continue;
    try {
      findings.push(normalizeFinding(JSON.parse(trimmed)));
    } catch {
      // Not a complete JSON finding (e.g. truncated last line) — skip it.
    }
  }
  return findings;
}
129
+
/**
 * Normalize a raw TruffleHog finding into a consistent shape.
 *
 * Both capitalised (`DetectorName`, `SourceMetadata.Data.Filesystem`) and
 * lower-camel (`detectorName`, `sourceMetadata.data.filesystem`) key spellings
 * are accepted for every field, including the line number.
 *
 * @param {object} raw - One parsed JSON object from the scanner output.
 * @returns {{detectorName: string, verified: boolean, raw: string,
 *   rawFull: string, file: (string|null), line: (number|null),
 *   detectorType: *}} `raw` is the redacted secret, `rawFull` the full value.
 */
function normalizeFinding(raw) {
  const secret = raw.Raw || raw.raw || '';

  // Same casing-tolerant lookup that is used for the file path.
  const meta = raw.SourceMetadata || raw.sourceMetadata || {};
  const data = meta.Data || meta.data || {};
  const fsData = data.Filesystem || data.filesystem || {};

  return {
    detectorName: raw.DetectorName || raw.detectorName || 'Unknown',
    verified: raw.Verified === true || raw.verified === true,
    raw: redactSecret(secret),
    rawFull: secret,
    file: extractFilePath(raw),
    line: fsData.line || null,
    detectorType: raw.DetectorType || raw.detectorType || null,
  };
}
144
+
/**
 * Produce a display-safe form of a secret.
 *
 * @param {string} secret - The secret value.
 * @returns {string} `'****'` for empty values or values shorter than 12
 *   characters; otherwise the first four and last four characters joined
 *   by `...`.
 */
function redactSecret(secret) {
  const tooShortToShow = !secret || secret.length < 12;
  if (tooShortToShow) {
    return '****';
  }
  const head = secret.slice(0, 4);
  const tail = secret.slice(-4);
  return `${head}...${tail}`;
}
152
+
/**
 * Pull the file path out of a TruffleHog finding.
 *
 * Walks `SourceMetadata → Data → Filesystem`, accepting either the
 * capitalised or the lower-camel spelling at each level.
 *
 * @param {object} raw - One parsed finding.
 * @returns {(string|null)} The `file` value, or null when absent.
 */
function extractFilePath(raw) {
  const levels = [
    ['SourceMetadata', 'sourceMetadata'],
    ['Data', 'data'],
    ['Filesystem', 'filesystem'],
  ];
  let node = raw;
  for (const [capitalised, lowerCamel] of levels) {
    node = node[capitalised] || node[lowerCamel] || {};
  }
  return node.file || null;
}
163
+
/**
 * Make sure the TruffleHog exclude-patterns file exists with the expected
 * content, creating its parent directory when necessary.
 *
 * The file is rewritten only when it is missing or its content differs.
 * Failures are deliberately swallowed: the exclude list is an optimisation,
 * not a requirement for scanning.
 *
 * @param {string} filePath - Destination of the patterns file.
 * @returns {void}
 */
function ensureExcludeFile(filePath) {
  // TruffleHog --exclude-paths expects regex patterns, one per line.
  const patterns = [
    'node_modules/',
    '\\.git/',
    '__pycache__/',
    '\\.pyc$',
    '\\.next/',
    'dist/',
    'build/',
    '\\.venv/',
    'venv/',
  ].join('\n') + '\n';

  try {
    let existing = null;
    try {
      existing = fs.readFileSync(filePath, 'utf8');
    } catch {
      // Missing or unreadable — treated as "needs writing".
    }
    if (existing !== patterns) {
      // Without this the write fails on a fresh install where the
      // directory has never been created.
      fs.mkdirSync(path.dirname(filePath), { recursive: true });
      fs.writeFileSync(filePath, patterns);
    }
  } catch {
    // Best effort only.
  }
}
190
+
/**
 * Run a full scan of all OpenClaw-accessible directories.
 *
 * Findings are tagged with the target they came from, de-duplicated, cached
 * in memory and on disk, and reported to analytics.
 *
 * De-duplication keys on the full secret value, not the redacted one:
 * redaction maps every secret shorter than 12 characters to `****` and keeps
 * only 8 characters of longer ones, so keying on it would merge distinct
 * secrets found in the same file by the same detector.
 *
 * @param {string} [cwd] - Project directory to include in the scan.
 * @param {object} [options]
 * @param {boolean} [options.onlyVerified=true] - Only report verified secrets.
 * @param {boolean} [options.verbose=false] - Accepted for compatibility; unused.
 * @returns {{error: (string|null), targets: object[], findings: object[],
 *   summary: (object|null)}}
 */
function scan(cwd, { onlyVerified = true, verbose = false } = {}) {
  if (!isTrufflehogInstalled()) {
    return {
      error: 'trufflehog is not installed. Install it with: brew install trufflehog',
      targets: [],
      findings: [],
      summary: null,
    };
  }

  const targets = getScanTargets(cwd);
  const allFindings = [];

  for (const target of targets) {
    const findings = scanDirectory(target.path, onlyVerified);
    for (const f of findings) {
      f.scanTarget = target.label;
      f.scanTargetPath = target.path;
    }
    allFindings.push(...findings);
  }

  // Deduplicate by file + detectorName + full secret value. The redacted
  // form is only a fallback for findings that carry no `rawFull`.
  const seen = new Set();
  const deduped = [];
  for (const f of allFindings) {
    const key = JSON.stringify([f.file, f.detectorName, f.rawFull || f.raw]);
    if (!seen.has(key)) {
      seen.add(key);
      deduped.push(f);
    }
  }

  const verified = deduped.filter((f) => f.verified);
  const unverified = deduped.filter((f) => !f.verified);

  const result = {
    error: null,
    targets,
    findings: deduped,
    summary: {
      totalFindings: deduped.length,
      verifiedLive: verified.length,
      unverified: unverified.length,
      targetsScanned: targets.length,
      scannedAt: new Date().toISOString(),
    },
  };

  // Cache results
  lastScanResults = deduped;
  saveScanCache(result);
  track('secrets_scan_complete', {
    targets_scanned: targets.length,
    total_findings: deduped.length,
    verified_live: verified.length,
  });

  return result;
}
255
+
/**
 * Render a scan result as text for the terminal.
 *
 * Verified findings are listed when there are any; unverified findings are
 * listed only when there are no verified ones. Paths have the home directory
 * abbreviated to `~`.
 *
 * @param {{error: (string|null), targets: object[], findings: object[]}} result
 * @returns {string} Multi-line report (or a one-line error message).
 */
function formatResults(result) {
  if (result.error) {
    return `\n ERROR: ${result.error}\n`;
  }

  const out = ['', ' Scanning areas OpenClaw can access...'];
  for (const target of result.targets) {
    out.push(` \u2713 ${target.label}`);
  }
  out.push('');

  // Split findings into live (verified) and merely potential ones.
  const live = [];
  const potential = [];
  for (const finding of result.findings) {
    (finding.verified ? live : potential).push(finding);
  }

  const plural = (count) => (count > 1 ? 's' : '');
  const fileLine = (finding) => ` File: ${finding.file.replace(HOME, '~')}`;

  if (live.length > 0) {
    out.push(` \u26a0 Found ${live.length} LIVE hardcoded secret${plural(live.length)}:\n`);
    for (const [position, finding] of live.entries()) {
      out.push(` ${position + 1}. ${finding.detectorName} (VERIFIED LIVE)`);
      if (finding.file) out.push(fileLine(finding));
      out.push(
        ` Secret: ${finding.raw}`,
        ` Action: Rotate this secret and move to an environment variable`,
        '',
      );
    }
  } else if (potential.length > 0) {
    out.push(` Found ${potential.length} potential secret${plural(potential.length)} (unverified):\n`);
    for (const [position, finding] of potential.entries()) {
      out.push(` ${position + 1}. ${finding.detectorName}`);
      if (finding.file) out.push(fileLine(finding));
      out.push(` Secret: ${finding.raw}`, '');
    }
  }

  if (result.findings.length === 0) {
    out.push(
      ' \u2713 No hardcoded secrets found in OpenClaw-accessible areas.',
      ' All secrets should be referenced via $ENV_VAR only.',
    );
  }

  out.push('');
  return out.join('\n');
}
304
+
/**
 * Persist a scan result to the cache file as pretty-printed JSON.
 *
 * Only the redacted secret (`raw`) is stored for each finding; the full
 * value and the target path are left out. Any failure is ignored.
 *
 * @param {{summary: object, findings: object[]}} result - Result from a scan.
 * @returns {void}
 */
function saveScanCache(result) {
  try {
    const { summary } = result;
    const payload = {
      scannedAt: summary.scannedAt,
      summary,
      findings: result.findings.map(({ detectorName, verified, raw, file, scanTarget }) => ({
        detectorName,
        verified,
        raw, // already redacted
        file,
        scanTarget,
      })),
    };
    fs.writeFileSync(CACHE_FILE, `${JSON.stringify(payload, null, 2)}\n`);
  } catch {}
}
324
+
/**
 * Read the most recent scan result from the cache file.
 *
 * @returns {(object|null)} The parsed cache content, or null when the file is
 *   missing, unreadable or not valid JSON.
 */
function loadScanCache() {
  let cached = null;
  try {
    const text = fs.readFileSync(CACHE_FILE, 'utf8');
    cached = JSON.parse(text);
  } catch {
    cached = null;
  }
  return cached;
}
336
+
337
+ // =============================================
338
+ // SOLVE — replace live secrets with dummy values
339
+ // =============================================
340
+
/**
 * Generate a dummy stand-in for a secret by mutating some of its characters.
 *
 * The result has the same length and the same character classes as the
 * input (digits stay digits, lower/upper-case letters keep their case; any
 * other character becomes a digit), so the file still looks normal. Up to
 * the first four characters are kept because the prefix often identifies
 * the key type.
 *
 * Every selected position is guaranteed to change, so the dummy can never
 * equal the original, and the number of positions is capped at what is
 * actually available, so short inputs terminate.
 *
 * @param {string} secret - The real secret.
 * @returns {string} The dummy value, or `'REDACTED_BY_CONTEXTFORT'` when the
 *   input is empty or shorter than four characters.
 */
function generateDummy(secret) {
  if (!secret || secret.length < 4) return 'REDACTED_BY_CONTEXTFORT';

  const chars = secret.split('');
  // Skip the first 4 characters, but always leave at least one to mutate.
  const startIdx = Math.min(4, chars.length - 1);
  const mutableCount = chars.length - startIdx;
  // Mutate ~30% (at least 3), never more positions than exist.
  const numToChange = Math.min(
    mutableCount,
    Math.max(3, Math.floor(mutableCount * 0.3)),
  );

  const indices = new Set();
  while (indices.size < numToChange) {
    indices.add(startIdx + Math.floor(Math.random() * mutableCount));
  }

  // Shift a character by a non-zero amount inside its own range, which
  // guarantees the result differs from the input character.
  const shiftWithin = (ch, base, span) => {
    const shift = 1 + Math.floor(Math.random() * (span - 1));
    return String.fromCharCode(base + ((ch.charCodeAt(0) - base + shift) % span));
  };

  for (const idx of indices) {
    const c = chars[idx];
    if (c >= '0' && c <= '9') {
      chars[idx] = shiftWithin(c, 48, 10);
    } else if (c >= 'a' && c <= 'z') {
      chars[idx] = shiftWithin(c, 97, 26);
    } else if (c >= 'A' && c <= 'Z') {
      chars[idx] = shiftWithin(c, 65, 26);
    } else {
      // Any other character is replaced by a digit, which always differs.
      chars[idx] = String.fromCharCode(48 + Math.floor(Math.random() * 10));
    }
  }
  return chars.join('');
}
370
+
/**
 * Overwrite every occurrence of a secret in a file with a generated dummy.
 *
 * @param {string} filePath - File to rewrite in place.
 * @param {string} realSecret - Exact secret text to replace.
 * @returns {{success: boolean, file: string, original?: string,
 *   dummy?: string, error?: string}} On success, `original` and `dummy` are
 *   redacted forms; on failure, `error` explains why.
 */
function replaceSecretInFile(filePath, realSecret) {
  try {
    const before = fs.readFileSync(filePath, 'utf8');
    if (before.includes(realSecret)) {
      const dummy = generateDummy(realSecret);
      // split/join is a literal replace-all: nothing in the secret or the
      // dummy is interpreted as a pattern.
      fs.writeFileSync(filePath, before.split(realSecret).join(dummy));
      return {
        success: true,
        file: filePath,
        original: redactSecret(realSecret),
        dummy: redactSecret(dummy),
      };
    }
    return { success: false, file: filePath, error: 'Secret no longer found in file' };
  } catch (e) {
    return { success: false, file: filePath, error: e.message };
  }
}
394
+
/**
 * Replace every verified secret from a scan with a dummy value.
 *
 * Only findings that are verified and carry both a full secret value and a
 * file are processed. Each distinct secret is replaced once per file; the
 * first finding for a secret supplies the detector name.
 *
 * @param {object[]} findings - Findings as returned by a scan.
 * @returns {object[]} One replacement outcome per (file, secret) pair, each
 *   annotated with `detectorName`.
 */
function solve(findings) {
  // file → (secret value → first finding that reported it)
  const secretsByFile = new Map();
  for (const finding of findings) {
    const actionable = finding.verified && finding.rawFull && finding.file;
    if (!actionable) continue;

    let secrets = secretsByFile.get(finding.file);
    if (!secrets) {
      secrets = new Map();
      secretsByFile.set(finding.file, secrets);
    }
    if (!secrets.has(finding.rawFull)) {
      secrets.set(finding.rawFull, finding);
    }
  }

  const results = [];
  for (const [filePath, secrets] of secretsByFile) {
    for (const finding of secrets.values()) {
      const outcome = replaceSecretInFile(filePath, finding.rawFull);
      outcome.detectorName = finding.detectorName;
      results.push(outcome);
    }
  }

  const replacedCount = results.filter((outcome) => outcome.success).length;
  track('secrets_solve', {
    total_replaced: replacedCount,
    total_failed: results.length - replacedCount,
  });

  return results;
}
429
+
/**
 * Render replacement outcomes as text for the terminal.
 *
 * Successful replacements are listed first, then failures, followed by a
 * reminder to rotate the real secrets when at least one replacement worked.
 *
 * @param {object[]} results - Outcomes as returned by `solve`.
 * @returns {string} Multi-line report.
 */
function formatSolveResults(results) {
  const shortPath = (outcome) => outcome.file.replace(HOME, '~');
  const replaced = results.filter((outcome) => outcome.success);
  const failures = results.filter((outcome) => !outcome.success);
  const anyReplaced = replaced.length > 0;
  const out = [];

  if (anyReplaced) {
    const plural = replaced.length > 1 ? 's' : '';
    out.push(`\n Replaced ${replaced.length} secret${plural} with dummy values:\n`);
    replaced.forEach((outcome) => {
      out.push(
        ` \u2713 ${shortPath(outcome)}`,
        ` ${outcome.detectorName}: ${outcome.original} -> ${outcome.dummy}`,
      );
    });
  }

  if (failures.length > 0) {
    out.push(`\n Failed to replace ${failures.length}:\n`);
    failures.forEach((outcome) => {
      out.push(` \u2717 ${shortPath(outcome)}: ${outcome.error}`);
    });
  }

  if (anyReplaced) {
    out.push(
      '\n These secrets are now invalidated in the files OpenClaw can read.',
      ' Remember to rotate the REAL secrets in their original services.',
    );
  }

  out.push('');
  return out.join('\n');
}
461
+
462
+ // =============================================
463
+ // ENV VAR MONITORING — block/log $VAR access
464
+ // =============================================
465
+
// Matches references to environment variables in a shell command. A name is
// an upper-case letter or underscore followed by at least two more
// upper-case letters, digits or underscores. The three alternatives capture
// the name in groups 1, 2 and 3 respectively.
// Forms such as ${!PREFIX_*} and ${#VAR} are not matched, because the name
// must come directly after `$` or `${`.
const ENV_VAR_PATTERN = new RegExp(
  [
    // $VAR
    /\$([A-Z_][A-Z0-9_]{2,})\b/,
    // ${VAR:-default}, ${VAR:+alt}, ${VAR:=val}, ${VAR#x}, ${VAR%x}, ${VAR/x/y}
    /\$\{([A-Z_][A-Z0-9_]{2,})(?:[:#%\/]|:-|:\+|:=)[^}]*\}/,
    // ${VAR}
    /\$\{([A-Z_][A-Z0-9_]{2,})\}/,
  ]
    .map((alternative) => alternative.source)
    .join('|'),
  'g',
);
470
+
// Environment variables that are treated as non-secret: referencing them in
// a command that prints values is allowed.
const SAFE_ENV_VARS = new Set([
  // Identity, shell and terminal
  'HOME', 'USER', 'USERNAME', 'LOGNAME', 'SHELL', 'TERM', 'TERM_PROGRAM',
  // Paths, host and locale
  'PATH', 'PWD', 'OLDPWD', 'HOSTNAME', 'LANG', 'LC_ALL', 'LC_CTYPE',
  // Preferred programs and display
  'EDITOR', 'VISUAL', 'PAGER', 'BROWSER', 'DISPLAY',
  // XDG base directories
  'XDG_RUNTIME_DIR',
  'XDG_CONFIG_HOME', 'XDG_DATA_HOME', 'XDG_CACHE_HOME', 'XDG_STATE_HOME',
  // Temp locations and terminal geometry
  'TMPDIR', 'TEMP', 'TMP', 'COLORTERM', 'COLUMNS', 'LINES',
  // Shell bookkeeping
  'SHLVL', 'HISTSIZE', 'HISTFILESIZE', 'HISTFILE', 'HISTCONTROL',
  // Framework environment selectors
  'NODE_ENV', 'RAILS_ENV', 'RACK_ENV', 'FLASK_ENV', 'DJANGO_SETTINGS_MODULE',
  // Language toolchain locations
  'GOPATH', 'GOROOT', 'CARGO_HOME', 'RUSTUP_HOME', 'JAVA_HOME',
  'NVM_DIR', 'PYENV_ROOT', 'RBENV_ROOT', 'VIRTUAL_ENV', 'CONDA_DEFAULT_ENV',
  // CI markers
  'CI', 'GITHUB_ACTIONS', 'GITLAB_CI', 'CIRCLECI', 'TRAVIS',
  // Platform description
  'ARCH', 'MACHTYPE', 'OSTYPE', 'VENDOR',
  // SSH session info
  'SSH_TTY', 'SSH_CONNECTION', 'SSH_CLIENT',
  // GnuPG
  'GPG_TTY', 'GNUPGHOME',
]);
487
+
// Patterns for commands whose output would contain the VALUE of an
// environment variable, so the LLM reading the output would see it.
const VALUE_EXPOSING_COMMANDS = [
  // Shell expands $VAR and the command prints the expanded value.
  ...[
    /\becho\b/,
    /\bprintf\b/,
    /<<<\s*"?\$\{?[A-Z_]/, // here-string: cat <<< $VAR, <<< "$VAR", <<< "${VAR}"
  ],
  // Variables read by name through a tool or language API.
  ...[
    /\bprintenv\s+\w/, // printenv VAR_NAME
    /\bos\.environ/, // python os.environ['KEY'] or os.environ.get
    /\bos\.getenv/, // python os.getenv('KEY')
    /\bprocess\.env\b/, // node process.env.KEY
    /\bENVIRON\s*\[/, // awk ENVIRON["KEY"]
    /\$ENV\s*\{/, // perl $ENV{KEY}
    /\bENV\s*\[/, // ruby ENV["KEY"]
    /\bgetenv\s*\(/, // php getenv("KEY")
    /\bSystem\.getenv/, // java System.getenv
    /\bos\.Getenv/, // go os.Getenv
  ],
];
508
+
// Patterns for commands that print the whole environment, which exposes
// secrets even when no individual $VAR is referenced.
const ENV_DUMP_COMMANDS = [
  // Bare dump commands (the entire command line is just the dump).
  /^\s*env\s*$/, // bare `env` dumps everything
  /^\s*printenv\s*$/, // bare `printenv` dumps everything
  /^\s*export\s+-p\s*$/, // export -p dumps all
  /^\s*export\s*$/, // bare export

  // Dumps piped into another command.
  /\benv\s*\|/, // env | grep ...
  /\bprintenv\s*\|/, // printenv | grep ...
  /\bset\s*\|/, // set | grep ...

  // Reading the process environment through procfs.
  /\bcat\s+\/proc\/self\/environ/,
  /\bstrings\s+\/proc\/self\/environ/,
  /\bxxd\s+\/proc\/self\/environ/,

  // Language-level access to the full environment object.
  /\bos\.environ\b/, // python os.environ (full dict)
  /\bnode\b.*\bprocess\.env\b/, // node ... process.env (only in node commands)
  /\bENVIRON\b/, // awk ENVIRON (full array)
  /\$ENV\b/, // perl %ENV (full hash)
  /\bENV\.(to_a|each|keys|values|inspect|map|select|reject|sort)\b/, // ruby ENV iteration

  // Shell builtins that list variables.
  /\bdeclare\s+-[px]/, // bash declare -p (dumps vars), declare -x (exported)
  /\btypeset\s+-p/, // ksh/zsh typeset -p (dumps vars)
  /\bcompgen\s+-[ve]/, // bash compgen -v (var names), -e (exported)
];
530
+
/**
 * Collect the names of all environment variables referenced in a command.
 *
 * @param {string} cmd - Shell command text.
 * @returns {string[]} Distinct variable names in order of first appearance,
 *   e.g. ['STRIPE_KEY', 'AWS_SECRET_ACCESS_KEY']; empty for non-string or
 *   empty input.
 */
function extractEnvVarRefs(cmd) {
  if (!cmd || typeof cmd !== 'string') return [];

  // Fresh regex per call so no `lastIndex` state is shared between calls.
  const matcher = new RegExp(ENV_VAR_PATTERN.source, 'g');
  const names = Array.from(
    cmd.matchAll(matcher),
    ([, bare, bracedWithOperator, braced]) => bare || bracedWithOperator || braced,
  );
  return [...new Set(names)];
}
545
+
/**
 * Drop the variables that are on the safe list.
 *
 * @param {string[]} vars - Variable names.
 * @returns {string[]} The names that are not in `SAFE_ENV_VARS`, in their
 *   original order.
 */
function filterSensitiveVars(vars) {
  const isPotentiallySensitive = (name) => SAFE_ENV_VARS.has(name) === false;
  return vars.filter(isPotentiallySensitive);
}
553
+
/**
 * Decide whether a shell command would reveal environment variable values.
 *
 * Checks run in this order, and the first hit wins:
 *   1. commands that dump the whole environment,
 *   2. `printenv NAME`,
 *   3. environment access through a language API,
 *   4. `$VAR` references to non-safe variables — blocked when the command
 *      also matches a value-printing pattern, otherwise only logged.
 *
 * All checks are plain regex tests against the whole command text.
 *
 * @param {string} cmd - Shell command text.
 * @returns {({blocked: boolean, reason: (string|null), vars: string[],
 *   type: string}|null)} null when nothing noteworthy was found.
 */
function checkEnvVarLeak(cmd) {
  if (!cmd || typeof cmd !== 'string') return null;

  const block = (reason, vars, type) => ({ blocked: true, reason, vars, type });
  const matchesAny = (patterns) => patterns.some((pattern) => pattern.test(cmd));

  // 1. Whole-environment dumps.
  if (matchesAny(ENV_DUMP_COMMANDS)) {
    return block(
      `Command dumps environment variables to output. OpenClaw would see all secret values. Use specific $VAR references in commands instead.`,
      ['ALL'],
      'env_dump',
    );
  }

  // 2. `printenv VAR_NAME` — exposes a value without any `$` prefix.
  const printenvMatch = cmd.match(/\bprintenv\s+([A-Z_][A-Z0-9_]{2,})\b/);
  if (printenvMatch) {
    const [, varName] = printenvMatch;
    return block(
      `Command would expose env var value to output: ${varName}. OpenClaw would see the actual secret.`,
      [varName],
      'value_exposed',
    );
  }

  // 3. Language-specific access: variables are read by name string rather
  //    than by shell expansion, so no `$` appears in the command.
  const LANG_ENV_ACCESS = [
    /\bos\.getenv\s*\(/, // python os.getenv('KEY')
    /\bos\.environ\s*[\[.]/, // python os.environ['KEY'] or os.environ.get
    /(?:^|[;|&])\s*node\b.*\bprocess\.env\b/, // node -e "...process.env..." (only in node commands)
    /\bENVIRON\s*\[/, // awk ENVIRON["KEY"]
    /\$ENV\s*\{/, // perl $ENV{KEY}
    /\bENV\s*\[/, // ruby ENV["KEY"]
    /\bENV\s*\./, // ruby ENV.to_a, ENV.each, ENV.keys
    /\bgetenv\s*\(/, // php getenv("KEY")
    /\bSystem\.getenv\s*\(/, // java System.getenv("KEY")
    /\bos\.Getenv\s*\(/, // go os.Getenv("KEY")
    /\bsubprocess\b.*\bprintenv\b/, // python subprocess calling printenv
  ];
  if (matchesAny(LANG_ENV_ACCESS)) {
    return block(
      `Command reads environment variables via language API. OpenClaw would see the actual secret value in output.`,
      ['LANG_ENV_ACCESS'],
      'lang_env_access',
    );
  }

  // 4. `$VAR` references.
  const allVars = extractEnvVarRefs(cmd);
  if (allVars.length === 0) return null;

  // Only non-safe variables matter (ignores $HOME, $PATH, $USER, etc.).
  const sensitiveVars = filterSensitiveVars(allVars);
  if (sensitiveVars.length === 0) return null;

  if (matchesAny(VALUE_EXPOSING_COMMANDS)) {
    return block(
      `Command would expose env var value(s) to output: ${sensitiveVars.join(', ')}. OpenClaw would see the actual secret. Use env vars directly in commands (e.g. curl -H "Bearer $TOKEN" ...) without echoing them.`,
      sensitiveVars,
      'value_exposed',
    );
  }

  // Values are not printed — report the usage without blocking.
  return {
    blocked: false,
    reason: null,
    vars: sensitiveVars,
    type: 'env_ref_logged',
  };
}
647
+
/**
 * Build the message shown when a command is blocked for leaking
 * environment variable values.
 *
 * @param {{reason: string, vars: string[]}} info - Result of the leak check.
 * @returns {string} Multi-paragraph explanation, paragraphs separated by a
 *   blank line.
 */
function formatEnvVarBlockError(info) {
  const detected = info.reason;
  const varList = info.vars.join(', ');

  const paragraphs = [
    '\u26a0\ufe0f SECURITY FIREWALL \u2014 ENV VAR LEAK BLOCKED',
    "This command was blocked because it would expose environment variable values to the AI agent's output.",
    `What was detected: ${detected}`,
    `Environment variables referenced: ${varList}`,
    'If OpenClaw sees the actual secret value, it could be exfiltrated. Secrets should only be used as $VAR references inside commands (e.g. curl -H "Authorization: Bearer $API_KEY" https://api.example.com) — the shell resolves them without the AI ever seeing the value.',
    'You MUST immediately inform the user:',
    `"A command was blocked because it would have exposed secret values from environment variables (${varList}) to the AI agent. This protects against secret exfiltration. Use env var references ($VAR) directly in commands without echoing them."`,
    'Do NOT attempt workarounds. Do NOT use alternative commands to read these values.',
  ];
  return paragraphs.join('\n\n');
}
668
+
669
+ // =============================================
670
+ // OUTPUT SECRET SCANNER — redact secrets from
671
+ // command output BEFORE the LLM sees them
672
+ // =============================================
673
+
// High-confidence regex patterns for known secret formats, applied to
// command output. Each entry has:
//   name     — label used in the redaction marker and in analytics
//   pattern  — global regex locating candidate secrets
//   extract  — (optional) capture group that holds the secret itself
//   validate — (optional) (match, context, index?) => boolean; `index` is
//              the position of the match in `context` when the caller
//              supplies it
// These must be FAST (run on every command output) and LOW false-positive.
const SECRET_OUTPUT_PATTERNS = [
  // Anthropic API keys
  { name: 'Anthropic', pattern: /sk-ant-api\d{2}-[A-Za-z0-9_-]{20,}/g },

  // OpenAI API keys
  { name: 'OpenAI', pattern: /sk-proj-[A-Za-z0-9_-]{20,}/g },
  { name: 'OpenAI', pattern: /sk-[A-Za-z0-9]{40,}/g },

  // AWS Access Key IDs
  { name: 'AWS', pattern: /AKIA[0-9A-Z]{16}/g },

  // AWS Secret Access Keys (40 char base64, following a key ID or label)
  { name: 'AWS Secret', pattern: /(?:aws_secret_access_key|secret_?key|SecretAccessKey)['":\s=]+([A-Za-z0-9/+=]{40})/gi },

  // GitHub tokens
  { name: 'GitHub', pattern: /ghp_[A-Za-z0-9]{36}/g },
  { name: 'GitHub', pattern: /gho_[A-Za-z0-9]{36}/g },
  { name: 'GitHub', pattern: /ghs_[A-Za-z0-9]{36}/g },
  { name: 'GitHub', pattern: /ghr_[A-Za-z0-9]{36}/g },
  { name: 'GitHub', pattern: /github_pat_[A-Za-z0-9_]{22,}/g },

  // Stripe keys
  { name: 'Stripe', pattern: /sk_live_[A-Za-z0-9]{24,}/g },
  { name: 'Stripe', pattern: /sk_test_[A-Za-z0-9]{24,}/g },
  { name: 'Stripe', pattern: /rk_live_[A-Za-z0-9]{24,}/g },
  { name: 'Stripe', pattern: /rk_test_[A-Za-z0-9]{24,}/g },

  // Slack tokens
  { name: 'Slack', pattern: /xoxb-[0-9]{10,}-[0-9]{10,}-[A-Za-z0-9]{24,}/g },
  { name: 'Slack', pattern: /xoxp-[0-9]{10,}-[0-9]{10,}-[A-Za-z0-9]{24,}/g },
  { name: 'Slack', pattern: /xoxs-[0-9]{10,}-[0-9]{10,}-[A-Za-z0-9]{24,}/g },

  // Slack webhooks
  { name: 'Slack Webhook', pattern: /https:\/\/hooks\.slack\.com\/services\/T[A-Z0-9]{8,}\/B[A-Z0-9]{8,}\/[A-Za-z0-9]{24,}/g },

  // Twilio
  { name: 'Twilio', pattern: /SK[0-9a-f]{32}/g },

  // SendGrid
  { name: 'SendGrid', pattern: /SG\.[A-Za-z0-9_-]{22,}\.[A-Za-z0-9_-]{22,}/g },

  // Mailgun
  { name: 'Mailgun', pattern: /key-[0-9a-f]{32}/g },

  // Google API keys
  { name: 'Google API', pattern: /AIza[A-Za-z0-9_-]{35}/g },

  // Supabase service role / anon keys (JWT format with known iss)
  { name: 'Supabase JWT', pattern: /eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9\.eyJpc3MiOiJzdXBhYmFzZSI[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+/g },

  // Private keys. The match runs through the matching END marker so that
  // the key material itself is redacted, not just the header line. When the
  // END marker is missing (truncated output) only the header is matched.
  { name: 'Private Key', pattern: /-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----(?:[\s\S]*?-----END (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----)?/g },

  // Heroku API key
  { name: 'Heroku', pattern: /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/g,
    // UUIDs are common — only flag if near a keyword. The surrounding text
    // is taken around the given match position; without one, the first
    // occurrence of the match is used.
    validate: (match, context, index) => {
      const at = Number.isInteger(index) ? index : context.indexOf(match);
      const nearby = context.slice(Math.max(0, at - 80), at + match.length + 80).toLowerCase();
      return /(?:heroku|api[_-]?key|token|secret|password|credential|authorization)/.test(nearby);
    }
  },

  // Datadog API/App keys
  { name: 'Datadog', pattern: /(?:dd-api-key|dd-app-key|datadog_api_key|datadog_app_key)['":\s=]+([a-f0-9]{32,40})/gi },

  // NPM tokens
  { name: 'npm', pattern: /npm_[A-Za-z0-9]{36}/g },

  // PyPI tokens
  { name: 'PyPI', pattern: /pypi-[A-Za-z0-9_-]{50,}/g },

  // Vault tokens
  { name: 'Vault', pattern: /hvs\.[A-Za-z0-9_-]{24,}/g },

  // Generic high-entropy: labeled secrets (key=..., token=..., etc.)
  // Only match when preceded by a secret-like label
  { name: 'Labeled Secret', pattern: /(?:api[_-]?key|api[_-]?secret|secret[_-]?key|access[_-]?token|auth[_-]?token|service[_-]?key|private[_-]?key|password|credential)['":\s=]+([A-Za-z0-9_/+=-]{20,})/gi,
    extract: 1, // capture group to use as the secret
  },
];
757
+
/**
 * Scan command output for secrets and redact them.
 * This runs SYNCHRONOUSLY before output is returned to OpenClaw.
 *
 * Every occurrence of each detected secret is replaced by
 * `[REDACTED <detector> secret]`. Longer matches are redacted before shorter
 * ones: when two detectors hit overlapping text, replacing the shorter match
 * first would break the longer one apart and leave its remainder readable.
 *
 * @param {string} output - Raw command output.
 * @returns {{found: boolean, secrets: {name: string, redacted: string}[],
 *   redacted: string}} `secrets` lists distinct matches in detection order;
 *   `redacted` is the sanitised output (the input itself when nothing was
 *   found or the input is not a string of at least 10 characters).
 */
function scanOutputForSecrets(output) {
  if (!output || typeof output !== 'string' || output.length < 10) {
    return { found: false, secrets: [], redacted: output };
  }

  // secret text → detector name; the first detector to report a value wins.
  const detected = new Map();

  for (const detector of SECRET_OUTPUT_PATTERNS) {
    // Fresh regex so the shared pattern's lastIndex is never touched.
    const regex = new RegExp(detector.pattern.source, detector.pattern.flags);
    let m;
    while ((m = regex.exec(output)) !== null) {
      const secret = detector.extract ? m[detector.extract] : m[0];
      if (!secret || secret.length < 8) continue;

      // Run the optional validator, telling it where this occurrence sits so
      // it can inspect the right surroundings.
      if (detector.validate) {
        const position = m.index + Math.max(0, m[0].indexOf(secret));
        if (!detector.validate(secret, output, position)) continue;
      }

      if (!detected.has(secret)) {
        detected.set(secret, detector.name);
      }
    }
  }

  if (detected.size === 0) {
    return { found: false, secrets: [], redacted: output };
  }

  const unique = Array.from(detected, ([match, name]) => ({ name, match }));

  // Redact longest-first (stable for equal lengths).
  const longestFirst = [...unique].sort((a, b) => b.match.length - a.match.length);
  let redacted = output;
  for (const s of longestFirst) {
    redacted = redacted.split(s.match).join(`[REDACTED ${s.name} secret]`);
  }

  track('output_secrets_redacted', {
    count: unique.length,
    types: [...new Set(unique.map((s) => s.name))],
  });

  return {
    found: true,
    secrets: unique.map((s) => ({
      name: s.name,
      redacted: redactSecret(s.match),
    })),
    redacted,
  };
}
824
+
/**
 * Build the notice that is appended to output after secrets were redacted.
 *
 * @param {{secrets: {name: string}[]}} scanResult - Result of the output scan.
 * @returns {string} Notice giving the number of redacted secrets and the
 *   distinct detector names, preceded by two newlines.
 */
function formatRedactionNotice(scanResult) {
  const { secrets } = scanResult;
  const distinctNames = Array.from(new Set(secrets.map(({ name }) => name)));
  return `\n\n[SECURITY] ${secrets.length} secret(s) redacted from output (${distinctNames.join(', ')}). The AI agent cannot see the actual values.`;
}
832
+
833
+ return {
834
+ scan,
835
+ scanDirectory,
836
+ getScanTargets,
837
+ formatResults,
838
+ loadScanCache,
839
+ isTrufflehogInstalled,
840
+ solve,
841
+ formatSolveResults,
842
+ generateDummy,
843
+ checkEnvVarLeak,
844
+ extractEnvVarRefs,
845
+ formatEnvVarBlockError,
846
+ scanOutputForSecrets,
847
+ formatRedactionNotice,
848
+ };
849
+ };