@link-assistant/hive-mind 1.64.2 → 1.64.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -0
- package/package.json +1 -1
- package/src/claude.lib.mjs +12 -1
- package/src/codex.lib.mjs +12 -1
- package/src/github.lib.mjs +2 -2
- package/src/interactive-mode.lib.mjs +104 -8
- package/src/lib.mjs +3 -3
- package/src/post-finish-sanitization-sweep.lib.mjs +201 -0
- package/src/solve.config.lib.mjs +15 -0
- package/src/solve.results.lib.mjs +52 -0
- package/src/telegram-bot.mjs +40 -0
- package/src/telegram-leak-notifier.lib.mjs +79 -0
- package/src/telegram-tokens-command.lib.mjs +151 -0
- package/src/token-sanitization.lib.mjs +355 -18
- package/src/tool-comments.lib.mjs +6 -2
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,22 @@
|
|
|
1
1
|
# @link-assistant/hive-mind
|
|
2
2
|
|
|
3
|
+
## 1.64.3
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- dd52682: Sanitize all user-facing output to prevent token leaks (#1745).
|
|
8
|
+
- All comment-posting paths (`postComment`, `editComment`, `postTrackedComment`) run bodies through `sanitizeOutput` (canonical name) / `sanitizeCommentBody` (active-token wrapper). `sanitizeLogContent` is preserved as a backward-compatible alias.
|
|
9
|
+
- `KNOWN_LOCAL_TOKEN_ENV_VARS` registry masks tokens by exact env value (Telegram, GitHub, Anthropic/Claude, OpenAI/Codex, Gemini/Google, Qwen/Dashscope, OpenCode, AgentCLI, HuggingFace).
|
|
10
|
+
- Three independent CLI flags: `--dangerously-skip-output-sanitization`, `--dangerously-skip-code-output-sanitization`, `--dangerously-skip-active-tokens-output-sanitization` — all default false; active-tokens skip stays separate so the broad skip flag still keeps active-token masking on.
|
|
11
|
+
- Process-wide sanitization counters (`getSanitizationStats`, `formatSanitizationSummary`) print a one-line summary at the end of each run with a hint to use `--dangerously-skip-output-sanitization` when masking blocks the user's workflow.
|
|
12
|
+
- `extractTokensFromUserContent` carve-out helper: tokens already present in user-provided content (issue body, non-bot comments, pre-existing code) are passed as `excludeTokens` so the sanitizer leaves them untouched while still masking active local tokens.
|
|
13
|
+
- Post-finish sweep (`runPostFinishSweep`) re-reads bot-authored PR comments and the PR description after the AI session completes and edits in place if a leak slipped past the live sanitizer.
|
|
14
|
+
- ESLint guardrail (`gh-rate-limit/require-sanitized-output`) flags raw `gh pr comment`, `gh issue comment`, `gh pr edit`, and `gh api .../comments` calls that bypass the sanitizer.
|
|
15
|
+
- Out-of-band Telegram leak DM with masked summaries when a known-local token is detected in an outbound comment.
|
|
16
|
+
- Hidden owner-only `/tokens` Telegram command lists configured tokens (always masked, private chat only).
|
|
17
|
+
- `maskToken` defaults to 3+3 characters per issue requirements.
|
|
18
|
+
- secretlint preset (best-of-breed) runs alongside our custom patterns; mismatch warnings surface gaps.
|
|
19
|
+
|
|
3
20
|
## 1.64.2
|
|
4
21
|
|
|
5
22
|
### Patch Changes
|
package/package.json
CHANGED
package/src/claude.lib.mjs
CHANGED
|
@@ -682,7 +682,18 @@ export const executeClaudeCommand = async params => {
|
|
|
682
682
|
let interactiveHandler = null;
|
|
683
683
|
if (argv.interactiveMode && owner && repo && prNumber) {
|
|
684
684
|
await log('🔌 Interactive mode: Creating handler for real-time PR comments', { verbose: true });
|
|
685
|
-
interactiveHandler = createInteractiveHandler({
|
|
685
|
+
interactiveHandler = createInteractiveHandler({
|
|
686
|
+
owner,
|
|
687
|
+
repo,
|
|
688
|
+
prNumber,
|
|
689
|
+
$,
|
|
690
|
+
log,
|
|
691
|
+
verbose: argv.verbose,
|
|
692
|
+
// Issue #1745: thread the three independent dangerous-skip flags through
|
|
693
|
+
// so the comment-posting path can honor them; flags default to false.
|
|
694
|
+
skipOutputSanitization: argv['dangerously-skip-output-sanitization'] === true,
|
|
695
|
+
skipActiveTokensOutputSanitization: argv['dangerously-skip-active-tokens-output-sanitization'] === true,
|
|
696
|
+
});
|
|
686
697
|
} else if (argv.interactiveMode) {
|
|
687
698
|
await log('⚠️ Interactive mode: Disabled - missing PR info (owner/repo/prNumber)', { verbose: true });
|
|
688
699
|
}
|
package/src/codex.lib.mjs
CHANGED
|
@@ -792,7 +792,18 @@ export const executeCodexCommand = async params => {
|
|
|
792
792
|
let interactiveHandler = null;
|
|
793
793
|
if (argv.interactiveMode && owner && repo && prNumber) {
|
|
794
794
|
await log('🔌 Interactive mode: Creating handler for real-time PR comments', { verbose: true });
|
|
795
|
-
interactiveHandler = createInteractiveHandler({
|
|
795
|
+
interactiveHandler = createInteractiveHandler({
|
|
796
|
+
owner,
|
|
797
|
+
repo,
|
|
798
|
+
prNumber,
|
|
799
|
+
$,
|
|
800
|
+
log,
|
|
801
|
+
verbose: argv.verbose,
|
|
802
|
+
// Issue #1745: pass the three independent dangerous-skip flags so the
|
|
803
|
+
// comment-posting path can honor them. All default to false.
|
|
804
|
+
skipOutputSanitization: argv['dangerously-skip-output-sanitization'] === true,
|
|
805
|
+
skipActiveTokensOutputSanitization: argv['dangerously-skip-active-tokens-output-sanitization'] === true,
|
|
806
|
+
});
|
|
796
807
|
} else if (argv.interactiveMode) {
|
|
797
808
|
await log('⚠️ Interactive mode: Disabled - missing PR info (owner/repo/prNumber)', { verbose: true });
|
|
798
809
|
}
|
package/src/github.lib.mjs
CHANGED
|
@@ -6,8 +6,8 @@ import { log, maskToken, cleanErrorMessage, isENOSPC, ghCmdRetry } from './lib.m
|
|
|
6
6
|
import { reportError } from './sentry.lib.mjs';
|
|
7
7
|
import { githubLimits, timeouts } from './config.lib.mjs';
|
|
8
8
|
import { batchCheckPullRequestsForIssues as batchCheckPRs, batchCheckArchivedRepositories as batchCheckArchived } from './github.batch.lib.mjs';
|
|
9
|
-
import { isSafeToken, isHexInSafeContext, getGitHubTokensFromFiles, getGitHubTokensFromCommand, sanitizeLogContent } from './token-sanitization.lib.mjs';
|
|
10
|
-
export { isSafeToken, isHexInSafeContext, getGitHubTokensFromFiles, getGitHubTokensFromCommand, sanitizeLogContent }; // Re-export for backward compatibility
|
|
9
|
+
import { isSafeToken, isHexInSafeContext, getGitHubTokensFromFiles, getGitHubTokensFromCommand, sanitizeOutput, sanitizeLogContent } from './token-sanitization.lib.mjs';
|
|
10
|
+
export { isSafeToken, isHexInSafeContext, getGitHubTokensFromFiles, getGitHubTokensFromCommand, sanitizeOutput, sanitizeLogContent }; // Re-export for backward compatibility
|
|
11
11
|
import { uploadLogWithGhUploadLog } from './log-upload.lib.mjs';
|
|
12
12
|
import { formatResetTimeWithRelative } from './usage-limit.lib.mjs'; // See: https://github.com/link-assistant/hive-mind/issues/1236
|
|
13
13
|
// Import model info helpers (Issue #1225)
|
|
@@ -38,6 +38,13 @@ import { CONFIG, createCollapsible, createRawJsonSection, escapeMarkdown, execFi
|
|
|
38
38
|
// Use the session-started marker as the single source of truth for the
|
|
39
39
|
// header string, keeping posting and filtering in lock-step.
|
|
40
40
|
import { INTERACTIVE_SESSION_STARTED_MARKER, trackToolCommentId } from './tool-comments.lib.mjs';
|
|
41
|
+
// Issue #1745: every comment body posted by the AI bridge MUST flow through
|
|
42
|
+
// sanitizeCommentBody() before leaving the process. The leak in
|
|
43
|
+
// xlab2016/space_db_private#20 happened because raw bash-tool stdout
|
|
44
|
+
// (including TELEGRAM_BOT_TOKEN=...) was published verbatim. See
|
|
45
|
+
// docs/case-studies/issue-1745/analysis.md for the full timeline.
|
|
46
|
+
import { containsKnownToken, getAllKnownLocalTokens, sanitizeCommentBody } from './token-sanitization.lib.mjs';
|
|
47
|
+
import { reportInteractiveLeak } from './telegram-leak-notifier.lib.mjs';
|
|
41
48
|
|
|
42
49
|
/**
|
|
43
50
|
* Creates an interactive mode handler for processing Claude/Codex CLI events
|
|
@@ -52,7 +59,23 @@ import { INTERACTIVE_SESSION_STARTED_MARKER, trackToolCommentId } from './tool-c
|
|
|
52
59
|
* @returns {Object} Handler object with event processing methods
|
|
53
60
|
*/
|
|
54
61
|
export const createInteractiveHandler = options => {
|
|
55
|
-
const {
|
|
62
|
+
const {
|
|
63
|
+
owner,
|
|
64
|
+
repo,
|
|
65
|
+
prNumber,
|
|
66
|
+
log,
|
|
67
|
+
verbose = false,
|
|
68
|
+
execFile: execFileFn,
|
|
69
|
+
// Issue #1745: dangerous-skip flags. All default to false; passing them
|
|
70
|
+
// through lets the operator opt out of pattern-based sanitization (for
|
|
71
|
+
// controlled debugging in private repos) while keeping active-token
|
|
72
|
+
// masking on by default.
|
|
73
|
+
skipOutputSanitization = false,
|
|
74
|
+
skipActiveTokensOutputSanitization = false,
|
|
75
|
+
// Pre-existing user content carve-out (issue body / non-bot comments /
|
|
76
|
+
// pre-existing code). When provided, sanitizer leaves these tokens untouched.
|
|
77
|
+
excludeTokens = [],
|
|
78
|
+
} = options;
|
|
56
79
|
// Use injected execFile for testability, or the real one by default
|
|
57
80
|
const runGhApi = execFileFn || execFileAsync;
|
|
58
81
|
|
|
@@ -88,6 +111,71 @@ export const createInteractiveHandler = options => {
|
|
|
88
111
|
editsFailed: 0,
|
|
89
112
|
};
|
|
90
113
|
|
|
114
|
+
/**
 * Sanitize a comment body before it is sent to GitHub and alert the operator
 * when a known-local token was found in it. Issue #1745.
 *
 * Each step is best-effort and isolated so one failure cannot skip the next:
 *  1. Load the known local tokens (falls back to an empty list).
 *  2. Detect which of them occur in the raw body (falls back to no hits).
 *  3. Run the body through sanitizeCommentBody; if that throws, fail closed
 *     and replace the body with a fixed placeholder.
 *  4. If step 2 found hits, log them and fire the out-of-band leak notifier.
 *
 * @param {string} body - Raw comment body. Non-string or empty values are returned unchanged.
 * @returns {Promise<string>} The string that is actually sent to GitHub.
 * @private
 */
const sanitizeAndWarn = async body => {
  if (typeof body !== 'string' || body.length === 0) return body;

  let knownTokens;
  try {
    knownTokens = await getAllKnownLocalTokens();
  } catch (err) {
    // Best-effort: if token lookup fails, fall back to regex/secretlint only.
    knownTokens = [];
    if (verbose) {
      await log(`⚠️ Interactive mode: getAllKnownLocalTokens failed: ${err.message}`, { verbose: true });
    }
  }

  let hits = [];
  try {
    const found = await containsKnownToken(body, knownTokens);
    // Guard against a non-array result so the hits.length check below cannot throw.
    hits = Array.isArray(found) ? found : [];
  } catch (err) {
    // Detection only drives the warning/notification; sanitization below still runs.
    // Surface the failure (verbose) instead of silently losing the leak alert.
    hits = [];
    if (verbose) {
      await log(`⚠️ Interactive mode: containsKnownToken failed: ${err.message}`, { verbose: true });
    }
  }

  let sanitized = body;
  let failedClosed = false;
  try {
    sanitized = await sanitizeCommentBody(body, {
      knownTokens,
      skipOutputSanitization,
      skipActiveTokensOutputSanitization,
      excludeTokens,
    });
  } catch (err) {
    await log(`⚠️ Interactive mode: sanitizeCommentBody failed: ${err.message} — falling back to raw body MASKED`);
    // Fail closed: if sanitization fails entirely, drop the body to a safe
    // placeholder rather than leaking. Better to lose detail than secrets.
    sanitized = '[redacted: sanitization failed]';
    failedClosed = true;
  }

  if (hits.length > 0) {
    const sources = hits.map(h => h.source).join(', ');
    // Do not claim the tokens were masked when the operator explicitly turned
    // active-token masking off (unless the whole body was dropped above).
    const maskingBypassed = skipActiveTokensOutputSanitization && !failedClosed;
    const outcome = maskingBypassed ? 'active-token masking is DISABLED (--dangerously-skip-active-tokens-output-sanitization), so they may NOT have been masked' : 'sanitizer masked them';
    await log(`🚨 Interactive mode: known-local token(s) detected in outbound comment — ${outcome}. Sources: ${sources}`);
    try {
      await reportInteractiveLeak({
        owner,
        repo,
        prNumber,
        tokenHits: hits,
        log,
      });
    } catch (err) {
      // The notifier is out-of-band; its failure must never block the comment.
      if (verbose) {
        await log(`⚠️ Interactive mode: leak notifier failed: ${err.message}`, { verbose: true });
      }
    }
  }

  return sanitized;
};
|
|
178
|
+
|
|
91
179
|
/**
|
|
92
180
|
* Post a comment to the PR (with rate limiting)
|
|
93
181
|
* @param {string} body - Comment body
|
|
@@ -104,12 +192,16 @@ export const createInteractiveHandler = options => {
|
|
|
104
192
|
return null;
|
|
105
193
|
}
|
|
106
194
|
|
|
195
|
+
// Issue #1745: sanitize BEFORE rate-limit queuing so queued bodies are
|
|
196
|
+
// also safe (the queue persists across reconnects).
|
|
197
|
+
const safeBody = await sanitizeAndWarn(body);
|
|
198
|
+
|
|
107
199
|
const now = Date.now();
|
|
108
200
|
const timeSinceLastComment = now - state.lastCommentTime;
|
|
109
201
|
|
|
110
202
|
if (timeSinceLastComment < CONFIG.MIN_COMMENT_INTERVAL) {
|
|
111
203
|
// Queue the comment for later with toolId/taskId for tracking
|
|
112
|
-
state.commentQueue.push({ body, toolId, taskId });
|
|
204
|
+
state.commentQueue.push({ body: safeBody, toolId, taskId });
|
|
113
205
|
if (verbose) {
|
|
114
206
|
await log(`📝 Interactive mode: Comment queued (${state.commentQueue.length} in queue)${toolId ? ` [tool: ${toolId}]` : ''}${taskId ? ` [task: ${taskId}]` : ''}`, { verbose: true });
|
|
115
207
|
}
|
|
@@ -122,7 +214,7 @@ export const createInteractiveHandler = options => {
|
|
|
122
214
|
// with complex markdown bodies containing backticks, quotes, etc.
|
|
123
215
|
// See: https://github.com/link-assistant/hive-mind/issues/1458
|
|
124
216
|
const apiUrl = `repos/${owner}/${repo}/issues/${prNumber}/comments`;
|
|
125
|
-
const jsonPayload = JSON.stringify({ body });
|
|
217
|
+
const jsonPayload = JSON.stringify({ body: safeBody });
|
|
126
218
|
const { stdout } = await runGhApi('gh', ['api', apiUrl, '-X', 'POST', '--input', '-'], {
|
|
127
219
|
input: jsonPayload,
|
|
128
220
|
maxBuffer: 10 * 1024 * 1024, // 10MB
|
|
@@ -147,13 +239,13 @@ export const createInteractiveHandler = options => {
|
|
|
147
239
|
trackToolCommentId(commentId);
|
|
148
240
|
|
|
149
241
|
if (verbose) {
|
|
150
|
-
await log(`✅ Interactive mode: Comment posted${commentId ? ` (ID: ${commentId})` : ''} (body: ${
|
|
242
|
+
await log(`✅ Interactive mode: Comment posted${commentId ? ` (ID: ${commentId})` : ''} (body: ${safeBody.length} chars)`, { verbose: true });
|
|
151
243
|
}
|
|
152
244
|
return commentId;
|
|
153
245
|
} catch (error) {
|
|
154
246
|
state.commentsFailed++;
|
|
155
247
|
// Issue #1472: Always log comment failures (not just verbose) — silent failures cause zero-comment bugs
|
|
156
|
-
await log(`⚠️ Interactive mode: Failed to post comment: ${error.message} (body: ${
|
|
248
|
+
await log(`⚠️ Interactive mode: Failed to post comment: ${error.message} (body: ${safeBody.length} chars)`);
|
|
157
249
|
return null;
|
|
158
250
|
}
|
|
159
251
|
};
|
|
@@ -173,25 +265,29 @@ export const createInteractiveHandler = options => {
|
|
|
173
265
|
return false;
|
|
174
266
|
}
|
|
175
267
|
|
|
268
|
+
// Issue #1745: sanitize before sending. editComment is the path that
|
|
269
|
+
// leaked TELEGRAM_BOT_TOKEN in xlab2016/space_db_private#20.
|
|
270
|
+
const safeBody = await sanitizeAndWarn(body);
|
|
271
|
+
|
|
176
272
|
state.editsAttempted++;
|
|
177
273
|
try {
|
|
178
274
|
// Edit comment via gh api with stdin to avoid shell quoting issues
|
|
179
275
|
// with complex markdown bodies containing backticks, quotes, etc.
|
|
180
276
|
// See: https://github.com/link-assistant/hive-mind/issues/1458
|
|
181
277
|
const apiUrl = `repos/${owner}/${repo}/issues/comments/${commentId}`;
|
|
182
|
-
const jsonPayload = JSON.stringify({ body });
|
|
278
|
+
const jsonPayload = JSON.stringify({ body: safeBody });
|
|
183
279
|
await runGhApi('gh', ['api', apiUrl, '-X', 'PATCH', '--input', '-'], {
|
|
184
280
|
input: jsonPayload,
|
|
185
281
|
maxBuffer: 10 * 1024 * 1024, // 10MB
|
|
186
282
|
});
|
|
187
283
|
state.editsSucceeded++;
|
|
188
284
|
if (verbose) {
|
|
189
|
-
await log(`✅ Interactive mode: Comment ${commentId} updated (body: ${
|
|
285
|
+
await log(`✅ Interactive mode: Comment ${commentId} updated (body: ${safeBody.length} chars, payload: ${jsonPayload.length} chars)`, { verbose: true });
|
|
190
286
|
}
|
|
191
287
|
return true;
|
|
192
288
|
} catch (error) {
|
|
193
289
|
state.editsFailed++;
|
|
194
|
-
await log(`⚠️ Interactive mode: Failed to edit comment ${commentId}: ${error.message} (body: ${
|
|
290
|
+
await log(`⚠️ Interactive mode: Failed to edit comment ${commentId}: ${error.message} (body: ${safeBody.length} chars)`);
|
|
195
291
|
return false;
|
|
196
292
|
}
|
|
197
293
|
};
|
package/src/lib.mjs
CHANGED
|
@@ -334,12 +334,12 @@ export const setupStdioLogInterceptor = () => {
|
|
|
334
334
|
* @param {string} token - Token to mask
|
|
335
335
|
* @param {Object} options - Masking options
|
|
336
336
|
* @param {number} [options.minLength=12] - Minimum length to mask
|
|
337
|
-
* @param {number} [options.startChars=
|
|
338
|
-
* @param {number} [options.endChars=
|
|
337
|
+
* @param {number} [options.startChars=3] - Number of characters to show at start
|
|
338
|
+
* @param {number} [options.endChars=3] - Number of characters to show at end
|
|
339
339
|
* @returns {string} Masked token
|
|
340
340
|
*/
|
|
341
341
|
export const maskToken = (token, options = {}) => {
|
|
342
|
-
const { minLength = 12, startChars =
|
|
342
|
+
const { minLength = 12, startChars = 3, endChars = 3 } = options;
|
|
343
343
|
|
|
344
344
|
if (!token || token.length < minLength) {
|
|
345
345
|
return token; // Don't mask very short strings
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Issue #1745 — post-finish sanitization sweep.
|
|
4
|
+
*
|
|
5
|
+
* Comment #4364642786 requirement: "after AI finishes whatever the content
|
|
6
|
+
* was ... we should by default go and mask the token by editing comments,
|
|
7
|
+
* pull requests".
|
|
8
|
+
*
|
|
9
|
+
* This module re-reads bot-authored comments and the PR description after the
|
|
10
|
+
* AI session finishes, runs the body through `sanitizeOutput`, and edits the
|
|
11
|
+
* comment / PR in place if a difference is detected. It is intentionally
|
|
12
|
+
* conservative:
|
|
13
|
+
*
|
|
14
|
+
* - We only touch content authored by the running gh user (the bot).
|
|
15
|
+
* - We never touch issue bodies (those belong to the human).
|
|
16
|
+
* - History rewriting (force-pushing to delete commits) is NOT performed
|
|
17
|
+
* here. The risk to a shared branch is too high; that step requires the
|
|
18
|
+
* operator to opt in explicitly via a future flag, and is documented in
|
|
19
|
+
* docs/case-studies/issue-1745/analysis.md.
|
|
20
|
+
*
|
|
21
|
+
* @module post-finish-sanitization-sweep
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
import { sanitizeOutput, getSanitizationStats } from './token-sanitization.lib.mjs';
|
|
25
|
+
import { wrapDollarWithGhRetry as _wrapDollarWithGhRetry } from './github-rate-limit.lib.mjs'; // rate-limit marker (#1726): caller passes $ already wrapped through wrapDollarWithGhRetry
|
|
26
|
+
|
|
27
|
+
/**
 * Resolve the login of the account `gh` is authenticated as (the bot).
 *
 * Runs `gh api user --jq .login` through the supplied command helper. Any
 * failure — a thrown error, a non-zero exit code, or empty output — yields
 * null so callers can degrade gracefully when offline / unauthenticated.
 *
 * @param {Function} $ - command-stream tagged-template helper
 * @returns {Promise<string|null>} trimmed login, or null when it cannot be determined
 */
const detectBotLogin = async $ => {
  try {
    const outcome = await $`gh api user --jq .login`;
    const succeeded = Boolean(outcome) && outcome.code === 0 && Boolean(outcome.stdout);
    if (!succeeded) return null;

    const name = outcome.stdout.toString().trim();
    return name.length > 0 ? name : null;
  } catch {
    // Deliberately best-effort: an unknown login simply skips the comment sweep.
    return null;
  }
};
|
|
46
|
+
|
|
47
|
+
/**
 * Sanitize bot-authored PR conversation comments (the issuecomment endpoint).
 *
 * Lists every comment on the PR, keeps only those whose author login equals
 * `botLogin`, runs each body through `sanitizeOutput`, and PATCHes the comment
 * in place when the sanitized text differs. Every failure path is logged and
 * counted in `errors`; a failure on one comment never aborts the others.
 *
 * @param {Object} args
 * @param {Function} args.$ command-stream helper
 * @param {string} args.owner
 * @param {string} args.repo
 * @param {number|string} args.prNumber
 * @param {string} args.botLogin
 * @param {Function} [args.log]
 * @param {Object} [args.sanitizationOptions] forwarded to sanitizeOutput
 * @returns {Promise<{scanned:number, edited:number, errors:number}>}
 */
export const sweepPrConversationComments = async ({ $, owner, repo, prNumber, botLogin, log = async () => {}, sanitizationOptions = {} }) => {
  const stats = { scanned: 0, edited: 0, errors: 0 };

  let response;
  try {
    response = await $`gh api repos/${owner}/${repo}/issues/${prNumber}/comments --paginate`;
  } catch (err) {
    await log(`⚠️ post-finish sweep: failed to list comments: ${err.message || err}`);
    stats.errors++;
    return stats;
  }
  if (!response || response.code !== 0) {
    // Previously counted silently; log it so a skipped sweep is visible.
    await log(`⚠️ post-finish sweep: failed to list comments: gh exited with code ${response?.code ?? 'unknown'}`);
    stats.errors++;
    return stats;
  }

  let comments;
  try {
    comments = JSON.parse(response.stdout.toString());
  } catch (err) {
    await log(`⚠️ post-finish sweep: could not parse comment list: ${err.message || err}`);
    stats.errors++;
    return stats;
  }
  if (!Array.isArray(comments)) return stats;

  for (const c of comments) {
    // Only touch content authored by the bot; human comments are never edited.
    if (!c || !c.user || c.user.login !== botLogin) continue;
    if (typeof c.body !== 'string' || c.body.length === 0) continue;
    stats.scanned++;

    let sanitized;
    try {
      sanitized = await sanitizeOutput(c.body, sanitizationOptions);
    } catch (err) {
      await log(`⚠️ post-finish sweep: sanitize comment ${c.id} failed: ${err.message || err}`);
      stats.errors++;
      continue;
    }
    // Nothing was masked, so there is nothing to edit.
    if (sanitized === c.body) continue;

    try {
      // Send the body via stdin so markdown/quotes never hit shell quoting.
      const payload = JSON.stringify({ body: sanitized });
      const edit = await $({ stdin: payload })`gh api repos/${owner}/${repo}/issues/comments/${c.id} -X PATCH --input -`;
      if (edit && edit.code === 0) {
        stats.edited++;
        await log(`🔒 post-finish sweep: edited comment ${c.id} to mask leaked token(s)`);
      } else {
        // A failed edit means the leak is still public; make that loud.
        await log(`⚠️ post-finish sweep: edit comment ${c.id} failed: gh exited with code ${edit?.code ?? 'unknown'}`);
        stats.errors++;
      }
    } catch (err) {
      await log(`⚠️ post-finish sweep: edit comment ${c.id} failed: ${err.message || err}`);
      stats.errors++;
    }
  }
  return stats;
};
|
|
112
|
+
|
|
113
|
+
/**
 * Sanitize the PR description if needed.
 *
 * Fetches the pull request, runs its body through `sanitizeOutput`, and
 * PATCHes the PR in place when the sanitized text differs. Every failure path
 * is logged and counted in `errors`.
 *
 * @param {Object} args
 * @param {Function} args.$ command-stream helper
 * @param {string} args.owner
 * @param {string} args.repo
 * @param {number|string} args.prNumber
 * @param {Function} [args.log]
 * @param {Object} [args.sanitizationOptions] forwarded to sanitizeOutput
 * @returns {Promise<{scanned:number, edited:number, errors:number}>}
 */
export const sweepPrDescription = async ({ $, owner, repo, prNumber, log = async () => {}, sanitizationOptions = {} }) => {
  const stats = { scanned: 0, edited: 0, errors: 0 };

  let response;
  try {
    response = await $`gh api repos/${owner}/${repo}/pulls/${prNumber}`;
  } catch (err) {
    await log(`⚠️ post-finish sweep: failed to fetch PR ${prNumber}: ${err.message || err}`);
    stats.errors++;
    return stats;
  }
  if (!response || response.code !== 0) {
    // Previously counted silently; log it so a skipped sweep is visible.
    await log(`⚠️ post-finish sweep: failed to fetch PR ${prNumber}: gh exited with code ${response?.code ?? 'unknown'}`);
    stats.errors++;
    return stats;
  }

  let pr;
  try {
    pr = JSON.parse(response.stdout.toString());
  } catch (err) {
    await log(`⚠️ post-finish sweep: could not parse PR ${prNumber}: ${err.message || err}`);
    stats.errors++;
    return stats;
  }

  // `pr` may legitimately parse to null; optional chaining avoids an uncaught TypeError.
  const body = typeof pr?.body === 'string' ? pr.body : '';
  if (body.length === 0) return stats;
  stats.scanned++;

  let sanitized;
  try {
    sanitized = await sanitizeOutput(body, sanitizationOptions);
  } catch (err) {
    await log(`⚠️ post-finish sweep: sanitize PR body failed: ${err.message || err}`);
    stats.errors++;
    return stats;
  }
  // Nothing was masked, so there is nothing to edit.
  if (sanitized === body) return stats;

  try {
    // Send the body via stdin so markdown/quotes never hit shell quoting.
    const payload = JSON.stringify({ body: sanitized });
    const edit = await $({ stdin: payload })`gh api repos/${owner}/${repo}/pulls/${prNumber} -X PATCH --input -`;
    if (edit && edit.code === 0) {
      stats.edited++;
      await log('🔒 post-finish sweep: edited PR description to mask leaked token(s)');
    } else {
      // A failed edit means the leak is still public; make that loud.
      await log(`⚠️ post-finish sweep: edit PR body failed: gh exited with code ${edit?.code ?? 'unknown'}`);
      stats.errors++;
    }
  } catch (err) {
    await log(`⚠️ post-finish sweep: edit PR body failed: ${err.message || err}`);
    stats.errors++;
  }
  return stats;
};
|
|
167
|
+
|
|
168
|
+
/**
 * Run the full post-finish sweep: bot-authored PR comments + PR description.
 * Idempotent and safe to call multiple times.
 *
 * When owner/repo/prNumber is missing nothing is swept and no `gh` call is
 * made (the bot login is only looked up once there is something to sweep).
 * The comment sweep is additionally skipped when the bot login cannot be
 * determined; the PR description sweep still runs in that case.
 *
 * @param {Object} args
 * @param {Function} args.$ command-stream helper
 * @param {string} args.owner
 * @param {string} args.repo
 * @param {number|string} args.prNumber
 * @param {Function} [args.log]
 * @param {Object} [args.sanitizationOptions] forwarded to sanitizeOutput
 * @param {string} [args.botLogin] pre-resolved bot login; detected via `gh` when omitted
 * @returns {Promise<{comments:Object, prBody:Object, totalEdited:number, sanitizationStatsBefore:Object, sanitizationStatsAfter:Object}>}
 *   `comments.skipped` is true whenever the comment sweep did not run.
 */
export const runPostFinishSweep = async ({ $, owner, repo, prNumber, log = async () => {}, sanitizationOptions = {}, botLogin: providedBotLogin }) => {
  const sanitizationStatsBefore = getSanitizationStats();
  const result = {
    comments: { scanned: 0, edited: 0, errors: 0, skipped: true },
    prBody: { scanned: 0, edited: 0, errors: 0 },
    totalEdited: 0,
    sanitizationStatsBefore,
    sanitizationStatsAfter: sanitizationStatsBefore,
  };

  // Validate the target first so an unusable call does not spend a `gh api user` request.
  if (!owner || !repo || !prNumber) return result;

  const botLogin = providedBotLogin || (await detectBotLogin($));
  if (botLogin) {
    const commentStats = await sweepPrConversationComments({ $, owner, repo, prNumber, botLogin, log, sanitizationOptions });
    // Keep the `skipped` field present so the result shape is the same on every path.
    result.comments = { ...commentStats, skipped: false };
  } else {
    await log('⚠️ post-finish sweep: could not determine bot login; skipping comment sweep.');
  }

  result.prBody = await sweepPrDescription({ $, owner, repo, prNumber, log, sanitizationOptions });
  result.totalEdited = result.comments.edited + result.prBody.edited;
  result.sanitizationStatsAfter = getSanitizationStats();
  return result;
};
|
|
196
|
+
|
|
197
|
+
export default {
|
|
198
|
+
sweepPrConversationComments,
|
|
199
|
+
sweepPrDescription,
|
|
200
|
+
runPostFinishSweep,
|
|
201
|
+
};
|
package/src/solve.config.lib.mjs
CHANGED
|
@@ -115,6 +115,21 @@ export const SOLVE_OPTION_DEFINITIONS = {
|
|
|
115
115
|
description: 'Upload the solution draft log file to the Pull Request on completion (⚠️ WARNING: May expose sensitive data)',
|
|
116
116
|
default: false,
|
|
117
117
|
},
|
|
118
|
+
'dangerously-skip-output-sanitization': {
|
|
119
|
+
type: 'boolean',
|
|
120
|
+
description: 'DANGEROUS: skip pattern-based sanitization of generated output. Active local token masking stays enabled unless --dangerously-skip-active-tokens-output-sanitization is also set.',
|
|
121
|
+
default: false,
|
|
122
|
+
},
|
|
123
|
+
'dangerously-skip-code-output-sanitization': {
|
|
124
|
+
type: 'boolean',
|
|
125
|
+
description: 'DANGEROUS: allow generated code/file output to keep pattern-matched token-looking strings. Active local token masking stays enabled unless explicitly disabled.',
|
|
126
|
+
default: false,
|
|
127
|
+
},
|
|
128
|
+
'dangerously-skip-active-tokens-output-sanitization': {
|
|
129
|
+
type: 'boolean',
|
|
130
|
+
description: 'DANGEROUS: skip masking known active local tokens in output. This is separate from other sanitization skip flags and should only be used for controlled debugging.',
|
|
131
|
+
default: false,
|
|
132
|
+
},
|
|
118
133
|
'auto-close-pull-request-on-fail': {
|
|
119
134
|
type: 'boolean',
|
|
120
135
|
description: 'Automatically close the pull request if execution fails',
|
|
@@ -28,6 +28,14 @@ import { safeExit } from './exit-handler.lib.mjs';
|
|
|
28
28
|
const githubLib = await import('./github.lib.mjs');
|
|
29
29
|
const { sanitizeLogContent, attachLogToGitHub } = githubLib;
|
|
30
30
|
|
|
31
|
+
// Issue #1745: process-wide sanitization counters used to print a one-line
|
|
32
|
+
// "we masked N secrets" summary at the end of each run.
|
|
33
|
+
const { formatSanitizationSummary } = await import('./token-sanitization.lib.mjs');
|
|
34
|
+
// Issue #1745: post-finish retroactive sanitization of bot-authored PR
|
|
35
|
+
// comments and the PR description. Runs by default; can be skipped via
|
|
36
|
+
// --dangerously-skip-output-sanitization.
|
|
37
|
+
const { runPostFinishSweep } = await import('./post-finish-sanitization-sweep.lib.mjs');
|
|
38
|
+
|
|
31
39
|
// Import continuation functions (session resumption, PR detection)
|
|
32
40
|
const autoContinue = await import('./solve.auto-continue.lib.mjs');
|
|
33
41
|
const { autoContinueWhenLimitResets } = autoContinue;
|
|
@@ -556,6 +564,17 @@ export const cleanupClaudeFile = async (tempDir, branchName, claudeCommitHash =
|
|
|
556
564
|
export const showSessionSummary = async (sessionId, limitReached, argv, issueUrl, tempDir, shouldAttachLogs = false) => {
|
|
557
565
|
await log('\n=== Session Summary ===');
|
|
558
566
|
|
|
567
|
+
// Issue #1745: report how many tokens were masked during this run, with the
|
|
568
|
+
// "use --dangerously-skip-output-sanitization to skip" hint when > 0.
|
|
569
|
+
try {
|
|
570
|
+
const sanitizationSummary = formatSanitizationSummary();
|
|
571
|
+
if (sanitizationSummary) {
|
|
572
|
+
await log(sanitizationSummary);
|
|
573
|
+
}
|
|
574
|
+
} catch {
|
|
575
|
+
/* never fail the summary because of this */
|
|
576
|
+
}
|
|
577
|
+
|
|
559
578
|
if (sessionId) {
|
|
560
579
|
await log(`✅ Session ID: ${sessionId}`);
|
|
561
580
|
// Always use absolute path for log file display
|
|
@@ -622,6 +641,39 @@ export const showSessionSummary = async (sessionId, limitReached, argv, issueUrl
|
|
|
622
641
|
const logFilePath = path.resolve(getLogFile());
|
|
623
642
|
await log(`📁 Log file available: ${logFilePath}`);
|
|
624
643
|
}
|
|
644
|
+
|
|
645
|
+
// Issue #1745: post-finish retroactive sanitization sweep. Re-reads
|
|
646
|
+
// bot-authored PR comments and the PR description, runs them through
|
|
647
|
+
// sanitizeOutput, and edits in place if a leak slipped past the live
|
|
648
|
+
// sanitizer. Honors --dangerously-skip-output-sanitization and the related
|
|
649
|
+
// active-tokens flag.
|
|
650
|
+
try {
|
|
651
|
+
const owner = argv.owner;
|
|
652
|
+
const repo = argv.repo;
|
|
653
|
+
const prNumber = argv.prNumber;
|
|
654
|
+
const skipOutputSanitization = argv['dangerously-skip-output-sanitization'] === true;
|
|
655
|
+
const skipActiveTokensOutputSanitization = argv['dangerously-skip-active-tokens-output-sanitization'] === true;
|
|
656
|
+
if (owner && repo && prNumber && !skipOutputSanitization) {
|
|
657
|
+
const sweepResult = await runPostFinishSweep({
|
|
658
|
+
$,
|
|
659
|
+
owner,
|
|
660
|
+
repo,
|
|
661
|
+
prNumber,
|
|
662
|
+
log,
|
|
663
|
+
sanitizationOptions: {
|
|
664
|
+
warnOnMismatch: false,
|
|
665
|
+
skipActiveTokensOutputSanitization,
|
|
666
|
+
},
|
|
667
|
+
});
|
|
668
|
+
if (sweepResult.totalEdited > 0) {
|
|
669
|
+
await log(`🔒 Post-finish sweep: edited ${sweepResult.totalEdited} bot-authored item(s) to mask leaked tokens.`);
|
|
670
|
+
const followup = formatSanitizationSummary(sweepResult.sanitizationStatsAfter);
|
|
671
|
+
if (followup) await log(followup);
|
|
672
|
+
}
|
|
673
|
+
}
|
|
674
|
+
} catch (sweepErr) {
|
|
675
|
+
await log(`⚠️ Post-finish sanitization sweep failed: ${sweepErr.message || sweepErr}`);
|
|
676
|
+
}
|
|
625
677
|
};
|
|
626
678
|
|
|
627
679
|
// Verify results by searching for new PRs and comments
|
package/src/telegram-bot.mjs
CHANGED
|
@@ -1109,6 +1109,46 @@ registerStartStopCommands(bot, sharedCommandOpts);
|
|
|
1109
1109
|
await registerLogCommand(bot, sharedCommandOpts);
|
|
1110
1110
|
await registerTerminalWatchCommand(bot, sharedCommandOpts);
|
|
1111
1111
|
|
|
1112
|
+
// Issue #1745: hidden /tokens command for chat owners (private DMs only,
|
|
1113
|
+
// undocumented, masked output). Lets operators audit which local tokens are
|
|
1114
|
+
// live in the bot's environment so they can search for accidental leaks.
|
|
1115
|
+
const { registerTokensCommand } = await import('./telegram-tokens-command.lib.mjs');
|
|
1116
|
+
registerTokensCommand(bot, { ...sharedCommandOpts, allowedChats });
|
|
1117
|
+
|
|
1118
|
+
// Issue #1745: register the leak-warning DM hook. The interactive bridge
|
|
1119
|
+
// fires reportInteractiveLeak() whenever it has to mask a known-local token
|
|
1120
|
+
// in an outbound PR comment. We DM every operator (chat creator) of every
|
|
1121
|
+
// allowlisted chat so at least one of them sees it quickly.
|
|
1122
|
+
const { registerLeakNotifier } = await import('./telegram-leak-notifier.lib.mjs');
|
|
1123
|
+
registerLeakNotifier(async ({ owner, repo, prNumber, tokenHits = [] }) => {
  if (!allowedChats || allowedChats.length === 0) return;

  const where = prNumber ? `${owner}/${repo}#${prNumber}` : `${owner}/${repo}`;

  // The message is sent with parse_mode 'Markdown'. Env-var names such as
  // GH_TOKEN / HF_TOKEN and repository names can contain `_`, which legacy
  // Markdown treats as an italic delimiter; an unbalanced one makes Telegram
  // reject the whole message, and the rejection below is only logged — the
  // alert would be lost. Interpolated identifiers therefore go inside code
  // spans, whose contents are not parsed for entities.
  const sources = tokenHits.length ? tokenHits.map(h => `\`${h.name}\` (${h.source})`).join(', ') : 'unknown';
  const text = `🚨 *Token-leak event*\n\nA known local token was about to be published in \`${where}\` and was masked by the sanitizer just in time.\n\nTokens detected: ${sources}\n\nRotate the affected secret(s) now and check public surfaces (GitHub comments, gists, Slack) for any prior copies.`;

  // One person may be the creator of several allowlisted chats; remember who
  // was already reached so they get a single alert per leak event.
  const alreadyNotified = new Set();

  for (const chatId of allowedChats) {
    try {
      // NOTE(review): getChatMember(chatId, chatId) only yields a 'creator'
      // record in unusual cases — confirm whether this first lookup is still
      // needed. When it does not, fall back to the administrators list and
      // pick the creator from there.
      const member = await bot.telegram.getChatMember(chatId, chatId).catch(() => null);
      let ownerUserId = null;
      if (member && member.status === 'creator' && member.user?.id) {
        ownerUserId = member.user.id;
      } else {
        const admins = await bot.telegram.getChatAdministrators(chatId).catch(() => []);
        const creator = (admins || []).find(a => a.status === 'creator');
        if (creator && creator.user?.id) ownerUserId = creator.user.id;
      }

      if (!ownerUserId || alreadyNotified.has(ownerUserId)) continue;

      const delivered = await bot.telegram.sendMessage(ownerUserId, text, { parse_mode: 'Markdown' }).then(
        () => true,
        err => {
          console.warn(`[telegram-leak-notifier] DM to user ${ownerUserId} (chat ${chatId}) failed: ${err.message}`);
          return false;
        }
      );
      // Only mark as notified on success so a later chat with the same
      // creator still gets a retry if this send failed.
      if (delivered) alreadyNotified.add(ownerUserId);
    } catch (err) {
      console.warn(`[telegram-leak-notifier] could not notify owner of chat ${chatId}: ${err.message}`);
    }
  }
});
|
|
1151
|
+
|
|
1112
1152
|
// Add message listener for verbose debugging
|
|
1113
1153
|
if (VERBOSE) {
|
|
1114
1154
|
bot.on('message', (ctx, next) => {
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Telegram leak-notifier (Issue #1745)
|
|
4
|
+
*
|
|
5
|
+
* The interactive AI bridge calls `reportInteractiveLeak()` whenever it
|
|
6
|
+
* detects that a comment body it was about to publish contained a
|
|
7
|
+
* known-local token. The sanitizer masks the token before it goes out, but
|
|
8
|
+
* we still want the chat owner who started the session to know — quickly,
|
|
9
|
+
* out-of-band — so they can rotate the token immediately.
|
|
10
|
+
*
|
|
11
|
+
* The Telegram bot calls `registerLeakNotifier()` on startup with a callback
|
|
12
|
+
* that knows how to DM the chat owner. We keep this contract intentionally
|
|
13
|
+
* small (callback-based, no direct telegraf import) so:
|
|
14
|
+
*
|
|
15
|
+
* 1. interactive-mode.lib.mjs doesn't have to depend on telegraf at all
|
|
16
|
+
* (avoids a heavy import in the AI subprocess).
|
|
17
|
+
* 2. Tests can register a no-op (or assertion-collecting) notifier.
|
|
18
|
+
* 3. solve.mjs running outside the Telegram bot process degrades gracefully
|
|
19
|
+
* to a console warning.
|
|
20
|
+
*
|
|
21
|
+
* @see docs/case-studies/issue-1745/analysis.md
|
|
22
|
+
* @module telegram-leak-notifier
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
let registeredNotifier = null;
|
|
26
|
+
|
|
27
|
+
/**
|
|
28
|
+
* Telegram bot calls this once during startup so the AI bridge has a way
|
|
29
|
+
* to send out-of-band leak warnings.
|
|
30
|
+
*
|
|
31
|
+
* @param {Function} notifier async ({ owner, repo, prNumber, tokenHits }) => void
|
|
32
|
+
*/
|
|
33
|
+
export const registerLeakNotifier = notifier => {
|
|
34
|
+
registeredNotifier = typeof notifier === 'function' ? notifier : null;
|
|
35
|
+
};
|
|
36
|
+
|
|
37
|
+
/** Test hook — clear the registered notifier between tests. */
|
|
38
|
+
export const clearLeakNotifierForTests = () => {
|
|
39
|
+
registeredNotifier = null;
|
|
40
|
+
};
|
|
41
|
+
|
|
42
|
+
/**
 * Issue #1745 — fired by interactive-mode.lib.mjs when it had to mask a
 * known-local token in an outbound comment.
 *
 * Never rejects. The event is always written to `log` (or `console.warn`
 * when no logger is supplied or the logger itself throws), and the
 * registered Telegram notifier — if any — is invoked afterwards. A failing
 * logger or notifier is reported as a warning instead of propagating, so a
 * broken reporting path can never abort the comment-posting flow.
 *
 * @param {Object} params
 * @param {string} params.owner  repo owner
 * @param {string} params.repo   repo name
 * @param {number} [params.prNumber]  pull-request number, when applicable
 * @param {Array<{name: string, source: string}>} [params.tokenHits]
 *        list of token identifiers (NEVER the values) that were detected.
 *        Anything that is not an array is treated as "no details available".
 * @param {Function} [params.log]  async logger from interactive-mode
 * @returns {Promise<void>}
 */
export const reportInteractiveLeak = async ({ owner, repo, prNumber, tokenHits = [], log } = {}) => {
  // The `= []` default only covers `undefined`; normalise null / non-arrays
  // and drop empty entries so building the summary cannot throw.
  const hits = Array.isArray(tokenHits) ? tokenHits.filter(Boolean) : [];
  const describeError = err => (err && err.message) || String(err);

  // Logging must not be able to break the report: if the supplied logger
  // throws, fall back to console.warn and keep going.
  const safeLog = async msg => {
    if (typeof log !== 'function') {
      console.warn(msg);
      return;
    }
    try {
      await log(msg);
    } catch (logErr) {
      console.warn(msg);
      console.warn(`⚠️ Leak-report logger threw: ${describeError(logErr)}`);
    }
  };

  const summary = hits.length ? hits.map(h => `${h.name} (${h.source})`).join(', ') : 'unknown';
  const where = prNumber ? `${owner}/${repo}#${prNumber}` : `${owner}/${repo}`;

  await safeLog(`🚨 Token-leak event: ${summary} found in outbound comment for ${where} (sanitizer masked it).`);

  // Outside the Telegram bot process nothing is registered; the log line
  // above is the whole report in that case.
  if (!registeredNotifier) return;

  try {
    await registeredNotifier({ owner, repo, prNumber, tokenHits: hits });
  } catch (err) {
    await safeLog(`⚠️ Telegram leak notifier threw: ${describeError(err)}`);
  }
};
|
|
74
|
+
|
|
75
|
+
export default {
|
|
76
|
+
registerLeakNotifier,
|
|
77
|
+
reportInteractiveLeak,
|
|
78
|
+
clearLeakNotifierForTests,
|
|
79
|
+
};
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Telegram /tokens command — hidden, owner-only, private-chat only.
|
|
4
|
+
*
|
|
5
|
+
* Lists every known LOCAL token the bot can see (env vars + GitHub CLI
|
|
6
|
+
* tokens), already masked via `maskToken` (3-char prefix/suffix per
|
|
7
|
+
* issue #1745). Useful for spot-checking which secrets are live in the
|
|
8
|
+
* bot's environment so the operator can search for them in public places
|
|
9
|
+
* before they become a leak.
|
|
10
|
+
*
|
|
11
|
+
* Privacy / safety guarantees:
|
|
12
|
+
*
|
|
13
|
+
* - Hidden command. Not advertised in /help. Not part of the BotFather
|
|
14
|
+
* command list.
|
|
15
|
+
* - Private-chat only. Never echoes tokens (even masked) into a group chat.
|
|
16
|
+
* - Authenticated. The user must own (`status === 'creator'`) at least one
|
|
17
|
+
* chat that is on the allowlist — i.e. they're an actual operator of
|
|
18
|
+
* this bot, not a random DMer.
|
|
19
|
+
* - Output is always masked. We never print raw values.
|
|
20
|
+
*
|
|
21
|
+
* @see https://github.com/link-assistant/hive-mind/issues/1745
|
|
22
|
+
* @module telegram-tokens-command
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import { getAllKnownLocalTokens } from './token-sanitization.lib.mjs';
|
|
26
|
+
import { maskToken } from './lib.mjs';
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Resolve allowed chat IDs into an array of numeric IDs the user could own.
|
|
30
|
+
* Accepts:
|
|
31
|
+
* - Array<number|string>
|
|
32
|
+
* - Function returning Array<number|string>
|
|
33
|
+
* - undefined / null (treated as "any" — useful in private bot deployments)
|
|
34
|
+
*
|
|
35
|
+
* @param {Array|Function|null|undefined} allowedChats
|
|
36
|
+
* @returns {Array<string>} numeric chat IDs as strings
|
|
37
|
+
*/
|
|
38
|
+
const resolveAllowedChatIds = allowedChats => {
|
|
39
|
+
if (!allowedChats) return [];
|
|
40
|
+
const raw = typeof allowedChats === 'function' ? allowedChats() : allowedChats;
|
|
41
|
+
if (!Array.isArray(raw)) return [];
|
|
42
|
+
return raw.map(v => String(v)).filter(Boolean);
|
|
43
|
+
};
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* Returns true if `userId` is the creator of any chat in `allowedChatIds`.
|
|
47
|
+
* Returns true unconditionally when `allowedChatIds` is empty (private
|
|
48
|
+
* deployment — no allowlist means any DM is fine).
|
|
49
|
+
*/
|
|
50
|
+
const isOperatorOfAnyAllowedChat = async ({ telegram, userId, allowedChatIds }) => {
|
|
51
|
+
if (!allowedChatIds || allowedChatIds.length === 0) {
|
|
52
|
+
return true;
|
|
53
|
+
}
|
|
54
|
+
for (const chatId of allowedChatIds) {
|
|
55
|
+
try {
|
|
56
|
+
const member = await telegram.getChatMember(chatId, userId);
|
|
57
|
+
if (member && member.status === 'creator') {
|
|
58
|
+
return true;
|
|
59
|
+
}
|
|
60
|
+
} catch {
|
|
61
|
+
// Bot may have been removed from the chat; skip and try the next one.
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
return false;
|
|
65
|
+
};
|
|
66
|
+
|
|
67
|
+
/**
 * Format the token list for display as a Telegram legacy-Markdown message.
 *
 * Each entry line has the shape: • `NAME` (source): `masked-value`.
 * The masked value comes from `maskToken`; raw values are never emitted.
 *
 * The token name is wrapped in a code span because env-var names such as
 * GH_TOKEN contain `_`, which legacy Markdown interprets as an italic
 * delimiter. Left bare, an unbalanced underscore can make Telegram refuse
 * the message and a balanced pair silently italicises part of the name.
 *
 * @param {Array<{name: string, source: string, value: string}>} tokens
 * @returns {string} message text; a fixed notice when the list is empty
 */
export const formatTokenList = tokens => {
  if (!tokens || tokens.length === 0) {
    return 'No known local tokens found in this bot process.';
  }
  const lines = tokens.map(t => {
    const masked = maskToken(t.value);
    return `• \`${t.name}\` (${t.source}): \`${masked}\``;
  });
  return ['🔐 *Active local tokens (masked):*', '', ...lines, '', '_Use this list to search public places (GitHub, Slack, etc.) for accidentally leaked tokens before they become a problem. Tokens are masked with first 3 + last 3 characters per issue #1745._'].join('\n');
};
|
|
81
|
+
|
|
82
|
+
/**
|
|
83
|
+
* Registers the hidden /tokens command on the bot.
|
|
84
|
+
*
|
|
85
|
+
* @param {Object} bot - Telegraf bot
|
|
86
|
+
* @param {Object} options
|
|
87
|
+
* @param {boolean} [options.VERBOSE]
|
|
88
|
+
* @param {Function} [options.isOldMessage]
|
|
89
|
+
* @param {Array|Function} [options.allowedChats] — used for owner-of-allowed-chat check
|
|
90
|
+
* @param {Function} [options.fetchTokens] — test override for getAllKnownLocalTokens
|
|
91
|
+
*/
|
|
92
|
+
export const registerTokensCommand = (bot, options = {}) => {
|
|
93
|
+
const { VERBOSE = false, isOldMessage, allowedChats } = options;
|
|
94
|
+
const fetchTokens = options.fetchTokens || getAllKnownLocalTokens;
|
|
95
|
+
|
|
96
|
+
bot.command('tokens', async ctx => {
|
|
97
|
+
if (isOldMessage && isOldMessage(ctx)) {
|
|
98
|
+
VERBOSE && console.log('[VERBOSE] /tokens ignored: old message');
|
|
99
|
+
return;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
const chat = ctx.chat;
|
|
103
|
+
if (!chat || !ctx.from) return;
|
|
104
|
+
|
|
105
|
+
// Step 1: private-chat only. Silently no-op in groups so the command stays
|
|
106
|
+
// truly hidden — a curious group member never gets a hint that it exists.
|
|
107
|
+
if (chat.type !== 'private') {
|
|
108
|
+
VERBOSE && console.log(`[VERBOSE] /tokens ignored: chat type ${chat.type} (private only)`);
|
|
109
|
+
return;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
// Step 2: authenticate by ownership of an allowlisted chat.
|
|
113
|
+
const allowedChatIds = resolveAllowedChatIds(allowedChats);
|
|
114
|
+
let isOperator = false;
|
|
115
|
+
try {
|
|
116
|
+
isOperator = await isOperatorOfAnyAllowedChat({
|
|
117
|
+
telegram: ctx.telegram,
|
|
118
|
+
userId: ctx.from.id,
|
|
119
|
+
allowedChatIds,
|
|
120
|
+
});
|
|
121
|
+
} catch (err) {
|
|
122
|
+
VERBOSE && console.error('[VERBOSE] /tokens auth check failed:', err);
|
|
123
|
+
isOperator = false;
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
if (!isOperator) {
|
|
127
|
+
VERBOSE && console.log(`[VERBOSE] /tokens denied: user ${ctx.from.id} is not creator of any allowed chat`);
|
|
128
|
+
// Deliberately send no reply at all (not even an error) so the command
|
|
129
|
+
// stays undiscoverable to non-operators.
|
|
130
|
+
return;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
// Step 3: gather and emit.
|
|
134
|
+
let tokens;
|
|
135
|
+
try {
|
|
136
|
+
tokens = await fetchTokens();
|
|
137
|
+
} catch (err) {
|
|
138
|
+
VERBOSE && console.error('[VERBOSE] /tokens: fetchTokens failed:', err);
|
|
139
|
+
await ctx.reply('❌ Failed to gather local tokens.');
|
|
140
|
+
return;
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
const message = formatTokenList(tokens);
|
|
144
|
+
await ctx.reply(message, { parse_mode: 'Markdown' });
|
|
145
|
+
});
|
|
146
|
+
};
|
|
147
|
+
|
|
148
|
+
export default {
|
|
149
|
+
registerTokensCommand,
|
|
150
|
+
formatTokenList,
|
|
151
|
+
};
|
|
@@ -28,6 +28,61 @@ const getFsModule = async () => (await import('fs')).promises;
|
|
|
28
28
|
let secretlintCore = null;
|
|
29
29
|
let secretlintConfig = null;
|
|
30
30
|
|
|
31
|
+
// Issue #1745: process-wide counters for how many tokens were masked. The
|
|
32
|
+
// final-summary path (solve.mjs / hive.mjs) reads these to print a one-line
|
|
33
|
+
// "we masked N secrets — pass --dangerously-skip-output-sanitization to skip"
|
|
34
|
+
// note when N > 0. Counters are intentionally simple (numbers, not arrays of
|
|
35
|
+
// values) so we never accidentally retain raw tokens for any longer than the
|
|
36
|
+
// masking pass itself.
|
|
37
|
+
const sanitizationStats = {
|
|
38
|
+
totalMasked: 0,
|
|
39
|
+
knownTokenMasks: 0,
|
|
40
|
+
patternMasks: 0,
|
|
41
|
+
hexMasks: 0,
|
|
42
|
+
excluded: 0,
|
|
43
|
+
};
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* Read process-wide sanitization counters. Pure read; never mutates.
|
|
47
|
+
* @returns {{totalMasked:number, knownTokenMasks:number, patternMasks:number, hexMasks:number, excluded:number}}
|
|
48
|
+
*/
|
|
49
|
+
export const getSanitizationStats = () => ({ ...sanitizationStats });
|
|
50
|
+
|
|
51
|
+
/**
|
|
52
|
+
* Reset process-wide counters. Tests use this between cases. Production code
|
|
53
|
+
* has no reason to reset mid-run.
|
|
54
|
+
*/
|
|
55
|
+
export const resetSanitizationStats = () => {
|
|
56
|
+
sanitizationStats.totalMasked = 0;
|
|
57
|
+
sanitizationStats.knownTokenMasks = 0;
|
|
58
|
+
sanitizationStats.patternMasks = 0;
|
|
59
|
+
sanitizationStats.hexMasks = 0;
|
|
60
|
+
sanitizationStats.excluded = 0;
|
|
61
|
+
};
|
|
62
|
+
|
|
63
|
+
/**
|
|
64
|
+
* Format a one-line operator-facing summary describing how many tokens were
|
|
65
|
+
* masked during this run, plus the dangerously-skip note required by the
|
|
66
|
+
* issue when the count is > 0. Returns an empty string if nothing was masked,
|
|
67
|
+
* so the caller can simply check truthiness before logging.
|
|
68
|
+
*
|
|
69
|
+
* @param {Object} [stats] override stats (defaults to module counters)
|
|
70
|
+
* @returns {string}
|
|
71
|
+
*/
|
|
72
|
+
export const formatSanitizationSummary = (stats = sanitizationStats) => {
|
|
73
|
+
const { totalMasked = 0, knownTokenMasks = 0, patternMasks = 0, hexMasks = 0, excluded = 0 } = stats;
|
|
74
|
+
if (totalMasked <= 0 && excluded <= 0) return '';
|
|
75
|
+
const breakdown = [`known-local: ${knownTokenMasks}`, `pattern: ${patternMasks}`, `hex: ${hexMasks}`].join(', ');
|
|
76
|
+
const lines = [`🔒 Output sanitization: masked ${totalMasked} token(s) (${breakdown}) before publishing.`];
|
|
77
|
+
if (excluded > 0) {
|
|
78
|
+
lines.push(` ↳ left ${excluded} pre-existing token(s) untouched (user-provided content carve-out).`);
|
|
79
|
+
}
|
|
80
|
+
if (totalMasked > 0) {
|
|
81
|
+
lines.push(' ↳ Pass --dangerously-skip-output-sanitization if this blocks your workflow (active local tokens stay masked unless --dangerously-skip-active-tokens-output-sanitization is also set).');
|
|
82
|
+
}
|
|
83
|
+
return lines.join('\n');
|
|
84
|
+
};
|
|
85
|
+
|
|
31
86
|
/**
|
|
32
87
|
* Initialize secretlint modules lazily
|
|
33
88
|
* @returns {Promise<boolean>} True if secretlint is available
|
|
@@ -399,20 +454,25 @@ const compareDetectionResults = async (secretlintSecrets, customSecrets) => {
|
|
|
399
454
|
};
|
|
400
455
|
|
|
401
456
|
/**
|
|
402
|
-
* Sanitize
|
|
457
|
+
* Sanitize arbitrary outbound output by masking sensitive tokens while avoiding false positives
|
|
403
458
|
* Uses DUAL APPROACH: Both secretlint AND custom patterns run independently
|
|
404
459
|
*
|
|
405
460
|
* If only secretlint detects a secret (but our custom patterns miss it),
|
|
406
461
|
* a warning is logged so we can improve our patterns.
|
|
407
462
|
*
|
|
408
|
-
* @param {string}
|
|
463
|
+
* @param {string} output - The output to sanitize
|
|
409
464
|
* @param {Object} options - Optional configuration
|
|
410
465
|
* @param {boolean} options.warnOnMismatch - Log warnings when detection approaches differ (default: true in verbose mode)
|
|
411
|
-
* @
|
|
466
|
+
* @param {boolean} options.skipOutputSanitization - Skip pattern-based output sanitization. Does not skip known active-token masking.
|
|
467
|
+
* @param {boolean} options.skipActiveTokensOutputSanitization - Also skip known active-token masking. Dangerous; intended only for explicit debugging.
|
|
468
|
+
* @param {Array<string>} options.excludeTokens - Issue #1745 carve-out: token VALUES that were already in user-provided content (issue body, non-bot comments, pre-existing code). These will be left untouched and counted in `excluded` stats so we don't shock users by mangling tokens they typed themselves.
|
|
469
|
+
* @returns {Promise<string>} Sanitized output with tokens masked
|
|
412
470
|
*/
|
|
413
|
-
export const
|
|
414
|
-
let sanitized =
|
|
415
|
-
const { warnOnMismatch = global.verboseMode } = options;
|
|
471
|
+
export const sanitizeOutput = async (output, options = {}) => {
|
|
472
|
+
let sanitized = output;
|
|
473
|
+
const { warnOnMismatch = global.verboseMode, skipOutputSanitization = false, skipActiveTokensOutputSanitization = false, excludeTokens = [] } = options;
|
|
474
|
+
const excludedSet = new Set((excludeTokens || []).filter(t => typeof t === 'string' && t.length > 0));
|
|
475
|
+
const isExcluded = token => excludedSet.has(token);
|
|
416
476
|
|
|
417
477
|
// Statistics for dual approach
|
|
418
478
|
const stats = {
|
|
@@ -424,20 +484,34 @@ export const sanitizeLogContent = async (logContent, options = {}) => {
|
|
|
424
484
|
};
|
|
425
485
|
|
|
426
486
|
try {
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
487
|
+
if (!skipActiveTokensOutputSanitization) {
|
|
488
|
+
// Step 1: Get known tokens from files and commands
|
|
489
|
+
const fileTokens = await getGitHubTokensFromFiles();
|
|
490
|
+
const commandTokens = await getGitHubTokensFromCommand();
|
|
491
|
+
const allKnownTokens = [...new Set([...fileTokens, ...commandTokens])];
|
|
492
|
+
|
|
493
|
+
// Mask known tokens first
|
|
494
|
+
for (const token of allKnownTokens) {
|
|
495
|
+
if (token && token.length >= 12) {
|
|
496
|
+
if (isExcluded(token)) {
|
|
497
|
+
sanitizationStats.excluded++;
|
|
498
|
+
continue;
|
|
499
|
+
}
|
|
500
|
+
if (sanitized.includes(token)) {
|
|
501
|
+
const maskedToken = maskToken(token);
|
|
502
|
+
sanitized = sanitized.split(token).join(maskedToken);
|
|
503
|
+
stats.knownTokens++;
|
|
504
|
+
sanitizationStats.knownTokenMasks++;
|
|
505
|
+
sanitizationStats.totalMasked++;
|
|
506
|
+
}
|
|
507
|
+
}
|
|
438
508
|
}
|
|
439
509
|
}
|
|
440
510
|
|
|
511
|
+
if (skipOutputSanitization) {
|
|
512
|
+
return sanitized;
|
|
513
|
+
}
|
|
514
|
+
|
|
441
515
|
// Step 2: DUAL APPROACH - Run both detection methods independently
|
|
442
516
|
const [secretlintSecrets, customSecrets] = await Promise.all([detectSecretsWithSecretlint(sanitized), Promise.resolve(detectSecretsWithCustomPatterns(sanitized))]);
|
|
443
517
|
|
|
@@ -491,8 +565,14 @@ export const sanitizeLogContent = async (logContent, options = {}) => {
|
|
|
491
565
|
// Verify the token is still in the content at the expected position
|
|
492
566
|
const currentToken = sanitized.substring(start, end);
|
|
493
567
|
if (currentToken === token) {
|
|
568
|
+
if (isExcluded(token)) {
|
|
569
|
+
sanitizationStats.excluded++;
|
|
570
|
+
continue;
|
|
571
|
+
}
|
|
494
572
|
const masked = maskToken(token);
|
|
495
573
|
sanitized = sanitized.substring(0, start) + masked + sanitized.substring(end);
|
|
574
|
+
sanitizationStats.patternMasks++;
|
|
575
|
+
sanitizationStats.totalMasked++;
|
|
496
576
|
}
|
|
497
577
|
}
|
|
498
578
|
|
|
@@ -516,13 +596,21 @@ export const sanitizeLogContent = async (logContent, options = {}) => {
|
|
|
516
596
|
|
|
517
597
|
// Only mask if NOT in a safe git/gist context
|
|
518
598
|
if (!isHexInSafeContext(tempContent, token, position)) {
|
|
599
|
+
if (isExcluded(token)) {
|
|
600
|
+
sanitizationStats.excluded++;
|
|
601
|
+
continue;
|
|
602
|
+
}
|
|
519
603
|
hexReplacements.push({ token, masked: maskToken(token) });
|
|
520
604
|
}
|
|
521
605
|
}
|
|
522
606
|
|
|
523
607
|
// Second pass: apply replacements
|
|
524
608
|
for (const { token, masked } of hexReplacements) {
|
|
525
|
-
|
|
609
|
+
if (sanitized.includes(token)) {
|
|
610
|
+
sanitized = sanitized.split(token).join(masked);
|
|
611
|
+
sanitizationStats.hexMasks++;
|
|
612
|
+
sanitizationStats.totalMasked++;
|
|
613
|
+
}
|
|
526
614
|
}
|
|
527
615
|
|
|
528
616
|
// Summary logging
|
|
@@ -558,14 +646,263 @@ export const sanitizeLogContent = async (logContent, options = {}) => {
|
|
|
558
646
|
// Export detection functions for testing and visibility
|
|
559
647
|
export { detectSecretsWithSecretlint, detectSecretsWithCustomPatterns, compareDetectionResults };
|
|
560
648
|
|
|
649
|
+
/**
|
|
650
|
+
* Backward-compatible alias for older log-specific call sites.
|
|
651
|
+
* New output paths should call sanitizeOutput().
|
|
652
|
+
*/
|
|
653
|
+
export const sanitizeLogContent = sanitizeOutput;
|
|
654
|
+
|
|
655
|
+
// ============================================================================
|
|
656
|
+
// Issue #1745 — known-local-token registry
|
|
657
|
+
// ============================================================================
|
|
658
|
+
// We mask all known LOCAL tokens (env vars + tokens we discovered via gh/etc.)
|
|
659
|
+
// even when our regex/secretlint patterns miss them. This is the
|
|
660
|
+
// "defense-in-depth" layer for the leak documented in case-studies/issue-1745.
|
|
661
|
+
// ============================================================================
|
|
662
|
+
|
|
663
|
+
/**
|
|
664
|
+
* Names of environment variables that hold local tokens. Order is irrelevant
|
|
665
|
+
* for masking; entries are grouped by the service that issues them
|
|
666
|
+
* (claude, codex, opencode, gemini, qwen + telegram + gh).
|
|
667
|
+
*
|
|
668
|
+
* Adding a name here means: any process.env value at this key will be masked
|
|
669
|
+
* in every comment body / log line the bridge emits.
|
|
670
|
+
*/
|
|
671
|
+
export const KNOWN_LOCAL_TOKEN_ENV_VARS = Object.freeze([
|
|
672
|
+
// Telegram bridge
|
|
673
|
+
'TELEGRAM_BOT_TOKEN',
|
|
674
|
+
'TELEGRAM_OWNER_CHAT_ID',
|
|
675
|
+
// GitHub CLI / API
|
|
676
|
+
'GH_TOKEN',
|
|
677
|
+
'GITHUB_TOKEN',
|
|
678
|
+
'GITHUB_PAT',
|
|
679
|
+
// Claude / Anthropic
|
|
680
|
+
'ANTHROPIC_API_KEY',
|
|
681
|
+
'CLAUDE_API_KEY',
|
|
682
|
+
'CLAUDE_CODE_OAUTH_TOKEN',
|
|
683
|
+
// OpenAI / Codex
|
|
684
|
+
'OPENAI_API_KEY',
|
|
685
|
+
'CODEX_API_KEY',
|
|
686
|
+
// Open-source agent CLIs
|
|
687
|
+
'OPENCODE_API_KEY',
|
|
688
|
+
'AGENT_CLI_TOKEN',
|
|
689
|
+
// Google Gemini / Qwen
|
|
690
|
+
'GEMINI_API_KEY',
|
|
691
|
+
'GOOGLE_API_KEY',
|
|
692
|
+
'QWEN_API_KEY',
|
|
693
|
+
'DASHSCOPE_API_KEY',
|
|
694
|
+
// Misc
|
|
695
|
+
'HUGGINGFACE_TOKEN',
|
|
696
|
+
'HF_TOKEN',
|
|
697
|
+
]);
|
|
698
|
+
|
|
699
|
+
/**
|
|
700
|
+
* Read every known local-token env var that is currently set.
|
|
701
|
+
*
|
|
702
|
+
* @returns {Array<{name: string, value: string}>} entries with non-empty values
|
|
703
|
+
*/
|
|
704
|
+
export const getEnvironmentTokens = () => {
|
|
705
|
+
const out = [];
|
|
706
|
+
for (const name of KNOWN_LOCAL_TOKEN_ENV_VARS) {
|
|
707
|
+
const value = process.env[name];
|
|
708
|
+
if (typeof value === 'string' && value.length >= 12) {
|
|
709
|
+
out.push({ name, value });
|
|
710
|
+
}
|
|
711
|
+
}
|
|
712
|
+
return out;
|
|
713
|
+
};
|
|
714
|
+
|
|
715
|
+
/**
|
|
716
|
+
* Build the union of every known-local token: env vars + GitHub tokens we
|
|
717
|
+
* already discover via `gh auth status` / hosts.yml (existing helpers).
|
|
718
|
+
*
|
|
719
|
+
* Each entry is `{ source, name, value }` where `source` is 'env' | 'gh-files'
|
|
720
|
+
* | 'gh-command'. The `name` field is human-readable for debug logs but is
|
|
721
|
+
* NEVER printed alongside the raw (unmasked) token value, to avoid creating a secondary leak.
|
|
722
|
+
*
|
|
723
|
+
* @returns {Promise<Array<{source: string, name: string, value: string}>>}
|
|
724
|
+
*/
|
|
725
|
+
export const getAllKnownLocalTokens = async () => {
|
|
726
|
+
const tokens = [];
|
|
727
|
+
|
|
728
|
+
for (const { name, value } of getEnvironmentTokens()) {
|
|
729
|
+
tokens.push({ source: 'env', name, value });
|
|
730
|
+
}
|
|
731
|
+
|
|
732
|
+
try {
|
|
733
|
+
const fileTokens = await getGitHubTokensFromFiles();
|
|
734
|
+
for (const value of fileTokens) {
|
|
735
|
+
tokens.push({ source: 'gh-files', name: 'github', value });
|
|
736
|
+
}
|
|
737
|
+
} catch {
|
|
738
|
+
/* swallow — best-effort */
|
|
739
|
+
}
|
|
740
|
+
|
|
741
|
+
try {
|
|
742
|
+
const commandTokens = await getGitHubTokensFromCommand();
|
|
743
|
+
for (const value of commandTokens) {
|
|
744
|
+
tokens.push({ source: 'gh-command', name: 'github', value });
|
|
745
|
+
}
|
|
746
|
+
} catch {
|
|
747
|
+
/* swallow — best-effort */
|
|
748
|
+
}
|
|
749
|
+
|
|
750
|
+
// Deduplicate by exact value
|
|
751
|
+
const seen = new Set();
|
|
752
|
+
return tokens.filter(({ value }) => {
|
|
753
|
+
if (seen.has(value)) return false;
|
|
754
|
+
seen.add(value);
|
|
755
|
+
return true;
|
|
756
|
+
});
|
|
757
|
+
};
|
|
758
|
+
|
|
759
|
+
/**
|
|
760
|
+
* Test whether `text` contains any known-local token verbatim.
|
|
761
|
+
* Used to decide whether to fire the Telegram leak-warning DM.
|
|
762
|
+
*
|
|
763
|
+
* @param {string} text
|
|
764
|
+
* @param {Array<{value: string, name?: string, source?: string}>} [tokens]
|
|
765
|
+
* Pre-fetched token list (if you already called getAllKnownLocalTokens).
|
|
766
|
+
* Pass an explicit list to avoid re-running `gh auth status` per check.
|
|
767
|
+
* @returns {Promise<Array<{name: string, source: string}>>} list of token
|
|
768
|
+
* identifiers that were found in the text (NOT the values themselves).
|
|
769
|
+
*/
|
|
770
|
+
export const containsKnownToken = async (text, tokens) => {
|
|
771
|
+
if (typeof text !== 'string' || text.length === 0) return [];
|
|
772
|
+
const list = tokens || (await getAllKnownLocalTokens());
|
|
773
|
+
const hits = [];
|
|
774
|
+
for (const t of list) {
|
|
775
|
+
if (t.value && text.includes(t.value)) {
|
|
776
|
+
hits.push({ name: t.name, source: t.source });
|
|
777
|
+
}
|
|
778
|
+
}
|
|
779
|
+
return hits;
|
|
780
|
+
};
|
|
781
|
+
|
|
782
|
+
/**
|
|
783
|
+
* Mask every known-local token inside `body` and then run `sanitizeOutput`
|
|
784
|
+
* for the regex/secretlint sweep. This is the wrapper that comment-posting
|
|
785
|
+
* paths must call before publishing anything to GitHub.
|
|
786
|
+
*
|
|
787
|
+
* Env-token masking runs FIRST so that even if our regex misses the shape
|
|
788
|
+
* (custom token formats from new AI tools, etc.) the local secret never
|
|
789
|
+
* leaves the process. The regex/secretlint pass then catches anything else.
|
|
790
|
+
*
|
|
791
|
+
* @param {string} body
|
|
792
|
+
* @param {Object} [options]
|
|
793
|
+
* @param {Array<{value: string}>} [options.knownTokens] pre-fetched token list
|
|
794
|
+
* @returns {Promise<string>} sanitized body
|
|
795
|
+
*/
|
|
796
|
+
export const sanitizeCommentBody = async (body, options = {}) => {
|
|
797
|
+
if (typeof body !== 'string' || body.length === 0) return body;
|
|
798
|
+
|
|
799
|
+
let sanitized = body;
|
|
800
|
+
const excludedSet = new Set((options.excludeTokens || []).filter(t => typeof t === 'string' && t.length > 0));
|
|
801
|
+
|
|
802
|
+
// Pass 1: mask known-local tokens verbatim. This is the defense-in-depth
|
|
803
|
+
// layer that closes the gap from issue #1745.
|
|
804
|
+
if (!options.skipActiveTokensOutputSanitization) {
|
|
805
|
+
const knownTokens = options.knownTokens || (await getAllKnownLocalTokens());
|
|
806
|
+
for (const { value } of knownTokens) {
|
|
807
|
+
if (value && value.length >= 12 && sanitized.includes(value)) {
|
|
808
|
+
if (excludedSet.has(value)) {
|
|
809
|
+
sanitizationStats.excluded++;
|
|
810
|
+
continue;
|
|
811
|
+
}
|
|
812
|
+
sanitized = sanitized.split(value).join(maskToken(value));
|
|
813
|
+
sanitizationStats.knownTokenMasks++;
|
|
814
|
+
sanitizationStats.totalMasked++;
|
|
815
|
+
}
|
|
816
|
+
}
|
|
817
|
+
}
|
|
818
|
+
|
|
819
|
+
// Pass 2: regex + secretlint sweep for anything else.
|
|
820
|
+
sanitized = await sanitizeOutput(sanitized, {
|
|
821
|
+
warnOnMismatch: false,
|
|
822
|
+
skipOutputSanitization: options.skipOutputSanitization,
|
|
823
|
+
skipActiveTokensOutputSanitization: true,
|
|
824
|
+
excludeTokens: options.excludeTokens || [],
|
|
825
|
+
});
|
|
826
|
+
|
|
827
|
+
return sanitized;
|
|
828
|
+
};
|
|
829
|
+
|
|
830
|
+
/**
 * Issue #1745 user-content carve-out helper.
 *
 * Comment #4364642786: "if issue description/comment/pull request comment from
 * other users than our bot, contained access token, meaning access token was
 * explicitly given, or access token was existing in code before, we don't
 * touch it. That is not our responsibility by default."
 *
 * Given concatenated user-provided text (issue body, non-bot issue/PR
 * comments, original code), this helper returns the token-shaped strings
 * already present in that text. Callers pass this list as `excludeTokens`
 * to `sanitizeOutput` / `sanitizeCommentBody` so the sanitizer leaves those
 * tokens untouched.
 *
 * Active local tokens (env vars, gh CLI tokens) are NEVER returned even if
 * they appear in user-provided content — the user couldn't have intended for
 * us to leak our own bot tokens, so the carve-out doesn't apply to them.
 * To make that hold by default, the active-token list is looked up via
 * `getAllKnownLocalTokens()` whenever the caller does not supply one.
 *
 * @param {string} text concatenated user-provided text
 * @param {Object} [options]
 * @param {Array<{value: string}>} [options.knownTokens] pre-fetched active
 *   local tokens to filter out of the carve-out. When omitted they are
 *   fetched here; pass an explicit list to avoid repeating that lookup.
 * @returns {Promise<Array<string>>} token VALUES to exclude from sanitization
 */
export const extractTokensFromUserContent = async (text, options = {}) => {
  if (typeof text !== 'string' || text.length === 0) return [];

  const customSecrets = detectSecretsWithCustomPatterns(text);
  const secretlintSecrets = await detectSecretsWithSecretlint(text);

  const tokens = new Set();
  for (const s of [...customSecrets, ...secretlintSecrets]) {
    // Same 12-character floor the masking passes use for known tokens.
    if (s.token && s.token.length >= 12) {
      tokens.add(s.token);
    }
  }

  // 40-char hex in user-provided text — only exclude when not in a safe
  // git/gist context. We're conservative here: the carve-out only applies
  // to things our regex would otherwise mask.
  const hexPattern = /(?:^|[\s:=])([a-f0-9]{40})(?=[\s\n]|$)/gm;
  let m;
  while ((m = hexPattern.exec(text)) !== null) {
    const token = m[1];
    // NOTE(review): m.index is the start of the whole match, which includes
    // the leading delimiter when one was consumed — confirm this is the
    // position isHexInSafeContext expects.
    if (!isHexInSafeContext(text, token, m.index)) {
      tokens.add(token);
    }
  }

  // Filter out our own active local tokens. The user pasting our token in
  // their issue body doesn't mean we should leak it — that's still our bot's
  // secret and it stays masked. Falling back to a fresh lookup keeps this
  // guarantee even when the caller forgot to pass `knownTokens`.
  const activeTokens = options.knownTokens || (await getAllKnownLocalTokens());
  const knownActive = new Set((activeTokens || []).map(t => t.value).filter(Boolean));
  return [...tokens].filter(value => !knownActive.has(value));
};
|
|
887
|
+
|
|
561
888
|
// Default export for convenience
|
|
562
889
|
export default {
|
|
563
890
|
isSafeToken,
|
|
564
891
|
isHexInSafeContext,
|
|
565
892
|
getGitHubTokensFromFiles,
|
|
566
893
|
getGitHubTokensFromCommand,
|
|
894
|
+
sanitizeOutput,
|
|
567
895
|
sanitizeLogContent,
|
|
568
896
|
detectSecretsWithSecretlint,
|
|
569
897
|
detectSecretsWithCustomPatterns,
|
|
570
898
|
compareDetectionResults,
|
|
899
|
+
getEnvironmentTokens,
|
|
900
|
+
getAllKnownLocalTokens,
|
|
901
|
+
containsKnownToken,
|
|
902
|
+
sanitizeCommentBody,
|
|
903
|
+
getSanitizationStats,
|
|
904
|
+
resetSanitizationStats,
|
|
905
|
+
formatSanitizationSummary,
|
|
906
|
+
extractTokensFromUserContent,
|
|
907
|
+
KNOWN_LOCAL_TOKEN_ENV_VARS,
|
|
571
908
|
};
|
|
@@ -198,7 +198,7 @@ export const resetTrackedToolCommentIds = () => {
|
|
|
198
198
|
* @param {string} options.body
|
|
199
199
|
* @returns {Promise<{ok: boolean, commentId: string|null, stderr?: string}>}
|
|
200
200
|
*/
|
|
201
|
-
export const postTrackedComment = async ({ $, owner, repo, targetNumber, body }) => {
|
|
201
|
+
export const postTrackedComment = async ({ $, owner, repo, targetNumber, body, sanitizationOptions }) => {
|
|
202
202
|
if (!$) {
|
|
203
203
|
throw new Error('postTrackedComment requires a command-stream $ helper');
|
|
204
204
|
}
|
|
@@ -208,7 +208,11 @@ export const postTrackedComment = async ({ $, owner, repo, targetNumber, body })
|
|
|
208
208
|
// We use the /issues/<n>/comments endpoint because it works identically
|
|
209
209
|
// for both PRs and issues (a PR is an issue at this endpoint).
|
|
210
210
|
const apiPath = `repos/${owner}/${repo}/issues/${targetNumber}/comments`;
|
|
211
|
-
const
|
|
211
|
+
const { sanitizeOutput } = await import('./token-sanitization.lib.mjs');
|
|
212
|
+
// Issue #1745: caller may pass dangerous-skip flags + carve-out tokens.
|
|
213
|
+
// Defaults preserve fail-closed behavior: full sanitization.
|
|
214
|
+
const sanitizedBody = await sanitizeOutput(body, sanitizationOptions || {});
|
|
215
|
+
const payload = JSON.stringify({ body: sanitizedBody });
|
|
212
216
|
|
|
213
217
|
// command-stream's options key is `stdin`, not `input` — unknown keys are
|
|
214
218
|
// silently ignored, which previously left stdin inherited from the parent
|