@link-assistant/hive-mind 1.2.0 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,26 @@
1
1
  # @link-assistant/hive-mind
2
2
 
3
+ ## 1.2.2
4
+
5
+ ### Patch Changes
6
+
7
+ - db84104: Remove QEMU from CI/CD entirely
8
+ - Remove unnecessary QEMU and Docker Buildx setup from docker-pr-check job
9
+ - The PR check only builds for linux/amd64, so QEMU was never needed
10
+ - docker-publish jobs already use native ARM64 runners (ubuntu-24.04-arm)
11
+ - This addresses feedback to remove QEMU from CI/CD to avoid slowdowns and freezes
12
+
13
+ ## 1.2.1
14
+
15
+ ### Patch Changes
16
+
17
+ - 04cb3d2: Fix false positives in token masking for log sanitization
18
+ - Remove overly broad regex pattern that was matching legitimate identifiers like `browser_take_screenshot` and MCP tool names
19
+ - Add allowlist of safe token patterns (browser\_, mcp\_\_, function names with underscores, UUIDs)
20
+ - Add context-aware detection for 40-char hex strings to avoid masking git commit hashes and gist IDs
21
+ - Export new helper functions `isSafeToken` and `isHexInSafeContext` for testing
22
+ - Add comprehensive unit tests for false positive prevention
23
+
3
24
  ## 1.2.0
4
25
 
5
26
  ### Minor Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@link-assistant/hive-mind",
3
- "version": "1.2.0",
3
+ "version": "1.2.2",
4
4
  "description": "AI-powered issue solver and hive mind for collaborative problem solving",
5
5
  "main": "src/hive.mjs",
6
6
  "type": "module",
@@ -61,8 +61,11 @@
61
61
  "prettier": "^3.6.2"
62
62
  },
63
63
  "dependencies": {
64
+ "@secretlint/core": "^11.2.5",
65
+ "@secretlint/secretlint-rule-preset-recommend": "^11.2.5",
64
66
  "@sentry/node": "^10.15.0",
65
- "@sentry/profiling-node": "^10.15.0"
67
+ "@sentry/profiling-node": "^10.15.0",
68
+ "secretlint": "^11.2.5"
66
69
  },
67
70
  "lint-staged": {
68
71
  "*.{js,mjs,json,md}": [
@@ -5,9 +5,6 @@
5
5
  if (typeof globalThis.use === 'undefined') {
6
6
  globalThis.use = (await eval(await (await fetch('https://unpkg.com/use-m/use.js')).text())).use;
7
7
  }
8
- const fs = (await use('fs')).promises;
9
- const os = (await use('os')).default;
10
- const path = (await use('path')).default;
11
8
  // Use command-stream for consistent $ behavior
12
9
  const { $ } = await use('command-stream');
13
10
  // Import log and maskToken from general lib
@@ -16,6 +13,10 @@ import { reportError } from './sentry.lib.mjs';
16
13
  import { githubLimits, timeouts } from './config.lib.mjs';
17
14
  // Import batch operations from separate module
18
15
  import { batchCheckPullRequestsForIssues as batchCheckPRs, batchCheckArchivedRepositories as batchCheckArchived } from './github.batch.lib.mjs';
16
+ // Import token sanitization from dedicated module (Issue #1037 fix)
17
+ import { isSafeToken, isHexInSafeContext, getGitHubTokensFromFiles, getGitHubTokensFromCommand, sanitizeLogContent } from './token-sanitization.lib.mjs';
18
+ // Re-export token sanitization functions for backward compatibility
19
+ export { isSafeToken, isHexInSafeContext, getGitHubTokensFromFiles, getGitHubTokensFromCommand, sanitizeLogContent };
19
20
  // Import log upload function from separate module
20
21
  import { uploadLogWithGhUploadLog } from './log-upload.lib.mjs';
21
22
 
@@ -60,139 +61,8 @@ const buildCostInfoString = (totalCostUSD, anthropicTotalCostUSD, pricingInfo) =
60
61
 
61
62
  // Helper function to mask GitHub tokens (alias for backward compatibility)
62
63
  export const maskGitHubToken = maskToken;
63
- // Helper function to get GitHub tokens from local config files
64
- export const getGitHubTokensFromFiles = async () => {
65
- const tokens = [];
66
-
67
- try {
68
- // Check ~/.config/gh/hosts.yml
69
- const hostsFile = path.join(os.homedir(), '.config/gh/hosts.yml');
70
- if (
71
- await fs
72
- .access(hostsFile)
73
- .then(() => true)
74
- .catch(() => false)
75
- ) {
76
- const hostsContent = await fs.readFile(hostsFile, 'utf8');
77
-
78
- // Look for oauth_token and api_token patterns
79
- const oauthMatches = hostsContent.match(/oauth_token:\s*([^\s\n]+)/g);
80
- if (oauthMatches) {
81
- for (const match of oauthMatches) {
82
- const token = match.split(':')[1].trim();
83
- if (token && !tokens.includes(token)) {
84
- tokens.push(token);
85
- }
86
- }
87
- }
88
-
89
- const apiMatches = hostsContent.match(/api_token:\s*([^\s\n]+)/g);
90
- if (apiMatches) {
91
- for (const match of apiMatches) {
92
- const token = match.split(':')[1].trim();
93
- if (token && !tokens.includes(token)) {
94
- tokens.push(token);
95
- }
96
- }
97
- }
98
- }
99
- } catch (error) {
100
- // File access errors are expected when config doesn't exist
101
- if (global.verboseMode) {
102
- reportError(error, {
103
- context: 'github_token_file_access',
104
- level: 'debug',
105
- });
106
- }
107
- }
108
-
109
- return tokens;
110
- };
111
- // Helper function to get GitHub tokens from gh command output
112
- export const getGitHubTokensFromCommand = async () => {
113
- const { $ } = await use('command-stream');
114
- const tokens = [];
115
-
116
- try {
117
- // Run gh auth status to get token info
118
- const authResult = await $`gh auth status 2>&1`.catch(() => ({ stdout: '', stderr: '' }));
119
- const authOutput = authResult.stdout?.toString() + authResult.stderr?.toString() || '';
120
-
121
- // Look for token patterns in the output
122
- const tokenPatterns = [/(?:token|oauth|api)[:\s]*([a-zA-Z0-9_]{20,})/gi, /gh[pou]_[a-zA-Z0-9_]{20,}/gi];
123
-
124
- for (const pattern of tokenPatterns) {
125
- const matches = authOutput.match(pattern);
126
- if (matches) {
127
- for (let match of matches) {
128
- // Clean up the match
129
- const token = match.replace(/^(?:token|oauth|api)[:\s]*/, '').trim();
130
- if (token && token.length >= 20 && !tokens.includes(token)) {
131
- tokens.push(token);
132
- }
133
- }
134
- }
135
- }
136
- } catch (error) {
137
- // Command errors are expected when gh is not configured
138
- if (global.verboseMode) {
139
- reportError(error, {
140
- context: 'github_token_gh_auth',
141
- level: 'debug',
142
- });
143
- }
144
- }
145
-
146
- return tokens;
147
- };
148
64
  // Escape ``` in logs for safe markdown embedding (replaces with \`\`\` to prevent code block closure)
149
65
  export const escapeCodeBlocksInLog = logContent => logContent.replace(/```/g, '\\`\\`\\`');
150
- // Helper function to sanitize log content by masking GitHub tokens
151
- export const sanitizeLogContent = async logContent => {
152
- let sanitized = logContent;
153
-
154
- try {
155
- // Get tokens from both sources
156
- const fileTokens = await getGitHubTokensFromFiles();
157
- const commandTokens = await getGitHubTokensFromCommand();
158
- const allTokens = [...new Set([...fileTokens, ...commandTokens])];
159
-
160
- // Mask each token found
161
- for (const token of allTokens) {
162
- if (token && token.length >= 12) {
163
- const maskedToken = maskToken(token);
164
- // Use global replace to mask all occurrences
165
- sanitized = sanitized.split(token).join(maskedToken);
166
- }
167
- }
168
-
169
- // Also look for and mask common GitHub token patterns directly in the log
170
- const tokenPatterns = [
171
- /gh[pou]_[a-zA-Z0-9_]{20,}/g,
172
- /(?:^|[\s:=])([a-f0-9]{40})(?=[\s\n]|$)/gm, // 40-char hex tokens (like personal access tokens)
173
- /(?:^|[\s:=])([a-zA-Z0-9_]{20,})(?=[\s\n]|$)/gm, // General long tokens
174
- ];
175
-
176
- for (const pattern of tokenPatterns) {
177
- sanitized = sanitized.replace(pattern, (match, token) => {
178
- if (token && token.length >= 20) {
179
- return match.replace(token, maskToken(token));
180
- }
181
- return match;
182
- });
183
- }
184
-
185
- await log(` 🔒 Sanitized ${allTokens.length} detected GitHub tokens in log content`, { verbose: true });
186
- } catch (error) {
187
- reportError(error, {
188
- context: 'sanitize_log_content',
189
- level: 'warning',
190
- });
191
- await log(` ⚠️ Warning: Could not fully sanitize log content: ${error.message}`, { verbose: true });
192
- }
193
-
194
- return sanitized;
195
- };
196
66
  // Helper function to check if a file exists in a GitHub branch
197
67
  export const checkFileInBranch = async (owner, repo, fileName, branchName) => {
198
68
  const { $ } = await use('command-stream');
@@ -1469,6 +1339,8 @@ export default {
1469
1339
  getGitHubTokensFromFiles,
1470
1340
  getGitHubTokensFromCommand,
1471
1341
  escapeCodeBlocksInLog,
1342
+ isSafeToken,
1343
+ isHexInSafeContext,
1472
1344
  sanitizeLogContent,
1473
1345
  checkFileInBranch,
1474
1346
  checkGitHubPermissions,
@@ -0,0 +1,563 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Token sanitization utilities for log content
4
+ * Dual approach: Uses both secretlint AND custom patterns for comprehensive coverage
5
+ *
6
+ * Architecture:
7
+ * 1. Custom patterns (our logic) - patterns we define and maintain
8
+ * 2. Secretlint patterns - battle-tested community patterns
9
+ *
10
+ * Both approaches run independently, and if only one detects a secret,
11
+ * a warning is logged (especially when secretlint finds something our logic misses).
12
+ * This helps us improve our custom patterns over time.
13
+ *
14
+ * @module token-sanitization
15
+ */
16
+
17
+ // Import shared utility from lib.mjs
18
+ import { maskToken, log } from './lib.mjs';
19
+ import { reportError } from './sentry.lib.mjs';
20
+
21
// Lazy accessors for the Node built-ins this module needs.
// Each accessor performs the dynamic import when it is called and hands back
// the part of the module the callers work with.

/** @returns {Promise<object>} Default export of the `os` module */
const getOsModule = async () => {
  const { default: osModule } = await import('os');
  return osModule;
};

/** @returns {Promise<object>} Default export of the `path` module */
const getPathModule = async () => {
  const { default: pathModule } = await import('path');
  return pathModule;
};

/** @returns {Promise<object>} The `promises` API of the `fs` module */
const getFsModule = async () => {
  const { promises: fsPromises } = await import('fs');
  return fsPromises;
};
25
+
26
// Secretlint state, filled in by the first initSecretlint() call:
//   secretlintConfig === null   -> loading has not been attempted yet
//   secretlintConfig === false  -> loading was attempted and failed
//   secretlintConfig is object  -> loaded; secretlintCore holds the imported core module
let secretlintCore = null;
let secretlintConfig = null;

/**
 * Load the secretlint modules on demand and remember the outcome.
 * Later calls return the remembered outcome without importing again.
 * @returns {Promise<boolean>} True when secretlint is available, false otherwise
 */
const initSecretlint = async () => {
  const alreadyAttempted = secretlintConfig !== null;
  if (alreadyAttempted) {
    return secretlintConfig !== false;
  }

  try {
    const corePromise = import('@secretlint/core');
    const presetPromise = import('@secretlint/secretlint-rule-preset-recommend');
    const [core, preset] = await Promise.all([corePromise, presetPromise]);

    const recommendedPreset = {
      id: '@secretlint/secretlint-rule-preset-recommend',
      rule: preset.creator,
    };

    secretlintCore = core;
    secretlintConfig = { rules: [recommendedPreset] };
    return true;
  } catch (error) {
    // A failed import is tolerated: callers continue with the custom patterns only
    if (global.verboseMode) {
      await log(` ⚠️ Secretlint not available, using fallback patterns: ${error.message}`, { verbose: true });
    }
    secretlintConfig = false;
    return false;
  }
};
62
+
63
/**
 * Allowlist of string shapes that are NOT sensitive tokens.
 * A string matching any of these is a legitimate identifier and must not be masked.
 */
const SAFE_TOKEN_PATTERNS = [
  /^mcp__[a-z_]+$/i, // MCP tool names (Playwright, etc.)
  /^browser_[a-z_]+$/i, // Browser/Playwright tool names
  /^[a-z]+_[a-z]+_[a-z_]+$/i, // Common function/tool names made of letters and underscores
  /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i, // UUIDs (not sensitive)
];

/**
 * Patterns describing surrounding text that is NOT a sensitive context.
 * They are used to recognise a 40-char hex string as a git commit hash or gist ID.
 */
const SAFE_CONTEXT_PATTERNS = [
  // Commands that take or print commit hashes / gist IDs
  /\bgh\s+gist\s+view\b/i,
  /\bgit\s+(log|show|diff|cherry-pick|revert|checkout|reset)\b/i,
  /\bgit\s+commit\s+-m\b/i,
  // Commit SHA in common git output
  /\bcommit\s+[a-f0-9]{7,40}\b/i,
  /\bSHA\s*:\s*[a-f0-9]{7,40}\b/i,
  // `git log` output: a line starting with "commit <full hash>"
  /^commit\s+[a-f0-9]{40}/m,
  // A hash immediately followed by an "Author:" field
  /\b[a-f0-9]{7,40}\s+Author:/i,
];

// Note: Custom token patterns are now defined in detectSecretsWithCustomPatterns()
// with named patterns for tracking and comparison with secretlint results.

/**
 * Tell whether a token matches the allowlist of safe (non-sensitive) shapes.
 * @param {string} token - The token to check
 * @returns {boolean} True if the token is safe and should NOT be masked; false for empty input
 */
export const isSafeToken = token => {
  if (token) {
    for (const safeShape of SAFE_TOKEN_PATTERNS) {
      if (safeShape.test(token)) {
        return true;
      }
    }
  }
  return false;
};

/**
 * Tell whether a 40-char hex string sits in a safe context (like git commands).
 * The check looks at up to 100 characters on each side of the hex string.
 * @param {string} content - The full content to search
 * @param {string} hexString - The 40-char hex string found
 * @param {number} position - The position where the hex string was found
 * @returns {boolean} True if the hex string is in a safe context
 */
export const isHexInSafeContext = (content, hexString, position) => {
  const windowRadius = 100;
  const windowStart = Math.max(0, position - windowRadius);
  const windowEnd = Math.min(content.length, position + hexString.length + windowRadius);
  const surroundingText = content.substring(windowStart, windowEnd);

  for (const safeContext of SAFE_CONTEXT_PATTERNS) {
    if (safeContext.test(surroundingText)) {
      return true;
    }
  }
  return false;
};
125
+
126
/**
 * Get GitHub tokens from local config files.
 *
 * Reads ~/.config/gh/hosts.yml and collects the values of every `oauth_token:`
 * and `api_token:` entry (all oauth tokens first, then api tokens, de-duplicated).
 * The value is taken from the same line as the key, may contain ':' characters,
 * and has one pair of surrounding YAML quotes removed so that it equals the
 * text that would appear in a log.
 *
 * A missing or unreadable file yields an empty array; other failures are
 * reported through reportError only in verbose mode.
 *
 * @returns {Promise<string[]>} Array of tokens found
 */
export const getGitHubTokensFromFiles = async () => {
  const os = await getOsModule();
  const path = await getPathModule();
  const fs = await getFsModule();
  const tokens = [];

  try {
    // Check ~/.config/gh/hosts.yml
    const hostsFile = path.join(os.homedir(), '.config/gh/hosts.yml');
    const fileIsAccessible = await fs.access(hostsFile).then(
      () => true,
      () => false
    );
    if (!fileIsAccessible) {
      return tokens;
    }

    const hostsContent = await fs.readFile(hostsFile, 'utf8');

    // [^\S\r\n]* is "horizontal whitespace": an empty value must not make the
    // pattern run on to the next line and capture the following key.
    const tokenEntryPatterns = [/oauth_token:[^\S\r\n]*([^\s]+)/g, /api_token:[^\S\r\n]*([^\s]+)/g];

    for (const entryPattern of tokenEntryPatterns) {
      for (const entry of hostsContent.matchAll(entryPattern)) {
        // Drop one pair of matching quotes: `"gho_x"` -> `gho_x`
        const token = entry[1].replace(/^(["'])(.*)\1$/, '$2').trim();
        if (token && !tokens.includes(token)) {
          tokens.push(token);
        }
      }
    }
  } catch (error) {
    // File access errors are expected when config doesn't exist
    if (global.verboseMode) {
      reportError(error, {
        context: 'github_token_file_access',
        level: 'debug',
      });
    }
  }

  return tokens;
};
180
+
181
/**
 * Get GitHub tokens from gh command output.
 *
 * Runs `gh auth status 2>&1` through command-stream and extracts token-like
 * strings (at least 20 characters) from the combined stdout/stderr text:
 *   - the value following a `token` / `oauth` / `api` keyword (any letter case)
 *   - anything shaped like `ghp_…`, `gho_…` or `ghu_…`
 * Only the token value is returned, never the keyword prefix.
 *
 * @returns {Promise<string[]>} Array of unique tokens found (empty when gh is unavailable)
 */
export const getGitHubTokensFromCommand = async () => {
  if (typeof globalThis.use === 'undefined') {
    // SECURITY NOTE(review): this downloads JavaScript from unpkg.com and eval()s it.
    // Kept as-is because it is the module loader bootstrap used by the project,
    // but it executes unpinned remote code — consider pinning a version or bundling it.
    globalThis.use = (await eval(await (await fetch('https://unpkg.com/use-m/use.js')).text())).use;
  }
  const { $ } = await globalThis.use('command-stream');
  const tokens = [];

  try {
    // Run gh auth status to get token info
    const authResult = await $`gh auth status 2>&1`.catch(() => ({ stdout: '', stderr: '' }));
    // Missing streams contribute an empty string (not the text "undefined")
    const authOutput = `${authResult?.stdout ?? ''}${authResult?.stderr ?? ''}`;

    // The first pattern captures the value in group 1; the second pattern is the token itself
    const tokenPatterns = [/(?:token|oauth|api)[:\s]*([a-zA-Z0-9_]{20,})/gi, /gh[pou]_[a-zA-Z0-9_]{20,}/gi];

    for (const pattern of tokenPatterns) {
      for (const match of authOutput.matchAll(pattern)) {
        // Using the capture group strips the keyword regardless of its letter case
        const token = (match[1] ?? match[0]).trim();
        if (token.length >= 20 && !tokens.includes(token)) {
          tokens.push(token);
        }
      }
    }
  } catch (error) {
    // Command errors are expected when gh is not configured
    if (global.verboseMode) {
      reportError(error, {
        context: 'github_token_gh_auth',
        level: 'debug',
      });
    }
  }

  return tokens;
};
224
+
225
/**
 * Scan content with secretlint and report every message that carries a range.
 * Returns an empty array when secretlint could not be initialised; a failure
 * during linting is logged in verbose mode and whatever was collected so far is returned.
 * @param {string} content - Content to scan
 * @returns {Promise<Array<{start: number, end: number, token: string, ruleId: string, source: string}>>} Detected secrets with rule info
 */
const detectSecretsWithSecretlint = async content => {
  const secretlintReady = await initSecretlint();
  if (!secretlintReady || !secretlintCore || !secretlintConfig) {
    return [];
  }

  const found = [];

  try {
    const lintRequest = {
      source: {
        filePath: '/virtual/content.txt',
        content: content,
        contentType: 'text',
      },
      options: {
        config: secretlintConfig,
        maskSecrets: false, // Raw positions are required so the caller can mask on its own
      },
    };
    const lintResult = await secretlintCore.lintSource(lintRequest);

    for (const message of lintResult.messages) {
      const hasRange = message.range && message.range.length === 2;
      if (!hasRange) {
        continue;
      }
      const [start, end] = message.range;
      found.push({
        start,
        end,
        token: content.substring(start, end),
        ruleId: message.ruleId || 'unknown',
        source: 'secretlint',
      });
    }
  } catch (error) {
    if (global.verboseMode) {
      await log(` ⚠️ Secretlint detection error: ${error.message}`, { verbose: true });
    }
  }

  return found;
};
272
+
273
/**
 * Named custom secret patterns, built once at module load.
 * The name is reported with each detection so results can be compared with secretlint.
 * Every pattern carries the `g` flag and is only used through String.prototype.matchAll,
 * which works on a copy of the regex, so no `lastIndex` state leaks between calls.
 * None of the character classes can match `*`, so a detection can never contain an
 * already-masked (asterisk-filled) span.
 */
const CUSTOM_SECRET_PATTERNS = Object.freeze([
  // OpenAI patterns
  { name: 'openai-project', pattern: /\bsk-(?:proj-|svcacct-|admin-)?[A-Za-z0-9_-]*T3BlbkFJ[A-Za-z0-9_-]+/g },

  // Anthropic patterns
  { name: 'anthropic-claude', pattern: /\bsk-ant-(?:api\d{2}-)?[A-Za-z0-9_-]{20,}/g },

  // GitHub patterns
  { name: 'github-pat', pattern: /\bgithub_pat_[a-zA-Z0-9_]{20,}/g },
  { name: 'github-server', pattern: /\bghs_[a-zA-Z0-9_]{20,}/g },
  { name: 'github-refresh', pattern: /\bghr_[a-zA-Z0-9_]{20,}/g },
  { name: 'github-ghp', pattern: /\bghp_[a-zA-Z0-9_]{20,}/g },
  { name: 'github-gho', pattern: /\bgho_[a-zA-Z0-9_]{20,}/g },
  { name: 'github-ghu', pattern: /\bghu_[a-zA-Z0-9_]{20,}/g },

  // AWS patterns
  { name: 'aws-key', pattern: /\b(?:A3T[A-Z0-9]|AKIA|AGPA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}\b/g },

  // Stripe patterns
  { name: 'stripe', pattern: /\b(?:sk_live_|sk_test_|pk_live_|pk_test_)[a-zA-Z0-9]{20,}/g },

  // SendGrid patterns
  { name: 'sendgrid', pattern: /\bSG\.[a-zA-Z0-9_-]{15,}\.[a-zA-Z0-9_-]{30,}/g },

  // Twilio patterns
  { name: 'twilio', pattern: /\bSK[a-f0-9]{32}\b/g },

  // Mailchimp patterns
  { name: 'mailchimp', pattern: /\b[a-f0-9]{32}-us[0-9]{1,2}\b/g },

  // Square patterns
  { name: 'square', pattern: /\bsq0(?:atp|csp)-[a-zA-Z0-9_-]{22,}/g },

  // Databricks patterns
  { name: 'databricks', pattern: /\bdapi[a-f0-9]{32}\b/g },

  // PyPI patterns
  { name: 'pypi', pattern: /\bpypi-[A-Za-z0-9_-]{50,}/g },

  // Discord patterns
  { name: 'discord', pattern: /\b[MN][A-Za-z0-9_-]{23,}\.[A-Za-z0-9_-]{6}\.[A-Za-z0-9_-]{20,}/g },

  // Telegram patterns
  { name: 'telegram', pattern: /\b[0-9]{8,10}:[a-zA-Z0-9_-]{30,}/g },

  // Google / Gemini patterns
  { name: 'google-gemini', pattern: /\bAIza[0-9A-Za-z_-]{32,40}\b/g },

  // HuggingFace patterns
  { name: 'huggingface', pattern: /\bhf_[a-zA-Z0-9]{30,}/g },

  // Slack patterns (not all covered by secretlint preset)
  { name: 'slack-xoxb', pattern: /\bxoxb-[0-9]{10,}-[0-9]{10,}-[a-zA-Z0-9]{20,}/g },
  { name: 'slack-xoxp', pattern: /\bxoxp-[0-9]{10,}-[0-9]{10,}-[0-9]{10,}-[a-zA-Z0-9]{20,}/g },

  // npm patterns
  { name: 'npm', pattern: /\bnpm_[a-zA-Z0-9]{30,}/g },

  // Shopify patterns
  { name: 'shopify', pattern: /\bshpat_[a-f0-9]{32}\b/g },
]);

/**
 * Use custom patterns to detect secrets in content.
 * Detections are listed pattern by pattern (in table order) and, within one
 * pattern, in order of appearance.
 * @param {string} content - Content to scan (non-string input is converted with String())
 * @returns {Array<{start: number, end: number, token: string, patternName: string, source: string}>} Detected secrets with pattern info
 */
const detectSecretsWithCustomPatterns = content => {
  const text = typeof content === 'string' ? content : String(content);
  const secrets = [];

  for (const { name, pattern } of CUSTOM_SECRET_PATTERNS) {
    for (const match of text.matchAll(pattern)) {
      const token = match[0];
      secrets.push({
        start: match.index,
        end: match.index + token.length,
        token,
        patternName: name,
        source: 'custom',
      });
    }
  }

  return secrets;
};
365
+
366
/**
 * Partition the detections of both approaches by token text.
 * Each side is first indexed by token (a repeated token keeps its last detection).
 * Tokens seen by both sides are returned as the secretlint detection extended
 * with `customPattern`, the name of the custom pattern that also matched.
 * @param {Array} secretlintSecrets - Secrets detected by secretlint
 * @param {Array} customSecrets - Secrets detected by custom patterns
 * @returns {Promise<{secretlintOnly: Array, customOnly: Array, both: Array}>}
 */
const compareDetectionResults = async (secretlintSecrets, customSecrets) => {
  const indexByToken = detections => new Map(detections.map(detection => [detection.token, detection]));
  const fromSecretlint = indexByToken(secretlintSecrets);
  const fromCustom = indexByToken(customSecrets);

  const secretlintDetections = [...fromSecretlint.values()];
  const customDetections = [...fromCustom.values()];

  // Secretlint found it, our custom patterns did not
  const secretlintOnly = secretlintDetections.filter(detection => !fromCustom.has(detection.token));

  // Found by both approaches
  const both = secretlintDetections
    .filter(detection => fromCustom.has(detection.token))
    .map(detection => ({ ...detection, customPattern: fromCustom.get(detection.token).patternName }));

  // Our custom patterns found it, secretlint did not
  const customOnly = customDetections.filter(detection => !fromSecretlint.has(detection.token));

  return { secretlintOnly, customOnly, both };
};
399
+
400
/**
 * Collapse detections into sorted, non-overlapping [start, end) ranges.
 * Overlapping detections are merged into their union so that no part of a
 * secret is left unmasked when two detectors disagree about its boundaries.
 * @param {Array<{start: number, end: number}>} secrets - Detections with positions
 * @param {number} contentLength - Length of the scanned content (ranges are clamped to it)
 * @returns {Array<{start: number, end: number}>} Merged ranges in ascending order
 */
const mergeSecretRanges = (secrets, contentLength) => {
  const ranges = secrets
    .map(({ start, end }) => ({ start, end: Math.min(end, contentLength) }))
    .filter(({ start, end }) => Number.isInteger(start) && Number.isInteger(end) && start >= 0 && start < end)
    .sort((a, b) => a.start - b.start || a.end - b.end);

  const merged = [];
  for (const range of ranges) {
    const previous = merged[merged.length - 1];
    if (previous && range.start < previous.end) {
      previous.end = Math.max(previous.end, range.end);
    } else {
      merged.push(range);
    }
  }
  return merged;
};

/**
 * Sanitize log content by masking sensitive tokens while avoiding false positives
 * Uses DUAL APPROACH: Both secretlint AND custom patterns run independently
 *
 * Steps:
 *   1. Mask tokens known from the gh config file and `gh auth status`.
 *   2. Detect secrets with secretlint and with the custom patterns; optionally
 *      log where the two approaches disagree. Log lines only ever contain the
 *      masked form of a detected secret.
 *   3. Mask the union of all detected ranges.
 *   4. Mask 40-char hex strings unless isHexInSafeContext() says they are in a
 *      git/gist context. A hex string judged unsafe at one occurrence is masked
 *      at every occurrence.
 *
 * Any failure is reported and the content sanitized so far is returned.
 *
 * @param {string} logContent - The log content to sanitize
 * @param {Object} options - Optional configuration
 * @param {boolean} options.warnOnMismatch - Log warnings when detection approaches differ (default: true in verbose mode)
 * @returns {Promise<string>} Sanitized log content with tokens masked
 */
export const sanitizeLogContent = async (logContent, options = {}) => {
  let sanitized = logContent;
  const { warnOnMismatch = global.verboseMode } = options;

  // Statistics for dual approach
  const stats = {
    knownTokens: 0,
    secretlintDetections: 0,
    customDetections: 0,
    secretlintOnlyWarnings: [],
    customOnlyDetections: [],
  };

  try {
    // Step 1: Get known tokens from files and commands (independent lookups, run together)
    const [fileTokens, commandTokens] = await Promise.all([getGitHubTokensFromFiles(), getGitHubTokensFromCommand()]);
    const allKnownTokens = [...new Set([...fileTokens, ...commandTokens])];

    // Mask known tokens first
    for (const token of allKnownTokens) {
      if (token && token.length >= 12) {
        sanitized = sanitized.split(token).join(maskToken(token));
        stats.knownTokens++;
      }
    }

    // Step 2: DUAL APPROACH - Run both detection methods independently
    const [secretlintSecrets, customSecrets] = await Promise.all([detectSecretsWithSecretlint(sanitized), detectSecretsWithCustomPatterns(sanitized)]);
    stats.secretlintDetections = secretlintSecrets.length;
    stats.customDetections = customSecrets.length;

    // Compare results to find discrepancies
    const { secretlintOnly, customOnly } = await compareDetectionResults(secretlintSecrets, customSecrets);

    // Log warnings for secretlint-only detections (our patterns should catch these)
    if (warnOnMismatch && secretlintOnly.length > 0) {
      stats.secretlintOnlyWarnings = secretlintOnly;
      await log(` ⚠️ PATTERN GAP: Secretlint found ${secretlintOnly.length} secret(s) that our custom patterns missed:`, { verbose: true });
      for (const secret of secretlintOnly) {
        // The preview is the MASKED token: a sanitizer must never write raw secret characters to the log
        await log(` • Rule: ${secret.ruleId}, Token preview: ${maskToken(secret.token)}`, { verbose: true });
      }
      await log(` Consider adding custom patterns for these secret types to improve our detection.`, { verbose: true });
    }

    // Log info about custom-only detections (we catch things secretlint doesn't)
    if (warnOnMismatch && customOnly.length > 0) {
      stats.customOnlyDetections = customOnly;
      await log(` ℹ️ CUSTOM ADVANTAGE: Our patterns found ${customOnly.length} secret(s) that secretlint missed:`, { verbose: true });
      for (const secret of customOnly) {
        await log(` • Pattern: ${secret.patternName}`, { verbose: true });
      }
    }

    // Step 3: Mask the union of everything either approach detected.
    // Positions refer to `sanitized` as it was scanned; replacing from the end
    // towards the start keeps the positions of the remaining ranges valid.
    const rangesToMask = mergeSecretRanges([...secretlintSecrets, ...customSecrets], sanitized.length);
    for (let i = rangesToMask.length - 1; i >= 0; i--) {
      const { start, end } = rangesToMask[i];
      sanitized = sanitized.substring(0, start) + maskToken(sanitized.substring(start, end)) + sanitized.substring(end);
    }

    // Step 4: Handle 40-char hex tokens specially - only mask if NOT in safe context
    // These could be GitHub tokens OR git commit hashes/gist IDs
    const hexPattern = /(?:^|[\s:=])([a-f0-9]{40})(?=[\s\n]|$)/gm;
    const hexTokensToMask = new Set();

    // First pass: find all matches and determine which to mask
    for (const hexMatch of sanitized.matchAll(hexPattern)) {
      const token = hexMatch[1];
      // The match may start with a one-character delimiter; point at the hex string itself
      const tokenPosition = hexMatch.index + hexMatch[0].length - token.length;

      // Only mask if NOT in a safe git/gist context
      if (!isHexInSafeContext(sanitized, token, tokenPosition)) {
        hexTokensToMask.add(token);
      }
    }

    // Second pass: apply replacements (every occurrence of each unsafe hex string)
    for (const token of hexTokensToMask) {
      sanitized = sanitized.split(token).join(maskToken(token));
    }

    // Summary logging
    const totalMasked = rangesToMask.length + hexTokensToMask.size + stats.knownTokens;
    if (global.verboseMode && totalMasked > 0) {
      await log(` 🔒 Sanitized ${totalMasked} secrets using dual approach:`, { verbose: true });
      await log(` • Known tokens: ${stats.knownTokens}`, { verbose: true });
      await log(` • Secretlint: ${stats.secretlintDetections} detections`, { verbose: true });
      await log(` • Custom patterns: ${stats.customDetections} detections`, { verbose: true });
      await log(` • Hex tokens: ${hexTokensToMask.size}`, { verbose: true });
      if (stats.secretlintOnlyWarnings.length > 0) {
        await log(` ⚠️ Pattern gaps to address: ${stats.secretlintOnlyWarnings.length}`, { verbose: true });
      }
    }
  } catch (error) {
    reportError(error, {
      context: 'sanitize_log_content',
      level: 'warning',
    });
    await log(` ⚠️ Warning: Could not fully sanitize log content: ${error.message}`, { verbose: true });
  }

  return sanitized;
};
549
+
550
+ // Export detection functions for testing and visibility
551
+ export { detectSecretsWithSecretlint, detectSecretsWithCustomPatterns, compareDetectionResults };
552
+
553
+ // Default export for convenience
554
+ export default {
555
+ isSafeToken,
556
+ isHexInSafeContext,
557
+ getGitHubTokensFromFiles,
558
+ getGitHubTokensFromCommand,
559
+ sanitizeLogContent,
560
+ detectSecretsWithSecretlint,
561
+ detectSecretsWithCustomPatterns,
562
+ compareDetectionResults,
563
+ };