@link-assistant/hive-mind 1.2.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -0
- package/package.json +5 -2
- package/src/github.lib.mjs +6 -134
- package/src/token-sanitization.lib.mjs +563 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,16 @@
|
|
|
1
1
|
# @link-assistant/hive-mind
|
|
2
2
|
|
|
3
|
+
## 1.2.1
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 04cb3d2: Fix false positives in token masking for log sanitization
|
|
8
|
+
- Remove overly broad regex pattern that was matching legitimate identifiers like `browser_take_screenshot` and MCP tool names
|
|
9
|
+
- Add allowlist of safe token patterns (browser\_, mcp\_\_, function names with underscores, UUIDs)
|
|
10
|
+
- Add context-aware detection for 40-char hex strings to avoid masking git commit hashes and gist IDs
|
|
11
|
+
- Export new helper functions `isSafeToken` and `isHexInSafeContext` for testing
|
|
12
|
+
- Add comprehensive unit tests for false positive prevention
|
|
13
|
+
|
|
3
14
|
## 1.2.0
|
|
4
15
|
|
|
5
16
|
### Minor Changes
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@link-assistant/hive-mind",
|
|
3
|
-
"version": "1.2.0",
|
|
3
|
+
"version": "1.2.1",
|
|
4
4
|
"description": "AI-powered issue solver and hive mind for collaborative problem solving",
|
|
5
5
|
"main": "src/hive.mjs",
|
|
6
6
|
"type": "module",
|
|
@@ -61,8 +61,11 @@
|
|
|
61
61
|
"prettier": "^3.6.2"
|
|
62
62
|
},
|
|
63
63
|
"dependencies": {
|
|
64
|
+
"@secretlint/core": "^11.2.5",
|
|
65
|
+
"@secretlint/secretlint-rule-preset-recommend": "^11.2.5",
|
|
64
66
|
"@sentry/node": "^10.15.0",
|
|
65
|
-
"@sentry/profiling-node": "^10.15.0"
|
|
67
|
+
"@sentry/profiling-node": "^10.15.0",
|
|
68
|
+
"secretlint": "^11.2.5"
|
|
66
69
|
},
|
|
67
70
|
"lint-staged": {
|
|
68
71
|
"*.{js,mjs,json,md}": [
|
package/src/github.lib.mjs
CHANGED
|
@@ -5,9 +5,6 @@
|
|
|
5
5
|
if (typeof globalThis.use === 'undefined') {
|
|
6
6
|
globalThis.use = (await eval(await (await fetch('https://unpkg.com/use-m/use.js')).text())).use;
|
|
7
7
|
}
|
|
8
|
-
const fs = (await use('fs')).promises;
|
|
9
|
-
const os = (await use('os')).default;
|
|
10
|
-
const path = (await use('path')).default;
|
|
11
8
|
// Use command-stream for consistent $ behavior
|
|
12
9
|
const { $ } = await use('command-stream');
|
|
13
10
|
// Import log and maskToken from general lib
|
|
@@ -16,6 +13,10 @@ import { reportError } from './sentry.lib.mjs';
|
|
|
16
13
|
import { githubLimits, timeouts } from './config.lib.mjs';
|
|
17
14
|
// Import batch operations from separate module
|
|
18
15
|
import { batchCheckPullRequestsForIssues as batchCheckPRs, batchCheckArchivedRepositories as batchCheckArchived } from './github.batch.lib.mjs';
|
|
16
|
+
// Import token sanitization from dedicated module (Issue #1037 fix)
|
|
17
|
+
import { isSafeToken, isHexInSafeContext, getGitHubTokensFromFiles, getGitHubTokensFromCommand, sanitizeLogContent } from './token-sanitization.lib.mjs';
|
|
18
|
+
// Re-export token sanitization functions for backward compatibility
|
|
19
|
+
export { isSafeToken, isHexInSafeContext, getGitHubTokensFromFiles, getGitHubTokensFromCommand, sanitizeLogContent };
|
|
19
20
|
// Import log upload function from separate module
|
|
20
21
|
import { uploadLogWithGhUploadLog } from './log-upload.lib.mjs';
|
|
21
22
|
|
|
@@ -60,139 +61,8 @@ const buildCostInfoString = (totalCostUSD, anthropicTotalCostUSD, pricingInfo) =
|
|
|
60
61
|
|
|
61
62
|
// Helper function to mask GitHub tokens (alias for backward compatibility)
|
|
62
63
|
export const maskGitHubToken = maskToken;
|
|
63
|
-
// Helper function to get GitHub tokens from local config files
|
|
64
|
-
export const getGitHubTokensFromFiles = async () => {
|
|
65
|
-
const tokens = [];
|
|
66
|
-
|
|
67
|
-
try {
|
|
68
|
-
// Check ~/.config/gh/hosts.yml
|
|
69
|
-
const hostsFile = path.join(os.homedir(), '.config/gh/hosts.yml');
|
|
70
|
-
if (
|
|
71
|
-
await fs
|
|
72
|
-
.access(hostsFile)
|
|
73
|
-
.then(() => true)
|
|
74
|
-
.catch(() => false)
|
|
75
|
-
) {
|
|
76
|
-
const hostsContent = await fs.readFile(hostsFile, 'utf8');
|
|
77
|
-
|
|
78
|
-
// Look for oauth_token and api_token patterns
|
|
79
|
-
const oauthMatches = hostsContent.match(/oauth_token:\s*([^\s\n]+)/g);
|
|
80
|
-
if (oauthMatches) {
|
|
81
|
-
for (const match of oauthMatches) {
|
|
82
|
-
const token = match.split(':')[1].trim();
|
|
83
|
-
if (token && !tokens.includes(token)) {
|
|
84
|
-
tokens.push(token);
|
|
85
|
-
}
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
const apiMatches = hostsContent.match(/api_token:\s*([^\s\n]+)/g);
|
|
90
|
-
if (apiMatches) {
|
|
91
|
-
for (const match of apiMatches) {
|
|
92
|
-
const token = match.split(':')[1].trim();
|
|
93
|
-
if (token && !tokens.includes(token)) {
|
|
94
|
-
tokens.push(token);
|
|
95
|
-
}
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
|
-
}
|
|
99
|
-
} catch (error) {
|
|
100
|
-
// File access errors are expected when config doesn't exist
|
|
101
|
-
if (global.verboseMode) {
|
|
102
|
-
reportError(error, {
|
|
103
|
-
context: 'github_token_file_access',
|
|
104
|
-
level: 'debug',
|
|
105
|
-
});
|
|
106
|
-
}
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
return tokens;
|
|
110
|
-
};
|
|
111
|
-
// Helper function to get GitHub tokens from gh command output
|
|
112
|
-
export const getGitHubTokensFromCommand = async () => {
|
|
113
|
-
const { $ } = await use('command-stream');
|
|
114
|
-
const tokens = [];
|
|
115
|
-
|
|
116
|
-
try {
|
|
117
|
-
// Run gh auth status to get token info
|
|
118
|
-
const authResult = await $`gh auth status 2>&1`.catch(() => ({ stdout: '', stderr: '' }));
|
|
119
|
-
const authOutput = authResult.stdout?.toString() + authResult.stderr?.toString() || '';
|
|
120
|
-
|
|
121
|
-
// Look for token patterns in the output
|
|
122
|
-
const tokenPatterns = [/(?:token|oauth|api)[:\s]*([a-zA-Z0-9_]{20,})/gi, /gh[pou]_[a-zA-Z0-9_]{20,}/gi];
|
|
123
|
-
|
|
124
|
-
for (const pattern of tokenPatterns) {
|
|
125
|
-
const matches = authOutput.match(pattern);
|
|
126
|
-
if (matches) {
|
|
127
|
-
for (let match of matches) {
|
|
128
|
-
// Clean up the match
|
|
129
|
-
const token = match.replace(/^(?:token|oauth|api)[:\s]*/, '').trim();
|
|
130
|
-
if (token && token.length >= 20 && !tokens.includes(token)) {
|
|
131
|
-
tokens.push(token);
|
|
132
|
-
}
|
|
133
|
-
}
|
|
134
|
-
}
|
|
135
|
-
}
|
|
136
|
-
} catch (error) {
|
|
137
|
-
// Command errors are expected when gh is not configured
|
|
138
|
-
if (global.verboseMode) {
|
|
139
|
-
reportError(error, {
|
|
140
|
-
context: 'github_token_gh_auth',
|
|
141
|
-
level: 'debug',
|
|
142
|
-
});
|
|
143
|
-
}
|
|
144
|
-
}
|
|
145
|
-
|
|
146
|
-
return tokens;
|
|
147
|
-
};
|
|
148
64
|
// Escape ``` in logs for safe markdown embedding (replaces with \`\`\` to prevent code block closure)
|
|
149
65
|
/**
 * Escape every triple-backtick fence in a log so the log can be embedded inside a
 * markdown code block without closing it early.
 * @param {string} logContent - Raw log text
 * @returns {string} Log text with each ``` replaced by \`\`\`
 */
export const escapeCodeBlocksInLog = logContent => {
  const escapedFence = '\\`\\`\\`';
  return logContent.replaceAll('```', escapedFence);
};
|
|
150
|
-
// Helper function to sanitize log content by masking GitHub tokens
|
|
151
|
-
export const sanitizeLogContent = async logContent => {
|
|
152
|
-
let sanitized = logContent;
|
|
153
|
-
|
|
154
|
-
try {
|
|
155
|
-
// Get tokens from both sources
|
|
156
|
-
const fileTokens = await getGitHubTokensFromFiles();
|
|
157
|
-
const commandTokens = await getGitHubTokensFromCommand();
|
|
158
|
-
const allTokens = [...new Set([...fileTokens, ...commandTokens])];
|
|
159
|
-
|
|
160
|
-
// Mask each token found
|
|
161
|
-
for (const token of allTokens) {
|
|
162
|
-
if (token && token.length >= 12) {
|
|
163
|
-
const maskedToken = maskToken(token);
|
|
164
|
-
// Use global replace to mask all occurrences
|
|
165
|
-
sanitized = sanitized.split(token).join(maskedToken);
|
|
166
|
-
}
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
// Also look for and mask common GitHub token patterns directly in the log
|
|
170
|
-
const tokenPatterns = [
|
|
171
|
-
/gh[pou]_[a-zA-Z0-9_]{20,}/g,
|
|
172
|
-
/(?:^|[\s:=])([a-f0-9]{40})(?=[\s\n]|$)/gm, // 40-char hex tokens (like personal access tokens)
|
|
173
|
-
/(?:^|[\s:=])([a-zA-Z0-9_]{20,})(?=[\s\n]|$)/gm, // General long tokens
|
|
174
|
-
];
|
|
175
|
-
|
|
176
|
-
for (const pattern of tokenPatterns) {
|
|
177
|
-
sanitized = sanitized.replace(pattern, (match, token) => {
|
|
178
|
-
if (token && token.length >= 20) {
|
|
179
|
-
return match.replace(token, maskToken(token));
|
|
180
|
-
}
|
|
181
|
-
return match;
|
|
182
|
-
});
|
|
183
|
-
}
|
|
184
|
-
|
|
185
|
-
await log(` 🔒 Sanitized ${allTokens.length} detected GitHub tokens in log content`, { verbose: true });
|
|
186
|
-
} catch (error) {
|
|
187
|
-
reportError(error, {
|
|
188
|
-
context: 'sanitize_log_content',
|
|
189
|
-
level: 'warning',
|
|
190
|
-
});
|
|
191
|
-
await log(` ⚠️ Warning: Could not fully sanitize log content: ${error.message}`, { verbose: true });
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
return sanitized;
|
|
195
|
-
};
|
|
196
66
|
// Helper function to check if a file exists in a GitHub branch
|
|
197
67
|
export const checkFileInBranch = async (owner, repo, fileName, branchName) => {
|
|
198
68
|
const { $ } = await use('command-stream');
|
|
@@ -1469,6 +1339,8 @@ export default {
|
|
|
1469
1339
|
getGitHubTokensFromFiles,
|
|
1470
1340
|
getGitHubTokensFromCommand,
|
|
1471
1341
|
escapeCodeBlocksInLog,
|
|
1342
|
+
isSafeToken,
|
|
1343
|
+
isHexInSafeContext,
|
|
1472
1344
|
sanitizeLogContent,
|
|
1473
1345
|
checkFileInBranch,
|
|
1474
1346
|
checkGitHubPermissions,
|
|
@@ -0,0 +1,563 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Token sanitization utilities for log content
|
|
4
|
+
* Dual approach: Uses both secretlint AND custom patterns for comprehensive coverage
|
|
5
|
+
*
|
|
6
|
+
* Architecture:
|
|
7
|
+
* 1. Custom patterns (our logic) - patterns we define and maintain
|
|
8
|
+
* 2. Secretlint patterns - battle-tested community patterns
|
|
9
|
+
*
|
|
10
|
+
* Both approaches run independently, and if only one detects a secret,
|
|
11
|
+
* a warning is logged (especially when secretlint finds something our logic misses).
|
|
12
|
+
* This helps us improve our custom patterns over time.
|
|
13
|
+
*
|
|
14
|
+
* @module token-sanitization
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
// Import shared utility from lib.mjs
|
|
18
|
+
import { maskToken, log } from './lib.mjs';
|
|
19
|
+
import { reportError } from './sentry.lib.mjs';
|
|
20
|
+
|
|
21
|
+
// Node built-ins are resolved lazily through dynamic import() each time a getter is called.

/** @returns {Promise<object>} Default export of the 'os' module */
const getOsModule = async () => {
  const osNamespace = await import('os');
  return osNamespace.default;
};

/** @returns {Promise<object>} Default export of the 'path' module */
const getPathModule = async () => {
  const pathNamespace = await import('path');
  return pathNamespace.default;
};

/** @returns {Promise<object>} The promise-based API (`promises`) of the 'fs' module */
const getFsModule = async () => {
  const { promises } = await import('fs');
  return promises;
};
|
|
25
|
+
|
|
26
|
+
// Secretlint state, filled in by initSecretlint() on first use.
// secretlintConfig doubles as the load status: null = not attempted yet,
// false = loading failed, object = ready-to-use lint configuration.
let secretlintCore = null;
let secretlintConfig = null;

/**
 * Load the secretlint core and the recommended rule preset once, caching the outcome.
 * A failed load is remembered too, so the import is not retried on later calls.
 * @returns {Promise<boolean>} True if secretlint is available
 */
const initSecretlint = async () => {
  const alreadyAttempted = secretlintConfig !== null;
  if (alreadyAttempted) {
    return secretlintConfig !== false;
  }

  try {
    const corePromise = import('@secretlint/core');
    const presetPromise = import('@secretlint/secretlint-rule-preset-recommend');
    const [core, preset] = await Promise.all([corePromise, presetPromise]);

    const recommendedPreset = {
      id: '@secretlint/secretlint-rule-preset-recommend',
      rule: preset.creator,
    };

    secretlintCore = core;
    secretlintConfig = { rules: [recommendedPreset] };

    return true;
  } catch (error) {
    // Secretlint could not be loaded - callers fall back to the custom patterns only
    if (global.verboseMode) {
      await log(` ⚠️ Secretlint not available, using fallback patterns: ${error.message}`, { verbose: true });
    }
    secretlintConfig = false;
    return false;
  }
};
|
|
62
|
+
|
|
63
|
+
/**
 * Allowlist of token shapes that are NOT sensitive (false-positive shapes).
 * A string matching any of these is treated as a legitimate identifier.
 */
const SAFE_TOKEN_PATTERNS = [
  /^mcp__[a-z_]+$/i, // MCP tool names (Playwright, etc.)
  /^browser_[a-z_]+$/i, // Browser/Playwright tool names
  /^[a-z]+_[a-z]+_[a-z_]+$/i, // Letters-only function/tool names with at least two underscores
  /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i, // UUIDs
];

/**
 * Patterns for surrounding text that marks a hex string as non-sensitive,
 * e.g. a git commit hash or a gist ID.
 */
const SAFE_CONTEXT_PATTERNS = [
  // gh / git commands that take or print hashes and IDs
  /\bgh\s+gist\s+view\b/i,
  /\bgit\s+(log|show|diff|cherry-pick|revert|checkout|reset)\b/i,
  /\bgit\s+commit\s+-m\b/i,
  // "commit <sha>" and "SHA: <sha>" as they appear in common git output
  /\bcommit\s+[a-f0-9]{7,40}\b/i,
  /\bSHA\s*:\s*[a-f0-9]{7,40}\b/i,
  // First line of a `git log` entry
  /^commit\s+[a-f0-9]{40}/m,
  // A hash directly followed by an "Author:" field
  /\b[a-f0-9]{7,40}\s+Author:/i,
];

// Note: the custom secret patterns themselves live in detectSecretsWithCustomPatterns(),
// where each one is named so its hits can be compared with secretlint's results.

/**
 * Tell whether a token has one of the allowlisted, non-sensitive shapes.
 * @param {string} token - The token to check
 * @returns {boolean} True if the token is safe and should NOT be masked
 */
export const isSafeToken = token => {
  if (!token) return false;
  for (const safeShape of SAFE_TOKEN_PATTERNS) {
    if (safeShape.test(token)) return true;
  }
  return false;
};

/**
 * Tell whether a hex string sits in a safe context (like a git command).
 * Only the text within 100 characters on either side of the hex string is inspected.
 * @param {string} content - The full content to search
 * @param {string} hexString - The hex string that was found
 * @param {number} position - The position where the hex string was found
 * @returns {boolean} True if the hex string is in a safe context
 */
export const isHexInSafeContext = (content, hexString, position) => {
  const windowRadius = 100;
  const windowStart = Math.max(0, position - windowRadius);
  const windowEnd = Math.min(content.length, position + hexString.length + windowRadius);
  const neighbourhood = content.substring(windowStart, windowEnd);

  for (const safeContext of SAFE_CONTEXT_PATTERNS) {
    if (safeContext.test(neighbourhood)) return true;
  }
  return false;
};
|
|
125
|
+
|
|
126
|
+
/**
 * Get GitHub tokens from local config files.
 *
 * Reads ~/.config/gh/hosts.yml (if it is accessible) and collects the values of every
 * `oauth_token:` and `api_token:` entry, oauth entries first. Each value is returned
 * once. Any error is swallowed (and reported through reportError only in verbose mode),
 * so the function resolves to the tokens gathered so far instead of rejecting.
 *
 * @returns {Promise<string[]>} Array of tokens found
 */
export const getGitHubTokensFromFiles = async () => {
  const os = await getOsModule();
  const path = await getPathModule();
  const fs = await getFsModule();
  // A Set keeps insertion order and de-duplicates without repeated includes() scans
  const tokens = new Set();

  try {
    // Check ~/.config/gh/hosts.yml
    const hostsFile = path.join(os.homedir(), '.config/gh/hosts.yml');
    const hostsFileAccessible = await fs.access(hostsFile).then(
      () => true,
      () => false
    );

    if (hostsFileAccessible) {
      const hostsContent = await fs.readFile(hostsFile, 'utf8');

      // oauth_token entries are collected before api_token entries
      const tokenEntryPatterns = [/oauth_token:\s*([^\s\n]+)/g, /api_token:\s*([^\s\n]+)/g];
      for (const entryPattern of tokenEntryPatterns) {
        for (const [, token] of hostsContent.matchAll(entryPattern)) {
          // Take the captured value itself: splitting the match on ':' would cut off
          // everything after the first colon inside the token value.
          tokens.add(token);
        }
      }
    }
  } catch (error) {
    // File access errors are expected when config doesn't exist
    if (global.verboseMode) {
      reportError(error, {
        context: 'github_token_file_access',
        level: 'debug',
      });
    }
  }

  return [...tokens];
};
|
|
180
|
+
|
|
181
|
+
/**
 * Get GitHub tokens from gh command output.
 *
 * Runs `gh auth status` (stderr redirected into stdout) and extracts anything that looks
 * like a token: 20+ word characters following "token", "oauth" or "api" (any letter case),
 * or a `ghp_` / `gho_` / `ghu_` prefixed value. Each token is returned once. Any error is
 * swallowed (and reported through reportError only in verbose mode).
 *
 * @returns {Promise<string[]>} Array of tokens found
 */
export const getGitHubTokensFromCommand = async () => {
  if (typeof globalThis.use === 'undefined') {
    // SECURITY NOTE(review): this downloads a script from unpkg and eval()s it at runtime.
    // Kept as-is because the loader is set up the same way elsewhere in the package;
    // consider pinning a version or vendoring the loader.
    globalThis.use = (await eval(await (await fetch('https://unpkg.com/use-m/use.js')).text())).use;
  }
  const { $ } = await globalThis.use('command-stream');
  // A Set keeps insertion order and de-duplicates without repeated includes() scans
  const tokens = new Set();

  try {
    // Run gh auth status to get token info; a failing command counts as empty output
    const authResult = await $`gh auth status 2>&1`.catch(() => ({ stdout: '', stderr: '' }));
    // Default each stream separately: `a + b || ''` applies `||` only after the
    // concatenation, so a missing stream used to contribute the text "undefined".
    const stdoutText = authResult.stdout?.toString() ?? '';
    const stderrText = authResult.stderr?.toString() ?? '';
    const authOutput = stdoutText + stderrText;

    // Both patterns expose the token as capture group 1, so no prefix stripping is
    // needed afterwards (the old stripping regex was case-sensitive while the match
    // was not, leaving e.g. "Token: " attached to the extracted value).
    const tokenPatterns = [/(?:token|oauth|api)[:\s]*([a-zA-Z0-9_]{20,})/gi, /(gh[pou]_[a-zA-Z0-9_]{20,})/gi];

    for (const pattern of tokenPatterns) {
      for (const [, token] of authOutput.matchAll(pattern)) {
        tokens.add(token);
      }
    }
  } catch (error) {
    // Command errors are expected when gh is not configured
    if (global.verboseMode) {
      reportError(error, {
        context: 'github_token_gh_auth',
        level: 'debug',
      });
    }
  }

  return [...tokens];
};
|
|
224
|
+
|
|
225
|
+
/**
 * Scan content with secretlint and report every finding that carries a two-element range.
 * Resolves to an empty array when secretlint is unavailable; a lint failure is logged
 * (verbose mode only) and whatever was collected up to that point is returned.
 * @param {string} content - Content to scan
 * @returns {Promise<Array<{start: number, end: number, token: string, ruleId: string, source: string}>>} Detected secrets with rule info
 */
const detectSecretsWithSecretlint = async content => {
  const secretlintReady = await initSecretlint();
  if (!(secretlintReady && secretlintCore && secretlintConfig)) {
    return [];
  }

  const findings = [];

  try {
    const source = {
      filePath: '/virtual/content.txt',
      content: content,
      contentType: 'text',
    };
    const options = {
      config: secretlintConfig,
      maskSecrets: false, // Raw positions are needed so the caller can do the masking
    };
    const lintResult = await secretlintCore.lintSource({ source, options });

    for (const message of lintResult.messages) {
      const hasUsableRange = message.range && message.range.length === 2;
      if (!hasUsableRange) {
        continue;
      }
      const [start, end] = message.range;
      findings.push({
        start,
        end,
        token: content.substring(start, end),
        ruleId: message.ruleId || 'unknown',
        source: 'secretlint',
      });
    }
  } catch (error) {
    if (global.verboseMode) {
      await log(` ⚠️ Secretlint detection error: ${error.message}`, { verbose: true });
    }
  }

  return findings;
};
|
|
272
|
+
|
|
273
|
+
/**
 * Scan content with the project's own named regexes.
 * Results are grouped by pattern in table order, and by position within each pattern.
 * @param {string} content - Content to scan
 * @returns {Array<{start: number, end: number, token: string, patternName: string, source: string}>} Detected secrets with pattern info
 */
const detectSecretsWithCustomPatterns = content => {
  // [pattern name, regex] - the name records which pattern produced a hit
  const patternTable = [
    // OpenAI
    ['openai-project', /\bsk-(?:proj-|svcacct-|admin-)?[A-Za-z0-9_-]*T3BlbkFJ[A-Za-z0-9_-]+/g],
    // Anthropic
    ['anthropic-claude', /\bsk-ant-(?:api\d{2}-)?[A-Za-z0-9_-]{20,}/g],
    // GitHub
    ['github-pat', /\bgithub_pat_[a-zA-Z0-9_]{20,}/g],
    ['github-server', /\bghs_[a-zA-Z0-9_]{20,}/g],
    ['github-refresh', /\bghr_[a-zA-Z0-9_]{20,}/g],
    ['github-ghp', /\bghp_[a-zA-Z0-9_]{20,}/g],
    ['github-gho', /\bgho_[a-zA-Z0-9_]{20,}/g],
    ['github-ghu', /\bghu_[a-zA-Z0-9_]{20,}/g],
    // AWS
    ['aws-key', /\b(?:A3T[A-Z0-9]|AKIA|AGPA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}\b/g],
    // Stripe
    ['stripe', /\b(?:sk_live_|sk_test_|pk_live_|pk_test_)[a-zA-Z0-9]{20,}/g],
    // SendGrid
    ['sendgrid', /\bSG\.[a-zA-Z0-9_-]{15,}\.[a-zA-Z0-9_-]{30,}/g],
    // Twilio
    ['twilio', /\bSK[a-f0-9]{32}\b/g],
    // Mailchimp
    ['mailchimp', /\b[a-f0-9]{32}-us[0-9]{1,2}\b/g],
    // Square
    ['square', /\bsq0(?:atp|csp)-[a-zA-Z0-9_-]{22,}/g],
    // Databricks
    ['databricks', /\bdapi[a-f0-9]{32}\b/g],
    // PyPI
    ['pypi', /\bpypi-[A-Za-z0-9_-]{50,}/g],
    // Discord
    ['discord', /\b[MN][A-Za-z0-9_-]{23,}\.[A-Za-z0-9_-]{6}\.[A-Za-z0-9_-]{20,}/g],
    // Telegram
    ['telegram', /\b[0-9]{8,10}:[a-zA-Z0-9_-]{30,}/g],
    // Google / Gemini
    ['google-gemini', /\bAIza[0-9A-Za-z_-]{32,40}\b/g],
    // HuggingFace
    ['huggingface', /\bhf_[a-zA-Z0-9]{30,}/g],
    // Slack (not all covered by secretlint preset)
    ['slack-xoxb', /\bxoxb-[0-9]{10,}-[0-9]{10,}-[a-zA-Z0-9]{20,}/g],
    ['slack-xoxp', /\bxoxp-[0-9]{10,}-[0-9]{10,}-[0-9]{10,}-[a-zA-Z0-9]{20,}/g],
    // npm
    ['npm', /\bnpm_[a-zA-Z0-9]{30,}/g],
    // Shopify
    ['shopify', /\bshpat_[a-f0-9]{32}\b/g],
  ];

  // A run of three or more asterisks marks text that was already masked
  const alreadyMasked = /\*{3,}/;
  // Coerce once up front, mirroring how RegExp#exec stringifies its argument
  const text = String(content);

  return patternTable.flatMap(([patternName, regex]) =>
    [...text.matchAll(regex)]
      .filter(hit => !alreadyMasked.test(hit[0]))
      .map(hit => ({
        start: hit.index,
        end: hit.index + hit[0].length,
        token: hit[0],
        patternName,
        source: 'custom',
      }))
  );
};
|
|
365
|
+
|
|
366
|
+
/**
 * Split the findings of the two detectors into three groups, matching on the exact
 * token text: found by secretlint only, found by the custom patterns only, found by both.
 * Entries in `both` are the secretlint finding plus a `customPattern` field naming the
 * custom pattern that matched. A token reported several times by one detector counts once.
 * @param {Array} secretlintSecrets - Secrets detected by secretlint
 * @param {Array} customSecrets - Secrets detected by custom patterns
 * @returns {Promise<{secretlintOnly: Array, customOnly: Array, both: Array}>}
 */
const compareDetectionResults = async (secretlintSecrets, customSecrets) => {
  // Index each result list by token text for direct lookups
  const indexByToken = findings => new Map(findings.map(finding => [finding.token, finding]));
  const fromSecretlint = indexByToken(secretlintSecrets);
  const fromCustom = indexByToken(customSecrets);

  const secretlintFindings = [...fromSecretlint.entries()];

  // Gaps in the custom patterns: secretlint saw the token, the custom patterns did not
  const secretlintOnly = secretlintFindings.filter(([token]) => !fromCustom.has(token)).map(([, finding]) => finding);

  const both = secretlintFindings.filter(([token]) => fromCustom.has(token)).map(([token, finding]) => ({ ...finding, customPattern: fromCustom.get(token).patternName }));

  // The opposite direction: tokens that only the custom patterns caught
  const customOnly = [...fromCustom.entries()].filter(([token]) => !fromSecretlint.has(token)).map(([, finding]) => finding);

  return { secretlintOnly, customOnly, both };
};
|
|
399
|
+
|
|
400
|
+
/**
 * Merge detected secret ranges so that overlapping (or identical) ranges become one.
 * Empty ranges are dropped. The result is sorted by start position.
 * @param {Array<{start: number, end: number}>} detections - Detected secrets with positions
 * @returns {Array<{start: number, end: number}>} Non-overlapping ranges in ascending order
 */
const mergeOverlappingRanges = detections => {
  const ordered = detections
    .map(({ start, end }) => ({ start, end }))
    .filter(range => range.end > range.start)
    .sort((a, b) => a.start - b.start || a.end - b.end);

  const merged = [];
  for (const range of ordered) {
    const previous = merged[merged.length - 1];
    if (previous && range.start < previous.end) {
      previous.end = Math.max(previous.end, range.end);
    } else {
      merged.push(range);
    }
  }
  return merged;
};

/**
 * Sanitize log content by masking sensitive tokens while avoiding false positives.
 * Uses a DUAL APPROACH: both secretlint AND the custom patterns run independently.
 *
 * Steps:
 *  1. Mask tokens known from the gh config file and from `gh auth status`.
 *  2. Run both detectors on the result and, when `warnOnMismatch` is set, log what only
 *     one of them found (so the custom patterns can be improved).
 *  3. Mask everything either detector found. Overlapping detections are merged into a
 *     single range first, so a partial overlap can no longer leave part of a secret
 *     unmasked.
 *  4. Mask 40-char hex strings unless they appear in a safe git/gist context.
 *
 * Failures are reported through reportError and logged; the content sanitized so far is
 * still returned. How a masked token looks is decided by maskToken (imported from lib.mjs).
 *
 * @param {string} logContent - The log content to sanitize
 * @param {Object} options - Optional configuration
 * @param {boolean} options.warnOnMismatch - Log warnings when detection approaches differ (default: value of global.verboseMode)
 * @returns {Promise<string>} Sanitized log content with tokens masked
 */
export const sanitizeLogContent = async (logContent, options = {}) => {
  let sanitized = logContent;
  const { warnOnMismatch = global.verboseMode } = options;

  // Statistics for dual approach
  const stats = {
    knownTokens: 0,
    secretlintDetections: 0,
    customDetections: 0,
    secretlintOnlyWarnings: [],
    customOnlyDetections: [],
  };

  try {
    // Step 1: Get known tokens from files and commands (independent lookups, run together)
    const [fileTokens, commandTokens] = await Promise.all([getGitHubTokensFromFiles(), getGitHubTokensFromCommand()]);
    const allKnownTokens = [...new Set([...fileTokens, ...commandTokens])];

    // Mask known tokens first
    for (const token of allKnownTokens) {
      if (token && token.length >= 12) {
        sanitized = sanitized.split(token).join(maskToken(token));
        stats.knownTokens++;
      }
    }

    // Step 2: DUAL APPROACH - Run both detection methods independently on the same text
    const [secretlintSecrets, customSecrets] = await Promise.all([detectSecretsWithSecretlint(sanitized), Promise.resolve(detectSecretsWithCustomPatterns(sanitized))]);
    stats.secretlintDetections = secretlintSecrets.length;
    stats.customDetections = customSecrets.length;

    // Compare results to find discrepancies
    const { secretlintOnly, customOnly } = await compareDetectionResults(secretlintSecrets, customSecrets);

    // Log warnings for secretlint-only detections (our patterns should catch these)
    if (warnOnMismatch && secretlintOnly.length > 0) {
      stats.secretlintOnlyWarnings = secretlintOnly;
      await log(` ⚠️ PATTERN GAP: Secretlint found ${secretlintOnly.length} secret(s) that our custom patterns missed:`, { verbose: true });
      for (const secret of secretlintOnly) {
        // The preview goes through maskToken, like the sanitized output itself, so the
        // log never shows more of a secret than the sanitized content does.
        await log(` • Rule: ${secret.ruleId}, Token preview: ${maskToken(secret.token)}`, { verbose: true });
      }
      await log(` Consider adding custom patterns for these secret types to improve our detection.`, { verbose: true });
    }

    // Log info about custom-only detections (we catch things secretlint doesn't)
    if (warnOnMismatch && customOnly.length > 0) {
      stats.customOnlyDetections = customOnly;
      await log(` ℹ️ CUSTOM ADVANTAGE: Our patterns found ${customOnly.length} secret(s) that secretlint missed:`, { verbose: true });
      for (const secret of customOnly) {
        await log(` • Pattern: ${secret.patternName}`, { verbose: true });
      }
    }

    // Step 3: Merge the detections of both sources and mask them.
    // Both detectors scanned the same string, so their positions are directly comparable.
    const rangesToMask = mergeOverlappingRanges([...secretlintSecrets, ...customSecrets]);

    // Walk from the end to the start so earlier positions stay valid after each replacement
    for (let i = rangesToMask.length - 1; i >= 0; i--) {
      const { start, end } = rangesToMask[i];
      const masked = maskToken(sanitized.substring(start, end));
      sanitized = sanitized.substring(0, start) + masked + sanitized.substring(end);
    }

    // Step 4: Handle 40-char hex tokens specially - only mask if NOT in safe context
    // These could be GitHub tokens OR git commit hashes/gist IDs
    const hexPattern = /(?:^|[\s:=])([a-f0-9]{40})(?=[\s\n]|$)/gm;
    // token -> masked form; a Map so a hex string seen several times is counted once
    const hexReplacements = new Map();

    // First pass: find all matches and determine which to mask
    const tempContent = sanitized;
    for (const hexMatch of tempContent.matchAll(hexPattern)) {
      const token = hexMatch[1];
      if (hexReplacements.has(token)) {
        continue;
      }
      // Only mask if NOT in a safe git/gist context
      if (!isHexInSafeContext(tempContent, token, hexMatch.index)) {
        hexReplacements.set(token, maskToken(token));
      }
    }

    // Second pass: apply replacements (every occurrence of a flagged hex string is masked)
    for (const [token, masked] of hexReplacements) {
      sanitized = sanitized.split(token).join(masked);
    }

    // Summary logging
    const totalMasked = rangesToMask.length + hexReplacements.size + stats.knownTokens;
    if (global.verboseMode && totalMasked > 0) {
      await log(` 🔒 Sanitized ${totalMasked} secrets using dual approach:`, { verbose: true });
      await log(` • Known tokens: ${stats.knownTokens}`, { verbose: true });
      await log(` • Secretlint: ${stats.secretlintDetections} detections`, { verbose: true });
      await log(` • Custom patterns: ${stats.customDetections} detections`, { verbose: true });
      await log(` • Hex tokens: ${hexReplacements.size}`, { verbose: true });
      if (stats.secretlintOnlyWarnings.length > 0) {
        await log(` ⚠️ Pattern gaps to address: ${stats.secretlintOnlyWarnings.length}`, { verbose: true });
      }
    }
  } catch (error) {
    reportError(error, {
      context: 'sanitize_log_content',
      level: 'warning',
    });
    await log(` ⚠️ Warning: Could not fully sanitize log content: ${error.message}`, { verbose: true });
  }

  return sanitized;
};
|
|
549
|
+
|
|
550
|
+
// Export detection functions for testing and visibility
|
|
551
|
+
export { detectSecretsWithSecretlint, detectSecretsWithCustomPatterns, compareDetectionResults };
|
|
552
|
+
|
|
553
|
+
// Default export for convenience
|
|
554
|
+
export default {
|
|
555
|
+
isSafeToken,
|
|
556
|
+
isHexInSafeContext,
|
|
557
|
+
getGitHubTokensFromFiles,
|
|
558
|
+
getGitHubTokensFromCommand,
|
|
559
|
+
sanitizeLogContent,
|
|
560
|
+
detectSecretsWithSecretlint,
|
|
561
|
+
detectSecretsWithCustomPatterns,
|
|
562
|
+
compareDetectionResults,
|
|
563
|
+
};
|