@link-assistant/hive-mind 1.1.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,74 @@
1
1
  # @link-assistant/hive-mind
2
2
 
3
+ ## 1.2.1
4
+
5
+ ### Patch Changes
6
+
7
+ - 04cb3d2: Fix false positives in token masking for log sanitization
8
+ - Remove overly broad regex pattern that was matching legitimate identifiers like `browser_take_screenshot` and MCP tool names
9
+ - Add allowlist of safe token patterns (browser\_, mcp\_\_, function names with underscores, UUIDs)
10
+ - Add context-aware detection for 40-char hex strings to avoid masking git commit hashes and gist IDs
11
+ - Export new helper functions `isSafeToken` and `isHexInSafeContext` for testing (a usage sketch follows this list)
12
+ - Add comprehensive unit tests for false positive prevention
13
+
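A minimal sketch of how the two exported helpers might be exercised (not taken from the package's test suite; the relative import path and sample values are assumptions for illustration):

```js
// Illustrative use of the new allowlist helpers; import path and values are assumptions.
import { isSafeToken, isHexInSafeContext } from './src/token-sanitization.lib.mjs';

// Tool identifiers with underscores are allowlisted and should NOT be masked.
console.log(isSafeToken('browser_take_screenshot')); // true
console.log(isSafeToken('mcp__playwright'));         // true

// A 40-char hex string next to a git command reads as a commit hash, not a token.
const line = 'git show 0123456789abcdef0123456789abcdef01234567';
const hex = '0123456789abcdef0123456789abcdef01234567';
console.log(isHexInSafeContext(line, hex, line.indexOf(hex))); // true
```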
14
+ ## 1.2.0
15
+
16
+ ### Minor Changes
17
+
18
+ - Add experimental --execute-tool-with-bun option to improve speed and memory usage
19
+
20
+ This feature adds the `--execute-tool-with-bun` option, which lets users run the AI tool via `bunx claude` instead of `claude` and may improve speed and memory usage.
21
+
22
+ **Supported commands:**
23
+ - `solve` - Uses `bunx claude` when option is enabled
24
+ - `task` - Uses `bunx claude` when option is enabled
25
+ - `review` - Uses `bunx claude` when option is enabled
26
+ - `hive` - Passes the option through to the `solve` subprocess
27
+
28
+ **How It Works:**
29
+ When `--execute-tool-with-bun` is enabled, the `claudePath` variable is set to `'bunx claude'` instead of `'claude'` (or the `CLAUDE_PATH` environment variable).
30
+
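The selection itself is a one-line ternary; the sketch below mirrors the `claudePath` assignment shown in the `solve.mjs`, `task.mjs`, and `review.mjs` hunks further down (the `argv` object here is an illustrative stand-in for the parsed yargs options):

```js
// Sketch of the claudePath selection; argv stands in for the parsed yargs options.
const argv = { executeToolWithBun: true }; // illustrative value
const claudePath = argv.executeToolWithBun
  ? 'bunx claude'                        // experimental: run the AI tool through bunx
  : process.env.CLAUDE_PATH || 'claude'; // default: honor CLAUDE_PATH, else plain claude
console.log(claudePath); // 'bunx claude'
```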
31
+ **Usage Examples:**
32
+
33
+ ```bash
34
+ # Use with solve command
35
+ solve https://github.com/owner/repo/issues/123 --execute-tool-with-bun
36
+
37
+ # Use with task command
38
+ task "implement feature X" --execute-tool-with-bun
39
+
40
+ # Use with review command
41
+ review https://github.com/owner/repo/pull/456 --execute-tool-with-bun
42
+
43
+ # Use with hive command (passes through to solve)
44
+ hive https://github.com/owner/repo --execute-tool-with-bun
45
+ ```
46
+
47
+ The option defaults to `false` to maintain backward compatibility.
48
+
49
+ Fixes #812
50
+
51
+ feat(hive): recheck issue conditions before processing queue items
52
+
53
+ Added `recheckIssueConditions()` function to validate issue state right before processing,
54
+ preventing wasted resources on issues that should now be skipped because conditions have changed since they were queued.
55
+
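A minimal sketch of how a queue worker might use this check, following the `hive.mjs` hunk further down (the issue URL, flag values, and relative import path are assumptions for illustration):

```js
// Illustrative pre-processing recheck for a queued issue; inputs are assumptions.
import { recheckIssueConditions } from './src/hive.recheck.lib.mjs';

const issueUrl = 'https://github.com/owner/repo/issues/123'; // illustrative URL
const argv = { skipIssuesWithPrs: true };                     // relevant CLI flag

const { shouldProcess, reason } = await recheckIssueConditions(issueUrl, argv);
if (!shouldProcess) {
  console.log(`Skipping: ${reason}`); // e.g. "Issue is now closed"
}
```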
56
+ **Checks performed:**
57
+ - **Issue state**: Verifies the issue is still open
58
+ - **Open PRs**: Checks whether the issue already has open PRs (when `--skip-issues-with-prs` is enabled)
59
+ - **Repository status**: Confirms repository is not archived
60
+
61
+ **Benefits:**
62
+ - Prevents processing closed issues
63
+ - Avoids duplicate work when PRs already exist
64
+ - Stops work on newly archived repositories
65
+ - Saves AI model tokens and compute resources
66
+
67
+ **Performance impact:**
68
+ Minimal overhead per issue (~300-500ms for API calls), negligible compared to the 5-15 minute solve time.
69
+
70
+ Fixes #810
71
+
3
72
  ## 1.1.0
4
73
 
5
74
  ### Minor Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@link-assistant/hive-mind",
3
- "version": "1.1.0",
3
+ "version": "1.2.1",
4
4
  "description": "AI-powered issue solver and hive mind for collaborative problem solving",
5
5
  "main": "src/hive.mjs",
6
6
  "type": "module",
@@ -61,8 +61,11 @@
61
61
  "prettier": "^3.6.2"
62
62
  },
63
63
  "dependencies": {
64
+ "@secretlint/core": "^11.2.5",
65
+ "@secretlint/secretlint-rule-preset-recommend": "^11.2.5",
64
66
  "@sentry/node": "^10.15.0",
65
- "@sentry/profiling-node": "^10.15.0"
67
+ "@sentry/profiling-node": "^10.15.0",
68
+ "secretlint": "^11.2.5"
66
69
  },
67
70
  "lint-staged": {
68
71
  "*.{js,mjs,json,md}": [
package/src/github.lib.mjs CHANGED
@@ -5,9 +5,6 @@
5
5
  if (typeof globalThis.use === 'undefined') {
6
6
  globalThis.use = (await eval(await (await fetch('https://unpkg.com/use-m/use.js')).text())).use;
7
7
  }
8
- const fs = (await use('fs')).promises;
9
- const os = (await use('os')).default;
10
- const path = (await use('path')).default;
11
8
  // Use command-stream for consistent $ behavior
12
9
  const { $ } = await use('command-stream');
13
10
  // Import log and maskToken from general lib
@@ -16,6 +13,10 @@ import { reportError } from './sentry.lib.mjs';
16
13
  import { githubLimits, timeouts } from './config.lib.mjs';
17
14
  // Import batch operations from separate module
18
15
  import { batchCheckPullRequestsForIssues as batchCheckPRs, batchCheckArchivedRepositories as batchCheckArchived } from './github.batch.lib.mjs';
16
+ // Import token sanitization from dedicated module (Issue #1037 fix)
17
+ import { isSafeToken, isHexInSafeContext, getGitHubTokensFromFiles, getGitHubTokensFromCommand, sanitizeLogContent } from './token-sanitization.lib.mjs';
18
+ // Re-export token sanitization functions for backward compatibility
19
+ export { isSafeToken, isHexInSafeContext, getGitHubTokensFromFiles, getGitHubTokensFromCommand, sanitizeLogContent };
19
20
  // Import log upload function from separate module
20
21
  import { uploadLogWithGhUploadLog } from './log-upload.lib.mjs';
21
22
 
@@ -60,139 +61,8 @@ const buildCostInfoString = (totalCostUSD, anthropicTotalCostUSD, pricingInfo) =
60
61
 
61
62
  // Helper function to mask GitHub tokens (alias for backward compatibility)
62
63
  export const maskGitHubToken = maskToken;
63
- // Helper function to get GitHub tokens from local config files
64
- export const getGitHubTokensFromFiles = async () => {
65
- const tokens = [];
66
-
67
- try {
68
- // Check ~/.config/gh/hosts.yml
69
- const hostsFile = path.join(os.homedir(), '.config/gh/hosts.yml');
70
- if (
71
- await fs
72
- .access(hostsFile)
73
- .then(() => true)
74
- .catch(() => false)
75
- ) {
76
- const hostsContent = await fs.readFile(hostsFile, 'utf8');
77
-
78
- // Look for oauth_token and api_token patterns
79
- const oauthMatches = hostsContent.match(/oauth_token:\s*([^\s\n]+)/g);
80
- if (oauthMatches) {
81
- for (const match of oauthMatches) {
82
- const token = match.split(':')[1].trim();
83
- if (token && !tokens.includes(token)) {
84
- tokens.push(token);
85
- }
86
- }
87
- }
88
-
89
- const apiMatches = hostsContent.match(/api_token:\s*([^\s\n]+)/g);
90
- if (apiMatches) {
91
- for (const match of apiMatches) {
92
- const token = match.split(':')[1].trim();
93
- if (token && !tokens.includes(token)) {
94
- tokens.push(token);
95
- }
96
- }
97
- }
98
- }
99
- } catch (error) {
100
- // File access errors are expected when config doesn't exist
101
- if (global.verboseMode) {
102
- reportError(error, {
103
- context: 'github_token_file_access',
104
- level: 'debug',
105
- });
106
- }
107
- }
108
-
109
- return tokens;
110
- };
111
- // Helper function to get GitHub tokens from gh command output
112
- export const getGitHubTokensFromCommand = async () => {
113
- const { $ } = await use('command-stream');
114
- const tokens = [];
115
-
116
- try {
117
- // Run gh auth status to get token info
118
- const authResult = await $`gh auth status 2>&1`.catch(() => ({ stdout: '', stderr: '' }));
119
- const authOutput = authResult.stdout?.toString() + authResult.stderr?.toString() || '';
120
-
121
- // Look for token patterns in the output
122
- const tokenPatterns = [/(?:token|oauth|api)[:\s]*([a-zA-Z0-9_]{20,})/gi, /gh[pou]_[a-zA-Z0-9_]{20,}/gi];
123
-
124
- for (const pattern of tokenPatterns) {
125
- const matches = authOutput.match(pattern);
126
- if (matches) {
127
- for (let match of matches) {
128
- // Clean up the match
129
- const token = match.replace(/^(?:token|oauth|api)[:\s]*/, '').trim();
130
- if (token && token.length >= 20 && !tokens.includes(token)) {
131
- tokens.push(token);
132
- }
133
- }
134
- }
135
- }
136
- } catch (error) {
137
- // Command errors are expected when gh is not configured
138
- if (global.verboseMode) {
139
- reportError(error, {
140
- context: 'github_token_gh_auth',
141
- level: 'debug',
142
- });
143
- }
144
- }
145
-
146
- return tokens;
147
- };
148
64
  // Escape ``` in logs for safe markdown embedding (replaces with \`\`\` to prevent code block closure)
149
65
  export const escapeCodeBlocksInLog = logContent => logContent.replace(/```/g, '\\`\\`\\`');
150
- // Helper function to sanitize log content by masking GitHub tokens
151
- export const sanitizeLogContent = async logContent => {
152
- let sanitized = logContent;
153
-
154
- try {
155
- // Get tokens from both sources
156
- const fileTokens = await getGitHubTokensFromFiles();
157
- const commandTokens = await getGitHubTokensFromCommand();
158
- const allTokens = [...new Set([...fileTokens, ...commandTokens])];
159
-
160
- // Mask each token found
161
- for (const token of allTokens) {
162
- if (token && token.length >= 12) {
163
- const maskedToken = maskToken(token);
164
- // Use global replace to mask all occurrences
165
- sanitized = sanitized.split(token).join(maskedToken);
166
- }
167
- }
168
-
169
- // Also look for and mask common GitHub token patterns directly in the log
170
- const tokenPatterns = [
171
- /gh[pou]_[a-zA-Z0-9_]{20,}/g,
172
- /(?:^|[\s:=])([a-f0-9]{40})(?=[\s\n]|$)/gm, // 40-char hex tokens (like personal access tokens)
173
- /(?:^|[\s:=])([a-zA-Z0-9_]{20,})(?=[\s\n]|$)/gm, // General long tokens
174
- ];
175
-
176
- for (const pattern of tokenPatterns) {
177
- sanitized = sanitized.replace(pattern, (match, token) => {
178
- if (token && token.length >= 20) {
179
- return match.replace(token, maskToken(token));
180
- }
181
- return match;
182
- });
183
- }
184
-
185
- await log(` 🔒 Sanitized ${allTokens.length} detected GitHub tokens in log content`, { verbose: true });
186
- } catch (error) {
187
- reportError(error, {
188
- context: 'sanitize_log_content',
189
- level: 'warning',
190
- });
191
- await log(` ⚠️ Warning: Could not fully sanitize log content: ${error.message}`, { verbose: true });
192
- }
193
-
194
- return sanitized;
195
- };
196
66
  // Helper function to check if a file exists in a GitHub branch
197
67
  export const checkFileInBranch = async (owner, repo, fileName, branchName) => {
198
68
  const { $ } = await use('command-stream');
@@ -1469,6 +1339,8 @@ export default {
1469
1339
  getGitHubTokensFromFiles,
1470
1340
  getGitHubTokensFromCommand,
1471
1341
  escapeCodeBlocksInLog,
1342
+ isSafeToken,
1343
+ isHexInSafeContext,
1472
1344
  sanitizeLogContent,
1473
1345
  checkFileInBranch,
1474
1346
  checkGitHubPermissions,
@@ -286,6 +286,11 @@ export const createYargsConfig = yargsInstance => {
286
286
  description: '[EXPERIMENTAL] Include guidance for managing REQUIREMENTS.md and ARCHITECTURE.md files. When enabled, agents will update these documentation files when changes affect requirements or architecture.',
287
287
  default: false,
288
288
  })
289
+ .option('execute-tool-with-bun', {
290
+ type: 'boolean',
291
+ description: 'Execute the AI tool using bunx (experimental, may improve speed and memory usage) - passed to solve command',
292
+ default: false,
293
+ })
289
294
  .parserConfiguration({
290
295
  'boolean-negation': true,
291
296
  'strip-dashed': false,
package/src/hive.mjs CHANGED
@@ -110,6 +110,8 @@ if (isDirectExecution) {
110
110
  const { tryFetchIssuesWithGraphQL } = graphqlLib;
111
111
  const solutionDraftsLib = await import('./list-solution-drafts.lib.mjs');
112
112
  const { listSolutionDrafts } = solutionDraftsLib;
113
+ const recheckLib = await import('./hive.recheck.lib.mjs');
114
+ const { recheckIssueConditions } = recheckLib;
113
115
  const commandName = process.argv[1] ? process.argv[1].split('/').pop() : '';
114
116
  const isLocalScript = commandName.endsWith('.mjs');
115
117
  const solveCommand = isLocalScript ? './solve.mjs' : 'solve';
@@ -713,6 +715,16 @@ if (isDirectExecution) {
713
715
 
714
716
  await log(`\n👷 Worker ${workerId} processing: ${issueUrl}`);
715
717
 
718
+ // Recheck conditions before processing to avoid wasted work
719
+ const recheckResult = await recheckIssueConditions(issueUrl, argv);
720
+ if (!recheckResult.shouldProcess) {
721
+ await log(` ⏭️ Skipping issue: ${recheckResult.reason}`);
722
+ issueQueue.markCompleted(issueUrl);
723
+ const stats = issueQueue.getStats();
724
+ await log(` 📊 Queue: ${stats.queued} waiting, ${stats.processing} processing, ${stats.completed} completed, ${stats.failed} failed`);
725
+ continue;
726
+ }
727
+
716
728
  // Track if this issue failed
717
729
  let issueFailed = false;
718
730
 
@@ -756,6 +768,7 @@ if (isDirectExecution) {
756
768
  if (argv.promptIssueReporting) args.push('--prompt-issue-reporting');
757
769
  if (argv.promptCaseStudies) args.push('--prompt-case-studies');
758
770
  if (argv.promptPlaywrightMcp !== undefined) args.push(argv.promptPlaywrightMcp ? '--prompt-playwright-mcp' : '--no-prompt-playwright-mcp');
771
+ if (argv.executeToolWithBun) args.push('--execute-tool-with-bun');
759
772
  // Log the actual command being executed so users can investigate/reproduce
760
773
  await log(` 📋 Command: ${solveCommand} ${args.join(' ')}`);
761
774
 
package/src/hive.recheck.lib.mjs ADDED
@@ -0,0 +1,86 @@
1
+ #!/usr/bin/env node
2
+ // Library for rechecking issue conditions in hive queue processing
3
+
4
+ import { log, cleanErrorMessage } from './lib.mjs';
5
+ import { batchCheckPullRequestsForIssues, batchCheckArchivedRepositories } from './github.lib.mjs';
6
+ import { reportError } from './sentry.lib.mjs';
7
+
8
+ /**
9
+ * Recheck conditions for an issue right before processing
10
+ * This ensures the issue should still be processed even if conditions changed since queuing
11
+ * @param {string} issueUrl - The URL of the issue to check
12
+ * @param {Object} argv - Command line arguments with configuration
13
+ * @returns {Promise<{shouldProcess: boolean, reason?: string}>}
14
+ */
15
+ export async function recheckIssueConditions(issueUrl, argv) {
16
+ try {
17
+ // Extract owner, repo, and issue number from URL
18
+ const urlMatch = issueUrl.match(/github\.com\/([^/]+)\/([^/]+)\/issues\/(\d+)/);
19
+ if (!urlMatch) {
20
+ await log(` ⚠️ Could not parse issue URL: ${issueUrl}`, { verbose: true });
21
+ return { shouldProcess: true }; // Process anyway if we can't parse
22
+ }
23
+
24
+ const [, owner, repo, issueNumber] = urlMatch;
25
+ const issueNum = parseInt(issueNumber);
26
+
27
+ await log(` 🔍 Rechecking conditions for issue #${issueNum}...`, { verbose: true });
28
+
29
+ // Check 1: Verify issue is still open
30
+ try {
31
+ const { execSync } = await import('child_process');
32
+ const issueState = execSync(`gh api repos/${owner}/${repo}/issues/${issueNum} --jq .state`, {
33
+ encoding: 'utf8',
34
+ }).trim();
35
+
36
+ if (issueState === 'closed') {
37
+ return {
38
+ shouldProcess: false,
39
+ reason: 'Issue is now closed',
40
+ };
41
+ }
42
+ await log(` ✅ Issue is still open`, { verbose: true });
43
+ } catch (error) {
44
+ await log(` ⚠️ Could not check issue state: ${cleanErrorMessage(error)}`, { verbose: true });
45
+ // Continue checking other conditions
46
+ }
47
+
48
+ // Check 2: If skipIssuesWithPrs is enabled, verify issue still has no open PRs
49
+ if (argv.skipIssuesWithPrs) {
50
+ const prResults = await batchCheckPullRequestsForIssues(owner, repo, [issueNum]);
51
+ const prInfo = prResults[issueNum];
52
+
53
+ if (prInfo && prInfo.openPRCount > 0) {
54
+ return {
55
+ shouldProcess: false,
56
+ reason: `Issue now has ${prInfo.openPRCount} open PR${prInfo.openPRCount > 1 ? 's' : ''}`,
57
+ };
58
+ }
59
+ await log(` ✅ Issue still has no open PRs`, { verbose: true });
60
+ }
61
+
62
+ // Check 3: Verify repository is not archived
63
+ const archivedStatusMap = await batchCheckArchivedRepositories([{ owner, name: repo }]);
64
+ const repoKey = `${owner}/${repo}`;
65
+
66
+ if (archivedStatusMap[repoKey] === true) {
67
+ return {
68
+ shouldProcess: false,
69
+ reason: 'Repository is now archived',
70
+ };
71
+ }
72
+ await log(` ✅ Repository is not archived`, { verbose: true });
73
+
74
+ await log(` ✅ All conditions passed, proceeding with processing`, { verbose: true });
75
+ return { shouldProcess: true };
76
+ } catch (error) {
77
+ reportError(error, {
78
+ context: 'recheck_issue_conditions',
79
+ issueUrl,
80
+ operation: 'recheck_conditions',
81
+ });
82
+ await log(` ⚠️ Error rechecking conditions: ${cleanErrorMessage(error)}`, { level: 'warning' });
83
+ // On error, allow processing to continue (fail open)
84
+ return { shouldProcess: true };
85
+ }
86
+ }
package/src/review.mjs CHANGED
@@ -19,14 +19,15 @@ if (earlyArgs.includes('--help') || earlyArgs.includes('-h')) {
19
19
  // Show help and exit
20
20
  console.log('Usage: review.mjs <pr-url> [options]');
21
21
  console.log('\nOptions:');
22
- console.log(' --version Show version number');
23
- console.log(' --help, -h Show help');
24
- console.log(' --resume, -r Resume from a previous session ID');
25
- console.log(' --dry-run, -n Prepare everything but do not execute Claude');
26
- console.log(' --model, -m Model to use (opus, sonnet, or full model ID) [default: opus]');
27
- console.log(' --focus, -f Focus areas for review [default: all]');
28
- console.log(' --approve If review passes, approve the PR');
29
- console.log(' --verbose, -v Enable verbose logging');
22
+ console.log(' --version Show version number');
23
+ console.log(' --help, -h Show help');
24
+ console.log(' --resume, -r Resume from a previous session ID');
25
+ console.log(' --dry-run, -n Prepare everything but do not execute Claude');
26
+ console.log(' --model, -m Model to use (opus, sonnet, or full model ID) [default: opus]');
27
+ console.log(' --focus, -f Focus areas for review [default: all]');
28
+ console.log(' --approve If review passes, approve the PR');
29
+ console.log(' --verbose, -v Enable verbose logging');
30
+ console.log(' --execute-tool-with-bun Execute the AI tool using bunx (experimental) [default: false]');
30
31
  process.exit(0);
31
32
  }
32
33
 
@@ -91,6 +92,11 @@ const argv = yargs()
91
92
  alias: 'v',
92
93
  default: false,
93
94
  })
95
+ .option('execute-tool-with-bun', {
96
+ type: 'boolean',
97
+ description: 'Execute the AI tool using bunx (experimental, may improve speed and memory usage)',
98
+ default: false,
99
+ })
94
100
  .demandCommand(1, 'The GitHub pull request URL is required')
95
101
  .parserConfiguration({
96
102
  'boolean-negation': true,
@@ -126,7 +132,9 @@ if (!prUrl.match(/^https:\/\/github\.com\/[^/]+\/[^/]+\/pull\/\d+$/)) {
126
132
  process.exit(1);
127
133
  }
128
134
 
129
- const claudePath = process.env.CLAUDE_PATH || 'claude';
135
+ // Determine claude command path based on --execute-tool-with-bun option
136
+ // When enabled, uses 'bunx claude' which may improve speed and memory usage
137
+ const claudePath = argv.executeToolWithBun ? 'bunx claude' : process.env.CLAUDE_PATH || 'claude';
130
138
 
131
139
  // Extract repository and PR number from URL
132
140
  const urlParts = prUrl.split('/');
@@ -253,6 +253,11 @@ export const createYargsConfig = yargsInstance => {
253
253
  choices: ['claude', 'opencode', 'codex', 'agent'],
254
254
  default: 'claude',
255
255
  })
256
+ .option('execute-tool-with-bun', {
257
+ type: 'boolean',
258
+ description: 'Execute the AI tool using bunx (experimental, may improve speed and memory usage)',
259
+ default: false,
260
+ })
256
261
  .option('enable-workspaces', {
257
262
  type: 'boolean',
258
263
  description: 'Use separate workspace directory structure with repository/ and tmp/ folders. Works with all tools (claude, opencode, codex, agent). Experimental feature.',
package/src/solve.mjs CHANGED
@@ -236,7 +236,7 @@ if (argv.verbose) {
236
236
  await log(` Is Issue URL: ${!!isIssueUrl}`, { verbose: true });
237
237
  await log(` Is PR URL: ${!!isPrUrl}`, { verbose: true });
238
238
  }
239
- const claudePath = process.env.CLAUDE_PATH || 'claude';
239
+ const claudePath = argv.executeToolWithBun ? 'bunx claude' : process.env.CLAUDE_PATH || 'claude';
240
240
  // Note: owner, repo, and urlNumber are already extracted from validateGitHubUrl() above
241
241
  // The parseUrlComponents() call was removed as it had a bug with hash fragments (#issuecomment-xyz)
242
242
  // and the validation result already provides these values correctly parsed
package/src/task.mjs CHANGED
@@ -124,6 +124,11 @@ const argv = yargs()
124
124
  default: 'text',
125
125
  choices: ['text', 'json'],
126
126
  })
127
+ .option('execute-tool-with-bun', {
128
+ type: 'boolean',
129
+ description: 'Execute the AI tool using bunx (experimental, may improve speed and memory usage)',
130
+ default: false,
131
+ })
127
132
  .check(argv => {
128
133
  if (!argv['task-description'] && !argv._[0]) {
129
134
  throw new Error('Please provide a task description');
@@ -186,7 +191,7 @@ await log(formatAligned('💡', 'Clarify mode:', argv.clarify ? 'enabled' : 'dis
186
191
  await log(formatAligned('🔍', 'Decompose mode:', argv.decompose ? 'enabled' : 'disabled'));
187
192
  await log(formatAligned('📄', 'Output format:', argv.outputFormat));
188
193
 
189
- const claudePath = process.env.CLAUDE_PATH || 'claude';
194
+ const claudePath = argv.executeToolWithBun ? 'bunx claude' : process.env.CLAUDE_PATH || 'claude';
190
195
 
191
196
  // Helper function to execute Claude command
192
197
  const executeClaude = (prompt, model) => {
package/src/token-sanitization.lib.mjs ADDED
@@ -0,0 +1,563 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Token sanitization utilities for log content
4
+ * Dual approach: Uses both secretlint AND custom patterns for comprehensive coverage
5
+ *
6
+ * Architecture:
7
+ * 1. Custom patterns (our logic) - patterns we define and maintain
8
+ * 2. Secretlint patterns - battle-tested community patterns
9
+ *
10
+ * Both approaches run independently, and if only one detects a secret,
11
+ * a warning is logged (especially when secretlint finds something our logic misses).
12
+ * This helps us improve our custom patterns over time.
13
+ *
14
+ * @module token-sanitization
15
+ */
16
+
17
+ // Import shared utility from lib.mjs
18
+ import { maskToken, log } from './lib.mjs';
19
+ import { reportError } from './sentry.lib.mjs';
20
+
21
+ // Dynamic imports for runtime dependencies
22
+ const getOsModule = async () => (await import('os')).default;
23
+ const getPathModule = async () => (await import('path')).default;
24
+ const getFsModule = async () => (await import('fs')).promises;
25
+
26
+ // Lazy-loaded secretlint modules (initialized on first use)
27
+ let secretlintCore = null;
28
+ let secretlintConfig = null;
29
+
30
+ /**
31
+ * Initialize secretlint modules lazily
32
+ * @returns {Promise<boolean>} True if secretlint is available
33
+ */
34
+ const initSecretlint = async () => {
35
+ if (secretlintConfig !== null) {
36
+ return secretlintConfig !== false;
37
+ }
38
+
39
+ try {
40
+ const [core, preset] = await Promise.all([import('@secretlint/core'), import('@secretlint/secretlint-rule-preset-recommend')]);
41
+
42
+ secretlintCore = core;
43
+ secretlintConfig = {
44
+ rules: [
45
+ {
46
+ id: '@secretlint/secretlint-rule-preset-recommend',
47
+ rule: preset.creator,
48
+ },
49
+ ],
50
+ };
51
+
52
+ return true;
53
+ } catch (error) {
54
+ // secretlint not available - fall back to custom patterns only
55
+ if (global.verboseMode) {
56
+ await log(` ⚠️ Secretlint not available, using fallback patterns: ${error.message}`, { verbose: true });
57
+ }
58
+ secretlintConfig = false;
59
+ return false;
60
+ }
61
+ };
62
+
63
+ /**
64
+ * Patterns that indicate a string is NOT a sensitive token (false positive patterns)
65
+ * These are used to prevent masking legitimate identifiers
66
+ */
67
+ const SAFE_TOKEN_PATTERNS = [
68
+ // MCP tool names (Playwright, etc.)
69
+ /^mcp__[a-z_]+$/i,
70
+ // Browser/Playwright tool names
71
+ /^browser_[a-z_]+$/i,
72
+ // Common function/tool name patterns with underscores
73
+ /^[a-z]+_[a-z]+_[a-z_]+$/i,
74
+ // UUID patterns (not sensitive)
75
+ /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i,
76
+ ];
77
+
78
+ /**
79
+ * Context patterns that indicate the surrounding text is NOT a sensitive context
80
+ * These patterns help identify when a 40-char hex string is just a git commit hash
81
+ */
82
+ const SAFE_CONTEXT_PATTERNS = [
83
+ // Git commands containing commit hashes
84
+ /\bgh\s+gist\s+view\b/i,
85
+ /\bgit\s+(log|show|diff|cherry-pick|revert|checkout|reset)\b/i,
86
+ /\bgit\s+commit\s+-m\b/i,
87
+ // Commit SHA in common git output contexts
88
+ /\bcommit\s+[a-f0-9]{7,40}\b/i,
89
+ /\bSHA\s*:\s*[a-f0-9]{7,40}\b/i,
90
+ // Git log output format
91
+ /^commit\s+[a-f0-9]{40}/m,
92
+ // Short commit hashes in various contexts
93
+ /\b[a-f0-9]{7,40}\s+Author:/i,
94
+ ];
95
+
96
+ // Note: Custom token patterns are now defined in detectSecretsWithCustomPatterns()
97
+ // with named patterns for tracking and comparison with secretlint results.
98
+
99
+ /**
100
+ * Check if a token matches any safe pattern (not a sensitive token)
101
+ * @param {string} token - The token to check
102
+ * @returns {boolean} True if the token is safe and should NOT be masked
103
+ */
104
+ export const isSafeToken = token => {
105
+ if (!token) return false;
106
+ return SAFE_TOKEN_PATTERNS.some(pattern => pattern.test(token));
107
+ };
108
+
109
+ /**
110
+ * Check if a 40-char hex string appears in a safe context (like git commands)
111
+ * @param {string} content - The full content to search
112
+ * @param {string} hexString - The 40-char hex string found
113
+ * @param {number} position - The position where the hex string was found
114
+ * @returns {boolean} True if the hex string is in a safe context
115
+ */
116
+ export const isHexInSafeContext = (content, hexString, position) => {
117
+ // Get surrounding context (100 chars before and after)
118
+ const contextStart = Math.max(0, position - 100);
119
+ const contextEnd = Math.min(content.length, position + hexString.length + 100);
120
+ const context = content.substring(contextStart, contextEnd);
121
+
122
+ // Check if any safe context pattern matches
123
+ return SAFE_CONTEXT_PATTERNS.some(pattern => pattern.test(context));
124
+ };
125
+
126
+ /**
127
+ * Get GitHub tokens from local config files
128
+ * @returns {Promise<string[]>} Array of tokens found
129
+ */
130
+ export const getGitHubTokensFromFiles = async () => {
131
+ const os = await getOsModule();
132
+ const path = await getPathModule();
133
+ const fs = await getFsModule();
134
+ const tokens = [];
135
+
136
+ try {
137
+ // Check ~/.config/gh/hosts.yml
138
+ const hostsFile = path.join(os.homedir(), '.config/gh/hosts.yml');
139
+ if (
140
+ await fs
141
+ .access(hostsFile)
142
+ .then(() => true)
143
+ .catch(() => false)
144
+ ) {
145
+ const hostsContent = await fs.readFile(hostsFile, 'utf8');
146
+
147
+ // Look for oauth_token and api_token patterns
148
+ const oauthMatches = hostsContent.match(/oauth_token:\s*([^\s\n]+)/g);
149
+ if (oauthMatches) {
150
+ for (const match of oauthMatches) {
151
+ const token = match.split(':')[1].trim();
152
+ if (token && !tokens.includes(token)) {
153
+ tokens.push(token);
154
+ }
155
+ }
156
+ }
157
+
158
+ const apiMatches = hostsContent.match(/api_token:\s*([^\s\n]+)/g);
159
+ if (apiMatches) {
160
+ for (const match of apiMatches) {
161
+ const token = match.split(':')[1].trim();
162
+ if (token && !tokens.includes(token)) {
163
+ tokens.push(token);
164
+ }
165
+ }
166
+ }
167
+ }
168
+ } catch (error) {
169
+ // File access errors are expected when config doesn't exist
170
+ if (global.verboseMode) {
171
+ reportError(error, {
172
+ context: 'github_token_file_access',
173
+ level: 'debug',
174
+ });
175
+ }
176
+ }
177
+
178
+ return tokens;
179
+ };
180
+
181
+ /**
182
+ * Get GitHub tokens from gh command output
183
+ * @returns {Promise<string[]>} Array of tokens found
184
+ */
185
+ export const getGitHubTokensFromCommand = async () => {
186
+ if (typeof globalThis.use === 'undefined') {
187
+ globalThis.use = (await eval(await (await fetch('https://unpkg.com/use-m/use.js')).text())).use;
188
+ }
189
+ const { $ } = await globalThis.use('command-stream');
190
+ const tokens = [];
191
+
192
+ try {
193
+ // Run gh auth status to get token info
194
+ const authResult = await $`gh auth status 2>&1`.catch(() => ({ stdout: '', stderr: '' }));
195
+ const authOutput = authResult.stdout?.toString() + authResult.stderr?.toString() || '';
196
+
197
+ // Look for token patterns in the output
198
+ const tokenPatterns = [/(?:token|oauth|api)[:\s]*([a-zA-Z0-9_]{20,})/gi, /gh[pou]_[a-zA-Z0-9_]{20,}/gi];
199
+
200
+ for (const pattern of tokenPatterns) {
201
+ const matches = authOutput.match(pattern);
202
+ if (matches) {
203
+ for (let match of matches) {
204
+ // Clean up the match
205
+ const token = match.replace(/^(?:token|oauth|api)[:\s]*/, '').trim();
206
+ if (token && token.length >= 20 && !tokens.includes(token)) {
207
+ tokens.push(token);
208
+ }
209
+ }
210
+ }
211
+ }
212
+ } catch (error) {
213
+ // Command errors are expected when gh is not configured
214
+ if (global.verboseMode) {
215
+ reportError(error, {
216
+ context: 'github_token_gh_auth',
217
+ level: 'debug',
218
+ });
219
+ }
220
+ }
221
+
222
+ return tokens;
223
+ };
224
+
225
+ /**
226
+ * Use secretlint to detect secrets in content
227
+ * @param {string} content - Content to scan
228
+ * @returns {Promise<Array<{start: number, end: number, token: string, ruleId: string}>>} Array of detected secrets with rule info
229
+ */
230
+ const detectSecretsWithSecretlint = async content => {
231
+ const secrets = [];
232
+
233
+ const available = await initSecretlint();
234
+ if (!available || !secretlintCore || !secretlintConfig) {
235
+ return secrets;
236
+ }
237
+
238
+ try {
239
+ const result = await secretlintCore.lintSource({
240
+ source: {
241
+ filePath: '/virtual/content.txt',
242
+ content: content,
243
+ contentType: 'text',
244
+ },
245
+ options: {
246
+ config: secretlintConfig,
247
+ maskSecrets: false, // We need raw positions to mask ourselves
248
+ },
249
+ });
250
+
251
+ for (const message of result.messages) {
252
+ if (message.range && message.range.length === 2) {
253
+ const [start, end] = message.range;
254
+ const token = content.substring(start, end);
255
+ secrets.push({
256
+ start,
257
+ end,
258
+ token,
259
+ ruleId: message.ruleId || 'unknown',
260
+ source: 'secretlint',
261
+ });
262
+ }
263
+ }
264
+ } catch (error) {
265
+ if (global.verboseMode) {
266
+ await log(` ⚠️ Secretlint detection error: ${error.message}`, { verbose: true });
267
+ }
268
+ }
269
+
270
+ return secrets;
271
+ };
272
+
273
+ /**
274
+ * Use custom patterns to detect secrets in content
275
+ * @param {string} content - Content to scan
276
+ * @returns {Array<{start: number, end: number, token: string, patternName: string}>} Array of detected secrets with pattern info
277
+ */
278
+ const detectSecretsWithCustomPatterns = content => {
279
+ const secrets = [];
280
+
281
+ // Named custom patterns for tracking what detected what
282
+ const namedPatterns = [
283
+ // OpenAI patterns
284
+ { name: 'openai-project', pattern: /\bsk-(?:proj-|svcacct-|admin-)?[A-Za-z0-9_-]*T3BlbkFJ[A-Za-z0-9_-]+/g },
285
+
286
+ // Anthropic patterns
287
+ { name: 'anthropic-claude', pattern: /\bsk-ant-(?:api\d{2}-)?[A-Za-z0-9_-]{20,}/g },
288
+
289
+ // GitHub patterns
290
+ { name: 'github-pat', pattern: /\bgithub_pat_[a-zA-Z0-9_]{20,}/g },
291
+ { name: 'github-server', pattern: /\bghs_[a-zA-Z0-9_]{20,}/g },
292
+ { name: 'github-refresh', pattern: /\bghr_[a-zA-Z0-9_]{20,}/g },
293
+ { name: 'github-ghp', pattern: /\bghp_[a-zA-Z0-9_]{20,}/g },
294
+ { name: 'github-gho', pattern: /\bgho_[a-zA-Z0-9_]{20,}/g },
295
+ { name: 'github-ghu', pattern: /\bghu_[a-zA-Z0-9_]{20,}/g },
296
+
297
+ // AWS patterns
298
+ { name: 'aws-key', pattern: /\b(?:A3T[A-Z0-9]|AKIA|AGPA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}\b/g },
299
+
300
+ // Stripe patterns
301
+ { name: 'stripe', pattern: /\b(?:sk_live_|sk_test_|pk_live_|pk_test_)[a-zA-Z0-9]{20,}/g },
302
+
303
+ // SendGrid patterns
304
+ { name: 'sendgrid', pattern: /\bSG\.[a-zA-Z0-9_-]{15,}\.[a-zA-Z0-9_-]{30,}/g },
305
+
306
+ // Twilio patterns
307
+ { name: 'twilio', pattern: /\bSK[a-f0-9]{32}\b/g },
308
+
309
+ // Mailchimp patterns
310
+ { name: 'mailchimp', pattern: /\b[a-f0-9]{32}-us[0-9]{1,2}\b/g },
311
+
312
+ // Square patterns
313
+ { name: 'square', pattern: /\bsq0(?:atp|csp)-[a-zA-Z0-9_-]{22,}/g },
314
+
315
+ // Databricks patterns
316
+ { name: 'databricks', pattern: /\bdapi[a-f0-9]{32}\b/g },
317
+
318
+ // PyPI patterns
319
+ { name: 'pypi', pattern: /\bpypi-[A-Za-z0-9_-]{50,}/g },
320
+
321
+ // Discord patterns
322
+ { name: 'discord', pattern: /\b[MN][A-Za-z0-9_-]{23,}\.[A-Za-z0-9_-]{6}\.[A-Za-z0-9_-]{20,}/g },
323
+
324
+ // Telegram patterns
325
+ { name: 'telegram', pattern: /\b[0-9]{8,10}:[a-zA-Z0-9_-]{30,}/g },
326
+
327
+ // Google / Gemini patterns
328
+ { name: 'google-gemini', pattern: /\bAIza[0-9A-Za-z_-]{32,40}\b/g },
329
+
330
+ // HuggingFace patterns
331
+ { name: 'huggingface', pattern: /\bhf_[a-zA-Z0-9]{30,}/g },
332
+
333
+ // Slack patterns (not all covered by secretlint preset)
334
+ { name: 'slack-xoxb', pattern: /\bxoxb-[0-9]{10,}-[0-9]{10,}-[a-zA-Z0-9]{20,}/g },
335
+ { name: 'slack-xoxp', pattern: /\bxoxp-[0-9]{10,}-[0-9]{10,}-[0-9]{10,}-[a-zA-Z0-9]{20,}/g },
336
+
337
+ // npm patterns
338
+ { name: 'npm', pattern: /\bnpm_[a-zA-Z0-9]{30,}/g },
339
+
340
+ // Shopify patterns
341
+ { name: 'shopify', pattern: /\bshpat_[a-f0-9]{32}\b/g },
342
+ ];
343
+
344
+ for (const { name, pattern } of namedPatterns) {
345
+ pattern.lastIndex = 0;
346
+ let match;
347
+ while ((match = pattern.exec(content)) !== null) {
348
+ const token = match[0];
349
+ // Skip if already masked (contains consecutive asterisks)
350
+ if (/\*{3,}/.test(token)) {
351
+ continue;
352
+ }
353
+ secrets.push({
354
+ start: match.index,
355
+ end: match.index + token.length,
356
+ token,
357
+ patternName: name,
358
+ source: 'custom',
359
+ });
360
+ }
361
+ }
362
+
363
+ return secrets;
364
+ };
365
+
366
+ /**
367
+ * Compare detection results from both approaches and log warnings
368
+ * @param {Array} secretlintSecrets - Secrets detected by secretlint
369
+ * @param {Array} customSecrets - Secrets detected by custom patterns
370
+ * @returns {Promise<{secretlintOnly: Array, customOnly: Array, both: Array}>}
371
+ */
372
+ const compareDetectionResults = async (secretlintSecrets, customSecrets) => {
373
+ const secretlintOnly = [];
374
+ const customOnly = [];
375
+ const both = [];
376
+
377
+ // Create sets for easier comparison (normalize tokens)
378
+ const secretlintTokens = new Map(secretlintSecrets.map(s => [s.token, s]));
379
+ const customTokens = new Map(customSecrets.map(s => [s.token, s]));
380
+
381
+ // Find secretlint-only detections (our custom patterns missed these)
382
+ for (const [token, secret] of secretlintTokens) {
383
+ if (!customTokens.has(token)) {
384
+ secretlintOnly.push(secret);
385
+ } else {
386
+ both.push({ ...secret, customPattern: customTokens.get(token).patternName });
387
+ }
388
+ }
389
+
390
+ // Find custom-only detections (secretlint missed these)
391
+ for (const [token, secret] of customTokens) {
392
+ if (!secretlintTokens.has(token)) {
393
+ customOnly.push(secret);
394
+ }
395
+ }
396
+
397
+ return { secretlintOnly, customOnly, both };
398
+ };
399
+
400
+ /**
401
+ * Sanitize log content by masking sensitive tokens while avoiding false positives
402
+ * Uses DUAL APPROACH: Both secretlint AND custom patterns run independently
403
+ *
404
+ * If only secretlint detects a secret (but our custom patterns miss it),
405
+ * a warning is logged so we can improve our patterns.
406
+ *
407
+ * @param {string} logContent - The log content to sanitize
408
+ * @param {Object} options - Optional configuration
409
+ * @param {boolean} options.warnOnMismatch - Log warnings when detection approaches differ (default: true in verbose mode)
410
+ * @returns {Promise<string>} Sanitized log content with tokens masked
411
+ */
412
+ export const sanitizeLogContent = async (logContent, options = {}) => {
413
+ let sanitized = logContent;
414
+ const { warnOnMismatch = global.verboseMode } = options;
415
+
416
+ // Statistics for dual approach
417
+ const stats = {
418
+ knownTokens: 0,
419
+ secretlintDetections: 0,
420
+ customDetections: 0,
421
+ secretlintOnlyWarnings: [],
422
+ customOnlyDetections: [],
423
+ };
424
+
425
+ try {
426
+ // Step 1: Get known tokens from files and commands
427
+ const fileTokens = await getGitHubTokensFromFiles();
428
+ const commandTokens = await getGitHubTokensFromCommand();
429
+ const allKnownTokens = [...new Set([...fileTokens, ...commandTokens])];
430
+
431
+ // Mask known tokens first
432
+ for (const token of allKnownTokens) {
433
+ if (token && token.length >= 12) {
434
+ const maskedToken = maskToken(token);
435
+ sanitized = sanitized.split(token).join(maskedToken);
436
+ stats.knownTokens++;
437
+ }
438
+ }
439
+
440
+ // Step 2: DUAL APPROACH - Run both detection methods independently
441
+ const [secretlintSecrets, customSecrets] = await Promise.all([detectSecretsWithSecretlint(sanitized), Promise.resolve(detectSecretsWithCustomPatterns(sanitized))]);
442
+
443
+ // Compare results to find discrepancies
444
+ const { secretlintOnly, customOnly } = await compareDetectionResults(secretlintSecrets, customSecrets);
445
+
446
+ // Log warnings for secretlint-only detections (our patterns should catch these)
447
+ if (warnOnMismatch && secretlintOnly.length > 0) {
448
+ stats.secretlintOnlyWarnings = secretlintOnly;
449
+ await log(` ⚠️ PATTERN GAP: Secretlint found ${secretlintOnly.length} secret(s) that our custom patterns missed:`, { verbose: true });
450
+ for (const secret of secretlintOnly) {
451
+ // Show truncated token and rule that detected it
452
+ const truncated = secret.token.length > 20 ? `${secret.token.substring(0, 10)}...${secret.token.substring(secret.token.length - 5)}` : secret.token;
453
+ await log(` • Rule: ${secret.ruleId}, Token preview: ${truncated}`, { verbose: true });
454
+ }
455
+ await log(` Consider adding custom patterns for these secret types to improve our detection.`, { verbose: true });
456
+ }
457
+
458
+ // Log info about custom-only detections (we catch things secretlint doesn't)
459
+ if (warnOnMismatch && customOnly.length > 0) {
460
+ stats.customOnlyDetections = customOnly;
461
+ await log(` ℹ️ CUSTOM ADVANTAGE: Our patterns found ${customOnly.length} secret(s) that secretlint missed:`, { verbose: true });
462
+ for (const secret of customOnly) {
463
+ await log(` • Pattern: ${secret.patternName}`, { verbose: true });
464
+ }
465
+ }
466
+
467
+ // Step 3: Merge all unique secrets from both sources for masking
468
+ const allSecrets = new Map();
469
+
470
+ // Add secretlint detections
471
+ for (const secret of secretlintSecrets) {
472
+ const key = `${secret.start}-${secret.end}`;
473
+ allSecrets.set(key, secret);
474
+ stats.secretlintDetections++;
475
+ }
476
+
477
+ // Add custom detections (won't duplicate if same position)
478
+ for (const secret of customSecrets) {
479
+ const key = `${secret.start}-${secret.end}`;
480
+ if (!allSecrets.has(key)) {
481
+ allSecrets.set(key, secret);
482
+ }
483
+ stats.customDetections++;
484
+ }
485
+
486
+ // Apply all detections (from end to start to preserve positions)
487
+ const sortedSecrets = [...allSecrets.values()].sort((a, b) => b.start - a.start);
488
+ for (const secret of sortedSecrets) {
489
+ const { start, end, token } = secret;
490
+ // Verify the token is still in the content at the expected position
491
+ const currentToken = sanitized.substring(start, end);
492
+ if (currentToken === token) {
493
+ const masked = maskToken(token);
494
+ sanitized = sanitized.substring(0, start) + masked + sanitized.substring(end);
495
+ }
496
+ }
497
+
498
+ // Step 4: Handle 40-char hex tokens specially - only mask if NOT in safe context
499
+ // These could be GitHub tokens OR git commit hashes/gist IDs
500
+ const hexPattern = /(?:^|[\s:=])([a-f0-9]{40})(?=[\s\n]|$)/gm;
501
+ let hexMatch;
502
+ const hexReplacements = [];
503
+
504
+ // First pass: find all matches and determine which to mask
505
+ const tempContent = sanitized;
506
+ hexPattern.lastIndex = 0;
507
+ while ((hexMatch = hexPattern.exec(tempContent)) !== null) {
508
+ const token = hexMatch[1];
509
+ const position = hexMatch.index;
510
+
511
+ // Skip if already masked
512
+ if (/\*{3,}/.test(token)) {
513
+ continue;
514
+ }
515
+
516
+ // Only mask if NOT in a safe git/gist context
517
+ if (!isHexInSafeContext(tempContent, token, position)) {
518
+ hexReplacements.push({ token, masked: maskToken(token) });
519
+ }
520
+ }
521
+
522
+ // Second pass: apply replacements
523
+ for (const { token, masked } of hexReplacements) {
524
+ sanitized = sanitized.split(token).join(masked);
525
+ }
526
+
527
+ // Summary logging
528
+ const totalMasked = allSecrets.size + hexReplacements.length + stats.knownTokens;
529
+ if (global.verboseMode && totalMasked > 0) {
530
+ await log(` 🔒 Sanitized ${totalMasked} secrets using dual approach:`, { verbose: true });
531
+ await log(` • Known tokens: ${stats.knownTokens}`, { verbose: true });
532
+ await log(` • Secretlint: ${stats.secretlintDetections} detections`, { verbose: true });
533
+ await log(` • Custom patterns: ${stats.customDetections} detections`, { verbose: true });
534
+ await log(` • Hex tokens: ${hexReplacements.length}`, { verbose: true });
535
+ if (stats.secretlintOnlyWarnings.length > 0) {
536
+ await log(` ⚠️ Pattern gaps to address: ${stats.secretlintOnlyWarnings.length}`, { verbose: true });
537
+ }
538
+ }
539
+ } catch (error) {
540
+ reportError(error, {
541
+ context: 'sanitize_log_content',
542
+ level: 'warning',
543
+ });
544
+ await log(` ⚠️ Warning: Could not fully sanitize log content: ${error.message}`, { verbose: true });
545
+ }
546
+
547
+ return sanitized;
548
+ };
549
+
550
+ // Export detection functions for testing and visibility
551
+ export { detectSecretsWithSecretlint, detectSecretsWithCustomPatterns, compareDetectionResults };
552
+
553
+ // Default export for convenience
554
+ export default {
555
+ isSafeToken,
556
+ isHexInSafeContext,
557
+ getGitHubTokensFromFiles,
558
+ getGitHubTokensFromCommand,
559
+ sanitizeLogContent,
560
+ detectSecretsWithSecretlint,
561
+ detectSecretsWithCustomPatterns,
562
+ compareDetectionResults,
563
+ };