specmem-hardwicksoftware 3.7.35 → 3.7.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/CHANGELOG.md +34 -0
  2. package/README.md +11 -15
  3. package/bin/specmem-autoclaude.cjs +12 -1
  4. package/bin/specmem-cli.cjs +1077 -11
  5. package/bin/specmem-console.cjs +890 -63
  6. package/bootstrap.cjs +10 -2
  7. package/claude-hooks/agent-loading-hook.cjs +16 -16
  8. package/claude-hooks/agent-loading-hook.js +28 -21
  9. package/claude-hooks/agent-type-matcher.js +1 -1
  10. package/claude-hooks/background-completion-silencer.js +1 -1
  11. package/claude-hooks/file-claim-enforcer.cjs +37 -36
  12. package/claude-hooks/output-cleaner.cjs +1 -1
  13. package/claude-hooks/refusal-detector-hook.cjs +53 -0
  14. package/claude-hooks/settings.json +64 -4
  15. package/claude-hooks/smart-search-interceptor.js +1 -1
  16. package/claude-hooks/specmem-search-enforcer.cjs +2 -11
  17. package/claude-hooks/specmem-team-member-inject.js +1 -1
  18. package/claude-hooks/specmem-unified-hook.py +1 -1
  19. package/claude-hooks/subagent-loading-hook.cjs +1 -1
  20. package/claude-hooks/task-progress-hook.cjs +7 -7
  21. package/claude-hooks/task-progress-hook.js +3 -3
  22. package/claude-hooks/team-comms-enforcer.cjs +113 -47
  23. package/claude-hooks/use-code-pointers.cjs +1 -1
  24. package/dist/claude-sessions/sessionParser.js +5 -0
  25. package/dist/cli/deploy-to-claude.js +9 -2
  26. package/dist/codebase/codebaseIndexer.js +48 -17
  27. package/dist/codebase/exclusions.js +3 -4
  28. package/dist/codebase/index.js +4 -0
  29. package/dist/codebase/pdfExtractor.js +298 -0
  30. package/dist/dashboard/api/taskTeamMembers.js +2 -2
  31. package/dist/db/bigBrainMigrations.js +29 -0
  32. package/dist/hooks/hookManager.js +4 -4
  33. package/dist/hooks/teamFramingCli.js +1 -1
  34. package/dist/hooks/teamMemberPrepromptHook.js +5 -5
  35. package/dist/index.js +49 -12
  36. package/dist/init/claudeConfigInjector.js +27 -8
  37. package/dist/installer/autoInstall.js +7 -1
  38. package/dist/mcp/compactionProxy.js +1052 -192
  39. package/dist/mcp/compactionProxyDaemon.js +112 -37
  40. package/dist/mcp/contextVault.js +439 -0
  41. package/dist/mcp/embeddingServerManager.js +151 -17
  42. package/dist/mcp/mcpProtocolHandler.js +6 -1
  43. package/dist/mcp/miniCOTServerManager.js +82 -8
  44. package/dist/mcp/specMemServer.js +45 -10
  45. package/dist/mcp/toolRegistry.js +6 -0
  46. package/dist/startup/startupIndexing.js +14 -0
  47. package/dist/team-members/taskOrchestrator.js +3 -3
  48. package/dist/team-members/taskTeamMemberLogger.js +2 -2
  49. package/dist/tools/goofy/deployTeamMember.js +3 -3
  50. package/dist/tools/goofy/digInTheVault.js +81 -0
  51. package/dist/tools/goofy/findCodePointers.js +17 -0
  52. package/dist/tools/goofy/findWhatISaid.js +19 -0
  53. package/dist/tools/goofy/stashTheGoods.js +56 -0
  54. package/dist/tools/teamMemberDeployer.js +2 -2
  55. package/dist/watcher/changeHandler.js +65 -8
  56. package/dist/watcher/changeQueue.js +20 -1
  57. package/embedding-sandbox/frankenstein-embeddings.py +4 -3
  58. package/embedding-sandbox/mini-cot-service.py +11 -13
  59. package/embedding-sandbox/pdf-text-extract.py +208 -0
  60. package/package.json +1 -1
  61. package/scripts/deploy-hooks.cjs +12 -4
  62. package/scripts/fast-batch-embedder.cjs +2 -2
  63. package/scripts/force-retry.cjs +34 -0
  64. package/scripts/global-postinstall.cjs +97 -4
  65. package/scripts/poetic-abliteration.cjs +379 -0
  66. package/scripts/refusal-enforcer.cjs +88 -0
  67. package/scripts/specmem-init.cjs +222 -41
  68. package/specmem/model-config.json +6 -6
  69. package/specmem/supervisord.conf +1 -1
  70. package/svg-sections/readme-token-compaction.svg +246 -0
  71. package/claude-hooks/agent-chooser-hook.js +0 -179
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env node
2
2
  /**
3
- * TASK PROGRESS HOOK - Real loading bars for Task tool agents
3
+ * AGENT PROGRESS HOOK - Real loading bars for Agent tool agents
4
4
  *
5
5
  * Writes DIRECTLY to /dev/tty to bypass Claude's stdout capture
6
6
  * This actually shows content in the terminal!
@@ -92,8 +92,8 @@ process.stdin.on('end', async () => {
92
92
  const data = JSON.parse(input);
93
93
  const { hookEventName, toolName } = data;
94
94
 
95
- // Only handle Task tool
96
- if (toolName !== 'Task') {
95
+ // Only handle Agent tool
96
+ if (toolName !== 'Agent') {
97
97
  console.log(JSON.stringify({ continue: true }));
98
98
  return;
99
99
  }
@@ -114,7 +114,7 @@ process.stdin.on('end', async () => {
114
114
 
115
115
  function handlePreTask(data) {
116
116
  const { toolInput } = data;
117
- const description = toolInput?.description || 'Task';
117
+ const description = toolInput?.description || 'Agent';
118
118
  const runInBackground = toolInput?.run_in_background !== false;
119
119
 
120
120
  // Track task
@@ -148,11 +148,11 @@ function handlePreTask(data) {
148
148
  hookEventName: 'PreToolUse',
149
149
  additionalContext: `
150
150
  [AGENT #${taskNum} DEPLOYED]
151
- Task: ${description}
151
+ Agent: ${description}
152
152
  Status: Running in background
153
153
 
154
154
  OUTPUT PROGRESS using send_team_message():
155
- - When starting: send_team_message({message: "🔄 Starting: [task]"})
155
+ - When starting: send_team_message({message: "🔄 Starting: [agent task]"})
156
156
  - During work: send_team_message({message: "📝 Progress: [update]"})
157
157
  - When done: send_team_message({message: "✅ Completed: [summary]"})
158
158
  `
@@ -164,7 +164,7 @@ OUTPUT PROGRESS using send_team_message():
164
164
 
165
165
  function handlePostTask(data) {
166
166
  const { toolInput, toolOutput } = data;
167
- const description = toolInput?.description || 'Task';
167
+ const description = toolInput?.description || 'Agent';
168
168
 
169
169
  killSpinner();
170
170
 
@@ -93,7 +93,7 @@ process.stdin.on('end', async () => {
93
93
  const { hookEventName, toolName } = data;
94
94
 
95
95
  // Only handle Task tool
96
- if (toolName !== 'Task') {
96
+ if (toolName !== 'Agent') {
97
97
  console.log(JSON.stringify({ continue: true }));
98
98
  return;
99
99
  }
@@ -114,7 +114,7 @@ process.stdin.on('end', async () => {
114
114
 
115
115
  function handlePreTask(data) {
116
116
  const { toolInput } = data;
117
- const description = toolInput?.description || 'Task';
117
+ const description = toolInput?.description || 'Agent';
118
118
  const runInBackground = toolInput?.run_in_background !== false;
119
119
 
120
120
  // Track task
@@ -164,7 +164,7 @@ OUTPUT PROGRESS using send_team_message():
164
164
 
165
165
  function handlePostTask(data) {
166
166
  const { toolInput, toolOutput } = data;
167
- const description = toolInput?.description || 'Task';
167
+ const description = toolInput?.description || 'Agent';
168
168
 
169
169
  killSpinner();
170
170
 
@@ -77,7 +77,7 @@ try {
77
77
  // CONFIGURATION
78
78
  // ============================================================================
79
79
  const MAX_SEARCHES_BEFORE_BLOCK = 2; // Every other search must use find_code_pointers/find_memory
80
- const TEAM_COMMS_CHECK_INTERVAL = 4; // MUST read_team_messages every 4 tool usages
80
+ const TEAM_COMMS_CHECK_INTERVAL = 3; // MUST send_team_message every 3 tool usages
81
81
  const BROADCAST_CHECK_INTERVAL = 5; // MUST read_team_messages w/ include_broadcasts every 5 tool usages
82
82
  const HELP_CHECK_INTERVAL = 8; // Check help requests every 8 tool usages
83
83
 
@@ -118,14 +118,18 @@ const HELP_CHECK_TOOLS = [
118
118
  // NOTE: Read is NOT included — agents abuse Read to reset search counters
119
119
  const BASIC_SEARCH_TOOLS = ['Grep', 'Glob'];
120
120
 
121
+ // READ + SEARCH combined — forces team msg every 3 reads OR searches
122
+ const READ_SEARCH_TOOLS = ['Read', 'Grep', 'Glob'];
123
+ const READ_SEARCH_COMMS_INTERVAL = 3; // Must send_team_message every 3 reads/searches
124
+
121
125
  // Dangerous tools that require full compliance
122
126
  const WRITE_TOOLS = ['Edit', 'Write', 'NotebookEdit'];
123
127
 
124
128
  // FULL COMPLIANCE TOOLS - agents use these to bypass everything
125
129
  // Requires: announced + claimed + usedMemoryTools
126
130
  // - Bash: can run grep/cat/sed/echo to bypass all limits
127
- // - Task: can spawn sub-agents to bypass limits
128
- const FULL_COMPLIANCE_TOOLS = ['Bash', 'Task'];
131
+ // - Agent: can spawn sub-agents to bypass limits
132
+ const FULL_COMPLIANCE_TOOLS = ['Bash', 'Agent'];
129
133
 
130
134
  // Tools that are always allowed (reading team state + cross-swarm help + research)
131
135
  const ALWAYS_ALLOWED = [
@@ -149,7 +153,6 @@ const ALWAYS_ALLOWED = [
149
153
  'WebFetch',
150
154
  'WebSearch',
151
155
  'ToolSearch',
152
- 'Read',
153
156
  ];
154
157
 
155
158
  // ============================================================================
@@ -187,6 +190,9 @@ function getAgentState(tracking, sessionId) {
187
190
  needsCommsCheck: false, // HARD BLOCK until they read team messages
188
191
  needsBroadcastCheck: false, // HARD BLOCK until they read broadcasts
189
192
  needsHelpCheck: false, // Flag when they hit the limit
193
+ readSearchCount: 0, // Read/Grep/Glob count since last team msg
194
+ preClaimMsgSent: false, // Must send team msg BEFORE claim_task
195
+ postReleasePending: false, // Must send team msg AFTER release_task
190
196
  lastActivity: Date.now()
191
197
  };
192
198
  }
@@ -222,33 +228,11 @@ function isRunningAsAgent() {
222
228
  // Deployed team members — always enforce
223
229
  if (isTeamMemberFn()) return true;
224
230
 
225
- // Method 2: General-purpose subagents (CLAUDE_SUBAGENT=1)
226
- // These DO have MCP tools and SHOULD be enforced.
227
- // Exclude Explore/Plan agents — they don't have MCP tools and can't comply.
228
- // We check agents.json to see if the active subagent has MCP tools.
231
+ // Method 2: CLAUDE_SUBAGENT=1 — env var is proof enough, no agents.json check needed
229
232
  if (process.env.CLAUDE_SUBAGENT === '1' || process.env.CLAUDE_AGENT_ID) {
230
- try {
231
- const agentsFile = `${PROJECT_TMP_DIR}/agents.json`;
232
- if (fs.existsSync(agentsFile)) {
233
- const data = JSON.parse(fs.readFileSync(agentsFile, 'utf8'));
234
- const now = Date.now();
235
- for (const agent of Object.values(data.agents || {})) {
236
- // Active agent (started within 10 min, no endTime)
237
- if (!agent.endTime && agent.startTime && (now - agent.startTime < 600000)) {
238
- // Check if this agent has MCP tools (general-purpose agents do)
239
- const tools = agent.tools || [];
240
- const hasMcpTools = tools.some(t => t.startsWith('mcp__specmem__'));
241
- if (hasMcpTools) return true;
242
- }
243
- }
244
- }
245
- } catch {}
246
- // No agents.json or no MCP tools found — this is likely Explore/Plan, skip enforcement
247
- return false;
233
+ return true;
248
234
  }
249
235
 
250
- // Method 3: Check subagent tracking as fallback (parent context seeing active agents)
251
- // This does NOT enforce on the parent — only on processes with CLAUDE_SUBAGENT=1
252
236
  return false;
253
237
  }
254
238
 
@@ -343,6 +327,16 @@ process.stdin.on('end', () => {
343
327
  // ========================================================================
344
328
  if (ANNOUNCE_TOOLS.includes(toolName)) {
345
329
  state.announced = true;
330
+ // Reset comms counter on SEND (agents must send updates, not just read)
331
+ state.commsToolCount = 0;
332
+ state.lastCommsCheck = Date.now();
333
+ state.needsCommsCheck = false;
334
+ // Reset read/search counter — team msg obligation fulfilled
335
+ state.readSearchCount = 0;
336
+ // Cleared to claim files (must msg BEFORE claiming)
337
+ state.preClaimMsgSent = true;
338
+ // Release obligation fulfilled (must msg AFTER releasing)
339
+ state.postReleasePending = false;
346
340
  }
347
341
  if (CLAIM_TOOLS.includes(toolName)) {
348
342
  state.claimed = true;
@@ -370,6 +364,8 @@ process.stdin.on('end', () => {
370
364
  fs.writeFileSync(GLOBAL_CLAIMS_FILE, JSON.stringify(globalClaims, null, 2));
371
365
  } catch (e) {}
372
366
  state.currentClaimId = claimId;
367
+ // Consumed — next claim needs a fresh team msg
368
+ state.preClaimMsgSent = false;
373
369
  }
374
370
  if (toolName === 'mcp__specmem__release_task') {
375
371
  // Remove this session's claims from GLOBAL file
@@ -384,18 +380,23 @@ process.stdin.on('end', () => {
384
380
  } catch (e) {}
385
381
  state.claimed = false;
386
382
  state.editedFiles = [];
383
+ // Must send team msg AFTER releasing — announce the release
384
+ state.postReleasePending = true;
387
385
  }
388
386
  if (MEMORY_TOOLS.includes(toolName)) {
389
387
  state.usedMemoryTools = true;
390
388
  state.searchCount = 0; // Reset search counter — allows next 2 searches
391
389
  // usedMemoryTools resets to false after 2 more searches (see BASIC_SEARCH_TOOLS block)
392
390
  }
393
- // Track team comms reads - resets comms counter
391
+ // Track Read/Search count for team comms cadence
392
+ if (READ_SEARCH_TOOLS.includes(toolName)) {
393
+ state.readSearchCount = (state.readSearchCount || 0) + 1;
394
+ }
395
+
396
+ // Track team comms reads - resets BROADCAST counter only
397
+ // Comms counter now resets on SEND via ANNOUNCE_TOOLS, not on READ
394
398
  if (BROADCAST_CHECK_TOOLS.includes(toolName)) {
395
- state.commsToolCount = 0;
396
- state.lastCommsCheck = Date.now();
397
- state.needsCommsCheck = false;
398
- // Also reset broadcast counter IF they included broadcasts
399
+ // Broadcast counter reset IF they included broadcasts
399
400
  const params = data.tool_input || {};
400
401
  if (params.include_broadcasts !== false) {
401
402
  state.broadcastToolCount = 0;
@@ -431,16 +432,16 @@ process.stdin.on('end', () => {
431
432
  state.helpToolUsageCount = (state.helpToolUsageCount || 0) + 1;
432
433
 
433
434
  // ========================================================================
434
- // HARD BLOCK: Must read team messages every 4 tool usages
435
- // read_team_messages() satisfies this - any mode
435
+ // HARD BLOCK: Must send team message every 3 tool usages
436
+ // send_team_message() or broadcast_to_team() satisfies this
436
437
  // ========================================================================
437
- if (state.commsToolCount >= TEAM_COMMS_CHECK_INTERVAL && !BROADCAST_CHECK_TOOLS.includes(toolName)) {
438
+ if (state.commsToolCount >= TEAM_COMMS_CHECK_INTERVAL && !ANNOUNCE_TOOLS.includes(toolName)) {
438
439
  state.needsCommsCheck = true;
439
440
  state.blockedCount++;
440
441
  saveTracking(tracking);
441
442
  console.log(blockResponse(
442
- 'mcp__specmem__read_team_messages',
443
- `Quick check-in other team members may have updates that affect your work. Call: read_team_messages({include_swarms: true, limit: 5})`
443
+ 'mcp__specmem__send_team_message',
444
+ `Time to update the team on your progress. Call: send_team_message({type:"status", message:"[what you're doing / what you found]"})`
444
445
  ));
445
446
  return;
446
447
  }
@@ -474,6 +475,48 @@ process.stdin.on('end', () => {
474
475
  return;
475
476
  }
476
477
 
478
+ // ========================================================================
479
+ // HARD BLOCK: Read/Search cadence — must send team msg every 3 reads/searches
480
+ // Tracks Read, Grep, Glob separately from general comms counter
481
+ // ========================================================================
482
+ if ((state.readSearchCount || 0) >= READ_SEARCH_COMMS_INTERVAL && !ANNOUNCE_TOOLS.includes(toolName)) {
483
+ state.blockedCount++;
484
+ saveTracking(tracking);
485
+ console.log(blockResponse(
486
+ 'mcp__specmem__send_team_message',
487
+ `You've done ${state.readSearchCount} reads/searches without updating the team. Share what you found! Call: send_team_message({type:"update", message:"[share findings from your recent reads/searches]"})`
488
+ ));
489
+ return;
490
+ }
491
+
492
+ // ========================================================================
493
+ // HARD BLOCK: Must send team msg BEFORE claiming a file
494
+ // Announce what you're about to claim so teammates know
495
+ // ========================================================================
496
+ if (toolName === 'mcp__specmem__claim_task' && !state.preClaimMsgSent) {
497
+ state.blockedCount++;
498
+ saveTracking(tracking);
499
+ console.log(blockResponse(
500
+ 'mcp__specmem__send_team_message',
501
+ `Announce your claim FIRST! Tell the team what files/area you're about to work on. Call: send_team_message({type:"status", message:"Claiming [files/area] — about to work on [description]"})`
502
+ ));
503
+ return;
504
+ }
505
+
506
+ // ========================================================================
507
+ // HARD BLOCK: Must send team msg AFTER releasing a claim
508
+ // Let teammates know files are available again
509
+ // ========================================================================
510
+ if (state.postReleasePending && !ANNOUNCE_TOOLS.includes(toolName)) {
511
+ state.blockedCount++;
512
+ saveTracking(tracking);
513
+ console.log(blockResponse(
514
+ 'mcp__specmem__send_team_message',
515
+ `You released a claim but didn't tell the team! Announce the release. Call: send_team_message({type:"update", message:"Released claim on [files] — files are free for others"})`
516
+ ));
517
+ return;
518
+ }
519
+
477
520
  // ========================================================================
478
521
  // ALWAYS ALLOWED TOOLS - pass through after counter checks
479
522
  // ========================================================================
@@ -488,7 +531,7 @@ process.stdin.on('end', () => {
488
531
  // ========================================================================
489
532
  if (state.commsToolCount === TEAM_COMMS_CHECK_INTERVAL - 1) {
490
533
  console.log(allowWithReminder(
491
- `Heads up — good time to check in with the team: read_team_messages({include_swarms: true, limit: 5})`
534
+ `Heads up — good time to update the team: send_team_message({type:"status", message:"[progress update]"})`
492
535
  ));
493
536
  // Don't return - continue to other checks
494
537
  }
@@ -609,18 +652,41 @@ process.stdin.on('end', () => {
609
652
  }
610
653
 
611
654
  // ========================================================================
612
- // CLAIM RELEASE ENFORCEMENT — After ANY edit, BLOCK until release
613
- // Flow: claim_task → Edit/Write → release_task → claim_task → Edit/Write → release_task
655
+ // CLAIM RELEASE + NOTIFICATION ENFORCEMENT — After edit, BLOCK until release AND notify
656
+ // Flow: claim_task → Edit/Write → release_task + send_team_message → next task
614
657
  // ========================================================================
615
658
  if (state.editedFiles && state.editedFiles.length > 0 && state.claimed && !WRITE_TOOLS.includes(toolName)) {
616
- // Allow: release_task, always-allowed tools, and write tools (handled in WRITE_TOOLS block)
617
- if (!ALWAYS_ALLOWED.includes(toolName) && toolName !== 'mcp__specmem__release_task') {
659
+ const isReleaseTool = toolName === 'mcp__specmem__release_task';
660
+ const isNotifyTool = ANNOUNCE_TOOLS.includes(toolName);
661
+
662
+ // Track completion of release/notify obligations
663
+ if (isReleaseTool) state.releasedClaim = true;
664
+ if (isNotifyTool) state.releaseNotified = true;
665
+
666
+ // Both obligations met — clear state and continue
667
+ if (state.releasedClaim && state.releaseNotified) {
668
+ state.editedFiles = [];
669
+ state.releasedClaim = false;
670
+ state.releaseNotified = false;
671
+ state.claimed = false;
672
+ state.currentClaimId = null;
673
+ }
674
+ // Allow release/notify tools and always-allowed tools through
675
+ else if (!isReleaseTool && !isNotifyTool && !ALWAYS_ALLOWED.includes(toolName)) {
618
676
  state.blockedCount++;
619
677
  saveTracking(tracking);
620
- console.log(blockResponse(
621
- 'mcp__specmem__release_task',
622
- `You're done editing ${state.editedFiles[state.editedFiles.length - 1]} — release the claim so other team members can work on it. Call: release_task({claimId:"${state.currentClaimId || 'your-claim-id'}"})`
623
- ));
678
+
679
+ if (!state.releasedClaim) {
680
+ console.log(blockResponse(
681
+ 'mcp__specmem__release_task',
682
+ `Done editing ${state.editedFiles[state.editedFiles.length - 1]} — release the claim so others can work on it. Call: release_task({claimId:"${state.currentClaimId || 'your-claim-id'}"})`
683
+ ));
684
+ } else {
685
+ console.log(blockResponse(
686
+ 'mcp__specmem__send_team_message',
687
+ `Claim released — now notify the team about your changes. Call: send_team_message({type:"update", message:"Finished editing ${state.editedFiles[state.editedFiles.length - 1]}: [describe what you changed]"})`
688
+ ));
689
+ }
624
690
  return;
625
691
  }
626
692
  }
@@ -73,7 +73,7 @@ async function generateEmbedding(text, socketPath) {
73
73
  for (const line of lines) {
74
74
  try {
75
75
  const resp = JSON.parse(line);
76
- if (resp.status === 'processing') continue;
76
+ if (resp.status === 'working') continue;
77
77
  if (resp.embedding) { socket.end(); resolve(resp.embedding); return; }
78
78
  if (resp.error) { socket.end(); reject(new Error(resp.error)); return; }
79
79
  } catch (e) {}
@@ -996,6 +996,11 @@ export function isToolOrThinkingContent(content) {
996
996
  return true;
997
997
  if (trimmed.startsWith('[Tool:'))
998
998
  return true;
999
+ // Skip task/agent notification XML blocks — system noise, not conversation
1000
+ if (trimmed.startsWith('<task-notification>'))
1001
+ return true;
1002
+ if (trimmed.includes('<task-id>') && trimmed.includes('</task-id>'))
1003
+ return true;
999
1004
  // Check for [CLAUDE] prefixed tool versions
1000
1005
  if (trimmed.startsWith('[CLAUDE] [Tools:'))
1001
1006
  return true;
@@ -218,7 +218,7 @@ function updateSettings() {
218
218
  const settingsPath = path.join(CLAUDE_HOME, 'settings.json');
219
219
  try {
220
220
  let settings = {};
221
- // Load existing settings
221
+ // Load existing settings - PRESERVE all non-specmem keys (env, model, etc.)
222
222
  if (fs.existsSync(settingsPath)) {
223
223
  try {
224
224
  settings = JSON.parse(fs.readFileSync(settingsPath, 'utf-8'));
@@ -227,6 +227,9 @@ function updateSettings() {
227
227
  log('Could not parse existing settings.json, creating new one');
228
228
  }
229
229
  }
230
+ // Capture user's custom env BEFORE any modifications.
231
+ // These include ANTHROPIC_BASE_URL, ANTHROPIC_AUTH_TOKEN, model overrides, etc.
232
+ const _userCustomEnv = settings.env;
230
233
  // IMPORTANT: Do NOT write hooks to main settings.json
231
234
  // All hook config lives in ~/.claude/hooks/settings.json (deployed as a file)
232
235
  // Writing hooks here would cause DOUBLE-FIRING of every hook
@@ -275,9 +278,13 @@ function updateSettings() {
275
278
  settings.permissions.allow.push(perm);
276
279
  }
277
280
  }
281
+ // Restore user's custom env - NEVER clobber ANTHROPIC_BASE_URL, model overrides, etc.
282
+ if (_userCustomEnv !== undefined) {
283
+ settings.env = _userCustomEnv;
284
+ }
278
285
  // Write updated settings
279
286
  fs.writeFileSync(settingsPath, JSON.stringify(settings, null, 2));
280
- log('Updated settings.json (permissions only — hooks in hooks/settings.json)');
287
+ log('Updated settings.json (permissions only — hooks in hooks/settings.json — custom env preserved)');
281
288
  return true;
282
289
  }
283
290
  catch (error) {
@@ -28,6 +28,7 @@ import * as os from 'os';
28
28
  import { v4 as uuidv4 } from 'uuid';
29
29
  import chokidar from 'chokidar';
30
30
  import { logger } from '../utils/logger.js';
31
+ import { extractPdfText, extractPdfBatch, isPdfFile } from './pdfExtractor.js';
31
32
  import { getProjectPath } from '../config.js';
32
33
  import { getCoordinator } from '../coordination/integration.js';
33
34
  /**
@@ -36,15 +37,15 @@ import { getCoordinator } from '../coordination/integration.js';
36
37
  */
37
38
  function loadResourceLimits() {
38
39
  const limits = {
39
- cpuMax: 40, // max CPU % target (back-pressure threshold)
40
+ cpuMax: 35, // max CPU % target (back-pressure threshold)
40
41
  cpuMin: 10, // min CPU % (crawl mode)
41
- ramMaxMb: 6000, // max RAM MB
42
+ ramMaxMb: 4000, // max RAM MB (safe for 8GB laptops)
42
43
  ramMinMb: 2000, // min RAM MB
43
44
  batchSize: 25, // files per batch (was 200!)
44
- maxConcurrency: 8, // max parallel file reads within a batch
45
+ maxConcurrency: 4, // max parallel file reads (safe for dual-core i3s)
45
46
  batchDelayMs: 50, // delay between batches (ms)
46
47
  batchDelayMaxMs: 2000, // max delay under heavy load
47
- cpuCoreMax: 0, // 0 = auto (use all cores)
48
+ cpuCoreMax: 2, // max CPU cores (safe for dual-core i3s)
48
49
  };
49
50
  // 1. Read from model-config.json
50
51
  try {
@@ -177,7 +178,8 @@ const DEFAULT_CONFIG = {
177
178
  '.c', '.cpp', '.h', '.hpp',
178
179
  '.swift',
179
180
  '.dockerfile', 'Dockerfile',
180
- '.env.example', '.env.template'
181
+ '.env.example', '.env.template',
182
+ '.pdf'
181
183
  ],
182
184
  maxFileSizeBytes: 1024 * 1024, // 1MB
183
185
  generateEmbeddings: true,
@@ -444,16 +446,25 @@ export class CodebaseIndexer {
444
446
  const stats = await fs.stat(filePath);
445
447
  if (stats.size > this.config.maxFileSizeBytes)
446
448
  return;
447
- if (await this.isBinaryFile(filePath))
448
- return;
449
- const content = await fs.readFile(filePath, 'utf-8');
449
+ // PDF files: extract text via PyMuPDF instead of reading as UTF-8
450
+ let content;
451
+ if (isPdfFile(filePath)) {
452
+ const pdfResult = await extractPdfText(filePath);
453
+ if (!pdfResult || !pdfResult.text) return;
454
+ content = pdfResult.text;
455
+ logger.debug({ filePath: relativePath, pages: pdfResult.pages, chars: pdfResult.chars }, 'PDF text extracted');
456
+ } else {
457
+ if (await this.isBinaryFile(filePath))
458
+ return;
459
+ content = await fs.readFile(filePath, 'utf-8');
460
+ }
450
461
  const contentHash = this.hashContent(content);
451
462
  const existingHash = existingHashes.get(relativePath);
452
463
  if (existingHash === contentHash) {
453
464
  skipped++;
454
465
  return;
455
466
  }
456
- const indexedFile = await this.indexFile(filePath);
467
+ const indexedFile = await this.indexFile(filePath, isPdfFile(filePath) ? content : undefined);
457
468
  if (indexedFile) {
458
469
  this.index.set(indexedFile.filePath, indexedFile);
459
470
  changedFiles.push(indexedFile);
@@ -616,9 +627,17 @@ export class CodebaseIndexer {
616
627
  if (existing && existing.mtime && stats.mtime.getTime() <= existing.mtime) {
617
628
  return { skipped: true, relativePath, mtimeSkip: true };
618
629
  }
619
- if (await this.isBinaryFile(filePath))
620
- return null;
621
- const content = await fs.readFile(filePath, 'utf-8');
630
+ // PDF files: extract text via PyMuPDF instead of reading as UTF-8
631
+ let content;
632
+ if (isPdfFile(filePath)) {
633
+ const pdfResult = await extractPdfText(filePath);
634
+ if (!pdfResult || !pdfResult.text) return null;
635
+ content = pdfResult.text;
636
+ } else {
637
+ if (await this.isBinaryFile(filePath))
638
+ return null;
639
+ content = await fs.readFile(filePath, 'utf-8');
640
+ }
622
641
  const contentHash = this.hashContent(content);
623
642
  if (existing && existing.hash === contentHash) {
624
643
  return { skipped: true, relativePath, hashSkip: true };
@@ -1178,7 +1197,7 @@ export class CodebaseIndexer {
1178
1197
  /**
1179
1198
  * indexFile - reads and indexes a single file with enhanced analysis
1180
1199
  */
1181
- async indexFile(absolutePath) {
1200
+ async indexFile(absolutePath, preExtractedContent) {
1182
1201
  try {
1183
1202
  const stats = await fs.stat(absolutePath);
1184
1203
  // skip if too large
@@ -1186,11 +1205,23 @@ export class CodebaseIndexer {
1186
1205
  logger.debug({ path: absolutePath, size: stats.size }, 'skipping large file');
1187
1206
  return null;
1188
1207
  }
1189
- // skip if binary
1190
- if (await this.isBinaryFile(absolutePath)) {
1191
- return null;
1208
+ // PDF files: use pre-extracted content or extract on demand
1209
+ let content;
1210
+ if (isPdfFile(absolutePath)) {
1211
+ if (preExtractedContent) {
1212
+ content = preExtractedContent;
1213
+ } else {
1214
+ const pdfResult = await extractPdfText(absolutePath);
1215
+ if (!pdfResult || !pdfResult.text) return null;
1216
+ content = pdfResult.text;
1217
+ }
1218
+ } else {
1219
+ // skip if binary
1220
+ if (await this.isBinaryFile(absolutePath)) {
1221
+ return null;
1222
+ }
1223
+ content = await fs.readFile(absolutePath, 'utf-8');
1192
1224
  }
1193
- const content = await fs.readFile(absolutePath, 'utf-8');
1194
1225
  const relativePath = path.relative(this.config.codebasePath, absolutePath);
1195
1226
  const fileName = path.basename(absolutePath);
1196
1227
  const extension = path.extname(absolutePath).toLowerCase();
@@ -47,7 +47,7 @@ export const EXCLUSION_CONFIG = {
47
47
  '*.db',
48
48
  // Binary assets
49
49
  '*.png', '*.jpg', '*.jpeg', '*.gif', '*.ico', '*.webp',
50
- '*.pdf', '*.zip', '*.tar', '*.gz', '*.rar', '*.7z',
50
+ '*.zip', '*.tar', '*.gz', '*.rar', '*.7z',
51
51
  '*.mp3', '*.mp4', '*.avi', '*.mov', '*.mkv',
52
52
  '*.ttf', '*.woff', '*.woff2', '*.eot', '*.otf',
53
53
  '*.exe', '*.dll', '*.so', '*.dylib', '*.bin',
@@ -145,7 +145,6 @@ const DEFAULT_EXCLUSIONS = [
145
145
  '*.mp4',
146
146
  '*.avi',
147
147
  '*.mov',
148
- '*.pdf',
149
148
  '*.zip',
150
149
  '*.tar',
151
150
  '*.gz',
@@ -547,8 +546,8 @@ const BINARY_EXTENSIONS = new Set([
547
546
  '.zip', '.tar', '.gz', '.bz2', '.7z', '.rar', '.xz', '.lz', '.lzma',
548
547
  // executables and libraries
549
548
  '.exe', '.dll', '.so', '.dylib', '.bin', '.out', '.app', '.msi', '.deb', '.rpm',
550
- // documents (binary formats)
551
- '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.odt', '.ods', '.odp',
549
+ // documents (binary formats — PDF handled by pdfExtractor.js)
550
+ '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.odt', '.ods', '.odp',
552
551
  // fonts
553
552
  '.ttf', '.otf', '.woff', '.woff2', '.eot',
554
553
  // databases
@@ -6,6 +6,10 @@
6
6
  // ========================================
7
7
  export { SkipTheBoringShit, isBinaryFile, getFileSizeBytes, getExclusionHandler, resetExclusionHandler, DEFAULT_EXCLUSIONS } from './exclusions.js';
8
8
  // ========================================
9
+ // PDF EXTRACTION - pdfExtractor
10
+ // ========================================
11
+ export { extractPdfText, extractPdfBatch, isPdfFile, isPdfExtractionAvailable } from './pdfExtractor.js';
12
+ // ========================================
9
13
  // LANGUAGE DETECTION - whatLanguageIsThis
10
14
  // ========================================
11
15
  export { WhatLanguageIsThis, getLanguageDetector, resetLanguageDetector, LANGUAGE_REGISTRY, EXTENSION_INDEX, FILENAME_MAPPINGS } from './languageDetection.js';