specmem-hardwicksoftware 3.7.35 → 3.7.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/CHANGELOG.md +34 -0
  2. package/README.md +11 -15
  3. package/bin/specmem-autoclaude.cjs +12 -1
  4. package/bin/specmem-cli.cjs +1077 -11
  5. package/bin/specmem-console.cjs +890 -63
  6. package/bootstrap.cjs +10 -2
  7. package/claude-hooks/agent-loading-hook.cjs +16 -16
  8. package/claude-hooks/agent-loading-hook.js +28 -21
  9. package/claude-hooks/agent-type-matcher.js +1 -1
  10. package/claude-hooks/background-completion-silencer.js +1 -1
  11. package/claude-hooks/file-claim-enforcer.cjs +37 -36
  12. package/claude-hooks/output-cleaner.cjs +1 -1
  13. package/claude-hooks/refusal-detector-hook.cjs +53 -0
  14. package/claude-hooks/settings.json +64 -4
  15. package/claude-hooks/smart-search-interceptor.js +1 -1
  16. package/claude-hooks/specmem-search-enforcer.cjs +2 -11
  17. package/claude-hooks/specmem-team-member-inject.js +1 -1
  18. package/claude-hooks/specmem-unified-hook.py +1 -1
  19. package/claude-hooks/subagent-loading-hook.cjs +1 -1
  20. package/claude-hooks/task-progress-hook.cjs +7 -7
  21. package/claude-hooks/task-progress-hook.js +3 -3
  22. package/claude-hooks/team-comms-enforcer.cjs +113 -47
  23. package/claude-hooks/use-code-pointers.cjs +1 -1
  24. package/dist/claude-sessions/sessionParser.js +5 -0
  25. package/dist/cli/deploy-to-claude.js +9 -2
  26. package/dist/codebase/codebaseIndexer.js +48 -17
  27. package/dist/codebase/exclusions.js +3 -4
  28. package/dist/codebase/index.js +4 -0
  29. package/dist/codebase/pdfExtractor.js +298 -0
  30. package/dist/dashboard/api/taskTeamMembers.js +2 -2
  31. package/dist/db/bigBrainMigrations.js +29 -0
  32. package/dist/hooks/hookManager.js +4 -4
  33. package/dist/hooks/teamFramingCli.js +1 -1
  34. package/dist/hooks/teamMemberPrepromptHook.js +5 -5
  35. package/dist/index.js +49 -12
  36. package/dist/init/claudeConfigInjector.js +27 -8
  37. package/dist/installer/autoInstall.js +7 -1
  38. package/dist/mcp/compactionProxy.js +1052 -192
  39. package/dist/mcp/compactionProxyDaemon.js +112 -37
  40. package/dist/mcp/contextVault.js +439 -0
  41. package/dist/mcp/embeddingServerManager.js +151 -17
  42. package/dist/mcp/mcpProtocolHandler.js +6 -1
  43. package/dist/mcp/miniCOTServerManager.js +82 -8
  44. package/dist/mcp/specMemServer.js +45 -10
  45. package/dist/mcp/toolRegistry.js +6 -0
  46. package/dist/startup/startupIndexing.js +14 -0
  47. package/dist/team-members/taskOrchestrator.js +3 -3
  48. package/dist/team-members/taskTeamMemberLogger.js +2 -2
  49. package/dist/tools/goofy/deployTeamMember.js +3 -3
  50. package/dist/tools/goofy/digInTheVault.js +81 -0
  51. package/dist/tools/goofy/findCodePointers.js +17 -0
  52. package/dist/tools/goofy/findWhatISaid.js +19 -0
  53. package/dist/tools/goofy/stashTheGoods.js +56 -0
  54. package/dist/tools/teamMemberDeployer.js +2 -2
  55. package/dist/watcher/changeHandler.js +65 -8
  56. package/dist/watcher/changeQueue.js +20 -1
  57. package/embedding-sandbox/frankenstein-embeddings.py +4 -3
  58. package/embedding-sandbox/mini-cot-service.py +11 -13
  59. package/embedding-sandbox/pdf-text-extract.py +208 -0
  60. package/package.json +1 -1
  61. package/scripts/deploy-hooks.cjs +12 -4
  62. package/scripts/fast-batch-embedder.cjs +2 -2
  63. package/scripts/force-retry.cjs +34 -0
  64. package/scripts/global-postinstall.cjs +97 -4
  65. package/scripts/poetic-abliteration.cjs +379 -0
  66. package/scripts/refusal-enforcer.cjs +88 -0
  67. package/scripts/specmem-init.cjs +222 -41
  68. package/specmem/model-config.json +6 -6
  69. package/specmem/supervisord.conf +1 -1
  70. package/svg-sections/readme-token-compaction.svg +246 -0
  71. package/claude-hooks/agent-chooser-hook.js +0 -179
@@ -0,0 +1,298 @@
1
+ /**
2
+ * pdfExtractor.js — PDF text extraction for codebase indexing
3
+ *
4
+ * Spawns pdf-text-extract.py (PyMuPDF) as a child process.
5
+ * Digital PDFs: instant text extraction (0.003s/page).
6
+ * Scanned PDFs: Tesseract OCR fallback (1-3s/page).
7
+ *
8
+ * Used by codebaseIndexer.js and changeHandler.js to index PDF files
9
+ * alongside regular source code.
10
+ */
11
import { spawn } from 'child_process';
import { existsSync } from 'fs';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
import { logger } from '../utils/logger.js';
import { getPythonPath } from '../utils/projectEnv.js';
16
+
17
// Upper bound for one single-file extraction run: 60s, generous for large scanned PDFs.
const PDF_EXTRACT_TIMEOUT_MS = 60_000;
// Default page cap forwarded to the extractor via --max-pages.
const MAX_PDF_PAGES = 100;

// Memoized script location, filled in after the first successful discovery.
let _cachedScriptPath = null;
// Tri-state flag: null = unknown, true/false once an extraction run has settled it.
let _pymupdfAvailable = null;
23
+
24
/**
 * Locate the pdf-text-extract.py helper script.
 *
 * Probes, in order: `<package root>/embedding-sandbox/`, the package root
 * itself (flat layout), and the global npm install location derived from
 * `process.execPath`. The first existing candidate is cached in
 * `_cachedScriptPath`; the cached value is re-checked with existsSync on
 * every call, so a script that disappears triggers a fresh search.
 *
 * @returns {string | null} Path to the script, or null when no candidate exists
 *   (a warning listing the searched paths is logged in that case).
 */
function findPdfExtractScript() {
    if (_cachedScriptPath && existsSync(_cachedScriptPath)) {
        return _cachedScriptPath;
    }

    // This file is an ES module (it uses `import`), so the CommonJS global
    // `__dirname` does not exist here and referencing it throws a
    // ReferenceError. Derive this module's directory from import.meta.url.
    const moduleDir = dirname(fileURLToPath(import.meta.url));
    // Two levels up from the module's directory is treated as the package root.
    const specmemRoot = dirname(dirname(moduleDir));
    const possiblePaths = [
        // SpecMem package root (works for all installs)
        join(specmemRoot, 'embedding-sandbox', 'pdf-text-extract.py'),
        // Direct package root (flat layout)
        join(specmemRoot, 'pdf-text-extract.py'),
        // Global npm install fallback
        join(dirname(dirname(process.execPath)), 'lib', 'node_modules', 'specmem-hardwicksoftware', 'embedding-sandbox', 'pdf-text-extract.py'),
    ];

    const found = possiblePaths.find((candidate) => existsSync(candidate));
    if (found !== undefined) {
        _cachedScriptPath = found;
        logger.debug({ path: found }, '[pdfExtractor] Found pdf-text-extract.py');
        return found;
    }

    logger.warn({ searchedPaths: possiblePaths }, '[pdfExtractor] pdf-text-extract.py not found');
    return null;
}
54
+
55
/**
 * Extract text from a single PDF by running pdf-text-extract.py in a child process.
 *
 * The returned promise never rejects for extraction failures: a missing
 * script, a spawn error, a non-zero exit code, a timeout, an `error` field in
 * the script's JSON, or unparseable output are all logged and reported as null.
 * If stderr mentions "pymupdf not found", PDF extraction is switched off for
 * the rest of the process (subsequent calls return null immediately).
 *
 * @param {string} filePath - Absolute path to the PDF file
 * @param {object} [options]
 * @param {number} [options.maxPages=MAX_PDF_PAGES] - Forwarded to the script as --max-pages
 * @param {string} [options.language='eng'] - Forwarded to the script as --language
 * @returns {Promise<{text: string, pages: number, chars: number, scannedPages: number[], truncated: boolean, totalPages: number} | null>}
 *   Extraction result, or null if extraction fails or pymupdf is not available.
 */
export async function extractPdfText(filePath, options = {}) {
    const { maxPages = MAX_PDF_PAGES, language = 'eng' } = options;

    // Fast bail if we already know pymupdf is unavailable
    if (_pymupdfAvailable === false) {
        return null;
    }

    const scriptPath = findPdfExtractScript();
    if (!scriptPath) {
        logger.warn('[pdfExtractor] Cannot extract PDF — script not found');
        return null;
    }

    const pythonPath = getPythonPath();

    return new Promise((resolve) => {
        const args = [scriptPath, filePath, '--max-pages', String(maxPages), '--language', language];
        let stdout = '';
        let stderr = '';
        let resolved = false;

        // No spawn-level `timeout` option: the timer below is the single
        // source of truth, so a timeout is always logged as a timeout
        // (the same approach extractPdfBatch uses).
        const proc = spawn(pythonPath, args, {
            stdio: ['ignore', 'pipe', 'pipe'],
            env: { ...process.env },
        });

        // Decode at the stream level so a multi-byte UTF-8 character that is
        // split across two pipe chunks is reassembled instead of being turned
        // into replacement characters by a per-chunk toString().
        proc.stdout.setEncoding('utf8');
        proc.stderr.setEncoding('utf8');

        const timeoutId = setTimeout(() => {
            if (!resolved) {
                resolved = true;
                proc.kill('SIGKILL');
                logger.warn({ filePath, timeoutMs: PDF_EXTRACT_TIMEOUT_MS }, '[pdfExtractor] PDF extraction timed out');
                resolve(null);
            }
        }, PDF_EXTRACT_TIMEOUT_MS);

        proc.stdout.on('data', (chunk) => { stdout += chunk; });
        proc.stderr.on('data', (chunk) => { stderr += chunk; });

        proc.on('close', (code) => {
            clearTimeout(timeoutId);
            if (resolved) return;
            resolved = true;

            // Checked before the exit code: this message disables PDF indexing globally.
            if (stderr && stderr.includes('pymupdf not found')) {
                _pymupdfAvailable = false;
                logger.warn('[pdfExtractor] pymupdf not installed — PDF indexing disabled');
                resolve(null);
                return;
            }

            if (code !== 0) {
                logger.warn({ filePath, code, stderr: stderr.slice(0, 200) }, '[pdfExtractor] PDF extraction failed');
                resolve(null);
                return;
            }

            try {
                const result = JSON.parse(stdout.trim());
                if (result.error) {
                    logger.warn({ filePath, error: result.error }, '[pdfExtractor] PDF extraction error');
                    resolve(null);
                    return;
                }

                // Mark pymupdf as available on first success
                if (_pymupdfAvailable === null) {
                    _pymupdfAvailable = true;
                }

                resolve({
                    text: result.text,
                    pages: result.pages,
                    chars: result.chars,
                    scannedPages: result.scanned_pages || [],
                    truncated: result.truncated || false,
                    totalPages: result.total_pages || result.pages,
                });
            } catch {
                logger.warn({ filePath, stdout: stdout.slice(0, 200) }, '[pdfExtractor] Failed to parse extraction result');
                resolve(null);
            }
        });

        proc.on('error', (err) => {
            clearTimeout(timeoutId);
            if (!resolved) {
                resolved = true;
                logger.warn({ filePath, error: err.message }, '[pdfExtractor] Failed to spawn Python');
                resolve(null);
            }
        });
    });
}
158
+
159
/**
 * Fold the extractor's JSONL output (one JSON object per line) into `results`.
 * Private helper for extractPdfBatch, shared by its close and timeout paths.
 *
 * Lines that are blank, unparseable (e.g. a trailing partial line after a
 * kill) or lack a `path` field are skipped. Records with an `error` field are
 * stored as null. A successful record also marks pymupdf as available.
 *
 * @param {string} stdout - Raw stdout collected from the child process so far
 * @param {Map<string, object | null>} results - Map to populate, keyed by the record's `path`
 */
function ingestPdfBatchOutput(stdout, results) {
    for (const line of stdout.split('\n')) {
        if (!line.trim()) continue;
        try {
            const record = JSON.parse(line);
            const path = record.path;
            if (!path) continue;

            if (record.error) {
                logger.warn({ path, error: record.error }, '[pdfExtractor] PDF extraction error');
                results.set(path, null);
                continue;
            }

            if (_pymupdfAvailable === null) _pymupdfAvailable = true;

            results.set(path, {
                text: record.text,
                pages: record.pages,
                chars: record.chars,
                scannedPages: record.scanned_pages || [],
                truncated: record.truncated || false,
                totalPages: record.total_pages || record.pages,
            });
        } catch {
            logger.debug({ line: line.slice(0, 100) }, '[pdfExtractor] Failed to parse JSONL line');
        }
    }
}

/**
 * Extract text from multiple PDFs in a single Python process (batch mode).
 * One Python startup for N PDFs — avoids interpreter overhead per file.
 *
 * The returned promise never rejects for extraction failures; every requested
 * path is present in the returned Map, with null marking a failed or missing
 * result. On timeout the child is killed and any complete JSONL records
 * already received are kept; only the remaining paths are set to null.
 *
 * NOTE(review): all paths are passed as command-line arguments, so a very
 * large batch could exceed the OS argument-length limit — callers presumably
 * chunk their input; confirm.
 *
 * @param {string[]} filePaths - Absolute paths to PDF files
 * @param {object} [options]
 * @param {number} [options.maxPages=MAX_PDF_PAGES] - Forwarded to the script as --max-pages
 * @param {string} [options.language='eng'] - Forwarded to the script as --language
 * @returns {Promise<Map<string, {text: string, pages: number, chars: number, scannedPages: number[], truncated: boolean, totalPages: number} | null>>}
 */
export async function extractPdfBatch(filePaths, options = {}) {
    const { maxPages = MAX_PDF_PAGES, language = 'eng' } = options;
    const results = new Map();

    if (!filePaths.length) return results;

    // Every requested path must appear in the result; absent ones count as failures.
    const fillMissingWithNull = () => {
        for (const fp of filePaths) {
            if (!results.has(fp)) results.set(fp, null);
        }
    };

    // Fast bail if pymupdf is known unavailable
    if (_pymupdfAvailable === false) {
        fillMissingWithNull();
        return results;
    }

    const scriptPath = findPdfExtractScript();
    if (!scriptPath) {
        fillMissingWithNull();
        return results;
    }

    const pythonPath = getPythonPath();
    // Batch timeout: 60s base + 10s per PDF (scanned pages take 1-3s each)
    const batchTimeout = PDF_EXTRACT_TIMEOUT_MS + (filePaths.length * 10000);

    return new Promise((resolve) => {
        const args = [scriptPath, '--batch', ...filePaths, '--max-pages', String(maxPages), '--language', language];
        let stdout = '';
        let stderr = '';
        let resolved = false;

        const proc = spawn(pythonPath, args, {
            stdio: ['ignore', 'pipe', 'pipe'],
            env: { ...process.env },
        });

        // Decode at the stream level so multi-byte UTF-8 characters split
        // across pipe chunks are reassembled rather than corrupted.
        proc.stdout.setEncoding('utf8');
        proc.stderr.setEncoding('utf8');

        const timeoutId = setTimeout(() => {
            if (!resolved) {
                resolved = true;
                proc.kill('SIGKILL');
                logger.warn({ count: filePaths.length, timeoutMs: batchTimeout }, '[pdfExtractor] Batch extraction timed out');
                // Keep whatever complete records arrived before the kill,
                // then null out the rest.
                ingestPdfBatchOutput(stdout, results);
                fillMissingWithNull();
                resolve(results);
            }
        }, batchTimeout);

        proc.stdout.on('data', (chunk) => { stdout += chunk; });
        proc.stderr.on('data', (chunk) => { stderr += chunk; });

        proc.on('close', (code) => {
            clearTimeout(timeoutId);
            if (resolved) return;
            resolved = true;

            if (stderr && stderr.includes('pymupdf not found')) {
                _pymupdfAvailable = false;
                logger.warn('[pdfExtractor] pymupdf not installed — PDF indexing disabled');
                fillMissingWithNull();
                resolve(results);
                return;
            }

            // A non-zero exit may still have produced usable records, so it is
            // reported but does not discard stdout.
            if (code !== 0) {
                logger.warn({ code, stderr: stderr.slice(0, 200) }, '[pdfExtractor] Batch extraction exited abnormally');
            }

            ingestPdfBatchOutput(stdout, results);
            fillMissingWithNull();

            let extracted = 0;
            for (const value of results.values()) {
                if (value !== null) extracted += 1;
            }
            logger.info({
                total: filePaths.length,
                extracted,
                failed: results.size - extracted,
            }, '[pdfExtractor] Batch extraction complete');

            resolve(results);
        });

        proc.on('error', (err) => {
            clearTimeout(timeoutId);
            if (!resolved) {
                resolved = true;
                logger.warn({ error: err.message }, '[pdfExtractor] Failed to spawn Python for batch');
                fillMissingWithNull();
                resolve(results);
            }
        });
    });
}
284
+
285
/**
 * Report whether a PDF extraction attempt is worthwhile.
 *
 * Returns false once pymupdf has been flagged as unavailable. Otherwise the
 * answer is whether the extraction script can be located — a pymupdf state
 * that is still unknown (no extraction attempted yet) counts as available.
 *
 * @returns {boolean}
 */
export function isPdfExtractionAvailable() {
    const pymupdfRuledOut = _pymupdfAvailable === false;
    return !pymupdfRuledOut && findPdfExtractScript() !== null;
}
292
+
293
/**
 * Check if a file is a PDF by extension (case-insensitive `.pdf` suffix).
 *
 * @param {unknown} filePath - Path to test; anything that is not a string yields false
 * @returns {boolean} true when filePath is a string ending in ".pdf"
 */
export function isPdfFile(filePath) {
    // Guard against null/undefined/non-string input instead of throwing a TypeError.
    if (typeof filePath !== 'string') return false;
    return filePath.toLowerCase().endsWith('.pdf');
}
@@ -1,7 +1,7 @@
1
1
  /**
2
- * taskTeamMembers.ts - API endpoints for Task team member tracking
2
+ * taskTeamMembers.ts - API endpoints for Agent team member tracking
3
3
  *
4
- * yo fr fr this lets you view and manually log Task team members
4
+ * yo fr fr this lets you view and manually log Agent team members
5
5
  */
6
6
  import { Router } from 'express';
7
7
  import { z } from 'zod';
@@ -4388,6 +4388,35 @@ export class BigBrainMigrations {
4388
4388
  -- no-op: we only ADD COLUMN IF NOT EXISTS, nothing to reverse
4389
4389
  `,
4390
4390
  checksum: this.generateChecksum('reconcile_code_definitions_columns_v37')
4391
+ },
4392
+ // migration 38: context vault — token-saving stash for thicc tool outputs
4393
+ // chunks large content, indexes with tsvector for BM25 search,
4394
+ // auto-expires after 24h. inspired by claude-context-mode but on postgres.
4395
+ {
4396
+ version: 38,
4397
+ name: 'context_vault_table',
4398
+ up: `
4399
+ CREATE TABLE IF NOT EXISTS context_vault (
4400
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
4401
+ vault_id VARCHAR(16) NOT NULL,
4402
+ chunk_idx INTEGER NOT NULL DEFAULT 0,
4403
+ content TEXT NOT NULL,
4404
+ content_tsv TSVECTOR GENERATED ALWAYS AS (to_tsvector('english', content)) STORED,
4405
+ source_tool VARCHAR(128),
4406
+ source_size INTEGER DEFAULT 0,
4407
+ metadata JSONB DEFAULT '{}',
4408
+ project_path VARCHAR(500) DEFAULT '/',
4409
+ stashed_at TIMESTAMPTZ DEFAULT NOW(),
4410
+ expires_at TIMESTAMPTZ DEFAULT (NOW() + INTERVAL '24 hours'),
4411
+ UNIQUE(vault_id, chunk_idx)
4412
+ );
4413
+ CREATE INDEX IF NOT EXISTS idx_ctx_vault_tsv ON context_vault USING GIN(content_tsv);
4414
+ CREATE INDEX IF NOT EXISTS idx_ctx_vault_id ON context_vault(vault_id);
4415
+ CREATE INDEX IF NOT EXISTS idx_ctx_vault_expires ON context_vault(expires_at);
4416
+ CREATE INDEX IF NOT EXISTS idx_ctx_vault_project ON context_vault(project_path);
4417
+ `,
4418
+ down: `DROP TABLE IF EXISTS context_vault;`,
4419
+ checksum: this.generateChecksum('context_vault_table_v38')
4391
4420
  }
4392
4421
  ];
4393
4422
  }
@@ -486,7 +486,7 @@ export class HookManager {
486
486
  };
487
487
  }
488
488
  /**
489
- * Create the team framing hook for Task tool interception
489
+ * Create the team framing hook for Agent tool interception
490
490
  * This hook injects "dev team" framing into spawned team members
491
491
  */
492
492
  createTeamFramingHook() {
@@ -495,7 +495,7 @@ export class HookManager {
495
495
  /**
496
496
  * Team Framing Hook for SpecMem
497
497
  *
498
- * PreToolUse hook that intercepts Task tool calls and injects
498
+ * PreToolUse hook that intercepts Agent tool calls and injects
499
499
  * a "dev team" framing prompt to spawned team members.
500
500
  *
501
501
  * The framing creates a psychological context where:
@@ -518,7 +518,7 @@ const DEFAULT_CONFIG = {
518
518
  enabled: true,
519
519
  channelName: 'dev-coordination',
520
520
  customPreprompt: '',
521
- triggerTools: ['Task', 'task', 'spawn_agent', 'deploy_agent'],
521
+ triggerTools: ['Agent', 'agent', 'spawn_agent', 'deploy_agent'],
522
522
  includeMcpToolInstructions: true,
523
523
  seniorEngineerName: 'the senior engineer',
524
524
  teamName: 'the development team'
@@ -635,7 +635,7 @@ main().catch(() => process.exit(0));
635
635
  type: 'PreToolUse',
636
636
  enabled: true,
637
637
  file: hookPath,
638
- description: 'Injects dev team framing into Task tool calls for team member spawning'
638
+ description: 'Injects dev team framing into Agent tool calls for team member spawning'
639
639
  });
640
640
  logger.info({ path: hookPath }, 'Created team framing hook');
641
641
  return hookPath;
@@ -167,7 +167,7 @@ ${'-'.repeat(60)}
167
167
  async function runTest() {
168
168
  console.log('Running test with sample Task tool input...\n');
169
169
  const testInput = {
170
- tool_name: 'Task',
170
+ tool_name: 'Agent',
171
171
  tool_input: {
172
172
  prompt: 'Implement the user authentication feature',
173
173
  description: 'Create login and registration flows'
@@ -2,7 +2,7 @@
2
2
  * SPECMEM TEAM_MEMBER PREPROMPT HOOK
3
3
  * ===================================
4
4
  *
5
- * Pre-tool hook that intercepts Task tool calls and injects a "dev team"
5
+ * Pre-tool hook that intercepts Agent tool calls and injects a "dev team"
6
6
  * framing prompt to all spawned team members (teamMembers).
7
7
  *
8
8
  * The framing creates a psychological context where:
@@ -11,10 +11,10 @@
11
11
  * - Team communication tools encourage coordination
12
12
  * - The artificial nature of the team is not revealed
13
13
  *
14
- * Hook Type: PreToolUse (intercepts Task tool calls before execution)
14
+ * Hook Type: PreToolUse (intercepts Agent tool calls before execution)
15
15
  *
16
16
  * Flow:
17
- * 1. calls Task tool to spawn a team member
17
+ * 1. calls Agent tool to spawn a team member
18
18
  * 2. This hook intercepts the call
19
19
  * 3. The prompt is modified to include team framing
20
20
  * 4. TeamMember spawns with team context + original task
@@ -36,7 +36,7 @@ export const DEFAULT_TEAM_CONFIG = {
36
36
  channelName: 'dev-coordination',
37
37
  customPreprompt: '',
38
38
  additionalContext: '',
39
- triggerTools: ['Task', 'task', 'spawn_agent', 'deploy_agent'],
39
+ triggerTools: ['Agent', 'agent', 'spawn_agent', 'deploy_agent'],
40
40
  includeMcpToolInstructions: true,
41
41
  seniorEngineerName: 'the senior engineer',
42
42
  teamName: 'the development team'
@@ -152,7 +152,7 @@ function extractTaskDescription(toolInput) {
152
152
  return null;
153
153
  }
154
154
  /**
155
- * Main hook function - intercepts Task tool calls and injects team framing
155
+ * Main hook function - intercepts Agent tool calls and injects team framing
156
156
  *
157
157
  * @param toolName - Name of the tool being called
158
158
  * @param toolInput - Tool arguments/input
package/dist/index.js CHANGED
@@ -382,8 +382,9 @@ class LocalEmbeddingProvider {
382
382
  if (this._socketCleanupInterval) {
383
383
  clearInterval(this._socketCleanupInterval);
384
384
  }
385
- const cleanupIntervalMs = parseInt(process.env['SPECMEM_SOCKET_CLEANUP_INTERVAL_MS'] || '300000', 10);
386
- const maxAgeMs = parseInt(process.env['SPECMEM_SOCKET_MAX_AGE_MS'] || '60000', 10);
385
+ // FIX: Reduced from 5min/60s to 30s/10s — 75+ leaked sockets cause EAGAIN on accept
386
+ const cleanupIntervalMs = parseInt(process.env['SPECMEM_SOCKET_CLEANUP_INTERVAL_MS'] || '30000', 10);
387
+ const maxAgeMs = parseInt(process.env['SPECMEM_SOCKET_MAX_AGE_MS'] || '10000', 10);
387
388
  this._socketCleanupInterval = setInterval(() => {
388
389
  const now = Date.now();
389
390
  let cleaned = 0;
@@ -604,7 +605,7 @@ class LocalEmbeddingProvider {
604
605
  }, timeoutMs);
605
606
  }
606
607
  // Handle heartbeat/processing status - just reset timeout and continue
607
- if (response.status === 'processing') {
608
+ if (response.status === 'working') {
608
609
  __debugLog('[EMBEDDING DEBUG]', Date.now(), 'INIT_PERSISTENT_SOCKET_HEARTBEAT', {
609
610
  requestId,
610
611
  textLength: response.text_length
@@ -2041,7 +2042,9 @@ class LocalEmbeddingProvider {
2041
2042
  this._trackSocket(socket, `batch-${texts.length}`);
2042
2043
  let buffer = '';
2043
2044
  let resolved = false;
2045
+ let workingReceived = false;
2044
2046
  const startTime = Date.now();
2047
+ const WORKING_TIMEOUT_MS = 25000; // 25s to receive "working" status
2045
2048
  // FIX Issue #1: Ensure socket is destroyed on all exit paths
2046
2049
  const ensureSocketCleanup = () => {
2047
2050
  try {
@@ -2068,6 +2071,14 @@ class LocalEmbeddingProvider {
2068
2071
  reject(new Error(`Batch embedding timeout after ${Math.round(timeoutMs / 1000)}s for ${texts.length} texts`));
2069
2072
  }
2070
2073
  }, timeoutMs);
2074
+ // 25s timeout to receive "working" status - if not received, server may be stuck
2075
+ let workingTimeout = setTimeout(() => {
2076
+ if (!resolved && !workingReceived) {
2077
+ resolved = true;
2078
+ ensureSocketCleanup();
2079
+ reject(new Error(`Embedding server not responding (no 'working' status in 25s). Server may be overloaded or stuck.`));
2080
+ }
2081
+ }, WORKING_TIMEOUT_MS);
2071
2082
  socket.on('connect', () => {
2072
2083
  __debugLog('[EMBEDDING DEBUG]', Date.now(), 'BATCH_SOCKET_CONNECTED', {
2073
2084
  batchSize: texts.length,
@@ -2100,7 +2111,9 @@ class LocalEmbeddingProvider {
2100
2111
  try {
2101
2112
  const response = JSON.parse(responseJson);
2102
2113
  // Skip heartbeat/processing status - keep waiting
2103
- if (response.status === 'processing') {
2114
+ if (response.status === 'working') {
2115
+ workingReceived = true;
2116
+ clearTimeout(workingTimeout); // Server confirmed working
2104
2117
  __debugLog('[EMBEDDING DEBUG]', Date.now(), 'BATCH_SOCKET_HEARTBEAT', {
2105
2118
  batchSize: texts.length,
2106
2119
  count: response.count,
@@ -2110,6 +2123,7 @@ class LocalEmbeddingProvider {
2110
2123
  }
2111
2124
  // Got actual response - resolve or reject
2112
2125
  clearTimeout(timeout);
2126
+ clearTimeout(workingTimeout);
2113
2127
  resolved = true;
2114
2128
  ensureSocketCleanup();
2115
2129
  const responseTime = Date.now() - startTime;
@@ -2132,6 +2146,7 @@ class LocalEmbeddingProvider {
2132
2146
  }
2133
2147
  catch (err) {
2134
2148
  clearTimeout(timeout);
2149
+ clearTimeout(workingTimeout);
2135
2150
  resolved = true;
2136
2151
  ensureSocketCleanup();
2137
2152
  reject(new Error(`Failed to parse batch embedding response: ${err}`));
@@ -2372,7 +2387,7 @@ class LocalEmbeddingProvider {
2372
2387
  try {
2373
2388
  const response = JSON.parse(line);
2374
2389
  // HEARTBEAT: "working" status means the server is still busy - reset timeout and keep waiting
2375
- if (response.status === 'processing') {
2390
+ if (response.status === 'working') {
2376
2391
  clearTimeout(timeout);
2377
2392
  timeout = setTimeout(() => {
2378
2393
  if (!resolved) {
@@ -2662,13 +2677,32 @@ class LocalEmbeddingProvider {
2662
2677
  * Takes socket path as parameter to ensure we ALWAYS use the fresh path.
2663
2678
  */
2664
2679
  async generateWithDirectSocket(text, socketPath) {
2680
+ // FIX: Limit concurrent socket connections to prevent EAGAIN from socket exhaustion
2681
+ // Without this, startup indexing fires 100+ concurrent requests, each opening a socket
2682
+ const MAX_CONCURRENT_SOCKETS = 6;
2683
+ if (!this._socketSemaphore) {
2684
+ this._socketSemaphore = { count: 0, waiters: [] };
2685
+ }
2686
+ const sem = this._socketSemaphore;
2687
+ if (sem.count >= MAX_CONCURRENT_SOCKETS) {
2688
+ await new Promise(resolve => sem.waiters.push(resolve));
2689
+ }
2690
+ sem.count++;
2691
+ const releaseSemaphore = () => {
2692
+ sem.count--;
2693
+ if (sem.waiters.length > 0) {
2694
+ sem.waiters.shift()();
2695
+ }
2696
+ };
2697
+ try {
2665
2698
  let lastError = null;
2666
2699
  for (let attempt = 1; attempt <= LocalEmbeddingProvider.SOCKET_MAX_RETRIES; attempt++) {
2667
2700
  try {
2668
2701
  __debugLog('[EMBEDDING DEBUG]', Date.now(), 'DIRECT_SOCKET_ATTEMPT', {
2669
2702
  attempt,
2670
2703
  socketPath,
2671
- maxRetries: LocalEmbeddingProvider.SOCKET_MAX_RETRIES
2704
+ maxRetries: LocalEmbeddingProvider.SOCKET_MAX_RETRIES,
2705
+ concurrentSockets: sem.count
2672
2706
  });
2673
2707
  return await this.generateWithDirectSocketAttempt(text, socketPath, attempt);
2674
2708
  }
@@ -2720,6 +2754,9 @@ class LocalEmbeddingProvider {
2720
2754
  `Socket: ${socketPath}. ` +
2721
2755
  `Last error: ${lastError?.message || 'unknown'}. ` +
2722
2756
  `Check if Frankenstein embedding service is running.`);
2757
+ } finally {
2758
+ releaseSemaphore();
2759
+ }
2723
2760
  }
2724
2761
  /**
2725
2762
  * Single attempt to generate embedding via DIRECT socket connection
@@ -2806,7 +2843,7 @@ class LocalEmbeddingProvider {
2806
2843
  try {
2807
2844
  const response = JSON.parse(responseJson);
2808
2845
  // Handle heartbeat/processing status - just keep waiting
2809
- if (response.status === 'processing') {
2846
+ if (response.status === 'working') {
2810
2847
  __debugLog('[EMBEDDING DEBUG]', Date.now(), 'DIRECT_SOCKET_HEARTBEAT', {
2811
2848
  socketPath,
2812
2849
  attempt,
@@ -3198,7 +3235,7 @@ class LocalEmbeddingProvider {
3198
3235
  try {
3199
3236
  const response = JSON.parse(responseJson);
3200
3237
  // Handle heartbeat/processing status - just keep waiting
3201
- if (response.status === 'processing') {
3238
+ if (response.status === 'working') {
3202
3239
  continue;
3203
3240
  }
3204
3241
  // Got actual response - resolve or reject
@@ -4134,15 +4171,15 @@ async function main() {
4134
4171
  buffer = buffer.slice(idx + 1);
4135
4172
  try {
4136
4173
  const resp = JSON.parse(line);
4137
- if (resp.error) { clearTimeout(timeout); resolved = true; socket.end(); reject(new Error(resp.error)); return; }
4138
- if (resp.status === 'processing') continue;
4174
+ if (resp.error) { clearTimeout(timeout); resolved = true; socket.destroy(); reject(new Error(resp.error)); return; }
4175
+ if (resp.status === 'working' || resp.status === 'processing') continue;
4139
4176
  if (resp.embedding && Array.isArray(resp.embedding)) {
4140
- clearTimeout(timeout); resolved = true; socket.end(); resolve(resp.embedding); return;
4177
+ clearTimeout(timeout); resolved = true; socket.destroy(); resolve(resp.embedding); return;
4141
4178
  }
4142
4179
  } catch (e) { /* ignore parse errors */ }
4143
4180
  }
4144
4181
  });
4145
- socket.on('error', (e) => { clearTimeout(timeout); if (!resolved) { resolved = true; reject(e); } });
4182
+ socket.on('error', (e) => { clearTimeout(timeout); if (!resolved) { resolved = true; socket.destroy(); reject(e); } });
4146
4183
  });
4147
4184
  },
4148
4185
  generateEmbeddingsBatch: async (texts) => {
@@ -261,9 +261,9 @@ function configureMcpServer() {
261
261
  SPECMEM_PROJECT_PATH: '${PWD}',
262
262
  SPECMEM_WATCHER_ROOT_PATH: '${PWD}',
263
263
  SPECMEM_CODEBASE_PATH: '${PWD}',
264
- // Database (use environment values or defaults)
265
- SPECMEM_DB_HOST: process.env.SPECMEM_DB_HOST || 'localhost',
266
- SPECMEM_DB_PORT: process.env.SPECMEM_DB_PORT || '5432',
264
+ // Database - use ${PWD} for project isolation
265
+ SPECMEM_DB_HOST: '${PWD}/specmem/run',
266
+ SPECMEM_DB_PORT: '5432',
267
267
  // Watchers enabled by default
268
268
  SPECMEM_SESSION_WATCHER_ENABLED: 'true',
269
269
  SPECMEM_WATCHER_ENABLED: 'true',
@@ -367,8 +367,8 @@ function fixProjectMcpConfigs() {
367
367
  SPECMEM_PROJECT_PATH: '${PWD}',
368
368
  SPECMEM_WATCHER_ROOT_PATH: '${PWD}',
369
369
  SPECMEM_CODEBASE_PATH: '${PWD}',
370
- SPECMEM_DB_HOST: process.env.SPECMEM_DB_HOST || 'localhost',
371
- SPECMEM_DB_PORT: process.env.SPECMEM_DB_PORT || '5432',
370
+ SPECMEM_DB_HOST: '${PWD}/specmem/run',
371
+ SPECMEM_DB_PORT: '5432',
372
372
  SPECMEM_DB_PASSWORD: 'SPECMEM_DB_PASSWORD' in process.env ? process.env.SPECMEM_DB_PASSWORD : undefined,
373
373
  SPECMEM_SESSION_WATCHER_ENABLED: 'true',
374
374
  SPECMEM_WATCHER_ENABLED: 'true',
@@ -727,7 +727,7 @@ function getRequiredHooks() {
727
727
  PreToolUse: [
728
728
  // CRITICAL: Agent loading hook for Agent tool - injects SpecMem context & auto-backgrounds
729
729
  ...(fs.existsSync(HOOKS.agentLoading) ? [{
730
- matcher: 'Task',
730
+ matcher: 'Agent',
731
731
  hooks: [{
732
732
  type: 'command',
733
733
  command: `node ${HOOKS.agentLoading}`,
@@ -802,7 +802,7 @@ function getRequiredHooks() {
802
802
  // =========================================================================
803
803
  PostToolUse: [
804
804
  ...(fs.existsSync(HOOKS.taskProgress) ? [{
805
- matcher: 'Task',
805
+ matcher: 'Agent',
806
806
  hooks: [{
807
807
  type: 'command',
808
808
  command: `node ${HOOKS.taskProgress}`,
@@ -904,6 +904,9 @@ function hasHook(hooks, commandSubstring, matcher) {
904
904
  }
905
905
  function configureSettings() {
906
906
  const settings = safeReadJson(SETTINGS_PATH, {});
907
+ // Preserve user's custom top-level env (ANTHROPIC_BASE_URL, ANTHROPIC_AUTH_TOKEN, model overrides, etc.)
908
+ // We explicitly capture and restore this to guard against any future code accidentally touching it.
909
+ const _userCustomEnv = settings.env;
907
910
  const permissionsAdded = [];
908
911
  const hooksAdded = [];
909
912
  let needsUpdate = false;
@@ -959,8 +962,24 @@ function configureSettings() {
959
962
  if (hooksAdded.length > 0) {
960
963
  logger.info({ hooksAdded }, '[ConfigInjector] Adding hooks to settings.json');
961
964
  }
965
+ // Restore user's custom env - NEVER clobber ANTHROPIC_BASE_URL, model overrides, etc.
966
+ if (_userCustomEnv !== undefined) {
967
+ settings.env = _userCustomEnv;
968
+ }
969
+ // Inject Claude Code env flags (append-if-missing, never clobber existing values)
970
+ const REQUIRED_CLAUDE_ENV = {
971
+ CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS: '1',
972
+ };
973
+ if (!settings.env) settings.env = {};
974
+ for (const [key, value] of Object.entries(REQUIRED_CLAUDE_ENV)) {
975
+ if (!(key in settings.env)) {
976
+ settings.env[key] = value;
977
+ needsUpdate = true;
978
+ logger.info({ key, value }, '[ConfigInjector] Injected Claude env flag');
979
+ }
980
+ }
962
981
  if (safeWriteJson(SETTINGS_PATH, settings)) {
963
- logger.info({ path: SETTINGS_PATH, permissionsAdded, hooksAdded }, '[ConfigInjector] Settings updated');
982
+ logger.info({ path: SETTINGS_PATH, permissionsAdded, hooksAdded }, '[ConfigInjector] Settings updated (custom env preserved)');
964
983
  return { updated: true, permissionsAdded, hooksAdded };
965
984
  }
966
985
  return { updated: false, permissionsAdded: [], hooksAdded: [], error: 'Failed to write settings.json' };