npm - specmem-hardwicksoftware - Versions diffs - 3.7.35 → 3.7.36 - Mend

specmem-hardwicksoftware 3.7.35 → 3.7.36

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (55)

package/CHANGELOG.md +34 -0
package/README.md +11 -15
package/bin/specmem-console.cjs +839 -51
package/claude-hooks/agent-chooser-hook.js +6 -6
package/claude-hooks/agent-loading-hook.cjs +16 -16
package/claude-hooks/agent-loading-hook.js +18 -18
package/claude-hooks/agent-type-matcher.js +1 -1
package/claude-hooks/background-completion-silencer.js +1 -1
package/claude-hooks/file-claim-enforcer.cjs +37 -36
package/claude-hooks/output-cleaner.cjs +1 -1
package/claude-hooks/settings.json +27 -3
package/claude-hooks/specmem-search-enforcer.cjs +2 -11
package/claude-hooks/specmem-team-member-inject.js +1 -1
package/claude-hooks/specmem-unified-hook.py +1 -1
package/claude-hooks/subagent-loading-hook.cjs +1 -1
package/claude-hooks/task-progress-hook.cjs +7 -7
package/claude-hooks/task-progress-hook.js +3 -3
package/claude-hooks/team-comms-enforcer.cjs +49 -47
package/dist/claude-sessions/sessionParser.js +5 -0
package/dist/codebase/codebaseIndexer.js +48 -17
package/dist/codebase/exclusions.js +3 -4
package/dist/codebase/index.js +4 -0
package/dist/codebase/pdfExtractor.js +298 -0
package/dist/dashboard/api/taskTeamMembers.js +2 -2
package/dist/db/bigBrainMigrations.js +29 -0
package/dist/hooks/hookManager.js +4 -4
package/dist/hooks/teamFramingCli.js +1 -1
package/dist/hooks/teamMemberPrepromptHook.js +5 -5
package/dist/init/claudeConfigInjector.js +2 -2
package/dist/mcp/compactionProxy.js +834 -186
package/dist/mcp/compactionProxyDaemon.js +112 -37
package/dist/mcp/contextVault.js +439 -0
package/dist/mcp/embeddingServerManager.js +61 -1
package/dist/mcp/mcpProtocolHandler.js +6 -1
package/dist/mcp/miniCOTServerManager.js +82 -8
package/dist/mcp/specMemServer.js +45 -10
package/dist/mcp/toolRegistry.js +6 -0
package/dist/startup/startupIndexing.js +14 -0
package/dist/team-members/taskOrchestrator.js +3 -3
package/dist/team-members/taskTeamMemberLogger.js +2 -2
package/dist/tools/goofy/deployTeamMember.js +3 -3
package/dist/tools/goofy/digInTheVault.js +81 -0
package/dist/tools/goofy/stashTheGoods.js +56 -0
package/dist/tools/teamMemberDeployer.js +2 -2
package/dist/watcher/changeHandler.js +65 -8
package/dist/watcher/changeQueue.js +20 -1
package/embedding-sandbox/mini-cot-service.py +11 -13
package/embedding-sandbox/pdf-text-extract.py +208 -0
package/package.json +1 -1
package/scripts/deploy-hooks.cjs +2 -2
package/scripts/global-postinstall.cjs +2 -2
package/scripts/specmem-init.cjs +130 -36
package/specmem/model-config.json +6 -6
package/specmem/supervisord.conf +1 -1
package/svg-sections/readme-token-compaction.svg +246 -0

package/claude-hooks/team-comms-enforcer.cjs CHANGED Viewed

@@ -77,7 +77,7 @@ try {
 // CONFIGURATION
 // ============================================================================
 const MAX_SEARCHES_BEFORE_BLOCK = 2;  // Every other search must use find_code_pointers/find_memory
-const TEAM_COMMS_CHECK_INTERVAL = 4;  // MUST read_team_messages every 4 tool usages
+const TEAM_COMMS_CHECK_INTERVAL = 3;  // MUST send_team_message every 3 tool usages
 const BROADCAST_CHECK_INTERVAL = 5;   // MUST read_team_messages w/ include_broadcasts every 5 tool usages
 const HELP_CHECK_INTERVAL = 8;        // Check help requests every 8 tool usages
@@ -124,8 +124,8 @@ const WRITE_TOOLS = ['Edit', 'Write', 'NotebookEdit'];
 // FULL COMPLIANCE TOOLS - agents use these to bypass everything
 // Requires: announced + claimed + usedMemoryTools
 // - Bash: can run grep/cat/sed/echo to bypass all limits
-// - Task: can spawn sub-agents to bypass limits
-const FULL_COMPLIANCE_TOOLS = ['Bash', 'Task'];
+// - Agent: can spawn sub-agents to bypass limits
+const FULL_COMPLIANCE_TOOLS = ['Bash', 'Agent'];
 // Tools that are always allowed (reading team state + cross-swarm help + research)
 const ALWAYS_ALLOWED = [
@@ -149,7 +149,6 @@ const ALWAYS_ALLOWED = [
   'WebFetch',
   'WebSearch',
   'ToolSearch',
-  'Read',
 ];
 // ============================================================================
@@ -222,33 +221,11 @@ function isRunningAsAgent() {
   // Deployed team members — always enforce
   if (isTeamMemberFn()) return true;
-  // Method 2: General-purpose subagents (CLAUDE_SUBAGENT=1)
-  // These DO have MCP tools and SHOULD be enforced.
-  // Exclude Explore/Plan agents — they don't have MCP tools and can't comply.
-  // We check agents.json to see if the active subagent has MCP tools.
+  // Method 2: CLAUDE_SUBAGENT=1 — env var is proof enough, no agents.json check needed
   if (process.env.CLAUDE_SUBAGENT === '1' || process.env.CLAUDE_AGENT_ID) {
-    try {
-      const agentsFile = `${PROJECT_TMP_DIR}/agents.json`;
-      if (fs.existsSync(agentsFile)) {
-        const data = JSON.parse(fs.readFileSync(agentsFile, 'utf8'));
-        const now = Date.now();
-        for (const agent of Object.values(data.agents || {})) {
-          // Active agent (started within 10 min, no endTime)
-          if (!agent.endTime && agent.startTime && (now - agent.startTime < 600000)) {
-            // Check if this agent has MCP tools (general-purpose agents do)
-            const tools = agent.tools || [];
-            const hasMcpTools = tools.some(t => t.startsWith('mcp__specmem__'));
-            if (hasMcpTools) return true;
-          }
-        }
-      }
-    } catch {}
-    // No agents.json or no MCP tools found — this is likely Explore/Plan, skip enforcement
-    return false;
+    return true;
   }
-  // Method 3: Check subagent tracking as fallback (parent context seeing active agents)
-  // This does NOT enforce on the parent — only on processes with CLAUDE_SUBAGENT=1
   return false;
 }
@@ -343,6 +320,10 @@ process.stdin.on('end', () => {
     // ========================================================================
     if (ANNOUNCE_TOOLS.includes(toolName)) {
       state.announced = true;
+      // Reset comms counter on SEND (agents must send updates, not just read)
+      state.commsToolCount = 0;
+      state.lastCommsCheck = Date.now();
+      state.needsCommsCheck = false;
     }
     if (CLAIM_TOOLS.includes(toolName)) {
       state.claimed = true;
@@ -390,12 +371,10 @@ process.stdin.on('end', () => {
       state.searchCount = 0;  // Reset search counter — allows next 2 searches
       // usedMemoryTools resets to false after 2 more searches (see BASIC_SEARCH_TOOLS block)
     }
-    // Track team comms reads - resets comms counter
+    // Track team comms reads - resets BROADCAST counter only
+    // Comms counter now resets on SEND via ANNOUNCE_TOOLS, not on READ
     if (BROADCAST_CHECK_TOOLS.includes(toolName)) {
-      state.commsToolCount = 0;
-      state.lastCommsCheck = Date.now();
-      state.needsCommsCheck = false;
-      // Also reset broadcast counter IF they included broadcasts
+      // Broadcast counter reset IF they included broadcasts
       const params = data.tool_input || {};
       if (params.include_broadcasts !== false) {
         state.broadcastToolCount = 0;
@@ -431,16 +410,16 @@ process.stdin.on('end', () => {
     state.helpToolUsageCount = (state.helpToolUsageCount || 0) + 1;
     // ========================================================================
-    // HARD BLOCK: Must read team messages every 4 tool usages
-    // read_team_messages() satisfies this - any mode
+    // HARD BLOCK: Must send team message every 3 tool usages
+    // send_team_message() or broadcast_to_team() satisfies this
     // ========================================================================
-    if (state.commsToolCount >= TEAM_COMMS_CHECK_INTERVAL && !BROADCAST_CHECK_TOOLS.includes(toolName)) {
+    if (state.commsToolCount >= TEAM_COMMS_CHECK_INTERVAL && !ANNOUNCE_TOOLS.includes(toolName)) {
       state.needsCommsCheck = true;
       state.blockedCount++;
       saveTracking(tracking);
       console.log(blockResponse(
-        'mcp__specmem__read_team_messages',
-        `Quick check-in — other team members may have updates that affect your work. Call: read_team_messages({include_swarms: true, limit: 5})`
+        'mcp__specmem__send_team_message',
+        `Time to update the team on your progress. Call: send_team_message({type:"status", message:"[what you're doing / what you found]"})`
       ));
       return;
     }
@@ -488,7 +467,7 @@ process.stdin.on('end', () => {
     // ========================================================================
     if (state.commsToolCount === TEAM_COMMS_CHECK_INTERVAL - 1) {
       console.log(allowWithReminder(
-        `Heads up — good time to check in with the team: read_team_messages({include_swarms: true, limit: 5})`
+        `Heads up — good time to update the team: send_team_message({type:"status", message:"[progress update]"})`
       ));
       // Don't return - continue to other checks
     }
@@ -609,18 +588,41 @@ process.stdin.on('end', () => {
     }
     // ========================================================================
-    // CLAIM RELEASE ENFORCEMENT — After ANY edit, BLOCK until release
-    // Flow: claim_task → Edit/Write → release_task → claim_task → Edit/Write → release_task
+    // CLAIM RELEASE + NOTIFICATION ENFORCEMENT — After edit, BLOCK until release AND notify
+    // Flow: claim_task → Edit/Write → release_task + send_team_message → next task
     // ========================================================================
     if (state.editedFiles && state.editedFiles.length > 0 && state.claimed && !WRITE_TOOLS.includes(toolName)) {
-      // Allow: release_task, always-allowed tools, and write tools (handled in WRITE_TOOLS block)
-      if (!ALWAYS_ALLOWED.includes(toolName) && toolName !== 'mcp__specmem__release_task') {
+      const isReleaseTool = toolName === 'mcp__specmem__release_task';
+      const isNotifyTool = ANNOUNCE_TOOLS.includes(toolName);
+      // Track completion of release/notify obligations
+      if (isReleaseTool) state.releasedClaim = true;
+      if (isNotifyTool) state.releaseNotified = true;
+      // Both obligations met — clear state and continue
+      if (state.releasedClaim && state.releaseNotified) {
+        state.editedFiles = [];
+        state.releasedClaim = false;
+        state.releaseNotified = false;
+        state.claimed = false;
+        state.currentClaimId = null;
+      }
+      // Allow release/notify tools and always-allowed tools through
+      else if (!isReleaseTool && !isNotifyTool && !ALWAYS_ALLOWED.includes(toolName)) {
         state.blockedCount++;
         saveTracking(tracking);
-        console.log(blockResponse(
-          'mcp__specmem__release_task',
-          `You're done editing ${state.editedFiles[state.editedFiles.length - 1]} — release the claim so other team members can work on it. Call: release_task({claimId:"${state.currentClaimId || 'your-claim-id'}"})`
-        ));
+        if (!state.releasedClaim) {
+          console.log(blockResponse(
+            'mcp__specmem__release_task',
+            `Done editing ${state.editedFiles[state.editedFiles.length - 1]} — release the claim so others can work on it. Call: release_task({claimId:"${state.currentClaimId || 'your-claim-id'}"})`
+          ));
+        } else {
+          console.log(blockResponse(
+            'mcp__specmem__send_team_message',
+            `Claim released — now notify the team about your changes. Call: send_team_message({type:"update", message:"Finished editing ${state.editedFiles[state.editedFiles.length - 1]}: [describe what you changed]"})`
+          ));
+        }
         return;
       }
     }

package/dist/claude-sessions/sessionParser.js CHANGED Viewed

@@ -996,6 +996,11 @@ export function isToolOrThinkingContent(content) {
         return true;
     if (trimmed.startsWith('[Tool:'))
         return true;
+    // Skip task/agent notification XML blocks — system noise, not conversation
+    if (trimmed.startsWith('<task-notification>'))
+        return true;
+    if (trimmed.includes('<task-id>') && trimmed.includes('</task-id>'))
+        return true;
     // Check for [CLAUDE] prefixed tool versions
     if (trimmed.startsWith('[CLAUDE] [Tools:'))
         return true;

package/dist/codebase/codebaseIndexer.js CHANGED Viewed

@@ -28,6 +28,7 @@ import * as os from 'os';
 import { v4 as uuidv4 } from 'uuid';
 import chokidar from 'chokidar';
 import { logger } from '../utils/logger.js';
+import { extractPdfText, extractPdfBatch, isPdfFile } from './pdfExtractor.js';
 import { getProjectPath } from '../config.js';
 import { getCoordinator } from '../coordination/integration.js';
 /**
@@ -36,15 +37,15 @@ import { getCoordinator } from '../coordination/integration.js';
  */
 function loadResourceLimits() {
     const limits = {
-        cpuMax: 40,          // max CPU % target (back-pressure threshold)
+        cpuMax: 35,          // max CPU % target (back-pressure threshold)
         cpuMin: 10,          // min CPU % (crawl mode)
-        ramMaxMb: 6000,      // max RAM MB
+        ramMaxMb: 4000,      // max RAM MB (safe for 8GB laptops)
         ramMinMb: 2000,      // min RAM MB
         batchSize: 25,       // files per batch (was 200!)
-        maxConcurrency: 8,   // max parallel file reads within a batch
+        maxConcurrency: 4,   // max parallel file reads (safe for dual-core i3s)
         batchDelayMs: 50,    // delay between batches (ms)
         batchDelayMaxMs: 2000, // max delay under heavy load
-        cpuCoreMax: 0,       // 0 = auto (use all cores)
+        cpuCoreMax: 2,       // max CPU cores (safe for dual-core i3s)
     };
     // 1. Read from model-config.json
     try {
@@ -177,7 +178,8 @@ const DEFAULT_CONFIG = {
         '.c', '.cpp', '.h', '.hpp',
         '.swift',
         '.dockerfile', 'Dockerfile',
-        '.env.example', '.env.template'
+        '.env.example', '.env.template',
+        '.pdf'
     ],
     maxFileSizeBytes: 1024 * 1024, // 1MB
     generateEmbeddings: true,
@@ -444,16 +446,25 @@ export class CodebaseIndexer {
                     const stats = await fs.stat(filePath);
                     if (stats.size > this.config.maxFileSizeBytes)
                         return;
-                    if (await this.isBinaryFile(filePath))
-                        return;
-                    const content = await fs.readFile(filePath, 'utf-8');
+                    // PDF files: extract text via PyMuPDF instead of reading as UTF-8
+                    let content;
+                    if (isPdfFile(filePath)) {
+                        const pdfResult = await extractPdfText(filePath);
+                        if (!pdfResult || !pdfResult.text) return;
+                        content = pdfResult.text;
+                        logger.debug({ filePath: relativePath, pages: pdfResult.pages, chars: pdfResult.chars }, 'PDF text extracted');
+                    } else {
+                        if (await this.isBinaryFile(filePath))
+                            return;
+                        content = await fs.readFile(filePath, 'utf-8');
+                    }
                     const contentHash = this.hashContent(content);
                     const existingHash = existingHashes.get(relativePath);
                     if (existingHash === contentHash) {
                         skipped++;
                         return;
                     }
-                    const indexedFile = await this.indexFile(filePath);
+                    const indexedFile = await this.indexFile(filePath, isPdfFile(filePath) ? content : undefined);
                     if (indexedFile) {
                         this.index.set(indexedFile.filePath, indexedFile);
                         changedFiles.push(indexedFile);
@@ -616,9 +627,17 @@ export class CodebaseIndexer {
                     if (existing && existing.mtime && stats.mtime.getTime() <= existing.mtime) {
                         return { skipped: true, relativePath, mtimeSkip: true };
                     }
-                    if (await this.isBinaryFile(filePath))
-                        return null;
-                    const content = await fs.readFile(filePath, 'utf-8');
+                    // PDF files: extract text via PyMuPDF instead of reading as UTF-8
+                    let content;
+                    if (isPdfFile(filePath)) {
+                        const pdfResult = await extractPdfText(filePath);
+                        if (!pdfResult || !pdfResult.text) return null;
+                        content = pdfResult.text;
+                    } else {
+                        if (await this.isBinaryFile(filePath))
+                            return null;
+                        content = await fs.readFile(filePath, 'utf-8');
+                    }
                     const contentHash = this.hashContent(content);
                     if (existing && existing.hash === contentHash) {
                         return { skipped: true, relativePath, hashSkip: true };
@@ -1178,7 +1197,7 @@ export class CodebaseIndexer {
     /**
      * indexFile - reads and indexes a single file with enhanced analysis
      */
-    async indexFile(absolutePath) {
+    async indexFile(absolutePath, preExtractedContent) {
         try {
             const stats = await fs.stat(absolutePath);
             // skip if too large
@@ -1186,11 +1205,23 @@ export class CodebaseIndexer {
                 logger.debug({ path: absolutePath, size: stats.size }, 'skipping large file');
                 return null;
             }
-            // skip if binary
-            if (await this.isBinaryFile(absolutePath)) {
-                return null;
+            // PDF files: use pre-extracted content or extract on demand
+            let content;
+            if (isPdfFile(absolutePath)) {
+                if (preExtractedContent) {
+                    content = preExtractedContent;
+                } else {
+                    const pdfResult = await extractPdfText(absolutePath);
+                    if (!pdfResult || !pdfResult.text) return null;
+                    content = pdfResult.text;
+                }
+            } else {
+                // skip if binary
+                if (await this.isBinaryFile(absolutePath)) {
+                    return null;
+                }
+                content = await fs.readFile(absolutePath, 'utf-8');
             }
-            const content = await fs.readFile(absolutePath, 'utf-8');
             const relativePath = path.relative(this.config.codebasePath, absolutePath);
             const fileName = path.basename(absolutePath);
             const extension = path.extname(absolutePath).toLowerCase();

package/dist/codebase/exclusions.js CHANGED Viewed

@@ -47,7 +47,7 @@ export const EXCLUSION_CONFIG = {
         '*.db',
         // Binary assets
         '*.png', '*.jpg', '*.jpeg', '*.gif', '*.ico', '*.webp',
-        '*.pdf', '*.zip', '*.tar', '*.gz', '*.rar', '*.7z',
+        '*.zip', '*.tar', '*.gz', '*.rar', '*.7z',
         '*.mp3', '*.mp4', '*.avi', '*.mov', '*.mkv',
         '*.ttf', '*.woff', '*.woff2', '*.eot', '*.otf',
         '*.exe', '*.dll', '*.so', '*.dylib', '*.bin',
@@ -145,7 +145,6 @@ const DEFAULT_EXCLUSIONS = [
     '*.mp4',
     '*.avi',
     '*.mov',
-    '*.pdf',
     '*.zip',
     '*.tar',
     '*.gz',
@@ -547,8 +546,8 @@ const BINARY_EXTENSIONS = new Set([
     '.zip', '.tar', '.gz', '.bz2', '.7z', '.rar', '.xz', '.lz', '.lzma',
     // executables and libraries
     '.exe', '.dll', '.so', '.dylib', '.bin', '.out', '.app', '.msi', '.deb', '.rpm',
-    // documents (binary formats)
-    '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.odt', '.ods', '.odp',
+    // documents (binary formats — PDF handled by pdfExtractor.js)
+    '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.odt', '.ods', '.odp',
     // fonts
     '.ttf', '.otf', '.woff', '.woff2', '.eot',
     // databases

package/dist/codebase/index.js CHANGED Viewed

@@ -6,6 +6,10 @@
 // ========================================
 export { SkipTheBoringShit, isBinaryFile, getFileSizeBytes, getExclusionHandler, resetExclusionHandler, DEFAULT_EXCLUSIONS } from './exclusions.js';
 // ========================================
+// PDF EXTRACTION - pdfExtractor
+// ========================================
+export { extractPdfText, extractPdfBatch, isPdfFile, isPdfExtractionAvailable } from './pdfExtractor.js';
+// ========================================
 // LANGUAGE DETECTION - whatLanguageIsThis
 // ========================================
 export { WhatLanguageIsThis, getLanguageDetector, resetLanguageDetector, LANGUAGE_REGISTRY, EXTENSION_INDEX, FILENAME_MAPPINGS } from './languageDetection.js';

package/dist/codebase/pdfExtractor.js ADDED Viewed

@@ -0,0 +1,298 @@
+/**
+ * pdfExtractor.js — PDF text extraction for codebase indexing
+ *
+ * Spawns pdf-text-extract.py (PyMuPDF) as a child process.
+ * Digital PDFs: instant text extraction (0.003s/page).
+ * Scanned PDFs: Tesseract OCR fallback (1-3s/page).
+ *
+ * Used by codebaseIndexer.js and changeHandler.js to index PDF files
+ * alongside regular source code.
+ */
+import { spawn } from 'child_process';
+import { existsSync } from 'fs';
+import { join, dirname } from 'path';
+import { logger } from '../utils/logger.js';
+import { getPythonPath } from '../utils/projectEnv.js';
+const PDF_EXTRACT_TIMEOUT_MS = 60000; // 60s — generous for large scanned PDFs
+const MAX_PDF_PAGES = 100;
+// Cache the script path after first discovery
+let _cachedScriptPath = null;
+let _pymupdfAvailable = null; // null = unknown, true/false after first check
+/**
+ * Find the pdf-text-extract.py script.
+ * Search pattern matches miniCOTServerManager.findMiniCOTScript().
+ */
+function findPdfExtractScript() {
+    if (_cachedScriptPath && existsSync(_cachedScriptPath)) {
+        return _cachedScriptPath;
+    }
+    const specmemRoot = dirname(dirname(__dirname));
+    const possiblePaths = [
+        // SpecMem package root (works for all installs)
+        join(specmemRoot, 'embedding-sandbox', 'pdf-text-extract.py'),
+        // Direct package root (flat layout)
+        join(specmemRoot, 'pdf-text-extract.py'),
+        // Global npm install fallback
+        join(dirname(dirname(process.execPath)), 'lib', 'node_modules', 'specmem-hardwicksoftware', 'embedding-sandbox', 'pdf-text-extract.py'),
+    ];
+    for (const p of possiblePaths) {
+        if (existsSync(p)) {
+            _cachedScriptPath = p;
+            logger.debug({ path: p }, '[pdfExtractor] Found pdf-text-extract.py');
+            return p;
+        }
+    }
+    logger.warn({ searchedPaths: possiblePaths }, '[pdfExtractor] pdf-text-extract.py not found');
+    return null;
+}
+/**
+ * Extract text from a PDF file.
+ *
+ * @param {string} filePath - Absolute path to the PDF file
+ * @param {object} options - { maxPages?: number, language?: string }
+ * @returns {Promise<{text: string, pages: number, scannedPages?: number[], chars: number} | null>}
+ *          Returns null if extraction fails or pymupdf not available.
+ */
+export async function extractPdfText(filePath, options = {}) {
+    const { maxPages = MAX_PDF_PAGES, language = 'eng' } = options;
+    // Fast bail if we already know pymupdf is unavailable
+    if (_pymupdfAvailable === false) {
+        return null;
+    }
+    const scriptPath = findPdfExtractScript();
+    if (!scriptPath) {
+        logger.warn('[pdfExtractor] Cannot extract PDF — script not found');
+        return null;
+    }
+    const pythonPath = getPythonPath();
+    return new Promise((resolve) => {
+        const args = [scriptPath, filePath, '--max-pages', String(maxPages), '--language', language];
+        let stdout = '';
+        let stderr = '';
+        let resolved = false;
+        const proc = spawn(pythonPath, args, {
+            timeout: PDF_EXTRACT_TIMEOUT_MS,
+            stdio: ['ignore', 'pipe', 'pipe'],
+            env: { ...process.env },
+        });
+        const timeoutId = setTimeout(() => {
+            if (!resolved) {
+                resolved = true;
+                proc.kill('SIGKILL');
+                logger.warn({ filePath, timeoutMs: PDF_EXTRACT_TIMEOUT_MS }, '[pdfExtractor] PDF extraction timed out');
+                resolve(null);
+            }
+        }, PDF_EXTRACT_TIMEOUT_MS);
+        proc.stdout.on('data', (data) => { stdout += data.toString(); });
+        proc.stderr.on('data', (data) => { stderr += data.toString(); });
+        proc.on('close', (code) => {
+            clearTimeout(timeoutId);
+            if (resolved) return;
+            resolved = true;
+            if (stderr && stderr.includes('pymupdf not found')) {
+                _pymupdfAvailable = false;
+                logger.warn('[pdfExtractor] pymupdf not installed — PDF indexing disabled');
+                resolve(null);
+                return;
+            }
+            if (code !== 0) {
+                logger.warn({ filePath, code, stderr: stderr.slice(0, 200) }, '[pdfExtractor] PDF extraction failed');
+                resolve(null);
+                return;
+            }
+            try {
+                const result = JSON.parse(stdout.trim());
+                if (result.error) {
+                    logger.warn({ filePath, error: result.error }, '[pdfExtractor] PDF extraction error');
+                    resolve(null);
+                    return;
+                }
+                // Mark pymupdf as available on first success
+                if (_pymupdfAvailable === null) {
+                    _pymupdfAvailable = true;
+                }
+                resolve({
+                    text: result.text,
+                    pages: result.pages,
+                    chars: result.chars,
+                    scannedPages: result.scanned_pages || [],
+                    truncated: result.truncated || false,
+                    totalPages: result.total_pages || result.pages,
+                });
+            } catch (parseErr) {
+                logger.warn({ filePath, stdout: stdout.slice(0, 200) }, '[pdfExtractor] Failed to parse extraction result');
+                resolve(null);
+            }
+        });
+        proc.on('error', (err) => {
+            clearTimeout(timeoutId);
+            if (!resolved) {
+                resolved = true;
+                logger.warn({ filePath, error: err.message }, '[pdfExtractor] Failed to spawn Python');
+                resolve(null);
+            }
+        });
+    });
+}
+/**
+ * Extract text from multiple PDFs in a single Python process (batch mode).
+ * One Python startup for N PDFs — avoids interpreter overhead per file.
+ * Returns a Map<filePath, result> where result is the extraction output or null.
+ *
+ * @param {string[]} filePaths - Absolute paths to PDF files
+ * @param {object} options - { maxPages?: number, language?: string }
+ * @returns {Promise<Map<string, {text: string, pages: number, chars: number} | null>>}
+ */
+export async function extractPdfBatch(filePaths, options = {}) {
+    const { maxPages = MAX_PDF_PAGES, language = 'eng' } = options;
+    const results = new Map();
+    if (!filePaths.length) return results;
+    // Fast bail if pymupdf is known unavailable
+    if (_pymupdfAvailable === false) {
+        for (const fp of filePaths) results.set(fp, null);
+        return results;
+    }
+    const scriptPath = findPdfExtractScript();
+    if (!scriptPath) {
+        for (const fp of filePaths) results.set(fp, null);
+        return results;
+    }
+    const pythonPath = getPythonPath();
+    // Batch timeout: 60s base + 10s per PDF (scanned pages take 1-3s each)
+    const batchTimeout = PDF_EXTRACT_TIMEOUT_MS + (filePaths.length * 10000);
+    return new Promise((resolve) => {
+        const args = [scriptPath, '--batch', ...filePaths, '--max-pages', String(maxPages), '--language', language];
+        let stdout = '';
+        let stderr = '';
+        let resolved = false;
+        const proc = spawn(pythonPath, args, {
+            stdio: ['ignore', 'pipe', 'pipe'],
+            env: { ...process.env },
+        });
+        const timeoutId = setTimeout(() => {
+            if (!resolved) {
+                resolved = true;
+                proc.kill('SIGKILL');
+                logger.warn({ count: filePaths.length, timeoutMs: batchTimeout }, '[pdfExtractor] Batch extraction timed out');
+                // Return whatever we parsed so far + null for the rest
+                for (const fp of filePaths) {
+                    if (!results.has(fp)) results.set(fp, null);
+                }
+                resolve(results);
+            }
+        }, batchTimeout);
+        proc.stdout.on('data', (data) => { stdout += data.toString(); });
+        proc.stderr.on('data', (data) => { stderr += data.toString(); });
+        proc.on('close', (code) => {
+            clearTimeout(timeoutId);
+            if (resolved) return;
+            resolved = true;
+            if (stderr && stderr.includes('pymupdf not found')) {
+                _pymupdfAvailable = false;
+                logger.warn('[pdfExtractor] pymupdf not installed — PDF indexing disabled');
+                for (const fp of filePaths) results.set(fp, null);
+                resolve(results);
+                return;
+            }
+            // Parse JSONL — one JSON object per line
+            const lines = stdout.split('\n').filter(l => l.trim());
+            for (const line of lines) {
+                try {
+                    const result = JSON.parse(line);
+                    const path = result.path;
+                    if (!path) continue;
+                    if (result.error) {
+                        logger.warn({ path, error: result.error }, '[pdfExtractor] PDF extraction error');
+                        results.set(path, null);
+                        continue;
+                    }
+                    if (_pymupdfAvailable === null) _pymupdfAvailable = true;
+                    results.set(path, {
+                        text: result.text,
+                        pages: result.pages,
+                        chars: result.chars,
+                        scannedPages: result.scanned_pages || [],
+                        truncated: result.truncated || false,
+                        totalPages: result.total_pages || result.pages,
+                    });
+                } catch (e) {
+                    logger.debug({ line: line.slice(0, 100) }, '[pdfExtractor] Failed to parse JSONL line');
+                }
+            }
+            // Fill nulls for any missing paths
+            for (const fp of filePaths) {
+                if (!results.has(fp)) results.set(fp, null);
+            }
+            logger.info({
+                total: filePaths.length,
+                extracted: [...results.values()].filter(v => v !== null).length,
+                failed: [...results.values()].filter(v => v === null).length,
+            }, '[pdfExtractor] Batch extraction complete');
+            resolve(results);
+        });
+        proc.on('error', (err) => {
+            clearTimeout(timeoutId);
+            if (!resolved) {
+                resolved = true;
+                logger.warn({ error: err.message }, '[pdfExtractor] Failed to spawn Python for batch');
+                for (const fp of filePaths) results.set(fp, null);
+                resolve(results);
+            }
+        });
+    });
+}
+/**
+ * Check if PDF extraction is available (pymupdf installed + script found).
+ */
+export function isPdfExtractionAvailable() {
+    if (_pymupdfAvailable === false) return false;
+    return findPdfExtractScript() !== null;
+}
+/**
+ * Check if a file is a PDF by extension.
+ */
+export function isPdfFile(filePath) {
+    return filePath.toLowerCase().endsWith('.pdf');
+}

package/dist/dashboard/api/taskTeamMembers.js CHANGED Viewed

@@ -1,7 +1,7 @@
 /**
- * taskTeamMembers.ts - API endpoints for Task team member tracking
+ * taskTeamMembers.ts - API endpoints for Agent team member tracking
  *
- * yo fr fr this lets you view and manually log Task team members
+ * yo fr fr this lets you view and manually log Agent team members
  */
 import { Router } from 'express';
 import { z } from 'zod';