specmem-hardwicksoftware 3.7.34 → 3.7.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/CHANGELOG.md +34 -0
  2. package/README.md +11 -15
  3. package/bin/specmem-console.cjs +839 -51
  4. package/claude-hooks/agent-chooser-hook.js +6 -6
  5. package/claude-hooks/agent-loading-hook.cjs +16 -16
  6. package/claude-hooks/agent-loading-hook.js +18 -18
  7. package/claude-hooks/agent-type-matcher.js +1 -1
  8. package/claude-hooks/background-completion-silencer.js +1 -1
  9. package/claude-hooks/file-claim-enforcer.cjs +37 -36
  10. package/claude-hooks/output-cleaner.cjs +1 -1
  11. package/claude-hooks/settings.json +27 -3
  12. package/claude-hooks/specmem-search-enforcer.cjs +2 -11
  13. package/claude-hooks/specmem-team-member-inject.js +1 -1
  14. package/claude-hooks/specmem-unified-hook.py +1 -1
  15. package/claude-hooks/subagent-loading-hook.cjs +1 -1
  16. package/claude-hooks/task-progress-hook.cjs +7 -7
  17. package/claude-hooks/task-progress-hook.js +3 -3
  18. package/claude-hooks/team-comms-enforcer.cjs +49 -47
  19. package/dist/claude-sessions/sessionParser.js +5 -0
  20. package/dist/codebase/codebaseIndexer.js +48 -17
  21. package/dist/codebase/exclusions.js +3 -4
  22. package/dist/codebase/index.js +4 -0
  23. package/dist/codebase/pdfExtractor.js +298 -0
  24. package/dist/dashboard/api/taskTeamMembers.js +2 -2
  25. package/dist/db/bigBrainMigrations.js +29 -0
  26. package/dist/hooks/hookManager.js +4 -4
  27. package/dist/hooks/teamFramingCli.js +1 -1
  28. package/dist/hooks/teamMemberPrepromptHook.js +5 -5
  29. package/dist/init/claudeConfigInjector.js +2 -2
  30. package/dist/mcp/compactionProxy.js +834 -186
  31. package/dist/mcp/compactionProxyDaemon.js +112 -37
  32. package/dist/mcp/contextVault.js +439 -0
  33. package/dist/mcp/embeddingServerManager.js +61 -1
  34. package/dist/mcp/mcpProtocolHandler.js +6 -1
  35. package/dist/mcp/miniCOTServerManager.js +82 -8
  36. package/dist/mcp/specMemServer.js +45 -10
  37. package/dist/mcp/toolRegistry.js +6 -0
  38. package/dist/startup/startupIndexing.js +14 -0
  39. package/dist/team-members/taskOrchestrator.js +3 -3
  40. package/dist/team-members/taskTeamMemberLogger.js +2 -2
  41. package/dist/tools/goofy/deployTeamMember.js +3 -3
  42. package/dist/tools/goofy/digInTheVault.js +81 -0
  43. package/dist/tools/goofy/stashTheGoods.js +56 -0
  44. package/dist/tools/teamMemberDeployer.js +2 -2
  45. package/dist/watcher/changeHandler.js +65 -8
  46. package/dist/watcher/changeQueue.js +20 -1
  47. package/dist/watcher/index.js +37 -2
  48. package/embedding-sandbox/mini-cot-service.py +11 -13
  49. package/embedding-sandbox/pdf-text-extract.py +208 -0
  50. package/package.json +1 -1
  51. package/scripts/deploy-hooks.cjs +2 -2
  52. package/scripts/global-postinstall.cjs +2 -2
  53. package/scripts/specmem-init.cjs +130 -36
  54. package/specmem/model-config.json +6 -6
  55. package/specmem/supervisord.conf +1 -1
  56. package/svg-sections/readme-token-compaction.svg +246 -0
@@ -21,6 +21,7 @@ import { getCoordinator } from '../coordination/integration.js';
21
21
  import { isMinifiedOrBundled, isBinaryFile, EXCLUSION_CONFIG } from '../codebase/exclusions.js';
22
22
  import { getProjectPathForInsert } from '../services/ProjectContext.js';
23
23
  import { getEmbeddingTimeout } from '../config/embeddingTimeouts.js';
24
+ import { extractPdfText, isPdfFile } from '../codebase/pdfExtractor.js';
24
25
  // Retry helper for transient embedding failures (timeout, socket reset, etc.)
25
26
  const WATCHER_MAX_RETRIES = parseInt(process.env['SPECMEM_WATCHER_RETRIES'] || '2');
26
27
  async function withWatcherRetry(operation, filePath) {
@@ -132,6 +133,51 @@ export class AutoUpdateTheMemories {
132
133
  this.stats.filesSkipped++;
133
134
  return;
134
135
  }
136
+ // PDF files: extract text via PyMuPDF instead of reading as UTF-8
137
+ if (isPdfFile(event.path)) {
138
+ const pdfResult = await extractPdfText(event.path);
139
+ if (!pdfResult || !pdfResult.text) {
140
+ logger.debug({ path: event.relativePath }, 'PDF extraction failed or empty — skipping');
141
+ this.stats.filesSkipped++;
142
+ return;
143
+ }
144
+ // Use extracted PDF text as content for the standard metadata flow
145
+ const metadata = await this.extractFileMetadata(event.path, event.relativePath, pdfResult.text);
146
+ if (metadata.size > this.config.maxFileSizeBytes) {
147
+ this.stats.filesSkipped++;
148
+ return;
149
+ }
150
+ const content = pdfResult.text;
151
+ const existingMemory = await this.findMemoryByContentHash(metadata.contentHash);
152
+ if (existingMemory) {
153
+ this.stats.filesSkipped++;
154
+ return;
155
+ }
156
+ let embedding;
157
+ const WATCHER_EMBEDDING_TIMEOUT = getEmbeddingTimeout('fileWatcher');
158
+ try {
159
+ embedding = await withWatcherRetry(async () => {
160
+ return new Promise((resolve, reject) => {
161
+ const timeoutId = setTimeout(() => reject(new Error('Embedding timeout for PDF')), WATCHER_EMBEDDING_TIMEOUT);
162
+ this.config.embeddingProvider.generateEmbedding(content)
163
+ .then(result => { clearTimeout(timeoutId); resolve(result); })
164
+ .catch(error => { clearTimeout(timeoutId); reject(error); });
165
+ });
166
+ }, event.path);
167
+ } catch (embErr) {
168
+ logger.warn({ path: event.relativePath, error: embErr.message }, 'PDF embedding failed — storing without embedding');
169
+ }
170
+ await this.storeMemory({
171
+ content,
172
+ metadata,
173
+ embedding,
174
+ tags: ['codebase', 'auto-ingested', 'pdf'],
175
+ });
176
+ this.stats.filesIngested++;
177
+ logger.info({ path: event.relativePath, pages: pdfResult.pages, chars: pdfResult.chars }, 'PDF file indexed');
178
+ this.coordinator.emitFileAdded(event.path, event.relativePath, metadata.size);
179
+ return;
180
+ }
135
181
  // check if binary
136
182
  if (await isBinaryFile(event.path)) {
137
183
  logger.debug({ path: event.relativePath }, 'skipping binary file');
@@ -250,14 +296,25 @@ export class AutoUpdateTheMemories {
250
296
  this.stats.filesSkipped++;
251
297
  return;
252
298
  }
253
- // FIX MED-13: Check binary before extractFileMetadata (same as handleFileAdded)
254
- if (await isBinaryFile(event.path)) {
255
- logger.debug({ path: event.relativePath }, 'skipping binary file update');
256
- this.stats.filesSkipped++;
257
- return;
299
+ // PDF files: extract text via PyMuPDF
300
+ let pdfContent = null;
301
+ if (isPdfFile(event.path)) {
302
+ const pdfResult = await extractPdfText(event.path);
303
+ if (!pdfResult || !pdfResult.text) {
304
+ this.stats.filesSkipped++;
305
+ return;
306
+ }
307
+ pdfContent = pdfResult.text;
308
+ } else {
309
+ // FIX MED-13: Check binary before extractFileMetadata (same as handleFileAdded)
310
+ if (await isBinaryFile(event.path)) {
311
+ logger.debug({ path: event.relativePath }, 'skipping binary file update');
312
+ this.stats.filesSkipped++;
313
+ return;
314
+ }
258
315
  }
259
316
  // extract new metadata (FIX 7.04: content included to avoid double read)
260
- const metadata = await this.extractFileMetadata(event.path, event.relativePath);
317
+ const metadata = await this.extractFileMetadata(event.path, event.relativePath, pdfContent);
261
318
  // check file size
262
319
  if (metadata.size > this.config.maxFileSizeBytes) {
263
320
  logger.warn({
@@ -419,9 +476,9 @@ export class AutoUpdateTheMemories {
419
476
  /**
420
477
  * extractFileMetadata - reads file and generates metadata
421
478
  */
422
- async extractFileMetadata(path, relativePath) {
479
+ async extractFileMetadata(path, relativePath, preExtractedContent) {
423
480
  const stats = await fs.stat(path);
424
- const content = await fs.readFile(path, 'utf-8');
481
+ const content = preExtractedContent || await fs.readFile(path, 'utf-8');
425
482
  const contentHash = this.hashContent(content);
426
483
  return {
427
484
  path,
@@ -22,6 +22,7 @@ export class QueueTheChangesUp {
22
22
  config;
23
23
  queue = [];
24
24
  processing = false;
25
+ paused = false; // pause queue processing without stopping (e.g. during background indexing)
25
26
  processingInterval = null;
26
27
  changeHandler;
27
28
  // deduplication map: path -> latest queued change
@@ -162,13 +163,31 @@ export class QueueTheChangesUp {
162
163
  logger.debug({ cancelledTimeouts: cancelledCount }, 'cancelled pending retry timeouts');
163
164
  }
164
165
  }
166
+ /**
167
+ * pause - temporarily halt batch processing without stopping the queue.
168
+ * Changes still enqueue but won't be processed until resume().
169
+ * Used during background indexing to avoid resource contention.
170
+ */
171
+ pause(reason = '') {
172
+ if (this.paused) return;
173
+ this.paused = true;
174
+ logger.info({ reason, pendingCount: this.queue.length }, 'queue PAUSED');
175
+ }
176
+ /**
177
+ * resume - resume batch processing after pause
178
+ */
179
+ resume() {
180
+ if (!this.paused) return;
181
+ this.paused = false;
182
+ logger.info({ pendingCount: this.queue.length }, 'queue RESUMED');
183
+ }
165
184
  /**
166
185
  * processBatch - processes a batch of changes
167
186
  *
168
187
  * nah bruh processing this whole batch at once
169
188
  */
170
189
  async processBatch() {
171
- if (this.queue.length === 0) {
190
+ if (this.paused || this.queue.length === 0) {
172
191
  return;
173
192
  }
174
193
  logger.debug({
@@ -31,6 +31,12 @@ export class WatcherManager {
31
31
  syncTimeout = null;
32
32
  lastLowScoreResyncAt = 0;
33
33
  lastLowScoreResyncScore = null; // track score at last resync to detect drops
34
+ // Drift-resync plateau detection: stop resyncing if score isn't improving
35
+ lastDriftResyncAt = 0;
36
+ lastDriftResyncScore = null;
37
+ driftResyncNoImprovementCount = 0; // consecutive resyncs that didn't improve score
38
+ static DRIFT_RESYNC_MAX_NO_IMPROVEMENT = 2; // after 2 consecutive no-improvement resyncs, accept plateau
39
+ static DRIFT_RESYNC_COOLDOWN_MS = 15 * 60 * 1000; // 15 min cooldown between drift resyncs
34
40
  constructor(config) {
35
41
  // Create handler first - it's the core component
36
42
  this.handler = new AutoUpdateTheMemories(config.handler);
@@ -126,7 +132,7 @@ export class WatcherManager {
126
132
  const LOW_SCORE_THRESHOLD = parseFloat(process.env['SPECMEM_LOW_SCORE_THRESHOLD'] || '0.85');
127
133
  const LOW_SCORE_DROP_THRESHOLD = parseFloat(process.env['SPECMEM_LOW_SCORE_DROP_THRESHOLD'] || '0.10');
128
134
  const LOW_SCORE_DEBOUNCE_MS = parseInt(process.env['SPECMEM_LOW_SCORE_DEBOUNCE_MS'] || String(15 * 60 * 1000), 10);
129
- if (report.syncScore < LOW_SCORE_THRESHOLD) {
135
+ if (report.syncScore <= LOW_SCORE_THRESHOLD) {
130
136
  // First time seeing low score — always resync
131
137
  // After that, only resync if score dropped by >=10% from the post-resync score
132
138
  const scoreDrop = this.lastLowScoreResyncScore !== null
@@ -163,8 +169,21 @@ export class WatcherManager {
163
169
  missingFromMcp: report.missingFromMcp.length,
164
170
  contentMismatch: report.contentMismatch.length
165
171
  }, 'drift detected during periodic check');
166
- // Auto-resync when drift is detected
172
+ // Auto-resync when drift is detected — with plateau detection + cooldown
167
173
  if (report.missingFromMcp.length > 0 || report.contentMismatch.length > 0) {
174
+ // Plateau guard: if we've resynced N times without improvement, accept the score
175
+ if (this.driftResyncNoImprovementCount >= WatcherManager.DRIFT_RESYNC_MAX_NO_IMPROVEMENT) {
176
+ logger.info({ syncScore: report.syncScore, noImprovementCount: this.driftResyncNoImprovementCount }, 'drift-resync plateau reached — score is stable, accepting current sync level');
177
+ return;
178
+ }
179
+ // Cooldown guard: don't resync more than once per 15 min via drift path
180
+ const now = Date.now();
181
+ const driftCooldownRemaining = WatcherManager.DRIFT_RESYNC_COOLDOWN_MS - (now - this.lastDriftResyncAt);
182
+ if (driftCooldownRemaining > 0) {
183
+ logger.debug({ syncScore: report.syncScore, cooldownRemainingSec: Math.round(driftCooldownRemaining / 1000) }, 'drift-resync on cooldown — skipping');
184
+ return;
185
+ }
186
+ this.lastDriftResyncAt = now;
168
187
  logger.info('periodic check triggering auto-resync...');
169
188
  const resyncResult = await this.syncChecker.resyncEverythingFrFr();
170
189
  logger.info({
@@ -175,6 +194,16 @@ export class WatcherManager {
175
194
  // Update score after resync
176
195
  const postReport = await this.syncChecker.checkSync();
177
196
  await this.writeSyncScore(postReport.syncScore);
197
+ // Plateau detection: did this resync actually improve the score?
198
+ const improvement = postReport.syncScore - (this.lastDriftResyncScore ?? 0);
199
+ if (improvement < 0.01) { // less than 1% improvement = no meaningful change
200
+ this.driftResyncNoImprovementCount++;
201
+ logger.warn({ syncScore: postReport.syncScore, previousScore: this.lastDriftResyncScore, noImprovementCount: this.driftResyncNoImprovementCount, maxAllowed: WatcherManager.DRIFT_RESYNC_MAX_NO_IMPROVEMENT }, 'drift-resync did not improve score — tracking plateau');
202
+ } else {
203
+ // Score improved — reset plateau counter
204
+ this.driftResyncNoImprovementCount = 0;
205
+ }
206
+ this.lastDriftResyncScore = postReport.syncScore;
178
207
  }
179
208
  }
180
209
  }
@@ -307,6 +336,12 @@ export class WatcherManager {
307
336
  * resync - manually trigger full resync
308
337
  */
309
338
  async resync() {
339
+ // Manual resync resets all plateau/cooldown state so it always runs fresh
340
+ this.driftResyncNoImprovementCount = 0;
341
+ this.lastDriftResyncAt = 0;
342
+ this.lastDriftResyncScore = null;
343
+ this.lastLowScoreResyncScore = null;
344
+ this.lastLowScoreResyncAt = 0;
310
345
  return await this.syncChecker.resyncEverythingFrFr();
311
346
  }
312
347
  /**
@@ -391,20 +391,18 @@ class ModelManager:
391
391
  file_name="model_quantized.onnx"
392
392
  )
393
393
  else:
394
- # Fallback: download PyTorch model from HuggingFace
395
- print(f"🧠 No local model, downloading {self.model_name}...", file=sys.stderr)
396
- from transformers import AutoModelForCausalLM
397
- cache_dir = '/tmp/mini-cot-models'
398
- os.makedirs(cache_dir, exist_ok=True)
399
- self.torch_model = AutoModelForCausalLM.from_pretrained(
400
- self.model_name,
401
- cache_dir=cache_dir,
402
- torch_dtype=torch.float32,
403
- low_cpu_mem_usage=True,
404
- trust_remote_code=True
394
+ # No local model found — refuse to download from the internet
395
+ search_paths = [
396
+ os.environ.get('SPECMEM_MODEL_CACHE', '(not set)'),
397
+ os.path.join(os.path.dirname(__file__), '..', 'models', 'pythia-410m-onnx-quant'),
398
+ '/app/models/pythia-onnx-quant',
399
+ ]
400
+ raise RuntimeError(
401
+ f"Local ONNX model not found. Searched:\n"
402
+ + "\n".join(f" - {p}" for p in search_paths)
403
+ + "\n\nRun `specmem init` to download models via Git LFS release tarball."
404
+ + "\nSpecMem will NOT download models from the internet at runtime."
405
405
  )
406
- self.torch_model.eval()
407
- torch.set_grad_enabled(False)
408
406
 
409
407
  self.torch_loaded = True
410
408
  print(f"🧠 Generation model loaded for crawl analysis", file=sys.stderr)
@@ -0,0 +1,208 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ pdf-text-extract.py — PDF text extraction for SpecMem codebase indexing
4
+
5
+ Uses PyMuPDF (fitz) for instant digital PDF text extraction (0.003s/page).
6
+ Falls back to Tesseract OCR via PyMuPDF's built-in integration for scanned pages.
7
+
8
+ Usage:
9
+ # Single file mode
10
+ python3 pdf-text-extract.py <pdf_path> [--max-pages N] [--language LANG]
11
+
12
+ # Batch mode (JSONL — one result per line, one Python startup for N PDFs)
13
+ python3 pdf-text-extract.py --batch file1.pdf file2.pdf ... [--max-pages N]
14
+
15
+ Output (JSON/JSONL to stdout):
16
+ {"path": "/abs/path.pdf", "text": "...", "pages": 5, "scanned_pages": [3], "chars": 12345}
17
+ {"path": "/abs/path2.pdf", "error": "..."}
18
+ """
19
+
20
+ import sys
21
+ import json
22
+ import os
23
+ import argparse
24
+
25
+ # ---------------------------------------------------------------------------
26
+ # Auto-install pymupdf if missing (matches frankenstein-embeddings pattern)
27
+ # ---------------------------------------------------------------------------
28
+ def _ensure_pymupdf():
29
+ try:
30
+ import pymupdf
31
+ return pymupdf
32
+ except ImportError:
33
+ pass
34
+ # Try legacy import name
35
+ try:
36
+ import fitz
37
+ return fitz
38
+ except ImportError:
39
+ pass
40
+ # Auto-install
41
+ try:
42
+ import subprocess
43
+ sys.stderr.write('[pdf-text-extract] pymupdf not found, installing...\n')
44
+ subprocess.check_call(
45
+ [sys.executable, '-m', 'pip', 'install', '--quiet', 'pymupdf'],
46
+ stdout=subprocess.DEVNULL
47
+ )
48
+ try:
49
+ import pymupdf
50
+ return pymupdf
51
+ except ImportError:
52
+ import fitz
53
+ return fitz
54
+ except Exception as e:
55
+ _error_exit(f'Failed to install pymupdf: {e}')
56
+
57
+
58
+ def _error_exit(msg):
59
+ """Print error JSON and exit."""
60
+ print(json.dumps({'error': str(msg)}, ensure_ascii=False))
61
+ sys.exit(1)
62
+
63
+
64
+ def _is_scanned_page(page, text):
65
+ """
66
+ Heuristic: page is likely scanned if:
67
+ 1. Extracted text is very short (< 50 chars after stripping)
68
+ 2. Page has images covering >60% of page area
69
+ """
70
+ stripped = text.strip()
71
+ if len(stripped) > 50:
72
+ return False
73
+
74
+ try:
75
+ images = page.get_image_info()
76
+ if not images:
77
+ return False
78
+ page_area = abs(page.rect)
79
+ if page_area == 0:
80
+ return False
81
+ image_area = 0
82
+ for img in images:
83
+ if 'bbox' in img:
84
+ try:
85
+ import pymupdf
86
+ r = pymupdf.Rect(img['bbox'])
87
+ except (ImportError, Exception):
88
+ import fitz
89
+ r = fitz.Rect(img['bbox'])
90
+ image_area += abs(r)
91
+ return (image_area / page_area) >= 0.6
92
+ except Exception:
93
+ return False
94
+
95
+
96
+ def _ocr_page(page, language='eng'):
97
+ """
98
+ Attempt Tesseract OCR on a scanned page via PyMuPDF's built-in integration.
99
+ Returns extracted text or empty string if tesseract unavailable.
100
+ """
101
+ try:
102
+ tp = page.get_textpage_ocr(language=language, dpi=300)
103
+ return page.get_text(textpage=tp).strip()
104
+ except Exception as e:
105
+ msg = str(e).lower()
106
+ if 'tesseract' in msg or 'not installed' in msg or 'not found' in msg:
107
+ # Tesseract not installed — skip OCR, return what we have
108
+ sys.stderr.write(f'[pdf-text-extract] Tesseract not available, skipping OCR for scanned page\n')
109
+ return ''
110
+ # Other error — still don't crash
111
+ sys.stderr.write(f'[pdf-text-extract] OCR failed: {e}\n')
112
+ return ''
113
+
114
+
115
+ def extract_pdf(pdf_path, max_pages=100, language='eng'):
116
+ """
117
+ Extract text from PDF using PyMuPDF.
118
+ Digital pages: instant text extraction.
119
+ Scanned pages: Tesseract OCR fallback.
120
+ """
121
+ pymupdf = _ensure_pymupdf()
122
+
123
+ if not os.path.isfile(pdf_path):
124
+ return {'error': f'File not found: {pdf_path}'}
125
+
126
+ try:
127
+ doc = pymupdf.open(pdf_path)
128
+ except Exception as e:
129
+ msg = str(e).lower()
130
+ if 'password' in msg or 'encrypt' in msg:
131
+ return {'error': f'PDF is password-protected: {pdf_path}'}
132
+ return {'error': f'Failed to open PDF: {e}'}
133
+
134
+ total_pages = len(doc)
135
+ process_count = min(total_pages, max_pages)
136
+ truncated = total_pages > max_pages
137
+
138
+ texts = []
139
+ scanned_pages = []
140
+
141
+ for i in range(process_count):
142
+ page = doc[i]
143
+ text = page.get_text().strip()
144
+
145
+ if _is_scanned_page(page, text):
146
+ # Try OCR
147
+ ocr_text = _ocr_page(page, language)
148
+ if ocr_text:
149
+ text = ocr_text
150
+ scanned_pages.append(i + 1) # 1-indexed
151
+ # If OCR also empty, keep whatever minimal text we got
152
+
153
+ if text:
154
+ if process_count > 1:
155
+ texts.append(f'--- Page {i + 1} ---\n{text}')
156
+ else:
157
+ texts.append(text)
158
+
159
+ doc.close()
160
+
161
+ full_text = '\n\n'.join(texts)
162
+
163
+ result = {
164
+ 'text': full_text,
165
+ 'pages': process_count,
166
+ 'chars': len(full_text),
167
+ }
168
+
169
+ if scanned_pages:
170
+ result['scanned_pages'] = scanned_pages
171
+ if truncated:
172
+ result['truncated'] = True
173
+ result['total_pages'] = total_pages
174
+
175
+ return result
176
+
177
+
178
+ def main():
179
+ parser = argparse.ArgumentParser(description='Extract text from PDF files')
180
+ parser.add_argument('pdf_path', nargs='?', help='Path to the PDF file (single mode)')
181
+ parser.add_argument('--batch', nargs='+', metavar='PDF',
182
+ help='Batch mode: extract multiple PDFs (JSONL output, one line per PDF)')
183
+ parser.add_argument('--max-pages', type=int, default=100,
184
+ help='Maximum pages to process per PDF (default: 100)')
185
+ parser.add_argument('--language', default='eng',
186
+ help='Tesseract language for OCR fallback (default: eng)')
187
+
188
+ args = parser.parse_args()
189
+
190
+ if args.batch:
191
+ # Batch mode — JSONL output, one result per line
192
+ # Single Python startup for N PDFs (avoids repeated interpreter overhead)
193
+ for pdf_path in args.batch:
194
+ result = extract_pdf(pdf_path, args.max_pages, args.language)
195
+ result['path'] = pdf_path
196
+ print(json.dumps(result, ensure_ascii=False), flush=True)
197
+ elif args.pdf_path:
198
+ # Single file mode
199
+ result = extract_pdf(args.pdf_path, args.max_pages, args.language)
200
+ result['path'] = args.pdf_path
201
+ print(json.dumps(result, ensure_ascii=False))
202
+ else:
203
+ parser.print_help()
204
+ sys.exit(1)
205
+
206
+
207
+ if __name__ == '__main__':
208
+ main()
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "specmem-hardwicksoftware",
3
- "version": "3.7.34",
3
+ "version": "3.7.36",
4
4
  "type": "module",
5
5
  "description": "Your Claude Code sessions don't have to start from scratch anymore — SpecMem gives your AI real memory. It won't forget your conversations, your code, or your architecture decisions between sessions. That's the whole point. Semantic code indexing that actually works: TypeScript, JavaScript, Python, Go, Rust, Java, Kotlin, C, C++, HTML and more. It doesn't just track functions — it gets classes, methods, fields, constants, enums, macros, imports, structs, the whole codebase graph. There's chat memory too, powered by pgvector embeddings. You've also got token compression, team coordination, multi-agent comms, and file watching built in. 74+ MCP tools. Runs on PostgreSQL + Docker. It's kind of a big deal. justcalljon.pro",
6
6
  "main": "dist/index.js",
@@ -172,7 +172,7 @@ function getHookConfig() {
172
172
  ],
173
173
  "PreToolUse": [
174
174
  {
175
- "matcher": "Task",
175
+ "matcher": "Agent",
176
176
  "hooks": [
177
177
  {
178
178
  "type": "command",
@@ -421,7 +421,7 @@ function getHookConfig() {
421
421
  ],
422
422
  "PostToolUse": [
423
423
  {
424
- "matcher": "Task",
424
+ "matcher": "Agent",
425
425
  "hooks": [
426
426
  {
427
427
  "type": "command",
@@ -1756,7 +1756,7 @@ function configureSettings() {
1756
1756
  // PreToolUse hooks - Agent loading with chooser - MERGE
1757
1757
  const specmemPreToolUseHooks = [
1758
1758
  {
1759
- matcher: 'Task',
1759
+ matcher: 'Agent',
1760
1760
  hooks: [{
1761
1761
  type: 'command',
1762
1762
  command: `node ${path.join(CLAUDE_HOOKS_DIR, 'agent-loading-hook.js')}`,
@@ -1843,7 +1843,7 @@ function configureSettings() {
1843
1843
  // PostToolUse hooks - agent completion tracking - MERGE
1844
1844
  const specmemPostToolUseHooks = [
1845
1845
  {
1846
- matcher: 'Task',
1846
+ matcher: 'Agent',
1847
1847
  hooks: [{
1848
1848
  type: 'command',
1849
1849
  command: `node ${path.join(CLAUDE_HOOKS_DIR, 'task-progress-hook.js')}`,