specmem-hardwicksoftware 3.5.99 → 3.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,6 +25,38 @@ import { cotStart, cotResult, cotError } from '../../utils/cotBroadcast.js';
25
25
  // Debug logger: when SPECMEM_DEBUG is exactly '1' it forwards its arguments to
  // console.error behind a '[DEBUG]' prefix; for any other value it is a no-op.
  const __debugLog = process.env['SPECMEM_DEBUG'] === '1'
26
26
  ? (...args) => console.error('[DEBUG]', ...args) // stderr, not stdout!
27
27
  : () => { };
28
+ // ============================================================================
29
+ // RETRY HELPER for find_memory embedding generation
30
+ // ============================================================================
31
// Number of retries (after the first attempt) for find_memory embedding generation.
// Read from SPECMEM_FIND_MEMORY_RETRIES; a missing, non-numeric or negative value
// falls back to 2 so the retry loop can never be skipped by a NaN/negative bound.
const FIND_MEMORY_MAX_RETRIES = (() => {
    const parsed = Number.parseInt(process.env['SPECMEM_FIND_MEMORY_RETRIES'] ?? '', 10);
    return Number.isInteger(parsed) && parsed >= 0 ? parsed : 2;
})();
32
/**
 * Decide whether an embedding failure looks transient (worth retrying).
 *
 * Only Error instances are considered; any other thrown value yields false.
 * The check is a case-insensitive substring match on error.message against a
 * fixed list of markers (timeout, connection reset/refused, socket hang up,
 * aborted, etimedout, qoms, resource, busy).
 *
 * @param {unknown} error - Value caught from the embedding operation.
 * @returns {boolean} true when the message contains one of the markers.
 */
+ function isTransientEmbeddingError(error) {
33
+ if (!(error instanceof Error)) return false;
34
// Lower-case once so every marker comparison below is case-insensitive.
+ const msg = error.message.toLowerCase();
35
+ return (msg.includes('timeout') || msg.includes('econnreset') ||
36
+ msg.includes('econnrefused') || msg.includes('socket hang up') ||
37
+ msg.includes('aborted') || msg.includes('etimedout') ||
38
+ msg.includes('qoms') || msg.includes('resource') || msg.includes('busy'));
39
+ }
40
/**
 * Run an async operation, retrying transient failures with exponential backoff.
 *
 * The operation is always attempted at least once. After a failure it is retried
 * only while retries remain and isTransientEmbeddingError() accepts the error.
 * Backoff is 1s, 2s, 4s, ... capped at 8s.
 *
 * @param {() => Promise<*>} operation - Work to execute; invoked once per attempt.
 * @param {string} operationName - Label used in the retry warning log.
 * @param {number} [maxRetries=FIND_MEMORY_MAX_RETRIES] - Retries after the first
 *   attempt. NaN, negative or non-finite values are treated as 0 (single attempt).
 * @returns {Promise<*>} Whatever the operation resolves to.
 * @throws {Error} The last failure; non-Error throwables are wrapped in an Error.
 */
async function withEmbeddingRetry(operation, operationName, maxRetries = FIND_MEMORY_MAX_RETRIES) {
    // A NaN/negative bound (e.g. from parseInt on a malformed env var) used to skip
    // the loop entirely, so the operation never ran and `null` was thrown.
    const retryLimit = Number.isFinite(maxRetries) ? Math.max(0, Math.floor(maxRetries)) : 0;
    let lastError = null;
    for (let attempt = 0; attempt <= retryLimit; attempt++) {
        try {
            return await operation();
        }
        catch (error) {
            lastError = error instanceof Error ? error : new Error(String(error));
            // Stop when retries are exhausted or the failure is not transient.
            if (attempt >= retryLimit || !isTransientEmbeddingError(error)) {
                break;
            }
            const delay = Math.min(1000 * Math.pow(2, attempt), 8000);
            logger.warn({ operationName, attempt: attempt + 1, maxRetries: retryLimit + 1, error: lastError.message, retryInMs: delay }, `[find_memory] ${operationName} failed, retrying in ${delay}ms`);
            await new Promise(resolve => setTimeout(resolve, delay));
        }
    }
    throw lastError;
}
28
60
  /**
29
61
  * Extract discoverable paths from memory content
30
62
  * This is the KEY to getting lots of info from few memories
@@ -744,23 +776,35 @@ export class FindWhatISaid {
744
776
  socketPath,
745
777
  query: safeParams.query?.slice(0, 50)
746
778
  });
747
- const embeddingPromise = this.embeddingProvider.generateEmbedding(safeParams.query);
748
- const timeoutPromise = new Promise((_, reject) => {
749
- setTimeout(() => {
750
- const timeoutError = new Error(`Embedding generation timeout after ${formatTimeout(EMBEDDING_TIMEOUT_MS)}. ` +
751
- `Socket: ${socketPath}. ` +
752
- `Set SPECMEM_EMBEDDING_TIMEOUT env var to increase timeout.`);
753
- timeoutError.socketPath = socketPath;
754
- timeoutError.code = 'EMBEDDING_TIMEOUT';
755
- reject(timeoutError);
756
- }, EMBEDDING_TIMEOUT_MS);
757
- });
758
779
  let rawEmbedding;
759
780
  try {
760
781
  __debugLog('[FIND_MEMORY DEBUG]', Date.now(), 'AWAITING_EMBEDDING_PROMISE', {
761
782
  elapsedMs: Date.now() - startTime
762
783
  });
763
- rawEmbedding = await Promise.race([embeddingPromise, timeoutPromise]);
784
+ // Retry wrapper: retries transient failures (timeouts, socket errors) with exponential backoff
785
+ rawEmbedding = await withEmbeddingRetry(async () => {
786
+ const embeddingPromise = this.embeddingProvider.generateEmbedding(safeParams.query);
787
+ let embeddingTimeoutId;
788
+ const timeoutPromise = new Promise((_, reject) => {
789
+ embeddingTimeoutId = setTimeout(() => {
790
+ const timeoutError = new Error(`Embedding generation timeout after ${formatTimeout(EMBEDDING_TIMEOUT_MS)}. ` +
791
+ `Socket: ${socketPath}. ` +
792
+ `Set SPECMEM_EMBEDDING_TIMEOUT env var to increase timeout.`);
793
+ timeoutError.socketPath = socketPath;
794
+ timeoutError.code = 'EMBEDDING_TIMEOUT';
795
+ reject(timeoutError);
796
+ }, EMBEDDING_TIMEOUT_MS);
797
+ });
798
+ try {
799
+ const result = await Promise.race([embeddingPromise, timeoutPromise]);
800
+ clearTimeout(embeddingTimeoutId);
801
+ return result;
802
+ }
803
+ catch (err) {
804
+ clearTimeout(embeddingTimeoutId);
805
+ throw err;
806
+ }
807
+ }, 'Embedding generation');
764
808
  const embeddingDuration = Date.now() - embeddingStartTime;
765
809
  // ============================================================================
766
810
  // DEEP DEBUG: After Embedding Generation (Success)
@@ -777,6 +821,7 @@ export class FindWhatISaid {
777
821
  });
778
822
  }
779
823
  catch (embeddingError) {
824
// No clearTimeout here: embeddingTimeoutId is block-scoped to the retry callback passed to
// withEmbeddingRetry, which already clears it on both its success and failure paths.
// Referencing it in this catch would throw a ReferenceError and mask the real embedding error.
780
825
  const embeddingDuration = Date.now() - embeddingStartTime;
781
826
  const err = embeddingError;
782
827
  // ============================================================================
@@ -849,8 +894,9 @@ export class FindWhatISaid {
849
894
  ...safeParams,
850
895
  dateRange
851
896
  }, queryEmbedding);
897
+ let searchTimeoutId;
852
898
  const searchTimeoutPromise = new Promise((_, reject) => {
853
- setTimeout(() => {
899
+ searchTimeoutId = setTimeout(() => {
854
900
  const timeoutError = new Error(`Search timeout after ${formatTimeout(SEARCH_TIMEOUT_MS)}. ` +
855
901
  `Query: "${safeParams.query.slice(0, 50)}...". ` +
856
902
  `Set SPECMEM_EMBEDDING_TIMEOUT env var to increase timeout.`);
@@ -864,6 +910,7 @@ export class FindWhatISaid {
864
910
  elapsedMs: Date.now() - startTime
865
911
  });
866
912
  results = await Promise.race([searchPromise, searchTimeoutPromise]);
913
+ clearTimeout(searchTimeoutId);
867
914
  const searchDuration = Date.now() - searchStartTime;
868
915
  // ============================================================================
869
916
  // DEEP DEBUG: After Database Query (Success)
@@ -881,6 +928,7 @@ export class FindWhatISaid {
881
928
  });
882
929
  }
883
930
  catch (searchError) {
931
+ clearTimeout(searchTimeoutId); // Prevent dangling timer on error path
884
932
  const searchDuration = Date.now() - searchStartTime;
885
933
  const err = searchError;
886
934
  // ============================================================================
@@ -946,12 +994,42 @@ export class FindWhatISaid {
946
994
  semanticResults: results.length,
947
995
  topSimilarity: results[0]?.similarity
948
996
  }, '[I5 FIX] Low/no semantic results, triggering keyword fallback');
949
- keywordResults = await this.keywordSearch(safeParams.query, safeParams);
997
+ const KEYWORD_FALLBACK_TIMEOUT = parseInt(process.env['SPECMEM_KEYWORD_FALLBACK_TIMEOUT_MS'] || '30000');
998
+ let keywordTimeoutId;
999
+ try {
1000
+ keywordResults = await Promise.race([
1001
+ this.keywordSearch(safeParams.query, safeParams),
1002
+ new Promise((_, reject) => {
1003
+ keywordTimeoutId = setTimeout(() => reject(new Error(`Keyword fallback timed out after ${KEYWORD_FALLBACK_TIMEOUT}ms`)), KEYWORD_FALLBACK_TIMEOUT);
1004
+ })
1005
+ ]);
1006
+ clearTimeout(keywordTimeoutId);
1007
+ }
1008
+ catch (err) {
1009
+ clearTimeout(keywordTimeoutId);
1010
+ logger.warn({ error: err?.message, timeoutMs: KEYWORD_FALLBACK_TIMEOUT, query: safeParams.query }, '[I5 FIX] Keyword fallback timed out or failed - continuing with semantic results only');
1011
+ keywordResults = [];
1012
+ }
950
1013
  }
951
1014
  // I5 FIX: Get recent memories if requested
952
1015
  let recentResults = [];
953
1016
  if (includeRecentCount > 0) {
954
- recentResults = await this.getRecentMemories(includeRecentCount, safeParams);
1017
+ const RECENT_LOOKUP_TIMEOUT = parseInt(process.env['SPECMEM_RECENT_LOOKUP_TIMEOUT_MS'] || '15000');
1018
+ let recentTimeoutId;
1019
+ try {
1020
+ recentResults = await Promise.race([
1021
+ this.getRecentMemories(includeRecentCount, safeParams),
1022
+ new Promise((_, reject) => {
1023
+ recentTimeoutId = setTimeout(() => reject(new Error(`Recent memories lookup timed out after ${RECENT_LOOKUP_TIMEOUT}ms`)), RECENT_LOOKUP_TIMEOUT);
1024
+ })
1025
+ ]);
1026
+ clearTimeout(recentTimeoutId);
1027
+ }
1028
+ catch (err) {
1029
+ clearTimeout(recentTimeoutId);
1030
+ logger.warn({ error: err?.message, timeoutMs: RECENT_LOOKUP_TIMEOUT, includeRecentCount }, '[I5 FIX] Recent memories lookup timed out or failed - continuing without recent results');
1031
+ recentResults = [];
1032
+ }
955
1033
  logger.info({
956
1034
  recentRequested: includeRecentCount,
957
1035
  recentFound: recentResults.length
@@ -1021,47 +1099,61 @@ export class FindWhatISaid {
1021
1099
  // ============================================================================
1022
1100
  if (safeParams.galleryMode === true) {
1023
1101
  logger.info({ query: safeParams.query, resultCount: results.length }, 'Gallery mode enabled - sending to Mini COT');
1102
+ const GALLERY_TIMEOUT = parseInt(process.env['SPECMEM_GALLERY_TIMEOUT_MS'] || '60000');
1103
+ let galleryTimeoutId;
1024
1104
  try {
1025
- const miniCOT = new MiniCOTProvider();
1026
- // Prepare memories for gallery creation (send ENGLISH to CoT!)
1027
- const memoriesForGallery = results.map(result => ({
1028
- id: result.memory.id,
1029
- keywords: result.memory.metadata?._semanticHints || result.memory.tags.join(', '),
1030
- snippet: result.memory.content.slice(0, 300), // First 300 chars
1031
- timestamp: result.memory.metadata?.timestamp, // When it was said
1032
- role: result.memory.metadata?.role // Who said it (user/assistant)
1033
- }));
1034
- // Call Mini COT to create gallery (CoT analyzes in ENGLISH)
1035
- const gallery = await miniCOT.createGallery(safeParams.query, memoriesForGallery);
1036
- // ROUND-TRIP VERIFIED compression - compress CoT OUTPUT for token efficiency
1037
- // Uses smartCompress: EN→Chinese→EN comparison, keeps English where context lost
1038
- // MED-40 FIX: Add null check before compression to avoid undefined errors
1039
- gallery.gallery = gallery.gallery.map(item => ({
1040
- ...item,
1041
- thumbnail: item.thumbnail ? smartCompress(item.thumbnail, { threshold: 0.75 }).result : '',
1042
- cot: item.cot ? smartCompress(item.cot, { threshold: 0.75 }).result : ''
1043
- }));
1044
- logger.info({
1045
- query: safeParams.query,
1046
- galleryItems: gallery.gallery.length,
1047
- researchedTerms: gallery.total_researched_terms
1048
- }, 'Gallery created by Mini COT and compressed');
1049
- // Always use humanReadable format
1050
- const humanReadableData = gallery.gallery.map((item, idx) => ({
1051
- id: item.id || `gallery-${idx}`,
1052
- similarity: item.relevance ? item.relevance / 100 : 0.5,
1053
- content: `[GALLERY] ${item.thumbnail || item.cot || 'No preview'}`,
1054
- }));
1055
- return formatHumanReadable('find_memory', humanReadableData, {
1056
- grey: true,
1057
- showSimilarity: true,
1058
- query: safeParams.query,
1059
- mode: 'gallery'
1060
- });
1105
+ const galleryOperation = async () => {
1106
+ const miniCOT = new MiniCOTProvider();
1107
+ // Prepare memories for gallery creation (send ENGLISH to CoT!)
1108
+ const memoriesForGallery = results.map(result => ({
1109
+ id: result.memory.id,
1110
+ keywords: result.memory.metadata?._semanticHints || result.memory.tags.join(', '),
1111
+ snippet: result.memory.content.slice(0, 300), // First 300 chars
1112
+ timestamp: result.memory.metadata?.timestamp, // When it was said
1113
+ role: result.memory.metadata?.role // Who said it (user/assistant)
1114
+ }));
1115
+ // Call Mini COT to create gallery (CoT analyzes in ENGLISH)
1116
+ const gallery = await miniCOT.createGallery(safeParams.query, memoriesForGallery);
1117
+ // ROUND-TRIP VERIFIED compression - compress CoT OUTPUT for token efficiency
1118
+ // Uses smartCompress: EN→Chinese→EN comparison, keeps English where context lost
1119
+ // MED-40 FIX: Add null check before compression to avoid undefined errors
1120
+ gallery.gallery = gallery.gallery.map(item => ({
1121
+ ...item,
1122
+ thumbnail: item.thumbnail ? smartCompress(item.thumbnail, { threshold: 0.75 }).result : '',
1123
+ cot: item.cot ? smartCompress(item.cot, { threshold: 0.75 }).result : ''
1124
+ }));
1125
+ logger.info({
1126
+ query: safeParams.query,
1127
+ galleryItems: gallery.gallery.length,
1128
+ researchedTerms: gallery.total_researched_terms
1129
+ }, 'Gallery created by Mini COT and compressed');
1130
+ // Always use humanReadable format
1131
+ const humanReadableData = gallery.gallery.map((item, idx) => ({
1132
+ id: item.id || `gallery-${idx}`,
1133
+ similarity: item.relevance ? item.relevance / 100 : 0.5,
1134
+ content: `[GALLERY] ${item.thumbnail || item.cot || 'No preview'}`,
1135
+ }));
1136
+ return formatHumanReadable('find_memory', humanReadableData, {
1137
+ grey: true,
1138
+ showSimilarity: true,
1139
+ query: safeParams.query,
1140
+ mode: 'gallery'
1141
+ });
1142
+ };
1143
+ const galleryResult = await Promise.race([
1144
+ galleryOperation(),
1145
+ new Promise((_, reject) => {
1146
+ galleryTimeoutId = setTimeout(() => reject(new Error(`Gallery mode timed out after ${GALLERY_TIMEOUT}ms`)), GALLERY_TIMEOUT);
1147
+ })
1148
+ ]);
1149
+ clearTimeout(galleryTimeoutId);
1150
+ return galleryResult;
1061
1151
  }
1062
1152
  catch (error) {
1063
- logger.error({ error, query: safeParams.query }, 'Mini COT gallery creation failed - falling back to normal results');
1064
- // Fall through to normal results on error
1153
+ clearTimeout(galleryTimeoutId);
1154
+ const isTimeout = error?.message?.includes('timed out');
1155
+ logger.error({ error: error?.message, query: safeParams.query, isTimeout, timeoutMs: GALLERY_TIMEOUT }, isTimeout ? 'Gallery mode timed out - falling back to normal results' : 'Mini COT gallery creation failed - falling back to normal results');
1156
+ // Fall through to normal results on error or timeout
1065
1157
  }
1066
1158
  }
1067
1159
  // ============================================================================
@@ -43,13 +43,22 @@ const CONFIG = {
43
43
  maxRetries: 3, // Max retry attempts before DLQ
44
44
  baseRetryDelayMs: 1000, // Base delay for exponential backoff (1s, 2s, 4s)
45
45
  maxRetryDelayMs: 30000, // Cap retry delay at 30s
46
- leaseTimeoutMs: 60000, // 60s lease - requeue if not completed
46
+ leaseTimeoutMs: parseInt(process.env['SPECMEM_QOMS_LEASE_TIMEOUT'] || '120000'), // 120s lease (was 60s) - configurable via env
47
47
  agePromotionMs: 30000, // Promote priority after 30s waiting
48
48
  // DLQ settings
49
- dlqMaxSize: 1000, // Max DLQ size (oldest evicted)
49
+ dlqMaxSize: parseInt(process.env['SPECMEM_QOMS_MAX_DLQ_SIZE'] || '500'), // Max DLQ size (oldest evicted) - Issue #8
50
50
  dlqRetentionMs: 3600000, // Keep DLQ items for 1 hour
51
51
  // Metrics cache
52
52
  metricsCacheMs: 500, // Cache metrics for 500ms
53
+ // Issue #5: Periodic lease expiry check interval (default 10s)
54
+ leaseCheckIntervalMs: parseInt(process.env['SPECMEM_QOMS_LEASE_CHECK_INTERVAL_MS'] || '10000'),
55
+ // Issue #8: Queue size limits (backpressure)
56
+ maxQueueSize: parseInt(process.env['SPECMEM_QOMS_MAX_QUEUE_SIZE'] || '1000'), // Total max across all priorities
57
+ maxHighQueue: parseInt(process.env['SPECMEM_QOMS_MAX_HIGH_QUEUE'] || '500'),
58
+ maxMediumQueue: parseInt(process.env['SPECMEM_QOMS_MAX_MEDIUM_QUEUE'] || '300'),
59
+ maxLowQueue: parseInt(process.env['SPECMEM_QOMS_MAX_LOW_QUEUE'] || '200'),
60
+ // Issue #8: Queue depth metrics logging interval (default 1min)
61
+ metricsIntervalMs: parseInt(process.env['SPECMEM_QOMS_METRICS_INTERVAL_MS'] || '60000'),
53
62
  };
54
63
  // ============================================================================
55
64
  // Types
@@ -91,6 +100,18 @@ let lastCpuInfo = null;
91
100
  let lastCpuTime = 0;
92
101
  // Operation ID counter
93
102
  let operationIdCounter = 0;
103
+ // Issue #5: Periodic lease check interval handle
104
+ let leaseCheckInterval = null;
105
+ // Issue #8: Periodic metrics logging interval handle
106
+ let metricsInterval = null;
107
+ // Issue #8: Per-priority max queue size map, looked up by enqueue() before it accepts an operation
108
+ const perPriorityMaxSize = new Map([
109
+ [Priority.CRITICAL, Infinity], // Critical operations are never rejected
110
+ [Priority.HIGH, CONFIG.maxHighQueue],
111
+ [Priority.MEDIUM, CONFIG.maxMediumQueue],
112
+ [Priority.LOW, CONFIG.maxLowQueue],
113
+ [Priority.IDLE, CONFIG.maxLowQueue], // IDLE uses the same cap value as LOW; enqueue() compares each priority's own queue length, so the two are not pooled
114
+ ]);
94
115
  // ============================================================================
95
116
  // Utility Functions
96
117
  // ============================================================================
@@ -355,16 +376,37 @@ function nack(opId, error) {
355
376
  }
356
377
  /**
357
378
  * Check for lease timeouts and requeue expired items
379
+ * @param {boolean} periodic - Whether this was triggered by the periodic check (Issue #5)
358
380
  */
359
- function checkLeaseTimeouts() {
381
+ function checkLeaseTimeouts(periodic = false) {
360
382
  const now = Date.now();
383
+ let expiredCount = 0;
361
384
  for (const [opId, item] of processingItems.entries()) {
362
385
  if (item.leaseExpiresAt && now > item.leaseExpiresAt) {
363
- __debugLog('[QOMS DEBUG]', Date.now(), 'LEASE_TIMEOUT', { opId, expiredAgo: now - item.leaseExpiresAt });
386
+ const expiredAgoMs = now - item.leaseExpiresAt;
387
+ if (periodic) {
388
+ // Issue #5: Log with more detail when periodic check catches expired leases
389
+ logger.warn({
390
+ opId,
391
+ priority: Priority[item.priority],
392
+ expiredAgoMs,
393
+ enqueuedAt: item.enqueuedAt,
394
+ startedAt: item.startedAt,
395
+ retryCount: item.retryCount,
396
+ }, 'QOMS: Periodic lease check expired stale operation');
397
+ }
398
+ __debugLog('[QOMS DEBUG]', Date.now(), 'LEASE_TIMEOUT', {
399
+ opId,
400
+ expiredAgo: expiredAgoMs,
401
+ periodic,
402
+ priority: Priority[item.priority],
403
+ });
364
404
  // Treat as failure, trigger retry
365
405
  nack(opId, new Error('Lease timeout - operation took too long'));
406
+ expiredCount++;
366
407
  }
367
408
  }
409
+ return expiredCount;
368
410
  }
369
411
  // ============================================================================
370
412
  // Queue Processor
@@ -475,6 +517,116 @@ async function processQueue() {
475
517
  }
476
518
  }
477
519
  // ============================================================================
520
+ // Issue #5: Periodic Lease Expiry Check
521
+ // ============================================================================
522
/**
 * Start periodic lease expiry check (Issue #5).
 *
 * Runs checkLeaseTimeouts(true) every CONFIG.leaseCheckIntervalMs
 * (SPECMEM_QOMS_LEASE_CHECK_INTERVAL_MS, default 10s). If expired leases are
 * found and released, triggers queue processing so waiting items can take the
 * freed slots. Calling this while the check is already scheduled is a no-op.
 */
function startPeriodicLeaseCheck() {
    if (leaseCheckInterval) {
        return; // Already running
    }
    // The configured value comes from parseInt() on an env var and may be NaN or
    // out of range. Node coerces NaN, <1 and >2^31-1 delays to 1ms, which would
    // turn this check into a busy loop, so fall back to the 10s default instead.
    const DEFAULT_INTERVAL_MS = 10000;
    const MAX_TIMER_DELAY_MS = 2147483647;
    const configuredMs = CONFIG.leaseCheckIntervalMs;
    const isUsable = Number.isFinite(configuredMs) && configuredMs >= 1 && configuredMs <= MAX_TIMER_DELAY_MS;
    const intervalMs = isUsable ? configuredMs : DEFAULT_INTERVAL_MS;
    if (!isUsable) {
        logger.warn({ configuredMs, intervalMs }, 'QOMS: invalid lease check interval, using default');
    }
    __debugLog('[QOMS DEBUG]', Date.now(), 'PERIODIC_LEASE_CHECK_START', { intervalMs });
    leaseCheckInterval = setInterval(() => {
        try {
            const expiredCount = checkLeaseTimeouts(true);
            if (expiredCount > 0) {
                __debugLog('[QOMS DEBUG]', Date.now(), 'PERIODIC_LEASE_CHECK_EXPIRED', { expiredCount });
                // Trigger queue processing to fill freed slots
                processQueue().catch(err => {
                    logger.error({ error: err }, 'QOMS: queue processing error after periodic lease check');
                });
            }
        }
        catch (err) {
            // Never let a failing check escape the timer callback.
            logger.error({ error: err }, 'QOMS: periodic lease check error');
        }
    }, intervalMs);
    // Prevent the interval from keeping the process alive
    if (leaseCheckInterval && typeof leaseCheckInterval.unref === 'function') {
        leaseCheckInterval.unref();
    }
}
554
+ // ============================================================================
555
+ // Issue #8: Periodic Queue Depth Metrics Logging
556
+ // ============================================================================
557
/**
 * Start periodic queue depth metrics logging (Issue #8).
 *
 * Runs every CONFIG.metricsIntervalMs (SPECMEM_QOMS_METRICS_INTERVAL_MS,
 * default 60s) and logs per-priority queue depths, the processing count and the
 * DLQ size for monitoring. Calling this while logging is already scheduled is a no-op.
 */
function startMetricsLogging() {
    if (metricsInterval) {
        return; // Already running
    }
    // The configured value comes from parseInt() on an env var and may be NaN or
    // out of range. Node coerces NaN, <1 and >2^31-1 delays to 1ms, which would
    // flood the log, so fall back to the 60s default instead.
    const DEFAULT_INTERVAL_MS = 60000;
    const MAX_TIMER_DELAY_MS = 2147483647;
    const configuredMs = CONFIG.metricsIntervalMs;
    const isUsable = Number.isFinite(configuredMs) && configuredMs >= 1 && configuredMs <= MAX_TIMER_DELAY_MS;
    const intervalMs = isUsable ? configuredMs : DEFAULT_INTERVAL_MS;
    if (!isUsable) {
        logger.warn({ configuredMs, intervalMs }, 'QOMS: invalid metrics interval, using default');
    }
    __debugLog('[QOMS DEBUG]', Date.now(), 'METRICS_LOGGING_START', { intervalMs });
    metricsInterval = setInterval(() => {
        try {
            // Snapshot the depth of every priority queue, keyed by priority name.
            const queueDepths = {};
            let totalQueued = 0;
            for (const [priority, queue] of priorityQueues.entries()) {
                const name = Priority[priority];
                queueDepths[name] = queue.length;
                totalQueued += queue.length;
            }
            logger.info({
                queueDepths,
                totalQueued,
                processing: processingItems.size,
                dlqSize: dlq.length,
                totalProcessed,
                totalRetries,
                maxQueueSize: CONFIG.maxQueueSize,
            }, 'QOMS: queue depth metrics');
            __debugLog('[QOMS DEBUG]', Date.now(), 'METRICS_LOG', {
                queueDepths,
                totalQueued,
                processing: processingItems.size,
                dlqSize: dlq.length,
            });
        }
        catch (err) {
            // Never let a failing metrics pass escape the timer callback.
            logger.error({ error: err }, 'QOMS: metrics logging error');
        }
    }, intervalMs);
    // Prevent the interval from keeping the process alive
    if (metricsInterval && typeof metricsInterval.unref === 'function') {
        metricsInterval.unref();
    }
}
602
+ // ============================================================================
603
+ // Issue #5 + #8: Cleanup / Destroy
604
+ // ============================================================================
605
+ /**
606
+ * Cleanup QOMS - stops the periodic lease-check and metrics-logging intervals.
+ * Each interval is cleared only when its handle is set, and the handle is then reset to null.
607
+ * Call this on shutdown to prevent resource leaks.
608
+ */
609
+ function cleanup() {
610
+ if (leaseCheckInterval) {
611
+ clearInterval(leaseCheckInterval);
612
+ leaseCheckInterval = null;
613
+ __debugLog('[QOMS DEBUG]', Date.now(), 'PERIODIC_LEASE_CHECK_STOPPED');
614
+ }
615
+ if (metricsInterval) {
616
+ clearInterval(metricsInterval);
617
+ metricsInterval = null;
618
+ __debugLog('[QOMS DEBUG]', Date.now(), 'METRICS_LOGGING_STOPPED');
619
+ }
620
+ logger.info('QOMS: cleanup complete - all intervals cleared');
621
+ }
622
+ // Alias for cleanup (same function object, exposed under a second name)
623
+ const destroy = cleanup;
624
+ // ============================================================================
625
+ // Auto-start periodic checks
626
+ // ============================================================================
627
+ startPeriodicLeaseCheck();
628
+ startMetricsLogging();
629
+ // ============================================================================
478
630
  // Public API
479
631
  // ============================================================================
480
632
  /**
@@ -491,6 +643,26 @@ export async function enqueue(operation, priority = Priority.MEDIUM) {
491
643
  priority: Priority[priority],
492
644
  totalQueued: getTotalQueueLength()
493
645
  });
646
+ // Issue #8: Check queue size limits (backpressure) - skip for CRITICAL priority
647
+ if (priority !== Priority.CRITICAL) {
648
+ const totalQueued = getTotalQueueLength();
649
+ // Check total queue size limit
650
+ if (totalQueued >= CONFIG.maxQueueSize) {
651
+ const errMsg = `QOMS: Queue full (${totalQueued}/${CONFIG.maxQueueSize}). Rejecting operation ${opId} with priority ${Priority[priority]}. Configure SPECMEM_QOMS_MAX_QUEUE_SIZE to increase limit.`;
652
+ logger.warn({ opId, priority: Priority[priority], totalQueued, maxQueueSize: CONFIG.maxQueueSize }, errMsg);
653
+ __debugLog('[QOMS DEBUG]', Date.now(), 'QUEUE_FULL_REJECTED', { opId, totalQueued, maxQueueSize: CONFIG.maxQueueSize });
654
+ throw new Error(errMsg);
655
+ }
656
+ // Check per-priority queue size limit
657
+ const priorityQueue = priorityQueues.get(priority);
658
+ const maxForPriority = perPriorityMaxSize.get(priority) ?? CONFIG.maxQueueSize;
659
+ if (priorityQueue.length >= maxForPriority) {
660
+ const errMsg = `QOMS: ${Priority[priority]} queue full (${priorityQueue.length}/${maxForPriority}). Rejecting operation ${opId}. Configure SPECMEM_QOMS_MAX_${Priority[priority]}_QUEUE to increase limit.`;
661
+ logger.warn({ opId, priority: Priority[priority], queueLength: priorityQueue.length, maxForPriority }, errMsg);
662
+ __debugLog('[QOMS DEBUG]', Date.now(), 'PRIORITY_QUEUE_FULL_REJECTED', { opId, priority: Priority[priority], queueLength: priorityQueue.length, maxForPriority });
663
+ throw new Error(errMsg);
664
+ }
665
+ }
494
666
  // Check if we can execute immediately (empty queue, resources available)
495
667
  const queue = priorityQueues.get(priority);
496
668
  if (getTotalQueueLength() === 0 && processingItems.size === 0 && canExecute(priority, opId)) {
@@ -568,10 +740,19 @@ export function getQueueStats() {
568
740
  pendingRetries,
569
741
  totalRetries,
570
742
  dlqSize: dlq.length,
743
+ dlqMaxSize: CONFIG.dlqMaxSize,
571
744
  isProcessing,
572
745
  avgWaitTimeMs: totalProcessed > 0 ? totalWaitTimeMs / totalProcessed : 0,
573
746
  metrics: getSystemMetrics(),
574
747
  limits: CONFIG,
748
+ // Issue #8: Queue capacity info
749
+ queueCapacity: {
750
+ maxTotal: CONFIG.maxQueueSize,
751
+ maxHigh: CONFIG.maxHighQueue,
752
+ maxMedium: CONFIG.maxMediumQueue,
753
+ maxLow: CONFIG.maxLowQueue,
754
+ remainingTotal: CONFIG.maxQueueSize - getTotalQueueLength(),
755
+ },
575
756
  };
576
757
  }
577
758
  /**
@@ -644,6 +825,8 @@ export const qoms = {
644
825
  getDLQ,
645
826
  clearDLQ,
646
827
  retryDLQItem,
828
+ cleanup,
829
+ destroy,
647
830
  Priority,
648
831
  };
649
832
  export default qoms;
@@ -20,6 +20,32 @@ import { logger } from '../utils/logger.js';
20
20
  import { getCoordinator } from '../coordination/integration.js';
21
21
  import { isMinifiedOrBundled, isBinaryFile, EXCLUSION_CONFIG } from '../codebase/exclusions.js';
22
22
  import { getProjectPathForInsert } from '../services/ProjectContext.js';
23
+ import { getEmbeddingTimeout } from '../config/embeddingTimeouts.js';
24
// Retry helper for transient embedding failures (timeout, socket reset, etc.)
// Retry count comes from SPECMEM_WATCHER_RETRIES; a missing, non-numeric or
// negative value falls back to 2 so the loop below always runs at least once.
const WATCHER_MAX_RETRIES = (() => {
    const parsed = Number.parseInt(process.env['SPECMEM_WATCHER_RETRIES'] ?? '', 10);
    return Number.isInteger(parsed) && parsed >= 0 ? parsed : 2;
})();
/**
 * Run an async embedding operation for the file watcher, retrying transient failures.
 *
 * A failure counts as transient when its lower-cased message contains 'timeout',
 * 'econnreset', 'econnrefused', 'socket' or 'qoms'. Backoff is 1s, 2s, 4s, ...
 * capped at 8s, for at most WATCHER_MAX_RETRIES retries after the first attempt.
 *
 * @param {() => Promise<*>} operation - Work to execute; invoked once per attempt.
 * @param {string} filePath - File being processed; only used in the retry warning log.
 * @returns {Promise<*>} Whatever the operation resolves to.
 * @throws {Error} The last failure; non-Error throwables are wrapped in an Error.
 */
async function withWatcherRetry(operation, filePath) {
    let lastError = null;
    for (let attempt = 0; attempt <= WATCHER_MAX_RETRIES; attempt++) {
        try {
            return await operation();
        }
        catch (error) {
            lastError = error instanceof Error ? error : new Error(String(error));
            const msg = lastError.message.toLowerCase();
            const isTransient = msg.includes('timeout') || msg.includes('econnreset') ||
                msg.includes('econnrefused') || msg.includes('socket') || msg.includes('qoms');
            // Stop when retries are exhausted or the failure is not transient.
            if (attempt >= WATCHER_MAX_RETRIES || !isTransient) {
                break;
            }
            const delay = Math.min(1000 * Math.pow(2, attempt), 8000);
            logger.warn({ filePath, attempt: attempt + 1, retryInMs: delay, error: lastError.message }, `[Watcher] Embedding retry ${attempt + 1}/${WATCHER_MAX_RETRIES}`);
            await new Promise(resolve => setTimeout(resolve, delay));
        }
    }
    throw lastError;
}
23
49
  /**
24
50
  * autoUpdateTheMemories - main change handler class
25
51
  *
@@ -135,10 +161,22 @@ export class AutoUpdateTheMemories {
135
161
  this.stats.filesSkipped++;
136
162
  return;
137
163
  }
138
- // generate embedding with retry and queue fallback
164
+ // generate embedding with retry + timeout protection
139
165
  let embedding;
166
+ const WATCHER_EMBEDDING_TIMEOUT = getEmbeddingTimeout('fileWatcher');
140
167
  try {
141
- embedding = await this.config.embeddingProvider.generateEmbedding(content);
168
+ embedding = await withWatcherRetry(async () => {
169
+ return new Promise((resolve, reject) => {
170
+ const timeoutId = setTimeout(() => {
171
+ const err = new Error(`[Watcher] Embedding generation timed out after ${Math.round(WATCHER_EMBEDDING_TIMEOUT / 1000)}s for ${metadata.relativePath}`);
172
+ err.code = 'WATCHER_EMBEDDING_TIMEOUT';
173
+ reject(err);
174
+ }, WATCHER_EMBEDDING_TIMEOUT);
175
+ this.config.embeddingProvider.generateEmbedding(content)
176
+ .then(result => { clearTimeout(timeoutId); resolve(result); })
177
+ .catch(error => { clearTimeout(timeoutId); reject(error); });
178
+ });
179
+ }, metadata.relativePath);
142
180
  }
143
181
  catch (embeddingError) {
144
182
  logger.error({
@@ -236,10 +274,22 @@ export class AutoUpdateTheMemories {
236
274
  this.stats.filesSkipped++;
237
275
  return;
238
276
  }
239
- // generate new embedding with retry and queue fallback
277
+ // generate new embedding with retry and queue fallback + timeout protection
240
278
  let embedding;
279
+ const WATCHER_EMBEDDING_TIMEOUT_MOD = getEmbeddingTimeout('fileWatcher');
241
280
  try {
242
- embedding = await this.config.embeddingProvider.generateEmbedding(content);
281
+ embedding = await withWatcherRetry(async () => {
282
+ return new Promise((resolve, reject) => {
283
+ const timeoutId = setTimeout(() => {
284
+ const err = new Error(`[Watcher] Embedding generation timed out after ${Math.round(WATCHER_EMBEDDING_TIMEOUT_MOD / 1000)}s for ${metadata.relativePath}`);
285
+ err.code = 'WATCHER_EMBEDDING_TIMEOUT';
286
+ reject(err);
287
+ }, WATCHER_EMBEDDING_TIMEOUT_MOD);
288
+ this.config.embeddingProvider.generateEmbedding(content)
289
+ .then(result => { clearTimeout(timeoutId); resolve(result); })
290
+ .catch(error => { clearTimeout(timeoutId); reject(error); });
291
+ });
292
+ }, metadata.relativePath);
243
293
  }
244
294
  catch (embeddingError) {
245
295
  logger.error({