specmem-hardwicksoftware 3.5.99 → 3.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/specmem-statusbar.cjs +154 -298
- package/claude-hooks/agent-loading-hook.js +8 -4
- package/claude-hooks/team-comms-enforcer.cjs +109 -92
- package/dist/config/embeddingTimeouts.js +4 -4
- package/dist/database.js +52 -6
- package/dist/db/bigBrainMigrations.js +7 -6
- package/dist/db/memoryDrilldown.sql +1 -1
- package/dist/db/projectSchemaInit.sql +21 -0
- package/dist/index.js +238 -13
- package/dist/installer/firstRun.js +2 -2
- package/dist/mcp/embeddingServerManager.js +225 -7
- package/dist/mcp/healthMonitor.js +165 -32
- package/dist/mcp/tools/embeddingControl.js +31 -0
- package/dist/mcp/tools/teamComms.js +16 -0
- package/dist/mcp/watcherIntegration.js +50 -7
- package/dist/services/CameraZoomSearch.js +62 -5
- package/dist/services/DimensionService.js +73 -6
- package/dist/services/EmbeddingQueue.js +64 -0
- package/dist/services/MemoryDrilldown.js +19 -12
- package/dist/tools/goofy/findCodePointers.js +11 -7
- package/dist/tools/goofy/findWhatISaid.js +145 -53
- package/dist/utils/qoms.js +187 -4
- package/dist/watcher/changeHandler.js +54 -4
- package/dist/watcher/fileWatcher.js +121 -1
- package/dist/watcher/index.js +75 -31
- package/dist/watcher/syncChecker.js +248 -63
- package/embedding-sandbox/__pycache__/frankenstein-embeddings.cpython-313.pyc +0 -0
- package/embedding-sandbox/frankenstein-embeddings.py +175 -64
- package/package.json +1 -1
|
@@ -25,6 +25,38 @@ import { cotStart, cotResult, cotError } from '../../utils/cotBroadcast.js';
|
|
|
25
25
|
const __debugLog = process.env['SPECMEM_DEBUG'] === '1'
|
|
26
26
|
? (...args) => console.error('[DEBUG]', ...args) // stderr, not stdout!
|
|
27
27
|
: () => { };
|
|
28
|
+
// ============================================================================
|
|
29
|
+
// RETRY HELPER for find_memory embedding generation
|
|
30
|
+
// ============================================================================
|
|
31
|
+
// Number of retries (in addition to the first attempt) for find_memory embedding
// generation, read from SPECMEM_FIND_MEMORY_RETRIES. Parsed with an explicit radix;
// a missing, non-numeric or negative value falls back to 2 so the retry loop can
// never be configured into "zero attempts".
const FIND_MEMORY_MAX_RETRIES = (() => {
    const parsed = Number.parseInt(process.env['SPECMEM_FIND_MEMORY_RETRIES'] ?? '', 10);
    return Number.isInteger(parsed) && parsed >= 0 ? parsed : 2;
})();
|
|
32
|
+
/**
 * Decide whether an embedding failure looks transient (worth retrying).
 *
 * An error is transient when either:
 *  - its `code` property is one of the connection/timeout codes below, or
 *  - its lower-cased message contains one of the known marker substrings.
 *
 * Checking `code` as well as the message keeps the classification stable when a
 * caller wraps or rewrites the message text but preserves the error code.
 *
 * @param {unknown} error - Value caught from the embedding call.
 * @returns {boolean} true if a retry is reasonable; false for non-Error values
 *   and for errors that match no code or marker.
 */
function isTransientEmbeddingError(error) {
    if (!(error instanceof Error)) return false;
    const code = typeof error.code === 'string' ? error.code.toUpperCase() : '';
    if (['ECONNRESET', 'ECONNREFUSED', 'ETIMEDOUT', 'EMBEDDING_TIMEOUT'].includes(code)) {
        return true;
    }
    const msg = String(error.message).toLowerCase();
    const transientMarkers = [
        'timeout', 'econnreset', 'econnrefused', 'socket hang up',
        'aborted', 'etimedout', 'qoms', 'resource', 'busy',
    ];
    return transientMarkers.some((marker) => msg.includes(marker));
}
|
|
40
|
+
/**
 * Run an async operation, retrying transient failures with exponential backoff.
 *
 * The operation is always attempted at least once. After a failure it is retried
 * only while retries remain and isTransientEmbeddingError() accepts the error;
 * the wait doubles per attempt (1s, 2s, 4s, ...) and is capped at 8s.
 *
 * @param {() => Promise<*>} operation - Work to run; invoked afresh on every attempt.
 * @param {string} operationName - Label used in the retry warning log.
 * @param {number} [maxRetries=FIND_MEMORY_MAX_RETRIES] - Retries allowed after the
 *   first attempt. NaN or negative values are treated as 0 (single attempt).
 * @returns {Promise<*>} Whatever `operation` resolves to.
 * @throws {Error} The last failure; non-Error throwables are wrapped in an Error.
 */
async function withEmbeddingRetry(operation, operationName, maxRetries = FIND_MEMORY_MAX_RETRIES) {
    // Without this guard a NaN/negative limit makes `attempt <= maxRetries` false on
    // the first check, so the operation never runs and the function does `throw null`.
    const requested = Number(maxRetries);
    const retryLimit = requested > 0 ? Math.floor(requested) : 0;
    let lastError = null;
    for (let attempt = 0; attempt <= retryLimit; attempt++) {
        try {
            return await operation();
        }
        catch (error) {
            lastError = error instanceof Error ? error : new Error(String(error));
            if (attempt >= retryLimit || !isTransientEmbeddingError(error)) {
                break;
            }
            const delay = Math.min(1000 * Math.pow(2, attempt), 8000);
            logger.warn({ operationName, attempt: attempt + 1, maxRetries: retryLimit + 1, error: lastError.message, retryInMs: delay }, `[find_memory] ${operationName} failed, retrying in ${delay}ms`);
            await new Promise(resolve => setTimeout(resolve, delay));
        }
    }
    throw lastError;
}
|
|
28
60
|
/**
|
|
29
61
|
* Extract discoverable paths from memory content
|
|
30
62
|
* This is the KEY to getting lots of info from few memories
|
|
@@ -744,23 +776,35 @@ export class FindWhatISaid {
|
|
|
744
776
|
socketPath,
|
|
745
777
|
query: safeParams.query?.slice(0, 50)
|
|
746
778
|
});
|
|
747
|
-
const embeddingPromise = this.embeddingProvider.generateEmbedding(safeParams.query);
|
|
748
|
-
const timeoutPromise = new Promise((_, reject) => {
|
|
749
|
-
setTimeout(() => {
|
|
750
|
-
const timeoutError = new Error(`Embedding generation timeout after ${formatTimeout(EMBEDDING_TIMEOUT_MS)}. ` +
|
|
751
|
-
`Socket: ${socketPath}. ` +
|
|
752
|
-
`Set SPECMEM_EMBEDDING_TIMEOUT env var to increase timeout.`);
|
|
753
|
-
timeoutError.socketPath = socketPath;
|
|
754
|
-
timeoutError.code = 'EMBEDDING_TIMEOUT';
|
|
755
|
-
reject(timeoutError);
|
|
756
|
-
}, EMBEDDING_TIMEOUT_MS);
|
|
757
|
-
});
|
|
758
779
|
let rawEmbedding;
|
|
759
780
|
try {
|
|
760
781
|
__debugLog('[FIND_MEMORY DEBUG]', Date.now(), 'AWAITING_EMBEDDING_PROMISE', {
|
|
761
782
|
elapsedMs: Date.now() - startTime
|
|
762
783
|
});
|
|
763
|
-
|
|
784
|
+
// Retry wrapper: retries transient failures (timeouts, socket errors) with exponential backoff
|
|
785
|
+
rawEmbedding = await withEmbeddingRetry(async () => {
|
|
786
|
+
const embeddingPromise = this.embeddingProvider.generateEmbedding(safeParams.query);
|
|
787
|
+
let embeddingTimeoutId;
|
|
788
|
+
const timeoutPromise = new Promise((_, reject) => {
|
|
789
|
+
embeddingTimeoutId = setTimeout(() => {
|
|
790
|
+
const timeoutError = new Error(`Embedding generation timeout after ${formatTimeout(EMBEDDING_TIMEOUT_MS)}. ` +
|
|
791
|
+
`Socket: ${socketPath}. ` +
|
|
792
|
+
`Set SPECMEM_EMBEDDING_TIMEOUT env var to increase timeout.`);
|
|
793
|
+
timeoutError.socketPath = socketPath;
|
|
794
|
+
timeoutError.code = 'EMBEDDING_TIMEOUT';
|
|
795
|
+
reject(timeoutError);
|
|
796
|
+
}, EMBEDDING_TIMEOUT_MS);
|
|
797
|
+
});
|
|
798
|
+
try {
|
|
799
|
+
const result = await Promise.race([embeddingPromise, timeoutPromise]);
|
|
800
|
+
clearTimeout(embeddingTimeoutId);
|
|
801
|
+
return result;
|
|
802
|
+
}
|
|
803
|
+
catch (err) {
|
|
804
|
+
clearTimeout(embeddingTimeoutId);
|
|
805
|
+
throw err;
|
|
806
|
+
}
|
|
807
|
+
}, 'Embedding generation');
|
|
764
808
|
const embeddingDuration = Date.now() - embeddingStartTime;
|
|
765
809
|
// ============================================================================
|
|
766
810
|
// DEEP DEBUG: After Embedding Generation (Success)
|
|
@@ -777,6 +821,7 @@ export class FindWhatISaid {
|
|
|
777
821
|
});
|
|
778
822
|
}
|
|
779
823
|
catch (embeddingError) {
|
|
824
|
+
// No clearTimeout here: the embedding timeout handle is declared inside the retry
// callback and is cleared there on both its success and failure paths. It is not in
// scope in this catch, so referencing it would throw a ReferenceError and hide the
// real embedding error from the handling below.
|
|
780
825
|
const embeddingDuration = Date.now() - embeddingStartTime;
|
|
781
826
|
const err = embeddingError;
|
|
782
827
|
// ============================================================================
|
|
@@ -849,8 +894,9 @@ export class FindWhatISaid {
|
|
|
849
894
|
...safeParams,
|
|
850
895
|
dateRange
|
|
851
896
|
}, queryEmbedding);
|
|
897
|
+
let searchTimeoutId;
|
|
852
898
|
const searchTimeoutPromise = new Promise((_, reject) => {
|
|
853
|
-
setTimeout(() => {
|
|
899
|
+
searchTimeoutId = setTimeout(() => {
|
|
854
900
|
const timeoutError = new Error(`Search timeout after ${formatTimeout(SEARCH_TIMEOUT_MS)}. ` +
|
|
855
901
|
`Query: "${safeParams.query.slice(0, 50)}...". ` +
|
|
856
902
|
`Set SPECMEM_EMBEDDING_TIMEOUT env var to increase timeout.`);
|
|
@@ -864,6 +910,7 @@ export class FindWhatISaid {
|
|
|
864
910
|
elapsedMs: Date.now() - startTime
|
|
865
911
|
});
|
|
866
912
|
results = await Promise.race([searchPromise, searchTimeoutPromise]);
|
|
913
|
+
clearTimeout(searchTimeoutId);
|
|
867
914
|
const searchDuration = Date.now() - searchStartTime;
|
|
868
915
|
// ============================================================================
|
|
869
916
|
// DEEP DEBUG: After Database Query (Success)
|
|
@@ -881,6 +928,7 @@ export class FindWhatISaid {
|
|
|
881
928
|
});
|
|
882
929
|
}
|
|
883
930
|
catch (searchError) {
|
|
931
|
+
clearTimeout(searchTimeoutId); // Prevent dangling timer on error path
|
|
884
932
|
const searchDuration = Date.now() - searchStartTime;
|
|
885
933
|
const err = searchError;
|
|
886
934
|
// ============================================================================
|
|
@@ -946,12 +994,42 @@ export class FindWhatISaid {
|
|
|
946
994
|
semanticResults: results.length,
|
|
947
995
|
topSimilarity: results[0]?.similarity
|
|
948
996
|
}, '[I5 FIX] Low/no semantic results, triggering keyword fallback');
|
|
949
|
-
|
|
997
|
+
const KEYWORD_FALLBACK_TIMEOUT = parseInt(process.env['SPECMEM_KEYWORD_FALLBACK_TIMEOUT_MS'] || '30000');
|
|
998
|
+
let keywordTimeoutId;
|
|
999
|
+
try {
|
|
1000
|
+
keywordResults = await Promise.race([
|
|
1001
|
+
this.keywordSearch(safeParams.query, safeParams),
|
|
1002
|
+
new Promise((_, reject) => {
|
|
1003
|
+
keywordTimeoutId = setTimeout(() => reject(new Error(`Keyword fallback timed out after ${KEYWORD_FALLBACK_TIMEOUT}ms`)), KEYWORD_FALLBACK_TIMEOUT);
|
|
1004
|
+
})
|
|
1005
|
+
]);
|
|
1006
|
+
clearTimeout(keywordTimeoutId);
|
|
1007
|
+
}
|
|
1008
|
+
catch (err) {
|
|
1009
|
+
clearTimeout(keywordTimeoutId);
|
|
1010
|
+
logger.warn({ error: err?.message, timeoutMs: KEYWORD_FALLBACK_TIMEOUT, query: safeParams.query }, '[I5 FIX] Keyword fallback timed out or failed - continuing with semantic results only');
|
|
1011
|
+
keywordResults = [];
|
|
1012
|
+
}
|
|
950
1013
|
}
|
|
951
1014
|
// I5 FIX: Get recent memories if requested
|
|
952
1015
|
let recentResults = [];
|
|
953
1016
|
if (includeRecentCount > 0) {
|
|
954
|
-
|
|
1017
|
+
const RECENT_LOOKUP_TIMEOUT = parseInt(process.env['SPECMEM_RECENT_LOOKUP_TIMEOUT_MS'] || '15000');
|
|
1018
|
+
let recentTimeoutId;
|
|
1019
|
+
try {
|
|
1020
|
+
recentResults = await Promise.race([
|
|
1021
|
+
this.getRecentMemories(includeRecentCount, safeParams),
|
|
1022
|
+
new Promise((_, reject) => {
|
|
1023
|
+
recentTimeoutId = setTimeout(() => reject(new Error(`Recent memories lookup timed out after ${RECENT_LOOKUP_TIMEOUT}ms`)), RECENT_LOOKUP_TIMEOUT);
|
|
1024
|
+
})
|
|
1025
|
+
]);
|
|
1026
|
+
clearTimeout(recentTimeoutId);
|
|
1027
|
+
}
|
|
1028
|
+
catch (err) {
|
|
1029
|
+
clearTimeout(recentTimeoutId);
|
|
1030
|
+
logger.warn({ error: err?.message, timeoutMs: RECENT_LOOKUP_TIMEOUT, includeRecentCount }, '[I5 FIX] Recent memories lookup timed out or failed - continuing without recent results');
|
|
1031
|
+
recentResults = [];
|
|
1032
|
+
}
|
|
955
1033
|
logger.info({
|
|
956
1034
|
recentRequested: includeRecentCount,
|
|
957
1035
|
recentFound: recentResults.length
|
|
@@ -1021,47 +1099,61 @@ export class FindWhatISaid {
|
|
|
1021
1099
|
// ============================================================================
|
|
1022
1100
|
if (safeParams.galleryMode === true) {
|
|
1023
1101
|
logger.info({ query: safeParams.query, resultCount: results.length }, 'Gallery mode enabled - sending to Mini COT');
|
|
1102
|
+
const GALLERY_TIMEOUT = parseInt(process.env['SPECMEM_GALLERY_TIMEOUT_MS'] || '60000');
|
|
1103
|
+
let galleryTimeoutId;
|
|
1024
1104
|
try {
|
|
1025
|
-
const
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1105
|
+
const galleryOperation = async () => {
|
|
1106
|
+
const miniCOT = new MiniCOTProvider();
|
|
1107
|
+
// Prepare memories for gallery creation (send ENGLISH to CoT!)
|
|
1108
|
+
const memoriesForGallery = results.map(result => ({
|
|
1109
|
+
id: result.memory.id,
|
|
1110
|
+
keywords: result.memory.metadata?._semanticHints || result.memory.tags.join(', '),
|
|
1111
|
+
snippet: result.memory.content.slice(0, 300), // First 300 chars
|
|
1112
|
+
timestamp: result.memory.metadata?.timestamp, // When it was said
|
|
1113
|
+
role: result.memory.metadata?.role // Who said it (user/assistant)
|
|
1114
|
+
}));
|
|
1115
|
+
// Call Mini COT to create gallery (CoT analyzes in ENGLISH)
|
|
1116
|
+
const gallery = await miniCOT.createGallery(safeParams.query, memoriesForGallery);
|
|
1117
|
+
// ROUND-TRIP VERIFIED compression - compress CoT OUTPUT for token efficiency
|
|
1118
|
+
// Uses smartCompress: EN→Chinese→EN comparison, keeps English where context lost
|
|
1119
|
+
// MED-40 FIX: Add null check before compression to avoid undefined errors
|
|
1120
|
+
gallery.gallery = gallery.gallery.map(item => ({
|
|
1121
|
+
...item,
|
|
1122
|
+
thumbnail: item.thumbnail ? smartCompress(item.thumbnail, { threshold: 0.75 }).result : '',
|
|
1123
|
+
cot: item.cot ? smartCompress(item.cot, { threshold: 0.75 }).result : ''
|
|
1124
|
+
}));
|
|
1125
|
+
logger.info({
|
|
1126
|
+
query: safeParams.query,
|
|
1127
|
+
galleryItems: gallery.gallery.length,
|
|
1128
|
+
researchedTerms: gallery.total_researched_terms
|
|
1129
|
+
}, 'Gallery created by Mini COT and compressed');
|
|
1130
|
+
// Always use humanReadable format
|
|
1131
|
+
const humanReadableData = gallery.gallery.map((item, idx) => ({
|
|
1132
|
+
id: item.id || `gallery-${idx}`,
|
|
1133
|
+
similarity: item.relevance ? item.relevance / 100 : 0.5,
|
|
1134
|
+
content: `[GALLERY] ${item.thumbnail || item.cot || 'No preview'}`,
|
|
1135
|
+
}));
|
|
1136
|
+
return formatHumanReadable('find_memory', humanReadableData, {
|
|
1137
|
+
grey: true,
|
|
1138
|
+
showSimilarity: true,
|
|
1139
|
+
query: safeParams.query,
|
|
1140
|
+
mode: 'gallery'
|
|
1141
|
+
});
|
|
1142
|
+
};
|
|
1143
|
+
const galleryResult = await Promise.race([
|
|
1144
|
+
galleryOperation(),
|
|
1145
|
+
new Promise((_, reject) => {
|
|
1146
|
+
galleryTimeoutId = setTimeout(() => reject(new Error(`Gallery mode timed out after ${GALLERY_TIMEOUT}ms`)), GALLERY_TIMEOUT);
|
|
1147
|
+
})
|
|
1148
|
+
]);
|
|
1149
|
+
clearTimeout(galleryTimeoutId);
|
|
1150
|
+
return galleryResult;
|
|
1061
1151
|
}
|
|
1062
1152
|
catch (error) {
|
|
1063
|
-
|
|
1064
|
-
|
|
1153
|
+
clearTimeout(galleryTimeoutId);
|
|
1154
|
+
const isTimeout = error?.message?.includes('timed out');
|
|
1155
|
+
logger.error({ error: error?.message, query: safeParams.query, isTimeout, timeoutMs: GALLERY_TIMEOUT }, isTimeout ? 'Gallery mode timed out - falling back to normal results' : 'Mini COT gallery creation failed - falling back to normal results');
|
|
1156
|
+
// Fall through to normal results on error or timeout
|
|
1065
1157
|
}
|
|
1066
1158
|
}
|
|
1067
1159
|
// ============================================================================
|
package/dist/utils/qoms.js
CHANGED
|
@@ -43,13 +43,22 @@ const CONFIG = {
|
|
|
43
43
|
maxRetries: 3, // Max retry attempts before DLQ
|
|
44
44
|
baseRetryDelayMs: 1000, // Base delay for exponential backoff (1s, 2s, 4s)
|
|
45
45
|
maxRetryDelayMs: 30000, // Cap retry delay at 30s
|
|
46
|
-
leaseTimeoutMs:
|
|
46
|
+
leaseTimeoutMs: parseInt(process.env['SPECMEM_QOMS_LEASE_TIMEOUT'] || '120000'), // 120s lease (was 60s) - configurable via env
|
|
47
47
|
agePromotionMs: 30000, // Promote priority after 30s waiting
|
|
48
48
|
// DLQ settings
|
|
49
|
-
dlqMaxSize:
|
|
49
|
+
dlqMaxSize: parseInt(process.env['SPECMEM_QOMS_MAX_DLQ_SIZE'] || '500'), // Max DLQ size (oldest evicted) - Issue #8
|
|
50
50
|
dlqRetentionMs: 3600000, // Keep DLQ items for 1 hour
|
|
51
51
|
// Metrics cache
|
|
52
52
|
metricsCacheMs: 500, // Cache metrics for 500ms
|
|
53
|
+
// Issue #5: Periodic lease expiry check interval (default 10s)
|
|
54
|
+
leaseCheckIntervalMs: parseInt(process.env['SPECMEM_QOMS_LEASE_CHECK_INTERVAL_MS'] || '10000'),
|
|
55
|
+
// Issue #8: Queue size limits (backpressure)
|
|
56
|
+
maxQueueSize: parseInt(process.env['SPECMEM_QOMS_MAX_QUEUE_SIZE'] || '1000'), // Total max across all priorities
|
|
57
|
+
maxHighQueue: parseInt(process.env['SPECMEM_QOMS_MAX_HIGH_QUEUE'] || '500'),
|
|
58
|
+
maxMediumQueue: parseInt(process.env['SPECMEM_QOMS_MAX_MEDIUM_QUEUE'] || '300'),
|
|
59
|
+
maxLowQueue: parseInt(process.env['SPECMEM_QOMS_MAX_LOW_QUEUE'] || '200'),
|
|
60
|
+
// Issue #8: Queue depth metrics logging interval (default 1min)
|
|
61
|
+
metricsIntervalMs: parseInt(process.env['SPECMEM_QOMS_METRICS_INTERVAL_MS'] || '60000'),
|
|
53
62
|
};
|
|
54
63
|
// ============================================================================
|
|
55
64
|
// Types
|
|
@@ -91,6 +100,18 @@ let lastCpuInfo = null;
|
|
|
91
100
|
let lastCpuTime = 0;
|
|
92
101
|
// Operation ID counter
|
|
93
102
|
let operationIdCounter = 0;
|
|
103
|
+
// Handle of the periodic lease-expiry timer (Issue #5); null while not running.
let leaseCheckInterval = null;
// Handle of the periodic queue-metrics timer (Issue #8); null while not running.
let metricsInterval = null;
// Issue #8: upper bound on queued items for each priority level.
const perPriorityMaxSize = new Map()
    .set(Priority.CRITICAL, Infinity) // Critical operations are never rejected
    .set(Priority.HIGH, CONFIG.maxHighQueue)
    .set(Priority.MEDIUM, CONFIG.maxMediumQueue)
    .set(Priority.LOW, CONFIG.maxLowQueue)
    .set(Priority.IDLE, CONFIG.maxLowQueue); // IDLE shares the LOW limit
|
|
94
115
|
// ============================================================================
|
|
95
116
|
// Utility Functions
|
|
96
117
|
// ============================================================================
|
|
@@ -355,16 +376,37 @@ function nack(opId, error) {
|
|
|
355
376
|
}
|
|
356
377
|
/**
 * Find in-flight operations whose lease deadline has passed and nack them so the
 * normal failure/retry path takes over.
 *
 * @param {boolean} [periodic=false] - true when invoked by the periodic timer
 *   (Issue #5); each expired lease is then additionally logged at warn level.
 * @returns {number} How many expired leases were nacked during this pass.
 */
function checkLeaseTimeouts(periodic = false) {
    const now = Date.now();
    let expiredCount = 0;
    for (const [opId, item] of processingItems.entries()) {
        // Leave alone anything with no lease deadline or a deadline still in the future.
        if (!(item.leaseExpiresAt && now > item.leaseExpiresAt)) {
            continue;
        }
        const expiredAgoMs = now - item.leaseExpiresAt;
        if (periodic) {
            // Issue #5: the periodic sweep reports full detail for every stale operation.
            logger.warn({
                opId,
                priority: Priority[item.priority],
                expiredAgoMs,
                enqueuedAt: item.enqueuedAt,
                startedAt: item.startedAt,
                retryCount: item.retryCount,
            }, 'QOMS: Periodic lease check expired stale operation');
        }
        __debugLog('[QOMS DEBUG]', Date.now(), 'LEASE_TIMEOUT', {
            opId,
            expiredAgo: expiredAgoMs,
            periodic,
            priority: Priority[item.priority],
        });
        // An expired lease counts as a failure; nack() drives the retry handling.
        nack(opId, new Error('Lease timeout - operation took too long'));
        expiredCount += 1;
    }
    return expiredCount;
}
|
|
369
411
|
// ============================================================================
|
|
370
412
|
// Queue Processor
|
|
@@ -475,6 +517,116 @@ async function processQueue() {
|
|
|
475
517
|
}
|
|
476
518
|
}
|
|
477
519
|
// ============================================================================
|
|
520
|
+
// Issue #5: Periodic Lease Expiry Check
|
|
521
|
+
// ============================================================================
|
|
522
|
+
/**
 * Start the periodic lease-expiry sweep (Issue #5). Calling it again while the
 * timer is already running is a no-op.
 *
 * Every CONFIG.leaseCheckIntervalMs the sweep calls checkLeaseTimeouts(true); when
 * any lease was released it kicks processQueue() so waiting items can use the
 * freed slots. If the configured interval is not a usable timer delay (NaN, below
 * 1 ms, or above the 32-bit timer maximum) the 10s default is used instead,
 * because setInterval would otherwise coerce such a value to a 1 ms delay.
 */
function startPeriodicLeaseCheck() {
    if (leaseCheckInterval) {
        return; // Already running
    }
    const DEFAULT_INTERVAL_MS = 10000;
    const MAX_TIMER_DELAY_MS = 2147483647;
    const configuredMs = CONFIG.leaseCheckIntervalMs;
    const intervalMs = Number.isFinite(configuredMs) && configuredMs >= 1 && configuredMs <= MAX_TIMER_DELAY_MS
        ? configuredMs
        : DEFAULT_INTERVAL_MS;
    __debugLog('[QOMS DEBUG]', Date.now(), 'PERIODIC_LEASE_CHECK_START', { intervalMs });
    leaseCheckInterval = setInterval(() => {
        try {
            const expiredCount = checkLeaseTimeouts(true);
            if (expiredCount > 0) {
                __debugLog('[QOMS DEBUG]', Date.now(), 'PERIODIC_LEASE_CHECK_EXPIRED', { expiredCount });
                // Trigger queue processing to fill freed slots
                processQueue().catch(err => {
                    logger.error({ error: err }, 'QOMS: queue processing error after periodic lease check');
                });
            }
        }
        catch (err) {
            // Never let a failed sweep escape the timer callback.
            logger.error({ error: err }, 'QOMS: periodic lease check error');
        }
    }, intervalMs);
    // Prevent the interval from keeping the process alive
    if (leaseCheckInterval && typeof leaseCheckInterval.unref === 'function') {
        leaseCheckInterval.unref();
    }
}
|
|
554
|
+
// ============================================================================
|
|
555
|
+
// Issue #8: Periodic Queue Depth Metrics Logging
|
|
556
|
+
// ============================================================================
|
|
557
|
+
/**
 * Start periodic queue-depth metrics logging (Issue #8). Calling it again while
 * the timer is already running is a no-op.
 *
 * Every CONFIG.metricsIntervalMs it logs per-priority queue lengths, the total
 * queued count, the in-flight count, DLQ size and the processed/retry counters.
 * If the configured interval is not a usable timer delay (NaN, below 1 ms, or
 * above the 32-bit timer maximum) the 60s default is used instead, because
 * setInterval would otherwise coerce such a value to a 1 ms delay.
 */
function startMetricsLogging() {
    if (metricsInterval) {
        return; // Already running
    }
    const DEFAULT_INTERVAL_MS = 60000;
    const MAX_TIMER_DELAY_MS = 2147483647;
    const configuredMs = CONFIG.metricsIntervalMs;
    const intervalMs = Number.isFinite(configuredMs) && configuredMs >= 1 && configuredMs <= MAX_TIMER_DELAY_MS
        ? configuredMs
        : DEFAULT_INTERVAL_MS;
    __debugLog('[QOMS DEBUG]', Date.now(), 'METRICS_LOGGING_START', { intervalMs });
    metricsInterval = setInterval(() => {
        try {
            // Snapshot each priority queue's length, keyed by the priority's name.
            const queueDepths = {};
            let totalQueued = 0;
            for (const [priority, queue] of priorityQueues.entries()) {
                const name = Priority[priority];
                queueDepths[name] = queue.length;
                totalQueued += queue.length;
            }
            logger.info({
                queueDepths,
                totalQueued,
                processing: processingItems.size,
                dlqSize: dlq.length,
                totalProcessed,
                totalRetries,
                maxQueueSize: CONFIG.maxQueueSize,
            }, 'QOMS: queue depth metrics');
            __debugLog('[QOMS DEBUG]', Date.now(), 'METRICS_LOG', {
                queueDepths,
                totalQueued,
                processing: processingItems.size,
                dlqSize: dlq.length,
            });
        }
        catch (err) {
            // Never let a failed metrics pass escape the timer callback.
            logger.error({ error: err }, 'QOMS: metrics logging error');
        }
    }, intervalMs);
    // Prevent the interval from keeping the process alive
    if (metricsInterval && typeof metricsInterval.unref === 'function') {
        metricsInterval.unref();
    }
}
|
|
602
|
+
// ============================================================================
|
|
603
|
+
// Issue #5 + #8: Cleanup / Destroy
|
|
604
|
+
// ============================================================================
|
|
605
|
+
/**
 * Stop the QOMS background timers: the periodic lease check and the periodic
 * metrics logger. Each handle is cleared and reset to null only if it is set.
 * Intended to be called on shutdown so no interval is left behind.
 */
function cleanup() {
    // Lease-expiry sweep
    if (leaseCheckInterval) {
        clearInterval(leaseCheckInterval);
        leaseCheckInterval = null;
        __debugLog('[QOMS DEBUG]', Date.now(), 'PERIODIC_LEASE_CHECK_STOPPED');
    }
    // Queue-depth metrics logger
    if (metricsInterval) {
        clearInterval(metricsInterval);
        metricsInterval = null;
        __debugLog('[QOMS DEBUG]', Date.now(), 'METRICS_LOGGING_STOPPED');
    }
    logger.info('QOMS: cleanup complete - all intervals cleared');
}
// `destroy` is a second name for the same function.
const destroy = cleanup;
|
|
624
|
+
// ============================================================================
|
|
625
|
+
// Auto-start periodic checks
|
|
626
|
+
// ============================================================================
|
|
627
|
+
startPeriodicLeaseCheck();
|
|
628
|
+
startMetricsLogging();
|
|
629
|
+
// ============================================================================
|
|
478
630
|
// Public API
|
|
479
631
|
// ============================================================================
|
|
480
632
|
/**
|
|
@@ -491,6 +643,26 @@ export async function enqueue(operation, priority = Priority.MEDIUM) {
|
|
|
491
643
|
priority: Priority[priority],
|
|
492
644
|
totalQueued: getTotalQueueLength()
|
|
493
645
|
});
|
|
646
|
+
// Issue #8: Check queue size limits (backpressure) - skip for CRITICAL priority
|
|
647
|
+
if (priority !== Priority.CRITICAL) {
|
|
648
|
+
const totalQueued = getTotalQueueLength();
|
|
649
|
+
// Check total queue size limit
|
|
650
|
+
if (totalQueued >= CONFIG.maxQueueSize) {
|
|
651
|
+
const errMsg = `QOMS: Queue full (${totalQueued}/${CONFIG.maxQueueSize}). Rejecting operation ${opId} with priority ${Priority[priority]}. Configure SPECMEM_QOMS_MAX_QUEUE_SIZE to increase limit.`;
|
|
652
|
+
logger.warn({ opId, priority: Priority[priority], totalQueued, maxQueueSize: CONFIG.maxQueueSize }, errMsg);
|
|
653
|
+
__debugLog('[QOMS DEBUG]', Date.now(), 'QUEUE_FULL_REJECTED', { opId, totalQueued, maxQueueSize: CONFIG.maxQueueSize });
|
|
654
|
+
throw new Error(errMsg);
|
|
655
|
+
}
|
|
656
|
+
// Check per-priority queue size limit
|
|
657
|
+
const priorityQueue = priorityQueues.get(priority);
|
|
658
|
+
const maxForPriority = perPriorityMaxSize.get(priority) ?? CONFIG.maxQueueSize;
|
|
659
|
+
if (priorityQueue.length >= maxForPriority) {
|
|
660
|
+
const errMsg = `QOMS: ${Priority[priority]} queue full (${priorityQueue.length}/${maxForPriority}). Rejecting operation ${opId}. Configure SPECMEM_QOMS_MAX_${Priority[priority]}_QUEUE to increase limit.`;
|
|
661
|
+
logger.warn({ opId, priority: Priority[priority], queueLength: priorityQueue.length, maxForPriority }, errMsg);
|
|
662
|
+
__debugLog('[QOMS DEBUG]', Date.now(), 'PRIORITY_QUEUE_FULL_REJECTED', { opId, priority: Priority[priority], queueLength: priorityQueue.length, maxForPriority });
|
|
663
|
+
throw new Error(errMsg);
|
|
664
|
+
}
|
|
665
|
+
}
|
|
494
666
|
// Check if we can execute immediately (empty queue, resources available)
|
|
495
667
|
const queue = priorityQueues.get(priority);
|
|
496
668
|
if (getTotalQueueLength() === 0 && processingItems.size === 0 && canExecute(priority, opId)) {
|
|
@@ -568,10 +740,19 @@ export function getQueueStats() {
|
|
|
568
740
|
pendingRetries,
|
|
569
741
|
totalRetries,
|
|
570
742
|
dlqSize: dlq.length,
|
|
743
|
+
dlqMaxSize: CONFIG.dlqMaxSize,
|
|
571
744
|
isProcessing,
|
|
572
745
|
avgWaitTimeMs: totalProcessed > 0 ? totalWaitTimeMs / totalProcessed : 0,
|
|
573
746
|
metrics: getSystemMetrics(),
|
|
574
747
|
limits: CONFIG,
|
|
748
|
+
// Issue #8: Queue capacity info
|
|
749
|
+
queueCapacity: {
|
|
750
|
+
maxTotal: CONFIG.maxQueueSize,
|
|
751
|
+
maxHigh: CONFIG.maxHighQueue,
|
|
752
|
+
maxMedium: CONFIG.maxMediumQueue,
|
|
753
|
+
maxLow: CONFIG.maxLowQueue,
|
|
754
|
+
remainingTotal: CONFIG.maxQueueSize - getTotalQueueLength(),
|
|
755
|
+
},
|
|
575
756
|
};
|
|
576
757
|
}
|
|
577
758
|
/**
|
|
@@ -644,6 +825,8 @@ export const qoms = {
|
|
|
644
825
|
getDLQ,
|
|
645
826
|
clearDLQ,
|
|
646
827
|
retryDLQItem,
|
|
828
|
+
cleanup,
|
|
829
|
+
destroy,
|
|
647
830
|
Priority,
|
|
648
831
|
};
|
|
649
832
|
export default qoms;
|
|
@@ -20,6 +20,32 @@ import { logger } from '../utils/logger.js';
|
|
|
20
20
|
import { getCoordinator } from '../coordination/integration.js';
|
|
21
21
|
import { isMinifiedOrBundled, isBinaryFile, EXCLUSION_CONFIG } from '../codebase/exclusions.js';
|
|
22
22
|
import { getProjectPathForInsert } from '../services/ProjectContext.js';
|
|
23
|
+
import { getEmbeddingTimeout } from '../config/embeddingTimeouts.js';
|
|
24
|
+
// Number of retries (in addition to the first attempt) for watcher embedding
// generation, read from SPECMEM_WATCHER_RETRIES. Parsed with an explicit radix; a
// missing, non-numeric or negative value falls back to 2. Without this guard a NaN
// limit would make the retry loop run zero times and `throw null`.
const WATCHER_MAX_RETRIES = (() => {
    const parsed = Number.parseInt(process.env['SPECMEM_WATCHER_RETRIES'] ?? '', 10);
    return Number.isInteger(parsed) && parsed >= 0 ? parsed : 2;
})();
/**
 * Run an async embedding operation for the file watcher, retrying transient
 * failures (timeout, connection reset/refused, socket errors, QOMS rejections)
 * with exponential backoff: 1s, 2s, 4s, ... capped at 8s.
 *
 * @param {() => Promise<*>} operation - Work to run; invoked afresh on every attempt.
 * @param {string} filePath - Path included in the retry warning log.
 * @returns {Promise<*>} Whatever `operation` resolves to.
 * @throws {Error} The last failure; non-Error throwables are wrapped in an Error.
 */
async function withWatcherRetry(operation, filePath) {
    let lastError = null;
    for (let attempt = 0; attempt <= WATCHER_MAX_RETRIES; attempt++) {
        try {
            return await operation();
        }
        catch (error) {
            lastError = error instanceof Error ? error : new Error(String(error));
            const msg = lastError.message.toLowerCase();
            const isTransient = ['timeout', 'econnreset', 'econnrefused', 'socket', 'qoms']
                .some((marker) => msg.includes(marker));
            if (attempt >= WATCHER_MAX_RETRIES || !isTransient) {
                break;
            }
            const delay = Math.min(1000 * Math.pow(2, attempt), 8000);
            logger.warn({ filePath, attempt: attempt + 1, retryInMs: delay, error: lastError.message }, `[Watcher] Embedding retry ${attempt + 1}/${WATCHER_MAX_RETRIES}`);
            await new Promise(resolve => setTimeout(resolve, delay));
        }
    }
    throw lastError;
}
|
|
23
49
|
/**
|
|
24
50
|
* autoUpdateTheMemories - main change handler class
|
|
25
51
|
*
|
|
@@ -135,10 +161,22 @@ export class AutoUpdateTheMemories {
|
|
|
135
161
|
this.stats.filesSkipped++;
|
|
136
162
|
return;
|
|
137
163
|
}
|
|
138
|
-
// generate embedding with retry
|
|
164
|
+
// generate embedding with retry + timeout protection
|
|
139
165
|
let embedding;
|
|
166
|
+
const WATCHER_EMBEDDING_TIMEOUT = getEmbeddingTimeout('fileWatcher');
|
|
140
167
|
try {
|
|
141
|
-
embedding = await
|
|
168
|
+
embedding = await withWatcherRetry(async () => {
|
|
169
|
+
return new Promise((resolve, reject) => {
|
|
170
|
+
const timeoutId = setTimeout(() => {
|
|
171
|
+
const err = new Error(`[Watcher] Embedding generation timed out after ${Math.round(WATCHER_EMBEDDING_TIMEOUT / 1000)}s for ${metadata.relativePath}`);
|
|
172
|
+
err.code = 'WATCHER_EMBEDDING_TIMEOUT';
|
|
173
|
+
reject(err);
|
|
174
|
+
}, WATCHER_EMBEDDING_TIMEOUT);
|
|
175
|
+
this.config.embeddingProvider.generateEmbedding(content)
|
|
176
|
+
.then(result => { clearTimeout(timeoutId); resolve(result); })
|
|
177
|
+
.catch(error => { clearTimeout(timeoutId); reject(error); });
|
|
178
|
+
});
|
|
179
|
+
}, metadata.relativePath);
|
|
142
180
|
}
|
|
143
181
|
catch (embeddingError) {
|
|
144
182
|
logger.error({
|
|
@@ -236,10 +274,22 @@ export class AutoUpdateTheMemories {
|
|
|
236
274
|
this.stats.filesSkipped++;
|
|
237
275
|
return;
|
|
238
276
|
}
|
|
239
|
-
// generate new embedding with retry and queue fallback
|
|
277
|
+
// generate new embedding with retry and queue fallback + timeout protection
|
|
240
278
|
let embedding;
|
|
279
|
+
const WATCHER_EMBEDDING_TIMEOUT_MOD = getEmbeddingTimeout('fileWatcher');
|
|
241
280
|
try {
|
|
242
|
-
embedding = await
|
|
281
|
+
embedding = await withWatcherRetry(async () => {
|
|
282
|
+
return new Promise((resolve, reject) => {
|
|
283
|
+
const timeoutId = setTimeout(() => {
|
|
284
|
+
const err = new Error(`[Watcher] Embedding generation timed out after ${Math.round(WATCHER_EMBEDDING_TIMEOUT_MOD / 1000)}s for ${metadata.relativePath}`);
|
|
285
|
+
err.code = 'WATCHER_EMBEDDING_TIMEOUT';
|
|
286
|
+
reject(err);
|
|
287
|
+
}, WATCHER_EMBEDDING_TIMEOUT_MOD);
|
|
288
|
+
this.config.embeddingProvider.generateEmbedding(content)
|
|
289
|
+
.then(result => { clearTimeout(timeoutId); resolve(result); })
|
|
290
|
+
.catch(error => { clearTimeout(timeoutId); reject(error); });
|
|
291
|
+
});
|
|
292
|
+
}, metadata.relativePath);
|
|
243
293
|
}
|
|
244
294
|
catch (embeddingError) {
|
|
245
295
|
logger.error({
|