@henrychong-ai/mcp-neo4j-knowledge-graph 2.3.2 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,6 @@
1
1
  import crypto from 'node:crypto';
2
2
  import { LRUCache } from 'lru-cache';
3
- import { v4 as uuidv4 } from 'uuid';
4
- /**
5
- * Default logger implementation
6
- */
3
+ const DEFAULT_STALE_CLAIM_MS = 5 * 60 * 1000;
7
4
  const nullLogger = {
8
5
  debug: () => { },
9
6
  info: () => { },
@@ -11,7 +8,11 @@ const nullLogger = {
11
8
  error: () => { },
12
9
  };
13
10
  /**
14
- * Manages embedding jobs for semantic search
11
+ * Manages embedding jobs for semantic search.
12
+ *
13
+ * Persistence of the queue lives behind a `JobStore` — production wiring
14
+ * uses `Neo4jJobStore`, which stores jobs as `:EmbeddingJob` nodes and
15
+ * makes `claim()` safe under multi-worker contention.
15
16
  */
16
17
  export class EmbeddingJobManager {
17
18
  storageProvider;
@@ -20,20 +21,19 @@ export class EmbeddingJobManager {
20
21
  cache;
21
22
  cacheOptions = { size: 1000, ttl: 3_600_000 };
22
23
  logger;
23
- /**
24
- * Creates a new embedding job manager
25
- *
26
- * @param storageProvider - Provider for entity storage
27
- * @param embeddingService - Service to generate embeddings
28
- * @param rateLimiterOptions - Optional configuration for rate limiting
29
- * @param cacheOptions - Optional configuration for caching
30
- * @param logger - Optional logger for operation logging
31
- */
32
- constructor(storageProvider, embeddingService, rateLimiterOptions, cacheOptions, logger) {
24
+ jobStore;
25
+ staleClaimMs;
26
+ /** Stable id for this process — visible in `:EmbeddingJob.claimedBy`. */
27
+ workerId = crypto.randomUUID();
28
+ constructor(storageProvider, embeddingService, rateLimiterOptions, cacheOptions, logger, jobStore, staleClaimMs) {
33
29
  this.storageProvider = storageProvider;
34
30
  this.embeddingService = embeddingService;
35
31
  this.logger = logger || nullLogger;
36
- // Setup rate limiter with defaults
32
+ if (!jobStore) {
33
+ throw new Error('EmbeddingJobManager requires a JobStore (v2.4.0+)');
34
+ }
35
+ this.jobStore = jobStore;
36
+ this.staleClaimMs = staleClaimMs ?? DEFAULT_STALE_CLAIM_MS;
37
37
  const defaultRateLimiter = {
38
38
  tokensPerInterval: 60,
39
39
  interval: 60 * 1000,
@@ -45,9 +45,7 @@ export class EmbeddingJobManager {
45
45
  tokensPerInterval: rateOptions.tokensPerInterval,
46
46
  interval: rateOptions.interval,
47
47
  };
48
- // Setup LRU cache
49
48
  if (cacheOptions) {
50
- // Support both API styles (tests use maxItems/ttlHours)
51
49
  this.cacheOptions = {
52
50
  size: cacheOptions.size || cacheOptions.maxItems || 1000,
53
51
  ttl: cacheOptions.ttl ||
@@ -59,191 +57,97 @@ export class EmbeddingJobManager {
59
57
  ttl: Math.max(1, Math.round(this.cacheOptions.ttl)),
60
58
  updateAgeOnGet: true,
61
59
  allowStale: false,
62
- // Use a ttlAutopurge option to ensure items are purged when TTL expires
63
60
  ttlAutopurge: true,
64
61
  });
65
- // Initialize database schema
66
- this._initializeDatabase();
62
+ // Schema bootstrap — fire-and-log; safe on every start.
63
+ this.jobStore.ensureSchema().catch(err => {
64
+ this.logger.error('Failed to ensure JobStore schema', {
65
+ error: err instanceof Error ? err.message : String(err),
66
+ });
67
+ });
67
68
  this.logger.info('EmbeddingJobManager initialized', {
69
+ workerId: this.workerId,
70
+ staleClaimMs: this.staleClaimMs,
68
71
  cacheSize: this.cacheOptions.size,
69
72
  cacheTtl: this.cacheOptions.ttl,
70
73
  rateLimit: `${this.rateLimiter.tokensPerInterval} per ${this.rateLimiter.interval}ms`,
71
74
  });
72
75
  }
73
- /**
74
- * Initialize the database schema for embedding jobs
75
- *
76
- * @private
77
- */
78
- _initializeDatabase() {
79
- const createTableSql = `
80
- CREATE TABLE IF NOT EXISTS embedding_jobs (
81
- id TEXT PRIMARY KEY,
82
- entity_name TEXT NOT NULL,
83
- status TEXT NOT NULL,
84
- priority INTEGER NOT NULL DEFAULT 1,
85
- created_at INTEGER NOT NULL,
86
- processed_at INTEGER,
87
- error TEXT,
88
- attempts INTEGER NOT NULL DEFAULT 0,
89
- max_attempts INTEGER NOT NULL DEFAULT 3
90
- )
91
- `;
92
- // Create an index for efficient job retrieval
93
- const createIndexSql = `
94
- CREATE INDEX IF NOT EXISTS idx_embedding_jobs_status_priority
95
- ON embedding_jobs (status, priority DESC)
96
- `;
97
- try {
98
- this.storageProvider.db.exec(createTableSql);
99
- this.storageProvider.db.exec(createIndexSql);
100
- this.logger.debug('Database schema initialized for embedding jobs');
101
- }
102
- catch (error) {
103
- this.logger.error('Failed to initialize database schema', { error });
104
- throw error;
105
- }
106
- }
107
- /**
108
- * Schedule an entity for embedding generation
109
- *
110
- * @param entityName - Name of the entity to generate embedding for
111
- * @param priority - Optional priority (higher priority jobs are processed first)
112
- * @returns Job ID
113
- */
114
76
  async scheduleEntityEmbedding(entityName, priority = 1) {
115
- // Verify entity exists
116
77
  const entity = await this.storageProvider.getEntity(entityName);
117
78
  if (!entity) {
118
79
  const error = `Entity ${entityName} not found`;
119
80
  this.logger.error('Failed to schedule embedding', { entityName, error });
120
81
  throw new Error(error);
121
82
  }
122
- // Create a job ID
123
- const jobId = uuidv4();
124
- // Insert a new job record
125
- const stmt = this.storageProvider.db.prepare(`
126
- INSERT INTO embedding_jobs (
127
- id, entity_name, status, priority, created_at, attempts, max_attempts
128
- ) VALUES (?, ?, ?, ?, ?, ?, ?)
129
- `);
130
- stmt.run(jobId, entityName, 'pending', priority, Date.now(), 0, 3);
131
- this.logger.info('Scheduled embedding job', {
132
- jobId,
133
- entityName,
134
- priority,
135
- });
83
+ const jobId = await this.jobStore.enqueue({ entityName, priority });
84
+ this.logger.info('Scheduled embedding job', { jobId, entityName, priority });
136
85
  return jobId;
137
86
  }
138
- /**
139
- * Process a batch of pending embedding jobs
140
- *
141
- * @param batchSize - Maximum number of jobs to process
142
- * @returns Result statistics
143
- */
144
87
  async processJobs(batchSize = 10) {
145
- this.logger.info('Starting job processing', { batchSize });
146
- // Get pending jobs, ordered by priority (highest first)
147
- const stmt = this.storageProvider.db.prepare(`
148
- SELECT * FROM embedding_jobs
149
- WHERE status = 'pending'
150
- ORDER BY priority DESC, created_at ASC
151
- LIMIT ?
152
- `);
153
- const jobs = stmt.all(batchSize);
154
- this.logger.debug('Found pending jobs', { count: jobs.length });
155
- // Initialize counters
156
- const result = {
157
- processed: 0,
158
- successful: 0,
159
- failed: 0,
160
- };
161
- // Process each job
88
+ this.logger.info('Starting job processing', { batchSize, workerId: this.workerId });
89
+ // Stale-claim sweep — releases jobs orphaned by dead workers before
90
+ // we attempt to claim. Cheap when nothing's stale; essential for
91
+ // recovery when a worker died mid-claim.
92
+ await this.jobStore.releaseStale(this.staleClaimMs);
93
+ const jobs = await this.jobStore.claim(this.workerId, batchSize);
94
+ this.logger.debug('Claimed jobs', { count: jobs.length });
95
+ const result = { processed: 0, successful: 0, failed: 0 };
162
96
  for (const job of jobs) {
163
- // Check rate limiter before processing
164
97
  const rateLimitCheck = this._checkRateLimiter();
165
98
  if (!rateLimitCheck.success) {
166
- this.logger.warn('Rate limit reached, pausing job processing', {
167
- remaining: jobs.length - result.processed,
99
+ const remaining = jobs.slice(result.processed).map(j => j.id);
100
+ this.logger.warn('Rate limit reached, releasing remaining claims', {
101
+ remaining: remaining.length,
168
102
  });
169
- break; // Stop processing jobs if rate limit is reached
103
+ // Release without burning retry attempts — these jobs never ran.
104
+ await this.jobStore.releaseClaims(remaining).catch(() => { });
105
+ break;
170
106
  }
171
107
  this.logger.info('Processing embedding job', {
172
108
  jobId: job.id,
173
- entityName: job.entity_name,
174
- attempt: job.attempts + 1,
175
- maxAttempts: job.max_attempts,
109
+ entityName: job.entityName,
110
+ attempt: job.attempts,
111
+ maxAttempts: job.maxAttempts,
176
112
  });
177
- // Update job status to processing
178
- this._updateJobStatus(job.id, 'processing', job.attempts + 1);
179
113
  try {
180
- // Get the entity
181
- const entity = await this.storageProvider.getEntity(job.entity_name);
182
- if (!entity) {
183
- throw new Error(`Entity ${job.entity_name} not found`);
184
- }
185
- // Log entity details for debugging
186
- this.logger.debug('Retrieved entity for embedding', {
187
- entityName: job.entity_name,
188
- entityType: entity.entityType,
189
- hasObservations: entity.observations ? 'yes' : 'no',
190
- observationsType: entity.observations ? typeof entity.observations : 'undefined',
191
- observationsLength: entity.observations && Array.isArray(entity.observations)
192
- ? entity.observations.length
193
- : 'n/a',
194
- });
195
- // Prepare text for embedding
114
+ const entity = await this.storageProvider.getEntity(job.entityName);
115
+ if (!entity)
116
+ throw new Error(`Entity ${job.entityName} not found`);
196
117
  const text = this._prepareEntityText(entity);
197
- // Try to get from cache or generate new embedding
198
- this.logger.debug('Generating embedding for entity', { entityName: job.entity_name });
199
118
  const embedding = await this._getCachedEmbeddingOrGenerate(text);
200
- // Get model info for embedding metadata
201
119
  const modelInfo = this.embeddingService.getModelInfo();
202
- // Store the embedding with the entity
203
- this.logger.debug('Storing entity vector', {
204
- entityName: job.entity_name,
205
- vectorLength: embedding.length,
206
- model: modelInfo.name,
207
- });
208
- await this.storageProvider.storeEntityVector(job.entity_name, {
120
+ await this.storageProvider.storeEntityVector(job.entityName, {
209
121
  vector: embedding,
210
122
  model: modelInfo.name,
211
123
  lastUpdated: Date.now(),
212
124
  });
213
- // Update job status to completed
214
- this._updateJobStatus(job.id, 'completed');
125
+ await this.jobStore.complete(job.id);
215
126
  this.logger.info('Successfully processed embedding job', {
216
127
  jobId: job.id,
217
- entityName: job.entity_name,
128
+ entityName: job.entityName,
218
129
  model: modelInfo.name,
219
130
  dimensions: embedding.length,
220
131
  });
221
132
  result.successful++;
222
133
  }
223
134
  catch (error) {
224
- // Handle failures
225
135
  const errorMessage = error instanceof Error ? error.message : String(error);
226
136
  const errorStack = error instanceof Error ? error.stack : undefined;
137
+ const failResult = await this.jobStore.fail(job.id, errorMessage);
227
138
  this.logger.error('Failed to process embedding job', {
228
139
  jobId: job.id,
229
- entityName: job.entity_name,
140
+ entityName: job.entityName,
230
141
  error: errorMessage,
231
142
  errorStack,
232
- attempt: job.attempts + 1,
233
- maxAttempts: job.max_attempts,
143
+ attempt: job.attempts,
144
+ maxAttempts: job.maxAttempts,
145
+ nextStatus: failResult.status,
234
146
  });
235
- // Determine if we should mark as failed or keep for retry
236
- if (job.attempts + 1 >= job.max_attempts) {
237
- this._updateJobStatus(job.id, 'failed', job.attempts + 1, errorMessage);
238
- }
239
- else {
240
- this._updateJobStatus(job.id, 'pending', job.attempts + 1, errorMessage);
241
- }
242
147
  result.failed++;
243
148
  }
244
149
  result.processed++;
245
150
  }
246
- // Log job processing results
247
151
  const queueStatus = await this.getQueueStatus();
248
152
  this.logger.info('Job processing complete', {
249
153
  processed: result.processed,
@@ -253,130 +157,36 @@ export class EmbeddingJobManager {
253
157
  });
254
158
  return result;
255
159
  }
256
- /**
257
- * Get the current status of the job queue
258
- *
259
- * @returns Queue statistics
260
- */
261
160
  async getQueueStatus() {
262
- const getCountForStatus = (status) => {
263
- let sql = 'SELECT COUNT(*) as count FROM embedding_jobs';
264
- const params = [];
265
- if (status) {
266
- sql += ' WHERE status = ?';
267
- params.push(status);
268
- }
269
- const stmt = this.storageProvider.db.prepare(sql);
270
- const result = stmt.get(...params);
271
- return result?.count || 0;
272
- };
273
- const pending = getCountForStatus('pending');
274
- const processing = getCountForStatus('processing');
275
- const completed = getCountForStatus('completed');
276
- const failed = getCountForStatus('failed');
277
- const total = getCountForStatus();
278
- const result = {
279
- pending,
280
- processing,
281
- completed,
282
- failed,
283
- totalJobs: total,
284
- };
285
- this.logger.debug('Retrieved queue status', result);
286
- return result;
161
+ const counts = await this.jobStore.countByStatus();
162
+ this.logger.debug('Retrieved queue status', { ...counts });
163
+ return counts;
287
164
  }
288
- /**
289
- * Retry failed embedding jobs
290
- *
291
- * @returns Number of jobs reset for retry
292
- */
293
165
  async retryFailedJobs() {
294
- const stmt = this.storageProvider.db.prepare(`
295
- UPDATE embedding_jobs
296
- SET status = 'pending', attempts = 0
297
- WHERE status = 'failed'
298
- `);
299
- const result = stmt.run();
300
- const resetCount = result.changes || 0;
301
- this.logger.info('Reset failed jobs for retry', { count: resetCount });
302
- return resetCount;
166
+ const reset = await this.jobStore.retryFailed();
167
+ this.logger.info('Reset failed jobs for retry', { count: reset });
168
+ return reset;
303
169
  }
304
- /**
305
- * Clean up old completed jobs
306
- *
307
- * @param threshold - Age in milliseconds after which to delete completed jobs, defaults to 7 days
308
- * @returns Number of jobs cleaned up
309
- */
310
170
  async cleanupJobs(threshold) {
311
- const cleanupThreshold = threshold || 7 * 24 * 60 * 60 * 1000; // Default: 7 days
312
- const cutoffTime = Date.now() - cleanupThreshold;
313
- const stmt = this.storageProvider.db.prepare(`
314
- DELETE FROM embedding_jobs
315
- WHERE status = 'completed'
316
- AND processed_at < ?
317
- `);
318
- const result = stmt.run(cutoffTime);
319
- const deletedCount = result.changes || 0;
171
+ const cleanupThreshold = threshold || 7 * 24 * 60 * 60 * 1000;
172
+ const deleted = await this.jobStore.cleanup(cleanupThreshold);
320
173
  this.logger.info('Cleaned up old completed jobs', {
321
- count: deletedCount,
174
+ count: deleted,
322
175
  threshold: cleanupThreshold,
323
- olderThan: new Date(cutoffTime).toISOString(),
176
+ olderThan: new Date(Date.now() - cleanupThreshold).toISOString(),
324
177
  });
325
- return deletedCount;
326
- }
327
- /**
328
- * Update a job's status in the database
329
- *
330
- * @private
331
- * @param jobId - ID of the job to update
332
- * @param status - New status
333
- * @param attempts - Optional attempts count update
334
- * @param error - Optional error message
335
- * @returns Database result
336
- */
337
- _updateJobStatus(jobId, status, attempts, error) {
338
- let sql = `
339
- UPDATE embedding_jobs
340
- SET status = ?
341
- `;
342
- const params = [status];
343
- // Add processed_at timestamp for completed/failed statuses
344
- if (status === 'completed' || status === 'failed') {
345
- sql += ', processed_at = ?';
346
- params.push(Date.now());
347
- }
348
- // Update attempts if provided
349
- if (attempts !== undefined) {
350
- sql += ', attempts = ?';
351
- params.push(attempts);
352
- }
353
- // Include error message if provided
354
- if (error) {
355
- sql += ', error = ?';
356
- params.push(error);
357
- }
358
- sql += ' WHERE id = ?';
359
- params.push(jobId);
360
- const stmt = this.storageProvider.db.prepare(sql);
361
- return stmt.run(...params);
178
+ return deleted;
362
179
  }
363
180
  /**
364
- * Check rate limiter and consume a token if available
365
- *
366
- * @private
367
- * @returns Object with success flag
181
+ * Rate-limiter check that consumes one token when one is available. Still
182
+ * exposed as `_checkRateLimiter` (not private) so legacy tests can call it.
368
183
  */
369
184
  _checkRateLimiter() {
370
- // For testing purposes, make it public by removing 'private'
371
185
  const now = Date.now();
372
186
  const elapsed = now - this.rateLimiter.lastRefill;
373
- // If enough time has passed, refill tokens
374
187
  if (elapsed >= this.rateLimiter.interval) {
375
- // Calculate how many full intervals have passed
376
188
  const intervals = Math.floor(elapsed / this.rateLimiter.interval);
377
- // Completely refill tokens (don't accumulate beyond max)
378
189
  this.rateLimiter.tokens = this.rateLimiter.tokensPerInterval;
379
- // Update last refill time, keeping track of remaining time
380
190
  this.rateLimiter.lastRefill = now;
381
191
  this.logger.debug('Refilled rate limiter tokens', {
382
192
  current: this.rateLimiter.tokens,
@@ -384,7 +194,6 @@ export class EmbeddingJobManager {
384
194
  intervals,
385
195
  });
386
196
  }
387
- // If we have tokens, consume one and return success
388
197
  if (this.rateLimiter.tokens > 0) {
389
198
  this.rateLimiter.tokens--;
390
199
  this.logger.debug('Consumed rate limiter token', {
@@ -393,7 +202,6 @@ export class EmbeddingJobManager {
393
202
  });
394
203
  return { success: true };
395
204
  }
396
- // No tokens available
397
205
  this.logger.warn('Rate limit exceeded', {
398
206
  availableTokens: 0,
399
207
  maxTokens: this.rateLimiter.tokensPerInterval,
@@ -401,15 +209,9 @@ export class EmbeddingJobManager {
401
209
  });
402
210
  return { success: false };
403
211
  }
404
- /**
405
- * Get the current status of the rate limiter
406
- *
407
- * @returns Rate limiter status information
408
- */
409
212
  getRateLimiterStatus() {
410
213
  const now = Date.now();
411
214
  const elapsed = now - this.rateLimiter.lastRefill;
412
- // If enough time has passed for a complete refill
413
215
  if (elapsed >= this.rateLimiter.interval) {
414
216
  return {
415
217
  availableTokens: this.rateLimiter.tokensPerInterval,
@@ -417,22 +219,14 @@ export class EmbeddingJobManager {
417
219
  resetInMs: this.rateLimiter.interval,
418
220
  };
419
221
  }
420
- // Otherwise return current state
421
222
  return {
422
223
  availableTokens: this.rateLimiter.tokens,
423
224
  maxTokens: this.rateLimiter.tokensPerInterval,
424
225
  resetInMs: this.rateLimiter.interval - elapsed,
425
226
  };
426
227
  }
427
- /**
428
- * Retrieve a cached embedding or generate a new one
429
- *
430
- * @param text - Text to generate embedding for
431
- * @returns Embedding vector
432
- */
433
228
  async _getCachedEmbeddingOrGenerate(text) {
434
229
  const cacheKey = this._generateCacheKey(text);
435
- // Try to get from cache first
436
230
  const cachedValue = this.cache.get(cacheKey);
437
231
  if (cachedValue) {
438
232
  this.logger.debug('Cache hit', {
@@ -443,9 +237,7 @@ export class EmbeddingJobManager {
443
237
  }
444
238
  this.logger.debug('Cache miss', { textHash: cacheKey.slice(0, 8) });
445
239
  try {
446
- // Generate new embedding
447
240
  const embedding = await this.embeddingService.generateEmbedding(text);
448
- // Store in cache
449
241
  this._cacheEmbedding(text, embedding);
450
242
  return embedding;
451
243
  }
@@ -457,13 +249,6 @@ export class EmbeddingJobManager {
457
249
  throw error;
458
250
  }
459
251
  }
460
- /**
461
- * Store an embedding in the cache
462
- *
463
- * @private
464
- * @param text - Original text
465
- * @param embedding - Embedding vector
466
- */
467
252
  _cacheEmbedding(text, embedding) {
468
253
  const cacheKey = this._generateCacheKey(text);
469
254
  const modelInfo = this.embeddingService.getModelInfo();
@@ -478,47 +263,27 @@ export class EmbeddingJobManager {
478
263
  dimensions: embedding.length,
479
264
  });
480
265
  }
481
- /**
482
- * Generate a deterministic cache key for text
483
- *
484
- * @private
485
- * @param text - Text to hash
486
- * @returns Cache key
487
- */
488
266
  _generateCacheKey(text) {
489
267
  return crypto.createHash('md5').update(text).digest('hex');
490
268
  }
491
- /**
492
- * Prepare text for embedding from an entity
493
- *
494
- * @private
495
- * @param entity - Entity to prepare text from
496
- * @returns Processed text ready for embedding
497
- */
498
269
  _prepareEntityText(entity) {
499
- // Create a descriptive text from entity data
500
270
  const lines = [`Name: ${entity.name}`, `Type: ${entity.entityType}`, 'Observations:'];
501
- // Add observations, ensuring we handle both string arrays and other formats
502
271
  if (entity.observations) {
503
- // Handle case where observations might be stored as JSON string in some providers
504
272
  let observationsArray = entity.observations;
505
- // If observations is a string, try to parse it as JSON
506
273
  if (typeof entity.observations === 'string') {
507
274
  try {
508
275
  observationsArray = JSON.parse(entity.observations);
509
276
  }
510
277
  catch {
511
- // If parsing fails, treat it as a single observation
512
278
  observationsArray = [entity.observations];
513
279
  }
514
280
  }
515
- // Ensure it's an array at this point
516
281
  if (!Array.isArray(observationsArray)) {
517
282
  observationsArray = [String(observationsArray)];
518
283
  }
519
- // Add each observation to the text
520
- if (observationsArray.length > 0) {
521
- lines.push(...observationsArray.map(obs => `- ${obs}`));
284
+ const arr = observationsArray;
285
+ if (arr.length > 0) {
286
+ lines.push(...arr.map(obs => `- ${obs}`));
522
287
  }
523
288
  else {
524
289
  lines.push(' (No observations)');
@@ -528,7 +293,6 @@ export class EmbeddingJobManager {
528
293
  lines.push(' (No observations)');
529
294
  }
530
295
  const text = lines.join('\n');
531
- // Log the prepared text for debugging
532
296
  this.logger.debug('Prepared entity text for embedding', {
533
297
  entityName: entity.name,
534
298
  entityType: entity.entityType,
@@ -537,37 +301,31 @@ export class EmbeddingJobManager {
537
301
  });
538
302
  return text;
539
303
  }
540
- /**
541
- * Get a cached embedding entry (used for testing)
542
- *
543
- * @param key - Cache key
544
- * @returns Cached embedding or undefined
545
- */
546
304
  getCacheEntry(key) {
547
305
  return this.cache.get(key);
548
306
  }
549
307
  /**
550
- * Schedule incremental regeneration for entities without embeddings
551
- * This method queries all entities and schedules embedding jobs only for those missing embeddings
552
- *
553
- * @returns Number of entities scheduled for embedding generation
308
+ * Walk every current entity and enqueue jobs for any that are missing
309
+ * embeddings. Intended for a server-side cron tick to backfill entities
310
+ * created by thin clients running with `WRITE_EMBEDDINGS_LOCALLY=false`.
554
311
  */
555
312
  async scheduleIncrementalRegeneration() {
556
313
  this.logger.info('Starting incremental embedding regeneration check');
557
314
  try {
558
- // Get all entities from storage
559
315
  const allEntities = await this._getAllEntitiesFromStorage();
560
316
  this.logger.debug('Retrieved entities for embedding check', {
561
317
  totalCount: allEntities.length,
562
318
  });
563
- // Filter for entities without embeddings
564
319
  const entitiesWithoutEmbeddings = allEntities.filter(entity => !entity.embedding);
320
+ const total = allEntities.length;
321
+ const coverage = total > 0
322
+ ? `${Math.round(((total - entitiesWithoutEmbeddings.length) / total) * 100)}%`
323
+ : '0%';
565
324
  this.logger.info('Found entities without embeddings', {
566
325
  count: entitiesWithoutEmbeddings.length,
567
- totalEntities: allEntities.length,
568
- coverage: `${Math.round(((allEntities.length - entitiesWithoutEmbeddings.length) / allEntities.length) * 100)}%`,
326
+ totalEntities: total,
327
+ coverage,
569
328
  });
570
- // Schedule embedding jobs for entities without embeddings
571
329
  let scheduledCount = 0;
572
330
  for (const entity of entitiesWithoutEmbeddings) {
573
331
  try {
@@ -595,15 +353,7 @@ export class EmbeddingJobManager {
595
353
  throw error;
596
354
  }
597
355
  }
598
- /**
599
- * Get all entities from storage provider
600
- * This is a helper method to retrieve all entities for incremental regeneration
601
- *
602
- * @private
603
- * @returns Array of all entities
604
- */
605
356
  async _getAllEntitiesFromStorage() {
606
- // Use the storage provider's loadGraph method to get all entities
607
357
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
608
358
  const storageProviderAny = this.storageProvider;
609
359
  if (typeof storageProviderAny.loadGraph === 'function') {