@henrychong-ai/mcp-neo4j-knowledge-graph 2.3.1 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -595,6 +595,10 @@ WRITE_EMBEDDINGS_LOCALLY=true # Default true. Set to "false" on thin-clien
595
595
  EMBEDDING_BACKFILL_CRON='0 19 * * *' # Cron schedule for scheduleIncrementalRegeneration. Default
596
596
  # 19:00 UTC daily (= 03:00 SGT). Server-side instances may
597
597
  # tighten to '*/1 * * * *' for ~1-minute backfill latency.
598
+ EMBEDDING_STALE_CLAIM_MS=300000 # (v2.4.0+) Claims older than this many milliseconds are auto-released back
599
+ # to 'pending' on the next processJobs tick. Default 5 minutes.
600
+ # Increase if your worker's batch processing time can exceed
601
+ # this; decrease for faster recovery from worker crashes.
598
602
 
599
603
  # Logging Configuration
600
604
  LOG_LEVEL=warn # Log level: debug, info, warn, error, silent (default: warn)
@@ -3,49 +3,32 @@ import type { Entity } from '../KnowledgeGraphManager.js';
3
3
  import type { StorageProvider } from '../storage/StorageProvider.js';
4
4
  import type { EntityEmbedding } from '../types/entity-embedding.js';
5
5
  import type { EmbeddingService } from './EmbeddingService.js';
6
- /**
7
- * Interface for embedding cache options
8
- */
6
+ import type { JobStore } from './JobStore.js';
9
7
  interface CacheOptions {
10
8
  size: number;
11
9
  ttl: number;
12
10
  maxItems?: number;
13
11
  ttlHours?: number;
14
12
  }
15
- /**
16
- * Interface for rate limiting options
17
- */
18
13
  interface RateLimiterOptions {
19
14
  tokensPerInterval: number;
20
15
  interval: number;
21
16
  }
22
- /**
23
- * Interface for job processing results
24
- */
25
17
  interface JobProcessResults {
26
18
  processed: number;
27
19
  successful: number;
28
20
  failed: number;
29
21
  }
30
- /**
31
- * Interface for the rate limiter status
32
- */
33
22
  interface RateLimiterStatus {
34
23
  availableTokens: number;
35
24
  maxTokens: number;
36
25
  resetInMs: number;
37
26
  }
38
- /**
39
- * Interface for a cached embedding entry
40
- */
41
27
  interface CachedEmbedding {
42
28
  embedding: number[];
43
29
  timestamp: number;
44
30
  model: string;
45
31
  }
46
- /**
47
- * Interface for a logger
48
- */
49
32
  interface Logger {
50
33
  debug: (message: string, meta?: Record<string, unknown>) => void;
51
34
  info: (message: string, meta?: Record<string, unknown>) => void;
@@ -53,25 +36,13 @@ interface Logger {
53
36
  error: (message: string, meta?: Record<string, unknown>) => void;
54
37
  }
55
38
  /**
56
- * Interface for embedding storage provider, extending the base provider
39
+ * Storage shape required by `EmbeddingJobManager` for entity access.
40
+ * Persistence of the queue itself is delegated to `JobStore`.
57
41
  */
58
42
  interface EmbeddingStorageProvider extends StorageProvider {
59
- /**
60
- * Access to the underlying database
61
- */
62
- db: any;
63
- /**
64
- * Get an entity by name
65
- */
66
43
  getEntity(entityName: string): Promise<Entity | null>;
67
- /**
68
- * Store an entity vector embedding
69
- */
70
44
  storeEntityVector(entityName: string, embedding: EntityEmbedding): Promise<void>;
71
45
  }
72
- /**
73
- * Return structure for queue status
74
- */
75
46
  interface QueueStatus {
76
47
  pending: number;
77
48
  processing: number;
@@ -80,7 +51,11 @@ interface QueueStatus {
80
51
  totalJobs: number;
81
52
  }
82
53
  /**
83
- * Manages embedding jobs for semantic search
54
+ * Manages embedding jobs for semantic search.
55
+ *
56
+ * Persistence of the queue lives behind a `JobStore` — production wiring
57
+ * uses `Neo4jJobStore`, which stores jobs as `:EmbeddingJob` nodes and
58
+ * makes `claim()` safe under multi-worker contention.
84
59
  */
85
60
  export declare class EmbeddingJobManager {
86
61
  private storageProvider;
@@ -94,134 +69,35 @@ export declare class EmbeddingJobManager {
94
69
  cache: LRUCache<string, CachedEmbedding>;
95
70
  private cacheOptions;
96
71
  private logger;
97
- /**
98
- * Creates a new embedding job manager
99
- *
100
- * @param storageProvider - Provider for entity storage
101
- * @param embeddingService - Service to generate embeddings
102
- * @param rateLimiterOptions - Optional configuration for rate limiting
103
- * @param cacheOptions - Optional configuration for caching
104
- * @param logger - Optional logger for operation logging
105
- */
106
- constructor(storageProvider: EmbeddingStorageProvider, embeddingService: EmbeddingService, rateLimiterOptions?: RateLimiterOptions | null, cacheOptions?: CacheOptions | null, logger?: Logger | null);
107
- /**
108
- * Initialize the database schema for embedding jobs
109
- *
110
- * @private
111
- */
112
- private _initializeDatabase;
113
- /**
114
- * Schedule an entity for embedding generation
115
- *
116
- * @param entityName - Name of the entity to generate embedding for
117
- * @param priority - Optional priority (higher priority jobs are processed first)
118
- * @returns Job ID
119
- */
72
+ private jobStore;
73
+ private staleClaimMs;
74
+ /** Stable id for this process — visible in `:EmbeddingJob.claimedBy`. */
75
+ readonly workerId: string;
76
+ constructor(storageProvider: EmbeddingStorageProvider, embeddingService: EmbeddingService, rateLimiterOptions?: RateLimiterOptions | null, cacheOptions?: CacheOptions | null, logger?: Logger | null, jobStore?: JobStore, staleClaimMs?: number);
120
77
  scheduleEntityEmbedding(entityName: string, priority?: number): Promise<string>;
121
- /**
122
- * Process a batch of pending embedding jobs
123
- *
124
- * @param batchSize - Maximum number of jobs to process
125
- * @returns Result statistics
126
- */
127
78
  processJobs(batchSize?: number): Promise<JobProcessResults>;
128
- /**
129
- * Get the current status of the job queue
130
- *
131
- * @returns Queue statistics
132
- */
133
79
  getQueueStatus(): Promise<QueueStatus>;
134
- /**
135
- * Retry failed embedding jobs
136
- *
137
- * @returns Number of jobs reset for retry
138
- */
139
80
  retryFailedJobs(): Promise<number>;
140
- /**
141
- * Clean up old completed jobs
142
- *
143
- * @param threshold - Age in milliseconds after which to delete completed jobs, defaults to 7 days
144
- * @returns Number of jobs cleaned up
145
- */
146
81
  cleanupJobs(threshold?: number): Promise<number>;
147
82
  /**
148
- * Update a job's status in the database
149
- *
150
- * @private
151
- * @param jobId - ID of the job to update
152
- * @param status - New status
153
- * @param attempts - Optional attempts count update
154
- * @param error - Optional error message
155
- * @returns Database result
156
- */
157
- private _updateJobStatus;
158
- /**
159
- * Check rate limiter and consume a token if available
160
- *
161
- * @private
162
- * @returns Object with success flag
83
+ * Token-bucket rate limiter check. Previously documented as `@private`; kept
84
+ * public under its underscore-prefixed name so legacy tests can still call it.
163
85
  */
164
86
  _checkRateLimiter(): {
165
87
  success: boolean;
166
88
  };
167
- /**
168
- * Get the current status of the rate limiter
169
- *
170
- * @returns Rate limiter status information
171
- */
172
89
  getRateLimiterStatus(): RateLimiterStatus;
173
- /**
174
- * Retrieve a cached embedding or generate a new one
175
- *
176
- * @param text - Text to generate embedding for
177
- * @returns Embedding vector
178
- */
179
90
  _getCachedEmbeddingOrGenerate(text: string): Promise<number[]>;
180
- /**
181
- * Store an embedding in the cache
182
- *
183
- * @private
184
- * @param text - Original text
185
- * @param embedding - Embedding vector
186
- */
187
91
  private _cacheEmbedding;
188
- /**
189
- * Generate a deterministic cache key for text
190
- *
191
- * @private
192
- * @param text - Text to hash
193
- * @returns Cache key
194
- */
195
92
  _generateCacheKey(text: string): string;
196
- /**
197
- * Prepare text for embedding from an entity
198
- *
199
- * @private
200
- * @param entity - Entity to prepare text from
201
- * @returns Processed text ready for embedding
202
- */
203
93
  private _prepareEntityText;
204
- /**
205
- * Get a cached embedding entry (used for testing)
206
- *
207
- * @param key - Cache key
208
- * @returns Cached embedding or undefined
209
- */
210
94
  getCacheEntry(key: string): CachedEmbedding | undefined;
211
95
  /**
212
- * Schedule incremental regeneration for entities without embeddings
213
- * This method queries all entities and schedules embedding jobs only for those missing embeddings
214
- *
215
- * @returns Number of entities scheduled for embedding generation
96
+ * Walk every current entity and enqueue jobs for any that are missing
97
+ * embeddings. Intended for a server-side cron tick to backfill entities
98
+ * created by thin clients running with `WRITE_EMBEDDINGS_LOCALLY=false`.
216
99
  */
217
100
  scheduleIncrementalRegeneration(): Promise<number>;
218
- /**
219
- * Get all entities from storage provider
220
- * This is a helper method to retrieve all entities for incremental regeneration
221
- *
222
- * @private
223
- * @returns Array of all entities
224
- */
225
101
  private _getAllEntitiesFromStorage;
226
102
  }
227
103
  export {};