@henrychong-ai/mcp-neo4j-knowledge-graph 2.3.2 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -0
- package/dist/embeddings/EmbeddingJobManager.d.ts +18 -142
- package/dist/embeddings/EmbeddingJobManager.js +79 -329
- package/dist/embeddings/EmbeddingJobManager.js.map +1 -1
- package/dist/embeddings/JobStore.d.ts +80 -0
- package/dist/embeddings/JobStore.js +9 -0
- package/dist/embeddings/JobStore.js.map +1 -0
- package/dist/embeddings/Neo4jJobStore.d.ts +34 -0
- package/dist/embeddings/Neo4jJobStore.js +242 -0
- package/dist/embeddings/Neo4jJobStore.js.map +1 -0
- package/dist/index.js +17 -2
- package/dist/index.js.map +1 -1
- package/dist/storage/createAdaptedStorageProvider.d.ts +10 -10
- package/dist/storage/createAdaptedStorageProvider.js +11 -26
- package/dist/storage/createAdaptedStorageProvider.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -595,6 +595,10 @@ WRITE_EMBEDDINGS_LOCALLY=true # Default true. Set to "false" on thin-clien
|
|
|
595
595
|
EMBEDDING_BACKFILL_CRON='0 19 * * *' # Cron schedule for scheduleIncrementalRegeneration. Default
|
|
596
596
|
# 19:00 UTC daily (= 03:00 SGT). Server-side instances may
|
|
597
597
|
# tighten to '*/1 * * * *' for ~1-minute backfill latency.
|
|
598
|
+
EMBEDDING_STALE_CLAIM_MS=300000 # (v2.4.0+) Claims older than this age are auto-released back
|
|
599
|
+
# to 'pending' on the next processJobs tick. Default 5 minutes.
|
|
600
|
+
# Increase if your worker's batch processing time can exceed
|
|
601
|
+
# this; decrease for faster recovery from worker crashes.
|
|
598
602
|
|
|
599
603
|
# Logging Configuration
|
|
600
604
|
LOG_LEVEL=warn # Log level: debug, info, warn, error, silent (default: warn)
|
|
@@ -3,49 +3,32 @@ import type { Entity } from '../KnowledgeGraphManager.js';
|
|
|
3
3
|
import type { StorageProvider } from '../storage/StorageProvider.js';
|
|
4
4
|
import type { EntityEmbedding } from '../types/entity-embedding.js';
|
|
5
5
|
import type { EmbeddingService } from './EmbeddingService.js';
|
|
6
|
-
|
|
7
|
-
* Interface for embedding cache options
|
|
8
|
-
*/
|
|
6
|
+
import type { JobStore } from './JobStore.js';
|
|
9
7
|
interface CacheOptions {
|
|
10
8
|
size: number;
|
|
11
9
|
ttl: number;
|
|
12
10
|
maxItems?: number;
|
|
13
11
|
ttlHours?: number;
|
|
14
12
|
}
|
|
15
|
-
/**
|
|
16
|
-
* Interface for rate limiting options
|
|
17
|
-
*/
|
|
18
13
|
interface RateLimiterOptions {
|
|
19
14
|
tokensPerInterval: number;
|
|
20
15
|
interval: number;
|
|
21
16
|
}
|
|
22
|
-
/**
|
|
23
|
-
* Interface for job processing results
|
|
24
|
-
*/
|
|
25
17
|
interface JobProcessResults {
|
|
26
18
|
processed: number;
|
|
27
19
|
successful: number;
|
|
28
20
|
failed: number;
|
|
29
21
|
}
|
|
30
|
-
/**
|
|
31
|
-
* Interface for the rate limiter status
|
|
32
|
-
*/
|
|
33
22
|
interface RateLimiterStatus {
|
|
34
23
|
availableTokens: number;
|
|
35
24
|
maxTokens: number;
|
|
36
25
|
resetInMs: number;
|
|
37
26
|
}
|
|
38
|
-
/**
|
|
39
|
-
* Interface for a cached embedding entry
|
|
40
|
-
*/
|
|
41
27
|
interface CachedEmbedding {
|
|
42
28
|
embedding: number[];
|
|
43
29
|
timestamp: number;
|
|
44
30
|
model: string;
|
|
45
31
|
}
|
|
46
|
-
/**
|
|
47
|
-
* Interface for a logger
|
|
48
|
-
*/
|
|
49
32
|
interface Logger {
|
|
50
33
|
debug: (message: string, meta?: Record<string, unknown>) => void;
|
|
51
34
|
info: (message: string, meta?: Record<string, unknown>) => void;
|
|
@@ -53,25 +36,13 @@ interface Logger {
|
|
|
53
36
|
error: (message: string, meta?: Record<string, unknown>) => void;
|
|
54
37
|
}
|
|
55
38
|
/**
|
|
56
|
-
*
|
|
39
|
+
* Storage shape required by `EmbeddingJobManager` for entity access.
|
|
40
|
+
* Persistence of the queue itself is delegated to `JobStore`.
|
|
57
41
|
*/
|
|
58
42
|
interface EmbeddingStorageProvider extends StorageProvider {
|
|
59
|
-
/**
|
|
60
|
-
* Access to the underlying database
|
|
61
|
-
*/
|
|
62
|
-
db: any;
|
|
63
|
-
/**
|
|
64
|
-
* Get an entity by name
|
|
65
|
-
*/
|
|
66
43
|
getEntity(entityName: string): Promise<Entity | null>;
|
|
67
|
-
/**
|
|
68
|
-
* Store an entity vector embedding
|
|
69
|
-
*/
|
|
70
44
|
storeEntityVector(entityName: string, embedding: EntityEmbedding): Promise<void>;
|
|
71
45
|
}
|
|
72
|
-
/**
|
|
73
|
-
* Return structure for queue status
|
|
74
|
-
*/
|
|
75
46
|
interface QueueStatus {
|
|
76
47
|
pending: number;
|
|
77
48
|
processing: number;
|
|
@@ -80,7 +51,11 @@ interface QueueStatus {
|
|
|
80
51
|
totalJobs: number;
|
|
81
52
|
}
|
|
82
53
|
/**
|
|
83
|
-
* Manages embedding jobs for semantic search
|
|
54
|
+
* Manages embedding jobs for semantic search.
|
|
55
|
+
*
|
|
56
|
+
* Persistence of the queue lives behind a `JobStore` — production wiring
|
|
57
|
+
* uses `Neo4jJobStore`, which stores jobs as `:EmbeddingJob` nodes and
|
|
58
|
+
* makes `claim()` safe under multi-worker contention.
|
|
84
59
|
*/
|
|
85
60
|
export declare class EmbeddingJobManager {
|
|
86
61
|
private storageProvider;
|
|
@@ -94,134 +69,35 @@ export declare class EmbeddingJobManager {
|
|
|
94
69
|
cache: LRUCache<string, CachedEmbedding>;
|
|
95
70
|
private cacheOptions;
|
|
96
71
|
private logger;
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
* @param rateLimiterOptions - Optional configuration for rate limiting
|
|
103
|
-
* @param cacheOptions - Optional configuration for caching
|
|
104
|
-
* @param logger - Optional logger for operation logging
|
|
105
|
-
*/
|
|
106
|
-
constructor(storageProvider: EmbeddingStorageProvider, embeddingService: EmbeddingService, rateLimiterOptions?: RateLimiterOptions | null, cacheOptions?: CacheOptions | null, logger?: Logger | null);
|
|
107
|
-
/**
|
|
108
|
-
* Initialize the database schema for embedding jobs
|
|
109
|
-
*
|
|
110
|
-
* @private
|
|
111
|
-
*/
|
|
112
|
-
private _initializeDatabase;
|
|
113
|
-
/**
|
|
114
|
-
* Schedule an entity for embedding generation
|
|
115
|
-
*
|
|
116
|
-
* @param entityName - Name of the entity to generate embedding for
|
|
117
|
-
* @param priority - Optional priority (higher priority jobs are processed first)
|
|
118
|
-
* @returns Job ID
|
|
119
|
-
*/
|
|
72
|
+
private jobStore;
|
|
73
|
+
private staleClaimMs;
|
|
74
|
+
/** Stable id for this process — visible in `:EmbeddingJob.claimedBy`. */
|
|
75
|
+
readonly workerId: string;
|
|
76
|
+
constructor(storageProvider: EmbeddingStorageProvider, embeddingService: EmbeddingService, rateLimiterOptions?: RateLimiterOptions | null, cacheOptions?: CacheOptions | null, logger?: Logger | null, jobStore?: JobStore, staleClaimMs?: number);
|
|
120
77
|
scheduleEntityEmbedding(entityName: string, priority?: number): Promise<string>;
|
|
121
|
-
/**
|
|
122
|
-
* Process a batch of pending embedding jobs
|
|
123
|
-
*
|
|
124
|
-
* @param batchSize - Maximum number of jobs to process
|
|
125
|
-
* @returns Result statistics
|
|
126
|
-
*/
|
|
127
78
|
processJobs(batchSize?: number): Promise<JobProcessResults>;
|
|
128
|
-
/**
|
|
129
|
-
* Get the current status of the job queue
|
|
130
|
-
*
|
|
131
|
-
* @returns Queue statistics
|
|
132
|
-
*/
|
|
133
79
|
getQueueStatus(): Promise<QueueStatus>;
|
|
134
|
-
/**
|
|
135
|
-
* Retry failed embedding jobs
|
|
136
|
-
*
|
|
137
|
-
* @returns Number of jobs reset for retry
|
|
138
|
-
*/
|
|
139
80
|
retryFailedJobs(): Promise<number>;
|
|
140
|
-
/**
|
|
141
|
-
* Clean up old completed jobs
|
|
142
|
-
*
|
|
143
|
-
* @param threshold - Age in milliseconds after which to delete completed jobs, defaults to 7 days
|
|
144
|
-
* @returns Number of jobs cleaned up
|
|
145
|
-
*/
|
|
146
81
|
cleanupJobs(threshold?: number): Promise<number>;
|
|
147
82
|
/**
|
|
148
|
-
*
|
|
149
|
-
*
|
|
150
|
-
* @private
|
|
151
|
-
* @param jobId - ID of the job to update
|
|
152
|
-
* @param status - New status
|
|
153
|
-
* @param attempts - Optional attempts count update
|
|
154
|
-
* @param error - Optional error message
|
|
155
|
-
* @returns Database result
|
|
156
|
-
*/
|
|
157
|
-
private _updateJobStatus;
|
|
158
|
-
/**
|
|
159
|
-
* Check rate limiter and consume a token if available
|
|
160
|
-
*
|
|
161
|
-
* @private
|
|
162
|
-
* @returns Object with success flag
|
|
83
|
+
* Token-bucket rate limiter. Public for legacy test compatibility — was
|
|
84
|
+
* `_checkRateLimiter` historically; kept callable from tests via underscore.
|
|
163
85
|
*/
|
|
164
86
|
_checkRateLimiter(): {
|
|
165
87
|
success: boolean;
|
|
166
88
|
};
|
|
167
|
-
/**
|
|
168
|
-
* Get the current status of the rate limiter
|
|
169
|
-
*
|
|
170
|
-
* @returns Rate limiter status information
|
|
171
|
-
*/
|
|
172
89
|
getRateLimiterStatus(): RateLimiterStatus;
|
|
173
|
-
/**
|
|
174
|
-
* Retrieve a cached embedding or generate a new one
|
|
175
|
-
*
|
|
176
|
-
* @param text - Text to generate embedding for
|
|
177
|
-
* @returns Embedding vector
|
|
178
|
-
*/
|
|
179
90
|
_getCachedEmbeddingOrGenerate(text: string): Promise<number[]>;
|
|
180
|
-
/**
|
|
181
|
-
* Store an embedding in the cache
|
|
182
|
-
*
|
|
183
|
-
* @private
|
|
184
|
-
* @param text - Original text
|
|
185
|
-
* @param embedding - Embedding vector
|
|
186
|
-
*/
|
|
187
91
|
private _cacheEmbedding;
|
|
188
|
-
/**
|
|
189
|
-
* Generate a deterministic cache key for text
|
|
190
|
-
*
|
|
191
|
-
* @private
|
|
192
|
-
* @param text - Text to hash
|
|
193
|
-
* @returns Cache key
|
|
194
|
-
*/
|
|
195
92
|
_generateCacheKey(text: string): string;
|
|
196
|
-
/**
|
|
197
|
-
* Prepare text for embedding from an entity
|
|
198
|
-
*
|
|
199
|
-
* @private
|
|
200
|
-
* @param entity - Entity to prepare text from
|
|
201
|
-
* @returns Processed text ready for embedding
|
|
202
|
-
*/
|
|
203
93
|
private _prepareEntityText;
|
|
204
|
-
/**
|
|
205
|
-
* Get a cached embedding entry (used for testing)
|
|
206
|
-
*
|
|
207
|
-
* @param key - Cache key
|
|
208
|
-
* @returns Cached embedding or undefined
|
|
209
|
-
*/
|
|
210
94
|
getCacheEntry(key: string): CachedEmbedding | undefined;
|
|
211
95
|
/**
|
|
212
|
-
*
|
|
213
|
-
*
|
|
214
|
-
*
|
|
215
|
-
* @returns Number of entities scheduled for embedding generation
|
|
96
|
+
* Walk every current entity and enqueue jobs for any that are missing
|
|
97
|
+
* embeddings. Intended for a server-side cron tick to backfill entities
|
|
98
|
+
* created by thin clients running with `WRITE_EMBEDDINGS_LOCALLY=false`.
|
|
216
99
|
*/
|
|
217
100
|
scheduleIncrementalRegeneration(): Promise<number>;
|
|
218
|
-
/**
|
|
219
|
-
* Get all entities from storage provider
|
|
220
|
-
* This is a helper method to retrieve all entities for incremental regeneration
|
|
221
|
-
*
|
|
222
|
-
* @private
|
|
223
|
-
* @returns Array of all entities
|
|
224
|
-
*/
|
|
225
101
|
private _getAllEntitiesFromStorage;
|
|
226
102
|
}
|
|
227
103
|
export {};
|