@henrychong-ai/mcp-neo4j-knowledge-graph 2.3.2 → 2.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -0
- package/dist/embeddings/EmbeddingJobManager.d.ts +33 -138
- package/dist/embeddings/EmbeddingJobManager.js +95 -336
- package/dist/embeddings/EmbeddingJobManager.js.map +1 -1
- package/dist/embeddings/JobStore.d.ts +80 -0
- package/dist/embeddings/JobStore.js +9 -0
- package/dist/embeddings/JobStore.js.map +1 -0
- package/dist/embeddings/Neo4jJobStore.d.ts +34 -0
- package/dist/embeddings/Neo4jJobStore.js +242 -0
- package/dist/embeddings/Neo4jJobStore.js.map +1 -0
- package/dist/index.js +17 -2
- package/dist/index.js.map +1 -1
- package/dist/storage/createAdaptedStorageProvider.d.ts +10 -10
- package/dist/storage/createAdaptedStorageProvider.js +23 -26
- package/dist/storage/createAdaptedStorageProvider.js.map +1 -1
- package/dist/storage/neo4j/Neo4jStorageProvider.d.ts +13 -0
- package/dist/storage/neo4j/Neo4jStorageProvider.js +20 -0
- package/dist/storage/neo4j/Neo4jStorageProvider.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -595,6 +595,10 @@ WRITE_EMBEDDINGS_LOCALLY=true # Default true. Set to "false" on thin-clien
|
|
|
595
595
|
EMBEDDING_BACKFILL_CRON='0 19 * * *' # Cron schedule for scheduleIncrementalRegeneration. Default
|
|
596
596
|
# 19:00 UTC daily (= 03:00 SGT). Server-side instances may
|
|
597
597
|
# tighten to '*/1 * * * *' for ~1-minute backfill latency.
|
|
598
|
+
EMBEDDING_STALE_CLAIM_MS=300000 # (v2.4.0+) Claims older than this age are auto-released back
|
|
599
|
+
# to 'pending' on the next processJobs tick. Default 5 minutes.
|
|
600
|
+
# Increase if your worker's batch processing time can exceed
|
|
601
|
+
# this; decrease for faster recovery from worker crashes.
|
|
598
602
|
|
|
599
603
|
# Logging Configuration
|
|
600
604
|
LOG_LEVEL=warn # Log level: debug, info, warn, error, silent (default: warn)
|
|
@@ -3,49 +3,32 @@ import type { Entity } from '../KnowledgeGraphManager.js';
|
|
|
3
3
|
import type { StorageProvider } from '../storage/StorageProvider.js';
|
|
4
4
|
import type { EntityEmbedding } from '../types/entity-embedding.js';
|
|
5
5
|
import type { EmbeddingService } from './EmbeddingService.js';
|
|
6
|
-
|
|
7
|
-
* Interface for embedding cache options
|
|
8
|
-
*/
|
|
6
|
+
import type { JobStore } from './JobStore.js';
|
|
9
7
|
interface CacheOptions {
|
|
10
8
|
size: number;
|
|
11
9
|
ttl: number;
|
|
12
10
|
maxItems?: number;
|
|
13
11
|
ttlHours?: number;
|
|
14
12
|
}
|
|
15
|
-
/**
|
|
16
|
-
* Interface for rate limiting options
|
|
17
|
-
*/
|
|
18
13
|
interface RateLimiterOptions {
|
|
19
14
|
tokensPerInterval: number;
|
|
20
15
|
interval: number;
|
|
21
16
|
}
|
|
22
|
-
/**
|
|
23
|
-
* Interface for job processing results
|
|
24
|
-
*/
|
|
25
17
|
interface JobProcessResults {
|
|
26
18
|
processed: number;
|
|
27
19
|
successful: number;
|
|
28
20
|
failed: number;
|
|
29
21
|
}
|
|
30
|
-
/**
|
|
31
|
-
* Interface for the rate limiter status
|
|
32
|
-
*/
|
|
33
22
|
interface RateLimiterStatus {
|
|
34
23
|
availableTokens: number;
|
|
35
24
|
maxTokens: number;
|
|
36
25
|
resetInMs: number;
|
|
37
26
|
}
|
|
38
|
-
/**
|
|
39
|
-
* Interface for a cached embedding entry
|
|
40
|
-
*/
|
|
41
27
|
interface CachedEmbedding {
|
|
42
28
|
embedding: number[];
|
|
43
29
|
timestamp: number;
|
|
44
30
|
model: string;
|
|
45
31
|
}
|
|
46
|
-
/**
|
|
47
|
-
* Interface for a logger
|
|
48
|
-
*/
|
|
49
32
|
interface Logger {
|
|
50
33
|
debug: (message: string, meta?: Record<string, unknown>) => void;
|
|
51
34
|
info: (message: string, meta?: Record<string, unknown>) => void;
|
|
@@ -53,25 +36,20 @@ interface Logger {
|
|
|
53
36
|
error: (message: string, meta?: Record<string, unknown>) => void;
|
|
54
37
|
}
|
|
55
38
|
/**
|
|
56
|
-
*
|
|
39
|
+
* Storage shape required by `EmbeddingJobManager` for entity access.
|
|
40
|
+
* Persistence of the queue itself is delegated to `JobStore`.
|
|
57
41
|
*/
|
|
58
42
|
interface EmbeddingStorageProvider extends StorageProvider {
|
|
59
|
-
/**
|
|
60
|
-
* Access to the underlying database
|
|
61
|
-
*/
|
|
62
|
-
db: any;
|
|
63
|
-
/**
|
|
64
|
-
* Get an entity by name
|
|
65
|
-
*/
|
|
66
43
|
getEntity(entityName: string): Promise<Entity | null>;
|
|
44
|
+
storeEntityVector(entityName: string, embedding: EntityEmbedding): Promise<void>;
|
|
67
45
|
/**
|
|
68
|
-
*
|
|
46
|
+
* Optional: efficient predicate-based lookup for entities lacking an embedding.
|
|
47
|
+
* When present, `scheduleIncrementalRegeneration` prefers it over the
|
|
48
|
+
* `loadGraph()`-and-filter fallback (which strips the embedding property in
|
|
49
|
+
* its return mapper and so always reports 100% missing).
|
|
69
50
|
*/
|
|
70
|
-
|
|
51
|
+
getEntityNamesMissingEmbeddings?(): Promise<string[]>;
|
|
71
52
|
}
|
|
72
|
-
/**
|
|
73
|
-
* Return structure for queue status
|
|
74
|
-
*/
|
|
75
53
|
interface QueueStatus {
|
|
76
54
|
pending: number;
|
|
77
55
|
processing: number;
|
|
@@ -80,7 +58,11 @@ interface QueueStatus {
|
|
|
80
58
|
totalJobs: number;
|
|
81
59
|
}
|
|
82
60
|
/**
|
|
83
|
-
* Manages embedding jobs for semantic search
|
|
61
|
+
* Manages embedding jobs for semantic search.
|
|
62
|
+
*
|
|
63
|
+
* Persistence of the queue lives behind a `JobStore` — production wiring
|
|
64
|
+
* uses `Neo4jJobStore`, which stores jobs as `:EmbeddingJob` nodes and
|
|
65
|
+
* makes `claim()` safe under multi-worker contention.
|
|
84
66
|
*/
|
|
85
67
|
export declare class EmbeddingJobManager {
|
|
86
68
|
private storageProvider;
|
|
@@ -94,134 +76,47 @@ export declare class EmbeddingJobManager {
|
|
|
94
76
|
cache: LRUCache<string, CachedEmbedding>;
|
|
95
77
|
private cacheOptions;
|
|
96
78
|
private logger;
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
* @param rateLimiterOptions - Optional configuration for rate limiting
|
|
103
|
-
* @param cacheOptions - Optional configuration for caching
|
|
104
|
-
* @param logger - Optional logger for operation logging
|
|
105
|
-
*/
|
|
106
|
-
constructor(storageProvider: EmbeddingStorageProvider, embeddingService: EmbeddingService, rateLimiterOptions?: RateLimiterOptions | null, cacheOptions?: CacheOptions | null, logger?: Logger | null);
|
|
107
|
-
/**
|
|
108
|
-
* Initialize the database schema for embedding jobs
|
|
109
|
-
*
|
|
110
|
-
* @private
|
|
111
|
-
*/
|
|
112
|
-
private _initializeDatabase;
|
|
113
|
-
/**
|
|
114
|
-
* Schedule an entity for embedding generation
|
|
115
|
-
*
|
|
116
|
-
* @param entityName - Name of the entity to generate embedding for
|
|
117
|
-
* @param priority - Optional priority (higher priority jobs are processed first)
|
|
118
|
-
* @returns Job ID
|
|
119
|
-
*/
|
|
79
|
+
private jobStore;
|
|
80
|
+
private staleClaimMs;
|
|
81
|
+
/** Stable id for this process — visible in `:EmbeddingJob.claimedBy`. */
|
|
82
|
+
readonly workerId: string;
|
|
83
|
+
constructor(storageProvider: EmbeddingStorageProvider, embeddingService: EmbeddingService, rateLimiterOptions?: RateLimiterOptions | null, cacheOptions?: CacheOptions | null, logger?: Logger | null, jobStore?: JobStore, staleClaimMs?: number);
|
|
120
84
|
scheduleEntityEmbedding(entityName: string, priority?: number): Promise<string>;
|
|
121
|
-
/**
|
|
122
|
-
* Process a batch of pending embedding jobs
|
|
123
|
-
*
|
|
124
|
-
* @param batchSize - Maximum number of jobs to process
|
|
125
|
-
* @returns Result statistics
|
|
126
|
-
*/
|
|
127
85
|
processJobs(batchSize?: number): Promise<JobProcessResults>;
|
|
128
|
-
/**
|
|
129
|
-
* Get the current status of the job queue
|
|
130
|
-
*
|
|
131
|
-
* @returns Queue statistics
|
|
132
|
-
*/
|
|
133
86
|
getQueueStatus(): Promise<QueueStatus>;
|
|
134
|
-
/**
|
|
135
|
-
* Retry failed embedding jobs
|
|
136
|
-
*
|
|
137
|
-
* @returns Number of jobs reset for retry
|
|
138
|
-
*/
|
|
139
87
|
retryFailedJobs(): Promise<number>;
|
|
140
|
-
/**
|
|
141
|
-
* Clean up old completed jobs
|
|
142
|
-
*
|
|
143
|
-
* @param threshold - Age in milliseconds after which to delete completed jobs, defaults to 7 days
|
|
144
|
-
* @returns Number of jobs cleaned up
|
|
145
|
-
*/
|
|
146
88
|
cleanupJobs(threshold?: number): Promise<number>;
|
|
147
89
|
/**
|
|
148
|
-
*
|
|
149
|
-
*
|
|
150
|
-
* @private
|
|
151
|
-
* @param jobId - ID of the job to update
|
|
152
|
-
* @param status - New status
|
|
153
|
-
* @param attempts - Optional attempts count update
|
|
154
|
-
* @param error - Optional error message
|
|
155
|
-
* @returns Database result
|
|
156
|
-
*/
|
|
157
|
-
private _updateJobStatus;
|
|
158
|
-
/**
|
|
159
|
-
* Check rate limiter and consume a token if available
|
|
160
|
-
*
|
|
161
|
-
* @private
|
|
162
|
-
* @returns Object with success flag
|
|
90
|
+
* Token-bucket rate limiter. Public for legacy test compatibility — was
|
|
91
|
+
* `_checkRateLimiter` historically; kept callable from tests via underscore.
|
|
163
92
|
*/
|
|
164
93
|
_checkRateLimiter(): {
|
|
165
94
|
success: boolean;
|
|
166
95
|
};
|
|
167
|
-
/**
|
|
168
|
-
* Get the current status of the rate limiter
|
|
169
|
-
*
|
|
170
|
-
* @returns Rate limiter status information
|
|
171
|
-
*/
|
|
172
96
|
getRateLimiterStatus(): RateLimiterStatus;
|
|
173
|
-
/**
|
|
174
|
-
* Retrieve a cached embedding or generate a new one
|
|
175
|
-
*
|
|
176
|
-
* @param text - Text to generate embedding for
|
|
177
|
-
* @returns Embedding vector
|
|
178
|
-
*/
|
|
179
97
|
_getCachedEmbeddingOrGenerate(text: string): Promise<number[]>;
|
|
180
|
-
/**
|
|
181
|
-
* Store an embedding in the cache
|
|
182
|
-
*
|
|
183
|
-
* @private
|
|
184
|
-
* @param text - Original text
|
|
185
|
-
* @param embedding - Embedding vector
|
|
186
|
-
*/
|
|
187
98
|
private _cacheEmbedding;
|
|
188
|
-
/**
|
|
189
|
-
* Generate a deterministic cache key for text
|
|
190
|
-
*
|
|
191
|
-
* @private
|
|
192
|
-
* @param text - Text to hash
|
|
193
|
-
* @returns Cache key
|
|
194
|
-
*/
|
|
195
99
|
_generateCacheKey(text: string): string;
|
|
196
|
-
/**
|
|
197
|
-
* Prepare text for embedding from an entity
|
|
198
|
-
*
|
|
199
|
-
* @private
|
|
200
|
-
* @param entity - Entity to prepare text from
|
|
201
|
-
* @returns Processed text ready for embedding
|
|
202
|
-
*/
|
|
203
100
|
private _prepareEntityText;
|
|
204
|
-
/**
|
|
205
|
-
* Get a cached embedding entry (used for testing)
|
|
206
|
-
*
|
|
207
|
-
* @param key - Cache key
|
|
208
|
-
* @returns Cached embedding or undefined
|
|
209
|
-
*/
|
|
210
101
|
getCacheEntry(key: string): CachedEmbedding | undefined;
|
|
211
102
|
/**
|
|
212
|
-
*
|
|
213
|
-
*
|
|
103
|
+
* Enqueue embedding jobs for every currently-valid entity that lacks an
|
|
104
|
+
* embedding. Intended for a server-side cron tick to backfill entities
|
|
105
|
+
* created by thin clients running with `WRITE_EMBEDDINGS_LOCALLY=false`.
|
|
214
106
|
*
|
|
215
|
-
*
|
|
107
|
+
* v2.4.1+ — prefers `storageProvider.getEntityNamesMissingEmbeddings()`
|
|
108
|
+
* when available (a single Cypher predicate, no client-side filtering).
|
|
109
|
+
* Falls back to the legacy `loadGraph`-and-filter path for compatibility,
|
|
110
|
+
* but that path is buggy with the current `nodeToEntity` mapper which
|
|
111
|
+
* strips the embedding property — so it always reports 100% missing.
|
|
216
112
|
*/
|
|
217
113
|
scheduleIncrementalRegeneration(): Promise<number>;
|
|
218
114
|
/**
|
|
219
|
-
*
|
|
220
|
-
*
|
|
221
|
-
*
|
|
222
|
-
* @private
|
|
223
|
-
* @returns Array of all entities
|
|
115
|
+
* Resolve the list of entity names lacking embeddings. Prefer the storage
|
|
116
|
+
* provider's dedicated Cypher predicate; otherwise fall back to walking
|
|
117
|
+
* `loadGraph()` (legacy, suboptimal — see the v2.4.1 notes above).
|
|
224
118
|
*/
|
|
119
|
+
private _getEntityNamesMissingEmbeddings;
|
|
225
120
|
private _getAllEntitiesFromStorage;
|
|
226
121
|
}
|
|
227
122
|
export {};
|