@levalicious/server-memory 0.0.10 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -27
- package/dist/index.js +4 -0
- package/dist/scripts/migrate-jsonl.js +169 -0
- package/dist/scripts/verify-migration.js +39 -0
- package/dist/server.js +763 -536
- package/dist/src/graphfile.js +560 -0
- package/dist/src/memoryfile.js +121 -0
- package/dist/src/pagerank.js +78 -0
- package/dist/src/stringtable.js +373 -0
- package/dist/tests/concurrency.test.js +189 -0
- package/dist/tests/memory-server.test.js +225 -53
- package/package.json +6 -4
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PageRank sampling — structural rank via random walks on graph topology.
|
|
3
|
+
*
|
|
4
|
+
* Implements MC complete-path stopping at dangling nodes (Algorithm 4 from
|
|
5
|
+
* Avrachenkov et al. "Monte Carlo methods in PageRank computation").
|
|
6
|
+
*
|
|
7
|
+
* One "iteration" = one random walk starting from each node in the graph.
|
|
8
|
+
* Each walk follows forward edges with probability c (damping factor),
|
|
9
|
+
* terminates with probability (1-c) at each step, and stops immediately
|
|
10
|
+
* at dangling nodes (nodes with no forward edges).
|
|
11
|
+
*
|
|
12
|
+
* Every node visited along the walk gets its structuralVisits incremented.
|
|
13
|
+
* The global structuralTotal tracks the sum across all visits.
|
|
14
|
+
* PageRank(j) = structuralVisits(j) / structuralTotal.
|
|
15
|
+
*
|
|
16
|
+
* For 14K nodes with c=0.85, one iteration produces ~93K visits total
|
|
17
|
+
* (14K starts × ~6.67 avg walk length) and gives <7% error for top pages.
|
|
18
|
+
*/
|
|
19
|
+
import { DIR_FORWARD } from './graphfile.js';
|
|
20
|
+
// Damping factor c: probability of following a link at each step (per the
// module doc above, walks terminate with probability 1 - c). 0.85 is the
// classic PageRank value.
const DEFAULT_DAMPING = 0.85;
|
|
21
|
+
/**
 * One full iteration of structural PageRank sampling: a single random walk
 * is launched from every entity currently in the graph.
 *
 * @param gf GraphFile to sample on
 * @param damping Probability of following a link (vs terminating). Default 0.85.
 * @returns Total number of visits recorded in this iteration.
 */
export function structuralIteration(gf, damping = DEFAULT_DAMPING) {
    let visitCount = 0;
    for (const origin of gf.getAllEntityOffsets()) {
        visitCount += structuralWalk(gf, origin, damping);
    }
    return visitCount;
}
|
|
37
|
+
/**
 * Perform one structural random walk beginning at `startOffset`.
 *
 * Every node on the path (including the start) is counted via
 * gf.incrementStructuralVisit, and the walk ends immediately at a dangling
 * node (no forward edges — the complete-path stopping rule of Algorithm 4)
 * or, at each step, with probability (1 - damping).
 *
 * @param gf GraphFile to walk on
 * @param startOffset Offset of the node the walk starts from
 * @param damping Probability of following a link at each step. Default 0.85.
 * @returns Number of visits recorded by this walk.
 */
export function structuralWalk(gf, startOffset, damping = DEFAULT_DAMPING) {
    let visitCount = 0;
    let node = startOffset;
    for (;;) {
        // Count the visit to the current node.
        gf.incrementStructuralVisit(node);
        visitCount += 1;
        // Only forward edges participate in the structural walk.
        const forwardEdges = gf.getEdges(node).filter((edge) => edge.direction === DIR_FORWARD);
        if (forwardEdges.length === 0) {
            break; // dangling node — complete-path stop (Algorithm 4)
        }
        if (Math.random() >= damping) {
            break; // geometric termination with probability (1 - damping)
        }
        // Jump to a uniformly chosen forward neighbour.
        node = forwardEdges[Math.floor(Math.random() * forwardEdges.length)].targetOffset;
    }
    return visitCount;
}
|
|
66
|
+
/**
 * Run `iterations` rounds of structural sampling; each round launches one
 * walk from every node in the graph.
 *
 * @param gf GraphFile to sample on
 * @param iterations Number of full rounds to run. Default 1.
 * @param damping Probability of following a link at each step. Default 0.85.
 * @returns Total visits recorded across all rounds.
 */
export function structuralSample(gf, iterations = 1, damping = DEFAULT_DAMPING) {
    let grandTotal = 0;
    for (let round = 0; round < iterations; round += 1) {
        grandTotal += structuralIteration(gf, damping);
    }
    return grandTotal;
}
|
|
@@ -0,0 +1,373 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* StringTable - Interned, refcounted string storage backed by a MemoryFile.
|
|
3
|
+
*
|
|
4
|
+
* Layout of each string entry (allocated via memfile_alloc):
|
|
5
|
+
* [u32 refcount] [u32 hash] [u16 len] [u8 data[len]]
|
|
6
|
+
*
|
|
7
|
+
* String ID = the memfile offset to the entry (the offset returned by alloc,
|
|
8
|
+
* pointing past the memfile_alloc_t header, directly at refcount).
|
|
9
|
+
*
|
|
10
|
+
* The hash index is a separate allocated block:
|
|
11
|
+
* [u32 bucket_count] [u32 _pad] [u64 offsets[bucket_count]]
|
|
12
|
+
* Each bucket is the offset of the first entry in that bucket (0 = empty).
|
|
13
|
+
* Collisions are resolved by linear probing.
|
|
14
|
+
*
|
|
15
|
+
* File header (after memfile header):
|
|
16
|
+
* offset 32: [u64 hash_index_offset]
|
|
17
|
+
* offset 40: [u32 entry_count] total live strings
|
|
18
|
+
* offset 44: [u32 _pad]
|
|
19
|
+
*/
|
|
20
|
+
import { MemoryFile } from './memoryfile.js';
|
|
21
|
+
// We store our own header in an allocated block so it doesn't collide with
// the memfile header region. The offset of this header block is stored at
// a fixed position right after the memfile header.
//
// Our header block layout:
// [u64 hash_index_offset] [u32 entry_count] [u32 _pad]
//
// The pointer TO our header block is at memfile offset 32 (right after the
// 32-byte memfile header). We write it there during init.
//
// BUT: offsets 32+ belong to the allocator. So we allocate a 16-byte block
// for our header, and store its offset as the very first allocation.
// Position in file where we store the offset to our header block.
// This is a "well-known" location. We use the first allocation's offset.
const OUR_HEADER_SIZE = 16; // u64 hash_index_offset + u32 entry_count + u32 pad
// Offsets within our header block
const HDR_HASH_INDEX_OFFSET = 0; // u64
const HDR_ENTRY_COUNT = 8; // u32
// String entry field offsets (relative to entry start)
const ENT_REFCOUNT = 0; // u32
const ENT_HASH = 4; // u32 — cached fnv1a of the data, avoids re-hashing on probe
const ENT_LEN = 8; // u16 — byte length of the UTF-8 data (so max 65535 bytes)
const ENT_DATA = 10; // u8[len]
const ENT_HEADER_SIZE = 10; // fixed prefix before the variable-length data
// Hash index field offsets (relative to index block start)
const IDX_BUCKET_COUNT = 0; // u32
const IDX_BUCKETS = 8; // u64[bucket_count] — each slot is an entry offset, 0 = empty
const INITIAL_BUCKETS = 4096; // starting bucket count for a fresh table
const LOAD_FACTOR_THRESHOLD = 0.7; // rehash (doubling buckets) once live entries exceed 70% of buckets
|
|
50
|
+
// FNV-1a 32-bit hash over a byte buffer
// (offset basis 0x811c9dc5, prime 0x01000193).
function fnv1a(data) {
    let h = 0x811c9dc5;
    for (const byte of data) {
        // xor the byte in first, then multiply by the FNV prime (mod 2^32).
        h = Math.imul(h ^ byte, 0x01000193);
    }
    return h >>> 0; // coerce to unsigned 32-bit
}
|
|
59
|
+
export class StringTable {
    // Backing MemoryFile (allocator + byte-level read/write over an mmap'd file).
    mf;
    // Offset of our 16-byte header block: [u64 hash_index_offset][u32 entry_count][u32 pad].
    headerOffset;
    /**
     * Open (or create) the string table backed by the memfile at `path`.
     *
     * @param path Backing file path, passed through to MemoryFile.
     * @param initialSize Initial memfile size in bytes. Default 65536.
     */
    constructor(path, initialSize = 65536) {
        this.mf = new MemoryFile(path, initialSize);
        const stats = this.mf.stats();
        // A fresh memfile has allocated = 32 (just the memfile header).
        // If anything has been allocated, the file was previously initialized.
        if (stats.allocated <= 32n) {
            this.headerOffset = this.initHeader();
        }
        else {
            // The first allocation in the file is always our header block.
            // memfile_alloc returns the offset past the alloc_t header (8 bytes),
            // so the first allocation is at offset 32 (memfile header) + 8 (alloc_t) = 40.
            this.headerOffset = 40n;
        }
    }
    /**
     * First-time initialization: allocate our header block (always the file's
     * first allocation) and the hash index, zero all buckets, and persist the
     * header fields.
     *
     * @returns Offset of the freshly allocated header block.
     * @throws Error if either allocation fails.
     */
    initHeader() {
        // Allocate our header block (first alloc in the file)
        const hdrOffset = this.mf.alloc(BigInt(OUR_HEADER_SIZE));
        if (hdrOffset === 0n)
            throw new Error('StringTable: failed to allocate header');
        // Allocate hash index block
        const bucketBytes = INITIAL_BUCKETS * 8;
        const indexSize = 4 + 4 + bucketBytes; // bucket_count + pad + buckets
        const indexOffset = this.mf.alloc(BigInt(indexSize));
        if (indexOffset === 0n)
            throw new Error('StringTable: failed to allocate hash index');
        // Write bucket_count to index block
        const idxHeader = Buffer.alloc(8);
        idxHeader.writeUInt32LE(INITIAL_BUCKETS, 0);
        idxHeader.writeUInt32LE(0, 4); // pad
        this.mf.write(indexOffset, idxHeader);
        // Zero all buckets (0 = empty slot)
        const zeroBuckets = Buffer.alloc(bucketBytes);
        this.mf.write(indexOffset + 8n, zeroBuckets);
        // Write our header: hash_index_offset and entry_count
        const hdr = Buffer.alloc(OUR_HEADER_SIZE);
        hdr.writeBigUInt64LE(indexOffset, HDR_HASH_INDEX_OFFSET);
        hdr.writeUInt32LE(0, HDR_ENTRY_COUNT); // entry_count = 0
        hdr.writeUInt32LE(0, 12); // pad
        this.mf.write(hdrOffset, hdr);
        return hdrOffset;
    }
    // --- Header access ---
    /** Read the current hash index block offset (u64) from our header. */
    getHashIndexOffset() {
        const buf = this.mf.read(this.headerOffset + BigInt(HDR_HASH_INDEX_OFFSET), 8n);
        return buf.readBigUInt64LE(0);
    }
    /** Read the live-string count (u32) from our header. */
    getEntryCount() {
        const buf = this.mf.read(this.headerOffset + BigInt(HDR_ENTRY_COUNT), 4n);
        return buf.readUInt32LE(0);
    }
    /** Persist a new live-string count to our header. */
    setEntryCount(count) {
        const buf = Buffer.alloc(4);
        buf.writeUInt32LE(count, 0);
        this.mf.write(this.headerOffset + BigInt(HDR_ENTRY_COUNT), buf);
    }
    // --- Hash index access ---
    /** Number of buckets in the current hash index. */
    getBucketCount() {
        const indexOffset = this.getHashIndexOffset();
        const buf = this.mf.read(indexOffset, 4n);
        return buf.readUInt32LE(0);
    }
    /** Entry offset stored in bucket `index` (0n = empty). */
    getBucket(index) {
        const indexOffset = this.getHashIndexOffset();
        const pos = indexOffset + BigInt(IDX_BUCKETS + index * 8);
        const buf = this.mf.read(pos, 8n);
        return buf.readBigUInt64LE(0);
    }
    /** Store `offset` into bucket `index` (0n clears the slot). */
    setBucket(index, offset) {
        const indexOffset = this.getHashIndexOffset();
        const pos = indexOffset + BigInt(IDX_BUCKETS + index * 8);
        const buf = Buffer.alloc(8);
        buf.writeBigUInt64LE(offset, 0);
        this.mf.write(pos, buf);
    }
    // --- String entry access ---
    /**
     * Read a full string entry at `offset`.
     * @returns { refcount, hash, len, data } — data is a Buffer of `len` bytes.
     */
    readEntry(offset) {
        const header = this.mf.read(offset, BigInt(ENT_HEADER_SIZE));
        const refcount = header.readUInt32LE(ENT_REFCOUNT);
        const hash = header.readUInt32LE(ENT_HASH);
        const len = header.readUInt16LE(ENT_LEN);
        const data = len > 0 ? this.mf.read(offset + BigInt(ENT_DATA), BigInt(len)) : Buffer.alloc(0);
        return { refcount, hash, len, data };
    }
    /** Overwrite just the refcount field of the entry at `offset`. */
    writeRefcount(offset, refcount) {
        const buf = Buffer.alloc(4);
        buf.writeUInt32LE(refcount, 0);
        this.mf.write(offset, buf);
    }
    // --- Public API ---
    /**
     * Intern a string. Returns its ID (offset).
     * If the string already exists, bumps refcount and returns existing ID.
     * If new, allocates an entry and inserts into the hash index.
     *
     * @throws Error if the UTF-8 encoding exceeds 65535 bytes (entry length is
     *   stored as u16), or if allocation fails.
     */
    intern(str) {
        const data = Buffer.from(str, 'utf-8');
        // The entry stores its length as a u16. Validate up-front: previously an
        // oversize string would allocate the entry and only then fail inside
        // writeUInt16LE, leaking the allocation and leaving the index untouched.
        if (data.length > 0xffff)
            throw new Error(`StringTable: string too long (${data.length} bytes, max 65535)`);
        const hash = fnv1a(data);
        const bucketCount = this.getBucketCount();
        let bucket = hash % bucketCount;
        // Linear probe to find existing or empty slot
        for (let i = 0; i < bucketCount; i++) {
            const slotIdx = (bucket + i) % bucketCount;
            const entryOffset = this.getBucket(slotIdx);
            if (entryOffset === 0n) {
                // Empty slot — string not found, allocate new entry
                const entrySize = ENT_HEADER_SIZE + data.length;
                const newOffset = this.mf.alloc(BigInt(entrySize));
                if (newOffset === 0n)
                    throw new Error('StringTable: alloc failed');
                // Write entry: refcount=1, hash, len, data
                const entryBuf = Buffer.alloc(ENT_HEADER_SIZE);
                entryBuf.writeUInt32LE(1, ENT_REFCOUNT);
                entryBuf.writeUInt32LE(hash, ENT_HASH);
                entryBuf.writeUInt16LE(data.length, ENT_LEN);
                this.mf.write(newOffset, entryBuf);
                if (data.length > 0) {
                    this.mf.write(newOffset + BigInt(ENT_DATA), data);
                }
                // Insert into hash index
                this.setBucket(slotIdx, newOffset);
                const count = this.getEntryCount() + 1;
                this.setEntryCount(count);
                // Check load factor and rehash if needed
                if (count > bucketCount * LOAD_FACTOR_THRESHOLD) {
                    this.rehash(bucketCount * 2);
                }
                return newOffset;
            }
            // Slot occupied — check if it matches
            const entry = this.readEntry(entryOffset);
            if (entry.hash === hash && entry.len === data.length && entry.data.equals(data)) {
                // Found — bump refcount
                this.writeRefcount(entryOffset, entry.refcount + 1);
                return entryOffset;
            }
            // Collision — continue probing
        }
        throw new Error('StringTable: hash index full (should not happen with rehashing)');
    }
    /**
     * Get the string for an ID. Returns the UTF-8 string.
     */
    get(id) {
        const entry = this.readEntry(id);
        return entry.data.toString('utf-8');
    }
    /**
     * Look up a string without interning or bumping refcount.
     * Returns the ID (offset) if found, or null if not present.
     */
    find(str) {
        const data = Buffer.from(str, 'utf-8');
        const hash = fnv1a(data);
        const bucketCount = this.getBucketCount();
        let bucket = hash % bucketCount;
        for (let i = 0; i < bucketCount; i++) {
            const slotIdx = (bucket + i) % bucketCount;
            const entryOffset = this.getBucket(slotIdx);
            if (entryOffset === 0n)
                return null; // Empty slot — not found
            const entry = this.readEntry(entryOffset);
            if (entry.hash === hash && entry.len === data.length && entry.data.equals(data)) {
                return entryOffset;
            }
        }
        return null;
    }
    /**
     * Decrement refcount. If it reaches 0, free the entry and remove from hash index.
     */
    release(id) {
        const entry = this.readEntry(id);
        if (entry.refcount <= 1) {
            // Remove from hash index
            this.removeFromIndex(id, entry.hash);
            // Free the allocation
            this.mf.free(id);
            this.setEntryCount(this.getEntryCount() - 1);
        }
        else {
            this.writeRefcount(id, entry.refcount - 1);
        }
    }
    /**
     * Bump refcount without interning (for when you already have the ID).
     */
    addRef(id) {
        const entry = this.readEntry(id);
        this.writeRefcount(id, entry.refcount + 1);
    }
    /**
     * Get current refcount for an entry.
     */
    refcount(id) {
        const buf = this.mf.read(id, 4n);
        return buf.readUInt32LE(0);
    }
    /**
     * Number of live strings in the table.
     */
    get count() {
        return this.getEntryCount();
    }
    // --- Hash index management ---
    /**
     * Remove the entry at `offset` from the hash index. Probes from the entry's
     * natural bucket; on removal, repairs the linear-probe cluster so later
     * lookups don't stop early at the new hole.
     */
    removeFromIndex(offset, hash) {
        const bucketCount = this.getBucketCount();
        let bucket = hash % bucketCount;
        // Find the entry in the index
        for (let i = 0; i < bucketCount; i++) {
            const slotIdx = (bucket + i) % bucketCount;
            const entryOffset = this.getBucket(slotIdx);
            if (entryOffset === 0n)
                return; // Not found (shouldn't happen)
            if (entryOffset === offset) {
                // Found — remove and fix up the linear probe chain
                this.setBucket(slotIdx, 0n);
                this.fixupAfterRemoval(slotIdx, bucketCount);
                return;
            }
        }
    }
    /**
     * After removing an entry at slotIdx, re-insert any entries that were
     * displaced past the removed slot by linear probing (open-addressing
     * deletion: walk the cluster, moving entries whose natural bucket lies
     * cyclically at or before the hole).
     */
    fixupAfterRemoval(removedSlot, bucketCount) {
        let slot = (removedSlot + 1) % bucketCount;
        while (true) {
            const entryOffset = this.getBucket(slot);
            if (entryOffset === 0n)
                break; // End of cluster
            // Read this entry's natural bucket
            const entry = this.readEntry(entryOffset);
            const naturalBucket = entry.hash % bucketCount;
            // Check if this entry needs to move (it was displaced past removedSlot)
            if (this.needsRelocation(naturalBucket, removedSlot, slot, bucketCount)) {
                this.setBucket(removedSlot, entryOffset);
                this.setBucket(slot, 0n);
                // Continue fixup from the newly emptied slot
                removedSlot = slot;
            }
            slot = (slot + 1) % bucketCount;
        }
    }
    /**
     * True when `empty` lies cyclically within [natural, current) — i.e. the
     * entry at `current` was displaced past the emptied slot and must move back.
     */
    needsRelocation(natural, empty, current, size) {
        // Is 'empty' between 'natural' and 'current' in the circular probe sequence?
        if (natural <= current) {
            return natural <= empty && empty < current;
        }
        else {
            // Wraps around
            return natural <= empty || empty < current;
        }
    }
    /**
     * Grow the hash index to `newBucketCount` buckets: allocate a new zeroed
     * index, repoint the header at it, re-insert every live entry (linear
     * probing into the new table), then free the old index block.
     */
    rehash(newBucketCount) {
        const oldIndexOffset = this.getHashIndexOffset();
        const oldBucketCount = this.getBucketCount();
        // Allocate new index
        const newIndexSize = 4 + 4 + newBucketCount * 8;
        const newIndexOffset = this.mf.alloc(BigInt(newIndexSize));
        if (newIndexOffset === 0n)
            throw new Error('StringTable: rehash alloc failed');
        // Write new bucket count (pad at offset 4 stays zero from Buffer.alloc)
        const header = Buffer.alloc(8);
        header.writeUInt32LE(newBucketCount, 0);
        this.mf.write(newIndexOffset, header);
        // Zero new buckets
        const zeroBuckets = Buffer.alloc(newBucketCount * 8);
        this.mf.write(newIndexOffset + 8n, zeroBuckets);
        // Update header to point to new index (reads below use oldIndexOffset directly)
        const hdr = Buffer.alloc(8);
        hdr.writeBigUInt64LE(newIndexOffset, 0);
        this.mf.write(this.headerOffset + BigInt(HDR_HASH_INDEX_OFFSET), hdr);
        // Re-insert all entries from old index
        for (let i = 0; i < oldBucketCount; i++) {
            const pos = oldIndexOffset + BigInt(IDX_BUCKETS + i * 8);
            const buf = this.mf.read(pos, 8n);
            const entryOffset = buf.readBigUInt64LE(0);
            if (entryOffset === 0n)
                continue;
            // Read hash and insert into new index
            const entry = this.readEntry(entryOffset);
            let bucket = entry.hash % newBucketCount;
            for (let j = 0; j < newBucketCount; j++) {
                const slotIdx = (bucket + j) % newBucketCount;
                const slotPos = newIndexOffset + BigInt(IDX_BUCKETS + slotIdx * 8);
                const slotBuf = this.mf.read(slotPos, 8n);
                if (slotBuf.readBigUInt64LE(0) === 0n) {
                    const writeBuf = Buffer.alloc(8);
                    writeBuf.writeBigUInt64LE(entryOffset, 0);
                    this.mf.write(slotPos, writeBuf);
                    break;
                }
            }
        }
        // Free old index block
        this.mf.free(oldIndexOffset);
    }
    // --- Lifecycle ---
    /** Flush the backing memfile to disk. */
    sync() {
        this.mf.sync();
    }
    /** Refresh the mmap if the file was grown by another process. */
    refresh() {
        this.mf.refresh();
    }
    /** Close the backing memfile. */
    close() {
        this.mf.close();
    }
}
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Concurrency tests: two MCP server instances sharing the same binary files.
|
|
3
|
+
*
|
|
4
|
+
* Verifies that flock-based locking + mmap refresh works correctly when
|
|
5
|
+
* one instance writes and the other reads.
|
|
6
|
+
*/
|
|
7
|
+
import { createServer } from "../server.js";
|
|
8
|
+
import { createTestClient, callTool } from "./test-utils.js";
|
|
9
|
+
import * as fs from "fs";
|
|
10
|
+
import * as path from "path";
|
|
11
|
+
import * as os from "os";
|
|
12
|
+
// Two independent server instances are opened over the SAME backing file path.
// Per the file header, cross-instance visibility relies on flock-based locking
// plus mmap refresh; each test writes through one instance and reads through
// the other to exercise that path.
describe("Concurrency - dual server instances", () => {
    let tmpDir;
    let memoryFilePath;
    let clientA;
    let clientB;
    let cleanupA;
    let cleanupB;
    beforeEach(async () => {
        // Fresh temp dir per test so instances never see a previous test's data.
        tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "memory-concurrency-"));
        memoryFilePath = path.join(tmpDir, "memory.json");
        // Create two MCP server instances sharing the same files
        const serverA = createServer(memoryFilePath);
        const serverB = createServer(memoryFilePath);
        const setupA = await createTestClient(serverA);
        const setupB = await createTestClient(serverB);
        clientA = setupA.client;
        clientB = setupB.client;
        cleanupA = setupA.cleanup;
        cleanupB = setupB.cleanup;
    });
    afterEach(async () => {
        await cleanupA();
        await cleanupB();
        // Clean up temp files
        // NOTE(review): the server presumably creates sibling files next to
        // memory.json (hence readdir rather than a single unlink) — confirm.
        const files = fs.readdirSync(tmpDir);
        for (const f of files) {
            fs.unlinkSync(path.join(tmpDir, f));
        }
        fs.rmdirSync(tmpDir);
    });
    it("instance B sees entities created by instance A", async () => {
        // A creates entities
        await callTool(clientA, "create_entities", {
            entities: [
                { name: "Alpha", entityType: "Letter", observations: ["First letter"] },
                { name: "Beta", entityType: "Letter", observations: ["Second letter"] },
            ],
        });
        // B should see them
        const result = (await callTool(clientB, "search_nodes", {
            query: "Letter",
            sortBy: "name",
        }));
        const names = result.entities.items.map((e) => e.name);
        expect(names).toEqual(["Alpha", "Beta"]);
    });
    it("instance B sees relations created by instance A", async () => {
        // A creates entities
        await callTool(clientA, "create_entities", {
            entities: [
                { name: "Node1", entityType: "Test", observations: [] },
                { name: "Node2", entityType: "Test", observations: [] },
            ],
        });
        // A creates relation
        await callTool(clientA, "create_relations", {
            relations: [{ from: "Node1", to: "Node2", relationType: "links_to" }],
        });
        // B reads neighbors
        const result = (await callTool(clientB, "get_neighbors", {
            entityName: "Node1",
            sortBy: "name",
        }));
        expect(result.items).toHaveLength(1);
        expect(result.items[0].name).toBe("Node2");
    });
    it("instance A sees entities created by instance B", async () => {
        // B creates entities (write direction reversed vs the first test)
        await callTool(clientB, "create_entities", {
            entities: [
                { name: "Gamma", entityType: "Letter", observations: ["Third letter"] },
            ],
        });
        // A should see them
        const result = (await callTool(clientA, "open_nodes", {
            names: ["Gamma"],
        }));
        expect(result.entities.items).toHaveLength(1);
        expect(result.entities.items[0].name).toBe("Gamma");
        expect(result.entities.items[0].observations).toEqual(["Third letter"]);
    });
    it("interleaved writes are visible to both instances", async () => {
        // Writes alternate A → B → A; neither instance may clobber the other's data.
        // A creates first entity
        await callTool(clientA, "create_entities", {
            entities: [{ name: "E1", entityType: "Interleaved", observations: [] }],
        });
        // B creates second entity
        await callTool(clientB, "create_entities", {
            entities: [{ name: "E2", entityType: "Interleaved", observations: [] }],
        });
        // A creates third entity
        await callTool(clientA, "create_entities", {
            entities: [{ name: "E3", entityType: "Interleaved", observations: [] }],
        });
        // Both should see all 3
        const resultA = (await callTool(clientA, "search_nodes", {
            query: "Interleaved",
            sortBy: "name",
        }));
        const resultB = (await callTool(clientB, "search_nodes", {
            query: "Interleaved",
            sortBy: "name",
        }));
        const namesA = resultA.entities.items.map((e) => e.name);
        const namesB = resultB.entities.items.map((e) => e.name);
        expect(namesA).toEqual(["E1", "E2", "E3"]);
        expect(namesB).toEqual(["E1", "E2", "E3"]);
    });
    it("deletions by A are visible to B", async () => {
        // A creates entities
        await callTool(clientA, "create_entities", {
            entities: [
                { name: "Keep", entityType: "Test", observations: [] },
                { name: "Remove", entityType: "Test", observations: [] },
            ],
        });
        // A deletes one
        await callTool(clientA, "delete_entities", {
            entityNames: ["Remove"],
        });
        // B should only see the remaining one
        const result = (await callTool(clientB, "search_nodes", {
            query: "Test",
            sortBy: "name",
        }));
        const names = result.entities.items.map((e) => e.name);
        expect(names).toEqual(["Keep"]);
    });
    it("observation changes by A are visible to B", async () => {
        // A creates entity
        await callTool(clientA, "create_entities", {
            entities: [{ name: "Observable", entityType: "Test", observations: ["Initial"] }],
        });
        // A adds observation
        await callTool(clientA, "add_observations", {
            observations: [{ entityName: "Observable", contents: ["Added by A"] }],
        });
        // B reads it — both the original and the appended observation must survive
        const result = (await callTool(clientB, "open_nodes", {
            names: ["Observable"],
        }));
        expect(result.entities.items[0].observations).toContain("Initial");
        expect(result.entities.items[0].observations).toContain("Added by A");
    });
    it("stats are consistent across instances", async () => {
        // A creates entities and relations
        await callTool(clientA, "create_entities", {
            entities: [
                { name: "S1", entityType: "Stats", observations: [] },
                { name: "S2", entityType: "Stats", observations: [] },
            ],
        });
        await callTool(clientA, "create_relations", {
            relations: [{ from: "S1", to: "S2", relationType: "related" }],
        });
        // B checks stats
        const stats = (await callTool(clientB, "get_stats", {}));
        expect(stats.entityCount).toBe(2);
        expect(stats.relationCount).toBe(1);
    });
    it("handles many entities created across instances (growth/remap)", async () => {
        // Create enough entities to trigger file growth
        const batchA = [];
        const batchB = [];
        for (let i = 0; i < 50; i++) {
            batchA.push({ name: `A_${i}`, entityType: "Bulk", observations: [`Obs ${i}`] });
            batchB.push({ name: `B_${i}`, entityType: "Bulk", observations: [`Obs ${i}`] });
        }
        // Create in parallel-ish: A first, then B
        await callTool(clientA, "create_entities", { entities: batchA });
        await callTool(clientB, "create_entities", { entities: batchB });
        // Both see all 100
        const statsA = (await callTool(clientA, "get_stats", {}));
        const statsB = (await callTool(clientB, "get_stats", {}));
        expect(statsA.entityCount).toBe(100);
        expect(statsB.entityCount).toBe(100);
    });
});
|