@levalicious/server-memory 0.0.13 → 0.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,396 @@
1
+ /**
2
+ * kb_load.ts — Load a plaintext document into the knowledge graph.
3
+ *
4
+ * Pipeline:
5
+ * 1. Normalize text
6
+ * 2. Split into observations (≤140 chars, word-boundary aligned)
7
+ * 3. Group observations into chunks (≤2 per entity)
8
+ * 4. Build chain: Document → starts_with/ends_with → chunks ↔ follows/preceded_by
9
+ * 5. Sentence TextRank: rank sentences by TF-IDF cosine PageRank
10
+ * 6. Build index entity: Document → has_index → Index → highlights → top chunks
11
+ *
12
+ * Returns arrays of entities and relations ready for createEntities/createRelations.
13
+ */
14
+ import * as crypto from 'crypto';
15
+ import * as path from 'path';
16
+ // ─── Constants ──────────────────────────────────────────────────────
17
/** Longest observation emitted by the splitter, measured with String#length (UTF-16 code units). */
const MAX_OBS_LENGTH = 140;
/** How many observations are grouped onto a single chunk entity (and onto the index entity). */
const MAX_OBS_PER_ENTITY = 2;
/** Damping factor of the sentence PageRank iteration. */
const TEXTRANK_DAMPING = 0.85;
/** Hard cap on the number of PageRank iterations. */
const TEXTRANK_MAX_ITER = 30000;
/** PageRank stops once the summed absolute score change drops below this value. */
const TEXTRANK_CONVERGENCE = 1e-6;
/**
 * Lower-case file extensions accepted as plaintext.
 * Insertion order matters: the first ten entries are quoted in the
 * "unsupported extension" error message.
 */
const ALLOWED_EXTENSIONS = new Set([
    // Prose and markup
    '.txt', '.tex', '.md', '.markdown', '.rst', '.org', '.adoc', '.asciidoc',
    '.html', '.htm', '.xml',
    // Data and configuration
    '.json', '.yaml', '.yml', '.toml', '.csv', '.tsv', '.log', '.cfg', '.ini', '.conf',
    // Source code
    '.py', '.js', '.ts', '.c', '.h', '.cpp', '.hpp', '.java',
    '.go', '.rs', '.rb', '.pl', '.sh', '.bash', '.zsh', '.fish',
    '.el', '.lisp', '.clj', '.hs', '.ml', '.scala', '.kt',
    '.r', '.m', '.swift', '.lua', '.vim', '.sql',
    // LaTeX support files
    '.bib', '.sty', '.cls',
]);
32
+ // ─── Text Processing ────────────────────────────────────────────────
33
/**
 * Flatten a document to a single line: leading/trailing whitespace is
 * removed and every internal run of whitespace (spaces, tabs, CR/LF, blank
 * lines) becomes exactly one space.
 *
 * @param {string} text Raw document text.
 * @returns {string} The whitespace-normalized text.
 */
function normalize(text) {
    return text.trim().replace(/\s+/g, ' ');
}
40
/**
 * Tokenize `text` on ASCII spaces and record where each token sits.
 *
 * Only the space character separates words; tabs and newlines are kept
 * inside a token.
 *
 * @param {string} text Text to tokenize.
 * @param {number} offset Value added to every position, so that positions
 *   refer to the enclosing document rather than to `text`.
 * @returns {Array<{text: string, normalized: string, start: number, end: number}>}
 *   One record per word: the raw token, its lower-cased form, and its
 *   half-open [start, end) span.
 */
function labelWords(text, offset) {
    return Array.from(text.matchAll(/[^ ]+/g), (hit) => {
        const token = hit[0];
        const from = hit.index;
        const to = from + token.length;
        return {
            text: token,
            normalized: token.toLowerCase(),
            start: offset + from,
            end: offset + to,
        };
    });
}
62
/**
 * Cut `text` into consecutive observations of at most `maxLen` UTF-16 code
 * units each.
 *
 * Each cut is made at the last space that keeps the observation within the
 * limit. When no such space exists (one very long token) the text is
 * hard-split at the limit, backing up one unit if that would separate a
 * surrogate pair. Spaces following a cut are skipped and belong to no
 * observation.
 *
 * The search for a cut point only ever inspects a window of `maxLen + 1`
 * characters, so the total work is linear in the text length even for
 * input that contains no spaces at all.
 *
 * @param {string} text Text to split (normally already whitespace-normalized).
 * @param {number} [maxLen=MAX_OBS_LENGTH] Maximum observation length in
 *   UTF-16 code units. Must be an integer >= 2 so that a surrogate pair
 *   always fits.
 * @returns {Array<{text: string, start: number, end: number, words: Array}>}
 *   Observations in document order. `start`/`end` are offsets into `text`
 *   (`end` excludes trimmed trailing whitespace); `words` comes from
 *   `labelWords`.
 * @throws {RangeError} If `maxLen` is not an integer >= 2.
 */
function splitIntoObservations(text, maxLen = MAX_OBS_LENGTH) {
    if (!Number.isInteger(maxLen) || maxLen < 2) {
        throw new RangeError(`maxLen must be an integer >= 2, got ${maxLen}`);
    }
    const observations = [];
    const total = text.length;
    let pos = 0;
    while (pos < total) {
        // Everything that is left fits into one observation: emit and stop.
        if (total - pos <= maxLen) {
            const tail = text.slice(pos);
            observations.push({
                text: tail,
                start: pos,
                end: total,
                words: labelWords(tail, pos),
            });
            break;
        }
        // A space at relative index maxLen is still a valid cut (the
        // observation is then exactly maxLen long), hence the +1 window.
        const window = text.slice(pos, pos + maxLen + 1);
        const spaceAt = window.lastIndexOf(' ');
        let cut;
        if (spaceAt > 0) {
            cut = pos + spaceAt;
        }
        else {
            // No usable space: hard split at the limit.
            cut = pos + maxLen;
            // codePointAt only exceeds 0xFFFF when a high surrogate at
            // cut-1 is followed by its low surrogate at cut, i.e. the pair
            // straddles the cut. Back up so the pair stays together.
            if (text.codePointAt(cut - 1) > 0xFFFF) {
                cut -= 1;
            }
        }
        const obsText = text.slice(pos, cut).trimEnd();
        observations.push({
            text: obsText,
            start: pos,
            end: pos + obsText.length,
            words: labelWords(obsText, pos),
        });
        pos = cut;
        while (pos < total && text[pos] === ' ') {
            pos++;
        }
    }
    return observations;
}
116
/**
 * Group observations, in order, into chunks of at most
 * MAX_OBS_PER_ENTITY observations each.
 *
 * @param {Array} observations Observations in document order.
 * @returns {Array<{index: number, id: string, observations: Array}>}
 *   Chunks in document order. `index` is the chunk's position and `id` is a
 *   fresh random 24-character hex string (12 random bytes).
 */
function chunkObservations(observations) {
    const chunkCount = Math.ceil(observations.length / MAX_OBS_PER_ENTITY);
    return Array.from({ length: chunkCount }, (_, index) => {
        const first = index * MAX_OBS_PER_ENTITY;
        return {
            index,
            id: crypto.randomBytes(12).toString('hex'),
            observations: observations.slice(first, first + MAX_OBS_PER_ENTITY),
        };
    });
}
127
+ // ─── Sentence Splitting ─────────────────────────────────────────────
128
/**
 * Split text into sentences at whitespace that follows '.', '?' or '!'.
 *
 * Fragments with fewer than three whitespace-separated words are dropped.
 *
 * @param {string} normalizedText Whitespace-normalized document text.
 * @returns {Array<{index: number, text: string, start: number, words: string[]}>}
 *   Kept sentences in document order. `index` counts kept sentences only,
 *   `start` is the offset at which the sentence's slice begins, and `words`
 *   are the lower-cased tokens (punctuation still attached).
 */
function splitSentences(normalizedText) {
    const sentences = [];
    // Record the slice [from, to) as a sentence if it has at least 3 words.
    const keep = (from, to) => {
        const text = normalizedText.slice(from, to).trim();
        if (text.length === 0) {
            return;
        }
        const words = text.toLowerCase().split(/\s+/).filter((w) => w.length > 0);
        if (words.length < 3) {
            return;
        }
        sentences.push({ index: sentences.length, text, start: from, words });
    };
    let cursor = 0;
    for (const gap of normalizedText.matchAll(/(?<=[.?!])\s+/g)) {
        // gap.index is the first whitespace character; the extra character
        // included by +1 is removed again by trim().
        keep(cursor, gap.index + 1);
        cursor = gap.index + gap[0].length;
    }
    // Whatever follows the last boundary is the final sentence.
    if (cursor < normalizedText.length) {
        keep(cursor, normalizedText.length);
    }
    return sentences;
}
154
+ // ─── TF-IDF ─────────────────────────────────────────────────────────
155
/**
 * Build the document-level TF-IDF weight of every distinct word.
 *
 * @param {Array<{normalized: string}>} allWords Every word occurrence in the document.
 * @param {Map<string, number>} idf IDF value per normalized word; words
 *   missing from the map get weight 0.
 * @returns {Map<string, number>} normalized word -> (occurrence count * idf),
 *   in first-occurrence order.
 */
function buildWeightVector(allWords, idf) {
    const tally = new Map();
    allWords.forEach(({ normalized }) => {
        tally.set(normalized, (tally.get(normalized) ?? 0) + 1);
    });
    return new Map(Array.from(tally, ([term, count]) => [term, count * (idf.get(term) ?? 0)]));
}
166
/**
 * Derive document frequencies from the string table, treating every stored
 * string as a document that occurs `refcount` times.
 *
 * @param {{entries(): Iterable<{text: string, refcount: number}>}} st
 *   Source of corpus strings.
 * @returns {{df: Map<string, number>, corpusSize: number}} `df` maps each
 *   lower-cased word to the summed refcount of the strings containing it;
 *   `corpusSize` is the sum of all refcounts.
 */
function deriveCorpusDocFreqs(st) {
    const df = new Map();
    let corpusSize = 0;
    for (const { text, refcount } of st.entries()) {
        corpusSize += refcount;
        // A word counts once per string, however often it repeats in it.
        const distinct = new Set(text.toLowerCase().split(/\s+/).filter((token) => token.length > 0));
        distinct.forEach((token) => {
            df.set(token, (df.get(token) ?? 0) + refcount);
        });
    }
    return { df, corpusSize };
}
178
/**
 * Compute an inverse-document-frequency value for every word of a document.
 *
 * idf(word) = ln(N / (1 + df(word))) + 1, where N is the corpus size.
 *
 * N is clamped to at least 1. With an empty corpus (N = 0) the unclamped
 * formula yields ln(0) = -Infinity, which makes every TF-IDF weight
 * infinite and every downstream cosine similarity NaN. With the clamp an
 * empty corpus gives every word the neutral IDF of 1.
 *
 * @param {Iterable<string>} docVocab Distinct normalized words of the document.
 * @param {Map<string, number>} df Corpus document frequency per word
 *   (missing words count as 0).
 * @param {number} corpusSize Number of documents in the corpus.
 * @returns {Map<string, number>} word -> idf, in `docVocab` order.
 */
function buildIdfVector(docVocab, df, corpusSize) {
    const effectiveSize = Math.max(corpusSize, 1);
    const idf = new Map();
    for (const word of docVocab) {
        const docFreq = df.get(word) ?? 0;
        idf.set(word, Math.log(effectiveSize / (1 + docFreq)) + 1);
    }
    return idf;
}
186
+ // ─── Cosine Similarity ──────────────────────────────────────────────
187
/**
 * Cosine similarity of two word sets, where each set is read as a vector
 * holding `weights[word]` for every word it contains (0 for unknown words).
 *
 * @param {Map<string, number>} weights Weight per word.
 * @param {Set<string>} keysA Words of the first set.
 * @param {Set<string>} keysB Words of the second set.
 * @returns {number} The cosine similarity, or 0 when either vector has zero length.
 */
function cosineSimilarity(weights, keysA, keysB) {
    const squared = (term) => {
        const w = weights.get(term) ?? 0;
        return w * w;
    };
    // A shared word has the same weight on both sides, so its contribution
    // to the dot product is simply that weight squared.
    let shared = 0;
    for (const term of keysA) {
        if (keysB.has(term)) {
            shared += squared(term);
        }
    }
    let magnitudeA = 0;
    for (const term of keysA) {
        magnitudeA += squared(term);
    }
    let magnitudeB = 0;
    for (const term of keysB) {
        magnitudeB += squared(term);
    }
    const scale = Math.sqrt(magnitudeA) * Math.sqrt(magnitudeB);
    if (scale === 0) {
        return 0;
    }
    return shared / scale;
}
208
+ // ─── PageRank ───────────────────────────────────────────────────────
209
/**
 * Weighted PageRank by power iteration over a dense edge-weight matrix.
 *
 * Each node passes its score to the others in proportion to
 * matrix[source][target] / rowSum(source); diagonal entries are ignored and
 * nodes whose row sums to zero pass nothing on. Iteration stops after
 * TEXTRANK_MAX_ITER rounds or once the summed absolute score change is
 * below TEXTRANK_CONVERGENCE.
 *
 * @param {number[][]} matrix Square matrix of non-negative edge weights.
 * @returns {number[]} One score per node; empty for an empty matrix.
 */
function pageRank(matrix) {
    const size = matrix.length;
    if (size === 0) {
        return [];
    }
    const outWeight = matrix.map((row) => row.reduce((acc, value) => acc + value, 0));
    let scores = new Array(size).fill(1 / size);
    for (let round = 0; round < TEXTRANK_MAX_ITER; round++) {
        const prev = scores;
        const next = prev.map((_, target) => {
            let inflow = 0;
            for (let source = 0; source < size; source++) {
                if (source === target || !(outWeight[source] > 0)) {
                    continue;
                }
                inflow += (matrix[source][target] / outWeight[source]) * prev[source];
            }
            return (1 - TEXTRANK_DAMPING) / size + TEXTRANK_DAMPING * inflow;
        });
        let shift = 0;
        next.forEach((value, k) => {
            shift += Math.abs(value - prev[k]);
        });
        scores = next;
        if (shift < TEXTRANK_CONVERGENCE) {
            break;
        }
    }
    return scores;
}
235
+ // ─── Sentence TextRank ──────────────────────────────────────────────
236
/**
 * Rank sentences by PageRank over their pairwise cosine similarities.
 *
 * @param {Array<{words: string[]}>} sentences Sentences to rank.
 * @param {Map<string, number>} weights Word weights used by the similarity.
 * @returns {Array<{sentence: object, score: number}>} Every sentence paired
 *   with its score, ordered from highest to lowest score.
 */
function sentenceTextRank(sentences, weights) {
    const size = sentences.length;
    const wordSets = sentences.map(({ words }) => new Set(words));
    const similarity = wordSets.map(() => new Array(size).fill(0));
    // Similarity is symmetric: compute the upper triangle and mirror it.
    // The diagonal stays 0.
    wordSets.forEach((setA, a) => {
        for (let b = a + 1; b < size; b++) {
            const sim = cosineSimilarity(weights, setA, wordSets[b]);
            similarity[a][b] = sim;
            similarity[b][a] = sim;
        }
    });
    const scores = pageRank(similarity);
    const ranked = sentences.map((sentence, i) => ({ sentence, score: scores[i] }));
    ranked.sort((x, y) => y.score - x.score);
    return ranked;
}
252
+ // ─── Sentence → Chunk mapping ───────────────────────────────────────
253
/**
 * Find the chunk whose text span contains the start of a sentence.
 *
 * A chunk spans from the start of its first observation to the end of its
 * last observation (half-open).
 *
 * @param {{start: number}} sentence Sentence with its start offset.
 * @param {Array<{observations: Array<{start: number, end: number}>}>} chunks
 *   Chunks to search, in order.
 * @returns {object|null} The first matching chunk, or null if none contains
 *   the offset.
 */
function sentenceToChunk(sentence, chunks) {
    const { start: target } = sentence;
    const owner = chunks.find(({ observations }) => {
        const spanStart = observations[0].start;
        return spanStart <= target && target < observations[observations.length - 1].end;
    });
    return owner ?? null;
}
263
+ // ─── Public API ─────────────────────────────────────────────────────
264
/**
 * Check that a file path ends in one of the accepted plaintext extensions.
 *
 * The comparison is case-insensitive.
 *
 * @param {string} filePath Path of the file to check.
 * @returns {string} The lower-cased extension, including the leading dot.
 * @throws {Error} If the path has no extension or the extension is not in
 *   ALLOWED_EXTENSIONS.
 */
export function validateExtension(filePath) {
    const ext = path.extname(filePath).toLowerCase();
    if (ext && ALLOWED_EXTENSIONS.has(ext)) {
        return ext;
    }
    if (!ext) {
        throw new Error(`File has no extension: ${filePath}. Only plaintext files are accepted.`);
    }
    // Quote only the first few accepted extensions to keep the message short.
    const sample = [...ALLOWED_EXTENSIONS].slice(0, 10).join(', ');
    throw new Error(`Unsupported file extension "${ext}". Only plaintext formats are accepted ` +
        `(${sample}, ...). ` +
        `For PDFs, use pdftotext first. For other binary formats, convert to text.`);
}
280
/**
 * Turn a plaintext document into knowledge-graph entities and relations.
 *
 * The text is normalized, split into observations, and grouped into chunk
 * entities linked as a chain. Sentences are ranked with TextRank; the chunks
 * containing the top-ranked sentences are linked from an index entity whose
 * observations hold short previews of those sentences.
 *
 * @param text  Raw document text
 * @param title Document entity name (e.g. filename without extension)
 * @param st    StringTable for IDF corpus frequencies
 * @param topK  Number of top-ranked sentences considered for the index (default: 15)
 * @returns `{ entities, relations, stats }`, where entities and relations are
 *          ready for createEntities/createRelations
 */
export function loadDocument(text, title, st, topK = 15) {
    // 1. Normalize, split into observations, group into chunks
    const normalizedText = normalize(text);
    const chunks = chunkObservations(splitIntoObservations(normalizedText));
    const allWords = chunks.flatMap((chunk) => chunk.observations.flatMap((obs) => obs.words));
    const vocab = new Set(allWords.map((word) => word.normalized));
    // 2-3. Corpus IDF, then the document's TF-IDF weights
    const corpus = deriveCorpusDocFreqs(st);
    const idf = buildIdfVector(vocab, corpus.df, corpus.corpusSize);
    const weights = buildWeightVector(allWords, idf);
    // 4. Sentence TextRank
    const sentences = splitSentences(normalizedText);
    const ranked = sentenceTextRank(sentences, weights);
    // 5. Best sentence per chunk, in rank order (a Map keeps insertion order)
    const bestPerChunk = new Map();
    for (const { sentence, score } of ranked.slice(0, topK)) {
        const chunk = sentenceToChunk(sentence, chunks);
        if (chunk && !bestPerChunk.has(chunk.id)) {
            bestPerChunk.set(chunk.id, { chunk, sentence, score });
        }
    }
    const highlights = [...bestPerChunk.values()];
    // 6. Pack sentence previews into at most MAX_OBS_PER_ENTITY observations
    const indexId = `${title}__index`;
    const indexObs = [];
    let pending = '';
    for (const { sentence } of highlights) {
        const full = sentence.text;
        const preview = full.length > 60 ? `${full.slice(0, 57)}...` : full;
        const joined = pending ? `${pending} | ${preview}` : preview;
        if (joined.length <= MAX_OBS_LENGTH) {
            pending = joined;
            continue;
        }
        // The preview does not fit: flush the current line and start a new one.
        if (pending) {
            indexObs.push(pending);
        }
        if (indexObs.length >= MAX_OBS_PER_ENTITY) {
            break;
        }
        pending = preview.length <= MAX_OBS_LENGTH ? preview : preview.slice(0, MAX_OBS_LENGTH);
    }
    if (pending && indexObs.length < MAX_OBS_PER_ENTITY) {
        indexObs.push(pending);
    }
    // ─── Assemble entities ──────────────────────────────────────────
    // The document entity carries no observations; it is a pointer node.
    const entities = [
        { name: title, entityType: 'Document', observations: [] },
        ...chunks.map((chunk) => ({
            name: chunk.id,
            entityType: 'TextChunk',
            observations: chunk.observations.map((obs) => obs.text),
        })),
        { name: indexId, entityType: 'DocumentIndex', observations: indexObs },
    ];
    // ─── Assemble relations ─────────────────────────────────────────
    const relations = [];
    const link = (from, to, relationType) => {
        relations.push({ from, to, relationType });
    };
    // Document ↔ chain endpoints
    if (chunks.length > 0) {
        const head = chunks[0];
        link(title, head.id, 'starts_with');
        link(head.id, title, 'belongs_to');
        if (chunks.length > 1) {
            const tail = chunks[chunks.length - 1];
            link(title, tail.id, 'ends_with');
            link(tail.id, title, 'belongs_to');
        }
    }
    // Chain between neighbouring chunks
    for (let i = 1; i < chunks.length; i++) {
        const before = chunks[i - 1];
        const after = chunks[i];
        link(before.id, after.id, 'follows');
        link(after.id, before.id, 'preceded_by');
    }
    // Document ↔ index
    link(title, indexId, 'has_index');
    link(indexId, title, 'indexes');
    // Index ↔ highlighted chunks
    for (const { chunk } of highlights) {
        link(indexId, chunk.id, 'highlights');
        link(chunk.id, indexId, 'highlighted_by');
    }
    return {
        entities,
        relations,
        stats: {
            chars: text.length,
            words: allWords.length,
            uniqueWords: vocab.size,
            chunks: chunks.length,
            sentences: sentences.length,
            indexHighlights: highlights.length,
        },
    };
}
@@ -115,6 +115,23 @@ export class MemoryFile {
115
115
  this.assertOpen();
116
116
  return native.stats(this.handle);
117
117
  }
118
+ /**
119
+ * Read the memfile version field (u32 at offset 4).
120
+ */
121
+ getVersion() {
122
+ this.assertOpen();
123
+ const buf = native.read(this.handle, 4n, 4n);
124
+ return buf.readUInt32LE(0);
125
+ }
126
+ /**
127
+ * Write the memfile version field (u32 at offset 4).
128
+ */
129
+ setVersion(version) {
130
+ this.assertOpen();
131
+ const buf = Buffer.alloc(4);
132
+ buf.writeUInt32LE(version, 0);
133
+ native.write(this.handle, 4n, buf);
134
+ }
118
135
  /**
119
136
  * Close the memory file. Syncs and unmaps.
120
137
  * The instance is unusable after this.
@@ -0,0 +1,160 @@
1
+ /**
2
+ * Maximum Entropy Random Walk (MERW) — dominant eigenvector computation
3
+ * via power iteration on the graph's adjacency matrix.
4
+ *
5
+ * MERW transition probabilities: S_ij = (A_ij / λ) * (ψ_j / ψ_i)
6
+ * Stationary distribution: ρ_i = ψ_i² / ‖ψ‖₂²
7
+ *
8
+ * We compute ψ (the dominant right eigenvector of A) using sparse power
9
+ * iteration directly on the GraphFile adjacency lists. No dense matrix
10
+ * is ever constructed.
11
+ *
12
+ * For directed graphs that may not be strongly connected, we add
13
+ * teleportation damping (like PageRank): at each step, follow an edge
14
+ * with probability `alpha`, or jump to a uniform random node with
15
+ * probability `(1 - alpha)`. This guarantees convergence to a unique
16
+ * positive eigenvector.
17
+ */
18
+ import { DIR_FORWARD } from './graphfile.js';
19
/** Default damping factor: probability of following an edge rather than teleporting. */
const DEFAULT_ALPHA = 0.85;
/** Default cap on the number of power-iteration steps. */
const DEFAULT_MAX_ITER = 200;
/** Default convergence tolerance on the L2 norm of the change in ψ. */
const DEFAULT_TOL = 1e-8;
22
/**
 * Run damped power iteration over the graph's forward edges and write the
 * resulting unit-L2 vector ψ into the entity records via `gf.setPsi`.
 *
 * Warm-starts from the ψ values already stored in the entity records.
 * Entries that are not usable as a seed (zero, negative, NaN or infinite)
 * are replaced by the mean of the usable ones. When no entry is usable the
 * iteration starts from the uniform vector. Non-finite stored values must
 * be filtered out here: a single NaN would otherwise spread to every
 * component and be written back to all records.
 *
 * NOTE(review): each step scatters ψ_i to the forward targets of i, i.e. it
 * applies Aᵀ (with A_ij = 1 for an edge i→j). On a graph whose forward
 * edges are not symmetric this converges to a left eigenvector of A, while
 * the MERW transition formula S_ij = (A_ij / λ)(ψ_j / ψ_i) needs the right
 * eigenvector. Confirm which one the consumers of ψ expect.
 *
 * @param gf      GraphFile to operate on
 * @param alpha   Damping factor (probability of following an edge). Default 0.85.
 * @param maxIter Maximum iterations. Default 200.
 * @param tol     Convergence tolerance (L2 norm of change). Default 1e-8.
 * @returns Number of iterations performed (0 for an empty graph).
 */
export function computeMerwPsi(gf, alpha = DEFAULT_ALPHA, maxIter = DEFAULT_MAX_ITER, tol = DEFAULT_TOL) {
    const offsets = gf.getAllEntityOffsets();
    const n = offsets.length;
    if (n === 0) {
        return 0;
    }
    // Build offset → index map for O(1) lookup
    const indexMap = new Map();
    for (let i = 0; i < n; i++) {
        indexMap.set(offsets[i], i);
    }
    // Build sparse adjacency: for each node, list of forward neighbor indices.
    // Edges whose target is not a known entity are ignored.
    const adj = new Array(n);
    for (let i = 0; i < n; i++) {
        const neighbors = [];
        for (const e of gf.getEdges(offsets[i])) {
            if (e.direction !== DIR_FORWARD) {
                continue;
            }
            const j = indexMap.get(e.targetOffset);
            if (j !== undefined) {
                neighbors.push(j);
            }
        }
        adj[i] = neighbors;
    }
    // Warm-start: read existing ψ from entity records. Unusable entries are
    // stored as 0, which marks them for seeding below.
    let psi = new Float64Array(n);
    let warmSum = 0;
    let warmCount = 0;
    for (let i = 0; i < n; i++) {
        const val = gf.getPsi(offsets[i]);
        if (Number.isFinite(val) && val > 0) {
            psi[i] = val;
            warmSum += val;
            warmCount++;
        }
    }
    const mean = warmCount > 0 ? warmSum / warmCount : 0;
    if (mean > 0 && Number.isFinite(mean)) {
        // Seed new/unusable nodes with the mean of the usable values
        for (let i = 0; i < n; i++) {
            if (psi[i] === 0) {
                psi[i] = mean;
            }
        }
    }
    else {
        // Cold start: uniform
        psi.fill(1.0 / Math.sqrt(n));
    }
    // Normalize initial vector to unit L2
    let initNorm = 0;
    for (let i = 0; i < n; i++) {
        initNorm += psi[i] * psi[i];
    }
    initNorm = Math.sqrt(initNorm);
    if (initNorm > 0) {
        for (let i = 0; i < n; i++) {
            psi[i] /= initNorm;
        }
    }
    let psiNext = new Float64Array(n);
    const teleport = (1.0 - alpha) / n;
    let iter = 0;
    for (iter = 0; iter < maxIter; iter++) {
        // One step: psiNext_j = alpha * Σ_{i: i→j} psi_i + teleport * Σ_k psi_k
        psiNext.fill(0);
        // Sum of psi, shared by every node's teleportation term
        let psiSum = 0;
        for (let i = 0; i < n; i++) {
            psiSum += psi[i];
        }
        const teleportContrib = teleport * psiSum;
        // Sparse multiply: scatter from sources to targets
        for (let i = 0; i < n; i++) {
            const val = alpha * psi[i];
            for (const j of adj[i]) {
                psiNext[j] += val;
            }
        }
        // Add teleportation
        for (let i = 0; i < n; i++) {
            psiNext[i] += teleportContrib;
        }
        // Normalize to unit L2
        let norm = 0;
        for (let i = 0; i < n; i++) {
            norm += psiNext[i] * psiNext[i];
        }
        norm = Math.sqrt(norm);
        if (norm > 0) {
            for (let i = 0; i < n; i++) {
                psiNext[i] /= norm;
            }
        }
        // Check convergence: L2 norm of difference
        let diff = 0;
        for (let i = 0; i < n; i++) {
            const d = psiNext[i] - psi[i];
            diff += d * d;
        }
        diff = Math.sqrt(diff);
        // Swap buffers
        const tmp = psi;
        psi = psiNext;
        psiNext = tmp;
        if (diff < tol) {
            // Count the iteration that just converged
            iter++;
            break;
        }
    }
    // Clamp any negative component to 0 before writing back.
    for (let i = 0; i < n; i++) {
        if (psi[i] < 0) {
            psi[i] = 0;
        }
    }
    // Write ψ_i into each entity record
    for (let i = 0; i < n; i++) {
        gf.setPsi(offsets[i], psi[i]);
    }
    return iter;
}
@@ -43,7 +43,7 @@ const ENT_LEN = 8; // u16
43
43
  const ENT_DATA = 10; // u8[len]
44
44
  const ENT_HEADER_SIZE = 10;
45
45
  // Hash index field offsets (relative to index block start)
46
- const IDX_BUCKET_COUNT = 0; // u32
46
+ const _IDX_BUCKET_COUNT = 0; // u32
47
47
  const IDX_BUCKETS = 8; // u64[bucket_count]
48
48
  const INITIAL_BUCKETS = 4096;
49
49
  const LOAD_FACTOR_THRESHOLD = 0.7;
@@ -159,7 +159,7 @@ export class StringTable {
159
159
  const data = Buffer.from(str, 'utf-8');
160
160
  const hash = fnv1a(data);
161
161
  const bucketCount = this.getBucketCount();
162
- let bucket = hash % bucketCount;
162
+ const bucket = hash % bucketCount;
163
163
  // Linear probe to find existing or empty slot
164
164
  for (let i = 0; i < bucketCount; i++) {
165
165
  const slotIdx = (bucket + i) % bucketCount;
@@ -215,7 +215,7 @@ export class StringTable {
215
215
  const data = Buffer.from(str, 'utf-8');
216
216
  const hash = fnv1a(data);
217
217
  const bucketCount = this.getBucketCount();
218
- let bucket = hash % bucketCount;
218
+ const bucket = hash % bucketCount;
219
219
  for (let i = 0; i < bucketCount; i++) {
220
220
  const slotIdx = (bucket + i) % bucketCount;
221
221
  const entryOffset = this.getBucket(slotIdx);
@@ -264,10 +264,28 @@ export class StringTable {
264
264
  get count() {
265
265
  return this.getEntryCount();
266
266
  }
267
+ /**
268
+ * Iterate over all live strings in the table.
269
+ * Yields { id, text, refcount } for each entry.
270
+ */
271
+ *entries() {
272
+ const bucketCount = this.getBucketCount();
273
+ for (let i = 0; i < bucketCount; i++) {
274
+ const entryOffset = this.getBucket(i);
275
+ if (entryOffset === 0n)
276
+ continue;
277
+ const entry = this.readEntry(entryOffset);
278
+ yield {
279
+ id: entryOffset,
280
+ text: entry.data.toString('utf-8'),
281
+ refcount: entry.refcount,
282
+ };
283
+ }
284
+ }
267
285
  // --- Hash index management ---
268
286
  removeFromIndex(offset, hash) {
269
287
  const bucketCount = this.getBucketCount();
270
- let bucket = hash % bucketCount;
288
+ const bucket = hash % bucketCount;
271
289
  // Find the entry in the index
272
290
  for (let i = 0; i < bucketCount; i++) {
273
291
  const slotIdx = (bucket + i) % bucketCount;
@@ -305,7 +323,7 @@ export class StringTable {
305
323
  slot = (slot + 1) % bucketCount;
306
324
  }
307
325
  }
308
- needsRelocation(natural, empty, current, size) {
326
+ needsRelocation(natural, empty, current, _size) {
309
327
  // Is 'empty' between 'natural' and 'current' in the circular probe sequence?
310
328
  if (natural <= current) {
311
329
  return natural <= empty && empty < current;
@@ -343,7 +361,7 @@ export class StringTable {
343
361
  continue;
344
362
  // Read hash and insert into new index
345
363
  const entry = this.readEntry(entryOffset);
346
- let bucket = entry.hash % newBucketCount;
364
+ const bucket = entry.hash % newBucketCount;
347
365
  for (let j = 0; j < newBucketCount; j++) {
348
366
  const slotIdx = (bucket + j) % newBucketCount;
349
367
  const slotPos = newIndexOffset + BigInt(IDX_BUCKETS + slotIdx * 8);