@levalicious/server-memory 0.0.13 → 0.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +79 -28
- package/dist/scripts/delete-document.js +91 -0
- package/dist/scripts/textrank-experiment.js +618 -0
- package/dist/server.js +127 -59
- package/dist/src/graphfile.js +118 -4
- package/dist/src/kb_load.js +396 -0
- package/dist/src/memoryfile.js +17 -0
- package/dist/src/merw.js +160 -0
- package/dist/src/stringtable.js +24 -6
- package/dist/tests/memory-server.test.js +129 -0
- package/dist/tests/test-utils.js +6 -0
- package/package.json +6 -2
|
@@ -0,0 +1,396 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* kb_load.ts — Load a plaintext document into the knowledge graph.
|
|
3
|
+
*
|
|
4
|
+
* Pipeline:
|
|
5
|
+
* 1. Normalize text
|
|
6
|
+
* 2. Split into observations (≤140 chars, word-boundary aligned)
|
|
7
|
+
* 3. Group observations into chunks (≤2 per entity)
|
|
8
|
+
* 4. Build chain: Document → starts_with/ends_with → chunks ↔ follows/preceded_by
|
|
9
|
+
* 5. Sentence TextRank: rank sentences by TF-IDF cosine PageRank
|
|
10
|
+
* 6. Build index entity: Document → has_index → Index → highlights → top chunks
|
|
11
|
+
*
|
|
12
|
+
* Returns arrays of entities and relations ready for createEntities/createRelations.
|
|
13
|
+
*/
|
|
14
|
+
import * as crypto from 'crypto';
|
|
15
|
+
import * as path from 'path';
|
|
16
|
+
// ─── Constants ──────────────────────────────────────────────────────
|
|
17
|
+
const MAX_OBS_LENGTH = 140;
|
|
18
|
+
const MAX_OBS_PER_ENTITY = 2;
|
|
19
|
+
const TEXTRANK_DAMPING = 0.85;
|
|
20
|
+
const TEXTRANK_MAX_ITER = 30000;
|
|
21
|
+
const TEXTRANK_CONVERGENCE = 1e-6;
|
|
22
|
+
const ALLOWED_EXTENSIONS = new Set([
|
|
23
|
+
'.txt', '.tex', '.md', '.markdown', '.rst', '.org', '.adoc',
|
|
24
|
+
'.asciidoc', '.html', '.htm', '.xml', '.json', '.yaml', '.yml',
|
|
25
|
+
'.toml', '.csv', '.tsv', '.log', '.cfg', '.ini', '.conf',
|
|
26
|
+
'.py', '.js', '.ts', '.c', '.h', '.cpp', '.hpp', '.java',
|
|
27
|
+
'.go', '.rs', '.rb', '.pl', '.sh', '.bash', '.zsh', '.fish',
|
|
28
|
+
'.el', '.lisp', '.clj', '.hs', '.ml', '.scala', '.kt',
|
|
29
|
+
'.r', '.m', '.swift', '.lua', '.vim', '.sql',
|
|
30
|
+
'.bib', '.sty', '.cls',
|
|
31
|
+
]);
|
|
32
|
+
// ─── Text Processing ────────────────────────────────────────────────
|
|
33
|
+
/**
 * Normalize raw document text: unify line endings, collapse whitespace
 * runs, and trim — the result is a single-spaced, single-line string.
 */
function normalize(text) {
    // The newline handling is kept for pipeline parity, although the final
    // split/join collapses all remaining whitespace to single spaces anyway.
    const unified = text
        .replace(/\r\n/g, '\n')
        .replace(/[ \t]+/g, ' ')
        .replace(/\n{3,}/g, '\n\n')
        .trim();
    return unified.split(/\s+/).join(' ');
}
|
|
40
|
+
/**
 * Tokenize space-separated text into word records carrying absolute
 * character offsets (relative to the whole normalized document).
 *
 * @param text   Single-spaced text to tokenize
 * @param offset Absolute offset of `text` within the document
 * @returns Array of { text, normalized, start, end }
 */
function labelWords(text, offset) {
    const words = [];
    // Only the plain space character separates words (text is normalized).
    const tokenRe = /[^ ]+/g;
    let match;
    while ((match = tokenRe.exec(text)) !== null) {
        const token = match[0];
        words.push({
            text: token,
            normalized: token.toLowerCase(),
            start: offset + match.index,
            end: offset + match.index + token.length,
        });
    }
    return words;
}
|
|
62
|
+
/**
 * Split normalized text into observations of at most MAX_OBS_LENGTH
 * characters, preferring to break on word boundaries (spaces). Falls
 * back to a hard split (surrogate-pair safe) for unbroken runs longer
 * than the limit.
 *
 * @param text Normalized document text (single-spaced, trimmed)
 * @returns Array of { text, start, end, words } observation records
 */
function splitIntoObservations(text) {
    const observations = [];
    let pos = 0;
    while (pos < text.length) {
        const remaining = text.slice(pos);
        // Tail fits entirely — emit it and stop.
        if (remaining.length <= MAX_OBS_LENGTH) {
            observations.push({
                text: remaining,
                start: pos,
                end: pos + remaining.length,
                words: labelWords(remaining, pos),
            });
            break;
        }
        // Find the last space whose prefix still fits the limit.
        // Fix: the prefix length IS the index — the original re-sliced the
        // string on every iteration (`remaining.slice(0, i).length`), making
        // this scan O(n²) per observation. A bounded index scan is O(limit).
        let splitAt = 0;
        const scanLimit = Math.min(remaining.length, MAX_OBS_LENGTH + 1);
        for (let i = 0; i < scanLimit; i++) {
            if (remaining[i] === ' ') {
                splitAt = i;
            }
        }
        if (splitAt === 0) {
            // No space fits — hard split without cutting a surrogate pair.
            let jsLen = 0;
            for (let i = 0; i < remaining.length; i++) {
                const charLen = remaining.codePointAt(i) > 0xFFFF ? 2 : 1;
                if (jsLen + charLen > MAX_OBS_LENGTH) {
                    splitAt = i;
                    break;
                }
                jsLen += charLen;
                if (charLen === 2)
                    i++; // skip the low surrogate we just accounted for
            }
            if (splitAt === 0)
                splitAt = remaining.length;
        }
        const obsText = remaining.slice(0, splitAt).trimEnd();
        observations.push({
            text: obsText,
            start: pos,
            end: pos + obsText.length,
            words: labelWords(obsText, pos),
        });
        pos += splitAt;
        // Skip the inter-observation spaces.
        while (pos < text.length && text[pos] === ' ')
            pos++;
    }
    return observations;
}
|
|
116
|
+
/**
 * Group observations into chunks of at most MAX_OBS_PER_ENTITY each.
 * Every chunk gets a fresh random hex id and a sequential index.
 */
function chunkObservations(observations) {
    const chunks = [];
    let cursor = 0;
    while (cursor < observations.length) {
        chunks.push({
            index: chunks.length,
            id: crypto.randomBytes(12).toString('hex'),
            observations: observations.slice(cursor, cursor + MAX_OBS_PER_ENTITY),
        });
        cursor += MAX_OBS_PER_ENTITY;
    }
    return chunks;
}
|
|
127
|
+
// ─── Sentence Splitting ─────────────────────────────────────────────
|
|
128
|
+
/**
 * Split normalized text into sentences at `.?!` boundaries.
 * Sentences with fewer than 3 words are discarded (too short to rank).
 *
 * @returns Array of { index, text, start, words } sentence records
 */
function splitSentences(normalizedText) {
    const sentences = [];
    // Shared validation/push logic for both boundary matches and the tail.
    const addSentence = (raw, start) => {
        const text = raw.trim();
        if (text.length === 0)
            return;
        const words = text.toLowerCase().split(/\s+/).filter((w) => w.length > 0);
        if (words.length >= 3) {
            sentences.push({ index: sentences.length, text, start, words });
        }
    };
    const boundary = /(?<=[.?!])\s+/g;
    let pos = 0;
    let match;
    while ((match = boundary.exec(normalizedText)) !== null) {
        addSentence(normalizedText.slice(pos, match.index + 1), pos);
        pos = match.index + match[0].length;
    }
    if (pos < normalizedText.length) {
        addSentence(normalizedText.slice(pos), pos);
    }
    return sentences;
}
|
|
154
|
+
// ─── TF-IDF ─────────────────────────────────────────────────────────
|
|
155
|
+
/**
 * Build the document's TF-IDF weight vector: raw term frequency per
 * normalized word, multiplied by the word's IDF (0 if unknown).
 *
 * @param allWords Labeled word records (uses `.normalized`)
 * @param idf      Map word → inverse document frequency
 * @returns Map word → tf·idf weight
 */
function buildWeightVector(allWords, idf) {
    // Term frequency per normalized word.
    const tf = new Map();
    for (const { normalized } of allWords) {
        tf.set(normalized, (tf.get(normalized) ?? 0) + 1);
    }
    // TF × IDF; words missing from the IDF map weigh zero.
    const weights = new Map();
    for (const [word, count] of tf) {
        weights.set(word, count * (idf.get(word) ?? 0));
    }
    return weights;
}
|
|
166
|
+
/**
 * Derive corpus-level document frequencies from the string table.
 * Each string-table entry counts `refcount` times (one per use in the
 * graph), and each unique lowercase word in an entry contributes its
 * entry's refcount to that word's document frequency.
 *
 * @param st StringTable exposing `entries()` → { text, refcount }
 * @returns { df: Map word → freq, corpusSize: total refcount }
 */
function deriveCorpusDocFreqs(st) {
    const df = new Map();
    let corpusSize = 0;
    for (const { text, refcount } of st.entries()) {
        corpusSize += refcount;
        const unique = new Set(text.toLowerCase().split(/\s+/).filter((w) => w.length > 0));
        for (const word of unique) {
            df.set(word, (df.get(word) ?? 0) + refcount);
        }
    }
    return { df, corpusSize };
}
|
|
178
|
+
/**
 * Build the IDF vector for the document's vocabulary using smoothed
 * inverse document frequency: idf = ln(N / (1 + df)) + 1.
 *
 * @param docVocab   Iterable of the document's unique normalized words
 * @param df         Map word → corpus document frequency
 * @param corpusSize Total corpus size (sum of refcounts)
 * @returns Map word → idf
 */
function buildIdfVector(docVocab, df, corpusSize) {
    const idf = new Map();
    for (const word of docVocab) {
        if (corpusSize === 0) {
            // Empty corpus: ln(0/(1+df)) would be -Infinity and poison every
            // downstream TF-IDF weight with NaN. Fall back to a neutral IDF
            // of 1 so ranking degrades to raw term frequency.
            idf.set(word, 1);
            continue;
        }
        const docFreq = df.get(word) ?? 0;
        idf.set(word, Math.log(corpusSize / (1 + docFreq)) + 1);
    }
    return idf;
}
|
|
186
|
+
// ─── Cosine Similarity ──────────────────────────────────────────────
|
|
187
|
+
/**
 * Cosine similarity between two word sets under a shared TF-IDF weight
 * vector. Because both sets use the same document-level weights, the
 * dot product of a shared word is its squared weight.
 *
 * @param weights Map word → tf·idf weight
 * @param keysA   Word set of sentence A
 * @param keysB   Word set of sentence B
 * @returns Similarity in [0, 1]; 0 when either norm vanishes
 */
function cosineSimilarity(weights, keysA, keysB) {
    const squaredWeight = (word) => {
        const w = weights.get(word) ?? 0;
        return w * w;
    };
    let dot = 0;
    for (const word of keysA) {
        if (keysB.has(word))
            dot += squaredWeight(word);
    }
    let normA = 0;
    for (const word of keysA)
        normA += squaredWeight(word);
    let normB = 0;
    for (const word of keysB)
        normB += squaredWeight(word);
    const denom = Math.sqrt(normA) * Math.sqrt(normB);
    return denom === 0 ? 0 : dot / denom;
}
|
|
208
|
+
// ─── PageRank ───────────────────────────────────────────────────────
|
|
209
|
+
/**
 * Damped PageRank over a dense weighted similarity matrix.
 * Rows are normalized by their sum (dangling rows contribute nothing);
 * iteration stops at TEXTRANK_MAX_ITER or when the L1 change drops
 * below TEXTRANK_CONVERGENCE.
 *
 * @param matrix n×n non-negative similarity matrix
 * @returns Array of n scores (empty for an empty matrix)
 */
function pageRank(matrix) {
    const n = matrix.length;
    if (n === 0)
        return [];
    // Out-weight per node: the row sum.
    const rowSums = matrix.map((row) => row.reduce((acc, v) => acc + v, 0));
    let scores = new Array(n).fill(1 / n);
    const base = (1 - TEXTRANK_DAMPING) / n;
    for (let iter = 0; iter < TEXTRANK_MAX_ITER; iter++) {
        const next = new Array(n);
        for (let i = 0; i < n; i++) {
            let incoming = 0;
            for (let j = 0; j < n; j++) {
                if (j === i || !(rowSums[j] > 0))
                    continue;
                incoming += (matrix[j][i] / rowSums[j]) * scores[j];
            }
            next[i] = base + TEXTRANK_DAMPING * incoming;
        }
        // L1 distance between successive iterates.
        let delta = 0;
        for (let i = 0; i < n; i++)
            delta += Math.abs(next[i] - scores[i]);
        scores = next;
        if (delta < TEXTRANK_CONVERGENCE)
            break;
    }
    return scores;
}
|
|
235
|
+
// ─── Sentence TextRank ──────────────────────────────────────────────
|
|
236
|
+
/**
 * Rank sentences with TextRank: build a symmetric TF-IDF cosine
 * similarity matrix (zero diagonal), run PageRank, and return the
 * sentences sorted by descending score.
 *
 * @param sentences Sentence records with `.words`
 * @param weights   Document TF-IDF weight vector
 * @returns Array of { sentence, score }, best first
 */
function sentenceTextRank(sentences, weights) {
    const n = sentences.length;
    const keySets = sentences.map((s) => new Set(s.words));
    const matrix = Array.from({ length: n }, () => new Array(n).fill(0));
    for (let a = 0; a < n; a++) {
        for (let b = a + 1; b < n; b++) {
            const sim = cosineSimilarity(weights, keySets[a], keySets[b]);
            matrix[a][b] = sim;
            matrix[b][a] = sim;
        }
    }
    const scores = pageRank(matrix);
    const ranked = sentences.map((sentence, i) => ({ sentence, score: scores[i] }));
    ranked.sort((x, y) => y.score - x.score);
    return ranked;
}
|
|
252
|
+
// ─── Sentence → Chunk mapping ───────────────────────────────────────
|
|
253
|
+
/**
 * Map a sentence to the chunk whose covered span contains the
 * sentence's starting offset. The span is [first obs start, last obs
 * end). Returns null when no chunk covers the offset.
 */
function sentenceToChunk(sentence, chunks) {
    const target = sentence.start;
    const hit = chunks.find((chunk) => {
        const spanStart = chunk.observations[0].start;
        const spanEnd = chunk.observations[chunk.observations.length - 1].end;
        return target >= spanStart && target < spanEnd;
    });
    return hit ?? null;
}
|
|
263
|
+
// ─── Public API ─────────────────────────────────────────────────────
|
|
264
|
+
/**
|
|
265
|
+
* Validate that a file path has a plaintext extension.
|
|
266
|
+
* Returns the extension if valid, throws if not.
|
|
267
|
+
*/
|
|
268
|
+
/**
 * Validate that a file path has a plaintext extension.
 *
 * @param filePath Path whose extension is checked (case-insensitive)
 * @returns The lowercased extension (including the dot) if allowed
 * @throws Error when the path has no extension or a non-plaintext one
 */
export function validateExtension(filePath) {
    const ext = path.extname(filePath).toLowerCase();
    if (ext === '') {
        throw new Error(`File has no extension: ${filePath}. Only plaintext files are accepted.`);
    }
    if (ALLOWED_EXTENSIONS.has(ext)) {
        return ext;
    }
    throw new Error(`Unsupported file extension "${ext}". Only plaintext formats are accepted ` +
        `(${[...ALLOWED_EXTENSIONS].slice(0, 10).join(', ')}, ...). ` +
        `For PDFs, use pdftotext first. For other binary formats, convert to text.`);
}
|
|
280
|
+
/**
|
|
281
|
+
* Load a plaintext document into the knowledge graph.
|
|
282
|
+
*
|
|
283
|
+
* @param text Raw document text
|
|
284
|
+
* @param title Document entity name (e.g. filename without extension)
|
|
285
|
+
* @param st StringTable for IDF corpus frequencies
|
|
286
|
+
* @param topK Number of sentences to highlight in the index (default: 15)
|
|
287
|
+
* @returns Entities and relations ready for createEntities/createRelations
|
|
288
|
+
*/
|
|
289
|
+
/**
 * Load a plaintext document into the knowledge graph.
 *
 * @param text  Raw document text
 * @param title Document entity name (e.g. filename without extension)
 * @param st    StringTable for IDF corpus frequencies
 * @param topK  Number of sentences to highlight in the index (default: 15)
 * @returns { entities, relations, stats } ready for createEntities/createRelations
 */
export function loadDocument(text, title, st, topK = 15) {
    // Stage 1: normalize the raw text and group it into observation chunks.
    const normalizedText = normalize(text);
    const observations = splitIntoObservations(normalizedText);
    const chunks = chunkObservations(observations);
    // Flatten every labeled word across all chunks.
    const allWords = [];
    for (const chunk of chunks) {
        for (const obs of chunk.observations) {
            allWords.push(...obs.words);
        }
    }
    const vocab = new Set(allWords.map((w) => w.normalized));
    // Stages 2–3: corpus-derived IDF, then the document TF-IDF weights.
    const { df, corpusSize } = deriveCorpusDocFreqs(st);
    const idf = buildIdfVector(vocab, df, corpusSize);
    const weights = buildWeightVector(allWords, idf);
    // Stage 4: rank sentences via TextRank over TF-IDF cosine similarity.
    const sentences = splitSentences(normalizedText);
    const rankedSentences = sentenceTextRank(sentences, weights);
    // Stage 5: map the top-K sentences back to chunks, one highlight per chunk.
    const highlights = [];
    const seenChunks = new Set();
    for (const { sentence, score } of rankedSentences.slice(0, topK)) {
        const chunk = sentenceToChunk(sentence, chunks);
        if (!chunk || seenChunks.has(chunk.id))
            continue;
        seenChunks.add(chunk.id);
        highlights.push({ chunk, sentence, score });
    }
    // Stage 6: pack compressed sentence previews into index observations,
    // respecting the per-entity observation budget.
    const indexId = `${title}__index`;
    const indexObs = [];
    let current = '';
    for (const { sentence } of highlights) {
        const preview = sentence.text.length > 60
            ? sentence.text.slice(0, 57) + '...'
            : sentence.text;
        const candidate = current === '' ? preview : current + ' | ' + preview;
        if (candidate.length <= MAX_OBS_LENGTH) {
            current = candidate;
            continue;
        }
        if (current !== '')
            indexObs.push(current);
        if (indexObs.length >= MAX_OBS_PER_ENTITY)
            break;
        current = preview.length <= MAX_OBS_LENGTH ? preview : preview.slice(0, MAX_OBS_LENGTH);
    }
    if (current !== '' && indexObs.length < MAX_OBS_PER_ENTITY)
        indexObs.push(current);
    // ─── Assemble entities ──────────────────────────────────────────
    const entities = [];
    const relations = [];
    // The Document node is a pure pointer — it carries no observations.
    entities.push({ name: title, entityType: 'Document', observations: [] });
    for (const chunk of chunks) {
        entities.push({
            name: chunk.id,
            entityType: 'TextChunk',
            observations: chunk.observations.map((o) => o.text),
        });
    }
    entities.push({
        name: indexId,
        entityType: 'DocumentIndex',
        observations: indexObs,
    });
    // ─── Assemble relations ─────────────────────────────────────────
    // Chain endpoints: Document ↔ first/last chunk.
    if (chunks.length > 0) {
        const head = chunks[0];
        relations.push({ from: title, to: head.id, relationType: 'starts_with' });
        relations.push({ from: head.id, to: title, relationType: 'belongs_to' });
        if (chunks.length > 1) {
            const tail = chunks[chunks.length - 1];
            relations.push({ from: title, to: tail.id, relationType: 'ends_with' });
            relations.push({ from: tail.id, to: title, relationType: 'belongs_to' });
        }
    }
    // Doubly-linked chunk chain: follows / preceded_by.
    for (let i = 0; i + 1 < chunks.length; i++) {
        relations.push({ from: chunks[i].id, to: chunks[i + 1].id, relationType: 'follows' });
        relations.push({ from: chunks[i + 1].id, to: chunks[i].id, relationType: 'preceded_by' });
    }
    // Document ↔ index.
    relations.push({ from: title, to: indexId, relationType: 'has_index' });
    relations.push({ from: indexId, to: title, relationType: 'indexes' });
    // Index ↔ highlighted chunks.
    for (const { chunk } of highlights) {
        relations.push({ from: indexId, to: chunk.id, relationType: 'highlights' });
        relations.push({ from: chunk.id, to: indexId, relationType: 'highlighted_by' });
    }
    return {
        entities,
        relations,
        stats: {
            chars: text.length,
            words: allWords.length,
            uniqueWords: vocab.size,
            chunks: chunks.length,
            sentences: sentences.length,
            indexHighlights: highlights.length,
        },
    };
}
|
package/dist/src/memoryfile.js
CHANGED
|
@@ -115,6 +115,23 @@ export class MemoryFile {
|
|
|
115
115
|
this.assertOpen();
|
|
116
116
|
return native.stats(this.handle);
|
|
117
117
|
}
|
|
118
|
+
/**
|
|
119
|
+
* Read the memfile version field (u32 at offset 4).
|
|
120
|
+
*/
|
|
121
|
+
getVersion() {
|
|
122
|
+
this.assertOpen();
|
|
123
|
+
const buf = native.read(this.handle, 4n, 4n);
|
|
124
|
+
return buf.readUInt32LE(0);
|
|
125
|
+
}
|
|
126
|
+
/**
|
|
127
|
+
* Write the memfile version field (u32 at offset 4).
|
|
128
|
+
*/
|
|
129
|
+
setVersion(version) {
|
|
130
|
+
this.assertOpen();
|
|
131
|
+
const buf = Buffer.alloc(4);
|
|
132
|
+
buf.writeUInt32LE(version, 0);
|
|
133
|
+
native.write(this.handle, 4n, buf);
|
|
134
|
+
}
|
|
118
135
|
/**
|
|
119
136
|
* Close the memory file. Syncs and unmaps.
|
|
120
137
|
* The instance is unusable after this.
|
package/dist/src/merw.js
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Maximum Entropy Random Walk (MERW) — dominant eigenvector computation
|
|
3
|
+
* via power iteration on the graph's adjacency matrix.
|
|
4
|
+
*
|
|
5
|
+
* MERW transition probabilities: S_ij = (A_ij / λ) * (ψ_j / ψ_i)
|
|
6
|
+
* Stationary distribution: ρ_i = ψ_i² / ‖ψ‖₂²
|
|
7
|
+
*
|
|
8
|
+
* We compute ψ (the dominant right eigenvector of A) using sparse power
|
|
9
|
+
* iteration directly on the GraphFile adjacency lists. No dense matrix
|
|
10
|
+
* is ever constructed.
|
|
11
|
+
*
|
|
12
|
+
* For directed graphs that may not be strongly connected, we add
|
|
13
|
+
* teleportation damping (like PageRank): at each step, follow an edge
|
|
14
|
+
* with probability `alpha`, or jump to a uniform random node with
|
|
15
|
+
* probability `(1 - alpha)`. This guarantees convergence to a unique
|
|
16
|
+
* positive eigenvector.
|
|
17
|
+
*/
|
|
18
|
+
import { DIR_FORWARD } from './graphfile.js';
|
|
19
|
+
const DEFAULT_ALPHA = 0.85;
|
|
20
|
+
const DEFAULT_MAX_ITER = 200;
|
|
21
|
+
const DEFAULT_TOL = 1e-8;
|
|
22
|
+
/**
|
|
23
|
+
* Compute the dominant eigenvector of the (damped) adjacency matrix
|
|
24
|
+
* via power iteration and write ψ_i into each entity record.
|
|
25
|
+
*
|
|
26
|
+
* Warm-starts from the ψ values already stored in the entity records.
|
|
27
|
+
* New nodes (psi === 0) are seeded with the mean of existing values.
|
|
28
|
+
* On a fresh graph (all zeros), falls back to uniform initialization.
|
|
29
|
+
*
|
|
30
|
+
* @param gf GraphFile to operate on
|
|
31
|
+
* @param alpha Damping factor (probability of following an edge). Default 0.85.
|
|
32
|
+
* @param maxIter Maximum iterations. Default 200.
|
|
33
|
+
* @param tol Convergence tolerance (L2 norm of change). Default 1e-8.
|
|
34
|
+
* @returns Number of iterations performed.
|
|
35
|
+
*/
|
|
36
|
+
/**
 * Compute the dominant eigenvector ψ of the (damped) adjacency matrix
 * via sparse power iteration and persist ψ_i into each entity record.
 *
 * Warm-starts from the ψ values already stored in the records; nodes
 * with ψ = 0 are seeded with the mean of the nonzero values. On a
 * fresh graph (all zeros) the start vector is uniform.
 *
 * @param gf      GraphFile to operate on
 * @param alpha   Damping factor (probability of following an edge). Default 0.85.
 * @param maxIter Maximum iterations. Default 200.
 * @param tol     Convergence tolerance (L2 norm of change). Default 1e-8.
 * @returns Number of iterations performed.
 */
export function computeMerwPsi(gf, alpha = DEFAULT_ALPHA, maxIter = DEFAULT_MAX_ITER, tol = DEFAULT_TOL) {
    const offsets = gf.getAllEntityOffsets();
    const n = offsets.length;
    if (n === 0)
        return 0;
    // O(1) file-offset → dense-index lookup.
    const indexOf = new Map();
    for (let i = 0; i < n; i++)
        indexOf.set(offsets[i], i);
    // Sparse forward adjacency as dense-index neighbor lists.
    const adj = new Array(n);
    for (let i = 0; i < n; i++) {
        const out = [];
        for (const edge of gf.getEdges(offsets[i])) {
            if (edge.direction !== DIR_FORWARD)
                continue;
            const j = indexOf.get(edge.targetOffset);
            if (j !== undefined)
                out.push(j);
        }
        adj[i] = out;
    }
    // Warm start: read the persisted ψ values.
    let psi = new Float64Array(n);
    let warmSum = 0;
    let warmCount = 0;
    for (let i = 0; i < n; i++) {
        const val = gf.getPsi(offsets[i]);
        psi[i] = val;
        if (val > 0) {
            warmSum += val;
            warmCount++;
        }
    }
    if (warmCount > 0) {
        // Seed unseen (zero) nodes with the mean of the known values.
        const mean = warmSum / warmCount;
        for (let i = 0; i < n; i++) {
            if (psi[i] <= 0)
                psi[i] = mean;
        }
    }
    else {
        // Cold start: uniform (already unit-norm for this fill value).
        psi.fill(1.0 / Math.sqrt(n));
    }
    // Normalize the start vector to unit L2.
    let startNorm = 0;
    for (let i = 0; i < n; i++)
        startNorm += psi[i] * psi[i];
    startNorm = Math.sqrt(startNorm);
    if (startNorm > 0) {
        for (let i = 0; i < n; i++)
            psi[i] /= startNorm;
    }
    let scratch = new Float64Array(n);
    const teleport = (1.0 - alpha) / n;
    let iter = 0;
    for (iter = 0; iter < maxIter; iter++) {
        // One power-iteration step:
        //   scratch_j = alpha · Σ_{i: i→j} ψ_i + teleport · Σ_k ψ_k
        // implemented as a scatter over forward edges plus a uniform
        // teleportation term (guarantees a unique positive eigenvector
        // even when the graph is not strongly connected).
        scratch.fill(0);
        let psiSum = 0;
        for (let i = 0; i < n; i++)
            psiSum += psi[i];
        const teleportContrib = teleport * psiSum;
        for (let i = 0; i < n; i++) {
            const contribution = alpha * psi[i];
            for (const j of adj[i]) {
                scratch[j] += contribution;
            }
        }
        for (let i = 0; i < n; i++) {
            scratch[i] += teleportContrib;
        }
        // Renormalize to unit L2.
        let norm = 0;
        for (let i = 0; i < n; i++)
            norm += scratch[i] * scratch[i];
        norm = Math.sqrt(norm);
        if (norm > 0) {
            for (let i = 0; i < n; i++)
                scratch[i] /= norm;
        }
        // L2 distance between successive iterates.
        let diff = 0;
        for (let i = 0; i < n; i++) {
            const d = scratch[i] - psi[i];
            diff += d * d;
        }
        diff = Math.sqrt(diff);
        // Double-buffer swap: scratch becomes the current ψ.
        const tmp = psi;
        psi = scratch;
        scratch = tmp;
        if (diff < tol) {
            iter++;
            break;
        }
    }
    // Perron–Frobenius guarantees a non-negative dominant eigenvector;
    // clamp the tiny negatives numerical noise can introduce.
    for (let i = 0; i < n; i++) {
        if (psi[i] < 0)
            psi[i] = 0;
    }
    // Persist ψ_i back into the entity records.
    for (let i = 0; i < n; i++) {
        gf.setPsi(offsets[i], psi[i]);
    }
    return iter;
}
|
package/dist/src/stringtable.js
CHANGED
|
@@ -43,7 +43,7 @@ const ENT_LEN = 8; // u16
|
|
|
43
43
|
const ENT_DATA = 10; // u8[len]
|
|
44
44
|
const ENT_HEADER_SIZE = 10;
|
|
45
45
|
// Hash index field offsets (relative to index block start)
|
|
46
|
-
const
|
|
46
|
+
const _IDX_BUCKET_COUNT = 0; // u32
|
|
47
47
|
const IDX_BUCKETS = 8; // u64[bucket_count]
|
|
48
48
|
const INITIAL_BUCKETS = 4096;
|
|
49
49
|
const LOAD_FACTOR_THRESHOLD = 0.7;
|
|
@@ -159,7 +159,7 @@ export class StringTable {
|
|
|
159
159
|
const data = Buffer.from(str, 'utf-8');
|
|
160
160
|
const hash = fnv1a(data);
|
|
161
161
|
const bucketCount = this.getBucketCount();
|
|
162
|
-
|
|
162
|
+
const bucket = hash % bucketCount;
|
|
163
163
|
// Linear probe to find existing or empty slot
|
|
164
164
|
for (let i = 0; i < bucketCount; i++) {
|
|
165
165
|
const slotIdx = (bucket + i) % bucketCount;
|
|
@@ -215,7 +215,7 @@ export class StringTable {
|
|
|
215
215
|
const data = Buffer.from(str, 'utf-8');
|
|
216
216
|
const hash = fnv1a(data);
|
|
217
217
|
const bucketCount = this.getBucketCount();
|
|
218
|
-
|
|
218
|
+
const bucket = hash % bucketCount;
|
|
219
219
|
for (let i = 0; i < bucketCount; i++) {
|
|
220
220
|
const slotIdx = (bucket + i) % bucketCount;
|
|
221
221
|
const entryOffset = this.getBucket(slotIdx);
|
|
@@ -264,10 +264,28 @@ export class StringTable {
|
|
|
264
264
|
get count() {
|
|
265
265
|
return this.getEntryCount();
|
|
266
266
|
}
|
|
267
|
+
/**
|
|
268
|
+
* Iterate over all live strings in the table.
|
|
269
|
+
* Yields { id, text, refcount } for each entry.
|
|
270
|
+
*/
|
|
271
|
+
*entries() {
|
|
272
|
+
const bucketCount = this.getBucketCount();
|
|
273
|
+
for (let i = 0; i < bucketCount; i++) {
|
|
274
|
+
const entryOffset = this.getBucket(i);
|
|
275
|
+
if (entryOffset === 0n)
|
|
276
|
+
continue;
|
|
277
|
+
const entry = this.readEntry(entryOffset);
|
|
278
|
+
yield {
|
|
279
|
+
id: entryOffset,
|
|
280
|
+
text: entry.data.toString('utf-8'),
|
|
281
|
+
refcount: entry.refcount,
|
|
282
|
+
};
|
|
283
|
+
}
|
|
284
|
+
}
|
|
267
285
|
// --- Hash index management ---
|
|
268
286
|
removeFromIndex(offset, hash) {
|
|
269
287
|
const bucketCount = this.getBucketCount();
|
|
270
|
-
|
|
288
|
+
const bucket = hash % bucketCount;
|
|
271
289
|
// Find the entry in the index
|
|
272
290
|
for (let i = 0; i < bucketCount; i++) {
|
|
273
291
|
const slotIdx = (bucket + i) % bucketCount;
|
|
@@ -305,7 +323,7 @@ export class StringTable {
|
|
|
305
323
|
slot = (slot + 1) % bucketCount;
|
|
306
324
|
}
|
|
307
325
|
}
|
|
308
|
-
needsRelocation(natural, empty, current,
|
|
326
|
+
needsRelocation(natural, empty, current, _size) {
|
|
309
327
|
// Is 'empty' between 'natural' and 'current' in the circular probe sequence?
|
|
310
328
|
if (natural <= current) {
|
|
311
329
|
return natural <= empty && empty < current;
|
|
@@ -343,7 +361,7 @@ export class StringTable {
|
|
|
343
361
|
continue;
|
|
344
362
|
// Read hash and insert into new index
|
|
345
363
|
const entry = this.readEntry(entryOffset);
|
|
346
|
-
|
|
364
|
+
const bucket = entry.hash % newBucketCount;
|
|
347
365
|
for (let j = 0; j < newBucketCount; j++) {
|
|
348
366
|
const slotIdx = (bucket + j) % newBucketCount;
|
|
349
367
|
const slotPos = newIndexOffset + BigInt(IDX_BUCKETS + slotIdx * 8);
|