npm - @andespindola/brainlink - Versions diffs - 0.1.0-beta.16 → 0.1.0-beta.18 - Mend

@andespindola/brainlink 0.1.0-beta.16 → 0.1.0-beta.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/CHANGELOG.md +2 -0
package/README.md +3 -1
package/dist/domain/context.js +53 -11
package/dist/domain/middle-out.js +18 -0
package/dist/infrastructure/file-index.js +3 -0
package/dist/infrastructure/search-packs.js +174 -7
package/docs/AGENT_USAGE.md +2 -0
package/docs/ARCHITECTURE.md +3 -0
package/package.json +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -22,6 +22,8 @@
 - Added short-lived hybrid search cache with automatic invalidation on index changes.
 - Added `stats --extended` observability output with storage, quality and latency probes.
 - Added `docs/QUICKSTART.md` and aligned README/agent docs with the latest CLI/MCP flows.
+- Added middle-out context assembly so chunk selection expands around the strongest note chunk.
+- Added compressed-space pack prefiltering (token bloom index) before `.blpk` decryption and scan.
 ## 0.1.0-beta.3

package/README.md CHANGED Viewed

@@ -67,8 +67,9 @@ Legacy `.jsonl.gz` packs are upgraded to `.blpk` automatically on first search/c
 - Obsidian-compatible `[[wiki links]]` and `#tags`.
 - Weighted graph edges so agents can rank relationship importance and priority.
 - Backlinks, broken-link reports, orphan detection and validation.
-- Full-text, semantic and hybrid retrieval modes.
 - Full-text, semantic and hybrid retrieval on a local file index.
+- Middle-out context assembly around the strongest chunk per document.
+- Compressed-space prefiltering for `.blpk` packs before decryption and scan.
 - Agent namespaces under `agents/<agent-id>/`.
 - S3-compatible bucket vaults through `s3://bucket/prefix` URIs.
 - CLI with machine-readable `--json` output.
@@ -728,6 +729,7 @@ Modes:
 - `semantic`: local deterministic embedding similarity only.
 Hybrid results are cached in-memory for a short TTL and invalidated automatically when the local index file changes.
+Context selection uses a middle-out strategy: it starts from the strongest chunk in a note and expands to neighboring chunks (at most three chunks per note) while respecting the token budget.
 ### `context`

package/dist/domain/context.js CHANGED Viewed

@@ -1,13 +1,50 @@
+import { middleOutIndices } from './middle-out.js';
+// Upper bound on how many chunks of a single document may enter the context.
+const maxSectionsPerDocument = 3;
+/**
+ * Comparator: higher `score` first; when the score difference is 0 (or NaN)
+ * the order falls back to a locale comparison of `title`.
+ */
+const byScore = (left, right) => {
+    const scoreGap = right.score - left.score;
+    return scoreGap || left.title.localeCompare(right.title);
+};
+/**
+ * Comparator: ascending `chunkOrdinal`; a missing (null/undefined) ordinal is
+ * treated as `Number.MAX_SAFE_INTEGER`, i.e. sorted last.
+ */
+const byOrdinal = (left, right) => {
+    const leftOrdinal = left.chunkOrdinal ?? Number.MAX_SAFE_INTEGER;
+    const rightOrdinal = right.chunkOrdinal ?? Number.MAX_SAFE_INTEGER;
+    return leftOrdinal - rightOrdinal;
+};
+/**
+ * Reorders the results of one document "middle-out": the best-scoring chunk
+ * comes first, followed by its neighbours (by `chunkOrdinal`) at increasing
+ * distance.
+ *
+ * @param {Array<object>} results - Search results of a single document.
+ * @returns {Array<object>} The same array when it holds at most one result;
+ *   otherwise a new array in middle-out order, or in plain score order when
+ *   the best chunk cannot be located by `chunkId`.
+ */
+const middleOutDocumentResults = (results) => {
+    // Nothing to reorder for zero or one result.
+    if (results.length <= 1) {
+        return results;
+    }
+    const rankedByScore = [...results].sort(byScore);
+    const strongestChunkId = rankedByScore[0]?.chunkId;
+    const inDocumentOrder = [...results].sort(byOrdinal);
+    const pivotPosition = inDocumentOrder.findIndex(({ chunkId }) => chunkId === strongestChunkId);
+    if (pivotPosition < 0) {
+        return rankedByScore;
+    }
+    const visitOrder = middleOutIndices(inDocumentOrder.length, pivotPosition);
+    return visitOrder.map((position) => inDocumentOrder[position]);
+};
 export const selectContextSections = (results, maxTokens) => {
-    const selected = results.reduce((state, result) => {
-        const tokenCost = Math.ceil(result.content.length / 4);
-        if (state.usedTokens + tokenCost > maxTokens || state.seenDocuments.has(result.documentId)) {
-            return state;
+    const grouped = results.reduce((state, result) => {
+        const current = state.get(result.documentId) ?? [];
+        state.set(result.documentId, [...current, result]);
+        return state;
+    }, new Map());
+    const documentOrder = Array.from(results.reduce((state, result) => {
+        if (!state.has(result.documentId)) {
+            state.set(result.documentId, result.score);
         }
-        return {
-            usedTokens: state.usedTokens + tokenCost,
-            sections: [
-                ...state.sections,
+        return state;
+    }, new Map()).entries())
+        .sort((left, right) => right[1] - left[1] || left[0].localeCompare(right[0]))
+        .map(([documentId]) => documentId);
+    const selected = documentOrder.reduce((state, documentId) => {
+        const ordered = middleOutDocumentResults(grouped.get(documentId) ?? []);
+        let usedTokens = state.usedTokens;
+        let sections = state.sections;
+        let seenChunks = state.seenChunks;
+        for (let index = 0; index < ordered.length && index < maxSectionsPerDocument; index += 1) {
+            const result = ordered[index];
+            if (seenChunks.has(result.chunkId)) {
+                continue;
+            }
+            const tokenCost = Math.ceil(result.content.length / 4);
+            if (usedTokens + tokenCost > maxTokens) {
+                break;
+            }
+            usedTokens += tokenCost;
+            sections = [
+                ...sections,
                 {
                     title: result.title,
                     path: result.path,
@@ -16,13 +53,18 @@ export const selectContextSections = (results, maxTokens) => {
                     searchMode: result.searchMode,
                     tags: result.tags
                 }
-            ],
-            seenDocuments: new Set([...state.seenDocuments, result.documentId])
+            ];
+            seenChunks = new Set([...seenChunks, result.chunkId]);
+        }
+        return {
+            usedTokens,
+            sections,
+            seenChunks
         };
     }, {
         usedTokens: 0,
         sections: [],
-        seenDocuments: new Set()
+        seenChunks: new Set()
     });
     return selected.sections;
 };

package/dist/domain/middle-out.js ADDED Viewed

@@ -0,0 +1,18 @@
+/**
+ * Lists every index of a `size`-element sequence in "middle-out" order: the
+ * pivot first, then its neighbours at growing distance, left before right
+ * (pivot, pivot-1, pivot+1, pivot-2, ...).
+ *
+ * @param {number} size - Number of elements; fractional values are floored.
+ * @param {number} pivotIndex - Starting index; floored and clamped into
+ *   `[0, size - 1]`. A pivot that is not a number (NaN/undefined) falls back to 0.
+ * @returns {number[]} Each index in `[0, size)` exactly once, or `[]` when
+ *   `size` is not a finite number of at least one whole element.
+ */
+export const middleOutIndices = (size, pivotIndex) => {
+    if (!Number.isFinite(size)) {
+        return [];
+    }
+    // Work with a whole element count: a fractional size could otherwise yield
+    // a fractional pivot whose neighbours never fill the result (endless loop).
+    const length = Math.floor(size);
+    if (length <= 0) {
+        return [];
+    }
+    const requestedPivot = Math.floor(pivotIndex);
+    // NaN fails every comparison below, so an unchecked NaN pivot would keep
+    // the loop spinning forever; start from the first element instead.
+    const pivot = Number.isNaN(requestedPivot)
+        ? 0
+        : Math.max(0, Math.min(requestedPivot, length - 1));
+    const indices = [pivot];
+    for (let offset = 1; indices.length < length; offset += 1) {
+        const left = pivot - offset;
+        const right = pivot + offset;
+        if (left >= 0) {
+            indices.push(left);
+        }
+        if (right < length) {
+            indices.push(right);
+        }
+    }
+    return indices;
+};

package/dist/infrastructure/file-index.js CHANGED Viewed

@@ -81,6 +81,7 @@ const toResult = (row, mode, text, semantic) => {
         title: row.title,
         path: row.path,
         chunkId: row.chunkId,
+        chunkOrdinal: row.chunkOrdinal,
         content: row.content,
         score,
         textScore: text,
@@ -138,6 +139,7 @@ export const openFileIndex = (vaultPath) => {
                         title: document.title,
                         path: document.path,
                         chunkId: chunk.id,
+                        chunkOrdinal: chunk.ordinal,
                         content: chunk.content,
                         tags: document.tags,
                         embedding: chunk.embedding
@@ -263,6 +265,7 @@ export const openFileIndex = (vaultPath) => {
                     title: document.title,
                     path: document.path,
                     chunkId: document.id,
+                    chunkOrdinal: 0,
                     content: document.content,
                     tags: document.tags,
                     embedding: []

package/dist/infrastructure/search-packs.js CHANGED Viewed

@@ -1,11 +1,15 @@
 import { gunzipSync } from 'node:zlib';
 import { mkdir, readdir, readFile, rm, writeFile } from 'node:fs/promises';
 import { join } from 'node:path';
+import { middleOutIndices } from '../domain/middle-out.js';
 import { decodePrivatePack, encodePrivatePack, isPrivatePackPayload } from './private-pack-codec.js';
 const packsDirectoryName = 'search-packs';
 const manifestFileName = 'manifest.json';
 const rowChunkSize = 5_000;
 const queryTokenPattern = /[\p{L}\p{N}_-]+/gu;
+// Size of each per-pack token bloom filter in bytes.
+const bloomBytes = 256;
+// Number of addressable bits in one filter (8 bits per byte).
+const bloomBitSize = bloomBytes * 8;
+// One hash seed per probe: every token sets/tests one bit per seed.
+const bloomSeeds = [0x9e3779b1, 0x85ebca6b, 0xc2b2ae35];
 const toPackDirectory = (vaultPath) => join(vaultPath, '.brainlink', packsDirectoryName);
 const toManifestPath = (vaultPath) => join(toPackDirectory(vaultPath), manifestFileName);
 const parseRowsFromPack = async (vaultPath, content) => {
@@ -15,7 +19,29 @@ const parseRowsFromPack = async (vaultPath, content) => {
         .split('\n')
         .map((line) => line.trim())
         .filter((line) => line.length > 0)
-        .map((line) => JSON.parse(line));
+        .map((line) => JSON.parse(line))
+        .flatMap((row) => {
+        if (typeof row.documentId !== 'string' ||
+            typeof row.agentId !== 'string' ||
+            typeof row.title !== 'string' ||
+            typeof row.path !== 'string' ||
+            typeof row.chunkId !== 'string' ||
+            typeof row.content !== 'string') {
+            return [];
+        }
+        return [
+            {
+                documentId: row.documentId,
+                agentId: row.agentId,
+                title: row.title,
+                path: row.path,
+                chunkId: row.chunkId,
+                chunkOrdinal: typeof row.chunkOrdinal === 'number' ? row.chunkOrdinal : 0,
+                content: row.content,
+                tags: Array.isArray(row.tags) ? row.tags.filter((item) => typeof item === 'string') : []
+            }
+        ];
+    });
 };
 const toRows = (documents) => documents.flatMap((document) => document.chunks.map((chunk) => ({
     documentId: document.document.id,
@@ -23,12 +49,60 @@ const toRows = (documents) => documents.flatMap((document) => document.chunks.ma
     title: document.document.title,
     path: document.document.path,
     chunkId: chunk.id,
+    chunkOrdinal: chunk.ordinal,
     content: chunk.content,
     tags: document.document.tags
 })));
 const writeManifest = async (vaultPath, manifest) => {
     await writeFile(toManifestPath(vaultPath), `${JSON.stringify(manifest, null, 2)}\n`, 'utf8');
 };
+/**
+ * Loads and normalises the search-pack manifest of a vault.
+ *
+ * Only manifests whose `format` is 'private-v2' and whose `version` is 2 or 3
+ * are recognised. Missing or mistyped fields fall back to defaults
+ * (`createdAt` -> current time, counts -> 0; for version 3 `packCount`
+ * defaults to the number of usable `packIndex` entries).
+ *
+ * @param {string} vaultPath - Vault root handed to `toManifestPath`.
+ * @returns {Promise<object|null>} Normalised manifest, or `null` when the file
+ *   cannot be read or parsed, or has an unrecognised version/format.
+ */
+const readManifest = async (vaultPath) => {
+    const numberOr = (value, fallback) => (typeof value === 'number' ? value : fallback);
+    const stringsIn = (value) => (Array.isArray(value) ? value.filter((item) => typeof item === 'string') : []);
+    try {
+        const manifestText = await readFile(toManifestPath(vaultPath), 'utf8');
+        const parsed = JSON.parse(manifestText);
+        const isKnownVersion = parsed.version === 2 || parsed.version === 3;
+        if (!isKnownVersion || parsed.format !== 'private-v2') {
+            return null;
+        }
+        const createdAt = typeof parsed.createdAt === 'string' ? parsed.createdAt : new Date().toISOString();
+        const recordCount = numberOr(parsed.recordCount, 0);
+        if (parsed.version === 2) {
+            return {
+                version: 2,
+                createdAt,
+                packCount: numberOr(parsed.packCount, 0),
+                recordCount,
+                format: 'private-v2'
+            };
+        }
+        // Version 3: keep only index entries that carry both required strings.
+        const rawEntries = Array.isArray(parsed.packIndex) ? parsed.packIndex : [];
+        const packIndex = [];
+        for (const entry of rawEntries) {
+            const isObject = Boolean(entry) && typeof entry === 'object';
+            if (!isObject || typeof entry.fileName !== 'string' || typeof entry.tokenBloomB64 !== 'string') {
+                continue;
+            }
+            packIndex.push({
+                fileName: entry.fileName,
+                recordCount: numberOr(entry.recordCount, 0),
+                agents: stringsIn(entry.agents),
+                tokenBloomB64: entry.tokenBloomB64
+            });
+        }
+        return {
+            version: 3,
+            createdAt,
+            packCount: numberOr(parsed.packCount, packIndex.length),
+            recordCount,
+            format: 'private-v2',
+            packIndex
+        };
+    }
+    catch {
+        // Any read or parse failure is reported as "no usable manifest".
+        return null;
+    }
+};
 const chunkRows = (rows, size) => {
     const chunks = [];
     for (let index = 0; index < rows.length; index += size) {
@@ -57,6 +131,51 @@ const countOccurrences = (text, token) => {
     }
     return hits;
 };
+/**
+ * Hashes a token to an unsigned 32-bit integer: starting from `seed`, each
+ * UTF-16 code unit is XOR-ed in and the state multiplied by 16777619
+ * (32-bit wrap-around).
+ *
+ * @param {string} token - Text to hash.
+ * @param {number} seed - Initial state; coerced to an unsigned 32-bit value.
+ * @returns {number} Hash in the range [0, 2^32).
+ */
+const hashToken = (token, seed) => {
+    let state = seed >>> 0;
+    // split('') walks UTF-16 code units, matching charCodeAt-by-index.
+    for (const unit of token.split('')) {
+        state = Math.imul(state ^ unit.charCodeAt(0), 16777619) >>> 0;
+    }
+    return state;
+};
+/** Creates an empty (all-zero) bloom filter of `bloomBytes` bytes. */
+const createBloom = () => {
+    return new Uint8Array(bloomBytes);
+};
+/**
+ * Records `token` in `bloom` by setting one bit per seed in `bloomSeeds`.
+ * Mutates `bloom` in place.
+ */
+const bloomAdd = (bloom, token) => {
+    for (const seed of bloomSeeds) {
+        const bit = hashToken(token, seed) % bloomBitSize;
+        const byteIndex = Math.floor(bit / 8);
+        const mask = 1 << (bit % 8);
+        bloom[byteIndex] |= mask;
+    }
+};
+/**
+ * Tests whether `token` may have been added to `bloom`: true only when the
+ * bit selected by every seed in `bloomSeeds` is set.
+ */
+const bloomMayContain = (bloom, token) => {
+    for (const seed of bloomSeeds) {
+        const bit = hashToken(token, seed) % bloomBitSize;
+        const isSet = (bloom[Math.floor(bit / 8)] & (1 << (bit % 8))) !== 0;
+        if (!isSet) {
+            return false;
+        }
+    }
+    return true;
+};
+/**
+ * Builds a bloom filter covering every token of the given rows. For each row
+ * the title, path, space-joined tags and content are tokenized together.
+ *
+ * @param {Array<object>} rows - Pack rows with `title`, `path`, `tags`, `content`.
+ * @returns {Uint8Array} The populated filter.
+ */
+const bloomFromRows = (rows) => {
+    const filter = createBloom();
+    for (const { title, path, tags, content } of rows) {
+        const searchableText = [title, path, tags.join(' '), content].join(' ');
+        for (const token of tokenize(searchableText)) {
+            bloomAdd(filter, token);
+        }
+    }
+    return filter;
+};
+/** Serialises a bloom filter as an unpadded base64url string. */
+const bloomToBase64 = (bloom) => {
+    const bytes = Buffer.from(bloom);
+    return bytes.toString('base64url');
+};
+/**
+ * Decodes a base64url-encoded bloom filter.
+ *
+ * @param {string} value - Encoded filter as stored in the manifest.
+ * @returns {{bloom: Uint8Array, valid: boolean}} The decoded filter with
+ *   `valid: true` when it decodes to exactly `bloomBytes` bytes; otherwise an
+ *   empty filter with `valid: false`.
+ */
+const bloomFromBase64 = (value) => {
+    let decoded = null;
+    try {
+        decoded = Buffer.from(value, 'base64url');
+    }
+    catch {
+        // Undecodable input is reported as an invalid filter below.
+    }
+    if (decoded !== null && decoded.byteLength === bloomBytes) {
+        return {
+            bloom: new Uint8Array(decoded),
+            valid: true
+        };
+    }
+    return {
+        bloom: createBloom(),
+        valid: false
+    };
+};
 const computeTextScore = (row, tokens) => {
     if (tokens.length === 0) {
         return 0;
@@ -79,6 +198,7 @@ const toSearchResult = (row, score) => ({
     title: row.title,
     path: row.path,
     chunkId: row.chunkId,
+    chunkOrdinal: row.chunkOrdinal,
     content: row.content,
     score,
     textScore: score,
@@ -110,24 +230,66 @@ const writeRowsAsPrivatePacks = async (vaultPath, rows, clearExisting) => {
             .map((name) => rm(join(directory, name), { force: true })));
     }
     const chunks = chunkRows(rows, rowChunkSize);
-    await Promise.all(chunks.map(async (chunk, index) => {
+    const packIndex = [];
+    for (let index = 0; index < chunks.length; index += 1) {
+        const chunk = chunks[index];
         const fileName = `pack-${String(index + 1).padStart(4, '0')}.blpk`;
         const serialized = `${chunk.map((row) => JSON.stringify(row)).join('\n')}\n`;
         const compressed = await encodePrivatePack(vaultPath, Buffer.from(serialized, 'utf8'));
+        const tokenBloomB64 = bloomToBase64(bloomFromRows(chunk));
         await writeFile(join(directory, fileName), compressed);
-    }));
+        packIndex.push({
+            fileName,
+            recordCount: chunk.length,
+            agents: Array.from(new Set(chunk.map((row) => row.agentId))).sort((left, right) => left.localeCompare(right)),
+            tokenBloomB64
+        });
+    }
     await writeManifest(vaultPath, {
-        version: 2,
+        version: 3,
         createdAt: new Date().toISOString(),
         packCount: chunks.length,
         recordCount: rows.length,
-        format: 'private-v2'
+        format: 'private-v2',
+        packIndex
     });
     return {
         packCount: chunks.length,
         recordCount: rows.length
     };
 };
+/**
+ * Chooses which pack files a search has to open, using the version-3
+ * manifest index (agents + token bloom per pack) to skip unrelated packs.
+ *
+ * Falls back to every pack file on disk when there is no usable version-3
+ * manifest, and to the agent-filtered set (no token pruning) when any bloom
+ * entry is invalid or when no bloom matches any token.
+ *
+ * @param {string} vaultPath - Vault root.
+ * @param {string[]} tokens - Query tokens tested against each pack's bloom.
+ * @param {string} [agentId] - Optional agent filter; surrounding whitespace is trimmed.
+ * @returns {Promise<string[]>} Pack file names to scan.
+ */
+const selectCandidatePackFiles = async (vaultPath, tokens, agentId) => {
+    const allFiles = await sortedPackFiles(vaultPath);
+    if (allFiles.length === 0) {
+        return [];
+    }
+    const manifest = await readManifest(vaultPath);
+    if (!manifest || manifest.version !== 3 || !Array.isArray(manifest.packIndex)) {
+        return allFiles;
+    }
+    const namesOf = (entries) => entries.map((entry) => entry.fileName);
+    // A stale manifest may list packs that no longer exist; returning them
+    // would make the caller's readFile reject, so keep only entries on disk.
+    // NOTE(review): packs on disk that the manifest does not list are still
+    // skipped here - confirm whether that is intended.
+    const filesOnDisk = new Set(allFiles);
+    const normalizedAgent = agentId?.trim();
+    const byAgent = manifest.packIndex.filter((entry) => filesOnDisk.has(entry.fileName) &&
+        (normalizedAgent ? entry.agents.includes(normalizedAgent) : true));
+    if (tokens.length === 0) {
+        return namesOf(byAgent);
+    }
+    const byToken = [];
+    for (const entry of byAgent) {
+        const decoded = bloomFromBase64(entry.tokenBloomB64);
+        // Lossless guarantee: if compressed metadata is partially invalid, do not prune packs.
+        if (!decoded.valid) {
+            return namesOf(byAgent);
+        }
+        if (tokens.some((token) => bloomMayContain(decoded.bloom, token))) {
+            byToken.push(entry);
+        }
+    }
+    if (byToken.length > 0) {
+        return namesOf(byToken);
+    }
+    return byAgent.length > 0 ? namesOf(byAgent) : allFiles;
+};
 export const buildSearchPacks = async (vaultPath, documents) => {
     return writeRowsAsPrivatePacks(vaultPath, toRows(documents), true);
 };
@@ -158,14 +320,19 @@ export const searchInPacks = async (vaultPath, query, limit, agentId) => {
     if (limit <= 0 || tokens.length === 0) {
         return [];
     }
-    const files = await sortedPackFiles(vaultPath);
+    const files = await selectCandidatePackFiles(vaultPath, tokens, normalizedAgent);
     if (files.length === 0) {
         return [];
     }
     const scored = [];
     for (const file of files) {
         const rows = await parseRowsFromPack(vaultPath, await readFile(join(toPackDirectory(vaultPath), file)));
-        rows.forEach((row) => {
+        const traversal = middleOutIndices(rows.length, Math.floor(rows.length / 2));
+        traversal.forEach((rowIndex) => {
+            const row = rows[rowIndex];
+            if (!row) {
+                return;
+            }
             if (normalizedAgent && row.agentId !== normalizedAgent) {
                 return;
             }

package/docs/AGENT_USAGE.md CHANGED Viewed

@@ -465,6 +465,7 @@ Search modes:
 - `semantic`: local deterministic embedding similarity.
 Hybrid results are cached in-memory for a short TTL and invalidated when `.brainlink/index.json` changes.
+Context assembly uses middle-out ordering inside each note: the highest-scoring chunk is selected first, then nearby chunks are added (up to three chunks per note) while the token budget allows.
 ### Build Agent Context
@@ -635,6 +636,7 @@ GET  /api/validate
 The HTTP API is read-only. Use the CLI for writes and indexing.
 Indexing writes private encrypted search packs at `.brainlink/search-packs/*.blpk` for resilient retrieval and portability.
+Pack search now uses compressed-space prefiltering (token bloom index per pack) before decrypting/reading pack payloads.
 Pack decryption keys are resolved from `$BRAINLINK_HOME/keys` (or `BRAINLINK_SEARCH_PACK_KEY` when explicitly set).
 ## Agent Integration Contract

package/docs/ARCHITECTURE.md CHANGED Viewed

@@ -138,8 +138,10 @@ read markdown files
 question
   -> selected mode: fts | semantic | hybrid
   -> optional query embedding
+  -> optional compressed pack prefilter (token bloom)
   -> lexical scoring and/or semantic cosine scoring
   -> cosine similarity over candidate chunks
+  -> middle-out context expansion around strongest chunk
   -> ranked chunks with textScore and semanticScore
   -> token-budget selection
   -> Markdown context package
@@ -293,6 +295,7 @@ Markdown keeps the system portable, inspectable, Git-friendly, and compatible wi
 Brainlink uses a local JSON index plus encrypted pack exports for fast rebuildable retrieval without external infrastructure.
 Hybrid retrieval also uses a short-lived in-memory cache keyed by vault/query/agent and invalidated by index file mtime to reduce repeated query latency.
 Indexing exports private encrypted pack files (`.brainlink/search-packs/*.blpk`) from indexed chunks for fast retrieval and recovery continuity.
+Pack manifests include compressed-space token bloom metadata so retrieval can skip unrelated packs before decryption.
 Pack encryption keys are resolved from `$BRAINLINK_HOME/keys` or from `BRAINLINK_SEARCH_PACK_KEY` when configured.
 Legacy `.jsonl.gz` search packs are auto-upgraded to `.blpk` on first retrieval flow.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@andespindola/brainlink",
-  "version": "0.1.0-beta.16",
+  "version": "0.1.0-beta.18",
   "description": "Local-first knowledge memory for agents with Markdown, backlinks, indexing and context retrieval.",
   "type": "module",
   "license": "MIT",