@andespindola/brainlink 0.1.0-beta.15 → 0.1.0-beta.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +2 -0
- package/README.md +3 -1
- package/dist/application/frontend/client-css.js +2 -0
- package/dist/application/frontend/client-js.js +96 -9
- package/dist/domain/context.js +53 -11
- package/dist/domain/middle-out.js +18 -0
- package/dist/infrastructure/file-index.js +3 -0
- package/dist/infrastructure/search-packs.js +156 -6
- package/docs/AGENT_USAGE.md +2 -0
- package/docs/ARCHITECTURE.md +3 -0
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -22,6 +22,8 @@
|
|
|
22
22
|
- Added short-lived hybrid search cache with automatic invalidation on index changes.
|
|
23
23
|
- Added `stats --extended` observability output with storage, quality and latency probes.
|
|
24
24
|
- Added `docs/QUICKSTART.md` and aligned README/agent docs with the latest CLI/MCP flows.
|
|
25
|
+
- Added middle-out context assembly so chunk selection expands around the strongest note chunk.
|
|
26
|
+
- Added compressed-space pack prefiltering (token bloom index) before `.blpk` decryption and scan.
|
|
25
27
|
|
|
26
28
|
## 0.1.0-beta.3
|
|
27
29
|
|
package/README.md
CHANGED
|
@@ -67,8 +67,9 @@ Legacy `.jsonl.gz` packs are upgraded to `.blpk` automatically on first search/c
|
|
|
67
67
|
- Obsidian-compatible `[[wiki links]]` and `#tags`.
|
|
68
68
|
- Weighted graph edges so agents can rank relationship importance and priority.
|
|
69
69
|
- Backlinks, broken-link reports, orphan detection and validation.
|
|
70
|
-
- Full-text, semantic and hybrid retrieval modes.
|
|
71
70
|
- Full-text, semantic and hybrid retrieval on a local file index.
|
|
71
|
+
- Middle-out context assembly around the strongest chunk per document.
|
|
72
|
+
- Compressed-space prefiltering for `.blpk` packs before decryption and scan.
|
|
72
73
|
- Agent namespaces under `agents/<agent-id>/`.
|
|
73
74
|
- S3-compatible bucket vaults through `s3://bucket/prefix` URIs.
|
|
74
75
|
- CLI with machine-readable `--json` output.
|
|
@@ -728,6 +729,7 @@ Modes:
|
|
|
728
729
|
- `semantic`: local deterministic embedding similarity only.
|
|
729
730
|
|
|
730
731
|
Hybrid results are cached in-memory for a short TTL and invalidated automatically when the local index file changes.
|
|
732
|
+
Context selection uses a middle-out strategy: it starts from the strongest chunk in a note and expands to neighboring chunks while respecting token budget.
|
|
731
733
|
|
|
732
734
|
### `context`
|
|
733
735
|
|
|
@@ -2,12 +2,17 @@ export const createClientJs = () => `const canvas = document.getElementById('gra
|
|
|
2
2
|
const ctx = canvas.getContext('2d')
|
|
3
3
|
const largeGraphNodeThreshold = 4000
|
|
4
4
|
const largeGraphEdgeRenderLimit = 16000
|
|
5
|
+
const renderNodeBudget = 1800
|
|
6
|
+
const minNodePixelRadius = 1.8
|
|
7
|
+
const viewportPaddingPx = 280
|
|
5
8
|
const state = {
|
|
6
9
|
graph: { nodes: [], edges: [] },
|
|
7
10
|
nodes: [],
|
|
8
11
|
edges: [],
|
|
9
12
|
visibleNodes: [],
|
|
10
13
|
visibleEdges: [],
|
|
14
|
+
renderNodes: [],
|
|
15
|
+
renderEdges: [],
|
|
11
16
|
nodeDegrees: new Map(),
|
|
12
17
|
selected: null,
|
|
13
18
|
hovered: null,
|
|
@@ -133,7 +138,7 @@ const graphBounds = nodes => {
|
|
|
133
138
|
let maxY = Number.NEGATIVE_INFINITY
|
|
134
139
|
|
|
135
140
|
nodes.forEach(node => {
|
|
136
|
-
const radius =
|
|
141
|
+
const radius = baseNodeRadius(node)
|
|
137
142
|
minX = Math.min(minX, node.x - radius)
|
|
138
143
|
maxX = Math.max(maxX, node.x + radius)
|
|
139
144
|
minY = Math.min(minY, node.y - radius)
|
|
@@ -165,7 +170,9 @@ const fitView = (options = { useFiltered: true }) => {
|
|
|
165
170
|
const padding = 100
|
|
166
171
|
const scaleX = width / (bounds.width + padding * 2)
|
|
167
172
|
const scaleY = height / (bounds.height + padding * 2)
|
|
168
|
-
const
|
|
173
|
+
const fitScale = clampScale(Math.min(scaleX, scaleY))
|
|
174
|
+
const minimumLargeGraphScale = nodes.length > largeGraphNodeThreshold ? 0.13 : zoomRange.min
|
|
175
|
+
const scale = Math.max(fitScale, minimumLargeGraphScale)
|
|
169
176
|
const centerX = (bounds.minX + bounds.maxX) / 2
|
|
170
177
|
const centerY = (bounds.minY + bounds.maxY) / 2
|
|
171
178
|
|
|
@@ -267,8 +274,8 @@ const scheduleContentFilterSync = () => {
|
|
|
267
274
|
}
|
|
268
275
|
|
|
269
276
|
const tick = delta => {
|
|
270
|
-
const nodes = state.visibleNodes
|
|
271
|
-
const edges = state.visibleEdges
|
|
277
|
+
const nodes = state.renderNodes.length > 0 ? state.renderNodes : state.visibleNodes
|
|
278
|
+
const edges = state.renderEdges.length > 0 ? state.renderEdges : state.visibleEdges
|
|
272
279
|
if (nodes.length > 1200) {
|
|
273
280
|
return
|
|
274
281
|
}
|
|
@@ -334,7 +341,7 @@ const hitNode = point => {
|
|
|
334
341
|
return null
|
|
335
342
|
}
|
|
336
343
|
|
|
337
|
-
const nodes = state.
|
|
344
|
+
const nodes = state.renderNodes
|
|
338
345
|
for (let index = nodes.length - 1; index >= 0; index -= 1) {
|
|
339
346
|
const node = nodes[index]
|
|
340
347
|
const radius = nodeRadius(node)
|
|
@@ -343,15 +350,88 @@ const hitNode = point => {
|
|
|
343
350
|
return null
|
|
344
351
|
}
|
|
345
352
|
|
|
346
|
-
const
|
|
353
|
+
/**
 * Base radius of a node before any zoom compensation.
 * Starts at 9 and grows by 1.6 per unit of the value stored for the node in
 * state.nodeDegrees, with that value capped at 8 (missing entries count as 0).
 */
const baseNodeRadius = node => {
  const cappedDegree = Math.min(state.nodeDegrees.get(node.id) ?? 0, 8)
  return 9 + cappedDegree * 1.6
}
|
|
350
357
|
|
|
358
|
+
/**
 * Radius actually used for a node: the base radius, but never less than
 * minNodePixelRadius divided by the current transform scale.
 */
const nodeRadius = node => {
  // Clamp the divisor so a zero or negative scale cannot blow up the floor value.
  const safeScale = Math.max(state.transform.scale, 0.0001)
  const zoomFloor = minNodePixelRadius / safeScale
  return Math.max(baseNodeRadius(node), zoomFloor)
}
|
|
359
|
+
|
|
360
|
+
/**
 * Rectangle covered by the canvas, widened by viewportPaddingPx on every side
 * and converted through the inverse of state.transform (subtract the offset,
 * divide by the scale). Canvas width and height are each treated as at least 320.
 */
const worldViewportBounds = () => {
  const { width: measuredWidth, height: measuredHeight } = canvas.getBoundingClientRect()
  const { x: panX, y: panY, scale } = state.transform
  const viewWidth = Math.max(measuredWidth, 320)
  const viewHeight = Math.max(measuredHeight, 320)
  const pad = viewportPaddingPx

  const left = (-panX - pad) / scale
  const right = (viewWidth - panX + pad) / scale
  const top = (-panY - pad) / scale
  const bottom = (viewHeight - panY + pad) / scale

  return { minX: left, maxX: right, minY: top, maxY: bottom }
}
|
|
373
|
+
|
|
374
|
+
/**
 * True when the node's (x, y) position lies inside the viewport rectangle,
 * bounds included. Only the node centre is tested, not its radius.
 */
const isNodeInViewport = (node, viewport) => {
  const insideHorizontally = node.x >= viewport.minX && node.x <= viewport.maxX
  const insideVertically = node.y >= viewport.minY && node.y <= viewport.maxY
  return insideHorizontally && insideVertically
}
|
|
379
|
+
|
|
380
|
+
/**
 * Sampling stride for node rendering: 1 means every node, k means every k-th.
 * Graphs at or below largeGraphNodeThreshold are never thinned; larger graphs
 * are thinned more aggressively as the zoom scale drops.
 */
const viewportNodeStride = () => {
  if (state.nodes.length <= largeGraphNodeThreshold) {
    return 1
  }

  // [minimum scale, stride] pairs, checked from most to least zoomed-in.
  const strideByScale = [
    [0.95, 1],
    [0.7, 2],
    [0.48, 3],
    [0.28, 5]
  ]
  const zoom = state.transform.scale
  const tier = strideByScale.find(([minimumScale]) => zoom >= minimumScale)

  return tier ? tier[1] : 8
}
|
|
400
|
+
|
|
401
|
+
/**
 * Recomputes state.renderNodes and state.renderEdges for the current frame.
 *
 * Nodes are taken from state.visibleNodes when they fall inside the padded
 * viewport and either land on the sampling stride or are "priority" nodes
 * (selected, hovered or being dragged). The result is capped at
 * renderNodeBudget; edges are kept only when both endpoints were kept.
 *
 * Priority nodes are never dropped by the budget cap: when the cap applies,
 * only non-priority nodes are trimmed, so the node the user is interacting
 * with stays rendered wherever it sits in visibleNodes. The relative order
 * of the kept nodes is unchanged.
 */
const computeRenderVisibility = () => {
  const viewport = worldViewportBounds()
  const stride = viewportNodeStride()

  const isPriority = node =>
    node.id === state.selected?.id ||
    node.id === state.hovered?.id ||
    node.id === state.pointer.dragNode?.id

  const picked = []
  let priorityCount = 0

  for (let index = 0; index < state.visibleNodes.length; index += 1) {
    const node = state.visibleNodes[index]
    if (!isNodeInViewport(node, viewport)) {
      continue
    }

    const priority = isPriority(node)
    if (priority || index % stride === 0) {
      picked.push(node)
      if (priority) {
        priorityCount += 1
      }
    }
  }

  let nodes = picked
  if (picked.length > renderNodeBudget) {
    // Reserve room for the priority nodes, then fill the rest in original order.
    let regularSlots = Math.max(renderNodeBudget - priorityCount, 0)
    nodes = picked.filter(node => {
      if (isPriority(node)) {
        return true
      }
      if (regularSlots > 0) {
        regularSlots -= 1
        return true
      }
      return false
    })
  }

  const nodeIds = new Set(nodes.map((node) => node.id))
  const edges = state.visibleEdges.filter((edge) => nodeIds.has(edge.source) && edge.target && nodeIds.has(edge.target))

  state.renderNodes = nodes
  state.renderEdges = edges
}
|
|
430
|
+
|
|
351
431
|
const render = now => {
|
|
352
432
|
const delta = now - state.last
|
|
353
433
|
state.last = now
|
|
354
|
-
const minFrameIntervalMs = state.nodes.length > largeGraphNodeThreshold ?
|
|
434
|
+
const minFrameIntervalMs = state.nodes.length > largeGraphNodeThreshold ? 48 : 16
|
|
355
435
|
if (delta < minFrameIntervalMs) {
|
|
356
436
|
requestAnimationFrame(render)
|
|
357
437
|
return
|
|
@@ -372,10 +452,11 @@ const render = now => {
|
|
|
372
452
|
ctx.translate(state.transform.x, state.transform.y)
|
|
373
453
|
ctx.scale(state.transform.scale, state.transform.scale)
|
|
374
454
|
|
|
455
|
+
computeRenderVisibility()
|
|
375
456
|
tick(delta)
|
|
376
457
|
const drawEdges = !(state.nodes.length > largeGraphNodeThreshold && state.transform.scale < 0.22)
|
|
377
458
|
if (drawEdges) {
|
|
378
|
-
state.
|
|
459
|
+
state.renderEdges.forEach(edge => {
|
|
379
460
|
const selectedEdge = state.selected && (edge.source === state.selected.id || edge.target === state.selected.id)
|
|
380
461
|
ctx.beginPath()
|
|
381
462
|
ctx.moveTo(edge.sourceNode.x, edge.sourceNode.y)
|
|
@@ -386,7 +467,7 @@ const render = now => {
|
|
|
386
467
|
})
|
|
387
468
|
}
|
|
388
469
|
|
|
389
|
-
state.
|
|
470
|
+
state.renderNodes.forEach(node => {
|
|
390
471
|
const radius = nodeRadius(node)
|
|
391
472
|
const isSelected = state.selected?.id === node.id
|
|
392
473
|
const isHovered = state.hovered?.id === node.id
|
|
@@ -416,6 +497,12 @@ const render = now => {
|
|
|
416
497
|
})
|
|
417
498
|
|
|
418
499
|
ctx.restore()
|
|
500
|
+
if (state.renderNodes.length === 0) {
|
|
501
|
+
ctx.fillStyle = '#99a5b5'
|
|
502
|
+
ctx.font = '12px Inter, system-ui, sans-serif'
|
|
503
|
+
ctx.textAlign = 'center'
|
|
504
|
+
ctx.fillText('Move or zoom to reveal nearby notes', width / 2, height / 2)
|
|
505
|
+
}
|
|
419
506
|
requestAnimationFrame(render)
|
|
420
507
|
}
|
|
421
508
|
|
package/dist/domain/context.js
CHANGED
|
@@ -1,13 +1,50 @@
|
|
|
1
|
+
import { middleOutIndices } from './middle-out.js';
|
|
2
|
+
// Upper bound on how many sections a single document may contribute.
const maxSectionsPerDocument = 3;
/** Comparator: higher score first; equal scores fall back to title order. */
const byScore = (left, right) => {
    const scoreGap = right.score - left.score;
    return scoreGap || left.title.localeCompare(right.title);
};
/** Comparator: ascending chunkOrdinal; results without an ordinal sort last. */
const byOrdinal = (left, right) => {
    const leftOrdinal = left.chunkOrdinal ?? Number.MAX_SAFE_INTEGER;
    const rightOrdinal = right.chunkOrdinal ?? Number.MAX_SAFE_INTEGER;
    return leftOrdinal - rightOrdinal;
};
|
|
5
|
+
/**
 * Reorders one document's results "middle-out": the best-scoring chunk comes
 * first, followed by its neighbours in chunk order, alternating left and right
 * as produced by middleOutIndices.
 *
 * Inputs with zero or one result are returned as-is. If the pivot chunk cannot
 * be located in the ordinal-sorted copy, the results are returned sorted by score.
 */
const middleOutDocumentResults = (results) => {
    if (results.length <= 1) {
        return results;
    }
    const [strongest] = [...results].sort(byScore);
    const pivotChunkId = strongest?.chunkId;
    const inChunkOrder = [...results].sort(byOrdinal);
    const pivotIndex = inChunkOrder.findIndex((candidate) => candidate.chunkId === pivotChunkId);
    if (pivotIndex < 0) {
        return [...results].sort(byScore);
    }
    const visitOrder = middleOutIndices(inChunkOrder.length, pivotIndex);
    return visitOrder.map((position) => inChunkOrder[position]);
};
|
|
1
17
|
export const selectContextSections = (results, maxTokens) => {
|
|
2
|
-
const
|
|
3
|
-
const
|
|
4
|
-
|
|
5
|
-
|
|
18
|
+
const grouped = results.reduce((state, result) => {
|
|
19
|
+
const current = state.get(result.documentId) ?? [];
|
|
20
|
+
state.set(result.documentId, [...current, result]);
|
|
21
|
+
return state;
|
|
22
|
+
}, new Map());
|
|
23
|
+
const documentOrder = Array.from(results.reduce((state, result) => {
|
|
24
|
+
if (!state.has(result.documentId)) {
|
|
25
|
+
state.set(result.documentId, result.score);
|
|
6
26
|
}
|
|
7
|
-
return
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
27
|
+
return state;
|
|
28
|
+
}, new Map()).entries())
|
|
29
|
+
.sort((left, right) => right[1] - left[1] || left[0].localeCompare(right[0]))
|
|
30
|
+
.map(([documentId]) => documentId);
|
|
31
|
+
const selected = documentOrder.reduce((state, documentId) => {
|
|
32
|
+
const ordered = middleOutDocumentResults(grouped.get(documentId) ?? []);
|
|
33
|
+
let usedTokens = state.usedTokens;
|
|
34
|
+
let sections = state.sections;
|
|
35
|
+
let seenChunks = state.seenChunks;
|
|
36
|
+
for (let index = 0; index < ordered.length && index < maxSectionsPerDocument; index += 1) {
|
|
37
|
+
const result = ordered[index];
|
|
38
|
+
if (seenChunks.has(result.chunkId)) {
|
|
39
|
+
continue;
|
|
40
|
+
}
|
|
41
|
+
const tokenCost = Math.ceil(result.content.length / 4);
|
|
42
|
+
if (usedTokens + tokenCost > maxTokens) {
|
|
43
|
+
break;
|
|
44
|
+
}
|
|
45
|
+
usedTokens += tokenCost;
|
|
46
|
+
sections = [
|
|
47
|
+
...sections,
|
|
11
48
|
{
|
|
12
49
|
title: result.title,
|
|
13
50
|
path: result.path,
|
|
@@ -16,13 +53,18 @@ export const selectContextSections = (results, maxTokens) => {
|
|
|
16
53
|
searchMode: result.searchMode,
|
|
17
54
|
tags: result.tags
|
|
18
55
|
}
|
|
19
|
-
]
|
|
20
|
-
|
|
56
|
+
];
|
|
57
|
+
seenChunks = new Set([...seenChunks, result.chunkId]);
|
|
58
|
+
}
|
|
59
|
+
return {
|
|
60
|
+
usedTokens,
|
|
61
|
+
sections,
|
|
62
|
+
seenChunks
|
|
21
63
|
};
|
|
22
64
|
}, {
|
|
23
65
|
usedTokens: 0,
|
|
24
66
|
sections: [],
|
|
25
|
-
|
|
67
|
+
seenChunks: new Set()
|
|
26
68
|
});
|
|
27
69
|
return selected.sections;
|
|
28
70
|
};
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
 * Produces the indices 0..size-1 in "middle-out" order: the pivot first, then
 * pivot-1, pivot+1, pivot-2, pivot+2, ... skipping positions that fall outside
 * the range.
 *
 * @param {number} size - Number of positions. Non-finite or < 1 yields []. A
 *   fractional size is floored, so every returned index is a valid position.
 * @param {number} pivotIndex - Starting position. Floored and clamped into
 *   [0, size - 1]. A value that is not a number (NaN, undefined) is treated
 *   as 0; previously it left the pivot as NaN and the loop never terminated.
 * @returns {number[]} Each index in [0, size) exactly once.
 */
export const middleOutIndices = (size, pivotIndex) => {
    if (!Number.isFinite(size)) {
        return [];
    }
    const length = Math.floor(size);
    if (length <= 0) {
        return [];
    }
    const flooredPivot = Math.floor(pivotIndex);
    // NaN would poison every comparison below, so no index would ever be pushed.
    const requestedPivot = Number.isNaN(flooredPivot) ? 0 : flooredPivot;
    const clampedPivot = Math.max(0, Math.min(requestedPivot, length - 1));
    const indices = [clampedPivot];
    for (let offset = 1; indices.length < length; offset += 1) {
        const left = clampedPivot - offset;
        const right = clampedPivot + offset;
        if (left >= 0) {
            indices.push(left);
        }
        if (right < length) {
            indices.push(right);
        }
    }
    return indices;
};
|
|
@@ -81,6 +81,7 @@ const toResult = (row, mode, text, semantic) => {
|
|
|
81
81
|
title: row.title,
|
|
82
82
|
path: row.path,
|
|
83
83
|
chunkId: row.chunkId,
|
|
84
|
+
chunkOrdinal: row.chunkOrdinal,
|
|
84
85
|
content: row.content,
|
|
85
86
|
score,
|
|
86
87
|
textScore: text,
|
|
@@ -138,6 +139,7 @@ export const openFileIndex = (vaultPath) => {
|
|
|
138
139
|
title: document.title,
|
|
139
140
|
path: document.path,
|
|
140
141
|
chunkId: chunk.id,
|
|
142
|
+
chunkOrdinal: chunk.ordinal,
|
|
141
143
|
content: chunk.content,
|
|
142
144
|
tags: document.tags,
|
|
143
145
|
embedding: chunk.embedding
|
|
@@ -263,6 +265,7 @@ export const openFileIndex = (vaultPath) => {
|
|
|
263
265
|
title: document.title,
|
|
264
266
|
path: document.path,
|
|
265
267
|
chunkId: document.id,
|
|
268
|
+
chunkOrdinal: 0,
|
|
266
269
|
content: document.content,
|
|
267
270
|
tags: document.tags,
|
|
268
271
|
embedding: []
|
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
import { gunzipSync } from 'node:zlib';
|
|
2
2
|
import { mkdir, readdir, readFile, rm, writeFile } from 'node:fs/promises';
|
|
3
3
|
import { join } from 'node:path';
|
|
4
|
+
import { middleOutIndices } from '../domain/middle-out.js';
|
|
4
5
|
import { decodePrivatePack, encodePrivatePack, isPrivatePackPayload } from './private-pack-codec.js';
|
|
5
6
|
const packsDirectoryName = 'search-packs';
|
|
6
7
|
const manifestFileName = 'manifest.json';
|
|
7
8
|
const rowChunkSize = 5_000;
|
|
8
9
|
const queryTokenPattern = /[\p{L}\p{N}_-]+/gu;
|
|
10
|
+
const bloomBytes = 256;
|
|
11
|
+
const bloomBitSize = bloomBytes * 8;
|
|
12
|
+
const bloomSeeds = [0x9e3779b1, 0x85ebca6b, 0xc2b2ae35];
|
|
9
13
|
const toPackDirectory = (vaultPath) => join(vaultPath, '.brainlink', packsDirectoryName);
|
|
10
14
|
const toManifestPath = (vaultPath) => join(toPackDirectory(vaultPath), manifestFileName);
|
|
11
15
|
const parseRowsFromPack = async (vaultPath, content) => {
|
|
@@ -15,7 +19,29 @@ const parseRowsFromPack = async (vaultPath, content) => {
|
|
|
15
19
|
.split('\n')
|
|
16
20
|
.map((line) => line.trim())
|
|
17
21
|
.filter((line) => line.length > 0)
|
|
18
|
-
.map((line) => JSON.parse(line))
|
|
22
|
+
.map((line) => JSON.parse(line))
|
|
23
|
+
.flatMap((row) => {
|
|
24
|
+
if (typeof row.documentId !== 'string' ||
|
|
25
|
+
typeof row.agentId !== 'string' ||
|
|
26
|
+
typeof row.title !== 'string' ||
|
|
27
|
+
typeof row.path !== 'string' ||
|
|
28
|
+
typeof row.chunkId !== 'string' ||
|
|
29
|
+
typeof row.content !== 'string') {
|
|
30
|
+
return [];
|
|
31
|
+
}
|
|
32
|
+
return [
|
|
33
|
+
{
|
|
34
|
+
documentId: row.documentId,
|
|
35
|
+
agentId: row.agentId,
|
|
36
|
+
title: row.title,
|
|
37
|
+
path: row.path,
|
|
38
|
+
chunkId: row.chunkId,
|
|
39
|
+
chunkOrdinal: typeof row.chunkOrdinal === 'number' ? row.chunkOrdinal : 0,
|
|
40
|
+
content: row.content,
|
|
41
|
+
tags: Array.isArray(row.tags) ? row.tags.filter((item) => typeof item === 'string') : []
|
|
42
|
+
}
|
|
43
|
+
];
|
|
44
|
+
});
|
|
19
45
|
};
|
|
20
46
|
const toRows = (documents) => documents.flatMap((document) => document.chunks.map((chunk) => ({
|
|
21
47
|
documentId: document.document.id,
|
|
@@ -23,12 +49,60 @@ const toRows = (documents) => documents.flatMap((document) => document.chunks.ma
|
|
|
23
49
|
title: document.document.title,
|
|
24
50
|
path: document.document.path,
|
|
25
51
|
chunkId: chunk.id,
|
|
52
|
+
chunkOrdinal: chunk.ordinal,
|
|
26
53
|
content: chunk.content,
|
|
27
54
|
tags: document.document.tags
|
|
28
55
|
})));
|
|
29
56
|
const writeManifest = async (vaultPath, manifest) => {
|
|
30
57
|
await writeFile(toManifestPath(vaultPath), `${JSON.stringify(manifest, null, 2)}\n`, 'utf8');
|
|
31
58
|
};
|
|
59
|
+
/**
 * Loads and normalises the search-pack manifest for a vault.
 *
 * Only manifests with format 'private-v2' and version 2 or 3 are accepted.
 * Missing or wrongly-typed fields are replaced with defaults; version 3
 * manifests additionally carry a packIndex whose malformed entries (not an
 * object, or without string fileName / tokenBloomB64) are dropped.
 *
 * @param {string} vaultPath - Vault root passed to toManifestPath.
 * @returns {Promise<object|null>} The normalised manifest, or null when the
 *   file cannot be read or parsed, or is an unsupported version/format.
 */
const readManifest = async (vaultPath) => {
    const numberOr = (value, fallback) => (typeof value === 'number' ? value : fallback);
    const toPackEntry = (entry) => {
        const usable = Boolean(entry) &&
            typeof entry === 'object' &&
            typeof entry.fileName === 'string' &&
            typeof entry.tokenBloomB64 === 'string';
        if (!usable) {
            return null;
        }
        return {
            fileName: entry.fileName,
            recordCount: numberOr(entry.recordCount, 0),
            agents: Array.isArray(entry.agents) ? entry.agents.filter((item) => typeof item === 'string') : [],
            tokenBloomB64: entry.tokenBloomB64
        };
    };
    try {
        const parsed = JSON.parse(await readFile(toManifestPath(vaultPath), 'utf8'));
        const supported = parsed.format === 'private-v2' && (parsed.version === 2 || parsed.version === 3);
        if (!supported) {
            return null;
        }
        const createdAt = typeof parsed.createdAt === 'string' ? parsed.createdAt : new Date().toISOString();
        const recordCount = numberOr(parsed.recordCount, 0);
        if (parsed.version === 2) {
            return {
                version: 2,
                createdAt,
                packCount: numberOr(parsed.packCount, 0),
                recordCount,
                format: 'private-v2'
            };
        }
        const packIndex = Array.isArray(parsed.packIndex)
            ? parsed.packIndex.map(toPackEntry).filter((entry) => entry !== null)
            : [];
        return {
            version: 3,
            createdAt,
            packCount: numberOr(parsed.packCount, packIndex.length),
            recordCount,
            format: 'private-v2',
            packIndex
        };
    }
    catch {
        // Unreadable, unparsable or non-object manifests all count as "no manifest".
        return null;
    }
};
|
|
32
106
|
const chunkRows = (rows, size) => {
|
|
33
107
|
const chunks = [];
|
|
34
108
|
for (let index = 0; index < rows.length; index += size) {
|
|
@@ -57,6 +131,45 @@ const countOccurrences = (text, token) => {
|
|
|
57
131
|
}
|
|
58
132
|
return hits;
|
|
59
133
|
};
|
|
134
|
+
/**
 * Seeded 32-bit string hash in the FNV-1a style: each UTF-16 code unit is
 * XOR-ed into the accumulator, which is then multiplied by the 32-bit FNV
 * prime 16777619.
 *
 * @param {string} token - Text to hash.
 * @param {number} seed - Initial accumulator value (coerced to uint32).
 * @returns {number} Unsigned 32-bit integer.
 */
const hashToken = (token, seed) => {
    let accumulator = seed >>> 0;
    let position = 0;
    while (position < token.length) {
        accumulator = Math.imul(accumulator ^ token.charCodeAt(position), 16777619) >>> 0;
        position += 1;
    }
    return accumulator;
};
|
|
142
|
+
/** Allocates an empty (all-zero) bloom filter of bloomBytes bytes. */
const createBloom = () => {
    const emptyFilter = new Uint8Array(bloomBytes);
    return emptyFilter;
};
|
|
143
|
+
/**
 * Records a token in the bloom filter by setting one bit per seed in
 * bloomSeeds. Mutates `bloom` in place and returns nothing.
 */
const bloomAdd = (bloom, token) => {
    for (const seed of bloomSeeds) {
        const bitIndex = hashToken(token, seed) % bloomBitSize;
        const byteIndex = Math.floor(bitIndex / 8);
        const mask = 1 << (bitIndex % 8);
        bloom[byteIndex] |= mask;
    }
};
|
|
149
|
+
/**
 * Bloom membership test: false means the token was definitely never added;
 * true means every bit derived from bloomSeeds is set, so it may have been.
 */
const bloomMayContain = (bloom, token) => {
    for (const seed of bloomSeeds) {
        const bitIndex = hashToken(token, seed) % bloomBitSize;
        const mask = 1 << (bitIndex % 8);
        if ((bloom[Math.floor(bitIndex / 8)] & mask) === 0) {
            return false;
        }
    }
    return true;
};
|
|
153
|
+
/**
 * Builds one bloom filter covering every token found in the title, path,
 * tags and content of the given rows.
 */
const bloomFromRows = (rows) => rows.reduce((bloom, row) => {
    const searchableText = [row.title, row.path, row.tags.join(' '), row.content].join(' ');
    tokenize(searchableText).forEach((token) => {
        bloomAdd(bloom, token);
    });
    return bloom;
}, createBloom());
|
|
160
|
+
/** Serialises a bloom filter's bytes as an unpadded base64url string. */
const bloomToBase64 = (bloom) => {
    const bytes = Buffer.from(bloom);
    return bytes.toString('base64url');
};
|
|
161
|
+
/**
 * Decodes a base64url-encoded bloom filter.
 *
 * @param {string} value - Encoded filter, expected to decode to exactly
 *   bloomBytes bytes.
 * @returns {Uint8Array} The decoded filter. When the value cannot be decoded
 *   or has the wrong length, a saturated filter (every bit set) is returned,
 *   so membership checks answer "maybe" and the pack is still scanned. An
 *   all-zero fallback would answer "definitely not" for every token and
 *   silently hide a pack whose manifest entry is damaged.
 */
const bloomFromBase64 = (value) => {
    try {
        const decoded = Buffer.from(value, 'base64url');
        if (decoded.byteLength === bloomBytes) {
            return new Uint8Array(decoded);
        }
    }
    catch {
        // Non-string input makes Buffer.from throw; treated like a bad length below.
    }
    return new Uint8Array(bloomBytes).fill(0xff);
};
|
|
60
173
|
const computeTextScore = (row, tokens) => {
|
|
61
174
|
if (tokens.length === 0) {
|
|
62
175
|
return 0;
|
|
@@ -79,6 +192,7 @@ const toSearchResult = (row, score) => ({
|
|
|
79
192
|
title: row.title,
|
|
80
193
|
path: row.path,
|
|
81
194
|
chunkId: row.chunkId,
|
|
195
|
+
chunkOrdinal: row.chunkOrdinal,
|
|
82
196
|
content: row.content,
|
|
83
197
|
score,
|
|
84
198
|
textScore: score,
|
|
@@ -110,24 +224,55 @@ const writeRowsAsPrivatePacks = async (vaultPath, rows, clearExisting) => {
|
|
|
110
224
|
.map((name) => rm(join(directory, name), { force: true })));
|
|
111
225
|
}
|
|
112
226
|
const chunks = chunkRows(rows, rowChunkSize);
|
|
113
|
-
await Promise.all(chunks.map(async (chunk, index) => {
|
|
227
|
+
const packIndex = await Promise.all(chunks.map(async (chunk, index) => {
|
|
114
228
|
const fileName = `pack-${String(index + 1).padStart(4, '0')}.blpk`;
|
|
115
229
|
const serialized = `${chunk.map((row) => JSON.stringify(row)).join('\n')}\n`;
|
|
116
230
|
const compressed = await encodePrivatePack(vaultPath, Buffer.from(serialized, 'utf8'));
|
|
231
|
+
const tokenBloomB64 = bloomToBase64(bloomFromRows(chunk));
|
|
117
232
|
await writeFile(join(directory, fileName), compressed);
|
|
233
|
+
return {
|
|
234
|
+
fileName,
|
|
235
|
+
recordCount: chunk.length,
|
|
236
|
+
agents: Array.from(new Set(chunk.map((row) => row.agentId))).sort((left, right) => left.localeCompare(right)),
|
|
237
|
+
tokenBloomB64
|
|
238
|
+
};
|
|
118
239
|
}));
|
|
119
240
|
await writeManifest(vaultPath, {
|
|
120
|
-
version:
|
|
241
|
+
version: 3,
|
|
121
242
|
createdAt: new Date().toISOString(),
|
|
122
243
|
packCount: chunks.length,
|
|
123
244
|
recordCount: rows.length,
|
|
124
|
-
format: 'private-v2'
|
|
245
|
+
format: 'private-v2',
|
|
246
|
+
packIndex
|
|
125
247
|
});
|
|
126
248
|
return {
|
|
127
249
|
packCount: chunks.length,
|
|
128
250
|
recordCount: rows.length
|
|
129
251
|
};
|
|
130
252
|
};
|
|
253
|
+
/**
 * Chooses which pack files a search has to open, using the manifest's
 * per-pack agent list and token bloom filter as a prefilter.
 *
 * The directory listing is the source of truth for what exists:
 * - manifest entries whose file is not on disk are ignored, so a stale
 *   manifest cannot hand the caller a file name that does not exist;
 * - pack files on disk with no manifest entry cannot be prefiltered, so they
 *   are always appended as candidates rather than silently skipped.
 *
 * Fallbacks: all files when there is no usable version-3 manifest; the
 * agent-matching packs when no bloom filter matches any token; all files
 * when no indexed pack matches the agent either.
 *
 * @param {string} vaultPath - Vault root.
 * @param {string[]} tokens - Query tokens; a pack qualifies when its bloom
 *   filter may contain at least one of them.
 * @param {string} [agentId] - Optional agent filter (trimmed before use).
 * @returns {Promise<string[]>} Pack file names to scan.
 */
const selectCandidatePackFiles = async (vaultPath, tokens, agentId) => {
    const allFiles = await sortedPackFiles(vaultPath);
    if (allFiles.length === 0) {
        return [];
    }
    const manifest = await readManifest(vaultPath);
    if (!manifest || manifest.version !== 3 || !Array.isArray(manifest.packIndex)) {
        return allFiles;
    }
    const filesOnDisk = new Set(allFiles);
    const indexed = manifest.packIndex.filter((entry) => filesOnDisk.has(entry.fileName));
    const indexedNames = new Set(indexed.map((entry) => entry.fileName));
    const unindexed = allFiles.filter((fileName) => !indexedNames.has(fileName));
    const withUnindexed = (entries) => [...entries.map((entry) => entry.fileName), ...unindexed];
    const normalizedAgent = agentId?.trim();
    const byAgent = indexed.filter((entry) => normalizedAgent ? entry.agents.includes(normalizedAgent) : true);
    if (tokens.length === 0) {
        return withUnindexed(byAgent);
    }
    const byToken = byAgent.filter((entry) => {
        const bloom = bloomFromBase64(entry.tokenBloomB64);
        return tokens.some((token) => bloomMayContain(bloom, token));
    });
    if (byToken.length > 0) {
        return withUnindexed(byToken);
    }
    return byAgent.length > 0 ? withUnindexed(byAgent) : allFiles;
};
|
|
131
276
|
export const buildSearchPacks = async (vaultPath, documents) => {
|
|
132
277
|
return writeRowsAsPrivatePacks(vaultPath, toRows(documents), true);
|
|
133
278
|
};
|
|
@@ -158,14 +303,19 @@ export const searchInPacks = async (vaultPath, query, limit, agentId) => {
|
|
|
158
303
|
if (limit <= 0 || tokens.length === 0) {
|
|
159
304
|
return [];
|
|
160
305
|
}
|
|
161
|
-
const files = await
|
|
306
|
+
const files = await selectCandidatePackFiles(vaultPath, tokens, normalizedAgent);
|
|
162
307
|
if (files.length === 0) {
|
|
163
308
|
return [];
|
|
164
309
|
}
|
|
165
310
|
const scored = [];
|
|
166
311
|
for (const file of files) {
|
|
167
312
|
const rows = await parseRowsFromPack(vaultPath, await readFile(join(toPackDirectory(vaultPath), file)));
|
|
168
|
-
rows.
|
|
313
|
+
const traversal = middleOutIndices(rows.length, Math.floor(rows.length / 2));
|
|
314
|
+
traversal.forEach((rowIndex) => {
|
|
315
|
+
const row = rows[rowIndex];
|
|
316
|
+
if (!row) {
|
|
317
|
+
return;
|
|
318
|
+
}
|
|
169
319
|
if (normalizedAgent && row.agentId !== normalizedAgent) {
|
|
170
320
|
return;
|
|
171
321
|
}
|
package/docs/AGENT_USAGE.md
CHANGED
|
@@ -465,6 +465,7 @@ Search modes:
|
|
|
465
465
|
- `semantic`: local deterministic embedding similarity.
|
|
466
466
|
|
|
467
467
|
Hybrid results are cached in-memory for a short TTL and invalidated when `.brainlink/index.json` changes.
|
|
468
|
+
Context assembly uses middle-out ordering inside each note: the highest-scoring chunk is selected first, then nearby chunks are expanded while token budget allows.
|
|
468
469
|
|
|
469
470
|
### Build Agent Context
|
|
470
471
|
|
|
@@ -635,6 +636,7 @@ GET /api/validate
|
|
|
635
636
|
The HTTP API is read-only. Use the CLI for writes and indexing.
|
|
636
637
|
|
|
637
638
|
Indexing writes private encrypted search packs at `.brainlink/search-packs/*.blpk` for resilient retrieval and portability.
|
|
639
|
+
Pack search now uses compressed-space prefiltering (token bloom index per pack) before decrypting/reading pack payloads.
|
|
638
640
|
Pack decryption keys are resolved from `$BRAINLINK_HOME/keys` (or `BRAINLINK_SEARCH_PACK_KEY` when explicitly set).
|
|
639
641
|
|
|
640
642
|
## Agent Integration Contract
|
package/docs/ARCHITECTURE.md
CHANGED
|
@@ -138,8 +138,10 @@ read markdown files
|
|
|
138
138
|
question
|
|
139
139
|
-> selected mode: fts | semantic | hybrid
|
|
140
140
|
-> optional query embedding
|
|
141
|
+
-> optional compressed pack prefilter (token bloom)
|
|
141
142
|
-> lexical scoring and/or semantic cosine scoring
|
|
142
143
|
-> cosine similarity over candidate chunks
|
|
144
|
+
-> middle-out context expansion around strongest chunk
|
|
143
145
|
-> ranked chunks with textScore and semanticScore
|
|
144
146
|
-> token-budget selection
|
|
145
147
|
-> Markdown context package
|
|
@@ -293,6 +295,7 @@ Markdown keeps the system portable, inspectable, Git-friendly, and compatible wi
|
|
|
293
295
|
Brainlink uses a local JSON index plus encrypted pack exports for fast rebuildable retrieval without external infrastructure.
|
|
294
296
|
Hybrid retrieval also uses a short-lived in-memory cache keyed by vault/query/agent and invalidated by index file mtime to reduce repeated query latency.
|
|
295
297
|
Indexing exports private encrypted pack files (`.brainlink/search-packs/*.blpk`) from indexed chunks for fast retrieval and recovery continuity.
|
|
298
|
+
Pack manifests include compressed-space token bloom metadata so retrieval can skip unrelated packs before decryption.
|
|
296
299
|
Pack encryption keys are resolved from `$BRAINLINK_HOME/keys` or from `BRAINLINK_SEARCH_PACK_KEY` when configured.
|
|
297
300
|
Legacy `.jsonl.gz` search packs are auto-upgraded to `.blpk` on first retrieval flow.
|
|
298
301
|
|
package/package.json
CHANGED