@knolo/core 3.2.0 → 3.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -319,10 +319,73 @@ Properties:
319
319
 
320
320
  ---
321
321
 
322
- # 📄 License
322
+ # 🕸 ClaimGraph API
323
323
 
324
- Apache-2.0
324
+ `@knolo/core` includes a deterministic ClaimGraph subsystem.
325
+
326
+ ## Build-time config
327
+
328
+ ```ts
329
+ type BuildPackOptions = {
330
+ graph?: {
331
+ enabled?: boolean; // default true
332
+ maxEdgesPerDoc?: number; // default 500
333
+ };
334
+ };
335
+ ```
336
+
337
+ ## Query-time config
338
+
339
+ ```ts
340
+ type QueryOptions = {
341
+ graph?: {
342
+ expand?: boolean; // default false
343
+ maxExtraTerms?: number; // default 12
344
+ predicates?: string[]; // default ['defined_as', 'is', 'mentions', 'ref']
345
+ };
346
+ };
347
+ ```
348
+
349
+ ## Exports
325
350
 
351
+ ```ts
352
+ import {
353
+ buildClaimGraph,
354
+ getClaimGraph,
355
+ applyClaimGraphLog,
356
+ mergeClaimGraphLogs,
357
+ expandQueryWithGraph,
358
+ createGraphLog,
359
+ appendOp,
360
+ } from '@knolo/core';
361
+ ```
362
+
363
+ Types:
364
+
365
+ * `ClaimNode`
366
+ * `ClaimEdge`
367
+ * `ClaimGraph`
368
+ * `ClaimOp`
369
+ * `ClaimGraphLog`
370
+
371
+ ## Notes on determinism and bounds
372
+
373
+ * Node IDs are hash-derived from normalized labels.
374
+ * Edge IDs are hash-derived from `(from, predicate, to, evidence)`.
375
+ * Node labels are normalized and deterministically truncated.
376
+ * Evidence arrays are sorted and de-duplicated.
377
+ * Node/edge arrays are sorted by ID in final graph.
378
+ * Extraction is bounded with `maxEdgesPerDoc`.
379
+ * Query expansion is bounded with `maxExtraTerms` and stable ordering.
326
380
 
381
+ ## Pack format note
327
382
 
383
+ `.knolo` binary layout now supports an optional trailing ClaimGraph JSON section after existing sections.
384
+ Runtimes that ignore unknown trailing bytes remain compatible.
385
+
386
+ ---
387
+
388
+ # 📄 License
389
+
390
+ Apache-2.0
328
391
 
package/dist/builder.d.ts CHANGED
@@ -16,5 +16,9 @@ export type BuildPackOptions = {
16
16
  perVectorScale?: true;
17
17
  };
18
18
  };
19
+ graph?: {
20
+ enabled?: boolean;
21
+ maxEdgesPerDoc?: number;
22
+ };
19
23
  };
20
24
  export declare function buildPack(docs: BuildInputDoc[], opts?: BuildPackOptions): Promise<Uint8Array>;
package/dist/builder.js CHANGED
@@ -9,6 +9,7 @@ import { tokenize } from './tokenize.js';
9
9
  import { getTextEncoder } from './utils/utf8.js';
10
10
  import { encodeScaleF16, quantizeEmbeddingInt8L2Norm } from './semantic.js';
11
11
  import { validateAgentRegistry } from './agent.js';
12
+ import { buildClaimGraph } from './graph/build_claim_graph.js';
12
13
  export async function buildPack(docs, opts = {}) {
13
14
  const normalizedDocs = validateDocs(docs);
14
15
  // Prepare blocks (strip MD) and carry heading/docId for optional boosts.
@@ -23,6 +24,10 @@ export async function buildPack(docs, opts = {}) {
23
24
  const totalTokens = blockTokenLens.reduce((sum, len) => sum + len, 0);
24
25
  const avgBlockLen = blocks.length ? totalTokens / blocks.length : 1;
25
26
  const agents = normalizeAgents(opts.agents);
27
+ const graphEnabled = opts.graph?.enabled ?? true;
28
+ const claimGraph = graphEnabled
29
+ ? buildClaimGraph(normalizedDocs, { maxEdgesPerDoc: opts.graph?.maxEdgesPerDoc })
30
+ : null;
26
31
  const meta = {
27
32
  version: 3,
28
33
  stats: {
@@ -32,6 +37,15 @@ export async function buildPack(docs, opts = {}) {
32
37
  avgBlockLen,
33
38
  },
34
39
  ...(agents ? { agents } : {}),
40
+ ...(claimGraph
41
+ ? {
42
+ claimGraph: {
43
+ version: 1,
44
+ nodes: claimGraph.nodes.length,
45
+ edges: claimGraph.edges.length,
46
+ },
47
+ }
48
+ : {}),
35
49
  };
36
50
  // Persist blocks as objects to optionally carry heading/docId/token length.
37
51
  const blocksPayload = blocks.map((b, i) => ({
@@ -54,6 +68,7 @@ export async function buildPack(docs, opts = {}) {
54
68
  ? enc.encode(JSON.stringify(semanticSection.semJson))
55
69
  : undefined;
56
70
  const semBlob = semanticSection?.semBlob;
71
+ const graphBytes = claimGraph ? enc.encode(JSON.stringify(claimGraph)) : undefined;
57
72
  const totalLength = 4 +
58
73
  metaBytes.length +
59
74
  4 +
@@ -64,7 +79,8 @@ export async function buildPack(docs, opts = {}) {
64
79
  blocksBytes.length +
65
80
  (semanticEnabled && semBytes && semBlob
66
81
  ? 4 + semBytes.length + 4 + semBlob.length
67
- : 0);
82
+ : 0) +
83
+ (graphBytes ? 4 + graphBytes.length : 0);
68
84
  const out = new Uint8Array(totalLength);
69
85
  const dv = new DataView(out.buffer);
70
86
  let offset = 0;
@@ -98,6 +114,12 @@ export async function buildPack(docs, opts = {}) {
98
114
  dv.setUint32(offset, semBlob.length, true);
99
115
  offset += 4;
100
116
  out.set(semBlob, offset);
117
+ offset += semBlob.length;
118
+ }
119
+ if (graphBytes) {
120
+ dv.setUint32(offset, graphBytes.length, true);
121
+ offset += 4;
122
+ out.set(graphBytes, offset);
101
123
  }
102
124
  return out;
103
125
  }
@@ -0,0 +1,5 @@
1
+ import type { BuildInputDoc } from '../builder.js';
2
+ import type { ClaimGraph } from './claim_graph.js';
3
+ export declare function buildClaimGraph(docs: BuildInputDoc[], opts?: {
4
+ maxEdgesPerDoc?: number;
5
+ }): ClaimGraph;
@@ -0,0 +1,88 @@
1
+ import { canonicalEvidence, computeEdgeId, computeNodeId, finalizeGraph, normalizeClaimLabel, } from './claim_graph.js';
2
+ const DEF_RE = /^([A-Za-z0-9 _-]{2,80})\s+(is|are)\s+(.{2,120})[.?!]/;
3
+ const MD_LINK_RE = /\[([^\]]{1,200})\]\(([^)\s]{1,200})\)/g;
4
+ const WIKI_RE = /\[\[([^\]]{1,200})\]\]/g;
5
+ const HEADING_RE = /^(#{1,3})\s+(.+)$/gm;
6
+ const STOPWORDS = new Set(['a', 'an', 'and', 'or', 'the', 'it', 'they', 'this', 'that', 'these', 'those']);
7
/**
 * Extracts a ClaimGraph from a list of documents.
 *
 * For every document the extractors run in a fixed order:
 *   1. markdown links      -> `ref` edge from the link text to the link target
 *   2. wiki links          -> `mentions` edge from the document label to the link text
 *   3. headings            -> `defined_as` edge from the heading to the first sentence after it
 *   4. "X is/are Y" claims -> `is` edge from the subject to the object snippet
 *
 * The document's edges are then sorted by id, truncated to `maxEdgesPerDoc`
 * (default 500, never less than 1) and merged into the global edge map.
 * Note that nodes are registered while edges are collected, i.e. before the
 * truncation, so nodes of dropped edges still end up in the graph.
 *
 * @param docs Documents to scan; each needs `text` and may carry `id` / `heading`.
 * @param opts Optional `{ maxEdgesPerDoc }` bound.
 * @returns The graph as returned by `finalizeGraph`.
 */
export function buildClaimGraph(docs, opts = {}) {
    const edgeLimit = Math.max(1, opts.maxEdgesPerDoc ?? 500);
    const nodeById = new Map();
    const edgeById = new Map();
    const byEdgeId = (left, right) => left.id.localeCompare(right.id);

    for (const [docIndex, doc] of docs.entries()) {
        const docLabel = normalizeLabel(doc.id || doc.heading || `doc_${docIndex}`);
        const source = doc.text;
        const docEdges = [];

        // 1. [text](target) markdown links.
        for (const link of source.matchAll(MD_LINK_RE)) {
            const linkText = normalizeLabel(link[1]);
            const linkTarget = normalizeLabel(link[2]);
            addEdge(docEdges, nodeById, linkText, 'ref', linkTarget, [docIndex]);
        }

        // 2. [[wiki]] links are attributed to the document itself.
        for (const wiki of source.matchAll(WIKI_RE)) {
            addEdge(docEdges, nodeById, docLabel, 'mentions', normalizeLabel(wiki[1]), [docIndex]);
        }

        // 3. Headings are "defined" by the first sentence that follows them.
        for (const heading of Array.from(source.matchAll(HEADING_RE))) {
            const headingLabel = normalizeLabel(heading[2] || '');
            const headingEnd = (heading.index ?? 0) + heading[0].length;
            const definition = firstSentenceAfter(source, headingEnd);
            if (definition) {
                addEdge(docEdges, nodeById, headingLabel, 'defined_as', normalizeLabel(definition), [docIndex]);
            }
        }

        // 4. Sentence-level "subject is/are object" claims.
        for (const sentence of splitSentences(source)) {
            const claim = DEF_RE.exec(sentence);
            if (claim) {
                const subject = normalizeLabel(claim[1]);
                if (subject && !isStopwordOnly(subject)) {
                    addEdge(docEdges, nodeById, subject, 'is', normalizeLabel(claim[3]), [docIndex]);
                }
            }
        }

        // Deterministic truncation: order by id first, then keep the head.
        docEdges.sort(byEdgeId);
        for (const edge of docEdges.slice(0, edgeLimit)) {
            const known = edgeById.get(edge.id);
            if (!known) {
                edgeById.set(edge.id, edge);
                continue;
            }
            known.evidence = canonicalEvidence([...(known.evidence ?? []), ...(edge.evidence ?? [])]);
        }
    }

    return finalizeGraph({
        version: 1,
        nodes: [...nodeById.values()],
        edges: [...edgeById.values()],
    });
}
53
/**
 * Appends one edge to the per-document edge list `local`, registering both
 * endpoint nodes in `nodeById` on the way. Does nothing when either label is
 * empty.
 *
 * @param local     Per-document edge accumulator (mutated).
 * @param nodeById  Shared node map (mutated via `ensureNode`).
 * @param fromLabel Normalized label of the source node.
 * @param p         Predicate name.
 * @param toLabel   Normalized label of the target node.
 * @param evidence  Evidence indices; canonicalized before use.
 */
function addEdge(local, nodeById, fromLabel, p, toLabel, evidence) {
    const hasBothEndpoints = Boolean(fromLabel) && Boolean(toLabel);
    if (hasBothEndpoints) {
        const from = ensureNode(nodeById, fromLabel);
        const to = ensureNode(nodeById, toLabel);
        const canonical = canonicalEvidence(evidence);
        const id = computeEdgeId(from, p, to, canonical);
        local.push({ id, from, p, to, evidence: canonical });
    }
}
62
/**
 * Returns the node id for `label`, inserting a `{ id, label }` node into
 * `nodeById` the first time that id is seen. An existing entry is never
 * overwritten.
 */
function ensureNode(nodeById, label) {
    const nodeId = computeNodeId(label);
    if (nodeById.has(nodeId)) {
        return nodeId;
    }
    nodeById.set(nodeId, { id: nodeId, label });
    return nodeId;
}
68
/** Normalizes a label via `normalizeClaimLabel`, capped at 200 characters. */
function normalizeLabel(input) {
    const maxLabelLength = 200;
    return normalizeClaimLabel(input, maxLabelLength);
}
71
/**
 * Splits text into trimmed, non-empty sentences.
 *
 * CRLF line endings are converted to LF first; a split happens at every run
 * of whitespace that directly follows `.`, `?` or `!` (the punctuation stays
 * with the preceding sentence).
 */
function splitSentences(text) {
    const unixText = text.replace(/\r\n/g, '\n');
    const sentences = [];
    for (const piece of unixText.split(/(?<=[.?!])\s+/)) {
        const trimmed = piece.trim();
        if (trimmed) {
            sentences.push(trimmed);
        }
    }
    return sentences;
}
78
/**
 * Returns the first sentence found after position `startIdx`, limited to 240
 * characters, or `''` when nothing follows.
 *
 * The remainder of the line containing `startIdx` (up to and including the
 * following newline run) is discarded before sentences are split.
 */
function firstSentenceAfter(text, startIdx) {
    const afterStart = text.slice(startIdx);
    const body = afterStart.replace(/^[^\n]*\n+/, '').trim();
    if (body === '') {
        return '';
    }
    const [lead] = splitSentences(body);
    return (lead ?? '').slice(0, 240);
}
85
/**
 * True when `subject` contains at least one word and every whitespace-separated
 * word is in `STOPWORDS`.
 */
function isStopwordOnly(subject) {
    const words = subject.split(/\s+/).filter((word) => word.length > 0);
    if (words.length === 0) {
        return false;
    }
    return !words.some((word) => !STOPWORDS.has(word));
}
@@ -0,0 +1,34 @@
1
+ import type { Pack } from '../pack.runtime.js';
2
+ export type ClaimNode = {
3
+ id: string;
4
+ label: string;
5
+ props?: Record<string, string>;
6
+ };
7
+ export type ClaimEdge = {
8
+ id: string;
9
+ from: string;
10
+ p: string;
11
+ to: string;
12
+ evidence?: number[];
13
+ actor?: string;
14
+ ts?: number;
15
+ };
16
+ export type ClaimGraph = {
17
+ version: 1;
18
+ nodes: ClaimNode[];
19
+ edges: ClaimEdge[];
20
+ index?: {
21
+ labelToId?: Record<string, string>;
22
+ out?: Record<string, string[]>;
23
+ in?: Record<string, string[]>;
24
+ };
25
+ };
26
+ export declare function normalizeClaimLabel(label: string, maxLen?: number): string;
27
+ export declare function computeNodeId(label: string): string;
28
+ export declare function computeEdgeId(from: string, p: string, to: string, evidence?: number[]): string;
29
+ export declare function canonicalEvidence(evidence?: number[]): number[];
30
+ export declare function buildGraphIndex(graph: ClaimGraph): ClaimGraph['index'];
31
+ export declare function finalizeGraph(graph: ClaimGraph): ClaimGraph;
32
+ export declare function getClaimGraph(pack: Pack): ClaimGraph | null;
33
+ export declare function validateClaimGraph(input: unknown): ClaimGraph | null;
34
+ export declare function expandLabelToTerms(label: string): string[];
@@ -0,0 +1,65 @@
1
+ import { normalize, tokenize } from '../tokenize.js';
2
/**
 * Canonical form of a node label: run through `normalize`, collapse every
 * whitespace run to a single space, trim, then cut to at most `maxLen`
 * characters (default 200).
 */
export function normalizeClaimLabel(label, maxLen = 200) {
    const singleSpaced = normalize(label).replace(/\s+/g, ' ');
    const trimmed = singleSpaced.trim();
    return trimmed.slice(0, maxLen);
}
6
/** Node id: `n_` followed by the 32-bit hex hash of the normalized label. */
export function computeNodeId(label) {
    const digest = hash32Hex(normalizeClaimLabel(label));
    return `n_${digest}`;
}
9
/**
 * Edge id: `e_` followed by the 32-bit hex hash of the newline-joined tuple
 * `(from, p, to, evidence)`, where evidence is canonicalized and rendered as
 * a comma-separated list.
 */
export function computeEdgeId(from, p, to, evidence) {
    const evidenceKey = canonicalEvidence(evidence).join(',');
    const payload = `${from}\n${p}\n${to}\n${evidenceKey}`;
    return `e_${hash32Hex(payload)}`;
}
13
/**
 * Canonical evidence list: keeps only non-negative integers, removes
 * duplicates and sorts ascending. Missing or empty input yields `[]`.
 * A new array is always returned.
 */
export function canonicalEvidence(evidence) {
    if (!evidence?.length) {
        return [];
    }
    const valid = evidence.filter((n) => Number.isInteger(n) && n >= 0);
    const unique = [...new Set(valid)];
    unique.sort((left, right) => left - right);
    return unique;
}
18
/**
 * Builds the lookup index of a graph:
 *   - `labelToId`: normalized node label -> node id (a later node wins on a clash)
 *   - `out`:       node id -> sorted ids of edges leaving that node
 *   - `in`:        node id -> sorted ids of edges entering that node
 *
 * The dictionaries are accumulated in Maps and only converted to plain
 * objects at the end. Writing data-derived keys straight into `{}` breaks for
 * keys that collide with `Object.prototype` members: `out['constructor'] ||= []`
 * keeps the inherited function and the following `.push` throws, and assigning
 * a string to `obj['__proto__']` is silently ignored. Edge endpoints can be
 * arbitrary strings when they come from graph logs or mounted packs, so such
 * keys are reachable. `Object.fromEntries` defines every key as an own data
 * property, so the result is still a plain object.
 *
 * @param graph Graph whose `nodes` and `edges` are indexed.
 * @returns `{ labelToId, out, in }`.
 */
export function buildGraphIndex(graph) {
    const labelToId = new Map();
    const outgoing = new Map();
    const incoming = new Map();

    // Keys are stringified up front to mirror property-key coercion.
    const append = (buckets, nodeId, edgeId) => {
        const key = String(nodeId);
        const bucket = buckets.get(key);
        if (bucket) {
            bucket.push(edgeId);
        }
        else {
            buckets.set(key, [edgeId]);
        }
    };

    for (const node of graph.nodes) {
        labelToId.set(normalizeClaimLabel(node.label), node.id);
    }
    for (const edge of graph.edges) {
        append(outgoing, edge.from, edge.id);
        append(incoming, edge.to, edge.id);
    }
    for (const bucket of outgoing.values())
        bucket.sort();
    for (const bucket of incoming.values())
        bucket.sort();

    return {
        labelToId: Object.fromEntries(labelToId),
        out: Object.fromEntries(outgoing),
        in: Object.fromEntries(incoming),
    };
}
35
/**
 * Produces the canonical form of a graph without mutating the input:
 * nodes and edges are copied and sorted by id, every edge's evidence is
 * canonicalized, and a fresh index is attached.
 *
 * @param graph Graph with `nodes` and `edges`.
 * @returns A new `{ version: 1, nodes, edges, index }` object.
 */
export function finalizeGraph(graph) {
    const byId = (left, right) => left.id.localeCompare(right.id);

    const nodes = Array.from(graph.nodes);
    nodes.sort(byId);

    const edges = Array.from(graph.edges, (edge) => ({
        ...edge,
        evidence: canonicalEvidence(edge.evidence),
    }));
    edges.sort(byId);

    const finalized = { version: 1, nodes, edges };
    finalized.index = buildGraphIndex(finalized);
    return finalized;
}
44
/** Returns the pack's `claimGraph`, or `null` when the pack has none. */
export function getClaimGraph(pack) {
    const { claimGraph } = pack;
    if (claimGraph === undefined || claimGraph === null) {
        return null;
    }
    return claimGraph;
}
47
/**
 * Validates untrusted input (e.g. a JSON section parsed from a pack) and
 * returns it as a finalized ClaimGraph, or `null` when it is not one.
 *
 * Besides the envelope (`version === 1`, `nodes` and `edges` arrays), every
 * element is checked before `finalizeGraph` touches it. `finalizeGraph` sorts
 * by `id.localeCompare`, so a node or edge without a string `id` would throw
 * there instead of producing the documented `null`.
 *
 * Requirements per element:
 *   - node: object with string `id` and string `label`
 *   - edge: object with string `id`, `from`, `p`, `to`; `evidence` absent or an array
 *
 * @param input Arbitrary value.
 * @returns The finalized graph, or `null` if `input` is malformed.
 */
export function validateClaimGraph(input) {
    if (!input || typeof input !== 'object')
        return null;
    const g = input;
    if (g.version !== 1 || !Array.isArray(g.nodes) || !Array.isArray(g.edges))
        return null;

    const isRecord = (value) => value !== null && typeof value === 'object';
    const isNode = (n) => isRecord(n) && typeof n.id === 'string' && typeof n.label === 'string';
    const isEdge = (e) => isRecord(e) &&
        typeof e.id === 'string' &&
        typeof e.from === 'string' &&
        typeof e.p === 'string' &&
        typeof e.to === 'string' &&
        (e.evidence === undefined || e.evidence === null || Array.isArray(e.evidence));

    if (!g.nodes.every(isNode) || !g.edges.every(isEdge))
        return null;
    return finalizeGraph({ version: 1, nodes: g.nodes, edges: g.edges });
}
55
/** Tokenizes the normalized label and returns the `term` of each token. */
export function expandLabelToTerms(label) {
    const tokens = tokenize(normalizeClaimLabel(label));
    const terms = [];
    for (const token of tokens) {
        terms.push(token.term);
    }
    return terms;
}
58
/**
 * 32-bit FNV-1a hash over the UTF-16 code units of `input`, rendered as an
 * 8-character, zero-padded, lowercase hex string.
 */
function hash32Hex(input) {
    const FNV_OFFSET_BASIS = 0x811c9dc5;
    const FNV_PRIME = 0x01000193;
    let state = FNV_OFFSET_BASIS;
    let position = 0;
    while (position < input.length) {
        // XOR the code unit in, then multiply with 32-bit wrap-around.
        state = Math.imul(state ^ input.charCodeAt(position), FNV_PRIME);
        position += 1;
    }
    const unsigned = state >>> 0;
    return unsigned.toString(16).padStart(8, '0');
}
@@ -0,0 +1,33 @@
1
+ import type { Pack } from '../pack.runtime.js';
2
+ import type { ClaimGraph } from './claim_graph.js';
3
+ export type ClaimOp = {
4
+ op: 'upsert_node';
5
+ id?: string;
6
+ label: string;
7
+ props?: Record<string, string>;
8
+ ts: number;
9
+ actor: string;
10
+ } | {
11
+ op: 'add_edge';
12
+ from: string;
13
+ p: string;
14
+ to: string;
15
+ evidence?: number[];
16
+ ts: number;
17
+ actor: string;
18
+ } | {
19
+ op: 'tombstone_edge';
20
+ edgeId: string;
21
+ ts: number;
22
+ actor: string;
23
+ };
24
+ export type ClaimGraphLog = {
25
+ version: 1;
26
+ ops: ClaimOp[];
27
+ };
28
+ export declare function createGraphLog(): ClaimGraphLog;
29
+ export declare function appendOp(log: ClaimGraphLog, op: ClaimOp): ClaimGraphLog;
30
+ export declare function mergeClaimGraphLogs(a: ClaimGraphLog, b: ClaimGraphLog): ClaimGraphLog;
31
+ export declare function serializeClaimGraphLog(log: ClaimGraphLog): Uint8Array;
32
+ export declare function deserializeClaimGraphLog(data: Uint8Array): ClaimGraphLog;
33
+ export declare function applyClaimGraphLog(graphOrPack: ClaimGraph | Pack, log: ClaimGraphLog): ClaimGraph;
@@ -0,0 +1,106 @@
1
+ import { canonicalEvidence, computeEdgeId, computeNodeId, finalizeGraph, normalizeClaimLabel, } from './claim_graph.js';
2
/** Creates a new, empty version-1 graph log. */
export function createGraphLog() {
    const ops = [];
    return { version: 1, ops };
}
5
/**
 * Returns a new log containing the ops of `log` followed by `op`.
 * The input log and its `ops` array are left untouched.
 */
export function appendOp(log, op) {
    const ops = Array.from(log.ops);
    ops.push(op);
    return { version: 1, ops };
}
8
/**
 * Merges two logs into a new one: the ops of `a` and `b` are concatenated
 * and ordered with `compareOps`. Duplicates are kept; inputs are not mutated.
 */
export function mergeClaimGraphLogs(a, b) {
    const combined = Array.from(a.ops);
    for (const op of b.ops) {
        combined.push(op);
    }
    combined.sort(compareOps);
    return { version: 1, ops: combined };
}
11
/**
 * Encodes a log as UTF-8 JSON bytes. Ops are written in `compareOps` order;
 * the input log is not reordered.
 */
export function serializeClaimGraphLog(log) {
    const orderedOps = Array.from(log.ops).sort(compareOps);
    const json = JSON.stringify({ version: 1, ops: orderedOps });
    const encoder = new TextEncoder();
    return encoder.encode(json);
}
14
/**
 * Decodes a log from UTF-8 JSON bytes and returns it with ops in
 * `compareOps` order.
 *
 * Every op is validated before sorting. Without that, a malformed op (for
 * example one lacking `actor`) either passes through unnoticed when it is the
 * only op, or makes the sort comparator throw an unrelated `TypeError`
 * instead of the documented error.
 *
 * @param data UTF-8 encoded JSON of shape `{ version: 1, ops: ClaimOp[] }`.
 * @returns The decoded log.
 * @throws {SyntaxError} When the bytes are not valid JSON (from `JSON.parse`).
 * @throws {Error} `Invalid ClaimGraphLog payload` when the envelope or any op is malformed.
 */
export function deserializeClaimGraphLog(data) {
    const parsed = JSON.parse(new TextDecoder().decode(data));
    if (!parsed ||
        parsed.version !== 1 ||
        !Array.isArray(parsed.ops) ||
        !parsed.ops.every(isWellFormedClaimOp)) {
        throw new Error('Invalid ClaimGraphLog payload');
    }
    return { version: 1, ops: parsed.ops.sort(compareOps) };
}
/** Structural check of one decoded op against the three `ClaimOp` variants. */
function isWellFormedClaimOp(op) {
    if (op === null || typeof op !== 'object')
        return false;
    // `ts` and `actor` are what the sort comparator reads on every op.
    if (typeof op.ts !== 'number' || !Number.isFinite(op.ts) || typeof op.actor !== 'string')
        return false;
    switch (op.op) {
        case 'upsert_node':
            return (typeof op.label === 'string' &&
                (op.id === undefined || typeof op.id === 'string') &&
                (op.props === undefined || (op.props !== null && typeof op.props === 'object')));
        case 'add_edge':
            return (typeof op.from === 'string' &&
                typeof op.p === 'string' &&
                typeof op.to === 'string' &&
                (op.evidence === undefined || Array.isArray(op.evidence)));
        case 'tombstone_edge':
            return typeof op.edgeId === 'string';
        default:
            return false;
    }
}
21
/**
 * Replays a log on top of a base graph and returns the finalized result.
 * Neither the base graph nor the log is mutated.
 *
 * Ops are processed in `compareOps` order:
 *   - `upsert_node` replaces the node with that id (explicit `id`, else the
 *     id derived from the normalized label) unless an upsert with a later
 *     stamp was already applied.
 *   - `add_edge` stores the edge under its computed id, merging evidence with
 *     an existing edge of the same id, and records the latest add stamp.
 *   - any other op is handled as a tombstone for `op.edgeId`.
 *
 * Afterwards an edge is removed when its tombstone stamp is strictly later
 * than its latest add stamp (edges from the base graph count as added at
 * `-Infinity`). Surviving edges get placeholder nodes (label = id) for any
 * endpoint that has no node.
 *
 * @param graphOrPack A ClaimGraph, or a pack whose `claimGraph` (or an empty
 *                    graph when absent) is used as the base.
 * @param log         The log to apply.
 * @returns The resulting graph as returned by `finalizeGraph`.
 */
export function applyClaimGraphLog(graphOrPack, log) {
    let baseGraph = graphOrPack;
    if (isPack(graphOrPack)) {
        baseGraph = graphOrPack.claimGraph ?? { version: 1, nodes: [], edges: [] };
    }

    // Work on copies so the base graph stays untouched.
    const copyNode = (node) => [node.id, { ...node, props: node.props ? { ...node.props } : undefined }];
    const copyEdge = (edge) => [edge.id, { ...edge, evidence: canonicalEvidence(edge.evidence) }];
    const nodeById = new Map(baseGraph.nodes.map(copyNode));
    const edgeById = new Map(baseGraph.edges.map(copyEdge));

    const nodeStamp = new Map();
    const addStamp = new Map();
    const tombstoneStamp = new Map();
    // True when `stamp` is at least as recent as what `stamps` holds for `key`.
    const isLatest = (stamps, key, stamp) => {
        const previous = stamps.get(key);
        return !previous || compareStamp(stamp, previous) >= 0;
    };

    const orderedOps = [...log.ops].sort(compareOps);
    for (const op of orderedOps) {
        switch (op.op) {
            case 'upsert_node': {
                const label = normalizeClaimLabel(op.label);
                const id = op.id || computeNodeId(label);
                if (isLatest(nodeStamp, id, [op.ts, op.actor])) {
                    nodeStamp.set(id, [op.ts, op.actor]);
                    nodeById.set(id, { id, label, props: op.props ? { ...op.props } : undefined });
                }
                break;
            }
            case 'add_edge': {
                const evidence = canonicalEvidence(op.evidence);
                const edgeId = computeEdgeId(op.from, op.p, op.to, evidence);
                if (isLatest(addStamp, edgeId, [op.ts, op.actor])) {
                    addStamp.set(edgeId, [op.ts, op.actor]);
                }
                const current = edgeById.get(edgeId);
                edgeById.set(edgeId, {
                    id: edgeId,
                    from: op.from,
                    p: op.p,
                    to: op.to,
                    evidence: canonicalEvidence([...(current?.evidence ?? []), ...evidence]),
                    actor: op.actor,
                    ts: op.ts,
                });
                break;
            }
            default: {
                // Everything else is treated as a tombstone.
                if (isLatest(tombstoneStamp, op.edgeId, [op.ts, op.actor])) {
                    tombstoneStamp.set(op.edgeId, [op.ts, op.actor]);
                }
            }
        }
    }

    for (const [edgeId, edge] of edgeById) {
        const added = addStamp.get(edgeId) ?? [-Infinity, ''];
        const removed = tombstoneStamp.get(edgeId);
        const isDeleted = Boolean(removed) && compareStamp(removed, added) > 0;
        if (isDeleted) {
            edgeById.delete(edgeId);
            continue;
        }
        for (const endpoint of [edge.from, edge.to]) {
            if (!nodeById.has(endpoint)) {
                nodeById.set(endpoint, { id: endpoint, label: endpoint });
            }
        }
    }

    return finalizeGraph({
        version: 1,
        nodes: [...nodeById.values()],
        edges: [...edgeById.values()],
    });
}
82
/**
 * Total order for ops: by `ts`, then by `actor` (`localeCompare`), then by
 * the op's stable serialization.
 */
function compareOps(a, b) {
    if (a.ts === b.ts) {
        const byActor = a.actor.localeCompare(b.actor);
        return byActor !== 0
            ? byActor
            : stableSerializeOp(a).localeCompare(stableSerializeOp(b));
    }
    return a.ts - b.ts;
}
90
/**
 * Renders an op as a `|`-separated string used as the final tie-breaker when
 * ordering ops. Anything that is neither `upsert_node` nor `add_edge` is
 * rendered as a tombstone.
 */
function stableSerializeOp(op) {
    switch (op.op) {
        case 'upsert_node': {
            const id = op.id || '';
            const label = normalizeClaimLabel(op.label);
            const props = JSON.stringify(op.props || {});
            return `upsert_node|${id}|${label}|${props}`;
        }
        case 'add_edge': {
            const evidence = canonicalEvidence(op.evidence).join(',');
            return `add_edge|${op.from}|${op.p}|${op.to}|${evidence}`;
        }
        default:
            return `tombstone_edge|${op.edgeId}`;
    }
}
99
/**
 * Compares two `[ts, actor]` stamps: by timestamp first, by actor
 * (`localeCompare`) when the timestamps are equal.
 */
function compareStamp(a, b) {
    const [leftTs, leftActor] = a;
    const [rightTs, rightActor] = b;
    return leftTs !== rightTs ? leftTs - rightTs : leftActor.localeCompare(rightActor);
}
104
/** Duck-type check: a pack is anything with truthy `meta` and `blocks`. */
function isPack(input) {
    if (!input.meta) {
        return false;
    }
    return Boolean(input.blocks);
}
@@ -0,0 +1,6 @@
1
+ import type { Pack } from '../pack.runtime.js';
2
+ export type GraphQueryExpandOptions = {
3
+ maxExtraTerms?: number;
4
+ predicates?: string[];
5
+ };
6
+ export declare function expandQueryWithGraph(pack: Pack, queryString: string, opts?: GraphQueryExpandOptions): string;
@@ -0,0 +1,57 @@
1
+ import { normalize, tokenize } from '../tokenize.js';
2
+ import { expandLabelToTerms } from './claim_graph.js';
3
/**
 * Appends graph-derived terms to a query string.
 *
 * Steps:
 *   1. Candidate nodes are those whose normalized label equals a query token,
 *      plus those whose label starts with a query token (scanned in sorted
 *      order, capped at `4 * maxExtraTerms` candidates).
 *   2. For each candidate (sorted by id) its outgoing edges (sorted by id)
 *      with an allowed predicate are followed, and the target label's terms
 *      that are not already in the query are collected, up to `maxExtraTerms`.
 *   3. The collected terms are appended to the query in sorted order.
 *
 * The query is returned unchanged when the pack has no usable graph, the
 * query has no tokens, or no extra term is found.
 *
 * Changes over the previous implementation (same results for valid input):
 *   - target nodes are resolved through a Map instead of a linear
 *     `graph.nodes.find` inside the nested edge loop;
 *   - a `maxExtraTerms` that evaluates to NaN falls back to the default of 12,
 *     because every `size >= NaN` check is false and would disable all bounds;
 *   - the query tokens are sorted on a copy instead of in place.
 *
 * @param pack        Mounted pack; only `pack.claimGraph` is read.
 * @param queryString Raw query.
 * @param opts        `maxExtraTerms` (default 12, minimum 1) and `predicates`
 *                    (default `defined_as`, `is`, `mentions`, `ref`).
 * @returns The original or expanded query string.
 */
export function expandQueryWithGraph(pack, queryString, opts = {}) {
    const graph = pack.claimGraph;
    if (!graph || graph.nodes.length === 0 || graph.edges.length === 0)
        return queryString;

    const requestedMax = Math.max(1, opts.maxExtraTerms ?? 12);
    const maxExtraTerms = Number.isNaN(requestedMax) ? 12 : requestedMax;
    const candidateCap = maxExtraTerms * 4;
    const predicates = new Set((opts.predicates ?? ['defined_as', 'is', 'mentions', 'ref']).map((p) => normalize(p)));

    const qTokens = tokenize(queryString).map((t) => t.term);
    if (qTokens.length === 0)
        return queryString;
    const qSet = new Set(qTokens);

    // --- 1. Candidate nodes: exact label matches first, then prefix matches.
    const candidateNodeIds = new Set();
    const labelEntries = Object.entries(graph.index?.labelToId ?? {}).sort((a, b) => a[0].localeCompare(b[0]));
    for (const [labelNorm, nodeId] of labelEntries) {
        if (qSet.has(labelNorm))
            candidateNodeIds.add(nodeId);
    }
    for (const token of [...qTokens].sort()) {
        for (const [labelNorm, nodeId] of labelEntries) {
            if (labelNorm.startsWith(token))
                candidateNodeIds.add(nodeId);
            if (candidateNodeIds.size >= candidateCap)
                break;
        }
        if (candidateNodeIds.size >= candidateCap)
            break;
    }

    // --- 2. Follow outgoing edges of the candidates.
    const edgeById = new Map(graph.edges.map((e) => [e.id, e]));
    // The first node wins on duplicate ids, matching Array.prototype.find.
    const nodeById = new Map();
    for (const node of graph.nodes) {
        if (!nodeById.has(node.id))
            nodeById.set(node.id, node);
    }
    const outIdx = graph.index?.out ?? {};
    const extraTerms = new Set();
    const sortedNodeIds = [...candidateNodeIds].sort();
    for (const nodeId of sortedNodeIds) {
        const edgeIds = [...(outIdx[nodeId] ?? [])].sort();
        for (const edgeId of edgeIds) {
            const edge = edgeById.get(edgeId);
            if (!edge || !predicates.has(normalize(edge.p)))
                continue;
            const target = nodeById.get(edge.to);
            if (!target)
                continue;
            for (const term of expandLabelToTerms(target.label)) {
                if (!qSet.has(term))
                    extraTerms.add(term);
                if (extraTerms.size >= maxExtraTerms)
                    break;
            }
            if (extraTerms.size >= maxExtraTerms)
                break;
        }
        if (extraTerms.size >= maxExtraTerms)
            break;
    }

    // --- 3. Append the extra terms in a stable order.
    if (extraTerms.size === 0)
        return queryString;
    return `${queryString} ${[...extraTerms].sort().join(' ')}`.trim();
}
package/dist/index.d.ts CHANGED
@@ -3,12 +3,23 @@ export { query, lexConfidence, validateQueryOptions, validateSemanticQueryOption
3
3
  export { makeContextPatch } from './patch.js';
4
4
  export { buildPack } from './builder.js';
5
5
  export { quantizeEmbeddingInt8L2Norm, encodeScaleF16, decodeScaleF16, } from './semantic.js';
6
+ export { cosineSimilarity, normalizeVector } from './semantic/cosine.js';
7
+ export { createPackFingerprint, serializeSidecar, parseSidecar, validateSidecarForPack, } from './semantic/sidecar.js';
8
+ export { rerankCandidates } from './semantic/rerank.js';
9
+ export { assertProviderCompatible, ensureProviderModelId } from './semantic/provider.js';
6
10
  export { listAgents, getAgent, resolveAgent, buildSystemPrompt, isToolAllowed, assertToolAllowed, validateAgentRegistry, validateAgentDefinition, } from './agent.js';
11
+ export { getClaimGraph, validateClaimGraph, } from './graph/claim_graph.js';
12
+ export { buildClaimGraph } from './graph/build_claim_graph.js';
13
+ export { createGraphLog, appendOp, applyClaimGraphLog, mergeClaimGraphLogs, serializeClaimGraphLog, deserializeClaimGraphLog, } from './graph/log.js';
14
+ export { expandQueryWithGraph } from './graph/query_expand.js';
7
15
  export type { MountOptions, PackMeta, Pack } from './pack.runtime.js';
8
16
  export type { QueryOptions, Hit } from './query.js';
17
+ export type { EmbeddingProvider, SemanticSidecar, SemanticQueryOptions, RetrievalEvidence } from './semantic/types.js';
9
18
  export type { ContextPatch } from './patch.js';
10
19
  export type { BuildInputDoc, BuildPackOptions } from './builder.js';
11
20
  export type { AgentPromptTemplate, AgentToolPolicy, AgentRetrievalDefaults, AgentDefinitionV1, AgentRegistry, ResolveAgentInput, ResolvedAgent, } from './agent.js';
21
+ export type { ClaimGraph, ClaimNode, ClaimEdge } from './graph/claim_graph.js';
22
+ export type { ClaimGraphLog, ClaimOp } from './graph/log.js';
12
23
  export { parseToolCallV1FromText } from './tool_parse.js';
13
24
  export { nowIso, createTrace } from './trace.js';
14
25
  export { assertToolCallAllowed } from './tool_gate.js';
package/dist/index.js CHANGED
@@ -4,7 +4,15 @@ export { query, lexConfidence, validateQueryOptions, validateSemanticQueryOption
4
4
  export { makeContextPatch } from './patch.js';
5
5
  export { buildPack } from './builder.js';
6
6
  export { quantizeEmbeddingInt8L2Norm, encodeScaleF16, decodeScaleF16, } from './semantic.js';
7
+ export { cosineSimilarity, normalizeVector } from './semantic/cosine.js';
8
+ export { createPackFingerprint, serializeSidecar, parseSidecar, validateSidecarForPack, } from './semantic/sidecar.js';
9
+ export { rerankCandidates } from './semantic/rerank.js';
10
+ export { assertProviderCompatible, ensureProviderModelId } from './semantic/provider.js';
7
11
  export { listAgents, getAgent, resolveAgent, buildSystemPrompt, isToolAllowed, assertToolAllowed, validateAgentRegistry, validateAgentDefinition, } from './agent.js';
12
+ export { getClaimGraph, validateClaimGraph, } from './graph/claim_graph.js';
13
+ export { buildClaimGraph } from './graph/build_claim_graph.js';
14
+ export { createGraphLog, appendOp, applyClaimGraphLog, mergeClaimGraphLogs, serializeClaimGraphLog, deserializeClaimGraphLog, } from './graph/log.js';
15
+ export { expandQueryWithGraph } from './graph/query_expand.js';
8
16
  export { parseToolCallV1FromText } from './tool_parse.js';
9
17
  export { nowIso, createTrace } from './trace.js';
10
18
  export { assertToolCallAllowed } from './tool_gate.js';
@@ -1,4 +1,5 @@
1
1
  import type { AgentRegistry } from './agent.js';
2
+ import type { ClaimGraph } from './graph/claim_graph.js';
2
3
  export type MountOptions = {
3
4
  src: string | ArrayBufferLike | Uint8Array;
4
5
  };
@@ -11,6 +12,11 @@ export type PackMeta = {
11
12
  avgBlockLen?: number;
12
13
  };
13
14
  agents?: AgentRegistry;
15
+ claimGraph?: {
16
+ version: 1;
17
+ nodes: number;
18
+ edges: number;
19
+ };
14
20
  };
15
21
  export type Pack = {
16
22
  meta: PackMeta;
@@ -30,6 +36,7 @@ export type Pack = {
30
36
  vecs: Int8Array;
31
37
  scales?: Uint16Array;
32
38
  };
39
+ claimGraph?: ClaimGraph;
33
40
  };
34
41
  export declare function hasSemantic(pack: Pack): boolean;
35
42
  export declare function mountPack(opts: MountOptions): Promise<Pack>;
@@ -6,6 +6,7 @@
6
6
  */
7
7
  import { getTextDecoder } from './utils/utf8.js';
8
8
  import { validateAgentRegistry } from './agent.js';
9
+ import { validateClaimGraph } from './graph/claim_graph.js';
9
10
  export function hasSemantic(pack) {
10
11
  return Boolean(pack.semantic && pack.semantic.dims > 0 && pack.semantic.vecs.length > 0);
11
12
  }
@@ -75,16 +76,50 @@ export function mountPackFromBuffer(buf) {
75
76
  }
76
77
  }
77
78
  let semantic;
78
- if (offset < buf.byteLength) {
79
- const semLen = dv.getUint32(offset, true);
79
+ let claimGraph;
80
+ while (offset < buf.byteLength) {
81
+ const sectionStart = offset;
82
+ if (buf.byteLength - offset < 4)
83
+ break;
84
+ const jsonLen = dv.getUint32(offset, true);
80
85
  offset += 4;
81
- const semJson = dec.decode(new Uint8Array(buf, offset, semLen));
82
- offset += semLen;
83
- const sem = JSON.parse(semJson);
84
- const semBlobLen = dv.getUint32(offset, true);
85
- offset += 4;
86
- const semBlob = new Uint8Array(buf, offset, semBlobLen);
87
- semantic = parseSemanticSection(sem, semBlob);
86
+ if (jsonLen < 0 || offset + jsonLen > buf.byteLength) {
87
+ offset = sectionStart;
88
+ break;
89
+ }
90
+ let parsed;
91
+ try {
92
+ const json = dec.decode(new Uint8Array(buf, offset, jsonLen));
93
+ parsed = JSON.parse(json);
94
+ }
95
+ catch {
96
+ offset = sectionStart;
97
+ break;
98
+ }
99
+ offset += jsonLen;
100
+ if (!semantic && looksLikeSemanticJson(parsed)) {
101
+ if (buf.byteLength - offset < 4) {
102
+ offset = sectionStart;
103
+ break;
104
+ }
105
+ const semBlobLen = dv.getUint32(offset, true);
106
+ offset += 4;
107
+ if (semBlobLen < 0 || offset + semBlobLen > buf.byteLength) {
108
+ offset = sectionStart;
109
+ break;
110
+ }
111
+ const semBlob = new Uint8Array(buf, offset, semBlobLen);
112
+ offset += semBlobLen;
113
+ semantic = parseSemanticSection(parsed, semBlob);
114
+ continue;
115
+ }
116
+ const graph = validateClaimGraph(parsed);
117
+ if (!claimGraph && graph) {
118
+ claimGraph = graph;
119
+ continue;
120
+ }
121
+ offset = sectionStart;
122
+ break;
88
123
  }
89
124
  return {
90
125
  meta,
@@ -96,8 +131,18 @@ export function mountPackFromBuffer(buf) {
96
131
  namespaces,
97
132
  blockTokenLens,
98
133
  semantic,
134
+ claimGraph,
99
135
  };
100
136
  }
137
/**
 * Heuristic used to tell a semantic-section JSON header apart from other
 * trailing JSON sections: it must be an object with `version === 1`,
 * `encoding === 'int8_l2norm'` and numeric `blocks.vectors.byteOffset` /
 * `blocks.vectors.length`.
 */
function looksLikeSemanticJson(parsed) {
    if (parsed === null || typeof parsed !== 'object') {
        return false;
    }
    const vectors = parsed.blocks?.vectors;
    const hasVectorRange = typeof vectors?.byteOffset === 'number' && typeof vectors?.length === 'number';
    return parsed.version === 1 && parsed.encoding === 'int8_l2norm' && hasVectorRange;
}
101
146
  function parseSemanticSection(sem, blob) {
102
147
  const vectors = sem?.blocks?.vectors;
103
148
  const scales = sem?.blocks?.scales;
package/dist/query.d.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  import type { Pack } from "./pack.js";
2
+ import type { RetrievalEvidence, SemanticSidecar } from "./semantic/types.js";
2
3
  export type QueryOptions = {
3
4
  topK?: number;
4
5
  minScore?: number;
@@ -12,6 +13,11 @@ export type QueryOptions = {
12
13
  weight?: number;
13
14
  minTermLength?: number;
14
15
  };
16
+ graph?: {
17
+ expand?: boolean;
18
+ maxExtraTerms?: number;
19
+ predicates?: string[];
20
+ };
15
21
  semantic?: {
16
22
  enabled?: boolean;
17
23
  mode?: "rerank";
@@ -23,6 +29,14 @@ export type QueryOptions = {
23
29
  wSem?: number;
24
30
  };
25
31
  queryEmbedding?: Float32Array;
32
+ sidecar?: SemanticSidecar;
33
+ provider?: {
34
+ type: "ollama";
35
+ modelId: string;
36
+ endpoint?: string;
37
+ };
38
+ sidecarPath?: string;
39
+ minSemanticScore?: number;
26
40
  force?: boolean;
27
41
  };
28
42
  };
@@ -34,6 +48,7 @@ export type Hit = {
34
48
  text: string;
35
49
  source?: string;
36
50
  namespace?: string;
51
+ evidence?: RetrievalEvidence;
37
52
  };
38
53
  export declare function query(pack: Pack, q: string, opts?: QueryOptions): Hit[];
39
54
  export declare function lexConfidence(hits: Array<{
package/dist/query.js CHANGED
@@ -14,6 +14,9 @@ import { minCoverSpan, proximityMultiplier } from "./quality/proximity.js";
14
14
  import { diversifyAndDedupe } from "./quality/diversify.js";
15
15
  import { knsSignature, knsDistance } from "./quality/signature.js";
16
16
  import { decodeScaleF16, quantizeEmbeddingInt8L2Norm } from "./semantic.js";
17
+ import { expandQueryWithGraph } from "./graph/query_expand.js";
18
+ import { rerankCandidates } from "./semantic/rerank.js";
19
+ import { parseSidecar } from "./semantic/sidecar.js";
17
20
  export function validateQueryOptions(opts) {
18
21
  if (!opts)
19
22
  return;
@@ -46,6 +49,17 @@ export function validateQueryOptions(opts) {
46
49
  throw new Error("query(...): queryExpansion.minTermLength must be a positive integer.");
47
50
  }
48
51
  }
52
+ if (opts.graph) {
53
+ if (opts.graph.expand !== undefined && typeof opts.graph.expand !== "boolean") {
54
+ throw new Error("query(...): graph.expand must be a boolean when provided.");
55
+ }
56
+ if (opts.graph.maxExtraTerms !== undefined && (!Number.isInteger(opts.graph.maxExtraTerms) || opts.graph.maxExtraTerms < 1)) {
57
+ throw new Error("query(...): graph.maxExtraTerms must be a positive integer.");
58
+ }
59
+ if (opts.graph.predicates !== undefined && (!Array.isArray(opts.graph.predicates) || opts.graph.predicates.some((p) => typeof p !== "string"))) {
60
+ throw new Error("query(...): graph.predicates must be an array of strings when provided.");
61
+ }
62
+ }
49
63
  validateSemanticQueryOptions(opts.semantic);
50
64
  }
51
65
  export function validateSemanticQueryOptions(options) {
@@ -66,6 +80,19 @@ export function validateSemanticQueryOptions(options) {
66
80
  if (options.queryEmbedding !== undefined && !(options.queryEmbedding instanceof Float32Array)) {
67
81
  throw new Error("query(...): semantic.queryEmbedding must be a Float32Array.");
68
82
  }
83
+ if (options.sidecarPath !== undefined && typeof options.sidecarPath !== "string") {
84
+ throw new Error("query(...): semantic.sidecarPath must be a string when provided.");
85
+ }
86
+ if (options.minSemanticScore !== undefined && (!Number.isFinite(options.minSemanticScore) || options.minSemanticScore < 0 || options.minSemanticScore > 1)) {
87
+ throw new Error("query(...): semantic.minSemanticScore must be a finite number between 0 and 1.");
88
+ }
89
+ if (options.provider) {
90
+ if (options.provider.type !== "ollama")
91
+ throw new Error('query(...): semantic.provider.type must be "ollama".');
92
+ if (typeof options.provider.modelId !== "string" || !options.provider.modelId.trim()) {
93
+ throw new Error("query(...): semantic.provider.modelId must be a non-empty string.");
94
+ }
95
+ }
69
96
  if (options.blend) {
70
97
  if (options.blend.enabled !== undefined && typeof options.blend.enabled !== "boolean") {
71
98
  throw new Error("query(...): semantic.blend.enabled must be a boolean when provided.");
@@ -103,10 +130,19 @@ export function query(pack, q, opts = {}) {
103
130
  wSem: Math.max(0, opts.semantic?.blend?.wSem ?? 0.25),
104
131
  },
105
132
  queryEmbedding: opts.semantic?.queryEmbedding,
133
+ sidecar: resolveSemanticSidecar(opts.semantic?.sidecar, opts.semantic?.sidecarPath),
134
+ provider: opts.semantic?.provider,
135
+ minSemanticScore: opts.semantic?.minSemanticScore,
106
136
  force: opts.semantic?.force ?? false,
107
137
  };
138
+ const graphQuery = opts.graph?.expand === true
139
+ ? expandQueryWithGraph(pack, q, {
140
+ maxExtraTerms: opts.graph?.maxExtraTerms,
141
+ predicates: opts.graph?.predicates,
142
+ })
143
+ : q;
108
144
  // --- Query parsing
109
- const normTokens = tokenize(q).map((t) => t.term);
145
+ const normTokens = tokenize(graphQuery).map((t) => t.term);
110
146
  // Normalize quoted phrases from q
111
147
  const quotedRaw = parsePhrases(q);
112
148
  const quoted = quotedRaw.map((seq) => seq.map((t) => normalize(t)).flatMap((s) => s.split(/\s+/)).filter(Boolean));
@@ -266,9 +302,16 @@ export function query(pack, q, opts = {}) {
266
302
  return [];
267
303
  }
268
304
  const confidence = lexConfidence(prelim);
305
+ let semanticScores;
306
+ let blendedScores;
307
+ const originalLexicalScores = new Map(prelim.map((item) => [item.blockId, item.score]));
269
308
  if (shouldRerankWithSemantic(pack, semanticOpts, confidence)) {
270
- prelim = rerankLexicalHitsWithSemantic(pack, prelim, semanticOpts);
309
+ const semanticResult = rerankLexicalHitsWithSemantic(pack, prelim, semanticOpts);
310
+ prelim = semanticResult.hits;
311
+ semanticScores = semanticResult.semanticScores;
312
+ blendedScores = semanticResult.blendedScores;
271
313
  }
314
+ const retrievalMode = semanticScores ? "hybrid" : "lexical";
272
315
  // --- KNS tie-breaker + de-dup/MMR
273
316
  const qSig = knsSignature(normalize(q));
274
317
  const pool = prelim.slice(0, topK * 5).map((r) => {
@@ -280,6 +323,13 @@ export function query(pack, q, opts = {}) {
280
323
  text,
281
324
  source: pack.docIds?.[r.blockId] ?? undefined,
282
325
  namespace: pack.namespaces?.[r.blockId] ?? undefined,
326
+ evidence: {
327
+ retrieval: retrievalMode,
328
+ lexicalScore: originalLexicalScores.get(r.blockId) ?? r.score,
329
+ semanticScore: semanticScores?.get(r.blockId),
330
+ blendedScore: blendedScores?.get(r.blockId),
331
+ modelId: semanticOpts.provider?.modelId ?? semanticOpts.sidecar?.modelId,
332
+ },
283
333
  };
284
334
  });
285
335
  const finalHits = diversifyAndDedupe(pool, { k: topK });
@@ -297,19 +347,66 @@ export function lexConfidence(hits) {
297
347
  function shouldRerankWithSemantic(pack, opts, confidence) {
298
348
  if (!opts.enabled || opts.mode !== "rerank")
299
349
  return false;
300
- if (!pack.semantic)
350
+ if (!pack.semantic && !opts.sidecar)
301
351
  return false;
302
352
  if (!opts.queryEmbedding) {
303
353
  throw new Error("query(...): semantic.queryEmbedding (Float32Array) is required when semantic.enabled=true.");
304
354
  }
305
355
  return opts.force || confidence < opts.minLexConfidence;
306
356
  }
357
+ function resolveSemanticSidecar(sidecar, sidecarPath) {
358
+ if (sidecar)
359
+ return sidecar;
360
+ if (!sidecarPath)
361
+ return undefined;
362
+ const raw = sidecarPath.trim();
363
+ if (!raw)
364
+ return undefined;
365
+ if (raw.startsWith("{")) {
366
+ return parseSidecar(raw);
367
+ }
368
+ if (raw.startsWith("data:")) {
369
+ const comma = raw.indexOf(",");
370
+ if (comma <= 0)
371
+ return undefined;
372
+ const meta = raw.slice(5, comma).toLowerCase();
373
+ const payload = raw.slice(comma + 1);
374
+ const decoded = meta.includes(";base64")
375
+ ? decodeBase64(payload)
376
+ : decodeURIComponent(payload);
377
+ if (!decoded.trim())
378
+ return undefined;
379
+ return parseSidecar(decoded);
380
+ }
381
+ return undefined;
382
+ }
383
+ function decodeBase64(input) {
384
+ const normalized = input.replace(/\s+/g, "");
385
+ const atobFn = globalThis.atob;
386
+ if (typeof atobFn === "function")
387
+ return atobFn(normalized);
388
+ const maybeBufferCtor = globalThis.Buffer;
389
+ if (maybeBufferCtor?.from)
390
+ return maybeBufferCtor.from(normalized, "base64").toString("utf8");
391
+ throw new Error("query(...): Unable to decode semantic.sidecarPath base64 payload in this runtime.");
392
+ }
307
393
  function rerankLexicalHitsWithSemantic(pack, prelim, opts) {
394
+ if (opts.sidecar && opts.queryEmbedding) {
395
+ const sidecarResult = rerankCandidates({
396
+ lexical: prelim,
397
+ sidecar: opts.sidecar,
398
+ queryEmbedding: opts.queryEmbedding,
399
+ topN: opts.topN,
400
+ blend: opts.blend,
401
+ minSemanticScore: opts.minSemanticScore,
402
+ });
403
+ return { hits: sidecarResult.reranked, semanticScores: sidecarResult.semanticScores, blendedScores: sidecarResult.blendedScores };
404
+ }
308
405
  const sem = pack.semantic;
309
406
  if (!sem || !opts.queryEmbedding)
310
- return prelim;
407
+ return { hits: prelim };
311
408
  if (sem.dims <= 0 || sem.vecs.length === 0 || sem.dims !== opts.queryEmbedding.length)
312
- return prelim;
409
+ return { hits: prelim };
313
410
  const topN = Math.min(opts.topN, prelim.length);
314
411
  const rerankSlice = prelim.slice(0, topN);
315
412
  const tail = prelim.slice(topN);
@@ -324,15 +421,19 @@ function rerankLexicalHitsWithSemantic(pack, prelim, opts) {
324
421
  const wLex = denom > 0 ? opts.blend.wLex / denom : 0.5;
325
422
  const wSem = denom > 0 ? opts.blend.wSem / denom : 0.5;
326
423
  const reranked = new Array(topN);
424
+ const semanticScores = new Map();
425
+ const blendedScores = new Map();
327
426
  for (let i = 0; i < topN; i++) {
328
427
  const hit = rerankSlice[i];
428
+ semanticScores.set(hit.blockId, normSem[i]);
429
+ blendedScores.set(hit.blockId, opts.blend.enabled ? wLex * normLex[i] + wSem * normSem[i] : semScores[i]);
329
430
  reranked[i] = {
330
431
  blockId: hit.blockId,
331
- score: opts.blend.enabled ? wLex * normLex[i] + wSem * normSem[i] : semScores[i],
432
+ score: blendedScores.get(hit.blockId) ?? hit.score,
332
433
  };
333
434
  }
334
435
  reranked.sort((a, b) => b.score - a.score || a.blockId - b.blockId);
335
- return [...reranked, ...tail];
436
+ return { hits: [...reranked, ...tail], semanticScores, blendedScores };
336
437
  }
337
438
  function scoreSemanticInt8(queryQ, queryScale, semantic, hits) {
338
439
  const scores = new Float64Array(hits.length);
@@ -0,0 +1,2 @@
1
+ export declare function normalizeVector(vector: Float32Array): Float32Array;
2
+ export declare function cosineSimilarity(a: Float32Array, b: Float32Array): number;
@@ -0,0 +1,20 @@
1
+ export function normalizeVector(vector) {
2
+ let normSq = 0;
3
+ for (let i = 0; i < vector.length; i++)
4
+ normSq += vector[i] * vector[i];
5
+ const norm = Math.sqrt(normSq);
6
+ if (!norm)
7
+ return new Float32Array(vector.length);
8
+ const out = new Float32Array(vector.length);
9
+ for (let i = 0; i < vector.length; i++)
10
+ out[i] = vector[i] / norm;
11
+ return out;
12
+ }
13
+ export function cosineSimilarity(a, b) {
14
+ if (a.length !== b.length || a.length === 0)
15
+ return 0;
16
+ let dot = 0;
17
+ for (let i = 0; i < a.length; i++)
18
+ dot += a[i] * b[i];
19
+ return dot;
20
+ }
@@ -0,0 +1,3 @@
1
+ import type { EmbeddingProvider, SemanticQueryOptions } from './types.js';
2
+ export declare function ensureProviderModelId(options?: SemanticQueryOptions): string | undefined;
3
+ export declare function assertProviderCompatible(options?: SemanticQueryOptions, provider?: EmbeddingProvider): void;
@@ -0,0 +1,13 @@
1
+ export function ensureProviderModelId(options) {
2
+ return options?.provider?.modelId;
3
+ }
4
+ export function assertProviderCompatible(options, provider) {
5
+ if (!options?.enabled)
6
+ return;
7
+ if (!provider && !options.queryEmbedding) {
8
+ throw new Error('semantic.enabled=true requires either semantic.queryEmbedding or an EmbeddingProvider.');
9
+ }
10
+ if (provider && options.provider?.modelId && options.provider.modelId !== provider.modelId) {
11
+ throw new Error(`Semantic provider model mismatch: options requested ${options.provider.modelId}, provider exposes ${provider.modelId}.`);
12
+ }
13
+ }
@@ -0,0 +1,23 @@
1
+ import type { SemanticSidecar } from './types.js';
2
+ export declare function rerankCandidates(params: {
3
+ lexical: Array<{
4
+ blockId: number;
5
+ score: number;
6
+ }>;
7
+ sidecar: SemanticSidecar;
8
+ queryEmbedding: Float32Array;
9
+ topN: number;
10
+ blend: {
11
+ enabled: boolean;
12
+ wLex: number;
13
+ wSem: number;
14
+ };
15
+ minSemanticScore?: number;
16
+ }): {
17
+ reranked: Array<{
18
+ blockId: number;
19
+ score: number;
20
+ }>;
21
+ semanticScores: Map<number, number>;
22
+ blendedScores: Map<number, number>;
23
+ };
@@ -0,0 +1,42 @@
1
+ import { cosineSimilarity, normalizeVector } from './cosine.js';
2
+ export function rerankCandidates(params) {
3
+ const topN = Math.min(params.topN, params.lexical.length);
4
+ const head = params.lexical.slice(0, topN);
5
+ const tail = params.lexical.slice(topN);
6
+ const q = normalizeVector(params.queryEmbedding);
7
+ const semanticScores = new Map();
8
+ const blendedScores = new Map();
9
+ const lexNorm = minMax(head.map((h) => h.score));
10
+ const semRaw = [];
11
+ for (const item of head) {
12
+ const rec = params.sidecar.blocks.find((b) => b.blockId === item.blockId);
13
+ const vec = rec ? Float32Array.from(rec.vector) : new Float32Array(q.length);
14
+ semRaw.push(cosineSimilarity(q, vec));
15
+ }
16
+ const semNorm = minMax(semRaw);
17
+ const denom = params.blend.wLex + params.blend.wSem;
18
+ const wLex = denom > 0 ? params.blend.wLex / denom : 0.7;
19
+ const wSem = denom > 0 ? params.blend.wSem / denom : 0.3;
20
+ const reranked = head.map((item, idx) => {
21
+ const sem = semNorm[idx];
22
+ semanticScores.set(item.blockId, sem);
23
+ if ((params.minSemanticScore ?? 0) > sem) {
24
+ blendedScores.set(item.blockId, lexNorm[idx]);
25
+ return { blockId: item.blockId, score: lexNorm[idx] };
26
+ }
27
+ const blended = params.blend.enabled ? wLex * lexNorm[idx] + wSem * sem : sem;
28
+ blendedScores.set(item.blockId, blended);
29
+ return { blockId: item.blockId, score: blended };
30
+ });
31
+ reranked.sort((a, b) => b.score - a.score || a.blockId - b.blockId);
32
+ return { reranked: [...reranked, ...tail], semanticScores, blendedScores };
33
+ }
34
+ function minMax(values) {
35
+ if (values.length === 0)
36
+ return values;
37
+ const min = Math.min(...values);
38
+ const max = Math.max(...values);
39
+ if (!Number.isFinite(min) || !Number.isFinite(max) || max <= min)
40
+ return values.map(() => 1);
41
+ return values.map((v) => Math.min(1, Math.max(0, (v - min) / (max - min))));
42
+ }
@@ -0,0 +1,10 @@
1
+ import type { Pack } from '../pack.runtime.js';
2
+ import type { SemanticSidecar } from './types.js';
3
+ export declare function createPackFingerprint(pack: Pick<Pack, 'blocks' | 'docIds' | 'meta'>): string;
4
+ export declare function serializeSidecar(sidecar: SemanticSidecar): string;
5
+ export declare function parseSidecar(raw: string): SemanticSidecar;
6
+ export declare function validateSidecarForPack(input: {
7
+ sidecar: SemanticSidecar;
8
+ pack: Pick<Pack, 'blocks' | 'docIds' | 'meta'>;
9
+ modelId: string;
10
+ }): void;
@@ -0,0 +1,32 @@
1
+ export function createPackFingerprint(pack) {
2
+ let hash = 2166136261;
3
+ const parts = [String(pack.meta?.version ?? 0), ...(pack.docIds ?? []), ...pack.blocks];
4
+ for (const part of parts) {
5
+ const text = String(part ?? '');
6
+ for (let i = 0; i < text.length; i++) {
7
+ hash ^= text.charCodeAt(i);
8
+ hash = Math.imul(hash, 16777619);
9
+ }
10
+ }
11
+ return `fnv1a-${(hash >>> 0).toString(16).padStart(8, '0')}`;
12
+ }
13
+ export function serializeSidecar(sidecar) {
14
+ return `${JSON.stringify(sidecar, null, 2)}\n`;
15
+ }
16
+ export function parseSidecar(raw) {
17
+ const parsed = JSON.parse(raw);
18
+ if (parsed.version !== 1)
19
+ throw new Error(`Unsupported semantic sidecar version: ${parsed.version}`);
20
+ if (parsed.metric !== 'cosine')
21
+ throw new Error(`Unsupported semantic metric: ${parsed.metric}`);
22
+ return parsed;
23
+ }
24
+ export function validateSidecarForPack(input) {
25
+ const expectedFingerprint = createPackFingerprint(input.pack);
26
+ if (input.sidecar.packFingerprint !== expectedFingerprint) {
27
+ throw new Error(`Semantic sidecar pack fingerprint mismatch: expected ${expectedFingerprint}, got ${input.sidecar.packFingerprint}. Regenerate the sidecar for this pack.`);
28
+ }
29
+ if (input.sidecar.modelId !== input.modelId) {
30
+ throw new Error(`Semantic model mismatch: sidecar model is ${input.sidecar.modelId}, but query provider is ${input.modelId}. Use the same embedding model or regenerate the sidecar.`);
31
+ }
32
+ }
@@ -0,0 +1,44 @@
1
+ export interface EmbeddingProvider {
2
+ readonly modelId: string;
3
+ embedQuery(text: string): Promise<Float32Array>;
4
+ embedTexts(texts: string[]): Promise<Float32Array[]>;
5
+ }
6
+ export interface SemanticSidecar {
7
+ version: 1;
8
+ packFingerprint: string;
9
+ modelId: string;
10
+ dimension: number;
11
+ metric: 'cosine';
12
+ createdAt: string;
13
+ blocks: Array<{
14
+ blockId: number;
15
+ vector: number[];
16
+ }>;
17
+ }
18
+ export type SemanticQueryOptions = {
19
+ enabled?: boolean;
20
+ mode?: 'rerank';
21
+ topN?: number;
22
+ minLexConfidence?: number;
23
+ minSemanticScore?: number;
24
+ blend?: {
25
+ enabled?: boolean;
26
+ wLex?: number;
27
+ wSem?: number;
28
+ };
29
+ provider?: {
30
+ type: 'ollama';
31
+ modelId: string;
32
+ endpoint?: string;
33
+ };
34
+ sidecarPath?: string;
35
+ queryEmbedding?: Float32Array;
36
+ force?: boolean;
37
+ };
38
+ export type RetrievalEvidence = {
39
+ retrieval: 'lexical' | 'hybrid';
40
+ lexicalScore?: number;
41
+ semanticScore?: number;
42
+ blendedScore?: number;
43
+ modelId?: string;
44
+ };
@@ -0,0 +1 @@
1
+ export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@knolo/core",
3
- "version": "3.2.0",
3
+ "version": "3.2.2",
4
4
  "type": "module",
5
5
  "description": "Local-first knowledge packs for small LLMs.",
6
6
  "keywords": [