@knolo/core 3.2.0 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -319,10 +319,73 @@ Properties:
319
319
 
320
320
  ---
321
321
 
322
- # 📄 License
322
+ # 🕸 ClaimGraph API
323
323
 
324
- Apache-2.0
324
+ `@knolo/core` includes a deterministic ClaimGraph subsystem.
325
+
326
+ ## Build-time config
327
+
328
+ ```ts
329
+ type BuildPackOptions = {
330
+ graph?: {
331
+ enabled?: boolean; // default true
332
+ maxEdgesPerDoc?: number; // default 500
333
+ };
334
+ };
335
+ ```
336
+
337
+ ## Query-time config
338
+
339
+ ```ts
340
+ type QueryOptions = {
341
+ graph?: {
342
+ expand?: boolean; // default false
343
+ maxExtraTerms?: number; // default 12
344
+ predicates?: string[]; // default ['defined_as', 'is', 'mentions', 'ref']
345
+ };
346
+ };
347
+ ```
348
+
349
+ ## Exports
325
350
 
351
+ ```ts
352
+ import {
353
+ buildClaimGraph,
354
+ getClaimGraph,
355
+ applyClaimGraphLog,
356
+ mergeClaimGraphLogs,
357
+ expandQueryWithGraph,
358
+ createGraphLog,
359
+ appendOp,
360
+ } from '@knolo/core';
361
+ ```
362
+
363
+ Types:
364
+
365
+ * `ClaimNode`
366
+ * `ClaimEdge`
367
+ * `ClaimGraph`
368
+ * `ClaimOp`
369
+ * `ClaimGraphLog`
370
+
371
+ ## Notes on determinism and bounds
372
+
373
+ * Node IDs are hash-derived from normalized labels.
374
+ * Edge IDs are hash-derived from `(from, predicate, to, evidence)`.
375
+ * Node labels are normalized and deterministically truncated.
376
+ * Evidence arrays are sorted in ascending order and de-duplicated.
377
+ * Node/edge arrays are sorted by ID in final graph.
378
+ * Extraction is bounded with `maxEdgesPerDoc`.
379
+ * Query expansion is bounded with `maxExtraTerms` and stable ordering.
326
380
 
381
+ ## Pack format note
327
382
 
383
+ `.knolo` binary layout now supports an optional trailing ClaimGraph JSON section after existing sections.
384
+ Runtimes that ignore unknown trailing bytes remain compatible.
385
+
386
+ ---
387
+
388
+ # 📄 License
389
+
390
+ Apache-2.0
328
391
 
package/dist/builder.d.ts CHANGED
@@ -16,5 +16,9 @@ export type BuildPackOptions = {
16
16
  perVectorScale?: true;
17
17
  };
18
18
  };
19
+ graph?: {
20
+ enabled?: boolean;
21
+ maxEdgesPerDoc?: number;
22
+ };
19
23
  };
20
24
  export declare function buildPack(docs: BuildInputDoc[], opts?: BuildPackOptions): Promise<Uint8Array>;
package/dist/builder.js CHANGED
@@ -9,6 +9,7 @@ import { tokenize } from './tokenize.js';
9
9
  import { getTextEncoder } from './utils/utf8.js';
10
10
  import { encodeScaleF16, quantizeEmbeddingInt8L2Norm } from './semantic.js';
11
11
  import { validateAgentRegistry } from './agent.js';
12
+ import { buildClaimGraph } from './graph/build_claim_graph.js';
12
13
  export async function buildPack(docs, opts = {}) {
13
14
  const normalizedDocs = validateDocs(docs);
14
15
  // Prepare blocks (strip MD) and carry heading/docId for optional boosts.
@@ -23,6 +24,10 @@ export async function buildPack(docs, opts = {}) {
23
24
  const totalTokens = blockTokenLens.reduce((sum, len) => sum + len, 0);
24
25
  const avgBlockLen = blocks.length ? totalTokens / blocks.length : 1;
25
26
  const agents = normalizeAgents(opts.agents);
27
+ const graphEnabled = opts.graph?.enabled ?? true;
28
+ const claimGraph = graphEnabled
29
+ ? buildClaimGraph(normalizedDocs, { maxEdgesPerDoc: opts.graph?.maxEdgesPerDoc })
30
+ : null;
26
31
  const meta = {
27
32
  version: 3,
28
33
  stats: {
@@ -32,6 +37,15 @@ export async function buildPack(docs, opts = {}) {
32
37
  avgBlockLen,
33
38
  },
34
39
  ...(agents ? { agents } : {}),
40
+ ...(claimGraph
41
+ ? {
42
+ claimGraph: {
43
+ version: 1,
44
+ nodes: claimGraph.nodes.length,
45
+ edges: claimGraph.edges.length,
46
+ },
47
+ }
48
+ : {}),
35
49
  };
36
50
  // Persist blocks as objects to optionally carry heading/docId/token length.
37
51
  const blocksPayload = blocks.map((b, i) => ({
@@ -54,6 +68,7 @@ export async function buildPack(docs, opts = {}) {
54
68
  ? enc.encode(JSON.stringify(semanticSection.semJson))
55
69
  : undefined;
56
70
  const semBlob = semanticSection?.semBlob;
71
+ const graphBytes = claimGraph ? enc.encode(JSON.stringify(claimGraph)) : undefined;
57
72
  const totalLength = 4 +
58
73
  metaBytes.length +
59
74
  4 +
@@ -64,7 +79,8 @@ export async function buildPack(docs, opts = {}) {
64
79
  blocksBytes.length +
65
80
  (semanticEnabled && semBytes && semBlob
66
81
  ? 4 + semBytes.length + 4 + semBlob.length
67
- : 0);
82
+ : 0) +
83
+ (graphBytes ? 4 + graphBytes.length : 0);
68
84
  const out = new Uint8Array(totalLength);
69
85
  const dv = new DataView(out.buffer);
70
86
  let offset = 0;
@@ -98,6 +114,12 @@ export async function buildPack(docs, opts = {}) {
98
114
  dv.setUint32(offset, semBlob.length, true);
99
115
  offset += 4;
100
116
  out.set(semBlob, offset);
117
+ offset += semBlob.length;
118
+ }
119
+ if (graphBytes) {
120
+ dv.setUint32(offset, graphBytes.length, true);
121
+ offset += 4;
122
+ out.set(graphBytes, offset);
101
123
  }
102
124
  return out;
103
125
  }
@@ -0,0 +1,5 @@
1
+ import type { BuildInputDoc } from '../builder.js';
2
+ import type { ClaimGraph } from './claim_graph.js';
3
+ export declare function buildClaimGraph(docs: BuildInputDoc[], opts?: {
4
+ maxEdgesPerDoc?: number;
5
+ }): ClaimGraph;
@@ -0,0 +1,88 @@
1
+ import { canonicalEvidence, computeEdgeId, computeNodeId, finalizeGraph, normalizeClaimLabel, } from './claim_graph.js';
2
+ const DEF_RE = /^([A-Za-z0-9 _-]{2,80})\s+(is|are)\s+(.{2,120})[.?!]/;
3
+ const MD_LINK_RE = /\[([^\]]{1,200})\]\(([^)\s]{1,200})\)/g;
4
+ const WIKI_RE = /\[\[([^\]]{1,200})\]\]/g;
5
+ const HEADING_RE = /^(#{1,3})\s+(.+)$/gm;
6
+ const STOPWORDS = new Set(['a', 'an', 'and', 'or', 'the', 'it', 'they', 'this', 'that', 'these', 'those']);
7
/**
 * Builds a ClaimGraph from a list of documents.
 *
 * For every document, candidate edges are extracted from four patterns:
 *   - Markdown links `[text](target)`  -> text `ref` target
 *   - wiki links `[[target]]`          -> document label `mentions` target
 *   - `#`..`###` headings              -> heading `defined_as` first sentence after it
 *   - sentences matching DEF_RE        -> subject `is` object snippet
 *
 * Candidates are de-duplicated by edge ID, sorted by ID and truncated to
 * `maxEdgesPerDoc`. Nodes are registered only for edges that survive the cap,
 * so the node set is bounded by the kept edges and contains no orphans left
 * behind by dropped candidates.
 *
 * @param docs Documents to scan; `text` is read, `id`/`heading` supply the document label.
 * @param opts Optional settings; `maxEdgesPerDoc` caps kept edges per document (default 500, minimum 1).
 * @returns The graph returned by `finalizeGraph`.
 */
export function buildClaimGraph(docs, opts = {}) {
    const requestedCap = Math.max(1, opts.maxEdgesPerDoc ?? 500);
    // Math.max(1, NaN) is NaN and slice(0, NaN) keeps nothing; use the default instead.
    const maxEdgesPerDoc = Number.isNaN(requestedCap) ? 500 : requestedCap;
    const nodeById = new Map();
    const edgeById = new Map();
    // Code-unit comparison: independent of the host locale.
    const byId = (a, b) => (a.id < b.id ? -1 : a.id > b.id ? 1 : 0);
    for (let i = 0; i < docs.length; i++) {
        const doc = docs[i];
        const docLabel = normalizeLabel(doc.id || doc.heading || `doc_${i}`);
        // Candidates keyed by edge ID so a repeated link does not use up cap slots.
        const local = new Map();
        for (const m of doc.text.matchAll(MD_LINK_RE)) {
            addEdge(local, normalizeLabel(m[1]), 'ref', normalizeLabel(m[2]), [i]);
        }
        for (const m of doc.text.matchAll(WIKI_RE)) {
            addEdge(local, docLabel, 'mentions', normalizeLabel(m[1]), [i]);
        }
        for (const h of doc.text.matchAll(HEADING_RE)) {
            const headingLabel = normalizeLabel(h[2] || '');
            const headingStart = h.index ?? 0;
            const sentence = firstSentenceAfter(doc.text, headingStart + h[0].length);
            if (sentence) {
                addEdge(local, headingLabel, 'defined_as', normalizeLabel(sentence), [i]);
            }
        }
        for (const sentence of splitSentences(doc.text)) {
            const m = sentence.match(DEF_RE);
            if (!m)
                continue;
            const subject = normalizeLabel(m[1]);
            if (!subject || isStopwordOnly(subject))
                continue;
            addEdge(local, subject, 'is', normalizeLabel(m[3]), [i]);
        }
        const kept = [...local.values()].sort(byId).slice(0, maxEdgesPerDoc);
        for (const candidate of kept) {
            // Register endpoints only now, once the edge is known to be kept.
            ensureNode(nodeById, candidate.fromLabel);
            ensureNode(nodeById, candidate.toLabel);
            const existing = edgeById.get(candidate.id);
            if (existing) {
                existing.evidence = canonicalEvidence([
                    ...(existing.evidence ?? []),
                    ...(candidate.edge.evidence ?? []),
                ]);
            }
            else {
                edgeById.set(candidate.id, candidate.edge);
            }
        }
    }
    return finalizeGraph({ version: 1, nodes: [...nodeById.values()], edges: [...edgeById.values()] });
}
/**
 * Records a candidate edge in `local` (a Map keyed by edge ID) unless either
 * label is empty or an identical candidate is already present. The endpoint
 * labels are stored next to the edge so nodes can be created later, only for
 * candidates that survive the per-document cap.
 */
function addEdge(local, fromLabel, p, toLabel, evidence) {
    if (!fromLabel || !toLabel)
        return;
    const from = computeNodeId(fromLabel);
    const to = computeNodeId(toLabel);
    const edgeEvidence = canonicalEvidence(evidence);
    const id = computeEdgeId(from, p, to, edgeEvidence);
    if (local.has(id))
        return;
    local.set(id, {
        id,
        fromLabel,
        toLabel,
        edge: { id, from, p, to, evidence: edgeEvidence },
    });
}
/** Registers a node for `label` if its ID is not yet known and returns that ID. */
function ensureNode(nodeById, label) {
    const id = computeNodeId(label);
    if (!nodeById.has(id))
        nodeById.set(id, { id, label });
    return id;
}
68
/** Normalizes a raw label via normalizeClaimLabel, capping it at 200 characters. */
function normalizeLabel(input) {
    const maxLabelLength = 200;
    return normalizeClaimLabel(input, maxLabelLength);
}
71
/**
 * Splits text into trimmed, non-empty sentences.
 * CRLF is first converted to LF; a split happens at whitespace that follows
 * '.', '?' or '!' (the punctuation stays attached to the sentence).
 */
function splitSentences(text) {
    const unified = text.replace(/\r\n/g, '\n');
    const pieces = unified.split(/(?<=[.?!])\s+/);
    const sentences = [];
    for (const piece of pieces) {
        const trimmed = piece.trim();
        if (trimmed) {
            sentences.push(trimmed);
        }
    }
    return sentences;
}
78
/**
 * Returns the first sentence that follows `startIdx`, limited to 240 characters.
 * Everything up to and including the first run of newlines after `startIdx`
 * is dropped before sentence splitting. Returns '' when nothing remains.
 */
function firstSentenceAfter(text, startIdx) {
    const remainder = text.slice(startIdx);
    const body = remainder.replace(/^[^\n]*\n+/, '').trim();
    if (body.length === 0) {
        return '';
    }
    const [lead = ''] = splitSentences(body);
    return lead.slice(0, 240);
}
85
/** True when `subject` has at least one word and every word is in STOPWORDS. */
function isStopwordOnly(subject) {
    const tokens = subject.split(/\s+/).filter((word) => word.length > 0);
    if (tokens.length === 0) {
        return false;
    }
    for (const token of tokens) {
        if (!STOPWORDS.has(token)) {
            return false;
        }
    }
    return true;
}
@@ -0,0 +1,34 @@
1
+ import type { Pack } from '../pack.runtime.js';
2
+ export type ClaimNode = {
3
+ id: string;
4
+ label: string;
5
+ props?: Record<string, string>;
6
+ };
7
+ export type ClaimEdge = {
8
+ id: string;
9
+ from: string;
10
+ p: string;
11
+ to: string;
12
+ evidence?: number[];
13
+ actor?: string;
14
+ ts?: number;
15
+ };
16
+ export type ClaimGraph = {
17
+ version: 1;
18
+ nodes: ClaimNode[];
19
+ edges: ClaimEdge[];
20
+ index?: {
21
+ labelToId?: Record<string, string>;
22
+ out?: Record<string, string[]>;
23
+ in?: Record<string, string[]>;
24
+ };
25
+ };
26
+ export declare function normalizeClaimLabel(label: string, maxLen?: number): string;
27
+ export declare function computeNodeId(label: string): string;
28
+ export declare function computeEdgeId(from: string, p: string, to: string, evidence?: number[]): string;
29
+ export declare function canonicalEvidence(evidence?: number[]): number[];
30
+ export declare function buildGraphIndex(graph: ClaimGraph): ClaimGraph['index'];
31
+ export declare function finalizeGraph(graph: ClaimGraph): ClaimGraph;
32
+ export declare function getClaimGraph(pack: Pack): ClaimGraph | null;
33
+ export declare function validateClaimGraph(input: unknown): ClaimGraph | null;
34
+ export declare function expandLabelToTerms(label: string): string[];
@@ -0,0 +1,65 @@
1
+ import { normalize, tokenize } from '../tokenize.js';
2
/**
 * Canonical form of a label: passed through `normalize`, whitespace runs
 * collapsed to one space, trimmed, then cut to at most `maxLen` characters.
 */
export function normalizeClaimLabel(label, maxLen = 200) {
    const collapsed = normalize(label).replace(/\s+/g, ' ');
    const trimmed = collapsed.trim();
    return trimmed.slice(0, maxLen);
}
6
/** Node ID: the prefix `n_` followed by the 8-hex-digit hash of the normalized label. */
export function computeNodeId(label) {
    const digest = hash32Hex(normalizeClaimLabel(label));
    return `n_${digest}`;
}
9
/**
 * Edge ID: the prefix `e_` followed by the hash of the newline-joined
 * `from`, `p`, `to` and the comma-joined canonical evidence list.
 */
export function computeEdgeId(from, p, to, evidence) {
    const evidencePart = canonicalEvidence(evidence).join(',');
    const payload = `${from}\n${p}\n${to}\n${evidencePart}`;
    const digest = hash32Hex(payload);
    return `e_${digest}`;
}
13
/**
 * Canonicalizes an evidence list: keeps only non-negative integers, removes
 * duplicates and sorts ascending. Missing or empty input yields [].
 */
export function canonicalEvidence(evidence) {
    const count = evidence?.length;
    if (!count) {
        return [];
    }
    const isNonNegativeInt = (n) => Number.isInteger(n) && n >= 0;
    const deduped = new Set(evidence.filter(isNonNegativeInt));
    const ordered = [...deduped];
    ordered.sort((left, right) => left - right);
    return ordered;
}
18
+ export function buildGraphIndex(graph) {
19
+ const labelToId = {};
20
+ const out = {};
21
+ const inbound = {};
22
+ for (const node of graph.nodes) {
23
+ labelToId[normalizeClaimLabel(node.label)] = node.id;
24
+ }
25
+ for (const edge of graph.edges) {
26
+ (out[edge.from] ||= []).push(edge.id);
27
+ (inbound[edge.to] ||= []).push(edge.id);
28
+ }
29
+ for (const key of Object.keys(out))
30
+ out[key].sort();
31
+ for (const key of Object.keys(inbound))
32
+ inbound[key].sort();
33
+ return { labelToId, out, in: inbound };
34
+ }
35
+ export function finalizeGraph(graph) {
36
+ const nodes = [...graph.nodes].sort((a, b) => a.id.localeCompare(b.id));
37
+ const edges = [...graph.edges]
38
+ .map((e) => ({ ...e, evidence: canonicalEvidence(e.evidence) }))
39
+ .sort((a, b) => a.id.localeCompare(b.id));
40
+ const out = { version: 1, nodes, edges };
41
+ out.index = buildGraphIndex(out);
42
+ return out;
43
+ }
44
/** Returns the pack's `claimGraph`, or null when it is null or undefined. */
export function getClaimGraph(pack) {
    const { claimGraph } = pack;
    if (claimGraph === undefined || claimGraph === null) {
        return null;
    }
    return claimGraph;
}
47
+ export function validateClaimGraph(input) {
48
+ if (!input || typeof input !== 'object')
49
+ return null;
50
+ const g = input;
51
+ if (g.version !== 1 || !Array.isArray(g.nodes) || !Array.isArray(g.edges))
52
+ return null;
53
+ return finalizeGraph({ version: 1, nodes: g.nodes, edges: g.edges });
54
+ }
55
/** Tokenizes the normalized label and returns the `term` of each token, in order. */
export function expandLabelToTerms(label) {
    const canonical = normalizeClaimLabel(label);
    const tokens = tokenize(canonical);
    return tokens.map((token) => token.term);
}
58
/**
 * 32-bit FNV-1a hash over the string's UTF-16 code units, returned as an
 * 8-character, zero-padded, lowercase hex string.
 */
function hash32Hex(input) {
    const prime = 0x01000193;
    let state = 0x811c9dc5;
    let pos = 0;
    while (pos < input.length) {
        // XOR in the code unit first, then multiply (32-bit wrapping via Math.imul).
        state = Math.imul(state ^ input.charCodeAt(pos), prime);
        pos += 1;
    }
    const unsigned = state >>> 0;
    return unsigned.toString(16).padStart(8, '0');
}
@@ -0,0 +1,33 @@
1
+ import type { Pack } from '../pack.runtime.js';
2
+ import type { ClaimGraph } from './claim_graph.js';
3
+ export type ClaimOp = {
4
+ op: 'upsert_node';
5
+ id?: string;
6
+ label: string;
7
+ props?: Record<string, string>;
8
+ ts: number;
9
+ actor: string;
10
+ } | {
11
+ op: 'add_edge';
12
+ from: string;
13
+ p: string;
14
+ to: string;
15
+ evidence?: number[];
16
+ ts: number;
17
+ actor: string;
18
+ } | {
19
+ op: 'tombstone_edge';
20
+ edgeId: string;
21
+ ts: number;
22
+ actor: string;
23
+ };
24
+ export type ClaimGraphLog = {
25
+ version: 1;
26
+ ops: ClaimOp[];
27
+ };
28
+ export declare function createGraphLog(): ClaimGraphLog;
29
+ export declare function appendOp(log: ClaimGraphLog, op: ClaimOp): ClaimGraphLog;
30
+ export declare function mergeClaimGraphLogs(a: ClaimGraphLog, b: ClaimGraphLog): ClaimGraphLog;
31
+ export declare function serializeClaimGraphLog(log: ClaimGraphLog): Uint8Array;
32
+ export declare function deserializeClaimGraphLog(data: Uint8Array): ClaimGraphLog;
33
+ export declare function applyClaimGraphLog(graphOrPack: ClaimGraph | Pack, log: ClaimGraphLog): ClaimGraph;
@@ -0,0 +1,106 @@
1
+ import { canonicalEvidence, computeEdgeId, computeNodeId, finalizeGraph, normalizeClaimLabel, } from './claim_graph.js';
2
/** Creates an empty version-1 ClaimGraph log. */
export function createGraphLog() {
    const ops = [];
    return { version: 1, ops };
}
5
+ export function appendOp(log, op) {
6
+ return { version: 1, ops: [...log.ops, op] };
7
+ }
8
/** Concatenates the ops of both logs into a new log, ordered by compareOps. */
export function mergeClaimGraphLogs(a, b) {
    const combined = Array.from(a.ops);
    for (const op of b.ops) {
        combined.push(op);
    }
    combined.sort(compareOps);
    return { version: 1, ops: combined };
}
11
/** Encodes the log as UTF-8 JSON bytes with ops in compareOps order; the input is not mutated. */
export function serializeClaimGraphLog(log) {
    const orderedOps = Array.from(log.ops).sort(compareOps);
    const json = JSON.stringify({ version: 1, ops: orderedOps });
    const encoder = new TextEncoder();
    return encoder.encode(json);
}
14
/**
 * Decodes UTF-8 JSON bytes into a ClaimGraph log with ops in compareOps order.
 * Throws Error('Invalid ClaimGraphLog payload') unless the payload is a truthy
 * value with `version === 1` and an `ops` array.
 */
export function deserializeClaimGraphLog(data) {
    const text = new TextDecoder().decode(data);
    const payload = JSON.parse(text);
    const wellFormed = Boolean(payload) && payload.version === 1 && Array.isArray(payload.ops);
    if (!wellFormed) {
        throw new Error('Invalid ClaimGraphLog payload');
    }
    const ops = payload.ops.sort(compareOps);
    return { version: 1, ops };
}
21
/**
 * Replays a log on top of a base graph and returns the finalized result.
 *
 * The base is `graphOrPack.claimGraph` (or an empty graph) when a pack is
 * passed, otherwise the graph itself; base nodes and edges are copied.
 * Ops are processed in compareOps order:
 *   - `upsert_node` replaces the node when its (ts, actor) stamp is not older
 *     than the last stamp recorded for that node ID;
 *   - `add_edge` records the newest add stamp per edge ID and (re)writes the edge;
 *   - `tombstone_edge` records the newest tombstone stamp per edge ID.
 * Afterwards an edge is removed when its tombstone stamp is strictly newer
 * than its add stamp (edges never added by the log count as infinitely old).
 * Placeholder nodes, labelled with their ID, are created for endpoints of
 * surviving edges that have no node.
 */
export function applyClaimGraphLog(graphOrPack, log) {
    let baseGraph = graphOrPack;
    if (isPack(graphOrPack)) {
        baseGraph = graphOrPack.claimGraph ?? { version: 1, nodes: [], edges: [] };
    }
    const nodeById = new Map();
    for (const n of baseGraph.nodes) {
        nodeById.set(n.id, { ...n, props: n.props ? { ...n.props } : undefined });
    }
    const edgeById = new Map();
    for (const e of baseGraph.edges) {
        edgeById.set(e.id, { ...e, evidence: canonicalEvidence(e.evidence) });
    }
    const nodeStamp = new Map();
    const addStamp = new Map();
    const tombstoneStamp = new Map();
    // True when no stamp is recorded for `key` yet or `stamp` is not older than it.
    const supersedes = (stamps, key, stamp) => {
        const previous = stamps.get(key);
        return !previous || compareStamp(stamp, previous) >= 0;
    };
    const orderedOps = [...log.ops].sort(compareOps);
    for (const op of orderedOps) {
        const stamp = [op.ts, op.actor];
        switch (op.op) {
            case 'upsert_node': {
                const label = normalizeClaimLabel(op.label);
                const id = op.id || computeNodeId(label);
                if (supersedes(nodeStamp, id, stamp)) {
                    nodeStamp.set(id, stamp);
                    nodeById.set(id, { id, label, props: op.props ? { ...op.props } : undefined });
                }
                break;
            }
            case 'add_edge': {
                const evidence = canonicalEvidence(op.evidence);
                const edgeId = computeEdgeId(op.from, op.p, op.to, evidence);
                if (supersedes(addStamp, edgeId, stamp)) {
                    addStamp.set(edgeId, stamp);
                }
                const priorEvidence = edgeById.get(edgeId)?.evidence ?? [];
                edgeById.set(edgeId, {
                    id: edgeId,
                    from: op.from,
                    p: op.p,
                    to: op.to,
                    evidence: canonicalEvidence([...priorEvidence, ...evidence]),
                    actor: op.actor,
                    ts: op.ts,
                });
                break;
            }
            default: {
                // Remaining op kind: tombstone_edge.
                if (supersedes(tombstoneStamp, op.edgeId, stamp)) {
                    tombstoneStamp.set(op.edgeId, stamp);
                }
            }
        }
    }
    const survivingEdges = [];
    for (const [edgeId, edge] of edgeById) {
        const added = addStamp.get(edgeId) ?? [-Infinity, ''];
        const removed = tombstoneStamp.get(edgeId);
        if (removed && compareStamp(removed, added) > 0) {
            continue;
        }
        survivingEdges.push(edge);
        for (const endpoint of [edge.from, edge.to]) {
            if (!nodeById.has(endpoint)) {
                nodeById.set(endpoint, { id: endpoint, label: endpoint });
            }
        }
    }
    return finalizeGraph({ version: 1, nodes: [...nodeById.values()], edges: survivingEdges });
}
82
+ function compareOps(a, b) {
83
+ if (a.ts !== b.ts)
84
+ return a.ts - b.ts;
85
+ const actorCmp = a.actor.localeCompare(b.actor);
86
+ if (actorCmp !== 0)
87
+ return actorCmp;
88
+ return stableSerializeOp(a).localeCompare(stableSerializeOp(b));
89
+ }
90
/**
 * Renders an op as a '|'-separated string of its identifying fields (kind
 * first). `ts` and `actor` are not part of the string.
 */
function stableSerializeOp(op) {
    switch (op.op) {
        case 'upsert_node': {
            const id = op.id || '';
            const label = normalizeClaimLabel(op.label);
            const props = JSON.stringify(op.props || {});
            return `upsert_node|${id}|${label}|${props}`;
        }
        case 'add_edge': {
            const evidenceCsv = canonicalEvidence(op.evidence).join(',');
            return `add_edge|${op.from}|${op.p}|${op.to}|${evidenceCsv}`;
        }
        default:
            return `tombstone_edge|${op.edgeId}`;
    }
}
99
/**
 * Compares two `[ts, actor]` stamps: by `ts` first, then by `actor`.
 *
 * The actor tiebreak uses UTF-16 code-unit order instead of `localeCompare`,
 * so the result is the same whatever the host's default locale is.
 *
 * @returns Negative when `a` is older than `b`, positive when newer, 0 when equal.
 */
function compareStamp(a, b) {
    if (a[0] !== b[0])
        return a[0] - b[0];
    if (a[1] === b[1])
        return 0;
    return a[1] < b[1] ? -1 : 1;
}
104
/** Treats the input as a pack when both its `meta` and `blocks` properties are truthy. */
function isPack(input) {
    if (!input.meta) {
        return false;
    }
    return !!input.blocks;
}
@@ -0,0 +1,6 @@
1
+ import type { Pack } from '../pack.runtime.js';
2
+ export type GraphQueryExpandOptions = {
3
+ maxExtraTerms?: number;
4
+ predicates?: string[];
5
+ };
6
+ export declare function expandQueryWithGraph(pack: Pack, queryString: string, opts?: GraphQueryExpandOptions): string;
@@ -0,0 +1,57 @@
1
+ import { normalize, tokenize } from '../tokenize.js';
2
+ import { expandLabelToTerms } from './claim_graph.js';
3
/**
 * Appends graph-derived terms to a query string.
 *
 * Candidate nodes are those whose normalized label equals a query token or
 * starts with one (prefix candidates stop once `maxExtraTerms * 4` nodes are
 * collected). For each candidate, in sorted ID order, outgoing edges whose
 * predicate is allowed contribute the tokens of the target node's label that
 * are not already query tokens, until `maxExtraTerms` terms are gathered.
 * The extra terms are appended to the original query in sorted order.
 *
 * Changes from the earlier version: target nodes are resolved through a Map
 * built once instead of a linear scan per edge, the query token array is no
 * longer sorted in place, and labels are ordered by UTF-16 code unit rather
 * than `localeCompare`, so the selection under the caps does not depend on
 * the host locale.
 *
 * @param pack Pack whose `claimGraph` is consulted.
 * @param queryString Original query text.
 * @param opts `maxExtraTerms` (default 12, minimum 1) and `predicates`
 *   (default ['defined_as', 'is', 'mentions', 'ref']).
 * @returns The query, unchanged when there is no usable graph, no query
 *   tokens or no extra terms; otherwise the query followed by the extra terms.
 */
export function expandQueryWithGraph(pack, queryString, opts = {}) {
    const graph = pack.claimGraph;
    if (!graph || graph.nodes.length === 0 || graph.edges.length === 0)
        return queryString;
    const maxExtraTerms = Math.max(1, opts.maxExtraTerms ?? 12);
    const maxCandidates = maxExtraTerms * 4;
    const predicates = new Set((opts.predicates ?? ['defined_as', 'is', 'mentions', 'ref']).map((p) => normalize(p)));
    const qTokens = tokenize(queryString).map((t) => t.term);
    if (qTokens.length === 0)
        return queryString;
    const qSet = new Set(qTokens);
    const candidateNodeIds = new Set();
    const labelEntries = Object.entries(graph.index?.labelToId ?? {}).sort((a, b) => (a[0] < b[0] ? -1 : a[0] > b[0] ? 1 : 0));
    // Exact label matches are always candidates.
    for (const [labelNorm, nodeId] of labelEntries) {
        if (qSet.has(labelNorm))
            candidateNodeIds.add(nodeId);
    }
    // Prefix matches, bounded by maxCandidates. Repeated tokens cannot add
    // anything new, so iterating the de-duplicated set gives the same result.
    prefixScan: for (const token of [...qSet].sort()) {
        for (const [labelNorm, nodeId] of labelEntries) {
            if (labelNorm.startsWith(token))
                candidateNodeIds.add(nodeId);
            if (candidateNodeIds.size >= maxCandidates)
                break prefixScan;
        }
    }
    const edgeById = new Map(graph.edges.map((e) => [e.id, e]));
    // First node per ID wins, matching what Array.prototype.find would return.
    const nodeById = new Map();
    for (const node of graph.nodes) {
        if (!nodeById.has(node.id))
            nodeById.set(node.id, node);
    }
    const outIdx = graph.index?.out ?? {};
    const extraTerms = new Set();
    collect: for (const nodeId of [...candidateNodeIds].sort()) {
        const edgeIds = [...(outIdx[nodeId] ?? [])].sort();
        for (const edgeId of edgeIds) {
            const edge = edgeById.get(edgeId);
            if (!edge || !predicates.has(normalize(edge.p)))
                continue;
            const target = nodeById.get(edge.to);
            if (!target)
                continue;
            for (const term of expandLabelToTerms(target.label)) {
                if (!qSet.has(term))
                    extraTerms.add(term);
                if (extraTerms.size >= maxExtraTerms)
                    break collect;
            }
            // The cap is also checked after every accepted edge, so that a
            // target label yielding no terms still ends the scan once the
            // cap has been reached.
            if (extraTerms.size >= maxExtraTerms)
                break collect;
        }
    }
    if (extraTerms.size === 0)
        return queryString;
    return `${queryString} ${[...extraTerms].sort().join(' ')}`.trim();
}
package/dist/index.d.ts CHANGED
@@ -4,11 +4,17 @@ export { makeContextPatch } from './patch.js';
4
4
  export { buildPack } from './builder.js';
5
5
  export { quantizeEmbeddingInt8L2Norm, encodeScaleF16, decodeScaleF16, } from './semantic.js';
6
6
  export { listAgents, getAgent, resolveAgent, buildSystemPrompt, isToolAllowed, assertToolAllowed, validateAgentRegistry, validateAgentDefinition, } from './agent.js';
7
+ export { getClaimGraph, validateClaimGraph, } from './graph/claim_graph.js';
8
+ export { buildClaimGraph } from './graph/build_claim_graph.js';
9
+ export { createGraphLog, appendOp, applyClaimGraphLog, mergeClaimGraphLogs, serializeClaimGraphLog, deserializeClaimGraphLog, } from './graph/log.js';
10
+ export { expandQueryWithGraph } from './graph/query_expand.js';
7
11
  export type { MountOptions, PackMeta, Pack } from './pack.runtime.js';
8
12
  export type { QueryOptions, Hit } from './query.js';
9
13
  export type { ContextPatch } from './patch.js';
10
14
  export type { BuildInputDoc, BuildPackOptions } from './builder.js';
11
15
  export type { AgentPromptTemplate, AgentToolPolicy, AgentRetrievalDefaults, AgentDefinitionV1, AgentRegistry, ResolveAgentInput, ResolvedAgent, } from './agent.js';
16
+ export type { ClaimGraph, ClaimNode, ClaimEdge } from './graph/claim_graph.js';
17
+ export type { ClaimGraphLog, ClaimOp } from './graph/log.js';
12
18
  export { parseToolCallV1FromText } from './tool_parse.js';
13
19
  export { nowIso, createTrace } from './trace.js';
14
20
  export { assertToolCallAllowed } from './tool_gate.js';
package/dist/index.js CHANGED
@@ -5,6 +5,10 @@ export { makeContextPatch } from './patch.js';
5
5
  export { buildPack } from './builder.js';
6
6
  export { quantizeEmbeddingInt8L2Norm, encodeScaleF16, decodeScaleF16, } from './semantic.js';
7
7
  export { listAgents, getAgent, resolveAgent, buildSystemPrompt, isToolAllowed, assertToolAllowed, validateAgentRegistry, validateAgentDefinition, } from './agent.js';
8
+ export { getClaimGraph, validateClaimGraph, } from './graph/claim_graph.js';
9
+ export { buildClaimGraph } from './graph/build_claim_graph.js';
10
+ export { createGraphLog, appendOp, applyClaimGraphLog, mergeClaimGraphLogs, serializeClaimGraphLog, deserializeClaimGraphLog, } from './graph/log.js';
11
+ export { expandQueryWithGraph } from './graph/query_expand.js';
8
12
  export { parseToolCallV1FromText } from './tool_parse.js';
9
13
  export { nowIso, createTrace } from './trace.js';
10
14
  export { assertToolCallAllowed } from './tool_gate.js';
@@ -1,4 +1,5 @@
1
1
  import type { AgentRegistry } from './agent.js';
2
+ import type { ClaimGraph } from './graph/claim_graph.js';
2
3
  export type MountOptions = {
3
4
  src: string | ArrayBufferLike | Uint8Array;
4
5
  };
@@ -11,6 +12,11 @@ export type PackMeta = {
11
12
  avgBlockLen?: number;
12
13
  };
13
14
  agents?: AgentRegistry;
15
+ claimGraph?: {
16
+ version: 1;
17
+ nodes: number;
18
+ edges: number;
19
+ };
14
20
  };
15
21
  export type Pack = {
16
22
  meta: PackMeta;
@@ -30,6 +36,7 @@ export type Pack = {
30
36
  vecs: Int8Array;
31
37
  scales?: Uint16Array;
32
38
  };
39
+ claimGraph?: ClaimGraph;
33
40
  };
34
41
  export declare function hasSemantic(pack: Pack): boolean;
35
42
  export declare function mountPack(opts: MountOptions): Promise<Pack>;
@@ -6,6 +6,7 @@
6
6
  */
7
7
  import { getTextDecoder } from './utils/utf8.js';
8
8
  import { validateAgentRegistry } from './agent.js';
9
+ import { validateClaimGraph } from './graph/claim_graph.js';
9
10
  export function hasSemantic(pack) {
10
11
  return Boolean(pack.semantic && pack.semantic.dims > 0 && pack.semantic.vecs.length > 0);
11
12
  }
@@ -75,16 +76,50 @@ export function mountPackFromBuffer(buf) {
75
76
  }
76
77
  }
77
78
  let semantic;
78
- if (offset < buf.byteLength) {
79
- const semLen = dv.getUint32(offset, true);
79
+ let claimGraph;
80
+ while (offset < buf.byteLength) {
81
+ const sectionStart = offset;
82
+ if (buf.byteLength - offset < 4)
83
+ break;
84
+ const jsonLen = dv.getUint32(offset, true);
80
85
  offset += 4;
81
- const semJson = dec.decode(new Uint8Array(buf, offset, semLen));
82
- offset += semLen;
83
- const sem = JSON.parse(semJson);
84
- const semBlobLen = dv.getUint32(offset, true);
85
- offset += 4;
86
- const semBlob = new Uint8Array(buf, offset, semBlobLen);
87
- semantic = parseSemanticSection(sem, semBlob);
86
+ if (jsonLen < 0 || offset + jsonLen > buf.byteLength) {
87
+ offset = sectionStart;
88
+ break;
89
+ }
90
+ let parsed;
91
+ try {
92
+ const json = dec.decode(new Uint8Array(buf, offset, jsonLen));
93
+ parsed = JSON.parse(json);
94
+ }
95
+ catch {
96
+ offset = sectionStart;
97
+ break;
98
+ }
99
+ offset += jsonLen;
100
+ if (!semantic && looksLikeSemanticJson(parsed)) {
101
+ if (buf.byteLength - offset < 4) {
102
+ offset = sectionStart;
103
+ break;
104
+ }
105
+ const semBlobLen = dv.getUint32(offset, true);
106
+ offset += 4;
107
+ if (semBlobLen < 0 || offset + semBlobLen > buf.byteLength) {
108
+ offset = sectionStart;
109
+ break;
110
+ }
111
+ const semBlob = new Uint8Array(buf, offset, semBlobLen);
112
+ offset += semBlobLen;
113
+ semantic = parseSemanticSection(parsed, semBlob);
114
+ continue;
115
+ }
116
+ const graph = validateClaimGraph(parsed);
117
+ if (!claimGraph && graph) {
118
+ claimGraph = graph;
119
+ continue;
120
+ }
121
+ offset = sectionStart;
122
+ break;
88
123
  }
89
124
  return {
90
125
  meta,
@@ -96,8 +131,18 @@ export function mountPackFromBuffer(buf) {
96
131
  namespaces,
97
132
  blockTokenLens,
98
133
  semantic,
134
+ claimGraph,
99
135
  };
100
136
  }
137
/**
 * Heuristic check that a parsed JSON value is a semantic-section header:
 * an object with `version === 1`, `encoding === 'int8_l2norm'`, and numeric
 * `blocks.vectors.byteOffset` and `blocks.vectors.length`.
 */
function looksLikeSemanticJson(parsed) {
    if (!parsed || typeof parsed !== 'object') {
        return false;
    }
    if (parsed.version !== 1 || parsed.encoding !== 'int8_l2norm') {
        return false;
    }
    const vectors = parsed.blocks?.vectors;
    const hasOffset = typeof vectors?.byteOffset === 'number';
    const hasLength = typeof vectors?.length === 'number';
    return hasOffset && hasLength;
}
101
146
  function parseSemanticSection(sem, blob) {
102
147
  const vectors = sem?.blocks?.vectors;
103
148
  const scales = sem?.blocks?.scales;
package/dist/query.d.ts CHANGED
@@ -12,6 +12,11 @@ export type QueryOptions = {
12
12
  weight?: number;
13
13
  minTermLength?: number;
14
14
  };
15
+ graph?: {
16
+ expand?: boolean;
17
+ maxExtraTerms?: number;
18
+ predicates?: string[];
19
+ };
15
20
  semantic?: {
16
21
  enabled?: boolean;
17
22
  mode?: "rerank";
package/dist/query.js CHANGED
@@ -14,6 +14,7 @@ import { minCoverSpan, proximityMultiplier } from "./quality/proximity.js";
14
14
  import { diversifyAndDedupe } from "./quality/diversify.js";
15
15
  import { knsSignature, knsDistance } from "./quality/signature.js";
16
16
  import { decodeScaleF16, quantizeEmbeddingInt8L2Norm } from "./semantic.js";
17
+ import { expandQueryWithGraph } from "./graph/query_expand.js";
17
18
  export function validateQueryOptions(opts) {
18
19
  if (!opts)
19
20
  return;
@@ -46,6 +47,17 @@ export function validateQueryOptions(opts) {
46
47
  throw new Error("query(...): queryExpansion.minTermLength must be a positive integer.");
47
48
  }
48
49
  }
50
+ if (opts.graph) {
51
+ if (opts.graph.expand !== undefined && typeof opts.graph.expand !== "boolean") {
52
+ throw new Error("query(...): graph.expand must be a boolean when provided.");
53
+ }
54
+ if (opts.graph.maxExtraTerms !== undefined && (!Number.isInteger(opts.graph.maxExtraTerms) || opts.graph.maxExtraTerms < 1)) {
55
+ throw new Error("query(...): graph.maxExtraTerms must be a positive integer.");
56
+ }
57
+ if (opts.graph.predicates !== undefined && (!Array.isArray(opts.graph.predicates) || opts.graph.predicates.some((p) => typeof p !== "string"))) {
58
+ throw new Error("query(...): graph.predicates must be an array of strings when provided.");
59
+ }
60
+ }
49
61
  validateSemanticQueryOptions(opts.semantic);
50
62
  }
51
63
  export function validateSemanticQueryOptions(options) {
@@ -105,8 +117,14 @@ export function query(pack, q, opts = {}) {
105
117
  queryEmbedding: opts.semantic?.queryEmbedding,
106
118
  force: opts.semantic?.force ?? false,
107
119
  };
120
+ const graphQuery = opts.graph?.expand === true
121
+ ? expandQueryWithGraph(pack, q, {
122
+ maxExtraTerms: opts.graph?.maxExtraTerms,
123
+ predicates: opts.graph?.predicates,
124
+ })
125
+ : q;
108
126
  // --- Query parsing
109
- const normTokens = tokenize(q).map((t) => t.term);
127
+ const normTokens = tokenize(graphQuery).map((t) => t.term);
110
128
  // Normalize quoted phrases from q
111
129
  const quotedRaw = parsePhrases(q);
112
130
  const quoted = quotedRaw.map((seq) => seq.map((t) => normalize(t)).flatMap((s) => s.split(/\s+/)).filter(Boolean));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@knolo/core",
3
- "version": "3.2.0",
3
+ "version": "3.2.1",
4
4
  "type": "module",
5
5
  "description": "Local-first knowledge packs for small LLMs.",
6
6
  "keywords": [