@knolo/core 3.2.0 → 3.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +65 -2
- package/dist/builder.d.ts +4 -0
- package/dist/builder.js +23 -1
- package/dist/graph/build_claim_graph.d.ts +5 -0
- package/dist/graph/build_claim_graph.js +88 -0
- package/dist/graph/claim_graph.d.ts +34 -0
- package/dist/graph/claim_graph.js +65 -0
- package/dist/graph/log.d.ts +33 -0
- package/dist/graph/log.js +106 -0
- package/dist/graph/query_expand.d.ts +6 -0
- package/dist/graph/query_expand.js +57 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.js +4 -0
- package/dist/pack.runtime.d.ts +7 -0
- package/dist/pack.runtime.js +54 -9
- package/dist/query.d.ts +5 -0
- package/dist/query.js +19 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -319,10 +319,73 @@ Properties:
|
|
|
319
319
|
|
|
320
320
|
---
|
|
321
321
|
|
|
322
|
-
#
|
|
322
|
+
# 🕸 ClaimGraph API
|
|
323
323
|
|
|
324
|
-
|
|
324
|
+
`@knolo/core` includes a deterministic ClaimGraph subsystem.
|
|
325
|
+
|
|
326
|
+
## Build-time config
|
|
327
|
+
|
|
328
|
+
```ts
|
|
329
|
+
type BuildPackOptions = {
|
|
330
|
+
graph?: {
|
|
331
|
+
enabled?: boolean; // default true
|
|
332
|
+
maxEdgesPerDoc?: number; // default 500
|
|
333
|
+
};
|
|
334
|
+
};
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
## Query-time config
|
|
338
|
+
|
|
339
|
+
```ts
|
|
340
|
+
type QueryOptions = {
|
|
341
|
+
graph?: {
|
|
342
|
+
expand?: boolean; // default false
|
|
343
|
+
maxExtraTerms?: number; // default 12
|
|
344
|
+
predicates?: string[]; // default ['defined_as', 'is', 'mentions', 'ref']
|
|
345
|
+
};
|
|
346
|
+
};
|
|
347
|
+
```
|
|
348
|
+
|
|
349
|
+
## Exports
|
|
325
350
|
|
|
351
|
+
```ts
|
|
352
|
+
import {
|
|
353
|
+
buildClaimGraph,
|
|
354
|
+
getClaimGraph,
|
|
355
|
+
applyClaimGraphLog,
|
|
356
|
+
mergeClaimGraphLogs,
|
|
357
|
+
expandQueryWithGraph,
|
|
358
|
+
createGraphLog,
|
|
359
|
+
appendOp,
|
|
360
|
+
} from '@knolo/core';
|
|
361
|
+
```
|
|
362
|
+
|
|
363
|
+
Types:
|
|
364
|
+
|
|
365
|
+
* `ClaimNode`
|
|
366
|
+
* `ClaimEdge`
|
|
367
|
+
* `ClaimGraph`
|
|
368
|
+
* `ClaimOp`
|
|
369
|
+
* `ClaimGraphLog`
|
|
370
|
+
|
|
371
|
+
## Notes on determinism and bounds
|
|
372
|
+
|
|
373
|
+
* Node IDs are hash-derived from normalized labels.
|
|
374
|
+
* Edge IDs are hash-derived from `(from, predicate, to, evidence)`.
|
|
375
|
+
* Node labels are normalized and deterministically truncated.
|
|
376
|
+
* Evidence arrays are sorted + unique.
|
|
377
|
+
* Node/edge arrays are sorted by ID in final graph.
|
|
378
|
+
* Extraction is bounded with `maxEdgesPerDoc`.
|
|
379
|
+
* Query expansion is bounded with `maxExtraTerms` and stable ordering.
|
|
326
380
|
|
|
381
|
+
## Pack format note
|
|
327
382
|
|
|
383
|
+
`.knolo` binary layout now supports an optional trailing ClaimGraph JSON section after existing sections.
|
|
384
|
+
Runtimes that ignore unknown trailing bytes remain compatible.
|
|
385
|
+
|
|
386
|
+
---
|
|
387
|
+
|
|
388
|
+
# 📄 License
|
|
389
|
+
|
|
390
|
+
Apache-2.0
|
|
328
391
|
|
package/dist/builder.d.ts
CHANGED
package/dist/builder.js
CHANGED
|
@@ -9,6 +9,7 @@ import { tokenize } from './tokenize.js';
|
|
|
9
9
|
import { getTextEncoder } from './utils/utf8.js';
|
|
10
10
|
import { encodeScaleF16, quantizeEmbeddingInt8L2Norm } from './semantic.js';
|
|
11
11
|
import { validateAgentRegistry } from './agent.js';
|
|
12
|
+
import { buildClaimGraph } from './graph/build_claim_graph.js';
|
|
12
13
|
export async function buildPack(docs, opts = {}) {
|
|
13
14
|
const normalizedDocs = validateDocs(docs);
|
|
14
15
|
// Prepare blocks (strip MD) and carry heading/docId for optional boosts.
|
|
@@ -23,6 +24,10 @@ export async function buildPack(docs, opts = {}) {
|
|
|
23
24
|
const totalTokens = blockTokenLens.reduce((sum, len) => sum + len, 0);
|
|
24
25
|
const avgBlockLen = blocks.length ? totalTokens / blocks.length : 1;
|
|
25
26
|
const agents = normalizeAgents(opts.agents);
|
|
27
|
+
const graphEnabled = opts.graph?.enabled ?? true;
|
|
28
|
+
const claimGraph = graphEnabled
|
|
29
|
+
? buildClaimGraph(normalizedDocs, { maxEdgesPerDoc: opts.graph?.maxEdgesPerDoc })
|
|
30
|
+
: null;
|
|
26
31
|
const meta = {
|
|
27
32
|
version: 3,
|
|
28
33
|
stats: {
|
|
@@ -32,6 +37,15 @@ export async function buildPack(docs, opts = {}) {
|
|
|
32
37
|
avgBlockLen,
|
|
33
38
|
},
|
|
34
39
|
...(agents ? { agents } : {}),
|
|
40
|
+
...(claimGraph
|
|
41
|
+
? {
|
|
42
|
+
claimGraph: {
|
|
43
|
+
version: 1,
|
|
44
|
+
nodes: claimGraph.nodes.length,
|
|
45
|
+
edges: claimGraph.edges.length,
|
|
46
|
+
},
|
|
47
|
+
}
|
|
48
|
+
: {}),
|
|
35
49
|
};
|
|
36
50
|
// Persist blocks as objects to optionally carry heading/docId/token length.
|
|
37
51
|
const blocksPayload = blocks.map((b, i) => ({
|
|
@@ -54,6 +68,7 @@ export async function buildPack(docs, opts = {}) {
|
|
|
54
68
|
? enc.encode(JSON.stringify(semanticSection.semJson))
|
|
55
69
|
: undefined;
|
|
56
70
|
const semBlob = semanticSection?.semBlob;
|
|
71
|
+
const graphBytes = claimGraph ? enc.encode(JSON.stringify(claimGraph)) : undefined;
|
|
57
72
|
const totalLength = 4 +
|
|
58
73
|
metaBytes.length +
|
|
59
74
|
4 +
|
|
@@ -64,7 +79,8 @@ export async function buildPack(docs, opts = {}) {
|
|
|
64
79
|
blocksBytes.length +
|
|
65
80
|
(semanticEnabled && semBytes && semBlob
|
|
66
81
|
? 4 + semBytes.length + 4 + semBlob.length
|
|
67
|
-
: 0)
|
|
82
|
+
: 0) +
|
|
83
|
+
(graphBytes ? 4 + graphBytes.length : 0);
|
|
68
84
|
const out = new Uint8Array(totalLength);
|
|
69
85
|
const dv = new DataView(out.buffer);
|
|
70
86
|
let offset = 0;
|
|
@@ -98,6 +114,12 @@ export async function buildPack(docs, opts = {}) {
|
|
|
98
114
|
dv.setUint32(offset, semBlob.length, true);
|
|
99
115
|
offset += 4;
|
|
100
116
|
out.set(semBlob, offset);
|
|
117
|
+
offset += semBlob.length;
|
|
118
|
+
}
|
|
119
|
+
if (graphBytes) {
|
|
120
|
+
dv.setUint32(offset, graphBytes.length, true);
|
|
121
|
+
offset += 4;
|
|
122
|
+
out.set(graphBytes, offset);
|
|
101
123
|
}
|
|
102
124
|
return out;
|
|
103
125
|
}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import { canonicalEvidence, computeEdgeId, computeNodeId, finalizeGraph, normalizeClaimLabel, } from './claim_graph.js';
|
|
2
|
+
// Sentence-anchored "<subject> is|are <object><terminator>" pattern.
// Capture 1 is the subject, capture 2 the verb, capture 3 the object snippet.
const DEF_RE = /^([A-Za-z0-9 _-]{2,80})\s+(is|are)\s+(.{2,120})[.?!]/;

// Markdown inline link "[text](target)": capture 1 is the text, capture 2 the target.
const MD_LINK_RE = /\[([^\]]{1,200})\]\(([^)\s]{1,200})\)/g;

// Wiki-style reference "[[target]]": capture 1 is the target.
const WIKI_RE = /\[\[([^\]]{1,200})\]\]/g;

// Markdown heading of level 1-3 on its own line: capture 2 is the heading text.
const HEADING_RE = /^(#{1,3})\s+(.+)$/gm;

// Words that must not be the only content of a definition subject.
const STOPWORDS = new Set([
    'a',
    'an',
    'and',
    'or',
    'the',
    'it',
    'they',
    'this',
    'that',
    'these',
    'those',
]);
|
|
7
|
+
/**
 * Extracts a claim graph from a list of documents.
 *
 * For every document the text is scanned for:
 *  - markdown links          -> `ref` edges (link text -> link target)
 *  - wiki references         -> `mentions` edges (document label -> target)
 *  - level 1-3 headings      -> `defined_as` edges (heading -> first sentence after it)
 *  - "<X> is|are <Y>." sentences -> `is` edges (subject -> object snippet)
 *
 * Per document, edges are de-duplicated by id, sorted by id and capped at
 * `maxEdgesPerDoc`. Nodes are only registered for edges that survive the cap,
 * so the result contains no nodes that belong solely to dropped edges.
 *
 * @param {Array<{id?: string, heading?: string, text: string}>} docs - Input documents.
 * @param {{maxEdgesPerDoc?: number}} [opts] - Extraction options; the cap defaults to 500
 *   and is clamped to at least 1. A value that evaluates to NaN falls back to 500.
 * @returns {object} The graph returned by `finalizeGraph`.
 */
export function buildClaimGraph(docs, opts = {}) {
    const requestedCap = Math.max(1, opts.maxEdgesPerDoc ?? 500);
    // Math.max propagates NaN, and slice(0, NaN) would silently drop every edge.
    const maxEdgesPerDoc = Number.isNaN(requestedCap) ? 500 : requestedCap;
    const nodeById = new Map();
    const edgeById = new Map();
    for (let i = 0; i < docs.length; i++) {
        const doc = docs[i];
        const docLabel = normalizeLabel(doc.id || doc.heading || `doc_${i}`);
        const local = [];
        // Nodes are staged per document and only promoted once their edge is kept.
        const localNodes = new Map();
        for (const m of doc.text.matchAll(MD_LINK_RE)) {
            addEdge(local, localNodes, normalizeLabel(m[1]), 'ref', normalizeLabel(m[2]), [i]);
        }
        for (const m of doc.text.matchAll(WIKI_RE)) {
            addEdge(local, localNodes, docLabel, 'mentions', normalizeLabel(m[1]), [i]);
        }
        for (const h of doc.text.matchAll(HEADING_RE)) {
            const headingLabel = normalizeLabel(h[2] || '');
            const headingStart = h.index ?? 0;
            const sentence = firstSentenceAfter(doc.text, headingStart + h[0].length);
            if (sentence) {
                addEdge(local, localNodes, headingLabel, 'defined_as', normalizeLabel(sentence), [i]);
            }
        }
        for (const sentence of splitSentences(doc.text)) {
            const m = sentence.match(DEF_RE);
            if (!m) {
                continue;
            }
            const subject = normalizeLabel(m[1]);
            if (!subject || isStopwordOnly(subject)) {
                continue;
            }
            addEdge(local, localNodes, subject, 'is', normalizeLabel(m[3]), [i]);
        }
        // Collapse repeated edges first so duplicates do not consume cap slots.
        const uniqueLocal = new Map();
        for (const edge of local) {
            if (!uniqueLocal.has(edge.id)) {
                uniqueLocal.set(edge.id, edge);
            }
        }
        const kept = [...uniqueLocal.values()]
            .sort((a, b) => a.id.localeCompare(b.id))
            .slice(0, maxEdgesPerDoc);
        for (const edge of kept) {
            for (const nodeId of [edge.from, edge.to]) {
                if (!nodeById.has(nodeId)) {
                    nodeById.set(nodeId, localNodes.get(nodeId));
                }
            }
            const existing = edgeById.get(edge.id);
            if (existing) {
                existing.evidence = canonicalEvidence([...(existing.evidence ?? []), ...(edge.evidence ?? [])]);
            }
            else {
                edgeById.set(edge.id, edge);
            }
        }
    }
    return finalizeGraph({ version: 1, nodes: [...nodeById.values()], edges: [...edgeById.values()] });
}
|
|
53
|
+
/**
 * Appends one edge to `local`, registering both endpoint nodes in `nodeById`.
 * Does nothing when either label is empty.
 *
 * @param {object[]} local - Edge accumulator that receives the new edge.
 * @param {Map<string, object>} nodeById - Node registry updated via `ensureNode`.
 * @param {string} fromLabel - Label of the source node.
 * @param {string} p - Predicate of the edge.
 * @param {string} toLabel - Label of the target node.
 * @param {number[]} evidence - Evidence entries, canonicalised before use.
 */
function addEdge(local, nodeById, fromLabel, p, toLabel, evidence) {
    const hasBothEndpoints = Boolean(fromLabel) && Boolean(toLabel);
    if (!hasBothEndpoints) {
        return;
    }
    const from = ensureNode(nodeById, fromLabel);
    const to = ensureNode(nodeById, toLabel);
    const normalizedEvidence = canonicalEvidence(evidence);
    const edge = {
        id: computeEdgeId(from, p, to, normalizedEvidence),
        from,
        p,
        to,
        evidence: normalizedEvidence,
    };
    local.push(edge);
}
|
|
62
|
+
/**
 * Returns the id for `label`, adding a `{ id, label }` node to `nodeById`
 * the first time that id is seen.
 *
 * @param {Map<string, object>} nodeById - Node registry keyed by node id.
 * @param {string} label - Node label used to derive the id.
 * @returns {string} The node id computed by `computeNodeId`.
 */
function ensureNode(nodeById, label) {
    const nodeId = computeNodeId(label);
    if (nodeById.has(nodeId)) {
        return nodeId;
    }
    nodeById.set(nodeId, { id: nodeId, label });
    return nodeId;
}
|
|
68
|
+
/**
 * Normalises a raw label through `normalizeClaimLabel` with a 200-character limit.
 *
 * @param {string} input - Raw label text.
 * @returns {string} The normalised label.
 */
function normalizeLabel(input) {
    const maxLabelLength = 200;
    return normalizeClaimLabel(input, maxLabelLength);
}
|
|
71
|
+
/**
 * Splits text into trimmed, non-empty sentences.
 * A boundary is any whitespace run that directly follows `.`, `?` or `!`.
 *
 * @param {string} text - Text to split; CRLF pairs are converted to LF first.
 * @returns {string[]} Sentences in document order.
 */
function splitSentences(text) {
    const unixText = text.replace(/\r\n/g, '\n');
    const sentences = [];
    for (const piece of unixText.split(/(?<=[.?!])\s+/)) {
        const trimmed = piece.trim();
        if (trimmed) {
            sentences.push(trimmed);
        }
    }
    return sentences;
}
|
|
78
|
+
/**
 * Returns the first sentence that follows position `startIdx`, limited to 240 characters.
 * The remainder of the line containing `startIdx` (and the newlines after it) is skipped.
 *
 * @param {string} text - Full document text.
 * @param {number} startIdx - Offset to start looking from.
 * @returns {string} The first sentence, or '' when nothing follows.
 */
function firstSentenceAfter(text, startIdx) {
    const afterStart = text.slice(startIdx);
    const body = afterStart.replace(/^[^\n]*\n+/, '').trim();
    if (body === '') {
        return '';
    }
    const [leading = ''] = splitSentences(body);
    return leading.slice(0, 240);
}
|
|
85
|
+
/**
 * Tells whether a subject consists of one or more words that are all in `STOPWORDS`.
 *
 * @param {string} subject - Whitespace-separated subject text.
 * @returns {boolean} True only when there is at least one word and every word is a stopword.
 */
function isStopwordOnly(subject) {
    const words = subject.split(/\s+/).filter((word) => word !== '');
    if (words.length === 0) {
        return false;
    }
    for (const word of words) {
        if (!STOPWORDS.has(word)) {
            return false;
        }
    }
    return true;
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import type { Pack } from '../pack.runtime.js';
|
|
2
|
+
export type ClaimNode = {
|
|
3
|
+
id: string;
|
|
4
|
+
label: string;
|
|
5
|
+
props?: Record<string, string>;
|
|
6
|
+
};
|
|
7
|
+
export type ClaimEdge = {
|
|
8
|
+
id: string;
|
|
9
|
+
from: string;
|
|
10
|
+
p: string;
|
|
11
|
+
to: string;
|
|
12
|
+
evidence?: number[];
|
|
13
|
+
actor?: string;
|
|
14
|
+
ts?: number;
|
|
15
|
+
};
|
|
16
|
+
export type ClaimGraph = {
|
|
17
|
+
version: 1;
|
|
18
|
+
nodes: ClaimNode[];
|
|
19
|
+
edges: ClaimEdge[];
|
|
20
|
+
index?: {
|
|
21
|
+
labelToId?: Record<string, string>;
|
|
22
|
+
out?: Record<string, string[]>;
|
|
23
|
+
in?: Record<string, string[]>;
|
|
24
|
+
};
|
|
25
|
+
};
|
|
26
|
+
export declare function normalizeClaimLabel(label: string, maxLen?: number): string;
|
|
27
|
+
export declare function computeNodeId(label: string): string;
|
|
28
|
+
export declare function computeEdgeId(from: string, p: string, to: string, evidence?: number[]): string;
|
|
29
|
+
export declare function canonicalEvidence(evidence?: number[]): number[];
|
|
30
|
+
export declare function buildGraphIndex(graph: ClaimGraph): ClaimGraph['index'];
|
|
31
|
+
export declare function finalizeGraph(graph: ClaimGraph): ClaimGraph;
|
|
32
|
+
export declare function getClaimGraph(pack: Pack): ClaimGraph | null;
|
|
33
|
+
export declare function validateClaimGraph(input: unknown): ClaimGraph | null;
|
|
34
|
+
export declare function expandLabelToTerms(label: string): string[];
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { normalize, tokenize } from '../tokenize.js';
|
|
2
|
+
/**
 * Normalises a label: applies `normalize`, collapses whitespace runs to single
 * spaces, trims, and truncates to `maxLen` characters.
 *
 * @param {string} label - Raw label.
 * @param {number} [maxLen=200] - Maximum length of the result.
 * @returns {string} The normalised, truncated label.
 */
export function normalizeClaimLabel(label, maxLen = 200) {
    const normalized = normalize(label);
    const singleSpaced = normalized.replace(/\s+/g, ' ').trim();
    return singleSpaced.slice(0, maxLen);
}
|
|
6
|
+
/**
 * Derives a node id (`n_` + 8 hex digits) from the normalised label.
 *
 * @param {string} label - Node label.
 * @returns {string} The node id.
 */
export function computeNodeId(label) {
    const digest = hash32Hex(normalizeClaimLabel(label));
    return `n_${digest}`;
}
|
|
9
|
+
/**
 * Derives an edge id (`e_` + 8 hex digits) from the endpoints, predicate and
 * canonicalised evidence, joined with newlines before hashing.
 *
 * @param {string} from - Source node id.
 * @param {string} p - Predicate.
 * @param {string} to - Target node id.
 * @param {number[]} [evidence] - Evidence entries.
 * @returns {string} The edge id.
 */
export function computeEdgeId(from, p, to, evidence) {
    const evidenceKey = canonicalEvidence(evidence).join(',');
    const material = `${from}\n${p}\n${to}\n${evidenceKey}`;
    return `e_${hash32Hex(material)}`;
}
|
|
13
|
+
/**
 * Produces the canonical form of an evidence list: non-negative integers only,
 * de-duplicated and sorted ascending.
 *
 * @param {number[]} [evidence] - Raw evidence entries.
 * @returns {number[]} A new canonical array (empty when the input is missing or empty).
 */
export function canonicalEvidence(evidence) {
    if (!evidence?.length) {
        return [];
    }
    const isNonNegativeInteger = (value) => Number.isInteger(value) && value >= 0;
    const unique = new Set(evidence.filter(isNonNegativeInteger));
    const ascending = (left, right) => left - right;
    return [...unique].sort(ascending);
}
|
|
18
|
+
/**
 * Builds lookup tables for a graph:
 *  - `labelToId`: normalised node label -> node id
 *  - `out`: source node id -> sorted ids of edges leaving it
 *  - `in`: target node id -> sorted ids of edges entering it
 *
 * @param {{nodes: object[], edges: object[]}} graph - Graph to index.
 * @returns {{labelToId: object, out: object, in: object}} The index tables.
 */
export function buildGraphIndex(graph) {
    const labelToId = {};
    for (const node of graph.nodes) {
        const key = normalizeClaimLabel(node.label);
        labelToId[key] = node.id;
    }
    const outgoing = {};
    const incoming = {};
    const record = (bucket, nodeId, edgeId) => {
        if (!bucket[nodeId]) {
            bucket[nodeId] = [];
        }
        bucket[nodeId].push(edgeId);
    };
    for (const edge of graph.edges) {
        record(outgoing, edge.from, edge.id);
        record(incoming, edge.to, edge.id);
    }
    Object.keys(outgoing).forEach((nodeId) => {
        outgoing[nodeId].sort();
    });
    Object.keys(incoming).forEach((nodeId) => {
        incoming[nodeId].sort();
    });
    return { labelToId, out: outgoing, in: incoming };
}
|
|
35
|
+
/**
 * Returns a new version-1 graph with nodes and edges sorted by id, edge
 * evidence canonicalised, and a freshly built `index`.
 * The input graph and its arrays are not modified.
 *
 * @param {{nodes: object[], edges: object[]}} graph - Graph to finalise.
 * @returns {object} The finalised graph including its `index`.
 */
export function finalizeGraph(graph) {
    const byId = (left, right) => left.id.localeCompare(right.id);
    const sortedNodes = Array.from(graph.nodes).sort(byId);
    const sortedEdges = Array.from(graph.edges, (edge) => ({
        ...edge,
        evidence: canonicalEvidence(edge.evidence),
    })).sort(byId);
    const finalized = { version: 1, nodes: sortedNodes, edges: sortedEdges };
    finalized.index = buildGraphIndex(finalized);
    return finalized;
}
|
|
44
|
+
/**
 * Reads the claim graph attached to a pack.
 *
 * @param {{claimGraph?: object}} pack - Pack object.
 * @returns {object|null} The pack's `claimGraph`, or null when it is null/undefined.
 */
export function getClaimGraph(pack) {
    const { claimGraph } = pack;
    if (claimGraph === undefined || claimGraph === null) {
        return null;
    }
    return claimGraph;
}
|
|
47
|
+
/**
 * Validates an untrusted value as a version-1 claim graph.
 *
 * Besides the top-level shape, every node must be an object with string `id`
 * and `label`, and every edge an object with string `id`, `from`, `p`, `to`
 * and an `evidence` that is absent, null or an array. Without these checks a
 * malformed payload would throw a TypeError while being sorted/indexed
 * instead of being rejected.
 *
 * @param {unknown} input - Candidate graph, e.g. parsed JSON.
 * @returns {object|null} The finalised graph, or null when `input` is not a valid graph.
 */
export function validateClaimGraph(input) {
    if (!input || typeof input !== 'object') {
        return null;
    }
    const g = input;
    if (g.version !== 1 || !Array.isArray(g.nodes) || !Array.isArray(g.edges)) {
        return null;
    }
    const isRecord = (value) => value !== null && typeof value === 'object';
    const isValidNode = (node) => isRecord(node) &&
        typeof node.id === 'string' &&
        typeof node.label === 'string';
    const isValidEdge = (edge) => isRecord(edge) &&
        typeof edge.id === 'string' &&
        typeof edge.from === 'string' &&
        typeof edge.p === 'string' &&
        typeof edge.to === 'string' &&
        (edge.evidence == null || Array.isArray(edge.evidence));
    if (!g.nodes.every(isValidNode) || !g.edges.every(isValidEdge)) {
        return null;
    }
    return finalizeGraph({ version: 1, nodes: g.nodes, edges: g.edges });
}
|
|
55
|
+
/**
 * Converts a label into search terms by tokenising its normalised form
 * and taking each token's `term`.
 *
 * @param {string} label - Node label.
 * @returns {string[]} Terms in token order.
 */
export function expandLabelToTerms(label) {
    const tokens = tokenize(normalizeClaimLabel(label));
    return tokens.map(({ term }) => term);
}
|
|
58
|
+
/**
 * 32-bit FNV-1a style hash over the string's UTF-16 code units
 * (xor each unit into the state, then multiply by the prime).
 *
 * @param {string} input - Text to hash.
 * @returns {string} The unsigned hash as 8 lowercase hex digits.
 */
function hash32Hex(input) {
    const offsetBasis = 0x811c9dc5;
    const prime = 0x01000193;
    let state = offsetBasis;
    for (let pos = 0; pos < input.length; pos += 1) {
        state = Math.imul(state ^ input.charCodeAt(pos), prime);
    }
    // >>> 0 reinterprets the signed 32-bit state as unsigned before formatting.
    const unsigned = state >>> 0;
    return unsigned.toString(16).padStart(8, '0');
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import type { Pack } from '../pack.runtime.js';
|
|
2
|
+
import type { ClaimGraph } from './claim_graph.js';
|
|
3
|
+
export type ClaimOp = {
|
|
4
|
+
op: 'upsert_node';
|
|
5
|
+
id?: string;
|
|
6
|
+
label: string;
|
|
7
|
+
props?: Record<string, string>;
|
|
8
|
+
ts: number;
|
|
9
|
+
actor: string;
|
|
10
|
+
} | {
|
|
11
|
+
op: 'add_edge';
|
|
12
|
+
from: string;
|
|
13
|
+
p: string;
|
|
14
|
+
to: string;
|
|
15
|
+
evidence?: number[];
|
|
16
|
+
ts: number;
|
|
17
|
+
actor: string;
|
|
18
|
+
} | {
|
|
19
|
+
op: 'tombstone_edge';
|
|
20
|
+
edgeId: string;
|
|
21
|
+
ts: number;
|
|
22
|
+
actor: string;
|
|
23
|
+
};
|
|
24
|
+
export type ClaimGraphLog = {
|
|
25
|
+
version: 1;
|
|
26
|
+
ops: ClaimOp[];
|
|
27
|
+
};
|
|
28
|
+
export declare function createGraphLog(): ClaimGraphLog;
|
|
29
|
+
export declare function appendOp(log: ClaimGraphLog, op: ClaimOp): ClaimGraphLog;
|
|
30
|
+
export declare function mergeClaimGraphLogs(a: ClaimGraphLog, b: ClaimGraphLog): ClaimGraphLog;
|
|
31
|
+
export declare function serializeClaimGraphLog(log: ClaimGraphLog): Uint8Array;
|
|
32
|
+
export declare function deserializeClaimGraphLog(data: Uint8Array): ClaimGraphLog;
|
|
33
|
+
export declare function applyClaimGraphLog(graphOrPack: ClaimGraph | Pack, log: ClaimGraphLog): ClaimGraph;
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import { canonicalEvidence, computeEdgeId, computeNodeId, finalizeGraph, normalizeClaimLabel, } from './claim_graph.js';
|
|
2
|
+
/**
 * Creates an empty version-1 claim graph log.
 *
 * @returns {{version: 1, ops: object[]}} A fresh log with no operations.
 */
export function createGraphLog() {
    const emptyLog = { version: 1, ops: [] };
    return emptyLog;
}
|
|
5
|
+
/**
 * Returns a new log consisting of `log`'s operations followed by `op`.
 * The input log is left untouched.
 *
 * @param {{ops: object[]}} log - Existing log.
 * @param {object} op - Operation to append.
 * @returns {{version: 1, ops: object[]}} The extended log.
 */
export function appendOp(log, op) {
    const ops = Array.from(log.ops);
    ops.push(op);
    return { version: 1, ops };
}
|
|
8
|
+
/**
 * Merges two logs into a new log whose operations are ordered by `compareOps`.
 *
 * @param {{ops: object[]}} a - First log.
 * @param {{ops: object[]}} b - Second log.
 * @returns {{version: 1, ops: object[]}} The merged, sorted log.
 */
export function mergeClaimGraphLogs(a, b) {
    const combined = [...a.ops, ...b.ops];
    combined.sort(compareOps);
    return { version: 1, ops: combined };
}
|
|
11
|
+
/**
 * Serialises a log to UTF-8 encoded JSON with operations ordered by `compareOps`.
 * The input log's `ops` array is not reordered.
 *
 * @param {{ops: object[]}} log - Log to serialise.
 * @returns {Uint8Array} The encoded JSON bytes.
 */
export function serializeClaimGraphLog(log) {
    const orderedOps = [...log.ops].sort(compareOps);
    const json = JSON.stringify({ version: 1, ops: orderedOps });
    return new TextEncoder().encode(json);
}
|
|
14
|
+
/**
 * Decodes a UTF-8 JSON payload into a version-1 claim graph log with
 * operations ordered by `compareOps`.
 *
 * Every operation is checked before sorting: it must be an object with a
 * numeric `ts`, a string `actor`, a known `op` kind and that kind's required
 * string fields. Without this, a malformed entry would surface as a TypeError
 * inside the comparator rather than as a payload error.
 *
 * @param {Uint8Array} data - Encoded log bytes.
 * @returns {{version: 1, ops: object[]}} The decoded log.
 * @throws {SyntaxError} When the payload is not valid JSON.
 * @throws {Error} 'Invalid ClaimGraphLog payload' when the shape is wrong.
 */
export function deserializeClaimGraphLog(data) {
    const parsed = JSON.parse(new TextDecoder().decode(data));
    const isValidOp = (op) => {
        if (!op || typeof op !== 'object') {
            return false;
        }
        if (typeof op.ts !== 'number' || typeof op.actor !== 'string') {
            return false;
        }
        switch (op.op) {
            case 'upsert_node':
                return typeof op.label === 'string' && (op.id == null || typeof op.id === 'string');
            case 'add_edge':
                return typeof op.from === 'string' &&
                    typeof op.p === 'string' &&
                    typeof op.to === 'string' &&
                    (op.evidence == null || Array.isArray(op.evidence));
            case 'tombstone_edge':
                return typeof op.edgeId === 'string';
            default:
                return false;
        }
    };
    if (!parsed || parsed.version !== 1 || !Array.isArray(parsed.ops) || !parsed.ops.every(isValidOp)) {
        throw new Error('Invalid ClaimGraphLog payload');
    }
    return { version: 1, ops: parsed.ops.sort(compareOps) };
}
|
|
21
|
+
/**
 * Applies a log of operations to a graph (or to the graph of a pack) and
 * returns the finalised result. The base graph is copied, not modified.
 *
 * Operations are processed in `compareOps` order:
 *  - `upsert_node` replaces the node when its (ts, actor) stamp is not older
 *    than the last stamp recorded for that node id;
 *  - `add_edge` records the newest add stamp per edge id and stores the edge,
 *    merging evidence with any edge already stored under that id;
 *  - any other operation is treated as a tombstone and records the newest
 *    tombstone stamp for `op.edgeId`.
 * Afterwards an edge is removed when its tombstone stamp is strictly newer
 * than its add stamp (edges without a logged add count as never added), and
 * placeholder nodes are created for endpoints of surviving edges.
 *
 * @param {object} graphOrPack - A claim graph, or a pack (detected via `isPack`).
 * @param {{ops: object[]}} log - Operations to apply.
 * @returns {object} The graph returned by `finalizeGraph`.
 */
export function applyClaimGraphLog(graphOrPack, log) {
    let baseGraph = graphOrPack;
    if (isPack(graphOrPack)) {
        baseGraph = graphOrPack.claimGraph ?? { version: 1, nodes: [], edges: [] };
    }
    const cloneNode = (node) => [node.id, { ...node, props: node.props ? { ...node.props } : undefined }];
    const cloneEdge = (edge) => [edge.id, { ...edge, evidence: canonicalEvidence(edge.evidence) }];
    const nodeById = new Map(baseGraph.nodes.map(cloneNode));
    const edgeById = new Map(baseGraph.edges.map(cloneEdge));
    const nodeStamp = new Map();
    const addStamp = new Map();
    const tombstoneStamp = new Map();
    // True when there is no earlier stamp or the candidate is at least as new.
    const supersedes = (candidate, previous) => !previous || compareStamp(candidate, previous) >= 0;
    const orderedOps = [...log.ops].sort(compareOps);
    for (const op of orderedOps) {
        const stamp = [op.ts, op.actor];
        switch (op.op) {
            case 'upsert_node': {
                const label = normalizeClaimLabel(op.label);
                const id = op.id || computeNodeId(label);
                if (supersedes(stamp, nodeStamp.get(id))) {
                    nodeStamp.set(id, stamp);
                    nodeById.set(id, { id, label, props: op.props ? { ...op.props } : undefined });
                }
                break;
            }
            case 'add_edge': {
                const evidence = canonicalEvidence(op.evidence);
                const edgeId = computeEdgeId(op.from, op.p, op.to, evidence);
                if (supersedes(stamp, addStamp.get(edgeId))) {
                    addStamp.set(edgeId, stamp);
                }
                const known = edgeById.get(edgeId);
                edgeById.set(edgeId, {
                    id: edgeId,
                    from: op.from,
                    p: op.p,
                    to: op.to,
                    evidence: canonicalEvidence([...(known?.evidence ?? []), ...evidence]),
                    actor: op.actor,
                    ts: op.ts,
                });
                break;
            }
            default: {
                if (supersedes(stamp, tombstoneStamp.get(op.edgeId))) {
                    tombstoneStamp.set(op.edgeId, stamp);
                }
            }
        }
    }
    for (const [edgeId, edge] of edgeById) {
        const added = addStamp.get(edgeId) ?? [-Infinity, ''];
        const tombstoned = tombstoneStamp.get(edgeId);
        if (tombstoned && compareStamp(tombstoned, added) > 0) {
            edgeById.delete(edgeId);
            continue;
        }
        for (const endpoint of [edge.from, edge.to]) {
            if (!nodeById.has(endpoint)) {
                nodeById.set(endpoint, { id: endpoint, label: endpoint });
            }
        }
    }
    return finalizeGraph({ version: 1, nodes: [...nodeById.values()], edges: [...edgeById.values()] });
}
|
|
82
|
+
/**
 * Ordering for log operations: by `ts`, then by `actor`, then by the
 * operation's serialised form from `stableSerializeOp`.
 *
 * @param {object} a - First operation.
 * @param {object} b - Second operation.
 * @returns {number} Negative, zero or positive, as expected by `Array.prototype.sort`.
 */
function compareOps(a, b) {
    if (a.ts === b.ts) {
        const byActor = a.actor.localeCompare(b.actor);
        return byActor !== 0
            ? byActor
            : stableSerializeOp(a).localeCompare(stableSerializeOp(b));
    }
    return a.ts - b.ts;
}
|
|
90
|
+
/**
 * Renders an operation as a `|`-separated string used as the final
 * tie-breaker when ordering operations.
 * Any operation that is neither `upsert_node` nor `add_edge` is rendered as a tombstone.
 *
 * @param {object} op - Log operation.
 * @returns {string} The serialised form.
 */
function stableSerializeOp(op) {
    switch (op.op) {
        case 'upsert_node': {
            const id = op.id || '';
            const label = normalizeClaimLabel(op.label);
            const props = JSON.stringify(op.props || {});
            return `upsert_node|${id}|${label}|${props}`;
        }
        case 'add_edge': {
            const evidence = canonicalEvidence(op.evidence).join(',');
            return `add_edge|${op.from}|${op.p}|${op.to}|${evidence}`;
        }
        default:
            return `tombstone_edge|${op.edgeId}`;
    }
}
|
|
99
|
+
/**
 * Compares two `[ts, actor]` stamps: by timestamp first, then by actor.
 *
 * @param {[number, string]} a - First stamp.
 * @param {[number, string]} b - Second stamp.
 * @returns {number} Negative when `a` is older, positive when newer, zero when equal.
 */
function compareStamp(a, b) {
    const [leftTs, leftActor] = a;
    const [rightTs, rightActor] = b;
    return leftTs !== rightTs
        ? leftTs - rightTs
        : leftActor.localeCompare(rightActor);
}
|
|
104
|
+
/**
 * Distinguishes a pack from a bare graph: a pack has truthy `meta` and `blocks`.
 *
 * @param {object} input - Candidate value.
 * @returns {boolean} True when both `meta` and `blocks` are truthy.
 */
function isPack(input) {
    if (!input.meta) {
        return false;
    }
    return Boolean(input.blocks);
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import { normalize, tokenize } from '../tokenize.js';
|
|
2
|
+
import { expandLabelToTerms } from './claim_graph.js';
|
|
3
|
+
/**
 * Expands a query string with terms taken from the pack's claim graph.
 *
 * Candidate nodes are those whose normalised label equals a query token or
 * starts with one (the prefix pass stops once `maxExtraTerms * 4` candidates
 * are collected). For every candidate, outgoing edges with an allowed
 * predicate are followed and the target label's terms that are not already
 * in the query are collected, up to `maxExtraTerms`. The extra terms are
 * appended to the query in sorted order.
 *
 * @param {{claimGraph?: object}} pack - Pack that may carry a claim graph.
 * @param {string} queryString - Original query.
 * @param {{maxExtraTerms?: number, predicates?: string[]}} [opts] - Expansion limits;
 *   `maxExtraTerms` defaults to 12 (minimum 1), `predicates` defaults to
 *   ['defined_as', 'is', 'mentions', 'ref'].
 * @returns {string} The expanded query, or `queryString` unchanged when the
 *   graph is missing/empty, the query has no tokens, or nothing was found.
 */
export function expandQueryWithGraph(pack, queryString, opts = {}) {
    const graph = pack.claimGraph;
    if (!graph || graph.nodes.length === 0 || graph.edges.length === 0) {
        return queryString;
    }
    const maxExtraTerms = Math.max(1, opts.maxExtraTerms ?? 12);
    const maxCandidates = maxExtraTerms * 4;
    const predicates = new Set((opts.predicates ?? ['defined_as', 'is', 'mentions', 'ref']).map((p) => normalize(p)));
    const qTokens = tokenize(queryString).map((t) => t.term);
    if (qTokens.length === 0) {
        return queryString;
    }
    const qSet = new Set(qTokens);
    const candidateNodeIds = new Set();
    const labelEntries = Object.entries(graph.index?.labelToId ?? {}).sort((a, b) => a[0].localeCompare(b[0]));
    for (const [labelNorm, nodeId] of labelEntries) {
        if (qSet.has(labelNorm)) {
            candidateNodeIds.add(nodeId);
        }
    }
    // Sort a copy so the token list itself keeps its original order.
    for (const token of [...qTokens].sort()) {
        for (const [labelNorm, nodeId] of labelEntries) {
            if (labelNorm.startsWith(token)) {
                candidateNodeIds.add(nodeId);
            }
            if (candidateNodeIds.size >= maxCandidates) {
                break;
            }
        }
        if (candidateNodeIds.size >= maxCandidates) {
            break;
        }
    }
    const edgeById = new Map(graph.edges.map((e) => [e.id, e]));
    // One id -> node table instead of a linear scan per edge; the first node
    // with a given id wins, matching what Array.prototype.find would return.
    const nodeById = new Map();
    for (const node of graph.nodes) {
        if (!nodeById.has(node.id)) {
            nodeById.set(node.id, node);
        }
    }
    const outIdx = graph.index?.out ?? {};
    const extraTerms = new Set();
    const sortedNodeIds = [...candidateNodeIds].sort();
    for (const nodeId of sortedNodeIds) {
        const edgeIds = [...(outIdx[nodeId] ?? [])].sort();
        for (const edgeId of edgeIds) {
            const edge = edgeById.get(edgeId);
            if (!edge || !predicates.has(normalize(edge.p))) {
                continue;
            }
            const target = nodeById.get(edge.to);
            if (!target) {
                continue;
            }
            for (const term of expandLabelToTerms(target.label)) {
                if (!qSet.has(term)) {
                    extraTerms.add(term);
                }
                if (extraTerms.size >= maxExtraTerms) {
                    break;
                }
            }
            if (extraTerms.size >= maxExtraTerms) {
                break;
            }
        }
        if (extraTerms.size >= maxExtraTerms) {
            break;
        }
    }
    if (extraTerms.size === 0) {
        return queryString;
    }
    return `${queryString} ${[...extraTerms].sort().join(' ')}`.trim();
}
|
package/dist/index.d.ts
CHANGED
|
@@ -4,11 +4,17 @@ export { makeContextPatch } from './patch.js';
|
|
|
4
4
|
export { buildPack } from './builder.js';
|
|
5
5
|
export { quantizeEmbeddingInt8L2Norm, encodeScaleF16, decodeScaleF16, } from './semantic.js';
|
|
6
6
|
export { listAgents, getAgent, resolveAgent, buildSystemPrompt, isToolAllowed, assertToolAllowed, validateAgentRegistry, validateAgentDefinition, } from './agent.js';
|
|
7
|
+
export { getClaimGraph, validateClaimGraph, } from './graph/claim_graph.js';
|
|
8
|
+
export { buildClaimGraph } from './graph/build_claim_graph.js';
|
|
9
|
+
export { createGraphLog, appendOp, applyClaimGraphLog, mergeClaimGraphLogs, serializeClaimGraphLog, deserializeClaimGraphLog, } from './graph/log.js';
|
|
10
|
+
export { expandQueryWithGraph } from './graph/query_expand.js';
|
|
7
11
|
export type { MountOptions, PackMeta, Pack } from './pack.runtime.js';
|
|
8
12
|
export type { QueryOptions, Hit } from './query.js';
|
|
9
13
|
export type { ContextPatch } from './patch.js';
|
|
10
14
|
export type { BuildInputDoc, BuildPackOptions } from './builder.js';
|
|
11
15
|
export type { AgentPromptTemplate, AgentToolPolicy, AgentRetrievalDefaults, AgentDefinitionV1, AgentRegistry, ResolveAgentInput, ResolvedAgent, } from './agent.js';
|
|
16
|
+
export type { ClaimGraph, ClaimNode, ClaimEdge } from './graph/claim_graph.js';
|
|
17
|
+
export type { ClaimGraphLog, ClaimOp } from './graph/log.js';
|
|
12
18
|
export { parseToolCallV1FromText } from './tool_parse.js';
|
|
13
19
|
export { nowIso, createTrace } from './trace.js';
|
|
14
20
|
export { assertToolCallAllowed } from './tool_gate.js';
|
package/dist/index.js
CHANGED
|
@@ -5,6 +5,10 @@ export { makeContextPatch } from './patch.js';
|
|
|
5
5
|
export { buildPack } from './builder.js';
|
|
6
6
|
export { quantizeEmbeddingInt8L2Norm, encodeScaleF16, decodeScaleF16, } from './semantic.js';
|
|
7
7
|
export { listAgents, getAgent, resolveAgent, buildSystemPrompt, isToolAllowed, assertToolAllowed, validateAgentRegistry, validateAgentDefinition, } from './agent.js';
|
|
8
|
+
export { getClaimGraph, validateClaimGraph, } from './graph/claim_graph.js';
|
|
9
|
+
export { buildClaimGraph } from './graph/build_claim_graph.js';
|
|
10
|
+
export { createGraphLog, appendOp, applyClaimGraphLog, mergeClaimGraphLogs, serializeClaimGraphLog, deserializeClaimGraphLog, } from './graph/log.js';
|
|
11
|
+
export { expandQueryWithGraph } from './graph/query_expand.js';
|
|
8
12
|
export { parseToolCallV1FromText } from './tool_parse.js';
|
|
9
13
|
export { nowIso, createTrace } from './trace.js';
|
|
10
14
|
export { assertToolCallAllowed } from './tool_gate.js';
|
package/dist/pack.runtime.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { AgentRegistry } from './agent.js';
|
|
2
|
+
import type { ClaimGraph } from './graph/claim_graph.js';
|
|
2
3
|
export type MountOptions = {
|
|
3
4
|
src: string | ArrayBufferLike | Uint8Array;
|
|
4
5
|
};
|
|
@@ -11,6 +12,11 @@ export type PackMeta = {
|
|
|
11
12
|
avgBlockLen?: number;
|
|
12
13
|
};
|
|
13
14
|
agents?: AgentRegistry;
|
|
15
|
+
claimGraph?: {
|
|
16
|
+
version: 1;
|
|
17
|
+
nodes: number;
|
|
18
|
+
edges: number;
|
|
19
|
+
};
|
|
14
20
|
};
|
|
15
21
|
export type Pack = {
|
|
16
22
|
meta: PackMeta;
|
|
@@ -30,6 +36,7 @@ export type Pack = {
|
|
|
30
36
|
vecs: Int8Array;
|
|
31
37
|
scales?: Uint16Array;
|
|
32
38
|
};
|
|
39
|
+
claimGraph?: ClaimGraph;
|
|
33
40
|
};
|
|
34
41
|
export declare function hasSemantic(pack: Pack): boolean;
|
|
35
42
|
export declare function mountPack(opts: MountOptions): Promise<Pack>;
|
package/dist/pack.runtime.js
CHANGED
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
*/
|
|
7
7
|
import { getTextDecoder } from './utils/utf8.js';
|
|
8
8
|
import { validateAgentRegistry } from './agent.js';
|
|
9
|
+
import { validateClaimGraph } from './graph/claim_graph.js';
|
|
9
10
|
export function hasSemantic(pack) {
|
|
10
11
|
return Boolean(pack.semantic && pack.semantic.dims > 0 && pack.semantic.vecs.length > 0);
|
|
11
12
|
}
|
|
@@ -75,16 +76,50 @@ export function mountPackFromBuffer(buf) {
|
|
|
75
76
|
}
|
|
76
77
|
}
|
|
77
78
|
let semantic;
|
|
78
|
-
|
|
79
|
-
|
|
79
|
+
let claimGraph;
|
|
80
|
+
while (offset < buf.byteLength) {
|
|
81
|
+
const sectionStart = offset;
|
|
82
|
+
if (buf.byteLength - offset < 4)
|
|
83
|
+
break;
|
|
84
|
+
const jsonLen = dv.getUint32(offset, true);
|
|
80
85
|
offset += 4;
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
86
|
+
if (jsonLen < 0 || offset + jsonLen > buf.byteLength) {
|
|
87
|
+
offset = sectionStart;
|
|
88
|
+
break;
|
|
89
|
+
}
|
|
90
|
+
let parsed;
|
|
91
|
+
try {
|
|
92
|
+
const json = dec.decode(new Uint8Array(buf, offset, jsonLen));
|
|
93
|
+
parsed = JSON.parse(json);
|
|
94
|
+
}
|
|
95
|
+
catch {
|
|
96
|
+
offset = sectionStart;
|
|
97
|
+
break;
|
|
98
|
+
}
|
|
99
|
+
offset += jsonLen;
|
|
100
|
+
if (!semantic && looksLikeSemanticJson(parsed)) {
|
|
101
|
+
if (buf.byteLength - offset < 4) {
|
|
102
|
+
offset = sectionStart;
|
|
103
|
+
break;
|
|
104
|
+
}
|
|
105
|
+
const semBlobLen = dv.getUint32(offset, true);
|
|
106
|
+
offset += 4;
|
|
107
|
+
if (semBlobLen < 0 || offset + semBlobLen > buf.byteLength) {
|
|
108
|
+
offset = sectionStart;
|
|
109
|
+
break;
|
|
110
|
+
}
|
|
111
|
+
const semBlob = new Uint8Array(buf, offset, semBlobLen);
|
|
112
|
+
offset += semBlobLen;
|
|
113
|
+
semantic = parseSemanticSection(parsed, semBlob);
|
|
114
|
+
continue;
|
|
115
|
+
}
|
|
116
|
+
const graph = validateClaimGraph(parsed);
|
|
117
|
+
if (!claimGraph && graph) {
|
|
118
|
+
claimGraph = graph;
|
|
119
|
+
continue;
|
|
120
|
+
}
|
|
121
|
+
offset = sectionStart;
|
|
122
|
+
break;
|
|
88
123
|
}
|
|
89
124
|
return {
|
|
90
125
|
meta,
|
|
@@ -96,8 +131,18 @@ export function mountPackFromBuffer(buf) {
|
|
|
96
131
|
namespaces,
|
|
97
132
|
blockTokenLens,
|
|
98
133
|
semantic,
|
|
134
|
+
claimGraph,
|
|
99
135
|
};
|
|
100
136
|
}
|
|
137
|
+
/**
 * Heuristic check that a parsed JSON section is a semantic-section header:
 * an object with `version === 1`, `encoding === 'int8_l2norm'` and numeric
 * `blocks.vectors.byteOffset` / `blocks.vectors.length`.
 *
 * @param {unknown} parsed - Parsed JSON value.
 * @returns {boolean} True when the value has the semantic header shape.
 */
function looksLikeSemanticJson(parsed) {
    const isObject = Boolean(parsed) && typeof parsed === 'object';
    if (!isObject) {
        return false;
    }
    if (parsed.version !== 1 || parsed.encoding !== 'int8_l2norm') {
        return false;
    }
    const vectors = parsed.blocks?.vectors;
    return typeof vectors?.byteOffset === 'number' && typeof vectors?.length === 'number';
}
|
|
101
146
|
function parseSemanticSection(sem, blob) {
|
|
102
147
|
const vectors = sem?.blocks?.vectors;
|
|
103
148
|
const scales = sem?.blocks?.scales;
|
package/dist/query.d.ts
CHANGED
package/dist/query.js
CHANGED
|
@@ -14,6 +14,7 @@ import { minCoverSpan, proximityMultiplier } from "./quality/proximity.js";
|
|
|
14
14
|
import { diversifyAndDedupe } from "./quality/diversify.js";
|
|
15
15
|
import { knsSignature, knsDistance } from "./quality/signature.js";
|
|
16
16
|
import { decodeScaleF16, quantizeEmbeddingInt8L2Norm } from "./semantic.js";
|
|
17
|
+
import { expandQueryWithGraph } from "./graph/query_expand.js";
|
|
17
18
|
export function validateQueryOptions(opts) {
|
|
18
19
|
if (!opts)
|
|
19
20
|
return;
|
|
@@ -46,6 +47,17 @@ export function validateQueryOptions(opts) {
|
|
|
46
47
|
throw new Error("query(...): queryExpansion.minTermLength must be a positive integer.");
|
|
47
48
|
}
|
|
48
49
|
}
|
|
50
|
+
if (opts.graph) {
|
|
51
|
+
if (opts.graph.expand !== undefined && typeof opts.graph.expand !== "boolean") {
|
|
52
|
+
throw new Error("query(...): graph.expand must be a boolean when provided.");
|
|
53
|
+
}
|
|
54
|
+
if (opts.graph.maxExtraTerms !== undefined && (!Number.isInteger(opts.graph.maxExtraTerms) || opts.graph.maxExtraTerms < 1)) {
|
|
55
|
+
throw new Error("query(...): graph.maxExtraTerms must be a positive integer.");
|
|
56
|
+
}
|
|
57
|
+
if (opts.graph.predicates !== undefined && (!Array.isArray(opts.graph.predicates) || opts.graph.predicates.some((p) => typeof p !== "string"))) {
|
|
58
|
+
throw new Error("query(...): graph.predicates must be an array of strings when provided.");
|
|
59
|
+
}
|
|
60
|
+
}
|
|
49
61
|
validateSemanticQueryOptions(opts.semantic);
|
|
50
62
|
}
|
|
51
63
|
export function validateSemanticQueryOptions(options) {
|
|
@@ -105,8 +117,14 @@ export function query(pack, q, opts = {}) {
|
|
|
105
117
|
queryEmbedding: opts.semantic?.queryEmbedding,
|
|
106
118
|
force: opts.semantic?.force ?? false,
|
|
107
119
|
};
|
|
120
|
+
const graphQuery = opts.graph?.expand === true
|
|
121
|
+
? expandQueryWithGraph(pack, q, {
|
|
122
|
+
maxExtraTerms: opts.graph?.maxExtraTerms,
|
|
123
|
+
predicates: opts.graph?.predicates,
|
|
124
|
+
})
|
|
125
|
+
: q;
|
|
108
126
|
// --- Query parsing
|
|
109
|
-
const normTokens = tokenize(
|
|
127
|
+
const normTokens = tokenize(graphQuery).map((t) => t.term);
|
|
110
128
|
// Normalize quoted phrases from q
|
|
111
129
|
const quotedRaw = parsePhrases(q);
|
|
112
130
|
const quoted = quotedRaw.map((seq) => seq.map((t) => normalize(t)).flatMap((s) => s.split(/\s+/)).filter(Boolean));
|