@knolo/core 3.1.4 → 3.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +79 -5
- package/dist/builder.d.ts +4 -0
- package/dist/builder.js +23 -1
- package/dist/graph/build_claim_graph.d.ts +5 -0
- package/dist/graph/build_claim_graph.js +88 -0
- package/dist/graph/claim_graph.d.ts +34 -0
- package/dist/graph/claim_graph.js +65 -0
- package/dist/graph/log.d.ts +33 -0
- package/dist/graph/log.js +106 -0
- package/dist/graph/query_expand.d.ts +6 -0
- package/dist/graph/query_expand.js +57 -0
- package/dist/index.d.ts +8 -2
- package/dist/index.js +5 -1
- package/dist/node.d.ts +2 -0
- package/dist/node.js +1 -0
- package/dist/pack.d.ts +2 -35
- package/dist/pack.js +1 -175
- package/dist/pack.node.d.ts +4 -0
- package/dist/pack.node.js +35 -0
- package/dist/pack.runtime.d.ts +44 -0
- package/dist/pack.runtime.js +191 -0
- package/dist/query.d.ts +5 -0
- package/dist/query.js +19 -1
- package/package.json +11 -4
package/README.md
CHANGED
|
@@ -91,19 +91,30 @@ You can write it to disk or store it in object storage.
|
|
|
91
91
|
|
|
92
92
|
## 2️⃣ Mount a Pack
|
|
93
93
|
|
|
94
|
+
### Node.js (local path convenience)
|
|
95
|
+
|
|
94
96
|
```ts
|
|
95
|
-
import { mountPack } from "@knolo/core";
|
|
97
|
+
import { mountPack } from "@knolo/core/node";
|
|
96
98
|
|
|
97
99
|
const pack = await mountPack({
|
|
98
100
|
src: "./dist/knowledge.knolo"
|
|
99
101
|
});
|
|
100
102
|
```
|
|
101
103
|
|
|
104
|
+
### React Native / Expo (URL or bytes)
|
|
105
|
+
|
|
106
|
+
```ts
|
|
107
|
+
import { mountPack } from "@knolo/core";
|
|
108
|
+
|
|
109
|
+
const ab = await (await fetch(PACK_URL)).arrayBuffer();
|
|
110
|
+
const pack = await mountPack({ src: new Uint8Array(ab) });
|
|
111
|
+
```
|
|
112
|
+
|
|
102
113
|
You can mount from:
|
|
103
114
|
|
|
104
|
-
*
|
|
115
|
+
* URL string (runtime-safe entry)
|
|
105
116
|
* Buffer / Uint8Array
|
|
106
|
-
*
|
|
117
|
+
* Local file path in Node via `@knolo/core/node`
|
|
107
118
|
* Object storage download
|
|
108
119
|
|
|
109
120
|
Mount-time validation ensures:
|
|
@@ -308,10 +319,73 @@ Properties:
|
|
|
308
319
|
|
|
309
320
|
---
|
|
310
321
|
|
|
311
|
-
#
|
|
322
|
+
# 🕸 ClaimGraph API
|
|
312
323
|
|
|
313
|
-
|
|
324
|
+
`@knolo/core` includes a deterministic ClaimGraph subsystem.
|
|
325
|
+
|
|
326
|
+
## Build-time config
|
|
327
|
+
|
|
328
|
+
```ts
|
|
329
|
+
type BuildPackOptions = {
|
|
330
|
+
graph?: {
|
|
331
|
+
enabled?: boolean; // default true
|
|
332
|
+
maxEdgesPerDoc?: number; // default 500
|
|
333
|
+
};
|
|
334
|
+
};
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
## Query-time config
|
|
338
|
+
|
|
339
|
+
```ts
|
|
340
|
+
type QueryOptions = {
|
|
341
|
+
graph?: {
|
|
342
|
+
expand?: boolean; // default false
|
|
343
|
+
maxExtraTerms?: number; // default 12
|
|
344
|
+
predicates?: string[]; // default ['defined_as', 'is', 'mentions', 'ref']
|
|
345
|
+
};
|
|
346
|
+
};
|
|
347
|
+
```
|
|
348
|
+
|
|
349
|
+
## Exports
|
|
350
|
+
|
|
351
|
+
```ts
|
|
352
|
+
import {
|
|
353
|
+
buildClaimGraph,
|
|
354
|
+
getClaimGraph,
|
|
355
|
+
applyClaimGraphLog,
|
|
356
|
+
mergeClaimGraphLogs,
|
|
357
|
+
expandQueryWithGraph,
|
|
358
|
+
createGraphLog,
|
|
359
|
+
appendOp,
|
|
360
|
+
} from '@knolo/core';
|
|
361
|
+
```
|
|
362
|
+
|
|
363
|
+
Types:
|
|
314
364
|
|
|
365
|
+
* `ClaimNode`
|
|
366
|
+
* `ClaimEdge`
|
|
367
|
+
* `ClaimGraph`
|
|
368
|
+
* `ClaimOp`
|
|
369
|
+
* `ClaimGraphLog`
|
|
315
370
|
|
|
371
|
+
## Notes on determinism and bounds
|
|
316
372
|
|
|
373
|
+
* Node IDs are hash-derived from normalized labels.
|
|
374
|
+
* Edge IDs are hash-derived from `(from, predicate, to, evidence)`.
|
|
375
|
+
* Node labels are normalized and deterministically truncated.
|
|
376
|
+
* Evidence arrays are sorted + unique.
|
|
377
|
+
* Node/edge arrays are sorted by ID in final graph.
|
|
378
|
+
* Extraction is bounded with `maxEdgesPerDoc`.
|
|
379
|
+
* Query expansion is bounded with `maxExtraTerms` and stable ordering.
|
|
380
|
+
|
|
381
|
+
## Pack format note
|
|
382
|
+
|
|
383
|
+
`.knolo` binary layout now supports an optional trailing ClaimGraph section after the existing sections: a 4-byte little-endian length prefix followed by the UTF-8 encoded ClaimGraph JSON.
|
|
384
|
+
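Runtimes that ignore unknown trailing bytes remain compatible. A runtime that instead parses any bytes after the blocks section as the semantic section (as `mountPack` did before this release) will misread a pack that carries a ClaimGraph but no semantic section; build such packs with `graph: { enabled: false }`.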
|
|
385
|
+
|
|
386
|
+
---
|
|
387
|
+
|
|
388
|
+
# 📄 License
|
|
389
|
+
|
|
390
|
+
Apache-2.0
|
|
317
391
|
|
package/dist/builder.d.ts
CHANGED
package/dist/builder.js
CHANGED
|
@@ -9,6 +9,7 @@ import { tokenize } from './tokenize.js';
|
|
|
9
9
|
import { getTextEncoder } from './utils/utf8.js';
|
|
10
10
|
import { encodeScaleF16, quantizeEmbeddingInt8L2Norm } from './semantic.js';
|
|
11
11
|
import { validateAgentRegistry } from './agent.js';
|
|
12
|
+
import { buildClaimGraph } from './graph/build_claim_graph.js';
|
|
12
13
|
export async function buildPack(docs, opts = {}) {
|
|
13
14
|
const normalizedDocs = validateDocs(docs);
|
|
14
15
|
// Prepare blocks (strip MD) and carry heading/docId for optional boosts.
|
|
@@ -23,6 +24,10 @@ export async function buildPack(docs, opts = {}) {
|
|
|
23
24
|
const totalTokens = blockTokenLens.reduce((sum, len) => sum + len, 0);
|
|
24
25
|
const avgBlockLen = blocks.length ? totalTokens / blocks.length : 1;
|
|
25
26
|
const agents = normalizeAgents(opts.agents);
|
|
27
|
+
const graphEnabled = opts.graph?.enabled ?? true;
|
|
28
|
+
const claimGraph = graphEnabled
|
|
29
|
+
? buildClaimGraph(normalizedDocs, { maxEdgesPerDoc: opts.graph?.maxEdgesPerDoc })
|
|
30
|
+
: null;
|
|
26
31
|
const meta = {
|
|
27
32
|
version: 3,
|
|
28
33
|
stats: {
|
|
@@ -32,6 +37,15 @@ export async function buildPack(docs, opts = {}) {
|
|
|
32
37
|
avgBlockLen,
|
|
33
38
|
},
|
|
34
39
|
...(agents ? { agents } : {}),
|
|
40
|
+
...(claimGraph
|
|
41
|
+
? {
|
|
42
|
+
claimGraph: {
|
|
43
|
+
version: 1,
|
|
44
|
+
nodes: claimGraph.nodes.length,
|
|
45
|
+
edges: claimGraph.edges.length,
|
|
46
|
+
},
|
|
47
|
+
}
|
|
48
|
+
: {}),
|
|
35
49
|
};
|
|
36
50
|
// Persist blocks as objects to optionally carry heading/docId/token length.
|
|
37
51
|
const blocksPayload = blocks.map((b, i) => ({
|
|
@@ -54,6 +68,7 @@ export async function buildPack(docs, opts = {}) {
|
|
|
54
68
|
? enc.encode(JSON.stringify(semanticSection.semJson))
|
|
55
69
|
: undefined;
|
|
56
70
|
const semBlob = semanticSection?.semBlob;
|
|
71
|
+
const graphBytes = claimGraph ? enc.encode(JSON.stringify(claimGraph)) : undefined;
|
|
57
72
|
const totalLength = 4 +
|
|
58
73
|
metaBytes.length +
|
|
59
74
|
4 +
|
|
@@ -64,7 +79,8 @@ export async function buildPack(docs, opts = {}) {
|
|
|
64
79
|
blocksBytes.length +
|
|
65
80
|
(semanticEnabled && semBytes && semBlob
|
|
66
81
|
? 4 + semBytes.length + 4 + semBlob.length
|
|
67
|
-
: 0)
|
|
82
|
+
: 0) +
|
|
83
|
+
(graphBytes ? 4 + graphBytes.length : 0);
|
|
68
84
|
const out = new Uint8Array(totalLength);
|
|
69
85
|
const dv = new DataView(out.buffer);
|
|
70
86
|
let offset = 0;
|
|
@@ -98,6 +114,12 @@ export async function buildPack(docs, opts = {}) {
|
|
|
98
114
|
dv.setUint32(offset, semBlob.length, true);
|
|
99
115
|
offset += 4;
|
|
100
116
|
out.set(semBlob, offset);
|
|
117
|
+
offset += semBlob.length;
|
|
118
|
+
}
|
|
119
|
+
if (graphBytes) {
|
|
120
|
+
dv.setUint32(offset, graphBytes.length, true);
|
|
121
|
+
offset += 4;
|
|
122
|
+
out.set(graphBytes, offset);
|
|
101
123
|
}
|
|
102
124
|
return out;
|
|
103
125
|
}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import { canonicalEvidence, computeEdgeId, computeNodeId, finalizeGraph, normalizeClaimLabel, } from './claim_graph.js';
|
|
2
|
+
const DEF_RE = /^([A-Za-z0-9 _-]{2,80})\s+(is|are)\s+(.{2,120})[.?!]/;
|
|
3
|
+
const MD_LINK_RE = /\[([^\]]{1,200})\]\(([^)\s]{1,200})\)/g;
|
|
4
|
+
const WIKI_RE = /\[\[([^\]]{1,200})\]\]/g;
|
|
5
|
+
const HEADING_RE = /^(#{1,3})\s+(.+)$/gm;
|
|
6
|
+
const STOPWORDS = new Set(['a', 'an', 'and', 'or', 'the', 'it', 'they', 'this', 'that', 'these', 'those']);
|
|
7
|
+
/**
 * Extract a ClaimGraph from a list of documents.
 *
 * For every document four kinds of edges are collected:
 *  - `ref`        : markdown link text -> link target
 *  - `mentions`   : document label -> `[[wiki link]]` target
 *  - `defined_as` : `#`..`###` heading -> first sentence that follows it
 *  - `is`         : subject -> object of sentences shaped like
 *                   "<subject> is|are <object>."
 *
 * Edges of one document are sorted by id and truncated to `maxEdgesPerDoc`.
 * Nodes are only added to the result for edges that survive that cap, so the
 * cap bounds the node set as well and no unreferenced nodes are emitted.
 *
 * @param {Array<{id?: string, heading?: string, text: string}>} docs
 *   Documents to scan; the array index is recorded as edge evidence.
 * @param {{maxEdgesPerDoc?: number}} [opts]
 *   `maxEdgesPerDoc` defaults to 500, is clamped to at least 1, and falls back
 *   to 500 when it is not a number.
 * @returns {object} The finalized graph produced by `finalizeGraph`.
 */
export function buildClaimGraph(docs, opts = {}) {
    // A NaN cap would turn `slice(0, NaN)` into "keep nothing"; fall back to the default.
    const requestedCap = Number(opts.maxEdgesPerDoc ?? 500);
    const maxEdgesPerDoc = Number.isNaN(requestedCap) ? 500 : Math.max(1, requestedCap);
    const nodeById = new Map();
    const edgeById = new Map();
    for (let i = 0; i < docs.length; i++) {
        const doc = docs[i];
        const docLabel = normalizeLabel(doc.id || doc.heading || `doc_${i}`);
        // Nodes are staged per document and promoted only for edges that pass the cap.
        const docNodes = new Map();
        const local = [];
        for (const m of doc.text.matchAll(MD_LINK_RE)) {
            addEdge(local, docNodes, normalizeLabel(m[1]), 'ref', normalizeLabel(m[2]), [i]);
        }
        for (const m of doc.text.matchAll(WIKI_RE)) {
            addEdge(local, docNodes, docLabel, 'mentions', normalizeLabel(m[1]), [i]);
        }
        const headingMatches = Array.from(doc.text.matchAll(HEADING_RE));
        for (const h of headingMatches) {
            const headingLabel = normalizeLabel(h[2] || '');
            const headingStart = h.index ?? 0;
            const sentence = firstSentenceAfter(doc.text, headingStart + h[0].length);
            if (sentence) {
                addEdge(local, docNodes, headingLabel, 'defined_as', normalizeLabel(sentence), [i]);
            }
        }
        for (const sentence of splitSentences(doc.text)) {
            const m = sentence.match(DEF_RE);
            if (!m) {
                continue;
            }
            const subject = normalizeLabel(m[1]);
            if (!subject || isStopwordOnly(subject)) {
                continue;
            }
            const objectSnippet = normalizeLabel(m[3]);
            addEdge(local, docNodes, subject, 'is', objectSnippet, [i]);
        }
        // Sort before truncating so the surviving subset does not depend on extraction order.
        local.sort((a, b) => a.id.localeCompare(b.id));
        for (const edge of local.slice(0, maxEdgesPerDoc)) {
            for (const nodeId of [edge.from, edge.to]) {
                if (!nodeById.has(nodeId)) {
                    nodeById.set(nodeId, docNodes.get(nodeId));
                }
            }
            const existing = edgeById.get(edge.id);
            if (existing) {
                existing.evidence = canonicalEvidence([...(existing.evidence ?? []), ...(edge.evidence ?? [])]);
            }
            else {
                edgeById.set(edge.id, edge);
            }
        }
    }
    return finalizeGraph({ version: 1, nodes: [...nodeById.values()], edges: [...edgeById.values()] });
}
|
|
53
|
+
/**
 * Append one edge to `local`, registering both endpoint nodes in `nodeById`.
 * Does nothing when either label is empty.
 *
 * @param {Array<object>} local Edge list of the document being processed.
 * @param {Map<string, object>} nodeById Node registry the endpoints are added to.
 * @param {string} fromLabel Label of the source node.
 * @param {string} p Predicate name.
 * @param {string} toLabel Label of the target node.
 * @param {number[]} evidence Evidence indices attached to the edge.
 */
function addEdge(local, nodeById, fromLabel, p, toLabel, evidence) {
    const hasBothEndpoints = Boolean(fromLabel) && Boolean(toLabel);
    if (hasBothEndpoints) {
        const from = ensureNode(nodeById, fromLabel);
        const to = ensureNode(nodeById, toLabel);
        const normalizedEvidence = canonicalEvidence(evidence);
        local.push({
            id: computeEdgeId(from, p, to, normalizedEvidence),
            from,
            p,
            to,
            evidence: normalizedEvidence,
        });
    }
}
|
|
62
|
+
/**
 * Return the node id for `label`, inserting `{ id, label }` into `nodeById`
 * when no node with that id is present yet. An existing entry is left as is.
 *
 * @param {Map<string, object>} nodeById Node registry keyed by node id.
 * @param {string} label Node label.
 * @returns {string} The node id computed by `computeNodeId`.
 */
function ensureNode(nodeById, label) {
    const nodeId = computeNodeId(label);
    if (nodeById.has(nodeId)) {
        return nodeId;
    }
    nodeById.set(nodeId, { id: nodeId, label });
    return nodeId;
}
|
|
68
|
+
/**
 * Normalize a label through `normalizeClaimLabel` with a 200 character limit.
 *
 * @param {string} input Raw label text.
 * @returns {string} The normalized label.
 */
function normalizeLabel(input) {
    const maxLabelLength = 200;
    return normalizeClaimLabel(input, maxLabelLength);
}
|
|
71
|
+
/**
 * Split text into trimmed, non-empty sentences.
 * A sentence boundary is any whitespace run that directly follows `.`, `?` or `!`.
 *
 * @param {string} text Text to split.
 * @returns {string[]} Sentences in document order.
 */
function splitSentences(text) {
    const unixText = text.replace(/\r\n/g, '\n');
    const sentences = [];
    for (const piece of unixText.split(/(?<=[.?!])\s+/)) {
        const trimmed = piece.trim();
        if (trimmed) {
            sentences.push(trimmed);
        }
    }
    return sentences;
}
|
|
78
|
+
/**
 * Return the first sentence found after `startIdx`, limited to 240 characters.
 * The remainder of the line containing `startIdx` (up to and including the
 * following newlines) is skipped first.
 *
 * @param {string} text Full document text.
 * @param {number} startIdx Offset to start looking from.
 * @returns {string} The sentence, or '' when nothing follows.
 */
function firstSentenceAfter(text, startIdx) {
    const remainder = text.slice(startIdx);
    const body = remainder.replace(/^[^\n]*\n+/, '').trim();
    if (body.length === 0) {
        return '';
    }
    const [leading = ''] = splitSentences(body);
    return leading.slice(0, 240);
}
|
|
85
|
+
/**
 * Tell whether `subject` has at least one word and every word is in `STOPWORDS`.
 *
 * @param {string} subject Whitespace separated words.
 * @returns {boolean} True only for a non-empty, all-stopword subject.
 */
function isStopwordOnly(subject) {
    const tokens = subject.split(/\s+/).filter((word) => word.length > 0);
    if (tokens.length === 0) {
        return false;
    }
    for (const token of tokens) {
        if (!STOPWORDS.has(token)) {
            return false;
        }
    }
    return true;
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import type { Pack } from '../pack.runtime.js';
|
|
2
|
+
export type ClaimNode = {
|
|
3
|
+
id: string;
|
|
4
|
+
label: string;
|
|
5
|
+
props?: Record<string, string>;
|
|
6
|
+
};
|
|
7
|
+
export type ClaimEdge = {
|
|
8
|
+
id: string;
|
|
9
|
+
from: string;
|
|
10
|
+
p: string;
|
|
11
|
+
to: string;
|
|
12
|
+
evidence?: number[];
|
|
13
|
+
actor?: string;
|
|
14
|
+
ts?: number;
|
|
15
|
+
};
|
|
16
|
+
export type ClaimGraph = {
|
|
17
|
+
version: 1;
|
|
18
|
+
nodes: ClaimNode[];
|
|
19
|
+
edges: ClaimEdge[];
|
|
20
|
+
index?: {
|
|
21
|
+
labelToId?: Record<string, string>;
|
|
22
|
+
out?: Record<string, string[]>;
|
|
23
|
+
in?: Record<string, string[]>;
|
|
24
|
+
};
|
|
25
|
+
};
|
|
26
|
+
export declare function normalizeClaimLabel(label: string, maxLen?: number): string;
|
|
27
|
+
export declare function computeNodeId(label: string): string;
|
|
28
|
+
export declare function computeEdgeId(from: string, p: string, to: string, evidence?: number[]): string;
|
|
29
|
+
export declare function canonicalEvidence(evidence?: number[]): number[];
|
|
30
|
+
export declare function buildGraphIndex(graph: ClaimGraph): ClaimGraph['index'];
|
|
31
|
+
export declare function finalizeGraph(graph: ClaimGraph): ClaimGraph;
|
|
32
|
+
export declare function getClaimGraph(pack: Pack): ClaimGraph | null;
|
|
33
|
+
export declare function validateClaimGraph(input: unknown): ClaimGraph | null;
|
|
34
|
+
export declare function expandLabelToTerms(label: string): string[];
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { normalize, tokenize } from '../tokenize.js';
|
|
2
|
+
/**
 * Canonicalize a node label: run it through `normalize`, collapse whitespace
 * runs to a single space, trim, and truncate to `maxLen` characters.
 *
 * The truncated text is trimmed again at the end. A cut that lands right after
 * a space would otherwise leave trailing whitespace, and normalizing the result
 * a second time would then produce a different string.
 *
 * @param {string} label Raw label.
 * @param {number} [maxLen=200] Maximum length of the result in UTF-16 code units.
 * @returns {string} The normalized label.
 */
export function normalizeClaimLabel(label, maxLen = 200) {
    const compact = normalize(label).replace(/\s+/g, ' ').trim();
    return compact.slice(0, maxLen).trimEnd();
}
|
|
6
|
+
/**
 * Derive a node id from a label: `n_` followed by the 32-bit hex hash of the
 * normalized label.
 *
 * @param {string} label Node label (normalized again before hashing).
 * @returns {string} The node id.
 */
export function computeNodeId(label) {
    const canonicalLabel = normalizeClaimLabel(label);
    const digest = hash32Hex(canonicalLabel);
    return `n_${digest}`;
}
|
|
9
|
+
/**
 * Derive an edge id: `e_` followed by the 32-bit hex hash of the newline-joined
 * tuple (from, predicate, to, canonical evidence list).
 *
 * @param {string} from Source node id.
 * @param {string} p Predicate name.
 * @param {string} to Target node id.
 * @param {number[]} [evidence] Evidence indices; canonicalized before hashing.
 * @returns {string} The edge id.
 */
export function computeEdgeId(from, p, to, evidence) {
    const evidenceKey = canonicalEvidence(evidence).join(',');
    const payload = `${from}\n${p}\n${to}\n${evidenceKey}`;
    const digest = hash32Hex(payload);
    return `e_${digest}`;
}
|
|
13
|
+
/**
 * Canonicalize an evidence list: keep only non-negative integers, drop
 * duplicates and sort ascending.
 *
 * Anything that is not an array or typed array (for example a string taken
 * from untrusted JSON) yields an empty list instead of throwing.
 *
 * @param {number[]} [evidence] Raw evidence indices.
 * @returns {number[]} A new sorted array of unique indices.
 */
export function canonicalEvidence(evidence) {
    let candidates = [];
    if (Array.isArray(evidence)) {
        candidates = evidence;
    }
    else if (ArrayBuffer.isView(evidence)) {
        // Typed arrays were accepted before; copy them into a plain array.
        candidates = Array.from(evidence);
    }
    if (candidates.length === 0) {
        return [];
    }
    const valid = candidates.filter((n) => Number.isInteger(n) && n >= 0);
    return [...new Set(valid)].sort((a, b) => a - b);
}
|
|
18
|
+
/**
 * Build the lookup index of a graph.
 *
 * @param {{nodes: Array<{id: string, label: string}>, edges: Array<{id: string, from: string, to: string}>}} graph
 * @returns {{labelToId: Record<string, string>, out: Record<string, string[]>, in: Record<string, string[]>}}
 *   `labelToId` maps normalized labels to node ids (the last node wins for a
 *   repeated label); `out` / `in` map a node id to the sorted ids of its
 *   outgoing / incoming edges.
 */
export function buildGraphIndex(graph) {
    // Buckets are collected in Maps and converted at the end. Writing into plain
    // objects would trip over keys such as "constructor" or "__proto__", which
    // resolve to inherited members of Object.prototype.
    const labelToId = new Map();
    const outgoing = new Map();
    const incoming = new Map();
    const addToBucket = (buckets, key, edgeId) => {
        const bucketKey = String(key);
        const bucket = buckets.get(bucketKey);
        if (bucket) {
            bucket.push(edgeId);
        }
        else {
            buckets.set(bucketKey, [edgeId]);
        }
    };
    for (const node of graph.nodes) {
        labelToId.set(normalizeClaimLabel(node.label), node.id);
    }
    for (const edge of graph.edges) {
        addToBucket(outgoing, edge.from, edge.id);
        addToBucket(incoming, edge.to, edge.id);
    }
    for (const bucket of outgoing.values()) {
        bucket.sort();
    }
    for (const bucket of incoming.values()) {
        bucket.sort();
    }
    // Object.fromEntries defines own data properties, so every key is stored safely.
    return {
        labelToId: Object.fromEntries(labelToId),
        out: Object.fromEntries(outgoing),
        in: Object.fromEntries(incoming),
    };
}
|
|
35
|
+
/**
 * Produce the canonical form of a graph: nodes and edges sorted by id, edge
 * evidence canonicalized, and a freshly built lookup index attached.
 * The input graph and its arrays are not modified.
 *
 * Ids are compared by UTF-16 code unit rather than with `localeCompare`, so the
 * resulting order does not depend on the runtime's locale data and matches the
 * plain `.sort()` used for the edge lists inside the index.
 *
 * @param {{nodes: Array<{id: string}>, edges: Array<{id: string, evidence?: number[]}>}} graph
 * @returns {{version: 1, nodes: object[], edges: object[], index: object}}
 */
export function finalizeGraph(graph) {
    const byId = (a, b) => {
        if (a.id === b.id) {
            return 0;
        }
        return a.id < b.id ? -1 : 1;
    };
    const nodes = [...graph.nodes].sort(byId);
    const edges = graph.edges
        .map((e) => ({ ...e, evidence: canonicalEvidence(e.evidence) }))
        .sort(byId);
    const out = { version: 1, nodes, edges };
    out.index = buildGraphIndex(out);
    return out;
}
|
|
44
|
+
/**
 * Read the ClaimGraph carried by a mounted pack.
 *
 * @param {{claimGraph?: object | null}} pack Mounted pack.
 * @returns {object | null} The pack's `claimGraph`, or null when it is absent.
 */
export function getClaimGraph(pack) {
    const { claimGraph } = pack;
    if (claimGraph === undefined || claimGraph === null) {
        return null;
    }
    return claimGraph;
}
|
|
47
|
+
/**
 * Validate an untrusted value as a version 1 ClaimGraph.
 *
 * Besides the top-level shape, every node must be an object with string `id`
 * and `label`, and every edge an object with string `id`, `from`, `p` and `to`
 * plus an optional array `evidence`. Malformed input returns null rather than
 * failing later inside sorting or indexing.
 *
 * @param {unknown} input Value to validate, e.g. parsed JSON.
 * @returns {object | null} The finalized graph, or null when `input` is not a valid graph.
 */
export function validateClaimGraph(input) {
    if (!input || typeof input !== 'object') {
        return null;
    }
    const { version, nodes, edges } = input;
    if (version !== 1 || !Array.isArray(nodes) || !Array.isArray(edges)) {
        return null;
    }
    const isText = (value) => typeof value === 'string';
    const isRecord = (value) => value !== null && typeof value === 'object';
    const isNode = (n) => isRecord(n) && isText(n.id) && isText(n.label);
    const isEdge = (e) => isRecord(e)
        && isText(e.id) && isText(e.from) && isText(e.p) && isText(e.to)
        && (e.evidence === undefined || e.evidence === null || Array.isArray(e.evidence));
    if (!nodes.every(isNode) || !edges.every(isEdge)) {
        return null;
    }
    return finalizeGraph({ version: 1, nodes, edges });
}
|
|
55
|
+
/**
 * Turn a node label into search terms by normalizing it and tokenizing the result.
 *
 * @param {string} label Node label.
 * @returns {string[]} The `term` of every token, in token order.
 */
export function expandLabelToTerms(label) {
    const canonicalLabel = normalizeClaimLabel(label);
    const terms = [];
    for (const token of tokenize(canonicalLabel)) {
        terms.push(token.term);
    }
    return terms;
}
|
|
58
|
+
/**
 * Hash a string with 32-bit FNV-1a and return the result as 8 lowercase hex digits.
 * The hash consumes UTF-16 code units (`charCodeAt`), not UTF-8 bytes.
 *
 * @param {string} input Text to hash.
 * @returns {string} Zero-padded 8 character hex digest.
 */
function hash32Hex(input) {
    const FNV_OFFSET_BASIS = 0x811c9dc5;
    const FNV_PRIME = 0x01000193;
    let state = FNV_OFFSET_BASIS;
    let pos = 0;
    while (pos < input.length) {
        // xor first, then multiply: the FNV-1a order. Math.imul keeps the product in 32 bits.
        state = Math.imul(state ^ input.charCodeAt(pos), FNV_PRIME);
        pos += 1;
    }
    const unsigned = state >>> 0;
    return unsigned.toString(16).padStart(8, '0');
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import type { Pack } from '../pack.runtime.js';
|
|
2
|
+
import type { ClaimGraph } from './claim_graph.js';
|
|
3
|
+
export type ClaimOp = {
|
|
4
|
+
op: 'upsert_node';
|
|
5
|
+
id?: string;
|
|
6
|
+
label: string;
|
|
7
|
+
props?: Record<string, string>;
|
|
8
|
+
ts: number;
|
|
9
|
+
actor: string;
|
|
10
|
+
} | {
|
|
11
|
+
op: 'add_edge';
|
|
12
|
+
from: string;
|
|
13
|
+
p: string;
|
|
14
|
+
to: string;
|
|
15
|
+
evidence?: number[];
|
|
16
|
+
ts: number;
|
|
17
|
+
actor: string;
|
|
18
|
+
} | {
|
|
19
|
+
op: 'tombstone_edge';
|
|
20
|
+
edgeId: string;
|
|
21
|
+
ts: number;
|
|
22
|
+
actor: string;
|
|
23
|
+
};
|
|
24
|
+
export type ClaimGraphLog = {
|
|
25
|
+
version: 1;
|
|
26
|
+
ops: ClaimOp[];
|
|
27
|
+
};
|
|
28
|
+
export declare function createGraphLog(): ClaimGraphLog;
|
|
29
|
+
export declare function appendOp(log: ClaimGraphLog, op: ClaimOp): ClaimGraphLog;
|
|
30
|
+
export declare function mergeClaimGraphLogs(a: ClaimGraphLog, b: ClaimGraphLog): ClaimGraphLog;
|
|
31
|
+
export declare function serializeClaimGraphLog(log: ClaimGraphLog): Uint8Array;
|
|
32
|
+
export declare function deserializeClaimGraphLog(data: Uint8Array): ClaimGraphLog;
|
|
33
|
+
export declare function applyClaimGraphLog(graphOrPack: ClaimGraph | Pack, log: ClaimGraphLog): ClaimGraph;
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import { canonicalEvidence, computeEdgeId, computeNodeId, finalizeGraph, normalizeClaimLabel, } from './claim_graph.js';
|
|
2
|
+
/**
 * Create an empty version 1 ClaimGraph operation log.
 *
 * @returns {{version: 1, ops: object[]}} A new log with no operations.
 */
export function createGraphLog() {
    const ops = [];
    return { version: 1, ops };
}
|
|
5
|
+
/**
 * Return a new log consisting of the operations of `log` followed by `op`.
 * The given log is left untouched.
 *
 * @param {{ops: object[]}} log Existing log.
 * @param {object} op Operation to append.
 * @returns {{version: 1, ops: object[]}} The extended log.
 */
export function appendOp(log, op) {
    const ops = Array.from(log.ops);
    ops.push(op);
    return { version: 1, ops };
}
|
|
8
|
+
/**
 * Merge two logs into a new log whose operations are ordered by `compareOps`.
 * Neither input log is modified; duplicate operations are kept.
 *
 * @param {{ops: object[]}} a First log.
 * @param {{ops: object[]}} b Second log.
 * @returns {{version: 1, ops: object[]}} The merged log.
 */
export function mergeClaimGraphLogs(a, b) {
    const combined = [];
    for (const op of a.ops) {
        combined.push(op);
    }
    for (const op of b.ops) {
        combined.push(op);
    }
    combined.sort(compareOps);
    return { version: 1, ops: combined };
}
|
|
11
|
+
/**
 * Encode a log as UTF-8 JSON with its operations ordered by `compareOps`.
 * The given log is not modified.
 *
 * @param {{ops: object[]}} log Log to serialize.
 * @returns {Uint8Array} UTF-8 bytes of `{"version":1,"ops":[...]}`.
 */
export function serializeClaimGraphLog(log) {
    const ordered = Array.from(log.ops).sort(compareOps);
    const json = JSON.stringify({ version: 1, ops: ordered });
    const encoder = new TextEncoder();
    return encoder.encode(json);
}
|
|
14
|
+
/**
 * Decode a log produced by `serializeClaimGraphLog`.
 *
 * Every operation is checked before sorting, so a malformed entry is reported
 * as an invalid payload instead of crashing the comparator.
 *
 * @param {Uint8Array} data UTF-8 encoded JSON log.
 * @returns {{version: 1, ops: object[]}} The log with operations ordered by `compareOps`.
 * @throws {SyntaxError} When `data` is not valid JSON.
 * @throws {Error} 'Invalid ClaimGraphLog payload' when the envelope or any operation is malformed.
 */
export function deserializeClaimGraphLog(data) {
    const parsed = JSON.parse(new TextDecoder().decode(data));
    const envelopeOk = Boolean(parsed) && parsed.version === 1 && Array.isArray(parsed.ops);
    if (!envelopeOk || !parsed.ops.every(isWellFormedClaimOp)) {
        throw new Error('Invalid ClaimGraphLog payload');
    }
    return { version: 1, ops: parsed.ops.sort(compareOps) };
}
/** Check that `op` carries the fields its operation kind requires, with the right types. */
function isWellFormedClaimOp(op) {
    if (op === null || typeof op !== 'object') {
        return false;
    }
    // `ts` and `actor` are what the ordering comparator reads on every operation.
    if (typeof op.ts !== 'number' || typeof op.actor !== 'string') {
        return false;
    }
    const isText = (value) => typeof value === 'string';
    switch (op.op) {
        case 'upsert_node':
            return isText(op.label) && (op.id === undefined || op.id === null || isText(op.id));
        case 'add_edge':
            return isText(op.from) && isText(op.p) && isText(op.to)
                && (op.evidence === undefined || op.evidence === null || Array.isArray(op.evidence));
        case 'tombstone_edge':
            return isText(op.edgeId);
        default:
            return false;
    }
}
|
|
21
|
+
/**
 * Apply an operation log to a graph (or to the graph carried by a pack) and
 * return the resulting finalized graph. The inputs are not modified.
 *
 * Operations are processed in `compareOps` order:
 *  - `upsert_node` replaces the node stored under its id.
 *  - `add_edge` stores the edge under its computed id, merging evidence with
 *    any edge already stored there.
 *  - any other operation is treated as a tombstone for `op.edgeId`.
 * Afterwards an edge is removed when its newest tombstone is strictly newer
 * than its newest add; edges that exist only in the base graph lose to any
 * tombstone. Endpoints without a node get a placeholder node whose label is
 * its id.
 *
 * @param {object} graphOrPack A ClaimGraph, or a pack (an object with `meta` and `blocks`).
 * @param {{ops: object[]}} log Operations to apply.
 * @returns {object} The graph produced by `finalizeGraph`.
 */
export function applyClaimGraphLog(graphOrPack, log) {
    let baseGraph = graphOrPack;
    if (isPack(graphOrPack)) {
        baseGraph = graphOrPack.claimGraph ?? { version: 1, nodes: [], edges: [] };
    }
    // Work on copies so the caller's graph is never mutated.
    const nodeById = new Map();
    for (const node of baseGraph.nodes) {
        nodeById.set(node.id, { ...node, props: node.props ? { ...node.props } : undefined });
    }
    const edgeById = new Map();
    for (const baseEdge of baseGraph.edges) {
        edgeById.set(baseEdge.id, { ...baseEdge, evidence: canonicalEvidence(baseEdge.evidence) });
    }
    const nodeStamp = new Map();
    const addStamp = new Map();
    const tombstoneStamp = new Map();
    // Record `stamp` under `key` unless an already recorded stamp is newer; reports whether it was recorded.
    const recordLatest = (stamps, key, stamp) => {
        const known = stamps.get(key);
        if (known && compareStamp(stamp, known) < 0) {
            return false;
        }
        stamps.set(key, stamp);
        return true;
    };
    const orderedOps = [...log.ops].sort(compareOps);
    for (const op of orderedOps) {
        const stamp = [op.ts, op.actor];
        switch (op.op) {
            case 'upsert_node': {
                const label = normalizeClaimLabel(op.label);
                const id = op.id || computeNodeId(label);
                if (recordLatest(nodeStamp, id, stamp)) {
                    nodeById.set(id, { id, label, props: op.props ? { ...op.props } : undefined });
                }
                break;
            }
            case 'add_edge': {
                const evidence = canonicalEvidence(op.evidence);
                const edgeId = computeEdgeId(op.from, op.p, op.to, evidence);
                recordLatest(addStamp, edgeId, stamp);
                const stored = edgeById.get(edgeId);
                edgeById.set(edgeId, {
                    id: edgeId,
                    from: op.from,
                    p: op.p,
                    to: op.to,
                    evidence: canonicalEvidence([...(stored?.evidence ?? []), ...evidence]),
                    actor: op.actor,
                    ts: op.ts,
                });
                break;
            }
            default:
                recordLatest(tombstoneStamp, op.edgeId, stamp);
        }
    }
    // Iterate over a snapshot because tombstoned edges are removed along the way.
    for (const [edgeId, edge] of [...edgeById]) {
        const added = addStamp.get(edgeId) ?? [-Infinity, ''];
        const removed = tombstoneStamp.get(edgeId);
        if (removed !== undefined && compareStamp(removed, added) > 0) {
            edgeById.delete(edgeId);
            continue;
        }
        for (const endpoint of [edge.from, edge.to]) {
            if (!nodeById.has(endpoint)) {
                nodeById.set(endpoint, { id: endpoint, label: endpoint });
            }
        }
    }
    return finalizeGraph({ version: 1, nodes: [...nodeById.values()], edges: [...edgeById.values()] });
}
|
|
82
|
+
/**
 * Total order for log operations: by timestamp, then actor, then the stable
 * serialization of the operation.
 *
 * Strings are compared by UTF-16 code unit rather than with `localeCompare`,
 * whose result depends on the runtime's locale data; replicas merging the same
 * operations must agree on their order.
 *
 * @param {{ts: number, actor: string}} a
 * @param {{ts: number, actor: string}} b
 * @returns {number} Negative, zero or positive, as expected by `Array.prototype.sort`.
 */
function compareOps(a, b) {
    if (a.ts !== b.ts) {
        return a.ts - b.ts;
    }
    if (a.actor !== b.actor) {
        return a.actor < b.actor ? -1 : 1;
    }
    const left = stableSerializeOp(a);
    const right = stableSerializeOp(b);
    if (left === right) {
        return 0;
    }
    return left < right ? -1 : 1;
}
|
|
90
|
+
/**
 * Serialize the content of an operation (without `ts` and `actor`) into a
 * string used as the final tie-breaker when ordering operations.
 *
 * Node `props` are written with their keys sorted, so two operations carrying
 * the same properties serialize identically whatever order the keys were
 * inserted in.
 *
 * @param {object} op A log operation.
 * @returns {string} Pipe separated, deterministic representation of `op`.
 */
function stableSerializeOp(op) {
    if (op.op === 'upsert_node') {
        const props = op.props || {};
        const sortedProps = Object.fromEntries(Object.keys(props).sort().map((key) => [key, props[key]]));
        return `upsert_node|${op.id || ''}|${normalizeClaimLabel(op.label)}|${JSON.stringify(sortedProps)}`;
    }
    if (op.op === 'add_edge') {
        return `add_edge|${op.from}|${op.p}|${op.to}|${canonicalEvidence(op.evidence).join(',')}`;
    }
    return `tombstone_edge|${op.edgeId}`;
}
|
|
99
|
+
/**
 * Compare two `[ts, actor]` stamps: by timestamp first, then by actor.
 *
 * Actors are compared by UTF-16 code unit rather than with `localeCompare`, so
 * the winner of a timestamp tie does not depend on the runtime's locale data.
 *
 * @param {[number, string]} a
 * @param {[number, string]} b
 * @returns {number} Negative when `a` is older, positive when newer, 0 when equal.
 */
function compareStamp(a, b) {
    if (a[0] !== b[0]) {
        return a[0] - b[0];
    }
    if (a[1] === b[1]) {
        return 0;
    }
    return a[1] < b[1] ? -1 : 1;
}
|
|
104
|
+
/**
 * Tell a mounted pack from a bare graph: a pack has truthy `meta` and `blocks`.
 *
 * @param {object} input A pack or a ClaimGraph.
 * @returns {boolean} True when `input` looks like a pack.
 */
function isPack(input) {
    if (!input.meta) {
        return false;
    }
    return !!input.blocks;
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import { normalize, tokenize } from '../tokenize.js';
|
|
2
|
+
import { expandLabelToTerms } from './claim_graph.js';
|
|
3
|
+
/**
 * Expand a query string with terms taken from the pack's ClaimGraph.
 *
 * Nodes whose normalized label equals a query token, or starts with one, are
 * the starting points. Their outgoing edges with an allowed predicate are
 * followed, and the terms of the target labels that are not already part of
 * the query are appended in sorted order.
 *
 * @param {{claimGraph?: object}} pack Mounted pack.
 * @param {string} queryString Original query.
 * @param {{maxExtraTerms?: number, predicates?: string[]}} [opts]
 *   `maxExtraTerms` defaults to 12, is clamped to at least 1, and falls back to
 *   12 when it is not a number. `predicates` defaults to
 *   `['defined_as', 'is', 'mentions', 'ref']`.
 * @returns {string} The query followed by the extra terms, or `queryString`
 *   unchanged when the pack has no usable graph or nothing was found.
 */
export function expandQueryWithGraph(pack, queryString, opts = {}) {
    const graph = pack.claimGraph;
    if (!graph || graph.nodes.length === 0 || graph.edges.length === 0) {
        return queryString;
    }
    // A NaN limit would make every `size >= limit` check false and remove all bounds.
    const requestedLimit = Number(opts.maxExtraTerms ?? 12);
    const maxExtraTerms = Number.isNaN(requestedLimit) ? 12 : Math.max(1, requestedLimit);
    const maxCandidates = maxExtraTerms * 4;
    const predicates = new Set((opts.predicates ?? ['defined_as', 'is', 'mentions', 'ref']).map((p) => normalize(p)));
    const qTokens = tokenize(queryString).map((t) => t.term);
    if (qTokens.length === 0) {
        return queryString;
    }
    const qSet = new Set(qTokens);
    // Code-unit ordering, like every other sort in this function, keeps the
    // candidate cap independent of the runtime's locale data.
    const byLabel = (a, b) => {
        if (a[0] === b[0]) {
            return 0;
        }
        return a[0] < b[0] ? -1 : 1;
    };
    const labelEntries = Object.entries(graph.index?.labelToId ?? {}).sort(byLabel);
    const candidateNodeIds = new Set();
    for (const [labelNorm, nodeId] of labelEntries) {
        if (qSet.has(labelNorm)) {
            candidateNodeIds.add(nodeId);
        }
    }
    // Repeated tokens cannot add candidates, so the unique tokens are enough.
    prefixScan: for (const token of [...qSet].sort()) {
        for (const [labelNorm, nodeId] of labelEntries) {
            if (labelNorm.startsWith(token)) {
                candidateNodeIds.add(nodeId);
            }
            if (candidateNodeIds.size >= maxCandidates) {
                break prefixScan;
            }
        }
    }
    const edgeById = new Map(graph.edges.map((e) => [e.id, e]));
    // One id -> node lookup instead of scanning `graph.nodes` for every edge.
    // The first node wins for a repeated id, as a linear search would.
    const nodeById = new Map();
    for (const node of graph.nodes) {
        if (!nodeById.has(node.id)) {
            nodeById.set(node.id, node);
        }
    }
    const outIdx = graph.index?.out ?? {};
    const extraTerms = new Set();
    collect: for (const nodeId of [...candidateNodeIds].sort()) {
        const edgeIds = [...(outIdx[nodeId] ?? [])].sort();
        for (const edgeId of edgeIds) {
            const edge = edgeById.get(edgeId);
            if (!edge || !predicates.has(normalize(edge.p))) {
                continue;
            }
            const target = nodeById.get(edge.to);
            if (!target) {
                continue;
            }
            for (const term of expandLabelToTerms(target.label)) {
                if (!qSet.has(term)) {
                    extraTerms.add(term);
                }
                if (extraTerms.size >= maxExtraTerms) {
                    break collect;
                }
            }
        }
    }
    if (extraTerms.size === 0) {
        return queryString;
    }
    return `${queryString} ${[...extraTerms].sort().join(' ')}`.trim();
}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,14 +1,20 @@
|
|
|
1
|
-
export { mountPack, hasSemantic } from './pack.js';
|
|
1
|
+
export { mountPack, hasSemantic } from './pack.runtime.js';
|
|
2
2
|
export { query, lexConfidence, validateQueryOptions, validateSemanticQueryOptions, } from './query.js';
|
|
3
3
|
export { makeContextPatch } from './patch.js';
|
|
4
4
|
export { buildPack } from './builder.js';
|
|
5
5
|
export { quantizeEmbeddingInt8L2Norm, encodeScaleF16, decodeScaleF16, } from './semantic.js';
|
|
6
6
|
export { listAgents, getAgent, resolveAgent, buildSystemPrompt, isToolAllowed, assertToolAllowed, validateAgentRegistry, validateAgentDefinition, } from './agent.js';
|
|
7
|
-
export
|
|
7
|
+
export { getClaimGraph, validateClaimGraph, } from './graph/claim_graph.js';
|
|
8
|
+
export { buildClaimGraph } from './graph/build_claim_graph.js';
|
|
9
|
+
export { createGraphLog, appendOp, applyClaimGraphLog, mergeClaimGraphLogs, serializeClaimGraphLog, deserializeClaimGraphLog, } from './graph/log.js';
|
|
10
|
+
export { expandQueryWithGraph } from './graph/query_expand.js';
|
|
11
|
+
export type { MountOptions, PackMeta, Pack } from './pack.runtime.js';
|
|
8
12
|
export type { QueryOptions, Hit } from './query.js';
|
|
9
13
|
export type { ContextPatch } from './patch.js';
|
|
10
14
|
export type { BuildInputDoc, BuildPackOptions } from './builder.js';
|
|
11
15
|
export type { AgentPromptTemplate, AgentToolPolicy, AgentRetrievalDefaults, AgentDefinitionV1, AgentRegistry, ResolveAgentInput, ResolvedAgent, } from './agent.js';
|
|
16
|
+
export type { ClaimGraph, ClaimNode, ClaimEdge } from './graph/claim_graph.js';
|
|
17
|
+
export type { ClaimGraphLog, ClaimOp } from './graph/log.js';
|
|
12
18
|
export { parseToolCallV1FromText } from './tool_parse.js';
|
|
13
19
|
export { nowIso, createTrace } from './trace.js';
|
|
14
20
|
export { assertToolCallAllowed } from './tool_gate.js';
|
package/dist/index.js
CHANGED
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
// src/index.ts
|
|
2
|
-
export { mountPack, hasSemantic } from './pack.js';
|
|
2
|
+
export { mountPack, hasSemantic } from './pack.runtime.js';
|
|
3
3
|
export { query, lexConfidence, validateQueryOptions, validateSemanticQueryOptions, } from './query.js';
|
|
4
4
|
export { makeContextPatch } from './patch.js';
|
|
5
5
|
export { buildPack } from './builder.js';
|
|
6
6
|
export { quantizeEmbeddingInt8L2Norm, encodeScaleF16, decodeScaleF16, } from './semantic.js';
|
|
7
7
|
export { listAgents, getAgent, resolveAgent, buildSystemPrompt, isToolAllowed, assertToolAllowed, validateAgentRegistry, validateAgentDefinition, } from './agent.js';
|
|
8
|
+
export { getClaimGraph, validateClaimGraph, } from './graph/claim_graph.js';
|
|
9
|
+
export { buildClaimGraph } from './graph/build_claim_graph.js';
|
|
10
|
+
export { createGraphLog, appendOp, applyClaimGraphLog, mergeClaimGraphLogs, serializeClaimGraphLog, deserializeClaimGraphLog, } from './graph/log.js';
|
|
11
|
+
export { expandQueryWithGraph } from './graph/query_expand.js';
|
|
8
12
|
export { parseToolCallV1FromText } from './tool_parse.js';
|
|
9
13
|
export { nowIso, createTrace } from './trace.js';
|
|
10
14
|
export { assertToolCallAllowed } from './tool_gate.js';
|
package/dist/node.d.ts
ADDED
package/dist/node.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { mountPack, hasSemantic } from './pack.node.js';
|
package/dist/pack.d.ts
CHANGED
|
@@ -1,35 +1,2 @@
|
|
|
1
|
-
|
|
2
|
-
export type MountOptions
|
|
3
|
-
src: string | ArrayBufferLike | Uint8Array;
|
|
4
|
-
};
|
|
5
|
-
export type PackMeta = {
|
|
6
|
-
version: number;
|
|
7
|
-
stats: {
|
|
8
|
-
docs: number;
|
|
9
|
-
blocks: number;
|
|
10
|
-
terms: number;
|
|
11
|
-
avgBlockLen?: number;
|
|
12
|
-
};
|
|
13
|
-
agents?: AgentRegistry;
|
|
14
|
-
};
|
|
15
|
-
export type Pack = {
|
|
16
|
-
meta: PackMeta;
|
|
17
|
-
lexicon: Map<string, number>;
|
|
18
|
-
postings: Uint32Array;
|
|
19
|
-
blocks: string[];
|
|
20
|
-
headings?: (string | null)[];
|
|
21
|
-
docIds?: (string | null)[];
|
|
22
|
-
namespaces?: (string | null)[];
|
|
23
|
-
blockTokenLens?: number[];
|
|
24
|
-
semantic?: {
|
|
25
|
-
version: 1;
|
|
26
|
-
modelId: string;
|
|
27
|
-
dims: number;
|
|
28
|
-
encoding: 'int8_l2norm';
|
|
29
|
-
perVectorScale: boolean;
|
|
30
|
-
vecs: Int8Array;
|
|
31
|
-
scales?: Uint16Array;
|
|
32
|
-
};
|
|
33
|
-
};
|
|
34
|
-
export declare function hasSemantic(pack: Pack): boolean;
|
|
35
|
-
export declare function mountPack(opts: MountOptions): Promise<Pack>;
|
|
1
|
+
export { mountPack, hasSemantic } from './pack.runtime.js';
|
|
2
|
+
export type { MountOptions, PackMeta, Pack } from './pack.runtime.js';
|
package/dist/pack.js
CHANGED
|
@@ -1,175 +1 @@
|
|
|
1
|
-
|
|
2
|
-
* pack.ts
|
|
3
|
-
*
|
|
4
|
-
* Mount `.knolo` packs across Node, browsers, and RN/Expo. Tolerant of:
|
|
5
|
-
* - blocks as string[] (v1) or object[] with { text, heading?, docId?, namespace?, len? }
|
|
6
|
-
* - meta.stats.avgBlockLen (optional)
|
|
7
|
-
* Includes RN/Expo-safe TextDecoder via ponyfill.
|
|
8
|
-
*/
|
|
9
|
-
import { getTextDecoder } from './utils/utf8.js';
|
|
10
|
-
import { validateAgentRegistry } from './agent.js';
|
|
11
|
-
export function hasSemantic(pack) {
|
|
12
|
-
return Boolean(pack.semantic && pack.semantic.dims > 0 && pack.semantic.vecs.length > 0);
|
|
13
|
-
}
|
|
14
|
-
/**
 * Resolve `opts.src` to bytes and decode them into an in-memory pack.
 *
 * Layout as read here (all integers are little-endian uint32):
 *   [metaLen][meta JSON] [lexLen][lexicon JSON]
 *   [postCount][postCount x uint32] [blocksLen][blocks JSON]
 *   optionally followed by [semLen][semantic JSON][blobLen][blob].
 *
 * Every declared length is checked against the bytes that remain before it
 * is used, so a truncated or corrupt pack fails with a descriptive
 * RangeError instead of an opaque DataView error or an oversized allocation.
 *
 * @param {{ src: string | ArrayBufferLike | Uint8Array }} opts
 * @returns {Promise<object>} meta, lexicon, postings, blocks and the optional
 *   per-block arrays / semantic section.
 * @throws {RangeError} when a section length exceeds the available bytes.
 * @throws {SyntaxError} when a JSON section is not valid JSON.
 */
export async function mountPack(opts) {
  const buf = await resolveToBuffer(opts.src);
  const view = new DataView(buf);
  const decoder = getTextDecoder();
  const total = buf.byteLength;
  let offset = 0;

  // Read one little-endian uint32 and advance the cursor.
  const readU32 = (what) => {
    if (total - offset < 4) {
      throw new RangeError(`mountPack: pack is truncated before the ${what} length (offset ${offset} of ${total} bytes).`);
    }
    const value = view.getUint32(offset, true);
    offset += 4;
    return value;
  };

  // Read a length-prefixed UTF-8 JSON section and advance the cursor.
  const readJson = (what) => {
    const byteLen = readU32(what);
    if (byteLen > total - offset) {
      throw new RangeError(`mountPack: ${what} section declares ${byteLen} bytes but only ${total - offset} remain.`);
    }
    const text = decoder.decode(new Uint8Array(buf, offset, byteLen));
    offset += byteLen;
    return JSON.parse(text);
  };

  // meta
  const meta = readJson('meta');
  if (meta.agents) {
    validateAgentRegistry(meta.agents);
  }

  // lexicon (stored as an array of [key, value] entries)
  const lexicon = new Map(readJson('lexicon'));

  // postings: validate the count BEFORE allocating, since it comes from the file.
  const postCount = readU32('postings');
  if (postCount > (total - offset) / 4) {
    throw new RangeError(`mountPack: postings section declares ${postCount} entries but only ${total - offset} bytes remain.`);
  }
  const postings = new Uint32Array(postCount);
  for (let i = 0; i < postCount; i++) {
    postings[i] = view.getUint32(offset, true);
    offset += 4;
  }

  // blocks (v1: string[]; v2/v3: {text, heading?, docId?, namespace?, len?}[])
  const parsed = readJson('blocks');
  let blocks = [];
  let headings;
  let docIds;
  let namespaces;
  let blockTokenLens;
  if (Array.isArray(parsed) && parsed.length && typeof parsed[0] === 'string') {
    blocks = parsed;
  } else if (Array.isArray(parsed)) {
    headings = [];
    docIds = [];
    namespaces = [];
    blockTokenLens = [];
    for (const item of parsed) {
      const isRecord = Boolean(item) && typeof item === 'object';
      blocks.push(String(isRecord ? (item.text ?? '') : (item ?? '')));
      headings.push(isRecord ? (item.heading ?? null) : null);
      docIds.push(isRecord ? (item.docId ?? null) : null);
      namespaces.push(isRecord ? (item.namespace ?? null) : null);
      blockTokenLens.push(isRecord && typeof item.len === 'number' ? item.len : 0);
    }
  }

  // optional semantic section: JSON descriptor followed by a binary blob
  let semantic;
  if (offset < total) {
    const sem = readJson('semantic');
    const blobLen = readU32('semantic blob');
    if (blobLen > total - offset) {
      throw new RangeError(`mountPack: semantic blob declares ${blobLen} bytes but only ${total - offset} remain.`);
    }
    semantic = parseSemanticSection(sem, new Uint8Array(buf, offset, blobLen));
  }

  return {
    meta,
    lexicon,
    postings,
    blocks,
    headings,
    docIds,
    namespaces,
    blockTokenLens,
    semantic,
  };
}
|
|
108
|
-
/**
 * Build the in-memory semantic section from its JSON descriptor and blob.
 *
 * `sem.blocks.vectors` / `sem.blocks.scales` each give a `byteOffset` and a
 * `length` relative to `blob`. Both ranges are validated against the blob
 * before any view is created; without that check a bad descriptor could
 * silently expose bytes that lie after the blob in the same ArrayBuffer.
 *
 * @param {object} sem - Parsed semantic descriptor (may be nullish).
 * @param {Uint8Array} blob - Bytes of the semantic blob.
 * @returns {{version: 1, modelId: string, dims: number, encoding: 'int8_l2norm',
 *   perVectorScale: boolean, vecs: Int8Array, scales: (Uint16Array|undefined)}}
 *   `vecs` is a view over the blob's memory; `scales` is a fresh copy.
 * @throws {RangeError} when a declared range is not a non-negative integer
 *   range that fits inside `blob`.
 */
function parseSemanticSection(sem, blob) {
  const vectors = sem?.blocks?.vectors;
  const scales = sem?.blocks?.scales;

  const requireRange = (label, byteOffset, byteLength) => {
    const fits =
      Number.isInteger(byteOffset) &&
      Number.isInteger(byteLength) &&
      byteOffset >= 0 &&
      byteLength >= 0 &&
      byteOffset + byteLength <= blob.byteLength;
    if (!fits) {
      throw new RangeError(`Invalid semantic section: ${label} range (offset ${byteOffset}, ${byteLength} bytes) does not fit in the ${blob.byteLength}-byte blob.`);
    }
  };

  const vecOffset = Number(vectors?.byteOffset ?? 0);
  const vecLength = Number(vectors?.length ?? 0);
  requireRange('vectors', vecOffset, vecLength);
  const vecs = new Int8Array(blob.buffer, blob.byteOffset + vecOffset, vecLength);

  let scaleView;
  if (scales) {
    const scaleLen = Number(scales.length ?? 0);
    const scaleOffset = Number(scales.byteOffset ?? 0);
    requireRange('scales', scaleOffset, scaleLen * 2);
    // Copy through a DataView: the offset may be odd, which a Uint16Array
    // view would reject, and the values are stored little-endian.
    const dv = new DataView(blob.buffer, blob.byteOffset + scaleOffset, scaleLen * 2);
    scaleView = new Uint16Array(scaleLen);
    for (let i = 0; i < scaleLen; i++) {
      scaleView[i] = dv.getUint16(i * 2, true);
    }
  }

  return {
    version: 1,
    modelId: String(sem?.modelId ?? ''),
    dims: Number(sem?.dims ?? 0),
    encoding: 'int8_l2norm',
    perVectorScale: Boolean(sem?.perVectorScale),
    vecs,
    scales: scaleView,
  };
}
|
|
132
|
-
/**
 * Turn a mount source into an ArrayBuffer holding exactly the pack bytes.
 *
 * - string: read from disk when running on Node and the string looks like a
 *   local path; otherwise fetched as a URL.
 * - typed-array / DataView: its buffer is returned directly when the view
 *   spans the whole buffer, otherwise the viewed bytes are copied out.
 * - anything else is returned unchanged (expected to be an ArrayBuffer).
 *
 * @param {string | ArrayBufferLike | ArrayBufferView} src
 * @returns {Promise<ArrayBufferLike>}
 * @throws {Error} when the URL responds with an HTTP error status.
 */
async function resolveToBuffer(src) {
  if (typeof src === 'string') {
    if (isNodeRuntime() && isLikelyLocalPath(src)) {
      return await readLocalFileAsBuffer(src);
    }
    const res = await fetch(src);
    // Reject HTTP error pages instead of parsing them as pack bytes. Status 0
    // is let through because some runtimes report it for non-HTTP sources.
    if (res.ok === false && res.status !== 0) {
      throw new Error(`mountPack: failed to fetch pack from "${src}" (HTTP ${res.status}).`);
    }
    return await res.arrayBuffer();
  }
  if (ArrayBuffer.isView(src)) {
    if (src.byteOffset === 0 && src.byteLength === src.buffer.byteLength) {
      return src.buffer;
    }
    // Re-wrap as a plain Uint8Array before copying: Node's Buffer overrides
    // slice() to return a view, which would hand back the whole backing buffer.
    return new Uint8Array(src.buffer, src.byteOffset, src.byteLength).slice().buffer;
  }
  return src;
}
|
|
149
|
-
/**
 * Detect a Node.js runtime by probing `globalThis.process.versions.node`.
 *
 * @returns {boolean} `true` when that value is present and truthy.
 */
function isNodeRuntime() {
  const nodeVersion = globalThis.process?.versions?.node;
  return Boolean(nodeVersion);
}
|
|
154
|
-
/**
 * Heuristically decide whether a string names a local file rather than a URL.
 *
 * @param {string} value - Candidate path or URL.
 * @returns {boolean} `true` for `file://` URLs, paths starting with `./`,
 *   `../`, `/` or `~`, Windows drive paths, and any string without a URL
 *   scheme; `false` for other `scheme:` strings.
 */
function isLikelyLocalPath(value) {
  const localPrefixes = ['file://', './', '../', '/', '~'];
  if (localPrefixes.some((prefix) => value.startsWith(prefix))) {
    return true;
  }
  // Windows absolute path; checked before the scheme test because "C:" would
  // otherwise look like a URL scheme.
  const isWindowsAbsolute = /^[A-Za-z]:[\\/]/.test(value);
  if (isWindowsAbsolute) {
    return true;
  }
  const hasUrlScheme = /^[A-Za-z][A-Za-z\d+.-]*:/.test(value);
  // No scheme: plain relative path like "knowledge.knolo".
  return !hasUrlScheme;
}
|
|
168
|
-
/**
 * Read a local file into a standalone ArrayBuffer.
 *
 * `file://` URLs are converted with Node's `fileURLToPath`, which handles
 * percent-decoding and Windows drive letters; taking `URL#pathname` by hand
 * yields `/C:/...` on Windows, which is not a usable path.
 *
 * @param {string} pathOrFileUrl - Filesystem path or `file://` URL.
 * @returns {Promise<ArrayBuffer>} Copy of exactly the file's bytes.
 */
async function readLocalFileAsBuffer(pathOrFileUrl) {
  const { readFile } = await import('node:fs/promises');
  let filePath = pathOrFileUrl;
  if (pathOrFileUrl.startsWith('file://')) {
    const { fileURLToPath } = await import('node:url');
    filePath = fileURLToPath(pathOrFileUrl);
  }
  const data = await readFile(filePath);
  // Slice by the Buffer's own window so only the file's bytes are returned,
  // whatever the size of the ArrayBuffer backing it.
  return data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength);
}
|
|
1
|
+
export { mountPack, hasSemantic } from './pack.runtime.js';
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { mountPackFromBuffer, toArrayBuffer } from './pack.runtime.js';
|
|
2
|
+
export { hasSemantic } from './pack.runtime.js';
|
|
3
|
+
/**
 * Node entry point for mounting a pack: resolves `opts.src` to bytes
 * (string sources go through this module's `resolveToBuffer`) and hands them
 * to the runtime decoder.
 *
 * @param {{ src: string | ArrayBufferLike | Uint8Array }} opts
 * @returns {Promise<object>} The mounted pack.
 */
export async function mountPack(opts) {
  return mountPackFromBuffer(await resolveToBuffer(opts.src));
}
|
|
7
|
+
/**
 * Turn a mount source into an ArrayBuffer (Node variant).
 *
 * Strings that look like local paths are read from disk; other strings are
 * fetched as URLs; non-string sources are normalised with `toArrayBuffer`.
 *
 * `file://` URLs are converted with `fileURLToPath` so Windows drive letters
 * and percent-encoding are handled, and HTTP error responses are rejected
 * instead of being parsed as pack bytes.
 *
 * @param {string | ArrayBufferLike | Uint8Array} src
 * @returns {Promise<ArrayBufferLike>}
 * @throws {Error} when the URL responds with an HTTP error status.
 */
async function resolveToBuffer(src) {
  if (typeof src !== 'string') {
    return toArrayBuffer(src);
  }
  if (isLikelyLocalPath(src)) {
    const { readFile } = await import('node:fs/promises');
    let filePath = src;
    if (src.startsWith('file://')) {
      const { fileURLToPath } = await import('node:url');
      filePath = fileURLToPath(src);
    }
    const data = await readFile(filePath);
    // Slice by the Buffer's own window so only the file's bytes are returned.
    return data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength);
  }
  const res = await fetch(src);
  // Status 0 is let through because some runtimes report it for non-HTTP sources.
  if (res.ok === false && res.status !== 0) {
    throw new Error(`mountPack: failed to fetch pack from "${src}" (HTTP ${res.status}).`);
  }
  return await res.arrayBuffer();
}
|
|
22
|
+
/**
 * Heuristically classify a string as a local file path (true) or URL (false).
 *
 * @param {string} value - Candidate path or URL.
 * @returns {boolean} `true` for `file://` URLs, `./`, `../`, `/` and `~`
 *   prefixes, Windows drive paths, and scheme-less strings; `false` when the
 *   string starts with any other `scheme:`.
 */
function isLikelyLocalPath(value) {
  for (const prefix of ['file://', './', '../', '/', '~']) {
    if (value.startsWith(prefix)) {
      return true;
    }
  }
  // Drive-letter paths must be recognised before the generic scheme test.
  const drivePath = /^[A-Za-z]:[\\/]/;
  if (drivePath.test(value)) {
    return true;
  }
  const urlScheme = /^[A-Za-z][A-Za-z\d+.-]*:/;
  return urlScheme.test(value) ? false : true;
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import type { AgentRegistry } from './agent.js';
|
|
2
|
+
import type { ClaimGraph } from './graph/claim_graph.js';
|
|
3
|
+
/** Options accepted by `mountPack`. */
export type MountOptions = {
    /** Pack source: a string, or the pack bytes as an ArrayBufferLike / Uint8Array. */
    src: string | ArrayBufferLike | Uint8Array;
};
/** Metadata object stored in a pack. */
export type PackMeta = {
    version: number;
    stats: {
        docs: number;
        blocks: number;
        terms: number;
        avgBlockLen?: number;
    };
    /** Optional agent registry. */
    agents?: AgentRegistry;
    /** Optional claim-graph summary (node and edge counts). */
    claimGraph?: {
        version: 1;
        nodes: number;
        edges: number;
    };
};
/** A mounted pack. */
export type Pack = {
    meta: PackMeta;
    lexicon: Map<string, number>;
    postings: Uint32Array;
    /** Block texts. */
    blocks: string[];
    /** Optional per-block arrays; entries may be null. */
    headings?: (string | null)[];
    docIds?: (string | null)[];
    namespaces?: (string | null)[];
    blockTokenLens?: number[];
    /** Optional semantic (embedding) section. */
    semantic?: {
        version: 1;
        modelId: string;
        dims: number;
        encoding: 'int8_l2norm';
        perVectorScale: boolean;
        vecs: Int8Array;
        scales?: Uint16Array;
    };
    /** Optional claim graph. */
    claimGraph?: ClaimGraph;
};
/** Returns whether `pack` has a usable semantic section. */
export declare function hasSemantic(pack: Pack): boolean;
/** Resolves `opts.src` and mounts the resulting bytes as a pack. */
export declare function mountPack(opts: MountOptions): Promise<Pack>;
/** Decodes a pack from bytes already held in memory. */
export declare function mountPackFromBuffer(buf: ArrayBuffer): Pack;
/** Normalises an ArrayBufferLike or Uint8Array source to a buffer. */
export declare function toArrayBuffer(src: ArrayBufferLike | Uint8Array): ArrayBuffer;
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* pack.runtime.ts
|
|
3
|
+
*
|
|
4
|
+
* Runtime-safe pack mounting for browser and React Native environments.
|
|
5
|
+
* No Node stdlib imports are allowed in this module.
|
|
6
|
+
*/
|
|
7
|
+
import { getTextDecoder } from './utils/utf8.js';
|
|
8
|
+
import { validateAgentRegistry } from './agent.js';
|
|
9
|
+
import { validateClaimGraph } from './graph/claim_graph.js';
|
|
10
|
+
/**
 * Tell whether a mounted pack has semantic vectors that can be searched.
 *
 * @param {object} pack - Mounted pack; `pack.semantic` is optional.
 * @returns {boolean} `true` only when the semantic section exists with a
 *   positive `dims` and a non-empty `vecs` array.
 */
export function hasSemantic(pack) {
  const { semantic } = pack;
  if (!semantic) {
    return false;
  }
  const hasDims = semantic.dims > 0;
  return hasDims && semantic.vecs.length > 0;
}
|
|
13
|
+
/**
 * Runtime-safe entry point for mounting a pack: resolves `opts.src` to bytes
 * and decodes them with `mountPackFromBuffer`.
 *
 * @param {{ src: string | ArrayBufferLike | Uint8Array }} opts
 * @returns {Promise<object>} The mounted pack.
 */
export async function mountPack(opts) {
  return mountPackFromBuffer(await resolveToBuffer(opts.src));
}
|
|
17
|
+
/**
 * Decode a pack that is already in memory.
 *
 * Layout as read here (all integers are little-endian uint32):
 *   [metaLen][meta JSON] [lexLen][lexicon JSON]
 *   [postCount][postCount x uint32] [blocksLen][blocks JSON]
 * followed by zero or more optional trailing sections, each starting with
 * [jsonLen][JSON]. A semantic descriptor is additionally followed by
 * [blobLen][blob]; any other JSON is offered to `validateClaimGraph`.
 *
 * Core section lengths are validated against the remaining bytes before use,
 * so corrupt input fails with a descriptive RangeError rather than an opaque
 * DataView error or an allocation sized by an untrusted count. Trailing
 * sections are best-effort: scanning stops quietly at the first one that is
 * truncated, unparsable, or unrecognised.
 *
 * @param {ArrayBuffer} buf - Complete pack bytes.
 * @returns {object} meta, lexicon, postings, blocks, the optional per-block
 *   arrays, and the optional `semantic` / `claimGraph` sections.
 * @throws {RangeError} when a core section exceeds the available bytes.
 * @throws {SyntaxError} when a core JSON section is not valid JSON.
 */
export function mountPackFromBuffer(buf) {
  const view = new DataView(buf);
  const decoder = getTextDecoder();
  const total = buf.byteLength;
  let offset = 0;

  // Read one little-endian uint32 and advance the cursor.
  const readU32 = (what) => {
    if (total - offset < 4) {
      throw new RangeError(`mountPack: pack is truncated before the ${what} length (offset ${offset} of ${total} bytes).`);
    }
    const value = view.getUint32(offset, true);
    offset += 4;
    return value;
  };

  // Read a length-prefixed UTF-8 JSON section and advance the cursor.
  const readJson = (what) => {
    const byteLen = readU32(what);
    if (byteLen > total - offset) {
      throw new RangeError(`mountPack: ${what} section declares ${byteLen} bytes but only ${total - offset} remain.`);
    }
    const text = decoder.decode(new Uint8Array(buf, offset, byteLen));
    offset += byteLen;
    return JSON.parse(text);
  };

  const meta = readJson('meta');
  if (meta.agents) {
    validateAgentRegistry(meta.agents);
  }

  // The lexicon is stored as an array of [key, value] entries.
  const lexicon = new Map(readJson('lexicon'));

  // Validate the count BEFORE allocating, since it comes from the file.
  const postCount = readU32('postings');
  if (postCount > (total - offset) / 4) {
    throw new RangeError(`mountPack: postings section declares ${postCount} entries but only ${total - offset} bytes remain.`);
  }
  const postings = new Uint32Array(postCount);
  for (let i = 0; i < postCount; i++) {
    postings[i] = view.getUint32(offset, true);
    offset += 4;
  }

  const { blocks, headings, docIds, namespaces, blockTokenLens } = unpackBlocks(readJson('blocks'));

  let semantic;
  let claimGraph;
  while (total - offset >= 4) {
    const jsonLen = view.getUint32(offset, true);
    const jsonStart = offset + 4;
    if (jsonLen > total - jsonStart) {
      break;
    }
    let section;
    try {
      section = JSON.parse(decoder.decode(new Uint8Array(buf, jsonStart, jsonLen)));
    } catch {
      // Unparsable trailing bytes are treated as "no more sections".
      break;
    }
    let next = jsonStart + jsonLen;

    if (!semantic && looksLikeSemanticJson(section)) {
      if (total - next < 4) {
        break;
      }
      const blobLen = view.getUint32(next, true);
      next += 4;
      if (blobLen > total - next) {
        break;
      }
      semantic = parseSemanticSection(section, new Uint8Array(buf, next, blobLen));
      offset = next + blobLen;
      continue;
    }

    const graph = validateClaimGraph(section);
    if (claimGraph || !graph) {
      // Unrecognised (or duplicate) section: stop scanning.
      break;
    }
    claimGraph = graph;
    offset = next;
  }

  return {
    meta,
    lexicon,
    postings,
    blocks,
    headings,
    docIds,
    namespaces,
    blockTokenLens,
    semantic,
    claimGraph,
  };
}

/**
 * Split the parsed blocks section into parallel arrays.
 * A non-empty array whose first element is a string is used as-is (v1) and
 * yields no per-block arrays; any other array is unpacked item by item; a
 * non-array yields an empty block list.
 */
function unpackBlocks(parsed) {
  if (!Array.isArray(parsed)) {
    return { blocks: [] };
  }
  if (parsed.length && typeof parsed[0] === 'string') {
    return { blocks: parsed };
  }
  const out = { blocks: [], headings: [], docIds: [], namespaces: [], blockTokenLens: [] };
  for (const item of parsed) {
    const isRecord = Boolean(item) && typeof item === 'object';
    out.blocks.push(String(isRecord ? (item.text ?? '') : (item ?? '')));
    out.headings.push(isRecord ? (item.heading ?? null) : null);
    out.docIds.push(isRecord ? (item.docId ?? null) : null);
    out.namespaces.push(isRecord ? (item.namespace ?? null) : null);
    out.blockTokenLens.push(isRecord && typeof item.len === 'number' ? item.len : 0);
  }
  return out;
}
|
|
137
|
+
/**
 * Structural check used to recognise a semantic-section descriptor.
 *
 * @param {unknown} parsed - Value produced by `JSON.parse`.
 * @returns {boolean} `true` when `parsed` is an object with `version === 1`,
 *   `encoding === 'int8_l2norm'`, and numeric `blocks.vectors.byteOffset`
 *   and `blocks.vectors.length`.
 */
function looksLikeSemanticJson(parsed) {
  if (!parsed || typeof parsed !== 'object') {
    return false;
  }
  if (parsed.version !== 1 || parsed.encoding !== 'int8_l2norm') {
    return false;
  }
  const vectors = parsed.blocks?.vectors;
  return typeof vectors?.byteOffset === 'number' && typeof vectors?.length === 'number';
}
|
|
146
|
+
/**
 * Build the in-memory semantic section from its JSON descriptor and blob.
 *
 * `sem.blocks.vectors` / `sem.blocks.scales` each give a `byteOffset` and a
 * `length` relative to `blob`. Both ranges are validated against the blob
 * before any view is created; without that check a bad descriptor could
 * silently expose bytes that lie after the blob in the same ArrayBuffer.
 *
 * @param {object} sem - Parsed semantic descriptor (may be nullish).
 * @param {Uint8Array} blob - Bytes of the semantic blob.
 * @returns {{version: 1, modelId: string, dims: number, encoding: 'int8_l2norm',
 *   perVectorScale: boolean, vecs: Int8Array, scales: (Uint16Array|undefined)}}
 *   `vecs` is a view over the blob's memory; `scales` is a fresh copy.
 * @throws {RangeError} when a declared range is not a non-negative integer
 *   range that fits inside `blob`.
 */
function parseSemanticSection(sem, blob) {
  const vectors = sem?.blocks?.vectors;
  const scales = sem?.blocks?.scales;

  const requireRange = (label, byteOffset, byteLength) => {
    const fits =
      Number.isInteger(byteOffset) &&
      Number.isInteger(byteLength) &&
      byteOffset >= 0 &&
      byteLength >= 0 &&
      byteOffset + byteLength <= blob.byteLength;
    if (!fits) {
      throw new RangeError(`Invalid semantic section: ${label} range (offset ${byteOffset}, ${byteLength} bytes) does not fit in the ${blob.byteLength}-byte blob.`);
    }
  };

  const vecOffset = Number(vectors?.byteOffset ?? 0);
  const vecLength = Number(vectors?.length ?? 0);
  requireRange('vectors', vecOffset, vecLength);
  const vecs = new Int8Array(blob.buffer, blob.byteOffset + vecOffset, vecLength);

  let scaleView;
  if (scales) {
    const scaleLen = Number(scales.length ?? 0);
    const scaleOffset = Number(scales.byteOffset ?? 0);
    requireRange('scales', scaleOffset, scaleLen * 2);
    // Copy through a DataView: the offset may be odd, which a Uint16Array
    // view would reject, and the values are stored little-endian.
    const dv = new DataView(blob.buffer, blob.byteOffset + scaleOffset, scaleLen * 2);
    scaleView = new Uint16Array(scaleLen);
    for (let i = 0; i < scaleLen; i++) {
      scaleView[i] = dv.getUint16(i * 2, true);
    }
  }

  return {
    version: 1,
    modelId: String(sem?.modelId ?? ''),
    dims: Number(sem?.dims ?? 0),
    encoding: 'int8_l2norm',
    perVectorScale: Boolean(sem?.perVectorScale),
    vecs,
    scales: scaleView,
  };
}
|
|
170
|
+
/**
 * Turn a mount source into an ArrayBuffer without touching Node APIs.
 *
 * Strings are always fetched as URLs; any other source is normalised with
 * `toArrayBuffer`.
 *
 * @param {string | ArrayBufferLike | Uint8Array} src
 * @returns {Promise<ArrayBufferLike>}
 * @throws {Error} when fetching or reading the body fails (the underlying
 *   error is attached as `cause`), or when the server answers with an HTTP
 *   error status.
 */
async function resolveToBuffer(src) {
  if (typeof src !== 'string') {
    return toArrayBuffer(src);
  }
  let httpStatus;
  try {
    const res = await fetch(src);
    // Status 0 is let through because some runtimes report it for non-HTTP
    // sources; real HTTP errors must not be parsed as pack bytes.
    if (res.ok !== false || res.status === 0) {
      return await res.arrayBuffer();
    }
    httpStatus = res.status;
  } catch (err) {
    // Keep the guidance message, but preserve the real failure for debugging.
    throw new Error('mountPack({src: string}) expects a URL in React Native. For local files, load bytes in your app and pass Uint8Array/ArrayBuffer.', { cause: err });
  }
  throw new Error(`mountPack: failed to fetch pack from "${src}" (HTTP ${httpStatus}).`);
}
|
|
182
|
+
/**
 * Normalise pack bytes to a buffer that contains exactly those bytes.
 *
 * - A view (any typed array or DataView) covering its whole buffer returns
 *   that buffer without copying.
 * - A view covering only part of its buffer is copied into a fresh
 *   ArrayBuffer. The view is re-wrapped as a plain Uint8Array first, because
 *   Node's `Buffer` overrides `slice()` to return another view over the same
 *   memory; calling it directly would hand back the whole backing buffer.
 * - Anything else is returned unchanged (expected to be an ArrayBuffer).
 *
 * @param {ArrayBufferLike | ArrayBufferView} src
 * @returns {ArrayBufferLike}
 */
export function toArrayBuffer(src) {
  // ArrayBuffer.isView also recognises views created in another realm,
  // which an `instanceof Uint8Array` test would miss.
  if (!ArrayBuffer.isView(src)) {
    return src;
  }
  if (src.byteOffset === 0 && src.byteLength === src.buffer.byteLength) {
    return src.buffer;
  }
  return new Uint8Array(src.buffer, src.byteOffset, src.byteLength).slice().buffer;
}
|
package/dist/query.d.ts
CHANGED
package/dist/query.js
CHANGED
|
@@ -14,6 +14,7 @@ import { minCoverSpan, proximityMultiplier } from "./quality/proximity.js";
|
|
|
14
14
|
import { diversifyAndDedupe } from "./quality/diversify.js";
|
|
15
15
|
import { knsSignature, knsDistance } from "./quality/signature.js";
|
|
16
16
|
import { decodeScaleF16, quantizeEmbeddingInt8L2Norm } from "./semantic.js";
|
|
17
|
+
import { expandQueryWithGraph } from "./graph/query_expand.js";
|
|
17
18
|
export function validateQueryOptions(opts) {
|
|
18
19
|
if (!opts)
|
|
19
20
|
return;
|
|
@@ -46,6 +47,17 @@ export function validateQueryOptions(opts) {
|
|
|
46
47
|
throw new Error("query(...): queryExpansion.minTermLength must be a positive integer.");
|
|
47
48
|
}
|
|
48
49
|
}
|
|
50
|
+
if (opts.graph) {
|
|
51
|
+
if (opts.graph.expand !== undefined && typeof opts.graph.expand !== "boolean") {
|
|
52
|
+
throw new Error("query(...): graph.expand must be a boolean when provided.");
|
|
53
|
+
}
|
|
54
|
+
if (opts.graph.maxExtraTerms !== undefined && (!Number.isInteger(opts.graph.maxExtraTerms) || opts.graph.maxExtraTerms < 1)) {
|
|
55
|
+
throw new Error("query(...): graph.maxExtraTerms must be a positive integer.");
|
|
56
|
+
}
|
|
57
|
+
if (opts.graph.predicates !== undefined && (!Array.isArray(opts.graph.predicates) || opts.graph.predicates.some((p) => typeof p !== "string"))) {
|
|
58
|
+
throw new Error("query(...): graph.predicates must be an array of strings when provided.");
|
|
59
|
+
}
|
|
60
|
+
}
|
|
49
61
|
validateSemanticQueryOptions(opts.semantic);
|
|
50
62
|
}
|
|
51
63
|
export function validateSemanticQueryOptions(options) {
|
|
@@ -105,8 +117,14 @@ export function query(pack, q, opts = {}) {
|
|
|
105
117
|
queryEmbedding: opts.semantic?.queryEmbedding,
|
|
106
118
|
force: opts.semantic?.force ?? false,
|
|
107
119
|
};
|
|
120
|
+
const graphQuery = opts.graph?.expand === true
|
|
121
|
+
? expandQueryWithGraph(pack, q, {
|
|
122
|
+
maxExtraTerms: opts.graph?.maxExtraTerms,
|
|
123
|
+
predicates: opts.graph?.predicates,
|
|
124
|
+
})
|
|
125
|
+
: q;
|
|
108
126
|
// --- Query parsing
|
|
109
|
-
const normTokens = tokenize(
|
|
127
|
+
const normTokens = tokenize(graphQuery).map((t) => t.term);
|
|
110
128
|
// Normalize quoted phrases from q
|
|
111
129
|
const quotedRaw = parsePhrases(q);
|
|
112
130
|
const quoted = quotedRaw.map((seq) => seq.map((t) => normalize(t)).flatMap((s) => s.split(/\s+/)).filter(Boolean));
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@knolo/core",
|
|
3
|
-
"version": "3.1
|
|
3
|
+
"version": "3.2.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Local-first knowledge packs for small LLMs.",
|
|
6
6
|
"keywords": [
|
|
@@ -19,8 +19,14 @@
|
|
|
19
19
|
],
|
|
20
20
|
"exports": {
|
|
21
21
|
".": {
|
|
22
|
+
"react-native": "./dist/index.js",
|
|
23
|
+
"browser": "./dist/index.js",
|
|
22
24
|
"import": "./dist/index.js",
|
|
23
25
|
"types": "./dist/index.d.ts"
|
|
26
|
+
},
|
|
27
|
+
"./node": {
|
|
28
|
+
"import": "./dist/node.js",
|
|
29
|
+
"types": "./dist/node.d.ts"
|
|
24
30
|
}
|
|
25
31
|
},
|
|
26
32
|
"sideEffects": false,
|
|
@@ -28,9 +34,10 @@
|
|
|
28
34
|
"build": "tsc -p tsconfig.json",
|
|
29
35
|
"prepublishOnly": "npm run build",
|
|
30
36
|
"smoke": "node scripts/smoke.mjs",
|
|
31
|
-
"test": "npm run build && node scripts/test.mjs",
|
|
32
|
-
"format": "prettier --write src/agent.ts src/pack.ts src/builder.ts src/index.ts scripts/test.mjs ../../README.md",
|
|
33
|
-
"format:check": "prettier --check src/agent.ts src/pack.ts src/builder.ts src/index.ts scripts/test.mjs ../../README.md"
|
|
37
|
+
"test": "npm run build && node scripts/check-runtime-no-node.mjs && node scripts/test.mjs",
|
|
38
|
+
"format": "prettier --write src/agent.ts src/pack.ts src/pack.runtime.ts src/pack.node.ts src/node.ts src/builder.ts src/index.ts scripts/test.mjs scripts/check-runtime-no-node.mjs ../../README.md README.md",
|
|
39
|
+
"format:check": "prettier --check src/agent.ts src/pack.ts src/pack.runtime.ts src/pack.node.ts src/node.ts src/builder.ts src/index.ts scripts/test.mjs scripts/check-runtime-no-node.mjs ../../README.md README.md",
|
|
40
|
+
"check:runtime-no-node": "node scripts/check-runtime-no-node.mjs"
|
|
34
41
|
},
|
|
35
42
|
"devDependencies": {
|
|
36
43
|
"@types/node": "^20.11.0",
|