@knolo/core 3.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.d.ts +53 -0
- package/dist/agent.js +175 -0
- package/dist/builder.d.ts +20 -0
- package/dist/builder.js +196 -0
- package/dist/index.d.ts +21 -0
- package/dist/index.js +13 -0
- package/dist/indexer.d.ts +23 -0
- package/dist/indexer.js +71 -0
- package/dist/pack.d.ts +35 -0
- package/dist/pack.js +175 -0
- package/dist/patch.d.ts +22 -0
- package/dist/patch.js +35 -0
- package/dist/quality/diversify.d.ts +13 -0
- package/dist/quality/diversify.js +41 -0
- package/dist/quality/proximity.d.ts +2 -0
- package/dist/quality/proximity.js +31 -0
- package/dist/quality/signature.d.ts +3 -0
- package/dist/quality/signature.js +24 -0
- package/dist/quality/similarity.d.ts +3 -0
- package/dist/quality/similarity.js +27 -0
- package/dist/query.d.ts +41 -0
- package/dist/query.js +463 -0
- package/dist/rank.d.ts +21 -0
- package/dist/rank.js +31 -0
- package/dist/router.d.ts +28 -0
- package/dist/router.js +74 -0
- package/dist/routing_profile.d.ts +19 -0
- package/dist/routing_profile.js +102 -0
- package/dist/semantic.d.ts +7 -0
- package/dist/semantic.js +98 -0
- package/dist/tokenize.d.ts +24 -0
- package/dist/tokenize.js +53 -0
- package/dist/tool_gate.d.ts +3 -0
- package/dist/tool_gate.js +8 -0
- package/dist/tool_parse.d.ts +2 -0
- package/dist/tool_parse.js +102 -0
- package/dist/tools.d.ts +27 -0
- package/dist/tools.js +34 -0
- package/dist/trace.d.ts +45 -0
- package/dist/trace.js +12 -0
- package/dist/utils/utf8.d.ts +8 -0
- package/dist/utils/utf8.js +72 -0
- package/package.json +39 -0
package/dist/pack.js
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* pack.ts
|
|
3
|
+
*
|
|
4
|
+
* Mount `.knolo` packs across Node, browsers, and RN/Expo. Tolerant of:
|
|
5
|
+
* - blocks as string[] (v1) or object[] with { text, heading?, docId?, namespace?, len? }
|
|
6
|
+
* - meta.stats.avgBlockLen (optional)
|
|
7
|
+
* Includes RN/Expo-safe TextDecoder via ponyfill.
|
|
8
|
+
*/
|
|
9
|
+
import { getTextDecoder } from './utils/utf8.js';
|
|
10
|
+
import { validateAgentRegistry } from './agent.js';
|
|
11
|
+
/*
 * True when the pack carries a usable semantic section: the section must
 * exist, declare a positive dimensionality, and hold at least one vector byte.
 */
export function hasSemantic(pack) {
    const sem = pack.semantic;
    if (!sem)
        return false;
    return sem.dims > 0 && sem.vecs.length > 0;
}
|
|
14
|
+
/**
 * Mount a `.knolo` pack from any supported source (path/URL string,
 * Uint8Array, or ArrayBuffer) and parse its sections into an in-memory pack.
 *
 * Binary layout (all length prefixes are little-endian Uint32):
 *   [metaLen][meta JSON][lexLen][lexicon JSON][postCount][postings Uint32s]
 *   [blocksLen][blocks JSON][optional: [semLen][semantic JSON][semBlobLen][blob]]
 *
 * Returns { meta, lexicon, postings, blocks, headings, docIds, namespaces,
 * blockTokenLens, semantic }. For v1 packs (blocks as string[]) the
 * headings/docIds/namespaces/blockTokenLens fields remain undefined.
 */
export async function mountPack(opts) {
    const buf = await resolveToBuffer(opts.src);
    const dv = new DataView(buf);
    const dec = getTextDecoder();
    let offset = 0;
    // meta — length-prefixed JSON header; agent registry is validated eagerly
    const metaLen = dv.getUint32(offset, true);
    offset += 4;
    const metaJson = dec.decode(new Uint8Array(buf, offset, metaLen));
    offset += metaLen;
    const meta = JSON.parse(metaJson);
    if (meta.agents) {
        validateAgentRegistry(meta.agents);
    }
    // lexicon — JSON array of entries, rehydrated into a Map
    const lexLen = dv.getUint32(offset, true);
    offset += 4;
    const lexJson = dec.decode(new Uint8Array(buf, offset, lexLen));
    offset += lexLen;
    const lexEntries = JSON.parse(lexJson);
    const lexicon = new Map(lexEntries);
    // postings — count-prefixed run of Uint32 values, read element-by-element
    // via the DataView (the section is not guaranteed 4-byte aligned)
    const postCount = dv.getUint32(offset, true);
    offset += 4;
    const postings = new Uint32Array(postCount);
    for (let i = 0; i < postCount; i++) {
        postings[i] = dv.getUint32(offset, true);
        offset += 4;
    }
    // blocks (v1: string[]; v2/v3: {text, heading?, docId?, namespace?, len?}[])
    const blocksLen = dv.getUint32(offset, true);
    offset += 4;
    const blocksJson = dec.decode(new Uint8Array(buf, offset, blocksLen));
    offset += blocksLen;
    const parsed = JSON.parse(blocksJson);
    let blocks = [];
    let headings;
    let docIds;
    let namespaces;
    let blockTokenLens;
    if (Array.isArray(parsed) && parsed.length && typeof parsed[0] === 'string') {
        // v1
        blocks = parsed;
    }
    else if (Array.isArray(parsed)) {
        // v2/v3 — tolerate mixed entries: non-object items degrade to bare text
        blocks = [];
        headings = [];
        docIds = [];
        namespaces = [];
        blockTokenLens = [];
        for (const it of parsed) {
            if (it && typeof it === 'object') {
                blocks.push(String(it.text ?? ''));
                headings.push(it.heading ?? null);
                docIds.push(it.docId ?? null);
                namespaces.push(it.namespace ?? null);
                blockTokenLens.push(typeof it.len === 'number' ? it.len : 0);
            }
            else {
                blocks.push(String(it ?? ''));
                headings.push(null);
                docIds.push(null);
                namespaces.push(null);
                blockTokenLens.push(0);
            }
        }
    }
    else {
        // Unexpected shape — fall back to an empty pack rather than throwing
        blocks = [];
    }
    // Optional trailing semantic section (JSON descriptor + raw vector blob)
    let semantic;
    if (offset < buf.byteLength) {
        const semLen = dv.getUint32(offset, true);
        offset += 4;
        const semJson = dec.decode(new Uint8Array(buf, offset, semLen));
        offset += semLen;
        const sem = JSON.parse(semJson);
        const semBlobLen = dv.getUint32(offset, true);
        offset += 4;
        const semBlob = new Uint8Array(buf, offset, semBlobLen);
        semantic = parseSemanticSection(sem, semBlob);
    }
    return {
        meta,
        lexicon,
        postings,
        blocks,
        headings,
        docIds,
        namespaces,
        blockTokenLens,
        semantic,
    };
}
|
|
108
|
+
/**
 * Build the in-memory semantic section from its JSON descriptor (`sem`) and
 * the raw byte blob. `sem.blocks.vectors`/`sem.blocks.scales` carry
 * byteOffset/length pairs that locate each region inside `blob`.
 */
function parseSemanticSection(sem, blob) {
    const vectors = sem?.blocks?.vectors;
    const scales = sem?.blocks?.scales;
    // Int8 vector data is viewed in place (no copy) at its offset within blob;
    // a missing descriptor degrades to a zero-length view.
    const vecs = new Int8Array(blob.buffer, blob.byteOffset + Number(vectors?.byteOffset ?? 0), Number(vectors?.length ?? 0));
    let scaleView;
    if (scales) {
        const scaleLen = Number(scales.length ?? 0);
        const scaleOffset = Number(scales.byteOffset ?? 0);
        // Read scales element-by-element through a DataView rather than a
        // direct Uint16Array view — presumably because the offset may not be
        // 2-byte aligned (Uint16Array views require even byte offsets); the
        // result is a standalone (copied) Uint16Array.
        const dv = new DataView(blob.buffer, blob.byteOffset + scaleOffset, scaleLen * 2);
        scaleView = new Uint16Array(scaleLen);
        for (let i = 0; i < scaleLen; i++) {
            scaleView[i] = dv.getUint16(i * 2, true);
        }
    }
    return {
        version: 1,
        modelId: String(sem?.modelId ?? ''),
        dims: Number(sem?.dims ?? 0),
        encoding: 'int8_l2norm',
        perVectorScale: Boolean(sem?.perVectorScale),
        vecs,
        scales: scaleView,
    };
}
|
|
132
|
+
/**
 * Normalize any accepted pack source into a plain ArrayBuffer.
 * Strings are treated as local paths (Node) or fetched as URLs; Uint8Arrays
 * are unwrapped when they span their whole buffer, otherwise copied;
 * anything else is assumed to already be an ArrayBuffer.
 */
async function resolveToBuffer(src) {
    if (typeof src !== 'string') {
        if (!(src instanceof Uint8Array))
            return src; // already an ArrayBuffer
        const coversWholeBuffer = src.byteOffset === 0 && src.byteLength === src.buffer.byteLength;
        // A partial view must be copied so offsets inside the pack start at 0.
        return coversWholeBuffer ? src.buffer : src.slice().buffer;
    }
    if (isNodeRuntime() && isLikelyLocalPath(src))
        return await readLocalFileAsBuffer(src);
    const response = await fetch(src);
    return await response.arrayBuffer();
}
|
|
149
|
+
/** Detect Node.js by the presence of process.versions.node on the global. */
function isNodeRuntime() {
    const proc = globalThis.process;
    return Boolean(proc && proc.versions && proc.versions.node);
}
|
|
154
|
+
/**
 * Heuristic: should a string source be read from disk rather than fetched?
 * Local: file:// URLs, relative/absolute path prefixes, Windows drive paths,
 * and bare names ("knowledge.knolo"). Remote: anything with a URL scheme.
 */
function isLikelyLocalPath(value) {
    if (value.startsWith('file://'))
        return true;
    const pathPrefixes = ['./', '../', '/', '~'];
    if (pathPrefixes.some((prefix) => value.startsWith(prefix)))
        return true;
    // Drive letter + separator: Windows absolute path. Must be tested before
    // the scheme regex, which would otherwise match "C:" as a scheme.
    if (/^[A-Za-z]:[\\/]/.test(value))
        return true;
    // RFC 3986 scheme (http:, https:, data:, …) — treat as remote.
    if (/^[A-Za-z][A-Za-z\d+.-]*:/.test(value))
        return false;
    return true; // plain relative path like "knowledge.knolo"
}
|
|
168
|
+
/**
 * Read a local file (plain path or file:// URL) into a standalone ArrayBuffer.
 *
 * Fix: convert file:// URLs with `url.fileURLToPath` instead of
 * `decodeURIComponent(new URL(u).pathname)` — the pathname approach yields
 * broken Windows paths like "/C:/dir/f" and silently ignores a URL host,
 * whereas fileURLToPath handles both per the WHATWG file-URL rules.
 * Plain paths pass through unchanged, so POSIX behavior is unaffected.
 */
async function readLocalFileAsBuffer(pathOrFileUrl) {
    const { readFile } = await import('node:fs/promises');
    let filePath = pathOrFileUrl;
    if (pathOrFileUrl.startsWith('file://')) {
        const { fileURLToPath } = await import('node:url');
        filePath = fileURLToPath(pathOrFileUrl);
    }
    const data = await readFile(filePath);
    // Slice to exactly the file's bytes: readFile may hand back a view into a
    // larger pooled Buffer, and callers expect offset 0 to be byte 0.
    return data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength);
}
|
package/dist/patch.d.ts
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import type { Hit } from './query.js';
/**
 * Compact, deterministic bundle of context built from ranked hits by
 * {@link makeContextPatch}.
 */
export type ContextPatch = {
    /** One-line background summaries (lead sentences of the top snippets). */
    background: string[];
    /** Truncated hit texts with their originating source, when known. */
    snippets: Array<{
        text: string;
        source?: string;
    }>;
    /** Term definitions; `evidence` holds supporting block ids. Currently
     * always emitted empty by makeContextPatch (reserved for future use). */
    definitions: Array<{
        term: string;
        def: string;
        evidence?: number[];
    }>;
    /** Subject/predicate/object facts; `evidence` holds supporting block ids.
     * Currently always emitted empty by makeContextPatch. */
    facts: Array<{
        s: string;
        p: string;
        o: string;
        evidence?: number[];
    }>;
};
/**
 * Build a ContextPatch from ranked hits. `budget` controls snippet count and
 * per-snippet character cap (defaults to 'small').
 */
export declare function makeContextPatch(hits: Hit[], opts?: {
    budget?: 'mini' | 'small' | 'full';
}): ContextPatch;
|
package/dist/patch.js
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* patch.ts
|
|
3
|
+
*
|
|
4
|
+
* Produces a compact, deterministic “context patch” from ranked hits.
|
|
5
|
+
*/
|
|
6
|
+
/*
 * Build a deterministic context patch from ranked hits. The budget picks how
 * many snippets survive and how long each may be; the background is the lead
 * sentence of the top two snippets. definitions/facts are emitted empty.
 */
export function makeContextPatch(hits, opts = {}) {
    const BUDGETS = {
        mini: { snippets: 3, chars: 240 },
        small: { snippets: 6, chars: 420 },
        full: { snippets: 10, chars: 900 },
    };
    const limit = BUDGETS[opts.budget ?? 'small'];
    const snippets = [];
    for (const hit of hits.slice(0, limit.snippets)) {
        snippets.push({
            text: truncate(hit.text, limit.chars),
            source: hit.source,
        });
    }
    // Background summary: first sentence of each of the top two snippets.
    const background = snippets.slice(0, 2).map((s) => firstSentence(s.text));
    return { background, snippets, definitions: [], facts: [] };
}
|
|
27
|
+
/*
 * Lead sentence of `text`: the shortest 10–200 char prefix ending in sentence
 * punctuation followed by whitespace; falls back to the first 160 characters.
 */
function firstSentence(text) {
    const match = /^(.{10,200}?[.!?])\s/.exec(text);
    return match ? match[1] : text.slice(0, 160);
}
|
|
33
|
+
// Clip `text` to `maxChars`, appending an ellipsis when anything was cut.
function truncate(text, maxChars) {
    if (text.length <= maxChars)
        return text;
    return text.slice(0, maxChars) + '…';
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/** Minimal hit shape the diversifier needs: a stable id, a relevance score,
 * the block text (used by the default similarity), and an optional source. */
export type HitLike = {
    blockId: number;
    score: number;
    text: string;
    source?: string;
};
/** Tuning knobs for MMR-style selection in diversifyAndDedupe. */
export type DiversifyOptions = {
    /** Number of hits to keep. */
    k: number;
    /** Relevance vs. novelty trade-off; higher favors raw score. Default 0.8. */
    lambda?: number;
    /** Similarity at/above which two hits count as near-duplicates. Default 0.92. */
    simThreshold?: number;
    /** Custom pairwise similarity in [0, 1]; defaults to 5-gram Jaccard on text. */
    sim?: (a: HitLike, b: HitLike) => number;
};
export declare function diversifyAndDedupe(hits: HitLike[], opts: DiversifyOptions): HitLike[];
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
// src/quality/diversify.ts
|
|
2
|
+
import { jaccard5 } from './similarity.js';
|
|
3
|
+
/*
 * MMR-style selection with dedupe: repeatedly keep the candidate with the
 * best relevance/novelty trade-off (lambda * score - (1 - lambda) * maxSim),
 * never keeping anything whose similarity to an already-kept hit reaches
 * simThreshold. Input order is not mutated; candidates are drawn from a
 * score-sorted copy.
 */
export function diversifyAndDedupe(hits, opts) {
    const { k, lambda = 0.8, simThreshold = 0.92, sim = (a, b) => jaccard5(a.text, b.text) } = opts;
    const pool = [...hits].sort((x, y) => y.score - x.score);
    const kept = [];
    while (pool.length > 0 && kept.length < k) {
        let bestIdx = 0;
        let bestMMR = -Infinity;
        for (let i = 0; i < pool.length; i++) {
            const candidate = pool[i];
            // Max similarity against the kept set; stop early once the
            // duplicate threshold is crossed — the exact max no longer matters.
            let maxSim = 0;
            for (const chosen of kept) {
                const v = sim(candidate, chosen);
                if (v > maxSim)
                    maxSim = v;
                if (v >= simThreshold)
                    break;
            }
            if (maxSim >= simThreshold)
                continue; // near-duplicate: not eligible this round
            const mmr = lambda * candidate.score - (1 - lambda) * maxSim;
            if (mmr > bestMMR) {
                bestMMR = mmr;
                bestIdx = i;
            }
        }
        // When every remaining entry was a near-duplicate, consume index 0 so
        // the loop still drains the pool and terminates; the final check below
        // keeps duplicates out of the result.
        const pick = pool.splice(bestMMR === -Infinity ? 0 : bestIdx, 1)[0];
        if (!pick)
            break;
        if (!kept.some((chosen) => sim(chosen, pick) >= simThreshold))
            kept.push(pick);
    }
    return kept;
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
// src/quality/proximity.ts
|
|
2
|
+
// Map<termId, positions[]>
|
|
3
|
+
/*
 * Smallest window (max - min) that covers one position from every list in
 * posMap (Map<termId, positions[]>). Returns null for an empty/missing map.
 * Classic k-pointer sweep: always advance the list holding the current
 * minimum; stop when any list is exhausted.
 */
export function minCoverSpan(posMap) {
    if (!posMap)
        return null;
    const lists = [];
    for (const positions of posMap.values())
        lists.push([...positions].sort((a, b) => a - b));
    if (lists.length === 0)
        return null;
    const cursor = lists.map(() => 0);
    let best = null;
    for (;;) {
        let lo = Infinity;
        let hi = -Infinity;
        let loList = -1;
        for (let i = 0; i < lists.length; i++) {
            const v = lists[i][cursor[i]];
            if (v === undefined)
                return best; // this list is drained: no tighter window exists
            if (v < lo) {
                lo = v;
                loList = i;
            }
            if (v > hi)
                hi = v;
        }
        const span = hi - lo;
        if (best === null || span < best)
            best = span;
        cursor[loList]++;
    }
}
|
|
27
|
+
/*
 * Convert a cover span into a gentle score multiplier in (1, 1 + strength].
 * Tighter spans (query terms closer together) earn larger boosts; a null span
 * (no proximity information) leaves the score untouched.
 */
export function proximityMultiplier(span, strength = 0.15) {
    return span === null ? 1 : 1 + strength / (1 + span);
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
// src/quality/signature.ts
|
|
2
|
+
// "KNS" — simple, deterministic lexical numeric signature for tie-breaking.
|
|
3
|
+
// src/quality/signature.ts
// "KNS" — simple, deterministic lexical numeric signature for tie-breaking.
const PRIMES = [257, 263, 269];
/*
 * Three independent modular checksums over the string's char codes:
 * plain sum, position-weighted sum, and a shift/XOR mix — each reduced
 * modulo its own prime. Deterministic; returns [s1, s2, s3].
 */
export function knsSignature(s) {
    const sig = [0, 0, 0];
    for (let i = 0; i < s.length; i++) {
        const code = s.charCodeAt(i);
        sig[0] = (sig[0] + code) % PRIMES[0];
        sig[1] = (sig[1] + code * (i + 1)) % PRIMES[1];
        sig[2] = (sig[2] + ((code << 1) ^ (i + 7))) % PRIMES[2];
    }
    return sig;
}
/*
 * Mean circular (wrap-around) distance between two signatures, with each
 * component normalized by its prime — result lies in 0..1.
 */
export function knsDistance(a, b) {
    let total = 0;
    for (let i = 0; i < PRIMES.length; i++) {
        const p = PRIMES[i];
        const raw = Math.abs(a[i] - b[i]);
        total += Math.min(raw, p - raw) / p;
    }
    return total / PRIMES.length;
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
// src/quality/similarity.ts
|
|
2
|
+
import { normalize } from '../tokenize.js';
|
|
3
|
+
/*
 * Set of character n-grams of the normalized string. A non-empty string
 * shorter than n contributes itself as the only element; an empty normalized
 * string yields an empty set.
 */
export function ngramSet(s, n = 5) {
    const text = normalize(s);
    const grams = new Set();
    if (text.length < n) {
        if (text)
            grams.add(text);
    }
    else {
        for (let i = 0; i + n <= text.length; i++)
            grams.add(text.slice(i, i + n));
    }
    return grams;
}
|
|
15
|
+
/*
 * Jaccard index |A ∩ B| / |A ∪ B|. Two empty sets are defined as identical
 * (returns 1); the trailing zero guard is unreachable after that check but
 * kept for safety.
 */
export function jaccardFromSets(a, b) {
    if (!a.size && !b.size)
        return 1;
    let shared = 0;
    for (const item of a) {
        if (b.has(item))
            shared++;
    }
    const unionSize = a.size + b.size - shared;
    return unionSize ? shared / unionSize : 0;
}
|
|
25
|
+
/* 5-gram Jaccard similarity between two raw strings. */
export function jaccard5(s1, s2) {
    const grams1 = ngramSet(s1, 5);
    const grams2 = ngramSet(s2, 5);
    return jaccardFromSets(grams1, grams2);
}
|
package/dist/query.d.ts
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import type { Pack } from "./pack.js";
/** Options accepted by {@link query}. All fields are optional. */
export type QueryOptions = {
    /** Maximum number of hits to return. */
    topK?: number;
    /** Hits scoring below this are dropped. */
    minScore?: number;
    /** Phrases that must appear for a block to qualify. */
    requirePhrases?: string[];
    /** Restrict results to one or more namespaces. */
    namespace?: string | string[];
    /** Restrict results to one or more sources. */
    source?: string | string[];
    /** Pseudo-relevance query expansion controls — presumably docs/terms cap
     * the feedback set and weight scales expanded terms; confirm in query.js. */
    queryExpansion?: {
        enabled?: boolean;
        docs?: number;
        terms?: number;
        weight?: number;
        minTermLength?: number;
    };
    /** Semantic rerank settings (only "rerank" mode is declared). */
    semantic?: {
        enabled?: boolean;
        mode?: "rerank";
        /** How many lexical hits to rerank. */
        topN?: number;
        /** Skip reranking when lexical confidence is already at/above this. */
        minLexConfidence?: number;
        /** Lexical/semantic score blending weights. */
        blend?: {
            enabled?: boolean;
            wLex?: number;
            wSem?: number;
        };
        /** Precomputed query embedding; caller-supplied. */
        queryEmbedding?: Float32Array;
        /** Force semantic reranking regardless of confidence gating. */
        force?: boolean;
    };
};
/** Throws when options are malformed; returns nothing on success. */
export declare function validateQueryOptions(opts?: QueryOptions): void;
/** Throws when semantic sub-options are malformed; returns nothing on success. */
export declare function validateSemanticQueryOptions(options?: QueryOptions["semantic"]): void;
/** A single ranked result block. */
export type Hit = {
    blockId: number;
    score: number;
    text: string;
    source?: string;
    namespace?: string;
};
/** Run a lexical (optionally semantically reranked) query against a pack. */
export declare function query(pack: Pack, q: string, opts?: QueryOptions): Hit[];
/** Confidence estimate derived from a hit list's score distribution. */
export declare function lexConfidence(hits: Array<{
    score: number;
}>): number;
|