@knolo/core 3.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/pack.js ADDED
@@ -0,0 +1,175 @@
1
+ /*
2
+ * pack.ts
3
+ *
4
+ * Mount `.knolo` packs across Node, browsers, and RN/Expo. Tolerant of:
5
+ * - blocks as string[] (v1) or object[] with { text, heading?, docId?, namespace?, len? }
6
+ * - meta.stats.avgBlockLen (optional)
7
+ * Includes RN/Expo-safe TextDecoder via ponyfill.
8
+ */
9
+ import { getTextDecoder } from './utils/utf8.js';
10
+ import { validateAgentRegistry } from './agent.js';
11
/**
 * Reports whether a mounted pack carries a usable semantic section:
 * a semantic object with positive dims and a non-empty vector payload.
 */
export function hasSemantic(pack) {
    const sem = pack.semantic;
    if (!sem)
        return false;
    return sem.dims > 0 && sem.vecs.length > 0;
}
14
/*
 * Mounts a `.knolo` pack. Binary layout (all integers little-endian u32):
 *   [metaLen][meta JSON][lexLen][lexicon JSON]
 *   [postCount][postCount * u32 postings]
 *   [blocksLen][blocks JSON]
 *   optional trailing semantic section: [semLen][sem JSON][blobLen][blob]
 */
export async function mountPack(opts) {
    // Resolve path / URL / bytes to a single ArrayBuffer.
    const buf = await resolveToBuffer(opts.src);
    const dv = new DataView(buf);
    const dec = getTextDecoder();
    let offset = 0;
    // meta
    const metaLen = dv.getUint32(offset, true);
    offset += 4;
    const metaJson = dec.decode(new Uint8Array(buf, offset, metaLen));
    offset += metaLen;
    const meta = JSON.parse(metaJson);
    if (meta.agents) {
        // Check the embedded agent registry up front (see agent.js).
        validateAgentRegistry(meta.agents);
    }
    // lexicon: serialized as JSON entry pairs, rehydrated into a Map
    const lexLen = dv.getUint32(offset, true);
    offset += 4;
    const lexJson = dec.decode(new Uint8Array(buf, offset, lexLen));
    offset += lexLen;
    const lexEntries = JSON.parse(lexJson);
    const lexicon = new Map(lexEntries);
    // postings: flat u32 array, read element-wise via DataView so an
    // unaligned offset cannot break typed-array construction
    const postCount = dv.getUint32(offset, true);
    offset += 4;
    const postings = new Uint32Array(postCount);
    for (let i = 0; i < postCount; i++) {
        postings[i] = dv.getUint32(offset, true);
        offset += 4;
    }
    // blocks (v1: string[]; v2/v3: {text, heading?, docId?, namespace?, len?}[])
    const blocksLen = dv.getUint32(offset, true);
    offset += 4;
    const blocksJson = dec.decode(new Uint8Array(buf, offset, blocksLen));
    offset += blocksLen;
    const parsed = JSON.parse(blocksJson);
    let blocks = [];
    // Per-block sidecar arrays; stay undefined for v1 (string[]) packs.
    let headings;
    let docIds;
    let namespaces;
    let blockTokenLens;
    if (Array.isArray(parsed) && parsed.length && typeof parsed[0] === 'string') {
        // v1: plain strings, no per-block metadata
        blocks = parsed;
    }
    else if (Array.isArray(parsed)) {
        blocks = [];
        headings = [];
        docIds = [];
        namespaces = [];
        blockTokenLens = [];
        for (const it of parsed) {
            if (it && typeof it === 'object') {
                // Missing fields default to null / 0 so all arrays stay index-aligned.
                blocks.push(String(it.text ?? ''));
                headings.push(it.heading ?? null);
                docIds.push(it.docId ?? null);
                namespaces.push(it.namespace ?? null);
                blockTokenLens.push(typeof it.len === 'number' ? it.len : 0);
            }
            else {
                // Tolerate mixed arrays: coerce stray primitives to text-only blocks.
                blocks.push(String(it ?? ''));
                headings.push(null);
                docIds.push(null);
                namespaces.push(null);
                blockTokenLens.push(0);
            }
        }
    }
    else {
        // Unrecognized blocks payload: mount with no blocks rather than throwing.
        blocks = [];
    }
    // Optional trailing semantic section.
    let semantic;
    if (offset < buf.byteLength) {
        const semLen = dv.getUint32(offset, true);
        offset += 4;
        const semJson = dec.decode(new Uint8Array(buf, offset, semLen));
        offset += semLen;
        const sem = JSON.parse(semJson);
        const semBlobLen = dv.getUint32(offset, true);
        offset += 4;
        const semBlob = new Uint8Array(buf, offset, semBlobLen);
        semantic = parseSemanticSection(sem, semBlob);
    }
    return {
        meta,
        lexicon,
        postings,
        blocks,
        headings,
        docIds,
        namespaces,
        blockTokenLens,
        semantic,
    };
}
108
/**
 * Decodes the semantic section descriptor plus its binary blob into the
 * runtime shape used by hasSemantic/query: an Int8 vector view and an
 * optional Uint16 per-vector scale table.
 */
function parseSemanticSection(sem, blob) {
    // Int8 vector payload: a view into `blob`'s backing buffer, not a copy.
    const vecDesc = sem?.blocks?.vectors;
    const vecStart = blob.byteOffset + Number(vecDesc?.byteOffset ?? 0);
    const vecs = new Int8Array(blob.buffer, vecStart, Number(vecDesc?.length ?? 0));

    // Optional scales: u16 little-endian, read element-wise so an odd
    // byte offset cannot break Uint16Array construction.
    const scaleDesc = sem?.blocks?.scales;
    let scaleView;
    if (scaleDesc) {
        const count = Number(scaleDesc.length ?? 0);
        const base = blob.byteOffset + Number(scaleDesc.byteOffset ?? 0);
        const view = new DataView(blob.buffer, base, count * 2);
        scaleView = new Uint16Array(count);
        for (let i = 0; i < count; i++)
            scaleView[i] = view.getUint16(2 * i, true);
    }

    return {
        version: 1,
        modelId: String(sem?.modelId ?? ''),
        dims: Number(sem?.dims ?? 0),
        encoding: 'int8_l2norm',
        perVectorScale: Boolean(sem?.perVectorScale),
        vecs,
        scales: scaleView,
    };
}
132
/**
 * Normalizes a pack source (file path, URL, Uint8Array, or ArrayBuffer)
 * to a plain ArrayBuffer.
 *
 * Fixes vs previous version:
 *  - HTTP failures from fetch() now throw immediately instead of letting
 *    an error page's bytes reach the binary pack parser.
 *  - The Uint8Array copy path no longer relies on `src.slice().buffer`:
 *    Node's Buffer overrides slice() to return a *shared-memory* view,
 *    so `.buffer` could expose the whole Buffer pool (wrong bytes, wrong
 *    length). An explicit Uint8Array copy is correct for both Buffers
 *    and plain Uint8Arrays.
 */
async function resolveToBuffer(src) {
    if (typeof src === 'string') {
        if (isNodeRuntime() && isLikelyLocalPath(src)) {
            return await readLocalFileAsBuffer(src);
        }
        const res = await fetch(src);
        if (!res.ok) {
            throw new Error(`Failed to fetch pack (${res.status} ${res.statusText}): ${src}`);
        }
        return await res.arrayBuffer();
    }
    if (src instanceof Uint8Array) {
        // Zero-copy when the view spans its entire backing buffer.
        if (src.byteOffset === 0 && src.byteLength === src.buffer.byteLength) {
            return src.buffer;
        }
        // Explicit copy of exactly the viewed bytes.
        const copy = new Uint8Array(src.byteLength);
        copy.set(src);
        return copy.buffer;
    }
    // Already an ArrayBuffer.
    return src;
}
149
/** True when running under Node.js (process.versions.node is present). */
function isNodeRuntime() {
    const proc = globalThis
        .process;
    return Boolean(proc && proc.versions && proc.versions.node);
}
154
/**
 * Heuristic: is this string a local filesystem path (vs a remote URL)?
 * Check order matters: Windows drive letters ("C:\...") must be
 * recognized before the generic URL-scheme test, which they also match.
 */
function isLikelyLocalPath(value) {
    // Explicit file URL.
    if (value.startsWith('file://'))
        return true;
    // POSIX-style relative, absolute, or home-rooted paths.
    const pathPrefixes = ['./', '../', '/', '~'];
    if (pathPrefixes.some((prefix) => value.startsWith(prefix)))
        return true;
    // Windows absolute path, e.g. "C:\packs" or "C:/packs".
    if (/^[A-Za-z]:[\\/]/.test(value))
        return true;
    // Anything else carrying a URL scheme (https:, data:, ...) is remote.
    if (/^[A-Za-z][A-Za-z\d+.-]*:/.test(value))
        return false;
    // Bare relative path like "knowledge.knolo".
    return true;
}
168
/**
 * Reads a local file (plain path or file:// URL) into an ArrayBuffer.
 *
 * Fix: file:// URLs are now converted with node:url's fileURLToPath
 * instead of `decodeURIComponent(new URL(u).pathname)`. The pathname
 * approach breaks on Windows (yields "/C:/..." with a spurious leading
 * slash) and mishandles URLs with a host component; fileURLToPath
 * handles both plus percent-decoding.
 */
async function readLocalFileAsBuffer(pathOrFileUrl) {
    const { readFile } = await import('node:fs/promises');
    let filePath = pathOrFileUrl;
    if (pathOrFileUrl.startsWith('file://')) {
        const { fileURLToPath } = await import('node:url');
        filePath = fileURLToPath(pathOrFileUrl);
    }
    const data = await readFile(filePath);
    // Trim to the Buffer's own view: readFile may hand back a pooled
    // Buffer whose backing ArrayBuffer is larger than the file.
    return data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength);
}
@@ -0,0 +1,22 @@
1
+ import type { Hit } from './query.js';
2
/**
 * Compact, deterministic bundle of retrieved context built from ranked
 * hits, intended for injection into a prompt.
 */
export type ContextPatch = {
    /** Short background lines distilled from the leading snippets. */
    background: string[];
    /** Truncated hit texts with their optional source labels. */
    snippets: Array<{
        text: string;
        source?: string;
    }>;
    /** Term definitions; `evidence` presumably holds supporting block ids — confirm in impl. */
    definitions: Array<{
        term: string;
        def: string;
        evidence?: number[];
    }>;
    /** Subject/predicate/object triples; `evidence` presumably holds supporting block ids. */
    facts: Array<{
        s: string;
        p: string;
        o: string;
        evidence?: number[];
    }>;
};
/**
 * Builds a ContextPatch from ranked hits. `budget` (default 'small')
 * caps how many snippets are kept and how long each may be.
 */
export declare function makeContextPatch(hits: Hit[], opts?: {
    budget?: 'mini' | 'small' | 'full';
}): ContextPatch;
package/dist/patch.js ADDED
@@ -0,0 +1,35 @@
1
+ /*
2
+ * patch.ts
3
+ *
4
+ * Produces a compact, deterministic “context patch” from ranked hits.
5
+ */
6
/**
 * Builds a deterministic context patch from ranked hits.
 * `budget` ('mini' | 'small' | 'full', default 'small') caps the snippet
 * count and per-snippet character length.
 */
export function makeContextPatch(hits, opts = {}) {
    // Per-budget caps: number of snippets kept and max chars per snippet.
    const limitsByBudget = {
        mini: { snippets: 3, chars: 240 },
        small: { snippets: 6, chars: 420 },
        full: { snippets: 10, chars: 900 },
    };
    const { snippets: maxSnippets, chars: maxChars } = limitsByBudget[opts.budget ?? 'small'];
    const snippets = [];
    for (const hit of hits.slice(0, maxSnippets)) {
        snippets.push({
            text: truncate(hit.text, maxChars),
            source: hit.source,
        });
    }
    // Background summary: leading sentence of the first two snippets.
    const background = [];
    for (const snip of snippets.slice(0, 2)) {
        background.push(firstSentence(snip.text));
    }
    return {
        background,
        snippets,
        definitions: [],
        facts: [],
    };
}
// Leading sentence (10-200 chars ending in . ! or ? followed by a space);
// falls back to the first 160 characters.
function firstSentence(text) {
    const match = /^(.{10,200}?[.!?])\s/.exec(text);
    return match ? match[1] : text.slice(0, 160);
}
// Hard cap at maxChars, appending an ellipsis when the text was cut.
function truncate(text, maxChars) {
    if (text.length <= maxChars)
        return text;
    return text.slice(0, maxChars) + '…';
}
@@ -0,0 +1,13 @@
1
/** Minimal hit shape needed by the diversifier. */
export type HitLike = {
    blockId: number;
    score: number;
    text: string;
    source?: string;
};
/** Options for MMR-style selection with near-duplicate suppression. */
export type DiversifyOptions = {
    /** Number of hits to keep. */
    k: number;
    /** Relevance-vs-diversity trade-off weight. */
    lambda?: number;
    /** Similarity at/above which two hits count as near-duplicates. */
    simThreshold?: number;
    /** Pairwise similarity; defaults to a lexical n-gram measure (see similarity.js). */
    sim?: (a: HitLike, b: HitLike) => number;
};
/** Selects up to `k` hits balancing score against similarity to already-selected hits. */
export declare function diversifyAndDedupe(hits: HitLike[], opts: DiversifyOptions): HitLike[];
@@ -0,0 +1,41 @@
1
+ // src/quality/diversify.ts
2
+ import { jaccard5 } from './similarity.js';
3
/**
 * Greedy MMR selection: repeatedly picks the candidate with the best
 * lambda*score - (1-lambda)*maxSimilarity trade-off, skipping candidates
 * that are near-duplicates (sim >= simThreshold) of anything selected.
 * The input array is not mutated.
 */
export function diversifyAndDedupe(hits, opts) {
    const { k, lambda = 0.8, simThreshold = 0.92, sim = (a, b) => jaccard5(a.text, b.text) } = opts;
    // Score-descending working copy.
    const candidates = [...hits].sort((x, y) => y.score - x.score);
    const selected = [];
    while (candidates.length > 0 && selected.length < k) {
        let chosenIdx = 0;
        let chosenScore = -Infinity;
        for (let i = 0; i < candidates.length; i++) {
            const cand = candidates[i];
            // Highest similarity to anything already selected; bail out
            // early once the duplicate threshold is crossed.
            let closest = 0;
            for (const prev of selected) {
                const v = sim(cand, prev);
                if (v > closest)
                    closest = v;
                if (v >= simThreshold) {
                    closest = v;
                    break;
                }
            }
            // Near-duplicate of a selected hit: not a candidate this round.
            if (closest >= simThreshold)
                continue;
            const mmr = lambda * cand.score - (1 - lambda) * closest;
            if (mmr > chosenScore) {
                chosenScore = mmr;
                chosenIdx = i;
            }
        }
        // If every remaining candidate was a near-duplicate, fall back to
        // the highest-scored one (index 0 of the sorted pool).
        const next = candidates.splice(chosenScore === -Infinity ? 0 : chosenIdx, 1)[0];
        if (!next)
            break;
        // Final dedupe gate before committing.
        if (!selected.some((prev) => sim(prev, next) >= simThreshold))
            selected.push(next);
    }
    return selected;
}
@@ -0,0 +1,2 @@
1
/** Smallest window (max - min position) covering one position from every term's list; null when the map is absent or empty. */
export declare function minCoverSpan(posMap?: Map<number, number[]>): number | null;
/** Score multiplier 1 + strength/(1 + span); returns exactly 1 when span is null (no proximity info). */
export declare function proximityMultiplier(span: number | null, strength?: number): number;
@@ -0,0 +1,31 @@
1
+ // src/quality/proximity.ts
2
+ // Map<termId, positions[]>
3
// Map<termId, positions[]>
/**
 * Smallest window (max - min) containing at least one position from every
 * term's list, via the classic multi-pointer sweep: always advance the
 * list holding the current minimum. Returns null for an absent/empty map.
 */
export function minCoverSpan(posMap) {
    if (!posMap)
        return null;
    // Sorted copies; the caller's arrays are never mutated.
    const sorted = [];
    for (const positions of posMap.values())
        sorted.push([...positions].sort((a, b) => a - b));
    if (sorted.length === 0)
        return null;
    const cursor = sorted.map(() => 0);
    let best = null;
    for (;;) {
        let lo = Infinity;
        let hi = -Infinity;
        let loList = -1;
        for (let i = 0; i < sorted.length; i++) {
            const pos = sorted[i][cursor[i]];
            // Any exhausted list means no further windows exist.
            if (pos === undefined)
                return best;
            if (pos < lo) {
                lo = pos;
                loList = i;
            }
            if (pos > hi)
                hi = pos;
        }
        const span = hi - lo;
        if (best === null || span < best)
            best = span;
        // Shrink the window by advancing the list holding the minimum.
        cursor[loList]++;
    }
}
27
/**
 * Converts a cover span into a score multiplier: 1 + strength/(1 + span).
 * A null span (no proximity information) yields a neutral 1.
 */
export function proximityMultiplier(span, strength = 0.15) {
    return span === null ? 1 : 1 + strength / (1 + span); // gentle, bounded
}
@@ -0,0 +1,3 @@
1
/** Three-component numeric signature of a string (components are small non-negative residues). */
export type KNSSignature = [number, number, number];
/** Deterministic lexical signature used for tie-breaking (see signature.js). */
export declare function knsSignature(s: string): KNSSignature;
/** Mean circular distance between two signatures, normalized to 0..1. */
export declare function knsDistance(a: KNSSignature, b: KNSSignature): number;
@@ -0,0 +1,24 @@
1
+ // src/quality/signature.ts
2
+ // "KNS" — simple, deterministic lexical numeric signature for tie-breaking.
3
// Moduli for the three signature components.
const PRIMES = [257, 263, 269];
/**
 * "KNS" — simple, deterministic lexical numeric signature for
 * tie-breaking: three running sums over char codes, each reduced
 * modulo its own prime.
 */
export function knsSignature(s) {
    const sig = [0, 0, 0];
    for (let i = 0; i < s.length; i++) {
        const code = s.charCodeAt(i);
        sig[0] = (sig[0] + code) % PRIMES[0];
        sig[1] = (sig[1] + code * (i + 1)) % PRIMES[1];
        sig[2] = (sig[2] + ((code << 1) ^ (i + 7))) % PRIMES[2];
    }
    return [sig[0], sig[1], sig[2]];
}
/**
 * Per-component circular distance modulo each prime, averaged and
 * normalized into 0..1.
 */
export function knsDistance(a, b) {
    let total = 0;
    PRIMES.forEach((p, i) => {
        const raw = Math.abs(a[i] - b[i]);
        total += Math.min(raw, p - raw) / p;
    });
    return total / PRIMES.length;
}
@@ -0,0 +1,3 @@
1
/** Set of character n-grams (default n=5) of the normalized string; a string shorter than n contributes itself (if non-empty). */
export declare function ngramSet(s: string, n?: number): Set<string>;
/** Jaccard similarity |a∩b| / |a∪b|; defined as 1 when both sets are empty. */
export declare function jaccardFromSets(a: Set<string>, b: Set<string>): number;
/** Jaccard similarity over the 5-gram sets of the two strings. */
export declare function jaccard5(s1: string, s2: string): number;
@@ -0,0 +1,27 @@
1
+ // src/quality/similarity.ts
2
+ import { normalize } from '../tokenize.js';
3
/**
 * Character n-gram set (default n=5) of the normalized input. Inputs
 * shorter than n contribute the whole (non-empty) string instead.
 */
export function ngramSet(s, n = 5) {
    const norm = normalize(s);
    const grams = new Set();
    // Too short for even one n-gram: use the string itself.
    if (norm.length < n) {
        if (norm)
            grams.add(norm);
        return grams;
    }
    const lastStart = norm.length - n;
    for (let start = 0; start <= lastStart; start++) {
        grams.add(norm.slice(start, start + n));
    }
    return grams;
}
15
/**
 * Jaccard similarity |a∩b| / |a∪b|. Two empty sets are treated as
 * identical (similarity 1).
 */
export function jaccardFromSets(a, b) {
    if (a.size === 0 && b.size === 0)
        return 1;
    let shared = 0;
    a.forEach((item) => {
        if (b.has(item))
            shared += 1;
    });
    const unionSize = a.size + b.size - shared;
    return unionSize === 0 ? 0 : shared / unionSize;
}
25
/** Jaccard similarity of the two strings' 5-gram sets. */
export function jaccard5(s1, s2) {
    const left = ngramSet(s1, 5);
    const right = ngramSet(s2, 5);
    return jaccardFromSets(left, right);
}
@@ -0,0 +1,41 @@
1
+ import type { Pack } from "./pack.js";
2
/**
 * Options for `query`. All fields optional; exact semantics live in the
 * query implementation (query.js).
 */
export type QueryOptions = {
    /** Maximum number of hits to return. */
    topK?: number;
    /** Minimum score for a hit to be kept — confirm against impl. */
    minScore?: number;
    /** Phrases the matched blocks must contain — confirm against impl. */
    requirePhrases?: string[];
    /** Restrict matching to one or more namespaces. */
    namespace?: string | string[];
    /** Restrict matching to one or more sources. */
    source?: string | string[];
    /** Query-expansion tuning (terms mined from top docs — confirm in impl). */
    queryExpansion?: {
        enabled?: boolean;
        docs?: number;
        terms?: number;
        weight?: number;
        minTermLength?: number;
    };
    /** Semantic scoring settings; only "rerank" mode is declared. */
    semantic?: {
        enabled?: boolean;
        mode?: "rerank";
        /** NOTE(review): presumably the number of lexical hits reranked — confirm. */
        topN?: number;
        minLexConfidence?: number;
        /** Lexical/semantic score blending weights. */
        blend?: {
            enabled?: boolean;
            wLex?: number;
            wSem?: number;
        };
        /** Caller-supplied query embedding vector. */
        queryEmbedding?: Float32Array;
        force?: boolean;
    };
};
/** Validates `opts`; presumably throws on invalid values — confirm in impl. */
export declare function validateQueryOptions(opts?: QueryOptions): void;
/** Validates the semantic sub-options; presumably throws on invalid values. */
export declare function validateSemanticQueryOptions(options?: QueryOptions["semantic"]): void;
/** A single ranked retrieval result. */
export type Hit = {
    blockId: number;
    score: number;
    text: string;
    source?: string;
    namespace?: string;
};
/** Runs a query against a mounted pack and returns ranked hits. */
export declare function query(pack: Pack, q: string, opts?: QueryOptions): Hit[];
/** Confidence estimate derived from the lexical hit scores. */
export declare function lexConfidence(hits: Array<{
    score: number;
}>): number;