@knolo/core 3.1.3 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +240 -88
- package/dist/index.d.ts +2 -2
- package/dist/index.js +1 -1
- package/dist/node.d.ts +2 -0
- package/dist/node.js +1 -0
- package/dist/pack.d.ts +2 -35
- package/dist/pack.js +1 -175
- package/dist/pack.node.d.ts +4 -0
- package/dist/pack.node.js +35 -0
- package/dist/pack.runtime.d.ts +37 -0
- package/dist/pack.runtime.js +146 -0
- package/package.json +11 -4
package/README.md
CHANGED
|
@@ -1,176 +1,328 @@
|
|
|
1
|
+
# 📦 `@knolo/core`
|
|
1
2
|
|
|
3
|
+
`@knolo/core` is the **deterministic retrieval engine and pack runtime** behind Knolo.
|
|
2
4
|
|
|
3
|
-
|
|
5
|
+
It lets you:
|
|
6
|
+
|
|
7
|
+
* Build structured knowledge packs
|
|
8
|
+
* Mount portable `.knolo` artifacts
|
|
9
|
+
* Run deterministic lexical retrieval
|
|
10
|
+
* Optionally apply hybrid semantic reranking
|
|
11
|
+
* Enforce strict runtime contracts for advanced workflows
|
|
12
|
+
|
|
13
|
+
No vector database required.
|
|
14
|
+
No cloud dependency required.
|
|
15
|
+
Works fully offline.
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
# 🧠 What It Is
|
|
20
|
+
|
|
21
|
+
`@knolo/core` is **not**:
|
|
4
22
|
|
|
5
|
-
|
|
23
|
+
* A vector database wrapper
|
|
24
|
+
* A hosted RAG service
|
|
25
|
+
* A probabilistic similarity engine
|
|
6
26
|
|
|
7
|
-
|
|
27
|
+
It is:
|
|
8
28
|
|
|
9
|
-
|
|
29
|
+
* A versioned binary pack format
|
|
30
|
+
* A deterministic lexical retrieval engine
|
|
31
|
+
* An optional semantic rerank layer
|
|
32
|
+
* A portable knowledge runtime
|
|
10
33
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
- Deterministic AI systems
|
|
14
|
-
- Agent routing
|
|
15
|
-
- Air-gapped or privacy-first environments
|
|
34
|
+
You build once.
|
|
35
|
+
You mount anywhere — Node, browser, React Native, serverless, offline.
|
|
16
36
|
|
|
17
37
|
---
|
|
18
38
|
|
|
19
|
-
|
|
39
|
+
# 📊 Retrieval Characteristics
|
|
20
40
|
|
|
21
|
-
|
|
22
|
-
- Embeddings
|
|
23
|
-
- Vector databases
|
|
24
|
-
- External services
|
|
25
|
-
- Non-deterministic similarity scoring
|
|
41
|
+
Lexical retrieval is:
|
|
26
42
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
- Compact `.knolo` bundles
|
|
43
|
+
* Deterministic
|
|
44
|
+
* Reproducible
|
|
45
|
+
* Stable across runs
|
|
46
|
+
* Independent of embeddings
|
|
32
47
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
-
|
|
48
|
+
Hybrid reranking is:
|
|
49
|
+
|
|
50
|
+
* Optional
|
|
51
|
+
* Deterministic for fixed vectors
|
|
52
|
+
* Lexical-first (semantic never replaces grounding)
|
|
53
|
+
|
|
54
|
+
In benchmark testing (March 2026):
|
|
55
|
+
|
|
56
|
+
* **Recall@5:** 1.000
|
|
57
|
+
* **MRR@5:** 0.867
|
|
58
|
+
* **nDCG@5:** 0.900
|
|
59
|
+
|
|
60
|
+
Strong ranking quality without requiring a vector database.
|
|
38
61
|
|
|
39
62
|
---
|
|
40
63
|
|
|
41
|
-
|
|
64
|
+
# 📦 Installation
|
|
42
65
|
|
|
43
66
|
```bash
|
|
44
67
|
npm install @knolo/core
|
|
45
|
-
|
|
68
|
+
```
|
|
46
69
|
|
|
47
70
|
---
|
|
48
71
|
|
|
49
|
-
|
|
72
|
+
# 🚀 Core Concepts
|
|
50
73
|
|
|
51
|
-
|
|
74
|
+
## 1️⃣ Build a Pack
|
|
75
|
+
|
|
76
|
+
```ts
|
|
77
|
+
import { buildPack } from "@knolo/core";
|
|
78
|
+
|
|
79
|
+
const bytes = await buildPack(docs, {
|
|
80
|
+
semantic: {
|
|
81
|
+
enabled: false
|
|
82
|
+
}
|
|
83
|
+
});
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
`buildPack` produces a versioned `.knolo` binary artifact.
|
|
87
|
+
|
|
88
|
+
You can write it to disk or store it in object storage.
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
## 2️⃣ Mount a Pack
|
|
93
|
+
|
|
94
|
+
### Node.js (local path convenience)
|
|
95
|
+
|
|
96
|
+
```ts
|
|
97
|
+
import { mountPack } from "@knolo/core/node";
|
|
98
|
+
|
|
99
|
+
const pack = await mountPack({
|
|
100
|
+
src: "./dist/knowledge.knolo"
|
|
101
|
+
});
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### React Native / Expo (URL or bytes)
|
|
52
105
|
|
|
53
106
|
```ts
|
|
54
107
|
import { mountPack } from "@knolo/core";
|
|
55
108
|
|
|
56
|
-
const
|
|
109
|
+
const ab = await (await fetch(PACK_URL)).arrayBuffer();
|
|
110
|
+
const pack = await mountPack({ src: new Uint8Array(ab) });
|
|
57
111
|
```
|
|
58
112
|
|
|
113
|
+
You can mount from:
|
|
114
|
+
|
|
115
|
+
* URL string (runtime-safe entry)
|
|
116
|
+
* Buffer / Uint8Array
|
|
117
|
+
* Local file path in Node via `@knolo/core/node`
|
|
118
|
+
* Object storage download
|
|
119
|
+
|
|
120
|
+
Mount-time validation ensures:
|
|
121
|
+
|
|
122
|
+
* Pack version compatibility
|
|
123
|
+
* Metadata integrity
|
|
124
|
+
* Optional agent registry validation
|
|
125
|
+
|
|
59
126
|
---
|
|
60
127
|
|
|
61
|
-
|
|
128
|
+
## 3️⃣ Query (Deterministic Lexical Retrieval)
|
|
62
129
|
|
|
63
130
|
```ts
|
|
64
131
|
import { query } from "@knolo/core";
|
|
65
132
|
|
|
66
|
-
const
|
|
67
|
-
|
|
68
|
-
q: "debounce vs throttle"
|
|
133
|
+
const hits = query(pack, "debounce vs throttle", {
|
|
134
|
+
topK: 5
|
|
69
135
|
});
|
|
70
136
|
|
|
71
|
-
|
|
137
|
+
for (const hit of hits) {
|
|
138
|
+
console.log(hit.text);
|
|
139
|
+
console.log(hit.metadata); // { score, source, namespace, id }
|
|
140
|
+
}
|
|
72
141
|
```
|
|
73
142
|
|
|
143
|
+
Properties:
|
|
144
|
+
|
|
145
|
+
* Fully deterministic
|
|
146
|
+
* No embedding dependency
|
|
147
|
+
* Namespace-aware
|
|
148
|
+
* Evaluation-friendly scoring
|
|
149
|
+
|
|
74
150
|
---
|
|
75
151
|
|
|
76
|
-
|
|
152
|
+
# 🔀 Optional: Hybrid Semantic Rerank
|
|
77
153
|
|
|
78
|
-
|
|
79
|
-
|
|
154
|
+
Semantic rerank runs **after lexical retrieval**.
|
|
155
|
+
|
|
156
|
+
It never replaces lexical grounding.
|
|
157
|
+
|
|
158
|
+
## Build with embeddings
|
|
80
159
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
160
|
+
```ts
|
|
161
|
+
const bytes = await buildPack(docs, {
|
|
162
|
+
semantic: {
|
|
163
|
+
enabled: true,
|
|
164
|
+
modelId: "text-embedding-3-small",
|
|
165
|
+
embeddings,
|
|
166
|
+
quantization: {
|
|
167
|
+
type: "int8_l2norm",
|
|
168
|
+
perVectorScale: true
|
|
169
|
+
}
|
|
170
|
+
}
|
|
84
171
|
});
|
|
85
172
|
```
|
|
86
173
|
|
|
87
|
-
|
|
174
|
+
## Query with rerank
|
|
88
175
|
|
|
89
176
|
```ts
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
177
|
+
import { query, hasSemantic } from "@knolo/core";
|
|
178
|
+
|
|
179
|
+
const hits = query(pack, "react native throttling issue", {
|
|
180
|
+
topK: 8,
|
|
181
|
+
semantic: {
|
|
182
|
+
enabled: hasSemantic(pack),
|
|
183
|
+
mode: "rerank",
|
|
184
|
+
topN: 50,
|
|
185
|
+
minLexConfidence: 0.35,
|
|
186
|
+
blend: { enabled: true, wLex: 0.75, wSem: 0.25 },
|
|
187
|
+
queryEmbedding
|
|
188
|
+
}
|
|
93
189
|
});
|
|
94
190
|
```
|
|
95
191
|
|
|
192
|
+
Design principles:
|
|
193
|
+
|
|
194
|
+
* Lexical-first
|
|
195
|
+
* Deterministic scoring
|
|
196
|
+
* No external vector store
|
|
197
|
+
* Quantized embedding storage inside pack
|
|
198
|
+
|
|
96
199
|
---
|
|
97
200
|
|
|
98
|
-
|
|
201
|
+
# 🤖 Optional: Agent Metadata & Routing
|
|
202
|
+
|
|
203
|
+
Knolo is a knowledge engine first.
|
|
99
204
|
|
|
100
|
-
|
|
205
|
+
However, packs may optionally embed structured metadata for:
|
|
101
206
|
|
|
102
|
-
|
|
207
|
+
* System prompts
|
|
208
|
+
* Namespace restrictions
|
|
209
|
+
* Tool policies
|
|
210
|
+
* Routing hints
|
|
103
211
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
* Registry validation at mount-time
|
|
212
|
+
Agent registries are validated once at `mountPack()`.
|
|
213
|
+
|
|
214
|
+
These features are additive and do not affect retrieval.
|
|
108
215
|
|
|
109
216
|
---
|
|
110
217
|
|
|
111
|
-
|
|
218
|
+
# 🛠 Runtime Contracts (Advanced)
|
|
219
|
+
|
|
220
|
+
For strict deterministic workflows:
|
|
221
|
+
|
|
222
|
+
## RouteDecisionV1
|
|
112
223
|
|
|
113
224
|
```ts
|
|
114
|
-
|
|
225
|
+
type RouteDecisionV1 = {
|
|
226
|
+
type: "route_decision";
|
|
227
|
+
intent?: string;
|
|
228
|
+
entities?: Record<string, unknown>;
|
|
229
|
+
candidates: { agentId: string; score: number }[];
|
|
230
|
+
selected: string;
|
|
231
|
+
};
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
## ToolCallV1
|
|
235
|
+
|
|
236
|
+
```ts
|
|
237
|
+
type ToolCallV1 = {
|
|
238
|
+
type: "tool_call";
|
|
239
|
+
callId: string;
|
|
240
|
+
tool: string;
|
|
241
|
+
args: Record<string, unknown>;
|
|
242
|
+
};
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
Helpers:
|
|
115
246
|
|
|
116
|
-
|
|
117
|
-
|
|
247
|
+
```ts
|
|
248
|
+
import {
|
|
249
|
+
isRouteDecisionV1,
|
|
250
|
+
validateRouteDecisionV1,
|
|
251
|
+
isToolAllowed,
|
|
252
|
+
assertToolCallAllowed
|
|
253
|
+
} from "@knolo/core";
|
|
118
254
|
```
|
|
119
255
|
|
|
120
|
-
|
|
256
|
+
Enables:
|
|
257
|
+
|
|
258
|
+
* Deterministic routing validation
|
|
259
|
+
* Policy enforcement
|
|
260
|
+
* Tool permission checks
|
|
261
|
+
* Structured AI pipelines
|
|
121
262
|
|
|
122
|
-
|
|
123
|
-
* Explicit deny → deterministic error
|
|
263
|
+
These are optional — not required for standard retrieval usage.
|
|
124
264
|
|
|
125
265
|
---
|
|
126
266
|
|
|
127
|
-
|
|
267
|
+
# 📁 `.knolo` Pack Format
|
|
128
268
|
|
|
129
|
-
|
|
269
|
+
Binary layout:
|
|
130
270
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
271
|
+
```
|
|
272
|
+
[metaLen][meta]
|
|
273
|
+
[lexLen][lexicon]
|
|
274
|
+
[postCount][postings]
|
|
275
|
+
[blocksLen][blocks]
|
|
276
|
+
[semantic?]
|
|
277
|
+
```
|
|
136
278
|
|
|
137
|
-
|
|
279
|
+
Properties:
|
|
138
280
|
|
|
139
|
-
|
|
281
|
+
* Versioned
|
|
282
|
+
* Compact
|
|
283
|
+
* Immutable
|
|
284
|
+
* Semantic section auto-detected
|
|
285
|
+
* Designed for fast mount + query
|
|
140
286
|
|
|
141
|
-
|
|
287
|
+
---
|
|
142
288
|
|
|
143
|
-
|
|
289
|
+
# ⚙️ Design Guarantees
|
|
144
290
|
|
|
145
|
-
*
|
|
146
|
-
*
|
|
147
|
-
*
|
|
148
|
-
*
|
|
149
|
-
*
|
|
291
|
+
* Deterministic lexical retrieval
|
|
292
|
+
* Deterministic hybrid rerank (fixed vectors)
|
|
293
|
+
* No vector database required
|
|
294
|
+
* No cloud dependency required
|
|
295
|
+
* Works offline
|
|
296
|
+
* Works in React Native / Expo
|
|
297
|
+
* Portable binary artifacts
|
|
150
298
|
|
|
151
299
|
---
|
|
152
300
|
|
|
153
|
-
|
|
301
|
+
# 🔐 Ideal For
|
|
154
302
|
|
|
155
|
-
*
|
|
156
|
-
*
|
|
157
|
-
*
|
|
158
|
-
* Secure environments
|
|
159
|
-
*
|
|
303
|
+
* Local-first AI systems
|
|
304
|
+
* Offline assistants
|
|
305
|
+
* On-device LLM retrieval
|
|
306
|
+
* Secure / air-gapped environments
|
|
307
|
+
* Deterministic RAG pipelines
|
|
308
|
+
* Evaluation-heavy workflows
|
|
160
309
|
|
|
161
310
|
---
|
|
162
311
|
|
|
163
|
-
|
|
312
|
+
# 🗺 Roadmap
|
|
164
313
|
|
|
165
|
-
*
|
|
314
|
+
* Incremental pack updates
|
|
315
|
+
* Evaluation tooling
|
|
316
|
+
* Performance introspection APIs
|
|
166
317
|
* WASM builds
|
|
167
|
-
*
|
|
168
|
-
* Advanced agent routing
|
|
169
|
-
* Deterministic tool orchestration
|
|
318
|
+
* Continued local-first optimization
|
|
170
319
|
|
|
171
320
|
---
|
|
172
321
|
|
|
173
|
-
|
|
322
|
+
# 📄 License
|
|
323
|
+
|
|
324
|
+
Apache-2.0
|
|
325
|
+
|
|
326
|
+
|
|
174
327
|
|
|
175
|
-
MIT
|
|
176
328
|
|
package/dist/index.d.ts
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
export { mountPack, hasSemantic } from './pack.js';
|
|
1
|
+
export { mountPack, hasSemantic } from './pack.runtime.js';
|
|
2
2
|
export { query, lexConfidence, validateQueryOptions, validateSemanticQueryOptions, } from './query.js';
|
|
3
3
|
export { makeContextPatch } from './patch.js';
|
|
4
4
|
export { buildPack } from './builder.js';
|
|
5
5
|
export { quantizeEmbeddingInt8L2Norm, encodeScaleF16, decodeScaleF16, } from './semantic.js';
|
|
6
6
|
export { listAgents, getAgent, resolveAgent, buildSystemPrompt, isToolAllowed, assertToolAllowed, validateAgentRegistry, validateAgentDefinition, } from './agent.js';
|
|
7
|
-
export type { MountOptions, PackMeta, Pack } from './pack.js';
|
|
7
|
+
export type { MountOptions, PackMeta, Pack } from './pack.runtime.js';
|
|
8
8
|
export type { QueryOptions, Hit } from './query.js';
|
|
9
9
|
export type { ContextPatch } from './patch.js';
|
|
10
10
|
export type { BuildInputDoc, BuildPackOptions } from './builder.js';
|
package/dist/index.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// src/index.ts
|
|
2
|
-
export { mountPack, hasSemantic } from './pack.js';
|
|
2
|
+
export { mountPack, hasSemantic } from './pack.runtime.js';
|
|
3
3
|
export { query, lexConfidence, validateQueryOptions, validateSemanticQueryOptions, } from './query.js';
|
|
4
4
|
export { makeContextPatch } from './patch.js';
|
|
5
5
|
export { buildPack } from './builder.js';
|
package/dist/node.d.ts
ADDED
package/dist/node.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { mountPack, hasSemantic } from './pack.node.js';
|
package/dist/pack.d.ts
CHANGED
|
@@ -1,35 +1,2 @@
|
|
|
1
|
-
|
|
2
|
-
export type MountOptions = {
|
|
3
|
-
src: string | ArrayBufferLike | Uint8Array;
|
|
4
|
-
};
|
|
5
|
-
export type PackMeta = {
|
|
6
|
-
version: number;
|
|
7
|
-
stats: {
|
|
8
|
-
docs: number;
|
|
9
|
-
blocks: number;
|
|
10
|
-
terms: number;
|
|
11
|
-
avgBlockLen?: number;
|
|
12
|
-
};
|
|
13
|
-
agents?: AgentRegistry;
|
|
14
|
-
};
|
|
15
|
-
export type Pack = {
|
|
16
|
-
meta: PackMeta;
|
|
17
|
-
lexicon: Map<string, number>;
|
|
18
|
-
postings: Uint32Array;
|
|
19
|
-
blocks: string[];
|
|
20
|
-
headings?: (string | null)[];
|
|
21
|
-
docIds?: (string | null)[];
|
|
22
|
-
namespaces?: (string | null)[];
|
|
23
|
-
blockTokenLens?: number[];
|
|
24
|
-
semantic?: {
|
|
25
|
-
version: 1;
|
|
26
|
-
modelId: string;
|
|
27
|
-
dims: number;
|
|
28
|
-
encoding: 'int8_l2norm';
|
|
29
|
-
perVectorScale: boolean;
|
|
30
|
-
vecs: Int8Array;
|
|
31
|
-
scales?: Uint16Array;
|
|
32
|
-
};
|
|
33
|
-
};
|
|
34
|
-
export declare function hasSemantic(pack: Pack): boolean;
|
|
35
|
-
export declare function mountPack(opts: MountOptions): Promise<Pack>;
|
|
1
|
+
export { mountPack, hasSemantic } from './pack.runtime.js';
|
|
2
|
+
export type { MountOptions, PackMeta, Pack } from './pack.runtime.js';
|
package/dist/pack.js
CHANGED
|
@@ -1,175 +1 @@
|
|
|
1
|
-
|
|
2
|
-
* pack.ts
|
|
3
|
-
*
|
|
4
|
-
* Mount `.knolo` packs across Node, browsers, and RN/Expo. Tolerant of:
|
|
5
|
-
* - blocks as string[] (v1) or object[] with { text, heading?, docId?, namespace?, len? }
|
|
6
|
-
* - meta.stats.avgBlockLen (optional)
|
|
7
|
-
* Includes RN/Expo-safe TextDecoder via ponyfill.
|
|
8
|
-
*/
|
|
9
|
-
import { getTextDecoder } from './utils/utf8.js';
|
|
10
|
-
import { validateAgentRegistry } from './agent.js';
|
|
11
|
-
export function hasSemantic(pack) {
|
|
12
|
-
return Boolean(pack.semantic && pack.semantic.dims > 0 && pack.semantic.vecs.length > 0);
|
|
13
|
-
}
|
|
14
|
-
export async function mountPack(opts) {
|
|
15
|
-
const buf = await resolveToBuffer(opts.src);
|
|
16
|
-
const dv = new DataView(buf);
|
|
17
|
-
const dec = getTextDecoder();
|
|
18
|
-
let offset = 0;
|
|
19
|
-
// meta
|
|
20
|
-
const metaLen = dv.getUint32(offset, true);
|
|
21
|
-
offset += 4;
|
|
22
|
-
const metaJson = dec.decode(new Uint8Array(buf, offset, metaLen));
|
|
23
|
-
offset += metaLen;
|
|
24
|
-
const meta = JSON.parse(metaJson);
|
|
25
|
-
if (meta.agents) {
|
|
26
|
-
validateAgentRegistry(meta.agents);
|
|
27
|
-
}
|
|
28
|
-
// lexicon
|
|
29
|
-
const lexLen = dv.getUint32(offset, true);
|
|
30
|
-
offset += 4;
|
|
31
|
-
const lexJson = dec.decode(new Uint8Array(buf, offset, lexLen));
|
|
32
|
-
offset += lexLen;
|
|
33
|
-
const lexEntries = JSON.parse(lexJson);
|
|
34
|
-
const lexicon = new Map(lexEntries);
|
|
35
|
-
// postings
|
|
36
|
-
const postCount = dv.getUint32(offset, true);
|
|
37
|
-
offset += 4;
|
|
38
|
-
const postings = new Uint32Array(postCount);
|
|
39
|
-
for (let i = 0; i < postCount; i++) {
|
|
40
|
-
postings[i] = dv.getUint32(offset, true);
|
|
41
|
-
offset += 4;
|
|
42
|
-
}
|
|
43
|
-
// blocks (v1: string[]; v2/v3: {text, heading?, docId?, namespace?, len?}[])
|
|
44
|
-
const blocksLen = dv.getUint32(offset, true);
|
|
45
|
-
offset += 4;
|
|
46
|
-
const blocksJson = dec.decode(new Uint8Array(buf, offset, blocksLen));
|
|
47
|
-
offset += blocksLen;
|
|
48
|
-
const parsed = JSON.parse(blocksJson);
|
|
49
|
-
let blocks = [];
|
|
50
|
-
let headings;
|
|
51
|
-
let docIds;
|
|
52
|
-
let namespaces;
|
|
53
|
-
let blockTokenLens;
|
|
54
|
-
if (Array.isArray(parsed) && parsed.length && typeof parsed[0] === 'string') {
|
|
55
|
-
// v1
|
|
56
|
-
blocks = parsed;
|
|
57
|
-
}
|
|
58
|
-
else if (Array.isArray(parsed)) {
|
|
59
|
-
blocks = [];
|
|
60
|
-
headings = [];
|
|
61
|
-
docIds = [];
|
|
62
|
-
namespaces = [];
|
|
63
|
-
blockTokenLens = [];
|
|
64
|
-
for (const it of parsed) {
|
|
65
|
-
if (it && typeof it === 'object') {
|
|
66
|
-
blocks.push(String(it.text ?? ''));
|
|
67
|
-
headings.push(it.heading ?? null);
|
|
68
|
-
docIds.push(it.docId ?? null);
|
|
69
|
-
namespaces.push(it.namespace ?? null);
|
|
70
|
-
blockTokenLens.push(typeof it.len === 'number' ? it.len : 0);
|
|
71
|
-
}
|
|
72
|
-
else {
|
|
73
|
-
blocks.push(String(it ?? ''));
|
|
74
|
-
headings.push(null);
|
|
75
|
-
docIds.push(null);
|
|
76
|
-
namespaces.push(null);
|
|
77
|
-
blockTokenLens.push(0);
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
}
|
|
81
|
-
else {
|
|
82
|
-
blocks = [];
|
|
83
|
-
}
|
|
84
|
-
let semantic;
|
|
85
|
-
if (offset < buf.byteLength) {
|
|
86
|
-
const semLen = dv.getUint32(offset, true);
|
|
87
|
-
offset += 4;
|
|
88
|
-
const semJson = dec.decode(new Uint8Array(buf, offset, semLen));
|
|
89
|
-
offset += semLen;
|
|
90
|
-
const sem = JSON.parse(semJson);
|
|
91
|
-
const semBlobLen = dv.getUint32(offset, true);
|
|
92
|
-
offset += 4;
|
|
93
|
-
const semBlob = new Uint8Array(buf, offset, semBlobLen);
|
|
94
|
-
semantic = parseSemanticSection(sem, semBlob);
|
|
95
|
-
}
|
|
96
|
-
return {
|
|
97
|
-
meta,
|
|
98
|
-
lexicon,
|
|
99
|
-
postings,
|
|
100
|
-
blocks,
|
|
101
|
-
headings,
|
|
102
|
-
docIds,
|
|
103
|
-
namespaces,
|
|
104
|
-
blockTokenLens,
|
|
105
|
-
semantic,
|
|
106
|
-
};
|
|
107
|
-
}
|
|
108
|
-
function parseSemanticSection(sem, blob) {
|
|
109
|
-
const vectors = sem?.blocks?.vectors;
|
|
110
|
-
const scales = sem?.blocks?.scales;
|
|
111
|
-
const vecs = new Int8Array(blob.buffer, blob.byteOffset + Number(vectors?.byteOffset ?? 0), Number(vectors?.length ?? 0));
|
|
112
|
-
let scaleView;
|
|
113
|
-
if (scales) {
|
|
114
|
-
const scaleLen = Number(scales.length ?? 0);
|
|
115
|
-
const scaleOffset = Number(scales.byteOffset ?? 0);
|
|
116
|
-
const dv = new DataView(blob.buffer, blob.byteOffset + scaleOffset, scaleLen * 2);
|
|
117
|
-
scaleView = new Uint16Array(scaleLen);
|
|
118
|
-
for (let i = 0; i < scaleLen; i++) {
|
|
119
|
-
scaleView[i] = dv.getUint16(i * 2, true);
|
|
120
|
-
}
|
|
121
|
-
}
|
|
122
|
-
return {
|
|
123
|
-
version: 1,
|
|
124
|
-
modelId: String(sem?.modelId ?? ''),
|
|
125
|
-
dims: Number(sem?.dims ?? 0),
|
|
126
|
-
encoding: 'int8_l2norm',
|
|
127
|
-
perVectorScale: Boolean(sem?.perVectorScale),
|
|
128
|
-
vecs,
|
|
129
|
-
scales: scaleView,
|
|
130
|
-
};
|
|
131
|
-
}
|
|
132
|
-
async function resolveToBuffer(src) {
|
|
133
|
-
if (typeof src === 'string') {
|
|
134
|
-
if (isNodeRuntime() && isLikelyLocalPath(src)) {
|
|
135
|
-
return await readLocalFileAsBuffer(src);
|
|
136
|
-
}
|
|
137
|
-
const res = await fetch(src);
|
|
138
|
-
return await res.arrayBuffer();
|
|
139
|
-
}
|
|
140
|
-
if (src instanceof Uint8Array) {
|
|
141
|
-
if (src.byteOffset === 0 && src.byteLength === src.buffer.byteLength) {
|
|
142
|
-
return src.buffer;
|
|
143
|
-
}
|
|
144
|
-
const copy = src.slice();
|
|
145
|
-
return copy.buffer;
|
|
146
|
-
}
|
|
147
|
-
return src;
|
|
148
|
-
}
|
|
149
|
-
function isNodeRuntime() {
|
|
150
|
-
const p = globalThis
|
|
151
|
-
.process;
|
|
152
|
-
return !!p?.versions?.node;
|
|
153
|
-
}
|
|
154
|
-
function isLikelyLocalPath(value) {
|
|
155
|
-
if (value.startsWith('file://'))
|
|
156
|
-
return true;
|
|
157
|
-
if (value.startsWith('./') ||
|
|
158
|
-
value.startsWith('../') ||
|
|
159
|
-
value.startsWith('/') ||
|
|
160
|
-
value.startsWith('~'))
|
|
161
|
-
return true;
|
|
162
|
-
if (/^[A-Za-z]:[\\/]/.test(value))
|
|
163
|
-
return true; // Windows absolute path
|
|
164
|
-
if (/^[A-Za-z][A-Za-z\d+.-]*:/.test(value))
|
|
165
|
-
return false; // URL scheme
|
|
166
|
-
return true; // plain relative path like "knowledge.knolo"
|
|
167
|
-
}
|
|
168
|
-
async function readLocalFileAsBuffer(pathOrFileUrl) {
|
|
169
|
-
const { readFile } = await import('node:fs/promises');
|
|
170
|
-
const filePath = pathOrFileUrl.startsWith('file://')
|
|
171
|
-
? decodeURIComponent(new URL(pathOrFileUrl).pathname)
|
|
172
|
-
: pathOrFileUrl;
|
|
173
|
-
const data = await readFile(filePath);
|
|
174
|
-
return data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength);
|
|
175
|
-
}
|
|
1
|
+
export { mountPack, hasSemantic } from './pack.runtime.js';
|
package/dist/pack.node.js
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { mountPackFromBuffer, toArrayBuffer } from './pack.runtime.js';
|
|
2
|
+
export { hasSemantic } from './pack.runtime.js';
|
|
3
|
+
/**
 * Mount a `.knolo` pack from the Node entry point.
 *
 * The source is first resolved to raw bytes (local file, fetched URL, or
 * in-memory binary) and then decoded by the shared runtime decoder.
 *
 * @param {{ src: string | ArrayBufferLike | Uint8Array }} opts - Mount options.
 * @returns {Promise<object>} The decoded pack.
 */
export async function mountPack(opts) {
    return mountPackFromBuffer(await resolveToBuffer(opts.src));
}
|
|
7
|
+
/**
 * Resolve a mount source to an ArrayBuffer (Node-only variant).
 *
 * - Non-string sources are normalized with `toArrayBuffer`.
 * - Strings classified as local by `isLikelyLocalPath` are read from disk.
 *   `file://` URLs are converted with `fileURLToPath`, which handles Windows
 *   drive letters and percent-encoding; taking `URL#pathname` directly would
 *   yield `/C:/...` on Windows. Other local strings are handed to `readFile`
 *   unchanged, so a leading `~` is NOT expanded to the home directory.
 * - Any other string is fetched as a URL.
 *
 * @param {string | ArrayBufferLike | Uint8Array} src - Pack location or bytes.
 * @returns {Promise<ArrayBuffer>} The pack bytes.
 * @throws {Error} When the file cannot be read or the HTTP response is not OK.
 */
async function resolveToBuffer(src) {
    if (typeof src !== 'string') {
        return toArrayBuffer(src);
    }
    if (isLikelyLocalPath(src)) {
        const { readFile } = await import('node:fs/promises');
        let filePath = src;
        if (src.startsWith('file://')) {
            const { fileURLToPath } = await import('node:url');
            filePath = fileURLToPath(src);
        }
        const data = await readFile(filePath);
        // Node Buffers may be views into a larger pool; copy out exactly our bytes.
        return data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength);
    }
    const res = await fetch(src);
    if (!res.ok) {
        // Fail here instead of trying to decode an HTML error page as a pack.
        throw new Error(`Failed to fetch pack from ${src}: HTTP ${res.status} ${res.statusText}`);
    }
    return await res.arrayBuffer();
}
|
|
22
|
+
/**
 * Heuristic: should this string be read from the local filesystem?
 *
 * Returns true for `file://` URLs, for strings starting with `./`, `../`,
 * `/` or `~`, for Windows drive paths (`C:\` / `C:/`), and for anything that
 * does not begin with a URL scheme (e.g. a bare `knowledge.knolo`).
 * Returns false only for strings carrying some other URL scheme.
 *
 * @param {string} value - Candidate path or URL.
 * @returns {boolean} True when the value looks like a local path.
 */
function isLikelyLocalPath(value) {
    const localPrefixes = ['file://', './', '../', '/', '~'];
    if (localPrefixes.some((prefix) => value.startsWith(prefix))) {
        return true;
    }
    const isWindowsAbsolute = /^[A-Za-z]:[\\/]/.test(value);
    const hasUrlScheme = /^[A-Za-z][A-Za-z\d+.-]*:/.test(value);
    // The drive-letter check wins over the scheme check: "C:/x" also matches the scheme pattern.
    return isWindowsAbsolute || !hasUrlScheme;
}
|
package/dist/pack.runtime.d.ts
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import type { AgentRegistry } from './agent.js';
|
|
2
|
+
/**
 * Options accepted by `mountPack`.
 *
 * `src` is either a string locating the pack or the pack bytes themselves
 * (an ArrayBuffer-like value or a Uint8Array). How a string is interpreted
 * depends on the entry point that receives it.
 */
export type MountOptions = {
|
|
3
|
+
src: string | ArrayBufferLike | Uint8Array;
|
|
4
|
+
};
|
|
5
|
+
/**
 * Shape of the metadata object stored in a pack.
 *
 * - `version`: pack format version number.
 * - `stats`: numeric summary of the pack contents (presumably counts of
 *   source documents, text blocks and lexicon terms, plus an optional average
 *   block length; confirm against the builder).
 * - `agents`: optional agent registry embedded in the pack.
 */
export type PackMeta = {
|
|
6
|
+
version: number;
|
|
7
|
+
stats: {
|
|
8
|
+
docs: number;
|
|
9
|
+
blocks: number;
|
|
10
|
+
terms: number;
|
|
11
|
+
avgBlockLen?: number;
|
|
12
|
+
};
|
|
13
|
+
agents?: AgentRegistry;
|
|
14
|
+
};
|
|
15
|
+
/**
 * A mounted pack: the in-memory form returned by `mountPack` /
 * `mountPackFromBuffer`.
 *
 * - `meta`: the pack metadata.
 * - `lexicon`: string-to-number map (presumably term to term id; confirm
 *   against the query code).
 * - `postings`: flat array of unsigned 32-bit integers.
 * - `blocks`: the text of each block.
 * - `headings` / `docIds` / `namespaces` / `blockTokenLens`: optional
 *   per-block arrays (presumably index-aligned with `blocks`; TODO confirm).
 * - `semantic`: optional embedding section. `vecs` holds int8 vector data;
 *   `scales` holds raw 16-bit values (presumably float16-encoded per-vector
 *   scales; confirm against `decodeScaleF16`).
 */
export type Pack = {
|
|
16
|
+
meta: PackMeta;
|
|
17
|
+
lexicon: Map<string, number>;
|
|
18
|
+
postings: Uint32Array;
|
|
19
|
+
blocks: string[];
|
|
20
|
+
headings?: (string | null)[];
|
|
21
|
+
docIds?: (string | null)[];
|
|
22
|
+
namespaces?: (string | null)[];
|
|
23
|
+
blockTokenLens?: number[];
|
|
24
|
+
semantic?: {
|
|
25
|
+
version: 1;
|
|
26
|
+
modelId: string;
|
|
27
|
+
dims: number;
|
|
28
|
+
encoding: 'int8_l2norm';
|
|
29
|
+
perVectorScale: boolean;
|
|
30
|
+
vecs: Int8Array;
|
|
31
|
+
scales?: Uint16Array;
|
|
32
|
+
};
|
|
33
|
+
};
|
|
34
|
+
/** Reports whether the given pack carries usable semantic (embedding) data. */
export declare function hasSemantic(pack: Pack): boolean;
|
|
35
|
+
/** Asynchronously mounts a pack from the source described by `opts` and resolves to the mounted `Pack`. */
export declare function mountPack(opts: MountOptions): Promise<Pack>;
|
|
36
|
+
/** Synchronously decodes pack bytes that are already in memory into a `Pack`. */
export declare function mountPackFromBuffer(buf: ArrayBuffer): Pack;
|
|
37
|
+
/** Normalizes a binary mount source (ArrayBuffer-like or Uint8Array) to an `ArrayBuffer`. */
export declare function toArrayBuffer(src: ArrayBufferLike | Uint8Array): ArrayBuffer;
|
package/dist/pack.runtime.js
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* pack.runtime.ts
|
|
3
|
+
*
|
|
4
|
+
* Runtime-safe pack mounting for browser and React Native environments.
|
|
5
|
+
* No Node stdlib imports are allowed in this module.
|
|
6
|
+
*/
|
|
7
|
+
import { getTextDecoder } from './utils/utf8.js';
|
|
8
|
+
import { validateAgentRegistry } from './agent.js';
|
|
9
|
+
/**
 * Report whether a mounted pack carries usable semantic data.
 *
 * @param {object} pack - A mounted pack.
 * @returns {boolean} True when `pack.semantic` exists, its `dims` is positive
 *   and its `vecs` array is non-empty.
 */
export function hasSemantic(pack) {
    const section = pack.semantic;
    if (!section) {
        return false;
    }
    return section.dims > 0 && section.vecs.length > 0;
}
|
|
12
|
+
/**
 * Mount a `.knolo` pack in runtime-safe environments (browser / React Native).
 *
 * The source is resolved to raw bytes and then handed to the synchronous
 * decoder.
 *
 * @param {{ src: string | ArrayBufferLike | Uint8Array }} opts - Mount options.
 * @returns {Promise<object>} The decoded pack.
 */
export async function mountPack(opts) {
    return mountPackFromBuffer(await resolveToBuffer(opts.src));
}
|
|
16
|
+
/**
 * Decode an in-memory `.knolo` pack.
 *
 * Sections are consumed strictly in order: meta JSON, lexicon JSON, postings
 * (a uint32 count followed by that many uint32 values), blocks JSON and, when
 * bytes remain, a semantic JSON header followed by a length-prefixed binary
 * blob. Every length prefix and integer is a little-endian uint32.
 *
 * @param {ArrayBuffer} buf - Raw pack bytes.
 * @returns {object} `{ meta, lexicon, postings, blocks, headings, docIds,
 *   namespaces, blockTokenLens, semantic }`. The four per-block arrays stay
 *   undefined for packs whose blocks section is a plain array of strings.
 */
export function mountPackFromBuffer(buf) {
    const view = new DataView(buf);
    const decoder = getTextDecoder();
    let cursor = 0;
    // Read one little-endian uint32 and advance the cursor.
    const readU32 = () => {
        const value = view.getUint32(cursor, true);
        cursor += 4;
        return value;
    };
    // Read a length-prefixed byte run as a view over `buf` (no copy) and advance.
    const readBytes = () => {
        const length = readU32();
        const bytes = new Uint8Array(buf, cursor, length);
        cursor += length;
        return bytes;
    };
    // Read a length-prefixed UTF-8 JSON document.
    const readJson = () => JSON.parse(decoder.decode(readBytes()));

    const meta = readJson();
    if (meta.agents) {
        validateAgentRegistry(meta.agents);
    }

    const lexicon = new Map(readJson());

    // Copied value by value: the section is not guaranteed to be 4-byte aligned.
    const postCount = readU32();
    const postings = new Uint32Array(postCount);
    for (let i = 0; i < postCount; i += 1) {
        postings[i] = readU32();
    }

    const rawBlocks = readJson();
    let blocks = [];
    let headings;
    let docIds;
    let namespaces;
    let blockTokenLens;
    if (Array.isArray(rawBlocks)) {
        if (rawBlocks.length && typeof rawBlocks[0] === 'string') {
            // Plain string array: texts only, no per-block metadata.
            blocks = rawBlocks;
        }
        else {
            headings = [];
            docIds = [];
            namespaces = [];
            blockTokenLens = [];
            for (const entry of rawBlocks) {
                const isRecord = Boolean(entry) && typeof entry === 'object';
                blocks.push(String(isRecord ? entry.text ?? '' : entry ?? ''));
                headings.push(isRecord ? entry.heading ?? null : null);
                docIds.push(isRecord ? entry.docId ?? null : null);
                namespaces.push(isRecord ? entry.namespace ?? null : null);
                blockTokenLens.push(isRecord && typeof entry.len === 'number' ? entry.len : 0);
            }
        }
    }

    let semantic;
    if (cursor < buf.byteLength) {
        // Trailing bytes mean a semantic section: JSON header, then the binary blob.
        const header = readJson();
        const blob = readBytes();
        semantic = parseSemanticSection(header, blob);
    }

    return {
        meta,
        lexicon,
        postings,
        blocks,
        headings,
        docIds,
        namespaces,
        blockTokenLens,
        semantic,
    };
}
|
|
101
|
+
/**
 * Build the runtime semantic section from its JSON header and binary blob.
 *
 * `sem.blocks.vectors` and `sem.blocks.scales` each describe a span
 * (`byteOffset`, `length`) inside `blob`. Vectors are exposed as an Int8Array
 * view over the blob's memory (no copy). Scales are copied out one
 * little-endian uint16 at a time through a DataView.
 *
 * @param {object} sem - Parsed semantic header.
 * @param {Uint8Array} blob - Binary payload following the header.
 * @returns {object} `{ version, modelId, dims, encoding, perVectorScale, vecs, scales }`;
 *   `scales` is undefined when the header declares no scales span.
 */
function parseSemanticSection(sem, blob) {
    const layout = sem?.blocks;
    const vectorSpan = layout?.vectors;
    const scaleSpan = layout?.scales;
    const base = blob.byteOffset;
    const vecs = new Int8Array(blob.buffer, base + Number(vectorSpan?.byteOffset ?? 0), Number(vectorSpan?.length ?? 0));
    let scales;
    if (scaleSpan) {
        const count = Number(scaleSpan.length ?? 0);
        const reader = new DataView(blob.buffer, base + Number(scaleSpan.byteOffset ?? 0), count * 2);
        scales = new Uint16Array(count);
        for (let i = 0; i < count; i += 1) {
            scales[i] = reader.getUint16(i * 2, true);
        }
    }
    return {
        version: 1,
        modelId: String(sem?.modelId ?? ''),
        dims: Number(sem?.dims ?? 0),
        encoding: 'int8_l2norm',
        perVectorScale: Boolean(sem?.perVectorScale),
        vecs,
        scales,
    };
}
|
|
125
|
+
/**
 * Resolve a mount source to an ArrayBuffer without touching Node APIs.
 *
 * Strings are always treated as URLs and fetched; binary sources are
 * normalized with `toArrayBuffer`. Any failure while fetching a string source
 * is reported with the same guidance message as before, and the underlying
 * error (network failure or HTTP error status) is preserved on `error.cause`
 * instead of being discarded.
 *
 * @param {string | ArrayBufferLike | Uint8Array} src - Pack URL or bytes.
 * @returns {Promise<ArrayBuffer>} The pack bytes.
 * @throws {Error} When a string source cannot be fetched.
 */
async function resolveToBuffer(src) {
    if (typeof src !== 'string') {
        return toArrayBuffer(src);
    }
    try {
        const res = await fetch(src);
        // Only explicit HTTP error statuses are rejected: some runtimes report
        // status 0 for successful non-HTTP (e.g. file/asset) loads.
        if (res.status >= 400) {
            throw new Error(`Request for ${src} failed with HTTP ${res.status}`);
        }
        return await res.arrayBuffer();
    }
    catch (err) {
        throw new Error('mountPack({src: string}) expects a URL in React Native. For local files, load bytes in your app and pass Uint8Array/ArrayBuffer.', { cause: err });
    }
}
|
|
137
|
+
/**
 * Normalize a binary mount source to a buffer holding exactly the pack bytes.
 *
 * - Non-view inputs (ArrayBuffer-like values) are returned unchanged.
 * - A view spanning its entire underlying buffer returns that buffer without
 *   copying.
 * - Any other view returns a copy of just the viewed byte range.
 *
 * The copy is taken with `ArrayBuffer.prototype.slice` on the underlying
 * buffer rather than the view's own `slice()`: Node's `Buffer#slice` returns
 * another view that shares memory, so its `.buffer` would be the whole
 * (often pooled) allocation and the pack would be decoded from the wrong bytes.
 *
 * @param {ArrayBufferLike | ArrayBufferView} src - Pack bytes.
 * @returns {ArrayBufferLike} Buffer whose byte 0 is the first byte of the pack.
 */
export function toArrayBuffer(src) {
    if (!ArrayBuffer.isView(src)) {
        return src;
    }
    const { buffer, byteOffset, byteLength } = src;
    if (byteOffset === 0 && byteLength === buffer.byteLength) {
        return buffer;
    }
    return buffer.slice(byteOffset, byteOffset + byteLength);
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@knolo/core",
|
|
3
|
-
"version": "3.1.3",
|
|
3
|
+
"version": "3.2.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Local-first knowledge packs for small LLMs.",
|
|
6
6
|
"keywords": [
|
|
@@ -19,8 +19,14 @@
|
|
|
19
19
|
],
|
|
20
20
|
"exports": {
|
|
21
21
|
".": {
|
|
22
|
+
"react-native": "./dist/index.js",
|
|
23
|
+
"browser": "./dist/index.js",
|
|
22
24
|
"import": "./dist/index.js",
|
|
23
25
|
"types": "./dist/index.d.ts"
|
|
26
|
+
},
|
|
27
|
+
"./node": {
|
|
28
|
+
"import": "./dist/node.js",
|
|
29
|
+
"types": "./dist/node.d.ts"
|
|
24
30
|
}
|
|
25
31
|
},
|
|
26
32
|
"sideEffects": false,
|
|
@@ -28,9 +34,10 @@
|
|
|
28
34
|
"build": "tsc -p tsconfig.json",
|
|
29
35
|
"prepublishOnly": "npm run build",
|
|
30
36
|
"smoke": "node scripts/smoke.mjs",
|
|
31
|
-
"test": "npm run build && node scripts/test.mjs",
|
|
32
|
-
"format": "prettier --write src/agent.ts src/pack.ts src/builder.ts src/index.ts scripts/test.mjs ../../README.md",
|
|
33
|
-
"format:check": "prettier --check src/agent.ts src/pack.ts src/builder.ts src/index.ts scripts/test.mjs ../../README.md"
|
|
37
|
+
"test": "npm run build && node scripts/check-runtime-no-node.mjs && node scripts/test.mjs",
|
|
38
|
+
"format": "prettier --write src/agent.ts src/pack.ts src/pack.runtime.ts src/pack.node.ts src/node.ts src/builder.ts src/index.ts scripts/test.mjs scripts/check-runtime-no-node.mjs ../../README.md README.md",
|
|
39
|
+
"format:check": "prettier --check src/agent.ts src/pack.ts src/pack.runtime.ts src/pack.node.ts src/node.ts src/builder.ts src/index.ts scripts/test.mjs scripts/check-runtime-no-node.mjs ../../README.md README.md",
|
|
40
|
+
"check:runtime-no-node": "node scripts/check-runtime-no-node.mjs"
|
|
34
41
|
},
|
|
35
42
|
"devDependencies": {
|
|
36
43
|
"@types/node": "^20.11.0",
|