knolo-core 0.2.3 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DOCS.md +242 -14
- package/README.md +342 -150
- package/bin/knolo.mjs +354 -36
- package/dist/agent.d.ts +53 -0
- package/dist/agent.js +175 -0
- package/dist/builder.d.ts +15 -1
- package/dist/builder.js +128 -14
- package/dist/index.d.ts +6 -2
- package/dist/index.js +4 -2
- package/dist/indexer.d.ts +2 -1
- package/dist/indexer.js +3 -2
- package/dist/pack.d.ts +14 -0
- package/dist/pack.js +96 -4
- package/dist/patch.d.ts +1 -8
- package/dist/patch.js +2 -17
- package/dist/query.d.ts +29 -0
- package/dist/query.js +324 -18
- package/dist/rank.d.ts +1 -1
- package/dist/rank.js +5 -4
- package/dist/semantic.d.ts +7 -0
- package/dist/semantic.js +98 -0
- package/dist/tokenize.js +1 -1
- package/package.json +5 -2
package/DOCS.md
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
|
|
2
2
|
# DOCS.md — KnoLo Core
|
|
3
3
|
|
|
4
|
-
> Deterministic, embedding-
|
|
4
|
+
> Deterministic, lexical-first retrieval with optional embedding-based hybrid rerank, and portable knowledge packs.
|
|
5
|
+
|
|
6
|
+
Determinism note: lexical retrieval is deterministic, and semantic rerank is deterministic given the same `.knolo` pack bytes, query embedding model, and embedding provider outputs.
|
|
5
7
|
|
|
6
8
|
## Table of Contents
|
|
7
9
|
|
|
@@ -51,8 +53,8 @@ npm install knolo-core
|
|
|
51
53
|
import { buildPack, mountPack, query, makeContextPatch } from "knolo-core";
|
|
52
54
|
|
|
53
55
|
const docs = [
|
|
54
|
-
{ id: "guide", heading: "React Native Bridge", text: "The bridge sends messages between JS and native. You can throttle events..." },
|
|
55
|
-
{ id: "throttle", heading: "Throttling", text: "Throttling reduces frequency of events..." }
|
|
56
|
+
{ id: "guide", namespace: "mobile", heading: "React Native Bridge", text: "The bridge sends messages between JS and native. You can throttle events..." },
|
|
57
|
+
{ id: "throttle", namespace: "mobile", heading: "Throttling", text: "Throttling reduces frequency of events..." }
|
|
56
58
|
];
|
|
57
59
|
|
|
58
60
|
const bytes = await buildPack(docs);
|
|
@@ -64,10 +66,21 @@ const patch = makeContextPatch(hits, { budget: "small" });
|
|
|
64
66
|
### CLI build
|
|
65
67
|
|
|
66
68
|
```bash
|
|
67
|
-
#
|
|
69
|
+
# lexical-only
|
|
68
70
|
npx knolo docs.json knowledge.knolo
|
|
71
|
+
|
|
72
|
+
# semantic-enabled build (embeddings JSON + model id)
|
|
73
|
+
npx knolo docs.json knowledge.knolo --embeddings embeddings.json --model-id text-embedding-3-small
|
|
74
|
+
|
|
75
|
+
# embed agents from a local directory (.json/.yml/.yaml)
|
|
76
|
+
npx knolo docs.json knowledge.knolo --agents ./examples/agents
|
|
69
77
|
```
|
|
70
78
|
|
|
79
|
+
|
|
80
|
+
### Agents and namespace binding
|
|
81
|
+
|
|
82
|
+
When agent definitions are embedded into `meta.agents`, `resolveAgent(pack, { agentId, query, patch })` enforces **strict namespace binding**: `retrievalDefaults.namespace` always wins over caller `query.namespace`. This keeps retrieval deterministic and on-policy for each agent.
|
|
83
|
+
|
|
71
84
|
---
|
|
72
85
|
|
|
73
86
|
## Concepts
|
|
@@ -90,6 +103,7 @@ npx knolo docs.json knowledge.knolo
|
|
|
90
103
|
type BuildInputDoc = {
|
|
91
104
|
id?: string; // exposed later as hit.source
|
|
92
105
|
heading?: string; // boosts relevance when overlapping query terms
|
|
106
|
+
namespace?: string; // optional namespace for scoped retrieval
|
|
93
107
|
text: string; // raw markdown accepted (lightly stripped)
|
|
94
108
|
};
|
|
95
109
|
```
|
|
@@ -97,7 +111,17 @@ type BuildInputDoc = {
|
|
|
97
111
|
### API
|
|
98
112
|
|
|
99
113
|
```ts
|
|
100
|
-
const bytes: Uint8Array = await buildPack(docs: BuildInputDoc[]);
|
|
114
|
+
const bytes: Uint8Array = await buildPack(docs: BuildInputDoc[], {
|
|
115
|
+
semantic?: {
|
|
116
|
+
enabled: boolean;
|
|
117
|
+
modelId: string;
|
|
118
|
+
embeddings: Float32Array[]; // same length/order as blocks
|
|
119
|
+
quantization?: {
|
|
120
|
+
type: 'int8_l2norm';
|
|
121
|
+
perVectorScale?: true;
|
|
122
|
+
};
|
|
123
|
+
};
|
|
124
|
+
});
|
|
101
125
|
```
|
|
102
126
|
|
|
103
127
|
**Tips**
|
|
@@ -115,7 +139,30 @@ const bytes: Uint8Array = await buildPack(docs: BuildInputDoc[]);
|
|
|
115
139
|
```ts
|
|
116
140
|
type QueryOptions = {
|
|
117
141
|
topK?: number; // default 10
|
|
142
|
+
minScore?: number; // optional absolute score floor
|
|
118
143
|
requirePhrases?: string[]; // phrases that must appear verbatim
|
|
144
|
+
namespace?: string | string[]; // optional namespace filter(s)
|
|
145
|
+
source?: string | string[]; // optional source/docId filter(s)
|
|
146
|
+
queryExpansion?: {
|
|
147
|
+
enabled?: boolean; // default true
|
|
148
|
+
docs?: number; // top seed docs, default 3
|
|
149
|
+
terms?: number; // expanded lexical terms, default 4
|
|
150
|
+
weight?: number; // tf scaling for expansion terms, default 0.35
|
|
151
|
+
minTermLength?: number; // default 3
|
|
152
|
+
};
|
|
153
|
+
semantic?: {
|
|
154
|
+
enabled?: boolean; // default false
|
|
155
|
+
mode?: "rerank"; // default "rerank"
|
|
156
|
+
topN?: number; // default 50
|
|
157
|
+
minLexConfidence?: number; // default 0.35
|
|
158
|
+
blend?: {
|
|
159
|
+
enabled?: boolean; // default true
|
|
160
|
+
wLex?: number; // default 0.75
|
|
161
|
+
wSem?: number; // default 0.25
|
|
162
|
+
};
|
|
163
|
+
queryEmbedding?: Float32Array; // required if enabled=true
|
|
164
|
+
force?: boolean; // rerank even when lexical confidence is high
|
|
165
|
+
};
|
|
119
166
|
};
|
|
120
167
|
|
|
121
168
|
type Hit = {
|
|
@@ -123,22 +170,50 @@ type Hit = {
|
|
|
123
170
|
score: number;
|
|
124
171
|
text: string;
|
|
125
172
|
source?: string; // docId if provided
|
|
173
|
+
namespace?: string; // namespace if provided
|
|
126
174
|
};
|
|
127
175
|
|
|
128
176
|
const hits: Hit[] = query(pack, '“react native bridge” throttling', {
|
|
129
177
|
topK: 5,
|
|
130
|
-
requirePhrases: ["maximum rate"] // hard constraint
|
|
178
|
+
requirePhrases: ["maximum rate"], // hard constraint
|
|
179
|
+
namespace: "mobile",
|
|
180
|
+
source: ["guide", "faq"]
|
|
131
181
|
});
|
|
132
182
|
```
|
|
133
183
|
|
|
184
|
+
### Semantic helper ergonomics
|
|
185
|
+
|
|
186
|
+
```ts
|
|
187
|
+
import { hasSemantic, validateQueryOptions, validateSemanticQueryOptions } from "knolo-core";
|
|
188
|
+
|
|
189
|
+
if (hasSemantic(pack)) {
|
|
190
|
+
validateQueryOptions({
|
|
191
|
+
topK: 10,
|
|
192
|
+
namespace: "mobile",
|
|
193
|
+
queryExpansion: { enabled: true, docs: 3, terms: 4 },
|
|
194
|
+
});
|
|
195
|
+
|
|
196
|
+
validateSemanticQueryOptions({
|
|
197
|
+
enabled: true,
|
|
198
|
+
topN: 40,
|
|
199
|
+
minLexConfidence: 0.35,
|
|
200
|
+
queryEmbedding,
|
|
201
|
+
});
|
|
202
|
+
}
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
`validateQueryOptions(...)` and `validateSemanticQueryOptions(...)` throw useful errors for invalid option types/ranges (for example `topK`, `queryExpansion.docs`, `topN`, `minLexConfidence`, blend weights, and missing `Float32Array` embedding types).
|
|
206
|
+
|
|
134
207
|
**What the ranker does**
|
|
135
208
|
|
|
136
209
|
1. Enforces quoted/required phrases (hard filter)
|
|
137
|
-
2. BM25L with
|
|
210
|
+
2. Corpus-aware BM25L with true IDF, query-time DF collection, and per-block length normalization
|
|
138
211
|
3. **Proximity bonus** (minimal span cover)
|
|
139
212
|
4. **Heading overlap** boost
|
|
140
|
-
5. **
|
|
141
|
-
6. **
|
|
213
|
+
5. Deterministic **pseudo-relevance query expansion** from top lexical seeds
|
|
214
|
+
6. **KNS** tie-breaker (small, deterministic)
|
|
215
|
+
7. Optional semantic rerank over lexical top-N when confidence is low
|
|
216
|
+
8. **De-dupe + MMR** diversity for final top-K
|
|
142
217
|
|
|
143
218
|
---
|
|
144
219
|
|
|
@@ -178,6 +253,75 @@ const patch = makeContextPatch(hits, { budget: "mini" | "small" | "full" });
|
|
|
178
253
|
query(pack, "throttling", { requirePhrases: ["react native bridge"] });
|
|
179
254
|
```
|
|
180
255
|
|
|
256
|
+
### Namespace-scoped retrieval
|
|
257
|
+
|
|
258
|
+
```ts
|
|
259
|
+
query(pack, "bridge events", { namespace: ["mobile", "sdk"] });
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
### Source/docId-scoped retrieval
|
|
263
|
+
|
|
264
|
+
```ts
|
|
265
|
+
query(pack, "throttling", { source: ["guide", "faq"] });
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
### Minimum score threshold
|
|
269
|
+
|
|
270
|
+
```ts
|
|
271
|
+
query(pack, "throttle bridge", { minScore: 2.5 });
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
Use this when you prefer precision over recall and only want confident lexical matches.
|
|
275
|
+
|
|
276
|
+
### Query expansion controls
|
|
277
|
+
|
|
278
|
+
```ts
|
|
279
|
+
query(pack, "throttle bridge", {
|
|
280
|
+
queryExpansion: { enabled: true, docs: 4, terms: 6, weight: 0.3 }
|
|
281
|
+
});
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
This keeps retrieval lexical/deterministic while increasing recall for related vocabulary found in top-ranked seed blocks.
|
|
285
|
+
|
|
286
|
+
### Optional semantic rerank (hybrid MVP)
|
|
287
|
+
|
|
288
|
+
```ts
|
|
289
|
+
query(pack, "throttle bridge", {
|
|
290
|
+
topK: 5,
|
|
291
|
+
semantic: {
|
|
292
|
+
enabled: true,
|
|
293
|
+
queryEmbedding, // Float32Array from your embedding model (required)
|
|
294
|
+
topN: 50,
|
|
295
|
+
minLexConfidence: 0.35,
|
|
296
|
+
force: false,
|
|
297
|
+
blend: { enabled: true, wLex: 0.75, wSem: 0.25 },
|
|
298
|
+
},
|
|
299
|
+
});
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
Lexical retrieval still runs first. Semantic rerank only touches top-N lexical candidates, and runs before de-dupe/MMR. If `pack.semantic` is missing, rerank is skipped silently; if `queryEmbedding` is omitted while enabled, `query(...)` throws.
|
|
303
|
+
|
|
304
|
+
Example with explicit validation:
|
|
305
|
+
|
|
306
|
+
```ts
|
|
307
|
+
validateSemanticQueryOptions({
|
|
308
|
+
enabled: true,
|
|
309
|
+
topN: 64,
|
|
310
|
+
minLexConfidence: 0.25,
|
|
311
|
+
blend: { enabled: true, wLex: 0.7, wSem: 0.3 },
|
|
312
|
+
queryEmbedding,
|
|
313
|
+
});
|
|
314
|
+
|
|
315
|
+
const hits = query(pack, userQuery, {
|
|
316
|
+
semantic: {
|
|
317
|
+
enabled: true,
|
|
318
|
+
queryEmbedding,
|
|
319
|
+
topN: 64,
|
|
320
|
+
minLexConfidence: 0.25,
|
|
321
|
+
},
|
|
322
|
+
});
|
|
323
|
+
```
|
|
324
|
+
|
|
181
325
|
### Tight vs. scattered matches
|
|
182
326
|
|
|
183
327
|
Proximity bonus favors blocks where all query terms co-occur in a small span.
|
|
@@ -203,13 +347,14 @@ Top-K results apply near-duplicate suppression (5-gram Jaccard) and MMR (λ≈0.
|
|
|
203
347
|
[lexLen:u32][lexicon JSON]
|
|
204
348
|
[postCount:u32][postings u32[]]
|
|
205
349
|
[blocksLen:u32][blocks JSON]
|
|
350
|
+
[semLen:u32][semantic JSON][semBlobLen:u32][semantic blob] // optional tail at EOF
|
|
206
351
|
```
|
|
207
352
|
|
|
208
353
|
**Meta JSON**
|
|
209
354
|
|
|
210
355
|
```json
|
|
211
356
|
{
|
|
212
|
-
"version":
|
|
357
|
+
"version": 3,
|
|
213
358
|
"stats": {
|
|
214
359
|
"docs": <number>,
|
|
215
360
|
"blocks": <number>,
|
|
@@ -219,6 +364,77 @@ Top-K results apply near-duplicate suppression (5-gram Jaccard) and MMR (λ≈0.
|
|
|
219
364
|
}
|
|
220
365
|
```
|
|
221
366
|
|
|
367
|
+
**Optional semantic tail**
|
|
368
|
+
|
|
369
|
+
* Fully backward compatible: if EOF is reached immediately after `blocks JSON`, no semantic data is present.
|
|
370
|
+
* Semantic tail schema version is `1` (`semantic.version = 1`).
|
|
371
|
+
* `buildPack(..., { semantic })` can now generate this section from provided `Float32Array` embeddings (no model inference at build time).
|
|
372
|
+
* Quantization is deterministic `int8_l2norm` per vector:
|
|
373
|
+
1. L2-normalize the input embedding.
|
|
374
|
+
2. Compute `scale = max(abs(e_i)) / 127`.
|
|
375
|
+
3. Quantize `q_i = clamp(round(e_i / scale), -127..127)`.
|
|
376
|
+
4. Store scale in `Uint16Array` using float16 encoding.
|
|
377
|
+
* Blob layout is **vectors first, scales second**:
|
|
378
|
+
* `blocks.vectors.byteOffset = 0`
|
|
379
|
+
* `blocks.vectors.length = blockCount * dims` (Int8 elements)
|
|
380
|
+
* `blocks.scales.byteOffset = vectors.byteLength`
|
|
381
|
+
* `blocks.scales.length = blockCount` (Uint16 elements)
|
|
382
|
+
|
|
383
|
+
Semantic JSON schema (stored verbatim in `[semantic JSON]`):
|
|
384
|
+
|
|
385
|
+
```json
|
|
386
|
+
{
|
|
387
|
+
"version": 1,
|
|
388
|
+
"modelId": "string",
|
|
389
|
+
"dims": 384,
|
|
390
|
+
"encoding": "int8_l2norm",
|
|
391
|
+
"perVectorScale": true,
|
|
392
|
+
"blocks": {
|
|
393
|
+
"vectors": { "byteOffset": 0, "length": 1152 },
|
|
394
|
+
"scales": { "byteOffset": 1152, "length": 3, "encoding": "float16" }
|
|
395
|
+
}
|
|
396
|
+
}
|
|
397
|
+
```
|
|
398
|
+
|
|
399
|
+
### Building packs with embeddings (library usage)
|
|
400
|
+
|
|
401
|
+
```ts
|
|
402
|
+
const embeddings: Float32Array[] = await Promise.all(
|
|
403
|
+
docs.map(async (doc) => embedText(doc.text))
|
|
404
|
+
);
|
|
405
|
+
|
|
406
|
+
const bytes = await buildPack(docs, {
|
|
407
|
+
semantic: {
|
|
408
|
+
enabled: true,
|
|
409
|
+
modelId: "text-embedding-3-small",
|
|
410
|
+
embeddings,
|
|
411
|
+
quantization: { type: "int8_l2norm", perVectorScale: true },
|
|
412
|
+
},
|
|
413
|
+
});
|
|
414
|
+
```
|
|
415
|
+
|
|
416
|
+
Embedding validation rules:
|
|
417
|
+
|
|
418
|
+
* `embeddings.length` must match block count exactly.
|
|
419
|
+
* every embedding must be `Float32Array`.
|
|
420
|
+
* every vector must have identical `dims`.
|
|
421
|
+
|
|
422
|
+
### Querying with semantic rerank
|
|
423
|
+
|
|
424
|
+
```ts
|
|
425
|
+
const queryEmbedding = await embedText(userQuestion);
|
|
426
|
+
const hits = query(pack, userQuestion, {
|
|
427
|
+
topK: 8,
|
|
428
|
+
semantic: {
|
|
429
|
+
enabled: true,
|
|
430
|
+
queryEmbedding,
|
|
431
|
+
topN: 64,
|
|
432
|
+
minLexConfidence: 0.35,
|
|
433
|
+
blend: { enabled: true, wLex: 0.75, wSem: 0.25 },
|
|
434
|
+
},
|
|
435
|
+
});
|
|
436
|
+
```
|
|
437
|
+
|
|
222
438
|
**Lexicon JSON**
|
|
223
439
|
|
|
224
440
|
* Array of `[term, termId]` pairs.
|
|
@@ -228,15 +444,16 @@ Top-K results apply near-duplicate suppression (5-gram Jaccard) and MMR (λ≈0.
|
|
|
228
444
|
* Flattened `Uint32Array`:
|
|
229
445
|
|
|
230
446
|
```
|
|
231
|
-
termId, blockId, pos, pos, …, 0, blockId, …, 0, 0, termId, ...
|
|
447
|
+
termId, blockId+1, pos, pos, …, 0, blockId+1, …, 0, 0, termId, ...
|
|
232
448
|
```
|
|
233
449
|
|
|
234
|
-
Each block section ends with `0`, each term section ends with `0`.
|
|
450
|
+
Block IDs are encoded as `bid + 1` so `0` is reserved as the delimiter. Each block section ends with `0`, each term section ends with `0`.
|
|
235
451
|
|
|
236
452
|
**Blocks JSON (v1 / v2 / v3)**
|
|
237
453
|
|
|
238
454
|
* **v1**: `string[]` (text only)
|
|
239
455
|
* **v2**: `{ text, heading?, docId? }[]`
|
|
456
|
+
* **v3**: `{ text, heading?, docId?, namespace?, len }[]` (`len` is block token length for stable ranking)
|
|
240
457
|
|
|
241
458
|
Runtime auto-detects and exposes:
|
|
242
459
|
|
|
@@ -244,7 +461,18 @@ Runtime auto-detects and exposes:
|
|
|
244
461
|
type Pack = {
|
|
245
462
|
meta, lexicon, postings, blocks: string[],
|
|
246
463
|
headings?: (string|null)[],
|
|
247
|
-
docIds?: (string|null)[]
|
|
464
|
+
docIds?: (string|null)[],
|
|
465
|
+
namespaces?: (string|null)[],
|
|
466
|
+
blockTokenLens?: number[],
|
|
467
|
+
semantic?: {
|
|
468
|
+
version: 1,
|
|
469
|
+
modelId: string,
|
|
470
|
+
dims: number,
|
|
471
|
+
encoding: "int8_l2norm",
|
|
472
|
+
perVectorScale: boolean,
|
|
473
|
+
vecs: Int8Array,
|
|
474
|
+
scales?: Uint16Array
|
|
475
|
+
}
|
|
248
476
|
}
|
|
249
477
|
```
|
|
250
478
|
|
|
@@ -319,7 +547,7 @@ npm run smoke
|
|
|
319
547
|
## FAQ
|
|
320
548
|
|
|
321
549
|
**Q: Does this use embeddings or a vector DB?**
|
|
322
|
-
A:
|
|
550
|
+
A: Default retrieval is lexical. Optional semantic hybrid rerank is supported when packs are built with embeddings; no external vector DB is required.
|
|
323
551
|
|
|
324
552
|
**Q: Why am I still seeing similar results?**
|
|
325
553
|
A: De-dup suppresses near-duplicates but allows related passages. Increase Jaccard threshold or tune λ (if forking).
|