knolo-core 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DOCS.md +228 -14
- package/README.md +229 -151
- package/bin/knolo.mjs +120 -13
- package/dist/builder.d.ts +13 -1
- package/dist/builder.js +103 -11
- package/dist/index.d.ts +4 -2
- package/dist/index.js +3 -2
- package/dist/indexer.d.ts +2 -1
- package/dist/indexer.js +3 -2
- package/dist/pack.d.ts +12 -0
- package/dist/pack.js +77 -4
- package/dist/patch.d.ts +1 -8
- package/dist/patch.js +2 -17
- package/dist/query.d.ts +28 -0
- package/dist/query.js +282 -18
- package/dist/rank.d.ts +1 -1
- package/dist/rank.js +5 -4
- package/dist/semantic.d.ts +7 -0
- package/dist/semantic.js +98 -0
- package/dist/tokenize.js +1 -1
- package/package.json +3 -2
package/DOCS.md
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
|
|
2
2
|
# DOCS.md — KnoLo Core
|
|
3
3
|
|
|
4
|
-
> Deterministic, embedding-
|
|
4
|
+
> Deterministic, lexical-first retrieval with optional semantic (embedding-based) hybrid rerank, and portable knowledge packs.
|
|
5
|
+
|
|
6
|
+
Determinism note: lexical retrieval is deterministic, and semantic rerank is deterministic given the same `.knolo` pack bytes, query embedding model, and embedding provider outputs.
|
|
5
7
|
|
|
6
8
|
## Table of Contents
|
|
7
9
|
|
|
@@ -51,8 +53,8 @@ npm install knolo-core
|
|
|
51
53
|
import { buildPack, mountPack, query, makeContextPatch } from "knolo-core";
|
|
52
54
|
|
|
53
55
|
const docs = [
|
|
54
|
-
{ id: "guide", heading: "React Native Bridge", text: "The bridge sends messages between JS and native. You can throttle events..." },
|
|
55
|
-
{ id: "throttle", heading: "Throttling", text: "Throttling reduces frequency of events..." }
|
|
56
|
+
{ id: "guide", namespace: "mobile", heading: "React Native Bridge", text: "The bridge sends messages between JS and native. You can throttle events..." },
|
|
57
|
+
{ id: "throttle", namespace: "mobile", heading: "Throttling", text: "Throttling reduces frequency of events..." }
|
|
56
58
|
];
|
|
57
59
|
|
|
58
60
|
const bytes = await buildPack(docs);
|
|
@@ -64,8 +66,11 @@ const patch = makeContextPatch(hits, { budget: "small" });
|
|
|
64
66
|
### CLI build
|
|
65
67
|
|
|
66
68
|
```bash
|
|
67
|
-
#
|
|
69
|
+
# lexical-only
|
|
68
70
|
npx knolo docs.json knowledge.knolo
|
|
71
|
+
|
|
72
|
+
# semantic-enabled build (embeddings JSON + model id)
|
|
73
|
+
npx knolo docs.json knowledge.knolo --embeddings embeddings.json --model-id text-embedding-3-small
|
|
69
74
|
```
|
|
70
75
|
|
|
71
76
|
---
|
|
@@ -90,6 +95,7 @@ npx knolo docs.json knowledge.knolo
|
|
|
90
95
|
type BuildInputDoc = {
|
|
91
96
|
id?: string; // exposed later as hit.source
|
|
92
97
|
heading?: string; // boosts relevance when overlapping query terms
|
|
98
|
+
namespace?: string; // optional namespace for scoped retrieval
|
|
93
99
|
text: string; // raw markdown accepted (lightly stripped)
|
|
94
100
|
};
|
|
95
101
|
```
|
|
@@ -97,7 +103,17 @@ type BuildInputDoc = {
|
|
|
97
103
|
### API
|
|
98
104
|
|
|
99
105
|
```ts
|
|
100
|
-
const bytes: Uint8Array = await buildPack(docs: BuildInputDoc[]
|
|
106
|
+
const bytes: Uint8Array = await buildPack(docs: BuildInputDoc[], {
|
|
107
|
+
semantic?: {
|
|
108
|
+
enabled: boolean;
|
|
109
|
+
modelId: string;
|
|
110
|
+
embeddings: Float32Array[]; // same length/order as blocks
|
|
111
|
+
quantization?: {
|
|
112
|
+
type: 'int8_l2norm';
|
|
113
|
+
perVectorScale?: true;
|
|
114
|
+
};
|
|
115
|
+
};
|
|
116
|
+
});
|
|
101
117
|
```
|
|
102
118
|
|
|
103
119
|
**Tips**
|
|
@@ -115,7 +131,30 @@ const bytes: Uint8Array = await buildPack(docs: BuildInputDoc[]);
|
|
|
115
131
|
```ts
|
|
116
132
|
type QueryOptions = {
|
|
117
133
|
topK?: number; // default 10
|
|
134
|
+
minScore?: number; // optional absolute score floor
|
|
118
135
|
requirePhrases?: string[]; // phrases that must appear verbatim
|
|
136
|
+
namespace?: string | string[]; // optional namespace filter(s)
|
|
137
|
+
source?: string | string[]; // optional source/docId filter(s)
|
|
138
|
+
queryExpansion?: {
|
|
139
|
+
enabled?: boolean; // default true
|
|
140
|
+
docs?: number; // top seed docs, default 3
|
|
141
|
+
terms?: number; // expanded lexical terms, default 4
|
|
142
|
+
weight?: number; // tf scaling for expansion terms, default 0.35
|
|
143
|
+
minTermLength?: number; // default 3
|
|
144
|
+
};
|
|
145
|
+
semantic?: {
|
|
146
|
+
enabled?: boolean; // default false
|
|
147
|
+
mode?: "rerank"; // default "rerank"
|
|
148
|
+
topN?: number; // default 50
|
|
149
|
+
minLexConfidence?: number; // default 0.35
|
|
150
|
+
blend?: {
|
|
151
|
+
enabled?: boolean; // default true
|
|
152
|
+
wLex?: number; // default 0.75
|
|
153
|
+
wSem?: number; // default 0.25
|
|
154
|
+
};
|
|
155
|
+
queryEmbedding?: Float32Array; // required if enabled=true
|
|
156
|
+
force?: boolean; // rerank even when lexical confidence is high
|
|
157
|
+
};
|
|
119
158
|
};
|
|
120
159
|
|
|
121
160
|
type Hit = {
|
|
@@ -123,22 +162,44 @@ type Hit = {
|
|
|
123
162
|
score: number;
|
|
124
163
|
text: string;
|
|
125
164
|
source?: string; // docId if provided
|
|
165
|
+
namespace?: string; // namespace if provided
|
|
126
166
|
};
|
|
127
167
|
|
|
128
168
|
const hits: Hit[] = query(pack, '“react native bridge” throttling', {
|
|
129
169
|
topK: 5,
|
|
130
|
-
requirePhrases: ["maximum rate"] // hard constraint
|
|
170
|
+
requirePhrases: ["maximum rate"], // hard constraint
|
|
171
|
+
namespace: "mobile",
|
|
172
|
+
source: ["guide", "faq"]
|
|
131
173
|
});
|
|
132
174
|
```
|
|
133
175
|
|
|
176
|
+
### Semantic helper ergonomics
|
|
177
|
+
|
|
178
|
+
```ts
|
|
179
|
+
import { hasSemantic, validateSemanticQueryOptions } from "knolo-core";
|
|
180
|
+
|
|
181
|
+
if (hasSemantic(pack)) {
|
|
182
|
+
validateSemanticQueryOptions({
|
|
183
|
+
enabled: true,
|
|
184
|
+
topN: 40,
|
|
185
|
+
minLexConfidence: 0.35,
|
|
186
|
+
queryEmbedding,
|
|
187
|
+
});
|
|
188
|
+
}
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
`validateSemanticQueryOptions(...)` throws descriptive errors for invalid option types or ranges (`topN`, `minLexConfidence`, blend weights) and for a `queryEmbedding` that is missing or is not a `Float32Array`.
|
|
192
|
+
|
|
134
193
|
**What the ranker does**
|
|
135
194
|
|
|
136
195
|
1. Enforces quoted/required phrases (hard filter)
|
|
137
|
-
2. BM25L with
|
|
196
|
+
2. Corpus-aware BM25L with true IDF, query-time DF collection, and per-block length normalization
|
|
138
197
|
3. **Proximity bonus** (minimal span cover)
|
|
139
198
|
4. **Heading overlap** boost
|
|
140
|
-
5. **
|
|
141
|
-
6. **
|
|
199
|
+
5. Deterministic **pseudo-relevance query expansion** from top lexical seeds
|
|
200
|
+
6. **KNS** tie-breaker (small, deterministic)
|
|
201
|
+
7. Optional semantic rerank over lexical top-N when confidence is low
|
|
202
|
+
8. **De-dupe + MMR** diversity for final top-K
|
|
142
203
|
|
|
143
204
|
---
|
|
144
205
|
|
|
@@ -178,6 +239,75 @@ const patch = makeContextPatch(hits, { budget: "mini" | "small" | "full" });
|
|
|
178
239
|
query(pack, "throttling", { requirePhrases: ["react native bridge"] });
|
|
179
240
|
```
|
|
180
241
|
|
|
242
|
+
### Namespace-scoped retrieval
|
|
243
|
+
|
|
244
|
+
```ts
|
|
245
|
+
query(pack, "bridge events", { namespace: ["mobile", "sdk"] });
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
### Source/docId-scoped retrieval
|
|
249
|
+
|
|
250
|
+
```ts
|
|
251
|
+
query(pack, "throttling", { source: ["guide", "faq"] });
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
### Minimum score threshold
|
|
255
|
+
|
|
256
|
+
```ts
|
|
257
|
+
query(pack, "throttle bridge", { minScore: 2.5 });
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
Use this when you prefer precision over recall and only want confident lexical matches.
|
|
261
|
+
|
|
262
|
+
### Query expansion controls
|
|
263
|
+
|
|
264
|
+
```ts
|
|
265
|
+
query(pack, "throttle bridge", {
|
|
266
|
+
queryExpansion: { enabled: true, docs: 4, terms: 6, weight: 0.3 }
|
|
267
|
+
});
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
This keeps retrieval lexical/deterministic while increasing recall for related vocabulary found in top-ranked seed blocks.
|
|
271
|
+
|
|
272
|
+
### Optional semantic rerank (hybrid MVP)
|
|
273
|
+
|
|
274
|
+
```ts
|
|
275
|
+
query(pack, "throttle bridge", {
|
|
276
|
+
topK: 5,
|
|
277
|
+
semantic: {
|
|
278
|
+
enabled: true,
|
|
279
|
+
queryEmbedding, // Float32Array from your embedding model (required)
|
|
280
|
+
topN: 50,
|
|
281
|
+
minLexConfidence: 0.35,
|
|
282
|
+
force: false,
|
|
283
|
+
blend: { enabled: true, wLex: 0.75, wSem: 0.25 },
|
|
284
|
+
},
|
|
285
|
+
});
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
Lexical retrieval still runs first. Semantic rerank only touches top-N lexical candidates, and runs before de-dupe/MMR. If `pack.semantic` is missing, rerank is skipped silently; if `queryEmbedding` is omitted while enabled, `query(...)` throws.
|
|
289
|
+
|
|
290
|
+
Example with explicit validation:
|
|
291
|
+
|
|
292
|
+
```ts
|
|
293
|
+
validateSemanticQueryOptions({
|
|
294
|
+
enabled: true,
|
|
295
|
+
topN: 64,
|
|
296
|
+
minLexConfidence: 0.25,
|
|
297
|
+
blend: { enabled: true, wLex: 0.7, wSem: 0.3 },
|
|
298
|
+
queryEmbedding,
|
|
299
|
+
});
|
|
300
|
+
|
|
301
|
+
const hits = query(pack, userQuery, {
|
|
302
|
+
semantic: {
|
|
303
|
+
enabled: true,
|
|
304
|
+
queryEmbedding,
|
|
305
|
+
topN: 64,
|
|
306
|
+
minLexConfidence: 0.25,
|
|
307
|
+
},
|
|
308
|
+
});
|
|
309
|
+
```
|
|
310
|
+
|
|
181
311
|
### Tight vs. scattered matches
|
|
182
312
|
|
|
183
313
|
Proximity bonus favors blocks where all query terms co-occur in a small span.
|
|
@@ -203,13 +333,14 @@ Top-K results apply near-duplicate suppression (5-gram Jaccard) and MMR (λ≈0.
|
|
|
203
333
|
[lexLen:u32][lexicon JSON]
|
|
204
334
|
[postCount:u32][postings u32[]]
|
|
205
335
|
[blocksLen:u32][blocks JSON]
|
|
336
|
+
[semLen:u32][semantic JSON][semBlobLen:u32][semantic blob] // optional tail at EOF
|
|
206
337
|
```
|
|
207
338
|
|
|
208
339
|
**Meta JSON**
|
|
209
340
|
|
|
210
341
|
```json
|
|
211
342
|
{
|
|
212
|
-
"version":
|
|
343
|
+
"version": 3,
|
|
213
344
|
"stats": {
|
|
214
345
|
"docs": <number>,
|
|
215
346
|
"blocks": <number>,
|
|
@@ -219,6 +350,77 @@ Top-K results apply near-duplicate suppression (5-gram Jaccard) and MMR (λ≈0.
|
|
|
219
350
|
}
|
|
220
351
|
```
|
|
221
352
|
|
|
353
|
+
**Optional semantic tail**
|
|
354
|
+
|
|
355
|
+
* Fully backward compatible: if EOF is reached immediately after `blocks JSON`, no semantic data is present.
|
|
356
|
+
* Semantic tail schema version is `1` (`semantic.version = 1`).
|
|
357
|
+
* `buildPack(..., { semantic })` can now generate this section from provided `Float32Array` embeddings (no model inference at build time).
|
|
358
|
+
* Quantization is deterministic `int8_l2norm` per vector:
|
|
359
|
+
1. L2-normalize the input embedding.
|
|
360
|
+
2. Compute `scale = max(abs(e_i)) / 127`.
|
|
361
|
+
3. Quantize `q_i = clamp(round(e_i / scale), -127..127)`.
|
|
362
|
+
4. Store scale in `Uint16Array` using float16 encoding.
|
|
363
|
+
* Blob layout is **vectors first, scales second**:
|
|
364
|
+
* `blocks.vectors.byteOffset = 0`
|
|
365
|
+
* `blocks.vectors.length = blockCount * dims` (Int8 elements)
|
|
366
|
+
* `blocks.scales.byteOffset = vectors.byteLength`
|
|
367
|
+
* `blocks.scales.length = blockCount` (Uint16 elements)
|
|
368
|
+
|
|
369
|
+
Semantic JSON schema (stored verbatim in `[semantic JSON]`):
|
|
370
|
+
|
|
371
|
+
```json
|
|
372
|
+
{
|
|
373
|
+
"version": 1,
|
|
374
|
+
"modelId": "string",
|
|
375
|
+
"dims": 384,
|
|
376
|
+
"encoding": "int8_l2norm",
|
|
377
|
+
"perVectorScale": true,
|
|
378
|
+
"blocks": {
|
|
379
|
+
"vectors": { "byteOffset": 0, "length": 1152 },
|
|
380
|
+
"scales": { "byteOffset": 1152, "length": 3, "encoding": "float16" }
|
|
381
|
+
}
|
|
382
|
+
}
|
|
383
|
+
```
|
|
384
|
+
|
|
385
|
+
### Building packs with embeddings (library usage)
|
|
386
|
+
|
|
387
|
+
```ts
|
|
388
|
+
const embeddings: Float32Array[] = await Promise.all(
|
|
389
|
+
docs.map(async (doc) => embedText(doc.text))
|
|
390
|
+
);
|
|
391
|
+
|
|
392
|
+
const bytes = await buildPack(docs, {
|
|
393
|
+
semantic: {
|
|
394
|
+
enabled: true,
|
|
395
|
+
modelId: "text-embedding-3-small",
|
|
396
|
+
embeddings,
|
|
397
|
+
quantization: { type: "int8_l2norm", perVectorScale: true },
|
|
398
|
+
},
|
|
399
|
+
});
|
|
400
|
+
```
|
|
401
|
+
|
|
402
|
+
Embedding validation rules:
|
|
403
|
+
|
|
404
|
+
* `embeddings.length` must match block count exactly.
|
|
405
|
+
* every embedding must be `Float32Array`.
|
|
406
|
+
* every vector must have identical `dims`.
|
|
407
|
+
|
|
408
|
+
### Querying with semantic rerank
|
|
409
|
+
|
|
410
|
+
```ts
|
|
411
|
+
const queryEmbedding = await embedText(userQuestion);
|
|
412
|
+
const hits = query(pack, userQuestion, {
|
|
413
|
+
topK: 8,
|
|
414
|
+
semantic: {
|
|
415
|
+
enabled: true,
|
|
416
|
+
queryEmbedding,
|
|
417
|
+
topN: 64,
|
|
418
|
+
minLexConfidence: 0.35,
|
|
419
|
+
blend: { enabled: true, wLex: 0.75, wSem: 0.25 },
|
|
420
|
+
},
|
|
421
|
+
});
|
|
422
|
+
```
|
|
423
|
+
|
|
222
424
|
**Lexicon JSON**
|
|
223
425
|
|
|
224
426
|
* Array of `[term, termId]` pairs.
|
|
@@ -228,15 +430,16 @@ Top-K results apply near-duplicate suppression (5-gram Jaccard) and MMR (λ≈0.
|
|
|
228
430
|
* Flattened `Uint32Array`:
|
|
229
431
|
|
|
230
432
|
```
|
|
231
|
-
termId, blockId, pos, pos, …, 0, blockId, …, 0, 0, termId, ...
|
|
433
|
+
termId, blockId+1, pos, pos, …, 0, blockId+1, …, 0, 0, termId, ...
|
|
232
434
|
```
|
|
233
435
|
|
|
234
|
-
Each block section ends with `0`, each term section ends with `0`.
|
|
436
|
+
Block IDs are encoded as `bid + 1` so that `0` is reserved as the delimiter. Each block section ends with `0`, and each term section ends with an additional `0` (so the last block of a term is followed by `0, 0`).
|
|
235
437
|
|
|
236
438
|
**Blocks JSON (v1 / v2 / v3)**
|
|
237
439
|
|
|
238
440
|
* **v1**: `string[]` (text only)
|
|
239
441
|
* **v2**: `{ text, heading?, docId? }[]`
|
|
442
|
+
* **v3**: `{ text, heading?, docId?, namespace?, len }[]` (`len` is block token length for stable ranking)
|
|
240
443
|
|
|
241
444
|
Runtime auto-detects and exposes:
|
|
242
445
|
|
|
@@ -244,7 +447,18 @@ Runtime auto-detects and exposes:
|
|
|
244
447
|
type Pack = {
|
|
245
448
|
meta, lexicon, postings, blocks: string[],
|
|
246
449
|
headings?: (string|null)[],
|
|
247
|
-
docIds?: (string|null)[]
|
|
450
|
+
docIds?: (string|null)[],
|
|
451
|
+
namespaces?: (string|null)[],
|
|
452
|
+
blockTokenLens?: number[],
|
|
453
|
+
semantic?: {
|
|
454
|
+
version: 1,
|
|
455
|
+
modelId: string,
|
|
456
|
+
dims: number,
|
|
457
|
+
encoding: "int8_l2norm",
|
|
458
|
+
perVectorScale: boolean,
|
|
459
|
+
vecs: Int8Array,
|
|
460
|
+
scales?: Uint16Array
|
|
461
|
+
}
|
|
248
462
|
}
|
|
249
463
|
```
|
|
250
464
|
|
|
@@ -319,7 +533,7 @@ npm run smoke
|
|
|
319
533
|
## FAQ
|
|
320
534
|
|
|
321
535
|
**Q: Does this use embeddings or a vector DB?**
|
|
322
|
-
A:
|
|
536
|
+
A: Default retrieval is lexical. Optional semantic hybrid rerank is supported when packs are built with embeddings; no external vector DB is required.
|
|
323
537
|
|
|
324
538
|
**Q: Why am I still seeing similar results?**
|
|
325
539
|
A: De-dup suppresses near-duplicates but allows related passages. Increase Jaccard threshold or tune λ (if forking).
|