knolo-core 0.2.3 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/DOCS.md CHANGED
@@ -1,7 +1,9 @@
1
1
 
2
2
  # DOCS.md — KnoLo Core
3
3
 
4
- > Deterministic, embedding-free retrieval and portable knowledge packs.
4
+ > Deterministic, lexical-first retrieval with optional semantic (embedding-based) hybrid rerank, and portable knowledge packs.
5
+
6
+ Determinism note: lexical retrieval is deterministic, and semantic rerank is deterministic given the same `.knolo` pack bytes, query embedding model, and embedding provider outputs.
5
7
 
6
8
  ## Table of Contents
7
9
 
@@ -51,8 +53,8 @@ npm install knolo-core
51
53
  import { buildPack, mountPack, query, makeContextPatch } from "knolo-core";
52
54
 
53
55
  const docs = [
54
- { id: "guide", heading: "React Native Bridge", text: "The bridge sends messages between JS and native. You can throttle events..." },
55
- { id: "throttle", heading: "Throttling", text: "Throttling reduces frequency of events..." }
56
+ { id: "guide", namespace: "mobile", heading: "React Native Bridge", text: "The bridge sends messages between JS and native. You can throttle events..." },
57
+ { id: "throttle", namespace: "mobile", heading: "Throttling", text: "Throttling reduces frequency of events..." }
56
58
  ];
57
59
 
58
60
  const bytes = await buildPack(docs);
@@ -64,10 +66,21 @@ const patch = makeContextPatch(hits, { budget: "small" });
64
66
  ### CLI build
65
67
 
66
68
  ```bash
67
- # input docs.json -> output knowledge.knolo
69
+ # lexical-only
68
70
  npx knolo docs.json knowledge.knolo
71
+
72
+ # semantic-enabled build (embeddings JSON + model id)
73
+ npx knolo docs.json knowledge.knolo --embeddings embeddings.json --model-id text-embedding-3-small
74
+
75
+ # embed agents from a local directory (.json/.yml/.yaml)
76
+ npx knolo docs.json knowledge.knolo --agents ./examples/agents
69
77
  ```
70
78
 
79
+
80
+ ### Agents and namespace binding
81
+
82
+ When agent definitions are embedded into `meta.agents`, `resolveAgent(pack, { agentId, query, patch })` enforces **strict namespace binding**: `retrievalDefaults.namespace` always wins over caller `query.namespace`. This keeps retrieval deterministic and on-policy for each agent.
83
+
71
84
  ---
72
85
 
73
86
  ## Concepts
@@ -90,6 +103,7 @@ npx knolo docs.json knowledge.knolo
90
103
  type BuildInputDoc = {
91
104
  id?: string; // exposed later as hit.source
92
105
  heading?: string; // boosts relevance when overlapping query terms
106
+ namespace?: string; // optional namespace for scoped retrieval
93
107
  text: string; // raw markdown accepted (lightly stripped)
94
108
  };
95
109
  ```
@@ -97,7 +111,17 @@ type BuildInputDoc = {
97
111
  ### API
98
112
 
99
113
  ```ts
100
- const bytes: Uint8Array = await buildPack(docs: BuildInputDoc[]);
114
+ const bytes: Uint8Array = await buildPack(docs: BuildInputDoc[], {
115
+ semantic?: {
116
+ enabled: boolean;
117
+ modelId: string;
118
+ embeddings: Float32Array[]; // same length/order as blocks
119
+ quantization?: {
120
+ type: 'int8_l2norm';
121
+ perVectorScale?: true;
122
+ };
123
+ };
124
+ });
101
125
  ```
102
126
 
103
127
  **Tips**
@@ -115,7 +139,30 @@ const bytes: Uint8Array = await buildPack(docs: BuildInputDoc[]);
115
139
  ```ts
116
140
  type QueryOptions = {
117
141
  topK?: number; // default 10
142
+ minScore?: number; // optional absolute score floor
118
143
  requirePhrases?: string[]; // phrases that must appear verbatim
144
+ namespace?: string | string[]; // optional namespace filter(s)
145
+ source?: string | string[]; // optional source/docId filter(s)
146
+ queryExpansion?: {
147
+ enabled?: boolean; // default true
148
+ docs?: number; // top seed docs, default 3
149
+ terms?: number; // expanded lexical terms, default 4
150
+ weight?: number; // tf scaling for expansion terms, default 0.35
151
+ minTermLength?: number; // default 3
152
+ };
153
+ semantic?: {
154
+ enabled?: boolean; // default false
155
+ mode?: "rerank"; // default "rerank"
156
+ topN?: number; // default 50
157
+ minLexConfidence?: number; // default 0.35
158
+ blend?: {
159
+ enabled?: boolean; // default true
160
+ wLex?: number; // default 0.75
161
+ wSem?: number; // default 0.25
162
+ };
163
+ queryEmbedding?: Float32Array; // required if enabled=true
164
+ force?: boolean; // rerank even when lexical confidence is high
165
+ };
119
166
  };
120
167
 
121
168
  type Hit = {
@@ -123,22 +170,50 @@ type Hit = {
123
170
  score: number;
124
171
  text: string;
125
172
  source?: string; // docId if provided
173
+ namespace?: string; // namespace if provided
126
174
  };
127
175
 
128
176
  const hits: Hit[] = query(pack, '“react native bridge” throttling', {
129
177
  topK: 5,
130
- requirePhrases: ["maximum rate"] // hard constraint
178
+ requirePhrases: ["maximum rate"], // hard constraint
179
+ namespace: "mobile",
180
+ source: ["guide", "faq"]
131
181
  });
132
182
  ```
133
183
 
184
+ ### Semantic helper ergonomics
185
+
186
+ ```ts
187
+ import { hasSemantic, validateQueryOptions, validateSemanticQueryOptions } from "knolo-core";
188
+
189
+ if (hasSemantic(pack)) {
190
+ validateQueryOptions({
191
+ topK: 10,
192
+ namespace: "mobile",
193
+ queryExpansion: { enabled: true, docs: 3, terms: 4 },
194
+ });
195
+
196
+ validateSemanticQueryOptions({
197
+ enabled: true,
198
+ topN: 40,
199
+ minLexConfidence: 0.35,
200
+ queryEmbedding,
201
+ });
202
+ }
203
+ ```
204
+
205
+ `validateQueryOptions(...)` and `validateSemanticQueryOptions(...)` throw descriptive errors for invalid option types or ranges (for example `topK`, `queryExpansion.docs`, `topN`, `minLexConfidence`, blend weights, or a `queryEmbedding` that is missing or is not a `Float32Array`).
206
+
134
207
  **What the ranker does**
135
208
 
136
209
  1. Enforces quoted/required phrases (hard filter)
137
- 2. BM25L with precomputed avg block length
210
+ 2. Corpus-aware BM25L with true IDF, query-time DF collection, and per-block length normalization
138
211
  3. **Proximity bonus** (minimal span cover)
139
212
  4. **Heading overlap** boost
140
- 5. **KNS** tie-breaker (small, deterministic)
141
- 6. **De-dupe + MMR** diversity for final top-K
213
+ 5. Deterministic **pseudo-relevance query expansion** from top lexical seeds
214
+ 6. **KNS** tie-breaker (small, deterministic)
215
+ 7. Optional semantic rerank over lexical top-N when confidence is low
216
+ 8. **De-dupe + MMR** diversity for final top-K
142
217
 
143
218
  ---
144
219
 
@@ -178,6 +253,75 @@ const patch = makeContextPatch(hits, { budget: "mini" | "small" | "full" });
178
253
  query(pack, "throttling", { requirePhrases: ["react native bridge"] });
179
254
  ```
180
255
 
256
+ ### Namespace-scoped retrieval
257
+
258
+ ```ts
259
+ query(pack, "bridge events", { namespace: ["mobile", "sdk"] });
260
+ ```
261
+
262
+ ### Source/docId-scoped retrieval
263
+
264
+ ```ts
265
+ query(pack, "throttling", { source: ["guide", "faq"] });
266
+ ```
267
+
268
+ ### Minimum score threshold
269
+
270
+ ```ts
271
+ query(pack, "throttle bridge", { minScore: 2.5 });
272
+ ```
273
+
274
+ Use this when you prefer precision over recall and only want confident lexical matches.
275
+
276
+ ### Query expansion controls
277
+
278
+ ```ts
279
+ query(pack, "throttle bridge", {
280
+ queryExpansion: { enabled: true, docs: 4, terms: 6, weight: 0.3 }
281
+ });
282
+ ```
283
+
284
+ This keeps retrieval lexical/deterministic while increasing recall for related vocabulary found in top-ranked seed blocks.
285
+
286
+ ### Optional semantic rerank (hybrid MVP)
287
+
288
+ ```ts
289
+ query(pack, "throttle bridge", {
290
+ topK: 5,
291
+ semantic: {
292
+ enabled: true,
293
+ queryEmbedding, // Float32Array from your embedding model (required)
294
+ topN: 50,
295
+ minLexConfidence: 0.35,
296
+ force: false,
297
+ blend: { enabled: true, wLex: 0.75, wSem: 0.25 },
298
+ },
299
+ });
300
+ ```
301
+
302
+ Lexical retrieval still runs first. Semantic rerank only touches top-N lexical candidates, and runs before de-dupe/MMR. If `pack.semantic` is missing, rerank is skipped silently; if `queryEmbedding` is omitted while enabled, `query(...)` throws.
303
+
304
+ Example with explicit validation:
305
+
306
+ ```ts
307
+ validateSemanticQueryOptions({
308
+ enabled: true,
309
+ topN: 64,
310
+ minLexConfidence: 0.25,
311
+ blend: { enabled: true, wLex: 0.7, wSem: 0.3 },
312
+ queryEmbedding,
313
+ });
314
+
315
+ const hits = query(pack, userQuery, {
316
+ semantic: {
317
+ enabled: true,
318
+ queryEmbedding,
319
+ topN: 64,
320
+ minLexConfidence: 0.25,
321
+ },
322
+ });
323
+ ```
324
+
181
325
  ### Tight vs. scattered matches
182
326
 
183
327
  Proximity bonus favors blocks where all query terms co-occur in a small span.
@@ -203,13 +347,14 @@ Top-K results apply near-duplicate suppression (5-gram Jaccard) and MMR (λ≈0.
203
347
  [lexLen:u32][lexicon JSON]
204
348
  [postCount:u32][postings u32[]]
205
349
  [blocksLen:u32][blocks JSON]
350
+ [semLen:u32][semantic JSON][semBlobLen:u32][semantic blob] // optional tail at EOF
206
351
  ```
207
352
 
208
353
  **Meta JSON**
209
354
 
210
355
  ```json
211
356
  {
212
- "version": 2,
357
+ "version": 3,
213
358
  "stats": {
214
359
  "docs": <number>,
215
360
  "blocks": <number>,
@@ -219,6 +364,77 @@ Top-K results apply near-duplicate suppression (5-gram Jaccard) and MMR (λ≈0.
219
364
  }
220
365
  ```
221
366
 
367
+ **Optional semantic tail**
368
+
369
+ * Fully backward compatible: if EOF is reached immediately after `blocks JSON`, no semantic data is present.
370
+ * Semantic tail schema version is `1` (`semantic.version = 1`).
371
+ * `buildPack(..., { semantic })` can now generate this section from provided `Float32Array` embeddings (no model inference at build time).
372
+ * Quantization is deterministic `int8_l2norm` per vector:
373
+ 1. L2-normalize the input embedding.
374
+ 2. Compute `scale = max(abs(e_i)) / 127`.
375
+ 3. Quantize `q_i = clamp(round(e_i / scale), -127..127)`.
376
+ 4. Store scale in `Uint16Array` using float16 encoding.
377
+ * Blob layout is **vectors first, scales second**:
378
+ * `blocks.vectors.byteOffset = 0`
379
+ * `blocks.vectors.length = blockCount * dims` (Int8 elements)
380
+ * `blocks.scales.byteOffset = vectors.byteLength`
381
+ * `blocks.scales.length = blockCount` (Uint16 elements)
382
+
383
+ Semantic JSON schema (stored verbatim in `[semantic JSON]`):
384
+
385
+ ```json
386
+ {
387
+ "version": 1,
388
+ "modelId": "string",
389
+ "dims": 384,
390
+ "encoding": "int8_l2norm",
391
+ "perVectorScale": true,
392
+ "blocks": {
393
+ "vectors": { "byteOffset": 0, "length": 1152 },
394
+ "scales": { "byteOffset": 1152, "length": 3, "encoding": "float16" }
395
+ }
396
+ }
397
+ ```
398
+
399
+ ### Building packs with embeddings (library usage)
400
+
401
+ ```ts
402
+ const embeddings: Float32Array[] = await Promise.all(
403
+ docs.map(async (doc) => embedText(doc.text))
404
+ );
405
+
406
+ const bytes = await buildPack(docs, {
407
+ semantic: {
408
+ enabled: true,
409
+ modelId: "text-embedding-3-small",
410
+ embeddings,
411
+ quantization: { type: "int8_l2norm", perVectorScale: true },
412
+ },
413
+ });
414
+ ```
415
+
416
+ Embedding validation rules:
417
+
418
+ * `embeddings.length` must match block count exactly.
419
+ * every embedding must be `Float32Array`.
420
+ * every vector must have identical `dims`.
421
+
422
+ ### Querying with semantic rerank
423
+
424
+ ```ts
425
+ const queryEmbedding = await embedText(userQuestion);
426
+ const hits = query(pack, userQuestion, {
427
+ topK: 8,
428
+ semantic: {
429
+ enabled: true,
430
+ queryEmbedding,
431
+ topN: 64,
432
+ minLexConfidence: 0.35,
433
+ blend: { enabled: true, wLex: 0.75, wSem: 0.25 },
434
+ },
435
+ });
436
+ ```
437
+
222
438
  **Lexicon JSON**
223
439
 
224
440
  * Array of `[term, termId]` pairs.
@@ -228,15 +444,16 @@ Top-K results apply near-duplicate suppression (5-gram Jaccard) and MMR (λ≈0.
228
444
  * Flattened `Uint32Array`:
229
445
 
230
446
  ```
231
- termId, blockId, pos, pos, …, 0, blockId, …, 0, 0, termId, ...
447
+ termId, blockId+1, pos, pos, …, 0, blockId+1, …, 0, 0, termId, ...
232
448
  ```
233
449
 
234
- Each block section ends with `0`, each term section ends with `0`.
450
+ Block IDs are encoded as `bid + 1` so that `0` is reserved as the delimiter. Each block section ends with `0`, and each term section ends with an additional `0`.
235
451
 
236
452
  **Blocks JSON (v1 / v2 / v3)**
237
453
 
238
454
  * **v1**: `string[]` (text only)
239
455
  * **v2**: `{ text, heading?, docId? }[]`
456
+ * **v3**: `{ text, heading?, docId?, namespace?, len }[]` (`len` is block token length for stable ranking)
240
457
 
241
458
  Runtime auto-detects and exposes:
242
459
 
@@ -244,7 +461,18 @@ Runtime auto-detects and exposes:
244
461
  type Pack = {
245
462
  meta, lexicon, postings, blocks: string[],
246
463
  headings?: (string|null)[],
247
- docIds?: (string|null)[]
464
+ docIds?: (string|null)[],
465
+ namespaces?: (string|null)[],
466
+ blockTokenLens?: number[],
467
+ semantic?: {
468
+ version: 1,
469
+ modelId: string,
470
+ dims: number,
471
+ encoding: "int8_l2norm",
472
+ perVectorScale: boolean,
473
+ vecs: Int8Array,
474
+ scales?: Uint16Array
475
+ }
248
476
  }
249
477
  ```
250
478
 
@@ -319,7 +547,7 @@ npm run smoke
319
547
  ## FAQ
320
548
 
321
549
  **Q: Does this use embeddings or a vector DB?**
322
- A: No—pure lexical retrieval with positions and structural cues.
550
+ A: Default retrieval is lexical. Optional semantic hybrid rerank is supported when packs are built with embeddings; no external vector DB is required.
323
551
 
324
552
  **Q: Why am I still seeing similar results?**
325
553
  A: De-dup suppresses near-duplicates but allows related passages. Increase Jaccard threshold or tune λ (if forking).