knolo-core 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/DOCS.md CHANGED
@@ -1,7 +1,9 @@
1
1
 
2
2
  # DOCS.md — KnoLo Core
3
3
 
4
- > Deterministic, embedding-free retrieval and portable knowledge packs.
4
+ > Deterministic, lexical-first retrieval with optional embedding-based hybrid rerank, and portable knowledge packs.
5
+
6
+ Determinism note: lexical retrieval is deterministic, and semantic rerank is deterministic given the same `.knolo` pack bytes, query embedding model, and embedding provider outputs.
5
7
 
6
8
  ## Table of Contents
7
9
 
@@ -51,8 +53,8 @@ npm install knolo-core
51
53
  import { buildPack, mountPack, query, makeContextPatch } from "knolo-core";
52
54
 
53
55
  const docs = [
54
- { id: "guide", heading: "React Native Bridge", text: "The bridge sends messages between JS and native. You can throttle events..." },
55
- { id: "throttle", heading: "Throttling", text: "Throttling reduces frequency of events..." }
56
+ { id: "guide", namespace: "mobile", heading: "React Native Bridge", text: "The bridge sends messages between JS and native. You can throttle events..." },
57
+ { id: "throttle", namespace: "mobile", heading: "Throttling", text: "Throttling reduces frequency of events..." }
56
58
  ];
57
59
 
58
60
  const bytes = await buildPack(docs);
@@ -64,8 +66,11 @@ const patch = makeContextPatch(hits, { budget: "small" });
64
66
  ### CLI build
65
67
 
66
68
  ```bash
67
- # input docs.json -> output knowledge.knolo
69
+ # lexical-only
68
70
  npx knolo docs.json knowledge.knolo
71
+
72
+ # semantic-enabled build (embeddings JSON + model id)
73
+ npx knolo docs.json knowledge.knolo --embeddings embeddings.json --model-id text-embedding-3-small
69
74
  ```
70
75
 
71
76
  ---
@@ -90,6 +95,7 @@ npx knolo docs.json knowledge.knolo
90
95
  type BuildInputDoc = {
91
96
  id?: string; // exposed later as hit.source
92
97
  heading?: string; // boosts relevance when overlapping query terms
98
+ namespace?: string; // optional namespace for scoped retrieval
93
99
  text: string; // raw markdown accepted (lightly stripped)
94
100
  };
95
101
  ```
@@ -97,7 +103,17 @@ type BuildInputDoc = {
97
103
  ### API
98
104
 
99
105
  ```ts
100
- const bytes: Uint8Array = await buildPack(docs: BuildInputDoc[]);
106
+ const bytes: Uint8Array = await buildPack(docs: BuildInputDoc[], {
107
+ semantic?: {
108
+ enabled: boolean;
109
+ modelId: string;
110
+ embeddings: Float32Array[]; // same length/order as blocks
111
+ quantization?: {
112
+ type: 'int8_l2norm';
113
+ perVectorScale?: true;
114
+ };
115
+ };
116
+ });
101
117
  ```
102
118
 
103
119
  **Tips**
@@ -115,7 +131,30 @@ const bytes: Uint8Array = await buildPack(docs: BuildInputDoc[]);
115
131
  ```ts
116
132
  type QueryOptions = {
117
133
  topK?: number; // default 10
134
+ minScore?: number; // optional absolute score floor
118
135
  requirePhrases?: string[]; // phrases that must appear verbatim
136
+ namespace?: string | string[]; // optional namespace filter(s)
137
+ source?: string | string[]; // optional source/docId filter(s)
138
+ queryExpansion?: {
139
+ enabled?: boolean; // default true
140
+ docs?: number; // top seed docs, default 3
141
+ terms?: number; // expanded lexical terms, default 4
142
+ weight?: number; // tf scaling for expansion terms, default 0.35
143
+ minTermLength?: number; // default 3
144
+ };
145
+ semantic?: {
146
+ enabled?: boolean; // default false
147
+ mode?: "rerank"; // default "rerank"
148
+ topN?: number; // default 50
149
+ minLexConfidence?: number; // default 0.35
150
+ blend?: {
151
+ enabled?: boolean; // default true
152
+ wLex?: number; // default 0.75
153
+ wSem?: number; // default 0.25
154
+ };
155
+ queryEmbedding?: Float32Array; // required if enabled=true
156
+ force?: boolean; // rerank even when lexical confidence is high
157
+ };
119
158
  };
120
159
 
121
160
  type Hit = {
@@ -123,22 +162,44 @@ type Hit = {
123
162
  score: number;
124
163
  text: string;
125
164
  source?: string; // docId if provided
165
+ namespace?: string; // namespace if provided
126
166
  };
127
167
 
128
168
  const hits: Hit[] = query(pack, '“react native bridge” throttling', {
129
169
  topK: 5,
130
- requirePhrases: ["maximum rate"] // hard constraint
170
+ requirePhrases: ["maximum rate"], // hard constraint
171
+ namespace: "mobile",
172
+ source: ["guide", "faq"]
131
173
  });
132
174
  ```
133
175
 
176
+ ### Semantic helper ergonomics
177
+
178
+ ```ts
179
+ import { hasSemantic, validateSemanticQueryOptions } from "knolo-core";
180
+
181
+ if (hasSemantic(pack)) {
182
+ validateSemanticQueryOptions({
183
+ enabled: true,
184
+ topN: 40,
185
+ minLexConfidence: 0.35,
186
+ queryEmbedding,
187
+ });
188
+ }
189
+ ```
190
+
191
+ `validateSemanticQueryOptions(...)` throws useful errors for invalid option types/ranges (`topN`, `minLexConfidence`, blend weights, missing `Float32Array` embedding type).
192
+
134
193
  **What the ranker does**
135
194
 
136
195
  1. Enforces quoted/required phrases (hard filter)
137
- 2. BM25L with precomputed avg block length
196
+ 2. Corpus-aware BM25L with true IDF, query-time DF collection, and per-block length normalization
138
197
  3. **Proximity bonus** (minimal span cover)
139
198
  4. **Heading overlap** boost
140
- 5. **KNS** tie-breaker (small, deterministic)
141
- 6. **De-dupe + MMR** diversity for final top-K
199
+ 5. Deterministic **pseudo-relevance query expansion** from top lexical seeds
200
+ 6. **KNS** tie-breaker (small, deterministic)
201
+ 7. Optional semantic rerank over lexical top-N when confidence is low
202
+ 8. **De-dupe + MMR** diversity for final top-K
142
203
 
143
204
  ---
144
205
 
@@ -178,6 +239,75 @@ const patch = makeContextPatch(hits, { budget: "mini" | "small" | "full" });
178
239
  query(pack, "throttling", { requirePhrases: ["react native bridge"] });
179
240
  ```
180
241
 
242
+ ### Namespace-scoped retrieval
243
+
244
+ ```ts
245
+ query(pack, "bridge events", { namespace: ["mobile", "sdk"] });
246
+ ```
247
+
248
+ ### Source/docId-scoped retrieval
249
+
250
+ ```ts
251
+ query(pack, "throttling", { source: ["guide", "faq"] });
252
+ ```
253
+
254
+ ### Minimum score threshold
255
+
256
+ ```ts
257
+ query(pack, "throttle bridge", { minScore: 2.5 });
258
+ ```
259
+
260
+ Use this when you prefer precision over recall and only want confident lexical matches.
261
+
262
+ ### Query expansion controls
263
+
264
+ ```ts
265
+ query(pack, "throttle bridge", {
266
+ queryExpansion: { enabled: true, docs: 4, terms: 6, weight: 0.3 }
267
+ });
268
+ ```
269
+
270
+ This keeps retrieval lexical/deterministic while increasing recall for related vocabulary found in top-ranked seed blocks.
271
+
272
+ ### Optional semantic rerank (hybrid MVP)
273
+
274
+ ```ts
275
+ query(pack, "throttle bridge", {
276
+ topK: 5,
277
+ semantic: {
278
+ enabled: true,
279
+ queryEmbedding, // Float32Array from your embedding model (required)
280
+ topN: 50,
281
+ minLexConfidence: 0.35,
282
+ force: false,
283
+ blend: { enabled: true, wLex: 0.75, wSem: 0.25 },
284
+ },
285
+ });
286
+ ```
287
+
288
+ Lexical retrieval still runs first. Semantic rerank only touches top-N lexical candidates, and runs before de-dupe/MMR. If `pack.semantic` is missing, rerank is skipped silently; if `queryEmbedding` is omitted while enabled, `query(...)` throws.
289
+
290
+ Example with explicit validation:
291
+
292
+ ```ts
293
+ validateSemanticQueryOptions({
294
+ enabled: true,
295
+ topN: 64,
296
+ minLexConfidence: 0.25,
297
+ blend: { enabled: true, wLex: 0.7, wSem: 0.3 },
298
+ queryEmbedding,
299
+ });
300
+
301
+ const hits = query(pack, userQuery, {
302
+ semantic: {
303
+ enabled: true,
304
+ queryEmbedding,
305
+ topN: 64,
306
+ minLexConfidence: 0.25,
307
+ },
308
+ });
309
+ ```
310
+
181
311
  ### Tight vs. scattered matches
182
312
 
183
313
  Proximity bonus favors blocks where all query terms co-occur in a small span.
@@ -203,13 +333,14 @@ Top-K results apply near-duplicate suppression (5-gram Jaccard) and MMR (λ≈0.
203
333
  [lexLen:u32][lexicon JSON]
204
334
  [postCount:u32][postings u32[]]
205
335
  [blocksLen:u32][blocks JSON]
336
+ [semLen:u32][semantic JSON][semBlobLen:u32][semantic blob] // optional tail at EOF
206
337
  ```
207
338
 
208
339
  **Meta JSON**
209
340
 
210
341
  ```json
211
342
  {
212
- "version": 2,
343
+ "version": 3,
213
344
  "stats": {
214
345
  "docs": <number>,
215
346
  "blocks": <number>,
@@ -219,6 +350,77 @@ Top-K results apply near-duplicate suppression (5-gram Jaccard) and MMR (λ≈0.
219
350
  }
220
351
  ```
221
352
 
353
+ **Optional semantic tail**
354
+
355
+ * Fully backward compatible: if EOF is reached immediately after `blocks JSON`, no semantic data is present.
356
+ * Semantic tail schema version is `1` (`semantic.version = 1`).
357
+ * `buildPack(..., { semantic })` can now generate this section from provided `Float32Array` embeddings (no model inference at build time).
358
+ * Quantization is deterministic `int8_l2norm` per vector:
359
+ 1. L2-normalize the input embedding.
360
+ 2. Compute `scale = max(abs(e_i)) / 127`.
361
+ 3. Quantize `q_i = clamp(round(e_i / scale), -127..127)`.
362
+ 4. Store scale in `Uint16Array` using float16 encoding.
363
+ * Blob layout is **vectors first, scales second**:
364
+ * `blocks.vectors.byteOffset = 0`
365
+ * `blocks.vectors.length = blockCount * dims` (Int8 elements)
366
+ * `blocks.scales.byteOffset = vectors.byteLength`
367
+ * `blocks.scales.length = blockCount` (Uint16 elements)
368
+
369
+ Semantic JSON schema (stored verbatim in `[semantic JSON]`):
370
+
371
+ ```json
372
+ {
373
+ "version": 1,
374
+ "modelId": "string",
375
+ "dims": 384,
376
+ "encoding": "int8_l2norm",
377
+ "perVectorScale": true,
378
+ "blocks": {
379
+ "vectors": { "byteOffset": 0, "length": 1152 },
380
+ "scales": { "byteOffset": 1152, "length": 3, "encoding": "float16" }
381
+ }
382
+ }
383
+ ```
384
+
385
+ ### Building packs with embeddings (library usage)
386
+
387
+ ```ts
388
+ const embeddings: Float32Array[] = await Promise.all(
389
+ docs.map(async (doc) => embedText(doc.text))
390
+ );
391
+
392
+ const bytes = await buildPack(docs, {
393
+ semantic: {
394
+ enabled: true,
395
+ modelId: "text-embedding-3-small",
396
+ embeddings,
397
+ quantization: { type: "int8_l2norm", perVectorScale: true },
398
+ },
399
+ });
400
+ ```
401
+
402
+ Embedding validation rules:
403
+
404
+ * `embeddings.length` must match block count exactly. Note: one input doc can produce multiple blocks, so supply one embedding per block, not per doc (the per-doc mapping in the example above only works when each doc yields exactly one block).
405
+ * every embedding must be `Float32Array`.
406
+ * every vector must have identical `dims`.
407
+
408
+ ### Querying with semantic rerank
409
+
410
+ ```ts
411
+ const queryEmbedding = await embedText(userQuestion);
412
+ const hits = query(pack, userQuestion, {
413
+ topK: 8,
414
+ semantic: {
415
+ enabled: true,
416
+ queryEmbedding,
417
+ topN: 64,
418
+ minLexConfidence: 0.35,
419
+ blend: { enabled: true, wLex: 0.75, wSem: 0.25 },
420
+ },
421
+ });
422
+ ```
423
+
222
424
  **Lexicon JSON**
223
425
 
224
426
  * Array of `[term, termId]` pairs.
@@ -228,15 +430,16 @@ Top-K results apply near-duplicate suppression (5-gram Jaccard) and MMR (λ≈0.
228
430
  * Flattened `Uint32Array`:
229
431
 
230
432
  ```
231
- termId, blockId, pos, pos, …, 0, blockId, …, 0, 0, termId, ...
433
+ termId, blockId+1, pos, pos, …, 0, blockId+1, …, 0, 0, termId, ...
232
434
  ```
233
435
 
234
- Each block section ends with `0`, each term section ends with `0`.
436
+ Block IDs are encoded as `bid + 1` so `0` is reserved as the delimiter. Each block section ends with `0`, each term section ends with `0`.
235
437
 
236
438
  **Blocks JSON (v1 / v2)**
237
439
 
238
440
  * **v1**: `string[]` (text only)
239
441
  * **v2**: `{ text, heading?, docId? }[]`
442
+ * **v3**: `{ text, heading?, docId?, namespace?, len }[]` (`len` is block token length for stable ranking)
240
443
 
241
444
  Runtime auto-detects and exposes:
242
445
 
@@ -244,7 +447,18 @@ Runtime auto-detects and exposes:
244
447
  type Pack = {
245
448
  meta, lexicon, postings, blocks: string[],
246
449
  headings?: (string|null)[],
247
- docIds?: (string|null)[]
450
+ docIds?: (string|null)[],
451
+ namespaces?: (string|null)[],
452
+ blockTokenLens?: number[],
453
+ semantic?: {
454
+ version: 1,
455
+ modelId: string,
456
+ dims: number,
457
+ encoding: "int8_l2norm",
458
+ perVectorScale: boolean,
459
+ vecs: Int8Array,
460
+ scales?: Uint16Array
461
+ }
248
462
  }
249
463
  ```
250
464
 
@@ -319,7 +533,7 @@ npm run smoke
319
533
  ## FAQ
320
534
 
321
535
  **Q: Does this use embeddings or a vector DB?**
322
- A: No—pure lexical retrieval with positions and structural cues.
536
+ A: Default retrieval is lexical. Optional semantic hybrid rerank is supported when packs are built with embeddings; no external vector DB is required.
323
537
 
324
538
  **Q: Why am I still seeing similar results?**
325
539
  A: De-dup suppresses near-duplicates but allows related passages. Increase Jaccard threshold or tune λ (if forking).