knolo-core 0.3.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DOCS.md +16 -2
- package/README.md +157 -43
- package/bin/knolo.mjs +256 -45
- package/dist/agent.d.ts +53 -0
- package/dist/agent.js +175 -0
- package/dist/builder.d.ts +2 -0
- package/dist/builder.js +28 -6
- package/dist/index.d.ts +4 -2
- package/dist/index.js +3 -2
- package/dist/pack.d.ts +2 -0
- package/dist/pack.js +22 -3
- package/dist/query.d.ts +1 -0
- package/dist/query.js +43 -1
- package/package.json +4 -2
package/DOCS.md
CHANGED
|
@@ -71,8 +71,16 @@ npx knolo docs.json knowledge.knolo
|
|
|
71
71
|
|
|
72
72
|
# semantic-enabled build (embeddings JSON + model id)
|
|
73
73
|
npx knolo docs.json knowledge.knolo --embeddings embeddings.json --model-id text-embedding-3-small
|
|
74
|
+
|
|
75
|
+
# embed agents from a local directory (.json/.yml/.yaml)
|
|
76
|
+
npx knolo docs.json knowledge.knolo --agents ./examples/agents
|
|
74
77
|
```
|
|
75
78
|
|
|
79
|
+
|
|
80
|
+
### Agents and namespace binding
|
|
81
|
+
|
|
82
|
+
When agent definitions are embedded into `meta.agents`, `resolveAgent(pack, { agentId, query, patch })` enforces **strict namespace binding**: `retrievalDefaults.namespace` always wins over caller `query.namespace`. This keeps retrieval deterministic and on-policy for each agent.
|
|
83
|
+
|
|
76
84
|
---
|
|
77
85
|
|
|
78
86
|
## Concepts
|
|
@@ -176,9 +184,15 @@ const hits: Hit[] = query(pack, '“react native bridge” throttling', {
|
|
|
176
184
|
### Semantic helper ergonomics
|
|
177
185
|
|
|
178
186
|
```ts
|
|
179
|
-
import { hasSemantic, validateSemanticQueryOptions } from "knolo-core";
|
|
187
|
+
import { hasSemantic, validateQueryOptions, validateSemanticQueryOptions } from "knolo-core";
|
|
180
188
|
|
|
181
189
|
if (hasSemantic(pack)) {
|
|
190
|
+
validateQueryOptions({
|
|
191
|
+
topK: 10,
|
|
192
|
+
namespace: "mobile",
|
|
193
|
+
queryExpansion: { enabled: true, docs: 3, terms: 4 },
|
|
194
|
+
});
|
|
195
|
+
|
|
182
196
|
validateSemanticQueryOptions({
|
|
183
197
|
enabled: true,
|
|
184
198
|
topN: 40,
|
|
@@ -188,7 +202,7 @@ if (hasSemantic(pack)) {
|
|
|
188
202
|
}
|
|
189
203
|
```
|
|
190
204
|
|
|
191
|
-
`validateSemanticQueryOptions(...)`
|
|
205
|
+
`validateQueryOptions(...)` and `validateSemanticQueryOptions(...)` throw useful errors for invalid option types/ranges (for example `topK`, `queryExpansion.docs`, `topN`, `minLexConfidence`, blend weights, and missing `Float32Array` embedding types).
|
|
192
206
|
|
|
193
207
|
**What the ranker does**
|
|
194
208
|
|
package/README.md
CHANGED
|
@@ -59,21 +59,21 @@ npm run build
|
|
|
59
59
|
### 1) Build + mount + query
|
|
60
60
|
|
|
61
61
|
```ts
|
|
62
|
-
import { buildPack, mountPack, query, makeContextPatch } from
|
|
62
|
+
import { buildPack, mountPack, query, makeContextPatch } from 'knolo-core';
|
|
63
63
|
|
|
64
64
|
const docs = [
|
|
65
65
|
{
|
|
66
|
-
id:
|
|
67
|
-
namespace:
|
|
68
|
-
heading:
|
|
69
|
-
text:
|
|
66
|
+
id: 'bridge-guide',
|
|
67
|
+
namespace: 'mobile',
|
|
68
|
+
heading: 'React Native Bridge',
|
|
69
|
+
text: 'The bridge sends messages between JS and native modules. Throttling limits event frequency.',
|
|
70
70
|
},
|
|
71
71
|
{
|
|
72
|
-
id:
|
|
73
|
-
namespace:
|
|
74
|
-
heading:
|
|
75
|
-
text:
|
|
76
|
-
}
|
|
72
|
+
id: 'perf-notes',
|
|
73
|
+
namespace: 'mobile',
|
|
74
|
+
heading: 'Debounce vs Throttle',
|
|
75
|
+
text: 'Debounce waits for silence; throttle enforces a maximum trigger rate.',
|
|
76
|
+
},
|
|
77
77
|
];
|
|
78
78
|
|
|
79
79
|
const bytes = await buildPack(docs);
|
|
@@ -81,11 +81,11 @@ const kb = await mountPack({ src: bytes });
|
|
|
81
81
|
|
|
82
82
|
const hits = query(kb, '"react native" throttle', {
|
|
83
83
|
topK: 5,
|
|
84
|
-
requirePhrases: [
|
|
85
|
-
namespace:
|
|
84
|
+
requirePhrases: ['maximum trigger rate'],
|
|
85
|
+
namespace: 'mobile',
|
|
86
86
|
});
|
|
87
87
|
|
|
88
|
-
const patch = makeContextPatch(hits, { budget:
|
|
88
|
+
const patch = makeContextPatch(hits, { budget: 'small' });
|
|
89
89
|
console.log(hits, patch);
|
|
90
90
|
```
|
|
91
91
|
|
|
@@ -95,22 +95,33 @@ console.log(hits, patch);
|
|
|
95
95
|
|
|
96
96
|
```json
|
|
97
97
|
[
|
|
98
|
-
{
|
|
99
|
-
|
|
98
|
+
{
|
|
99
|
+
"id": "guide",
|
|
100
|
+
"heading": "Guide",
|
|
101
|
+
"text": "Install deps.\n\n## Throttle\nLimit event frequency."
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
"id": "faq",
|
|
105
|
+
"heading": "FAQ",
|
|
106
|
+
"text": "What is throttling? It reduces event frequency."
|
|
107
|
+
}
|
|
100
108
|
]
|
|
101
109
|
```
|
|
102
110
|
|
|
103
111
|
```bash
|
|
104
112
|
npx knolo docs.json knowledge.knolo
|
|
113
|
+
|
|
114
|
+
# embed agents from a local directory (.json/.yml/.yaml)
|
|
115
|
+
npx knolo docs.json knowledge.knolo --agents ./examples/agents
|
|
105
116
|
```
|
|
106
117
|
|
|
107
118
|
Then query in app:
|
|
108
119
|
|
|
109
120
|
```ts
|
|
110
|
-
import { mountPack, query } from
|
|
121
|
+
import { mountPack, query } from 'knolo-core';
|
|
111
122
|
|
|
112
|
-
const kb = await mountPack({ src:
|
|
113
|
-
const hits = query(kb,
|
|
123
|
+
const kb = await mountPack({ src: './knowledge.knolo' });
|
|
124
|
+
const hits = query(kb, 'throttle events', { topK: 3 });
|
|
114
125
|
```
|
|
115
126
|
|
|
116
127
|
---
|
|
@@ -122,7 +133,7 @@ KnoLo’s core retrieval remains lexical-first and deterministic. Semantic signa
|
|
|
122
133
|
### Build a semantic-enabled pack
|
|
123
134
|
|
|
124
135
|
```ts
|
|
125
|
-
import { buildPack } from
|
|
136
|
+
import { buildPack } from 'knolo-core';
|
|
126
137
|
|
|
127
138
|
// embeddings must align 1:1 with docs/block order
|
|
128
139
|
const embeddings: Float32Array[] = await embedDocumentsInOrder(docs);
|
|
@@ -130,32 +141,32 @@ const embeddings: Float32Array[] = await embedDocumentsInOrder(docs);
|
|
|
130
141
|
const bytes = await buildPack(docs, {
|
|
131
142
|
semantic: {
|
|
132
143
|
enabled: true,
|
|
133
|
-
modelId:
|
|
144
|
+
modelId: 'text-embedding-3-small',
|
|
134
145
|
embeddings,
|
|
135
|
-
quantization: { type:
|
|
136
|
-
}
|
|
146
|
+
quantization: { type: 'int8_l2norm', perVectorScale: true },
|
|
147
|
+
},
|
|
137
148
|
});
|
|
138
149
|
```
|
|
139
150
|
|
|
140
151
|
### Query with semantic rerank
|
|
141
152
|
|
|
142
153
|
```ts
|
|
143
|
-
import { mountPack, query, hasSemantic } from
|
|
154
|
+
import { mountPack, query, hasSemantic } from 'knolo-core';
|
|
144
155
|
|
|
145
156
|
const kb = await mountPack({ src: bytes });
|
|
146
|
-
const queryEmbedding = await embedQuery(
|
|
157
|
+
const queryEmbedding = await embedQuery('react native bridge throttling');
|
|
147
158
|
|
|
148
|
-
const hits = query(kb,
|
|
159
|
+
const hits = query(kb, 'react native bridge throttling', {
|
|
149
160
|
topK: 8,
|
|
150
161
|
semantic: {
|
|
151
162
|
enabled: hasSemantic(kb),
|
|
152
|
-
mode:
|
|
163
|
+
mode: 'rerank',
|
|
153
164
|
topN: 50,
|
|
154
165
|
minLexConfidence: 0.35,
|
|
155
166
|
blend: { enabled: true, wLex: 0.75, wSem: 0.25 },
|
|
156
167
|
queryEmbedding,
|
|
157
|
-
force: false
|
|
158
|
-
}
|
|
168
|
+
force: false,
|
|
169
|
+
},
|
|
159
170
|
});
|
|
160
171
|
```
|
|
161
172
|
|
|
@@ -165,8 +176,8 @@ const hits = query(kb, "react native bridge throttling", {
|
|
|
165
176
|
import {
|
|
166
177
|
quantizeEmbeddingInt8L2Norm,
|
|
167
178
|
encodeScaleF16,
|
|
168
|
-
decodeScaleF16
|
|
169
|
-
} from
|
|
179
|
+
decodeScaleF16,
|
|
180
|
+
} from 'knolo-core';
|
|
170
181
|
|
|
171
182
|
const { q, scale } = quantizeEmbeddingInt8L2Norm(queryEmbedding);
|
|
172
183
|
const packed = encodeScaleF16(scale);
|
|
@@ -188,25 +199,75 @@ type BuildInputDoc = {
|
|
|
188
199
|
};
|
|
189
200
|
|
|
190
201
|
type BuildPackOptions = {
|
|
202
|
+
agents?: AgentRegistry | AgentDefinitionV1[];
|
|
191
203
|
semantic?: {
|
|
192
204
|
enabled: boolean;
|
|
193
205
|
modelId: string;
|
|
194
206
|
embeddings: Float32Array[];
|
|
195
207
|
quantization?: {
|
|
196
|
-
type:
|
|
208
|
+
type: 'int8_l2norm';
|
|
197
209
|
perVectorScale?: true;
|
|
198
210
|
};
|
|
199
211
|
};
|
|
200
212
|
};
|
|
201
213
|
```
|
|
202
214
|
|
|
215
|
+
### Agents in pack metadata
|
|
216
|
+
|
|
217
|
+
Agents are optional and embedded in `meta.agents` so a single `.knolo` artifact can ship retrieval behavior + prompt defaults on-prem. Agent registries are validated once at `mountPack()` time, so invalid embedded registries fail fast during mount.
|
|
218
|
+
|
|
219
|
+
Agent namespace binding is **strict**: when `resolveAgent()` composes retrieval options, `retrievalDefaults.namespace` is always enforced and caller-provided `query.namespace` is ignored.
|
|
220
|
+
|
|
221
|
+
```ts
|
|
222
|
+
type AgentPromptTemplate = string[] | { format: 'markdown'; template: string };
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
type AgentRegistry = {
|
|
226
|
+
version: 1;
|
|
227
|
+
agents: AgentDefinitionV1[];
|
|
228
|
+
};
|
|
229
|
+
|
|
230
|
+
type PackMeta = {
|
|
231
|
+
version: number;
|
|
232
|
+
stats: { docs: number; blocks: number; terms: number; avgBlockLen?: number };
|
|
233
|
+
agents?: AgentRegistry;
|
|
234
|
+
};
|
|
235
|
+
|
|
236
|
+
type AgentDefinitionV1 = {
|
|
237
|
+
id: string;
|
|
238
|
+
version: 1;
|
|
239
|
+
name?: string;
|
|
240
|
+
description?: string;
|
|
241
|
+
systemPrompt: AgentPromptTemplate;
|
|
242
|
+
retrievalDefaults: {
|
|
243
|
+
namespace: string[]; // required
|
|
244
|
+
topK?: number;
|
|
245
|
+
queryExpansion?: QueryOptions['queryExpansion'];
|
|
246
|
+
semantic?: Omit<
|
|
247
|
+
NonNullable<QueryOptions['semantic']>,
|
|
248
|
+
'queryEmbedding' | 'enabled' | 'force'
|
|
249
|
+
> & { enabled?: boolean };
|
|
250
|
+
minScore?: number;
|
|
251
|
+
requirePhrases?: string[];
|
|
252
|
+
source?: string[];
|
|
253
|
+
};
|
|
254
|
+
toolPolicy?: { mode: 'allow' | 'deny'; tools: string[] };
|
|
255
|
+
metadata?: Record<string, string | number | boolean | null>;
|
|
256
|
+
};
|
|
257
|
+
```
|
|
258
|
+
|
|
203
259
|
### `mountPack({ src }) => Promise<Pack>`
|
|
204
260
|
|
|
205
261
|
```ts
|
|
206
262
|
type Pack = {
|
|
207
263
|
meta: {
|
|
208
264
|
version: number;
|
|
209
|
-
stats: {
|
|
265
|
+
stats: {
|
|
266
|
+
docs: number;
|
|
267
|
+
blocks: number;
|
|
268
|
+
terms: number;
|
|
269
|
+
avgBlockLen?: number;
|
|
270
|
+
};
|
|
210
271
|
};
|
|
211
272
|
lexicon: Map<string, number>;
|
|
212
273
|
postings: Uint32Array;
|
|
@@ -219,7 +280,7 @@ type Pack = {
|
|
|
219
280
|
version: 1;
|
|
220
281
|
modelId: string;
|
|
221
282
|
dims: number;
|
|
222
|
-
encoding:
|
|
283
|
+
encoding: 'int8_l2norm';
|
|
223
284
|
perVectorScale: boolean;
|
|
224
285
|
vecs: Int8Array;
|
|
225
286
|
scales?: Uint16Array;
|
|
@@ -245,7 +306,7 @@ type QueryOptions = {
|
|
|
245
306
|
};
|
|
246
307
|
semantic?: {
|
|
247
308
|
enabled?: boolean;
|
|
248
|
-
mode?:
|
|
309
|
+
mode?: 'rerank';
|
|
249
310
|
topN?: number;
|
|
250
311
|
minLexConfidence?: number;
|
|
251
312
|
blend?: {
|
|
@@ -267,6 +328,59 @@ type Hit = {
|
|
|
267
328
|
};
|
|
268
329
|
```
|
|
269
330
|
|
|
331
|
+
### Agent runtime helpers
|
|
332
|
+
|
|
333
|
+
- `listAgents(pack) => string[]`
|
|
334
|
+
- `getAgent(pack, agentId) => AgentDefinitionV1 | undefined`
|
|
335
|
+
- `resolveAgent(pack, { agentId, query?, patch? }) => { agent, systemPrompt, retrievalOptions }`
|
|
336
|
+
- `buildSystemPrompt(agent, patch?) => string`
|
|
337
|
+
- `isToolAllowed(agent, toolId) => boolean` (defaults to allow-all when no `toolPolicy`)
|
|
338
|
+
- `assertToolAllowed(agent, toolId) => void` (throws deterministic error when blocked)
|
|
339
|
+
|
|
340
|
+
### Build a pack with agents and resolve at runtime
|
|
341
|
+
|
|
342
|
+
```ts
|
|
343
|
+
import {
|
|
344
|
+
buildPack,
|
|
345
|
+
mountPack,
|
|
346
|
+
resolveAgent,
|
|
347
|
+
query,
|
|
348
|
+
isToolAllowed,
|
|
349
|
+
assertToolAllowed,
|
|
350
|
+
} from 'knolo-core';
|
|
351
|
+
|
|
352
|
+
const bytes = await buildPack(docs, {
|
|
353
|
+
agents: [
|
|
354
|
+
{
|
|
355
|
+
id: 'mobile.agent',
|
|
356
|
+
version: 1,
|
|
357
|
+
systemPrompt: {
|
|
358
|
+
format: 'markdown',
|
|
359
|
+
template: 'You are {{team}} support.',
|
|
360
|
+
},
|
|
361
|
+
retrievalDefaults: { namespace: ['mobile'], topK: 5 },
|
|
362
|
+
toolPolicy: { mode: 'allow', tools: ['search_docs'] },
|
|
363
|
+
},
|
|
364
|
+
],
|
|
365
|
+
});
|
|
366
|
+
|
|
367
|
+
const pack = await mountPack({ src: bytes });
|
|
368
|
+
const resolved = resolveAgent(pack, {
|
|
369
|
+
agentId: 'mobile.agent',
|
|
370
|
+
patch: { team: 'mobile' },
|
|
371
|
+
query: { namespace: ['backend'], topK: 8 },
|
|
372
|
+
});
|
|
373
|
+
|
|
374
|
+
console.log(resolved.retrievalOptions.namespace); // ['mobile'] (strict binding)
|
|
375
|
+
|
|
376
|
+
if (isToolAllowed(resolved.agent, 'search_docs')) {
|
|
377
|
+
// invoke search_docs
|
|
378
|
+
}
|
|
379
|
+
assertToolAllowed(resolved.agent, 'search_docs');
|
|
380
|
+
|
|
381
|
+
const hits = query(pack, 'bridge throttle', resolved.retrievalOptions);
|
|
382
|
+
```
|
|
383
|
+
|
|
270
384
|
### `makeContextPatch(hits, { budget }) => ContextPatch`
|
|
271
385
|
|
|
272
386
|
Budgets: `"mini" | "small" | "full"`
|
|
@@ -278,10 +392,10 @@ Budgets: `"mini" | "small" | "full"`
|
|
|
278
392
|
### Namespace + source filtering
|
|
279
393
|
|
|
280
394
|
```ts
|
|
281
|
-
const hits = query(kb,
|
|
282
|
-
namespace: [
|
|
283
|
-
source: [
|
|
284
|
-
topK: 6
|
|
395
|
+
const hits = query(kb, 'retry backoff', {
|
|
396
|
+
namespace: ['sdk', 'api'],
|
|
397
|
+
source: ['errors-guide', 'http-reference'],
|
|
398
|
+
topK: 6,
|
|
285
399
|
});
|
|
286
400
|
```
|
|
287
401
|
|
|
@@ -290,29 +404,29 @@ const hits = query(kb, "retry backoff", {
|
|
|
290
404
|
If your query has no free tokens but includes required phrases, KnoLo still forms candidates from phrase tokens and enforces phrase presence.
|
|
291
405
|
|
|
292
406
|
```ts
|
|
293
|
-
const hits = query(kb, '"event loop"', { requirePhrases: [
|
|
407
|
+
const hits = query(kb, '"event loop"', { requirePhrases: ['single thread'] });
|
|
294
408
|
```
|
|
295
409
|
|
|
296
410
|
### Precision mode with minimum score
|
|
297
411
|
|
|
298
412
|
```ts
|
|
299
|
-
const strictHits = query(kb,
|
|
413
|
+
const strictHits = query(kb, 'jwt refresh token rotation', {
|
|
300
414
|
topK: 5,
|
|
301
|
-
minScore: 2.5
|
|
415
|
+
minScore: 2.5,
|
|
302
416
|
});
|
|
303
417
|
```
|
|
304
418
|
|
|
305
419
|
### Validate semantic query options early
|
|
306
420
|
|
|
307
421
|
```ts
|
|
308
|
-
import { validateSemanticQueryOptions } from
|
|
422
|
+
import { validateSemanticQueryOptions } from 'knolo-core';
|
|
309
423
|
|
|
310
424
|
validateSemanticQueryOptions({
|
|
311
425
|
enabled: true,
|
|
312
426
|
topN: 40,
|
|
313
427
|
minLexConfidence: 0.3,
|
|
314
428
|
blend: { enabled: true, wLex: 0.8, wSem: 0.2 },
|
|
315
|
-
queryEmbedding
|
|
429
|
+
queryEmbedding,
|
|
316
430
|
});
|
|
317
431
|
```
|
|
318
432
|
|
package/bin/knolo.mjs
CHANGED
|
@@ -1,14 +1,111 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
// Robust CLI that works with ESM or CJS builds and odd resolution cases.
|
|
3
3
|
|
|
4
|
-
import { readFileSync, writeFileSync } from
|
|
5
|
-
import path from
|
|
6
|
-
import { fileURLToPath, pathToFileURL } from
|
|
7
|
-
import { createRequire } from
|
|
4
|
+
import { readFileSync, writeFileSync, readdirSync, statSync } from 'node:fs';
|
|
5
|
+
import path from 'node:path';
|
|
6
|
+
import { fileURLToPath, pathToFileURL } from 'node:url';
|
|
7
|
+
import { createRequire } from 'node:module';
|
|
8
8
|
|
|
9
9
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
10
10
|
const require = createRequire(import.meta.url);
|
|
11
11
|
|
|
12
|
+
function parseScalar(value) {
|
|
13
|
+
if (value === 'true') return true;
|
|
14
|
+
if (value === 'false') return false;
|
|
15
|
+
if (value === 'null') return null;
|
|
16
|
+
if (/^-?\d+(?:\.\d+)?$/.test(value)) return Number(value);
|
|
17
|
+
const quoted = value.match(/^("|')(.*)\1$/);
|
|
18
|
+
if (quoted) return quoted[2];
|
|
19
|
+
return value;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
function parseSimpleYaml(content) {
|
|
23
|
+
const lines = content
|
|
24
|
+
.split(/\n/)
|
|
25
|
+
.map((line) => line.replace(/\r$/, ''))
|
|
26
|
+
.map((line) => line.replace(/\t/g, ' ').replace(/\s+#.*$/, ''));
|
|
27
|
+
|
|
28
|
+
const root = {};
|
|
29
|
+
const stack = [{ indent: -1, value: root }];
|
|
30
|
+
|
|
31
|
+
const nextMeaningfulLine = (from) => {
|
|
32
|
+
for (let i = from + 1; i < lines.length; i++) {
|
|
33
|
+
if (lines[i].trim()) return lines[i].trim();
|
|
34
|
+
}
|
|
35
|
+
return '';
|
|
36
|
+
};
|
|
37
|
+
|
|
38
|
+
for (let i = 0; i < lines.length; i++) {
|
|
39
|
+
const raw = lines[i];
|
|
40
|
+
if (!raw.trim()) continue;
|
|
41
|
+
|
|
42
|
+
const indent = raw.match(/^\s*/)[0].length;
|
|
43
|
+
const line = raw.trim();
|
|
44
|
+
|
|
45
|
+
while (stack.length > 1 && indent <= stack[stack.length - 1].indent) {
|
|
46
|
+
stack.pop();
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
const parent = stack[stack.length - 1].value;
|
|
50
|
+
|
|
51
|
+
if (line.startsWith('- ')) {
|
|
52
|
+
if (!Array.isArray(parent)) {
|
|
53
|
+
throw new Error('YAML array item found under non-array parent.');
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
const body = line.slice(2).trim();
|
|
57
|
+
const pair = body.match(/^([^:]+):(.*)$/);
|
|
58
|
+
if (!pair) {
|
|
59
|
+
parent.push(parseScalar(body));
|
|
60
|
+
continue;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
const obj = {};
|
|
64
|
+
const key = pair[1].trim();
|
|
65
|
+
const rhs = pair[2].trim();
|
|
66
|
+
if (rhs) {
|
|
67
|
+
obj[key] = parseScalar(rhs);
|
|
68
|
+
} else {
|
|
69
|
+
const next = nextMeaningfulLine(i);
|
|
70
|
+
obj[key] = next.startsWith('- ') ? [] : {};
|
|
71
|
+
stack.push({ indent, value: obj[key] });
|
|
72
|
+
}
|
|
73
|
+
parent.push(obj);
|
|
74
|
+
continue;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
const pair = line.match(/^([^:]+):(.*)$/);
|
|
78
|
+
if (!pair) {
|
|
79
|
+
throw new Error(`Unsupported YAML line: ${line}`);
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
const key = pair[1].trim();
|
|
83
|
+
const rhs = pair[2].trim();
|
|
84
|
+
if (Array.isArray(parent)) {
|
|
85
|
+
const obj = {};
|
|
86
|
+
parent.push(obj);
|
|
87
|
+
if (rhs) {
|
|
88
|
+
obj[key] = parseScalar(rhs);
|
|
89
|
+
} else {
|
|
90
|
+
const next = nextMeaningfulLine(i);
|
|
91
|
+
obj[key] = next.startsWith('- ') ? [] : {};
|
|
92
|
+
stack.push({ indent, value: obj[key] });
|
|
93
|
+
}
|
|
94
|
+
continue;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
if (rhs) {
|
|
98
|
+
parent[key] = parseScalar(rhs);
|
|
99
|
+
} else {
|
|
100
|
+
const next = nextMeaningfulLine(i);
|
|
101
|
+
parent[key] = next.startsWith('- ') ? [] : {};
|
|
102
|
+
stack.push({ indent, value: parent[key] });
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
return root;
|
|
107
|
+
}
|
|
108
|
+
|
|
12
109
|
async function tryImport(filePath) {
|
|
13
110
|
try {
|
|
14
111
|
const url = pathToFileURL(filePath).href;
|
|
@@ -20,43 +117,57 @@ async function tryImport(filePath) {
|
|
|
20
117
|
return null;
|
|
21
118
|
}
|
|
22
119
|
|
|
23
|
-
function
|
|
24
|
-
if (!mod) return
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
120
|
+
function pickBuildExports(mod) {
|
|
121
|
+
if (!mod) return null;
|
|
122
|
+
const root = mod.default && typeof mod.default === 'object' ? mod.default : mod;
|
|
123
|
+
const buildPack =
|
|
124
|
+
typeof mod.buildPack === 'function'
|
|
125
|
+
? mod.buildPack
|
|
126
|
+
: typeof root.buildPack === 'function'
|
|
127
|
+
? root.buildPack
|
|
128
|
+
: typeof root === 'function'
|
|
129
|
+
? root
|
|
130
|
+
: undefined;
|
|
131
|
+
const validateAgentDefinition =
|
|
132
|
+
typeof mod.validateAgentDefinition === 'function'
|
|
133
|
+
? mod.validateAgentDefinition
|
|
134
|
+
: root.validateAgentDefinition;
|
|
135
|
+
const validateAgentRegistry =
|
|
136
|
+
typeof mod.validateAgentRegistry === 'function'
|
|
137
|
+
? mod.validateAgentRegistry
|
|
138
|
+
: root.validateAgentRegistry;
|
|
139
|
+
|
|
140
|
+
if (!buildPack) return null;
|
|
141
|
+
return { buildPack, validateAgentDefinition, validateAgentRegistry };
|
|
33
142
|
}
|
|
34
143
|
|
|
35
|
-
async function
|
|
144
|
+
async function loadBuildExports() {
|
|
36
145
|
const candidates = [
|
|
37
|
-
path.resolve(__dirname,
|
|
38
|
-
path.resolve(__dirname,
|
|
39
|
-
path.resolve(__dirname,
|
|
40
|
-
path.resolve(__dirname,
|
|
146
|
+
path.resolve(__dirname, '../dist/index.js'),
|
|
147
|
+
path.resolve(__dirname, '../dist/builder.js'),
|
|
148
|
+
path.resolve(__dirname, '../dist/index.cjs'),
|
|
149
|
+
path.resolve(__dirname, '../dist/builder.cjs'),
|
|
41
150
|
];
|
|
42
151
|
for (const p of candidates) {
|
|
43
152
|
const mod = await tryImport(p);
|
|
44
|
-
const
|
|
45
|
-
if (
|
|
153
|
+
const exports = pickBuildExports(mod);
|
|
154
|
+
if (exports) return exports;
|
|
46
155
|
}
|
|
47
|
-
throw new Error(
|
|
156
|
+
throw new Error('Could not locate a buildPack function in dist/');
|
|
48
157
|
}
|
|
49
158
|
|
|
50
159
|
function validateCliDocs(raw) {
|
|
51
160
|
if (!Array.isArray(raw)) {
|
|
52
|
-
throw new Error(
|
|
161
|
+
throw new Error(
|
|
162
|
+
'Input JSON must be an array of docs: [{ "text": "...", "id"?: "...", "heading"?: "..." }]'
|
|
163
|
+
);
|
|
53
164
|
}
|
|
54
165
|
for (let i = 0; i < raw.length; i++) {
|
|
55
166
|
const doc = raw[i];
|
|
56
|
-
if (!doc || typeof doc !==
|
|
167
|
+
if (!doc || typeof doc !== 'object') {
|
|
57
168
|
throw new Error(`Invalid doc at index ${i}: expected an object.`);
|
|
58
169
|
}
|
|
59
|
-
if (typeof doc.text !==
|
|
170
|
+
if (typeof doc.text !== 'string' || !doc.text.trim()) {
|
|
60
171
|
throw new Error(`Invalid doc at index ${i}: "text" must be a non-empty string.`);
|
|
61
172
|
}
|
|
62
173
|
}
|
|
@@ -65,23 +176,27 @@ function validateCliDocs(raw) {
|
|
|
65
176
|
|
|
66
177
|
function parseArgs(argv) {
|
|
67
178
|
const positional = [];
|
|
68
|
-
const flags = { embeddingsPath: undefined, modelId: undefined };
|
|
179
|
+
const flags = { embeddingsPath: undefined, modelId: undefined, agentsDir: undefined };
|
|
69
180
|
|
|
70
181
|
for (let i = 0; i < argv.length; i++) {
|
|
71
182
|
const arg = argv[i];
|
|
72
|
-
if (!arg.startsWith(
|
|
183
|
+
if (!arg.startsWith('--')) {
|
|
73
184
|
positional.push(arg);
|
|
74
185
|
continue;
|
|
75
186
|
}
|
|
76
|
-
if (arg ===
|
|
187
|
+
if (arg === '--embeddings') {
|
|
77
188
|
flags.embeddingsPath = argv[++i];
|
|
78
189
|
continue;
|
|
79
190
|
}
|
|
80
|
-
if (arg ===
|
|
191
|
+
if (arg === '--model-id') {
|
|
81
192
|
flags.modelId = argv[++i];
|
|
82
193
|
continue;
|
|
83
194
|
}
|
|
84
|
-
if (arg ===
|
|
195
|
+
if (arg === '--agents') {
|
|
196
|
+
flags.agentsDir = argv[++i];
|
|
197
|
+
continue;
|
|
198
|
+
}
|
|
199
|
+
if (arg === '--help' || arg === '-h') {
|
|
85
200
|
flags.help = true;
|
|
86
201
|
continue;
|
|
87
202
|
}
|
|
@@ -90,8 +205,85 @@ function parseArgs(argv) {
|
|
|
90
205
|
return { positional, flags };
|
|
91
206
|
}
|
|
92
207
|
|
|
208
|
+
function parseAgentFileContent(content, filePath) {
|
|
209
|
+
const ext = path.extname(filePath).toLowerCase();
|
|
210
|
+
if (ext === '.json') return JSON.parse(content);
|
|
211
|
+
if (ext === '.yaml' || ext === '.yml') return parseSimpleYaml(content);
|
|
212
|
+
throw new Error(`Unsupported agent file extension: ${filePath}`);
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
function normalizeAgentFromFile(parsed, filePath) {
|
|
216
|
+
if (!parsed || typeof parsed !== 'object') {
|
|
217
|
+
throw new Error(`Invalid agent definition in ${filePath}: expected object.`);
|
|
218
|
+
}
|
|
219
|
+
if ('agent' in parsed && parsed.agent && typeof parsed.agent === 'object') {
|
|
220
|
+
return parsed.agent;
|
|
221
|
+
}
|
|
222
|
+
return parsed;
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
function loadAgentsFromDir(agentsDir, validators = {}) {
|
|
226
|
+
const { validateAgentDefinition, validateAgentRegistry } = validators;
|
|
227
|
+
const dirPath = path.resolve(agentsDir);
|
|
228
|
+
let entries;
|
|
229
|
+
try {
|
|
230
|
+
entries = readdirSync(dirPath, { withFileTypes: true });
|
|
231
|
+
} catch (err) {
|
|
232
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
233
|
+
throw new Error(`Unable to read agents directory ${agentsDir}: ${message}`);
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
const files = entries
|
|
237
|
+
.filter((entry) => entry.isFile())
|
|
238
|
+
.map((entry) => entry.name)
|
|
239
|
+
.filter((name) => ['.json', '.yaml', '.yml'].includes(path.extname(name).toLowerCase()))
|
|
240
|
+
.sort((a, b) => a.localeCompare(b));
|
|
241
|
+
|
|
242
|
+
const loaded = [];
|
|
243
|
+
for (const file of files) {
|
|
244
|
+
const fullPath = path.join(dirPath, file);
|
|
245
|
+
try {
|
|
246
|
+
const content = readFileSync(fullPath, 'utf8');
|
|
247
|
+
const parsed = parseAgentFileContent(content, fullPath);
|
|
248
|
+
const agent = normalizeAgentFromFile(parsed, fullPath);
|
|
249
|
+
if (typeof validateAgentDefinition === 'function') {
|
|
250
|
+
validateAgentDefinition(agent);
|
|
251
|
+
}
|
|
252
|
+
loaded.push({ file, agent });
|
|
253
|
+
} catch (err) {
|
|
254
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
255
|
+
throw new Error(`Failed to load agent file ${fullPath}: ${message}`);
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
const duplicateById = new Map();
|
|
260
|
+
for (const item of loaded) {
|
|
261
|
+
const key = String(item.agent?.id ?? '');
|
|
262
|
+
if (!duplicateById.has(key)) duplicateById.set(key, []);
|
|
263
|
+
duplicateById.get(key).push(item.file);
|
|
264
|
+
}
|
|
265
|
+
for (const [id, fileNames] of duplicateById.entries()) {
|
|
266
|
+
if (fileNames.length > 1) {
|
|
267
|
+
throw new Error(
|
|
268
|
+
`Duplicate agent id "${id}" found in files: ${fileNames.sort((a, b) => a.localeCompare(b)).join(', ')}`
|
|
269
|
+
);
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
const agents = loaded
|
|
274
|
+
.map((item) => item.agent)
|
|
275
|
+
.sort((a, b) => String(a.id).localeCompare(String(b.id)));
|
|
276
|
+
const registry = { version: 1, agents };
|
|
277
|
+
|
|
278
|
+
if (typeof validateAgentRegistry === 'function') {
|
|
279
|
+
validateAgentRegistry(registry);
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
return registry;
|
|
283
|
+
}
|
|
284
|
+
|
|
93
285
|
function loadEmbeddingsFromJson(filePath, expectedCount) {
|
|
94
|
-
const parsed = JSON.parse(readFileSync(filePath,
|
|
286
|
+
const parsed = JSON.parse(readFileSync(filePath, 'utf8'));
|
|
95
287
|
const vectors = Array.isArray(parsed?.embeddings) ? parsed.embeddings : parsed;
|
|
96
288
|
if (!Array.isArray(vectors)) {
|
|
97
289
|
throw new Error('Embeddings JSON must be either an array of vectors or { "embeddings": [...] }.');
|
|
@@ -102,7 +294,7 @@ function loadEmbeddingsFromJson(filePath, expectedCount) {
|
|
|
102
294
|
|
|
103
295
|
const first = vectors[0];
|
|
104
296
|
if (!Array.isArray(first) || first.length === 0) {
|
|
105
|
-
throw new Error(
|
|
297
|
+
throw new Error('Embeddings must contain non-empty numeric vectors.');
|
|
106
298
|
}
|
|
107
299
|
const dims = first.length;
|
|
108
300
|
|
|
@@ -126,10 +318,12 @@ function loadEmbeddingsFromJson(filePath, expectedCount) {
|
|
|
126
318
|
}
|
|
127
319
|
|
|
128
320
|
function printUsage() {
|
|
129
|
-
console.log(
|
|
321
|
+
console.log(
|
|
322
|
+
'Usage: knolo <input.json> [output.knolo] [--agents ./agents] [--embeddings embeddings.json --model-id model-name]'
|
|
323
|
+
);
|
|
130
324
|
}
|
|
131
325
|
|
|
132
|
-
const buildPack = await
|
|
326
|
+
const { buildPack, validateAgentDefinition, validateAgentRegistry } = await loadBuildExports();
|
|
133
327
|
const { positional, flags } = parseArgs(process.argv.slice(2));
|
|
134
328
|
|
|
135
329
|
if (flags.help) {
|
|
@@ -138,7 +332,7 @@ if (flags.help) {
|
|
|
138
332
|
}
|
|
139
333
|
|
|
140
334
|
const inFile = positional[0];
|
|
141
|
-
const outFile = positional[1] ||
|
|
335
|
+
const outFile = positional[1] || 'knowledge.knolo';
|
|
142
336
|
|
|
143
337
|
if (!inFile) {
|
|
144
338
|
printUsage();
|
|
@@ -146,26 +340,43 @@ if (!inFile) {
|
|
|
146
340
|
}
|
|
147
341
|
|
|
148
342
|
try {
|
|
149
|
-
const rawText = readFileSync(inFile,
|
|
343
|
+
const rawText = readFileSync(inFile, 'utf8');
|
|
150
344
|
const parsed = JSON.parse(rawText);
|
|
151
345
|
const docs = validateCliDocs(parsed);
|
|
152
346
|
|
|
153
|
-
|
|
347
|
+
const options = {};
|
|
348
|
+
|
|
154
349
|
if (flags.embeddingsPath || flags.modelId) {
|
|
155
350
|
if (!flags.embeddingsPath || !flags.modelId) {
|
|
156
|
-
throw new Error(
|
|
351
|
+
throw new Error('Both --embeddings and --model-id are required when enabling semantic build output.');
|
|
157
352
|
}
|
|
158
353
|
const embeddings = loadEmbeddingsFromJson(flags.embeddingsPath, docs.length);
|
|
159
|
-
options = {
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
quantization: { type: "int8_l2norm", perVectorScale: true },
|
|
165
|
-
},
|
|
354
|
+
options.semantic = {
|
|
355
|
+
enabled: true,
|
|
356
|
+
modelId: flags.modelId,
|
|
357
|
+
embeddings,
|
|
358
|
+
quantization: { type: 'int8_l2norm', perVectorScale: true },
|
|
166
359
|
};
|
|
167
360
|
}
|
|
168
361
|
|
|
362
|
+
if (flags.agentsDir) {
|
|
363
|
+
let dirStats;
|
|
364
|
+
try {
|
|
365
|
+
dirStats = statSync(flags.agentsDir);
|
|
366
|
+
} catch (err) {
|
|
367
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
368
|
+
throw new Error(`Unable to access --agents path ${flags.agentsDir}: ${message}`);
|
|
369
|
+
}
|
|
370
|
+
if (!dirStats.isDirectory()) {
|
|
371
|
+
throw new Error(`--agents path must be a directory: ${flags.agentsDir}`);
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
options.agents = loadAgentsFromDir(flags.agentsDir, {
|
|
375
|
+
validateAgentDefinition,
|
|
376
|
+
validateAgentRegistry,
|
|
377
|
+
});
|
|
378
|
+
}
|
|
379
|
+
|
|
169
380
|
const bytes = await buildPack(docs, options);
|
|
170
381
|
writeFileSync(outFile, Buffer.from(bytes));
|
|
171
382
|
console.log(`wrote ${outFile}`);
|
package/dist/agent.d.ts
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import type { Pack } from './pack.js';
|
|
2
|
+
import type { QueryOptions } from './query.js';
|
|
3
|
+
export type AgentPromptTemplate = string[] | {
|
|
4
|
+
format: 'markdown';
|
|
5
|
+
template: string;
|
|
6
|
+
};
|
|
7
|
+
export type AgentToolPolicy = {
|
|
8
|
+
mode: 'allow' | 'deny';
|
|
9
|
+
tools: string[];
|
|
10
|
+
};
|
|
11
|
+
export type AgentRetrievalDefaults = {
|
|
12
|
+
namespace: string[];
|
|
13
|
+
topK?: number;
|
|
14
|
+
queryExpansion?: QueryOptions['queryExpansion'];
|
|
15
|
+
semantic?: Omit<NonNullable<QueryOptions['semantic']>, 'queryEmbedding' | 'enabled' | 'force'> & {
|
|
16
|
+
enabled?: boolean;
|
|
17
|
+
};
|
|
18
|
+
minScore?: number;
|
|
19
|
+
requirePhrases?: string[];
|
|
20
|
+
source?: string[];
|
|
21
|
+
};
|
|
22
|
+
export type AgentDefinitionV1 = {
|
|
23
|
+
id: string;
|
|
24
|
+
version: 1;
|
|
25
|
+
name?: string;
|
|
26
|
+
description?: string;
|
|
27
|
+
systemPrompt: AgentPromptTemplate;
|
|
28
|
+
retrievalDefaults: AgentRetrievalDefaults;
|
|
29
|
+
toolPolicy?: AgentToolPolicy;
|
|
30
|
+
metadata?: Record<string, string | number | boolean | null>;
|
|
31
|
+
};
|
|
32
|
+
export type AgentRegistry = {
|
|
33
|
+
version: 1;
|
|
34
|
+
agents: AgentDefinitionV1[];
|
|
35
|
+
};
|
|
36
|
+
export type ResolveAgentInput = {
|
|
37
|
+
agentId: string;
|
|
38
|
+
query?: QueryOptions;
|
|
39
|
+
patch?: Record<string, string | number | boolean>;
|
|
40
|
+
};
|
|
41
|
+
/**
 * Result of `resolveAgent`: the agent definition, its rendered system prompt,
 * and the query options to use for retrieval.
 */
export type ResolvedAgent = {
    agent: AgentDefinitionV1;
    systemPrompt: string;
    retrievalOptions: QueryOptions;
};
|
|
46
|
+
/**
 * Validates an agent registry.
 *
 * @throws Error when the registry is invalid.
 */
export declare function validateAgentRegistry(reg: AgentRegistry): void;
|
|
47
|
+
/**
 * Validates a single agent definition.
 *
 * @throws Error when the definition is invalid.
 */
export declare function validateAgentDefinition(agent: AgentDefinitionV1): void;
|
|
48
|
+
/** Returns the ids of the agents embedded in the pack as a string array. */
export declare function listAgents(pack: Pack): string[];
|
|
49
|
+
/** Looks up an embedded agent by id; returns `undefined` when there is no match. */
export declare function getAgent(pack: Pack, agentId: string): AgentDefinitionV1 | undefined;
|
|
50
|
+
/**
 * Renders the agent's system prompt to a single string.
 *
 * @param agent - Agent whose `systemPrompt` is rendered.
 * @param patch - Optional values for the prompt's placeholders.
 */
export declare function buildSystemPrompt(agent: AgentDefinitionV1, patch?: Record<string, string | number | boolean>): string;
|
|
51
|
+
/**
 * Resolves an agent from the pack using the given input and returns the
 * agent together with its system prompt and retrieval options.
 */
export declare function resolveAgent(pack: Pack, input: ResolveAgentInput): ResolvedAgent;
|
|
52
|
+
/** Reports whether the agent may use the given tool. */
export declare function isToolAllowed(agent: AgentDefinitionV1, toolId: string): boolean;
|
|
53
|
+
/**
 * Asserts that the agent may use the given tool.
 *
 * @throws Error when the tool is not allowed.
 */
export declare function assertToolAllowed(agent: AgentDefinitionV1, toolId: string): void;
|
package/dist/agent.js
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
import { validateQueryOptions } from './query.js';
|
|
2
|
+
/**
 * Checks that a registry is an object with `version === 1` and an `agents`
 * array, validates every entry, and rejects duplicate agent ids.
 *
 * @param reg - Candidate agent registry.
 * @throws Error describing the first violation found.
 */
export function validateAgentRegistry(reg) {
    if (typeof reg !== 'object' || !reg) {
        throw new Error('agent registry must be an object.');
    }
    const { version, agents } = reg;
    if (version !== 1) {
        throw new Error('agent registry version must be 1.');
    }
    if (!Array.isArray(agents)) {
        throw new Error('agent registry agents must be an array.');
    }
    // Track ids already encountered so the first repeat is reported.
    const knownIds = new Set();
    for (let index = 0; index < agents.length; index += 1) {
        const entry = agents[index];
        validateAgentDefinition(entry);
        if (knownIds.has(entry.id)) {
            throw new Error(`agent id must be unique: ${entry.id}`);
        }
        knownIds.add(entry.id);
    }
}
|
|
21
|
+
/**
 * Validates one agent definition.
 *
 * Checks, in order: the definition is an object; `id` is a non-empty,
 * slug-like string; `version` is 1; the system prompt is well formed;
 * `retrievalDefaults` is an object with a non-empty `namespace` array of
 * non-blank strings and (if present) a positive integer `topK`; the optional
 * `toolPolicy` has a valid mode and a unique list of non-blank tool names.
 * Finally the retrieval defaults are run through the shared query option
 * validator.
 *
 * @param agent - Candidate agent definition.
 * @throws Error describing the first violation found.
 */
export function validateAgentDefinition(agent) {
    const isBlankOrNotString = (value) => typeof value !== 'string' || !value.trim();
    if (typeof agent !== 'object' || !agent) {
        throw new Error('agent definition must be an object.');
    }
    if (isBlankOrNotString(agent.id)) {
        throw new Error('agent id must be a non-empty string.');
    }
    const slugPattern = /^[a-z0-9]+(?:[._-][a-z0-9]+)*$/;
    if (!slugPattern.test(agent.id)) {
        throw new Error(`agent id must be slug-like: ${agent.id}`);
    }
    if (agent.version !== 1) {
        throw new Error(`agent ${agent.id} version must be 1.`);
    }
    validateSystemPrompt(agent);
    const retrieval = agent.retrievalDefaults;
    if (typeof retrieval !== 'object' || !retrieval) {
        throw new Error(`agent ${agent.id} retrievalDefaults must be an object.`);
    }
    const { namespace, topK, queryExpansion, semantic, minScore, requirePhrases, source } = retrieval;
    const namespaceOk = Array.isArray(namespace) &&
        namespace.length !== 0 &&
        !namespace.some(isBlankOrNotString);
    if (!namespaceOk) {
        throw new Error(`agent ${agent.id} retrievalDefaults.namespace must be a non-empty string array.`);
    }
    if (topK !== undefined && !(Number.isInteger(topK) && topK >= 1)) {
        throw new Error(`agent ${agent.id} retrievalDefaults.topK must be a positive integer.`);
    }
    const policy = agent.toolPolicy;
    if (policy) {
        const { mode, tools } = policy;
        if (!['allow', 'deny'].includes(mode)) {
            throw new Error(`agent ${agent.id} toolPolicy.mode must be "allow" or "deny".`);
        }
        if (!Array.isArray(tools) || tools.some(isBlankOrNotString)) {
            throw new Error(`agent ${agent.id} toolPolicy.tools must be a string array.`);
        }
        // A Set collapses repeats, so a size mismatch means duplicates exist.
        if (new Set(tools).size !== tools.length) {
            throw new Error(`agent ${agent.id} toolPolicy.tools must contain unique values.`);
        }
    }
    // Reuse the query-level validator on the option-shaped subset of the defaults.
    validateQueryOptions({
        namespace,
        topK,
        queryExpansion,
        semantic,
        minScore,
        requirePhrases,
        source,
    });
}
|
|
72
|
+
/**
 * Lists the ids of the agents stored in `pack.meta.agents`.
 *
 * @param pack - Mounted pack.
 * @returns Agent ids in registry order; an empty array when the pack has no
 *   registry or the registry has no agents.
 */
export function listAgents(pack) {
    const entries = pack.meta.agents?.agents;
    return entries?.length ? entries.map((entry) => entry.id) : [];
}
|
|
78
|
+
/**
 * Finds the agent with the given id in `pack.meta.agents`.
 *
 * @param pack - Mounted pack.
 * @param agentId - Id to look for.
 * @returns The first agent whose `id` equals `agentId`, or `undefined` when
 *   the pack has no registry or no agent matches.
 */
export function getAgent(pack, agentId) {
    const registry = pack.meta.agents;
    if (registry === undefined || registry === null) {
        return undefined;
    }
    for (const candidate of registry.agents) {
        if (candidate.id === agentId) {
            return candidate;
        }
    }
    return undefined;
}
|
|
81
|
+
/**
 * Renders an agent's system prompt to a single string.
 *
 * Array prompts are joined with newlines and `patch` is ignored. Markdown
 * template prompts have every `{{ key }}` placeholder replaced by
 * `String(patch[key])`.
 *
 * Only own properties of `patch` count as supplied values. Inherited members
 * such as `constructor` or `toString` are treated as missing, so a template
 * containing `{{constructor}}` fails loudly instead of silently embedding a
 * stringified built-in.
 *
 * @param agent - Agent whose `systemPrompt` is rendered.
 * @param patch - Placeholder values keyed by placeholder name.
 * @returns The rendered prompt.
 * @throws Error when a placeholder has no own value in `patch`; the first
 *   missing placeholder in template order is reported.
 */
export function buildSystemPrompt(agent, patch = {}) {
    const template = agent.systemPrompt;
    if (Array.isArray(template)) {
        return template.join('\n');
    }
    const hasOwn = Object.prototype.hasOwnProperty;
    // The replacer visits placeholders left to right, so the first missing
    // key aborts rendering before any partial result can escape.
    return template.template.replace(/\{\{\s*([A-Za-z0-9_.-]+)\s*\}\}/g, (_match, key) => {
        if (!hasOwn.call(patch, key)) {
            throw new Error(`agent ${agent.id} system prompt missing patch value for placeholder: ${key}`);
        }
        return String(patch[key]);
    });
}
|
|
95
|
+
/**
 * Resolves an agent from the pack and merges its retrieval defaults with the
 * caller's query options.
 *
 * Merge rules:
 * - top-level caller options override the agent defaults;
 * - `namespace` is always taken from the agent, whatever the caller passes;
 * - `queryExpansion`, `semantic` and `semantic.blend` are merged shallowly,
 *   caller values winning; each is removed from the result when neither side
 *   supplied it.
 *
 * The merged options are validated before the result is returned.
 *
 * @param pack - Mounted pack containing the agent registry.
 * @param input - Agent id plus optional caller query options and prompt patch.
 * @returns The agent, its rendered system prompt, and the merged options.
 * @throws Error when no agent has `input.agentId`, or when option validation
 *   or prompt rendering fails.
 */
export function resolveAgent(pack, input) {
    const agent = getAgent(pack, input.agentId);
    if (!agent) {
        throw new Error(`agent not found: ${input.agentId}`);
    }
    const { namespace, topK, queryExpansion, semantic, minScore, requirePhrases, source } = agent.retrievalDefaults;
    const agentDefaults = {
        namespace,
        topK,
        queryExpansion,
        semantic,
        minScore,
        requirePhrases,
        source,
    };
    const overrides = input.query ?? {};
    const anyExpansion = Boolean(queryExpansion || overrides.queryExpansion);
    const anySemantic = Boolean(semantic || overrides.semantic);
    const anyBlend = Boolean(semantic?.blend || overrides.semantic?.blend);
    const retrievalOptions = {
        ...agentDefaults,
        ...overrides,
        // Re-assert the agent namespace after the caller spread.
        namespace,
        queryExpansion: { ...queryExpansion, ...overrides.queryExpansion },
        semantic: {
            ...semantic,
            ...overrides.semantic,
            blend: { ...semantic?.blend, ...overrides.semantic?.blend },
        },
    };
    // Drop the placeholder objects created above when neither side set them.
    if (!anyExpansion) {
        delete retrievalOptions.queryExpansion;
    }
    if (!anySemantic) {
        delete retrievalOptions.semantic;
    }
    else if (!anyBlend) {
        delete retrievalOptions.semantic.blend;
    }
    validateQueryOptions(retrievalOptions);
    const systemPrompt = buildSystemPrompt(agent, input.patch);
    return { agent, systemPrompt, retrievalOptions };
}
|
|
143
|
+
/**
 * Decides whether an agent may use a tool.
 *
 * Without a `toolPolicy` every tool is allowed. With mode `'allow'` only
 * listed tools are allowed; otherwise the list is treated as a deny-list.
 *
 * @param agent - Agent definition to consult.
 * @param toolId - Tool identifier to check.
 * @returns `true` when the tool is permitted.
 */
export function isToolAllowed(agent, toolId) {
    const { toolPolicy } = agent;
    if (!toolPolicy) {
        return true;
    }
    const listed = toolPolicy.tools.includes(toolId);
    return toolPolicy.mode === 'allow' ? listed : !listed;
}
|
|
153
|
+
/**
 * Throws unless `isToolAllowed(agent, toolId)` is true.
 *
 * @param agent - Agent definition to consult.
 * @param toolId - Tool identifier to check.
 * @throws Error naming the agent and the rejected tool.
 */
export function assertToolAllowed(agent, toolId) {
    const permitted = isToolAllowed(agent, toolId);
    if (permitted) {
        return;
    }
    throw new Error(`agent ${agent.id} does not allow tool: ${toolId}`);
}
|
|
158
|
+
/**
 * Validates `agent.systemPrompt`.
 *
 * An array prompt must be non-empty, contain only strings, and have some
 * non-whitespace content once joined. Any other prompt must be an object with
 * `format === 'markdown'` and a non-blank string `template`.
 *
 * @param agent - Agent whose prompt is checked; `agent.id` is used in messages.
 * @throws Error describing the first violation found.
 */
function validateSystemPrompt(agent) {
    const { systemPrompt } = agent;
    if (!Array.isArray(systemPrompt)) {
        const isMarkdownTemplate = Boolean(systemPrompt) &&
            systemPrompt.format === 'markdown' &&
            typeof systemPrompt.template === 'string' &&
            systemPrompt.template.trim() !== '';
        if (!isMarkdownTemplate) {
            throw new Error(`agent ${agent.id} systemPrompt markdown template must be a non-empty string.`);
        }
        return;
    }
    const onlyStrings = systemPrompt.length > 0 &&
        systemPrompt.every((line) => typeof line === 'string');
    if (!onlyStrings) {
        throw new Error(`agent ${agent.id} systemPrompt must be a non-empty string array.`);
    }
    // Lines made up solely of whitespace do not count as a prompt.
    if (systemPrompt.join('').trim() === '') {
        throw new Error(`agent ${agent.id} systemPrompt must not be empty.`);
    }
}
|
package/dist/builder.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { AgentDefinitionV1, AgentRegistry } from './agent.js';
|
|
1
2
|
export type BuildInputDoc = {
|
|
2
3
|
id?: string;
|
|
3
4
|
heading?: string;
|
|
@@ -5,6 +6,7 @@ export type BuildInputDoc = {
|
|
|
5
6
|
text: string;
|
|
6
7
|
};
|
|
7
8
|
export type BuildPackOptions = {
|
|
9
|
+
agents?: AgentRegistry | AgentDefinitionV1[];
|
|
8
10
|
semantic?: {
|
|
9
11
|
enabled: boolean;
|
|
10
12
|
modelId: string;
|
package/dist/builder.js
CHANGED
|
@@ -8,6 +8,7 @@ import { buildIndex } from './indexer.js';
|
|
|
8
8
|
import { tokenize } from './tokenize.js';
|
|
9
9
|
import { getTextEncoder } from './utils/utf8.js';
|
|
10
10
|
import { encodeScaleF16, quantizeEmbeddingInt8L2Norm } from './semantic.js';
|
|
11
|
+
import { validateAgentRegistry } from './agent.js';
|
|
11
12
|
export async function buildPack(docs, opts = {}) {
|
|
12
13
|
const normalizedDocs = validateDocs(docs);
|
|
13
14
|
// Prepare blocks (strip MD) and carry heading/docId for optional boosts.
|
|
@@ -21,6 +22,7 @@ export async function buildPack(docs, opts = {}) {
|
|
|
21
22
|
const blockTokenLens = blocks.map((b) => tokenize(b.text).length);
|
|
22
23
|
const totalTokens = blockTokenLens.reduce((sum, len) => sum + len, 0);
|
|
23
24
|
const avgBlockLen = blocks.length ? totalTokens / blocks.length : 1;
|
|
25
|
+
const agents = normalizeAgents(opts.agents);
|
|
24
26
|
const meta = {
|
|
25
27
|
version: 3,
|
|
26
28
|
stats: {
|
|
@@ -29,6 +31,7 @@ export async function buildPack(docs, opts = {}) {
|
|
|
29
31
|
terms: lexicon.length,
|
|
30
32
|
avgBlockLen,
|
|
31
33
|
},
|
|
34
|
+
...(agents ? { agents } : {}),
|
|
32
35
|
};
|
|
33
36
|
// Persist blocks as objects to optionally carry heading/docId/token length.
|
|
34
37
|
const blocksPayload = blocks.map((b, i) => ({
|
|
@@ -47,12 +50,18 @@ export async function buildPack(docs, opts = {}) {
|
|
|
47
50
|
const semanticSection = semanticEnabled && opts.semantic
|
|
48
51
|
? buildSemanticSection(blocks.length, opts.semantic)
|
|
49
52
|
: undefined;
|
|
50
|
-
const semBytes = semanticSection
|
|
53
|
+
const semBytes = semanticSection
|
|
54
|
+
? enc.encode(JSON.stringify(semanticSection.semJson))
|
|
55
|
+
: undefined;
|
|
51
56
|
const semBlob = semanticSection?.semBlob;
|
|
52
|
-
const totalLength = 4 +
|
|
53
|
-
|
|
54
|
-
4 +
|
|
55
|
-
|
|
57
|
+
const totalLength = 4 +
|
|
58
|
+
metaBytes.length +
|
|
59
|
+
4 +
|
|
60
|
+
lexBytes.length +
|
|
61
|
+
4 +
|
|
62
|
+
postings.length * 4 +
|
|
63
|
+
4 +
|
|
64
|
+
blocksBytes.length +
|
|
56
65
|
(semanticEnabled && semBytes && semBlob
|
|
57
66
|
? 4 + semBytes.length + 4 + semBlob.length
|
|
58
67
|
: 0);
|
|
@@ -92,6 +101,15 @@ export async function buildPack(docs, opts = {}) {
|
|
|
92
101
|
}
|
|
93
102
|
return out;
|
|
94
103
|
}
|
|
104
|
+
/**
 * Normalizes the `agents` build option into a validated registry.
 *
 * A bare array of agent definitions is wrapped as `{ version: 1, agents }`;
 * a registry object is used as given. Either way the result is validated.
 *
 * @param input - Agent registry, array of agent definitions, or a falsy value.
 * @returns The validated registry, or `undefined` when `input` is falsy.
 * @throws Error when registry validation fails.
 */
function normalizeAgents(input) {
    if (input) {
        let registry = input;
        if (Array.isArray(input)) {
            registry = { version: 1, agents: input };
        }
        validateAgentRegistry(registry);
        return registry;
    }
    return undefined;
}
|
|
95
113
|
function buildSemanticSection(blockCount, semantic) {
|
|
96
114
|
const { embeddings } = semantic;
|
|
97
115
|
if (!Array.isArray(embeddings) || embeddings.length !== blockCount) {
|
|
@@ -133,7 +151,11 @@ function buildSemanticSection(blockCount, semantic) {
|
|
|
133
151
|
perVectorScale: true,
|
|
134
152
|
blocks: {
|
|
135
153
|
vectors: { byteOffset: vecByteOffset, length: vecs.length },
|
|
136
|
-
scales: {
|
|
154
|
+
scales: {
|
|
155
|
+
byteOffset: scalesByteOffset,
|
|
156
|
+
length: scales.length,
|
|
157
|
+
encoding: 'float16',
|
|
158
|
+
},
|
|
137
159
|
},
|
|
138
160
|
};
|
|
139
161
|
return { semJson, semBlob };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
export { mountPack, hasSemantic } from './pack.js';
|
|
2
|
-
export { query, lexConfidence, validateSemanticQueryOptions } from './query.js';
|
|
2
|
+
export { query, lexConfidence, validateQueryOptions, validateSemanticQueryOptions, } from './query.js';
|
|
3
3
|
export { makeContextPatch } from './patch.js';
|
|
4
4
|
export { buildPack } from './builder.js';
|
|
5
|
-
export { quantizeEmbeddingInt8L2Norm, encodeScaleF16, decodeScaleF16 } from './semantic.js';
|
|
5
|
+
export { quantizeEmbeddingInt8L2Norm, encodeScaleF16, decodeScaleF16, } from './semantic.js';
|
|
6
|
+
export { listAgents, getAgent, resolveAgent, buildSystemPrompt, isToolAllowed, assertToolAllowed, validateAgentRegistry, validateAgentDefinition, } from './agent.js';
|
|
6
7
|
export type { MountOptions, PackMeta, Pack } from './pack.js';
|
|
7
8
|
export type { QueryOptions, Hit } from './query.js';
|
|
8
9
|
export type { ContextPatch } from './patch.js';
|
|
9
10
|
export type { BuildInputDoc, BuildPackOptions } from './builder.js';
|
|
11
|
+
export type { AgentPromptTemplate, AgentToolPolicy, AgentRetrievalDefaults, AgentDefinitionV1, AgentRegistry, ResolveAgentInput, ResolvedAgent, } from './agent.js';
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
// src/index.ts
|
|
2
2
|
export { mountPack, hasSemantic } from './pack.js';
|
|
3
|
-
export { query, lexConfidence, validateSemanticQueryOptions } from './query.js';
|
|
3
|
+
export { query, lexConfidence, validateQueryOptions, validateSemanticQueryOptions, } from './query.js';
|
|
4
4
|
export { makeContextPatch } from './patch.js';
|
|
5
5
|
export { buildPack } from './builder.js';
|
|
6
|
-
export { quantizeEmbeddingInt8L2Norm, encodeScaleF16, decodeScaleF16 } from './semantic.js';
|
|
6
|
+
export { quantizeEmbeddingInt8L2Norm, encodeScaleF16, decodeScaleF16, } from './semantic.js';
|
|
7
|
+
export { listAgents, getAgent, resolveAgent, buildSystemPrompt, isToolAllowed, assertToolAllowed, validateAgentRegistry, validateAgentDefinition, } from './agent.js';
|
package/dist/pack.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { AgentRegistry } from './agent.js';
|
|
1
2
|
export type MountOptions = {
|
|
2
3
|
src: string | ArrayBufferLike | Uint8Array;
|
|
3
4
|
};
|
|
@@ -9,6 +10,7 @@ export type PackMeta = {
|
|
|
9
10
|
terms: number;
|
|
10
11
|
avgBlockLen?: number;
|
|
11
12
|
};
|
|
13
|
+
agents?: AgentRegistry;
|
|
12
14
|
};
|
|
13
15
|
export type Pack = {
|
|
14
16
|
meta: PackMeta;
|
package/dist/pack.js
CHANGED
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
* Includes RN/Expo-safe TextDecoder via ponyfill.
|
|
8
8
|
*/
|
|
9
9
|
import { getTextDecoder } from './utils/utf8.js';
|
|
10
|
+
import { validateAgentRegistry } from './agent.js';
|
|
10
11
|
export function hasSemantic(pack) {
|
|
11
12
|
return Boolean(pack.semantic && pack.semantic.dims > 0 && pack.semantic.vecs.length > 0);
|
|
12
13
|
}
|
|
@@ -21,6 +22,9 @@ export async function mountPack(opts) {
|
|
|
21
22
|
const metaJson = dec.decode(new Uint8Array(buf, offset, metaLen));
|
|
22
23
|
offset += metaLen;
|
|
23
24
|
const meta = JSON.parse(metaJson);
|
|
25
|
+
if (meta.agents) {
|
|
26
|
+
validateAgentRegistry(meta.agents);
|
|
27
|
+
}
|
|
24
28
|
// lexicon
|
|
25
29
|
const lexLen = dv.getUint32(offset, true);
|
|
26
30
|
offset += 4;
|
|
@@ -89,7 +93,17 @@ export async function mountPack(opts) {
|
|
|
89
93
|
const semBlob = new Uint8Array(buf, offset, semBlobLen);
|
|
90
94
|
semantic = parseSemanticSection(sem, semBlob);
|
|
91
95
|
}
|
|
92
|
-
return {
|
|
96
|
+
return {
|
|
97
|
+
meta,
|
|
98
|
+
lexicon,
|
|
99
|
+
postings,
|
|
100
|
+
blocks,
|
|
101
|
+
headings,
|
|
102
|
+
docIds,
|
|
103
|
+
namespaces,
|
|
104
|
+
blockTokenLens,
|
|
105
|
+
semantic,
|
|
106
|
+
};
|
|
93
107
|
}
|
|
94
108
|
function parseSemanticSection(sem, blob) {
|
|
95
109
|
const vectors = sem?.blocks?.vectors;
|
|
@@ -133,12 +147,17 @@ async function resolveToBuffer(src) {
|
|
|
133
147
|
return src;
|
|
134
148
|
}
|
|
135
149
|
function isNodeRuntime() {
|
|
136
|
-
|
|
150
|
+
const p = globalThis
|
|
151
|
+
.process;
|
|
152
|
+
return !!p?.versions?.node;
|
|
137
153
|
}
|
|
138
154
|
function isLikelyLocalPath(value) {
|
|
139
155
|
if (value.startsWith('file://'))
|
|
140
156
|
return true;
|
|
141
|
-
if (value.startsWith('./') ||
|
|
157
|
+
if (value.startsWith('./') ||
|
|
158
|
+
value.startsWith('../') ||
|
|
159
|
+
value.startsWith('/') ||
|
|
160
|
+
value.startsWith('~'))
|
|
142
161
|
return true;
|
|
143
162
|
if (/^[A-Za-z]:[\\/]/.test(value))
|
|
144
163
|
return true; // Windows absolute path
|
package/dist/query.d.ts
CHANGED
|
@@ -26,6 +26,7 @@ export type QueryOptions = {
|
|
|
26
26
|
force?: boolean;
|
|
27
27
|
};
|
|
28
28
|
};
|
|
29
|
+
export declare function validateQueryOptions(opts?: QueryOptions): void;
|
|
29
30
|
export declare function validateSemanticQueryOptions(options?: QueryOptions["semantic"]): void;
|
|
30
31
|
export type Hit = {
|
|
31
32
|
blockId: number;
|
package/dist/query.js
CHANGED
|
@@ -14,6 +14,40 @@ import { minCoverSpan, proximityMultiplier } from "./quality/proximity.js";
|
|
|
14
14
|
import { diversifyAndDedupe } from "./quality/diversify.js";
|
|
15
15
|
import { knsSignature, knsDistance } from "./quality/signature.js";
|
|
16
16
|
import { decodeScaleF16, quantizeEmbeddingInt8L2Norm } from "./semantic.js";
|
|
17
|
+
/**
 * Validates the options object accepted by `query(...)`.
 *
 * Checks `topK`, `minScore`, `requirePhrases`, `namespace`, `source`, the
 * numeric and boolean fields of `queryExpansion`, and finally delegates the
 * `semantic` options to `validateSemanticQueryOptions`. Fields that are
 * `undefined` are skipped, and a falsy `opts` is accepted as-is.
 *
 * @param opts - Query options to validate.
 * @throws Error describing the first violation found.
 */
export function validateQueryOptions(opts) {
    if (!opts) {
        return;
    }
    // Each predicate is true when a provided value breaks its constraint.
    const badPositiveInt = (value) => value !== undefined && !(Number.isInteger(value) && value >= 1);
    const badNonNegative = (value) => value !== undefined && !(Number.isFinite(value) && value >= 0);
    if (badPositiveInt(opts.topK)) {
        throw new Error("query(...): topK must be a positive integer.");
    }
    if (badNonNegative(opts.minScore)) {
        throw new Error("query(...): minScore must be a finite number >= 0.");
    }
    const phrases = opts.requirePhrases;
    if (phrases !== undefined) {
        const phrasesOk = Array.isArray(phrases) && phrases.every((phrase) => typeof phrase === "string");
        if (!phrasesOk) {
            throw new Error("query(...): requirePhrases must be an array of strings when provided.");
        }
    }
    validateStringOrStringArrayOption("namespace", opts.namespace);
    validateStringOrStringArrayOption("source", opts.source);
    const expansion = opts.queryExpansion;
    if (expansion) {
        if (expansion.enabled !== undefined && typeof expansion.enabled !== "boolean") {
            throw new Error("query(...): queryExpansion.enabled must be a boolean when provided.");
        }
        if (badPositiveInt(expansion.docs)) {
            throw new Error("query(...): queryExpansion.docs must be a positive integer.");
        }
        if (badPositiveInt(expansion.terms)) {
            throw new Error("query(...): queryExpansion.terms must be a positive integer.");
        }
        if (badNonNegative(expansion.weight)) {
            throw new Error("query(...): queryExpansion.weight must be a finite number >= 0.");
        }
        if (badPositiveInt(expansion.minTermLength)) {
            throw new Error("query(...): queryExpansion.minTermLength must be a positive integer.");
        }
    }
    validateSemanticQueryOptions(opts.semantic);
}
|
|
17
51
|
export function validateSemanticQueryOptions(options) {
|
|
18
52
|
if (!options)
|
|
19
53
|
return;
|
|
@@ -48,7 +82,7 @@ export function validateSemanticQueryOptions(options) {
|
|
|
48
82
|
}
|
|
49
83
|
}
|
|
50
84
|
export function query(pack, q, opts = {}) {
|
|
51
|
-
|
|
85
|
+
validateQueryOptions(opts);
|
|
52
86
|
const topK = opts.topK ?? 10;
|
|
53
87
|
const minScore = Number.isFinite(opts.minScore) ? Math.max(0, opts.minScore) : 0;
|
|
54
88
|
const expansionOpts = {
|
|
@@ -419,3 +453,11 @@ function normalizeSourceFilter(input) {
|
|
|
419
453
|
const values = Array.isArray(input) ? input : [input];
|
|
420
454
|
return new Set(values.map((v) => normalize(v)).filter(Boolean));
|
|
421
455
|
}
|
|
456
|
+
/**
 * Ensures an optional option is either a string or an array of strings.
 *
 * @param name - Option name, used in the error message.
 * @param value - Value to check; `undefined` is accepted.
 * @throws Error when `value` is provided but is neither a string nor an
 *   array containing only strings.
 */
function validateStringOrStringArrayOption(name, value) {
    if (value === undefined || typeof value === "string") {
        return;
    }
    if (Array.isArray(value) && !value.some((entry) => typeof entry !== "string")) {
        return;
    }
    throw new Error(`query(...): ${name} must be a string or an array of strings when provided.`);
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "knolo-core",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "3.1.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Local-first knowledge packs for small LLMs.",
|
|
6
6
|
"keywords": [
|
|
@@ -34,7 +34,9 @@
|
|
|
34
34
|
"build": "tsc -p tsconfig.json",
|
|
35
35
|
"prepublishOnly": "npm run build",
|
|
36
36
|
"smoke": "node scripts/smoke.mjs",
|
|
37
|
-
"test": "npm run build && node scripts/test.mjs"
|
|
37
|
+
"test": "npm run build && node scripts/test.mjs",
|
|
38
|
+
"format": "prettier --write src/agent.ts src/pack.ts src/builder.ts src/index.ts scripts/test.mjs README.md",
|
|
39
|
+
"format:check": "prettier --check src/agent.ts src/pack.ts src/builder.ts src/index.ts scripts/test.mjs README.md"
|
|
38
40
|
},
|
|
39
41
|
"devDependencies": {
|
|
40
42
|
"typescript": "^5.5.0",
|