@kaelio/ktx 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/python/{kaelio_ktx-0.1.0-py3-none-any.whl → kaelio_ktx-0.2.0-py3-none-any.whl} +0 -0
- package/assets/python/manifest.json +4 -4
- package/dist/admin-reindex.d.ts +15 -0
- package/dist/admin-reindex.js +168 -0
- package/dist/admin-reindex.test.js +116 -0
- package/dist/{dev.d.ts → admin.d.ts} +1 -1
- package/dist/{dev.js → admin.js} +14 -12
- package/dist/admin.test.d.ts +1 -0
- package/dist/{dev.test.js → admin.test.js} +36 -31
- package/dist/cli-program.js +7 -7
- package/dist/cli-program.test.js +1 -1
- package/dist/cli-runtime.d.ts +2 -0
- package/dist/commands/connection-commands.js +11 -10
- package/dist/commands/connection-selection.d.ts +11 -0
- package/dist/commands/connection-selection.js +9 -0
- package/dist/commands/ingest-commands.js +32 -26
- package/dist/commands/knowledge-commands.js +17 -28
- package/dist/commands/mcp-commands.js +17 -11
- package/dist/commands/setup-commands.js +14 -26
- package/dist/commands/sl-commands.js +27 -32
- package/dist/doctor.test.js +7 -8
- package/dist/example-smoke.test.js +3 -3
- package/dist/index.test.js +102 -70
- package/dist/ingest-depth.js +0 -1
- package/dist/ingest.test-utils.js +2 -2
- package/dist/ingest.test.js +4 -4
- package/dist/io/print-list.test.js +4 -4
- package/dist/knowledge.js +1 -1
- package/dist/managed-local-embeddings.d.ts +2 -0
- package/dist/managed-local-embeddings.js +2 -0
- package/dist/managed-local-embeddings.test.js +2 -0
- package/dist/managed-mcp-daemon.js +3 -2
- package/dist/managed-mcp-daemon.test.js +25 -0
- package/dist/managed-python-command.js +2 -2
- package/dist/managed-python-command.test.js +4 -3
- package/dist/managed-python-daemon.js +3 -2
- package/dist/managed-python-daemon.test.js +20 -0
- package/dist/managed-python-runtime.d.ts +5 -1
- package/dist/managed-python-runtime.js +50 -6
- package/dist/managed-python-runtime.test.js +53 -23
- package/dist/memory-flow-tui.test.js +2 -2
- package/dist/next-steps.d.ts +6 -6
- package/dist/next-steps.js +4 -4
- package/dist/next-steps.test.js +5 -5
- package/dist/print-command-tree.test.js +1 -1
- package/dist/proxy-env.d.ts +1 -0
- package/dist/proxy-env.js +23 -0
- package/dist/proxy-env.test.d.ts +1 -0
- package/dist/proxy-env.test.js +17 -0
- package/dist/public-ingest.js +3 -5
- package/dist/public-ingest.test.js +7 -3
- package/dist/runtime.test.js +2 -1
- package/dist/scan.test.js +2 -2
- package/dist/setup-agents.js +6 -4
- package/dist/setup-agents.test.js +35 -1
- package/dist/setup-embeddings.d.ts +1 -0
- package/dist/setup-embeddings.js +29 -7
- package/dist/setup-embeddings.test.js +49 -7
- package/dist/setup-models.d.ts +0 -1
- package/dist/setup-models.js +2 -3
- package/dist/setup-models.test.js +8 -10
- package/dist/setup-project.d.ts +9 -1
- package/dist/setup-project.js +52 -25
- package/dist/setup-project.test.js +8 -8
- package/dist/setup-runtime.test.js +4 -2
- package/dist/setup.d.ts +1 -2
- package/dist/setup.js +21 -5
- package/dist/setup.test.js +160 -43
- package/dist/sl.js +1 -1
- package/dist/sl.test.js +2 -1
- package/dist/standalone-smoke.test.js +8 -5
- package/dist/status-project.js +1 -10
- package/node_modules/@ktx/context/dist/index-sync/index.d.ts +2 -0
- package/node_modules/@ktx/context/dist/index-sync/index.js +1 -0
- package/node_modules/@ktx/context/dist/index-sync/reindex.d.ts +20 -0
- package/node_modules/@ktx/context/dist/index-sync/reindex.js +141 -0
- package/node_modules/@ktx/context/dist/index-sync/reindex.test.d.ts +1 -0
- package/node_modules/@ktx/context/dist/index-sync/reindex.test.js +139 -0
- package/node_modules/@ktx/context/dist/index-sync/types.d.ts +29 -0
- package/node_modules/@ktx/context/dist/index-sync/types.js +1 -0
- package/node_modules/@ktx/context/dist/index.d.ts +1 -0
- package/node_modules/@ktx/context/dist/index.js +1 -0
- package/node_modules/@ktx/context/dist/ingest/adapters/historic-sql/local-ingest-acceptance.test.js +1 -1
- package/node_modules/@ktx/context/dist/ingest/local-bundle-ingest.test.js +8 -8
- package/node_modules/@ktx/context/dist/ingest/local-bundle-runtime.js +4 -1
- package/node_modules/@ktx/context/dist/ingest/local-bundle-runtime.test.js +3 -3
- package/node_modules/@ktx/context/dist/ingest/local-embedding-provider.integration.test.js +9 -10
- package/node_modules/@ktx/context/dist/ingest/memory-flow/schema.d.ts +2 -2
- package/node_modules/@ktx/context/dist/ingest/report-snapshot.d.ts +2 -2
- package/node_modules/@ktx/context/dist/llm/local-config.js +2 -15
- package/node_modules/@ktx/context/dist/llm/local-config.test.js +3 -7
- package/node_modules/@ktx/context/dist/memory/local-memory.js +9 -3
- package/node_modules/@ktx/context/dist/project/config.d.ts +0 -5
- package/node_modules/@ktx/context/dist/project/config.js +5 -5
- package/node_modules/@ktx/context/dist/project/config.test.js +4 -7
- package/node_modules/@ktx/context/dist/scan/enrichment-state.test.js +4 -4
- package/node_modules/@ktx/context/dist/scan/index.d.ts +1 -1
- package/node_modules/@ktx/context/dist/scan/local-enrichment.d.ts +2 -6
- package/node_modules/@ktx/context/dist/scan/local-enrichment.js +31 -47
- package/node_modules/@ktx/context/dist/scan/local-enrichment.test.js +35 -18
- package/node_modules/@ktx/context/dist/scan/local-scan.test.js +2 -3
- package/node_modules/@ktx/context/dist/sl/ports.d.ts +3 -3
- package/node_modules/@ktx/context/dist/sl/sl-search.service.d.ts +3 -2
- package/node_modules/@ktx/context/dist/sl/sl-search.service.js +47 -45
- package/node_modules/@ktx/context/dist/sl/sl-search.service.test.js +61 -0
- package/node_modules/@ktx/context/dist/sl/sqlite-sl-sources-index.d.ts +4 -3
- package/node_modules/@ktx/context/dist/sl/sqlite-sl-sources-index.js +15 -5
- package/node_modules/@ktx/context/dist/sl/sqlite-sl-sources-index.test.js +24 -0
- package/node_modules/@ktx/context/dist/wiki/knowledge-wiki.service.d.ts +3 -2
- package/node_modules/@ktx/context/dist/wiki/knowledge-wiki.service.js +62 -51
- package/node_modules/@ktx/context/dist/wiki/knowledge-wiki.service.test.js +59 -3
- package/node_modules/@ktx/context/dist/wiki/ports.d.ts +3 -3
- package/node_modules/@ktx/context/dist/wiki/sqlite-knowledge-index.d.ts +33 -0
- package/node_modules/@ktx/context/dist/wiki/sqlite-knowledge-index.js +155 -2
- package/node_modules/@ktx/context/dist/wiki/sqlite-knowledge-index.test.js +26 -0
- package/node_modules/@ktx/context/package.json +5 -0
- package/node_modules/@ktx/llm/dist/embedding-provider.d.ts +0 -7
- package/node_modules/@ktx/llm/dist/embedding-provider.js +12 -138
- package/node_modules/@ktx/llm/dist/embedding-provider.test.js +10 -25
- package/node_modules/@ktx/llm/dist/types.d.ts +1 -1
- package/package.json +1 -1
- /package/dist/{dev.test.d.ts → admin-reindex.test.d.ts} +0 -0
|
@@ -58,6 +58,7 @@ export class SqliteKnowledgeIndex {
|
|
|
58
58
|
path TEXT PRIMARY KEY,
|
|
59
59
|
key TEXT NOT NULL,
|
|
60
60
|
scope TEXT NOT NULL,
|
|
61
|
+
scope_id TEXT,
|
|
61
62
|
summary TEXT NOT NULL,
|
|
62
63
|
content TEXT NOT NULL,
|
|
63
64
|
tags TEXT NOT NULL,
|
|
@@ -81,6 +82,9 @@ export class SqliteKnowledgeIndex {
|
|
|
81
82
|
if (!columnNames.has('embedding_json')) {
|
|
82
83
|
this.db.exec('ALTER TABLE knowledge_pages ADD COLUMN embedding_json TEXT');
|
|
83
84
|
}
|
|
85
|
+
if (!columnNames.has('scope_id')) {
|
|
86
|
+
this.db.exec('ALTER TABLE knowledge_pages ADD COLUMN scope_id TEXT');
|
|
87
|
+
}
|
|
84
88
|
}
|
|
85
89
|
sync(pages) {
|
|
86
90
|
const keepPaths = pages.map((page) => page.path);
|
|
@@ -91,11 +95,12 @@ export class SqliteKnowledgeIndex {
|
|
|
91
95
|
? this.db.prepare('DELETE FROM knowledge_pages_fts')
|
|
92
96
|
: this.db.prepare(`DELETE FROM knowledge_pages_fts WHERE path NOT IN (${keepPaths.map(() => '?').join(', ')})`);
|
|
93
97
|
const upsertPage = this.db.prepare(`
|
|
94
|
-
INSERT INTO knowledge_pages (path, key, scope, summary, content, tags, search_text, embedding_json)
|
|
95
|
-
VALUES (@path, @key, @scope, @summary, @content, @tags, @searchText, @embeddingJson)
|
|
98
|
+
INSERT INTO knowledge_pages (path, key, scope, scope_id, summary, content, tags, search_text, embedding_json)
|
|
99
|
+
VALUES (@path, @key, @scope, @scopeId, @summary, @content, @tags, @searchText, @embeddingJson)
|
|
96
100
|
ON CONFLICT(path) DO UPDATE SET
|
|
97
101
|
key = excluded.key,
|
|
98
102
|
scope = excluded.scope,
|
|
103
|
+
scope_id = excluded.scope_id,
|
|
99
104
|
summary = excluded.summary,
|
|
100
105
|
content = excluded.content,
|
|
101
106
|
tags = excluded.tags,
|
|
@@ -116,6 +121,7 @@ export class SqliteKnowledgeIndex {
|
|
|
116
121
|
path: page.path,
|
|
117
122
|
key: page.key,
|
|
118
123
|
scope: page.scope,
|
|
124
|
+
scopeId: page.scopeId ?? null,
|
|
119
125
|
summary: page.summary,
|
|
120
126
|
content: searchText,
|
|
121
127
|
tags: page.tags.join(' '),
|
|
@@ -205,4 +211,151 @@ export class SqliteKnowledgeIndex {
|
|
|
205
211
|
score: scoreFromRank(row.rawScore),
|
|
206
212
|
}));
|
|
207
213
|
}
|
|
214
|
+
pathForPage(scope, scopeId, pageKey) {
|
|
215
|
+
return scope === 'GLOBAL' ? `wiki/global/${pageKey}.md` : `wiki/user/${scopeId ?? 'local'}/${pageKey}.md`;
|
|
216
|
+
}
|
|
217
|
+
async upsertPage(params) {
|
|
218
|
+
const path = this.pathForPage(params.scope, params.scopeId, params.pageKey);
|
|
219
|
+
const row = {
|
|
220
|
+
path,
|
|
221
|
+
key: params.pageKey,
|
|
222
|
+
scope: params.scope,
|
|
223
|
+
scopeId: params.scopeId,
|
|
224
|
+
summary: params.summary,
|
|
225
|
+
content: params.searchText,
|
|
226
|
+
tags: '',
|
|
227
|
+
searchText: params.searchText,
|
|
228
|
+
embeddingJson: params.embedding && params.embedding.length > 0 ? JSON.stringify(params.embedding) : null,
|
|
229
|
+
};
|
|
230
|
+
const write = this.db.transaction(() => {
|
|
231
|
+
this.db
|
|
232
|
+
.prepare(`
|
|
233
|
+
INSERT INTO knowledge_pages (path, key, scope, scope_id, summary, content, tags, search_text, embedding_json)
|
|
234
|
+
VALUES (@path, @key, @scope, @scopeId, @summary, @content, @tags, @searchText, @embeddingJson)
|
|
235
|
+
ON CONFLICT(path) DO UPDATE SET
|
|
236
|
+
key = excluded.key,
|
|
237
|
+
scope = excluded.scope,
|
|
238
|
+
scope_id = excluded.scope_id,
|
|
239
|
+
summary = excluded.summary,
|
|
240
|
+
content = excluded.content,
|
|
241
|
+
tags = excluded.tags,
|
|
242
|
+
search_text = excluded.search_text,
|
|
243
|
+
embedding_json = excluded.embedding_json
|
|
244
|
+
`)
|
|
245
|
+
.run(row);
|
|
246
|
+
this.db.prepare('DELETE FROM knowledge_pages_fts WHERE path = @path').run(row);
|
|
247
|
+
this.db
|
|
248
|
+
.prepare(`
|
|
249
|
+
INSERT INTO knowledge_pages_fts (path, key, summary, content, tags)
|
|
250
|
+
VALUES (@path, @key, @summary, @content, @tags)
|
|
251
|
+
`)
|
|
252
|
+
.run(row);
|
|
253
|
+
});
|
|
254
|
+
write();
|
|
255
|
+
}
|
|
256
|
+
async getExistingSearchTexts(scope, scopeId) {
|
|
257
|
+
const rows = this.db
|
|
258
|
+
.prepare(`
|
|
259
|
+
SELECT key, search_text, embedding_json
|
|
260
|
+
FROM knowledge_pages
|
|
261
|
+
WHERE scope = ?
|
|
262
|
+
AND scope_id IS ?
|
|
263
|
+
ORDER BY key ASC
|
|
264
|
+
`)
|
|
265
|
+
.all(scope, scopeId);
|
|
266
|
+
return new Map(rows.map((row) => [row.key, { searchText: row.search_text, hasEmbedding: row.embedding_json !== null }]));
|
|
267
|
+
}
|
|
268
|
+
async deleteStale(scope, scopeId, keepKeys) {
|
|
269
|
+
if (keepKeys.length === 0) {
|
|
270
|
+
return this.deleteByScope(scope, scopeId);
|
|
271
|
+
}
|
|
272
|
+
const placeholders = keepKeys.map(() => '?').join(', ');
|
|
273
|
+
const stale = this.db
|
|
274
|
+
.prepare(`
|
|
275
|
+
SELECT key
|
|
276
|
+
FROM knowledge_pages
|
|
277
|
+
WHERE scope = ?
|
|
278
|
+
AND scope_id IS ?
|
|
279
|
+
AND key NOT IN (${placeholders})
|
|
280
|
+
`)
|
|
281
|
+
.all(scope, scopeId, ...keepKeys);
|
|
282
|
+
for (const row of stale) {
|
|
283
|
+
await this.deleteByKey(scope, scopeId, row.key);
|
|
284
|
+
}
|
|
285
|
+
return stale.length;
|
|
286
|
+
}
|
|
287
|
+
async deleteByScope(scope, scopeId) {
|
|
288
|
+
return this.clear(scope, scopeId);
|
|
289
|
+
}
|
|
290
|
+
async deleteByKey(scope, scopeId, pageKey) {
|
|
291
|
+
const path = this.pathForPage(scope, scopeId, pageKey);
|
|
292
|
+
const remove = this.db.transaction(() => {
|
|
293
|
+
this.db.prepare('DELETE FROM knowledge_pages_fts WHERE path = ?').run(path);
|
|
294
|
+
const result = this.db.prepare('DELETE FROM knowledge_pages WHERE path = ?').run(path);
|
|
295
|
+
return Number(result.changes);
|
|
296
|
+
});
|
|
297
|
+
return remove();
|
|
298
|
+
}
|
|
299
|
+
clear(scope, scopeId) {
|
|
300
|
+
const rows = this.db
|
|
301
|
+
.prepare('SELECT path FROM knowledge_pages WHERE scope = ? AND scope_id IS ?')
|
|
302
|
+
.all(scope, scopeId);
|
|
303
|
+
const remove = this.db.transaction((paths) => {
|
|
304
|
+
for (const path of paths) {
|
|
305
|
+
this.db.prepare('DELETE FROM knowledge_pages_fts WHERE path = ?').run(path);
|
|
306
|
+
this.db.prepare('DELETE FROM knowledge_pages WHERE path = ?').run(path);
|
|
307
|
+
}
|
|
308
|
+
});
|
|
309
|
+
remove(rows.map((row) => row.path));
|
|
310
|
+
return rows.length;
|
|
311
|
+
}
|
|
312
|
+
async applyDiffTransactional(params) {
|
|
313
|
+
void params.runId;
|
|
314
|
+
for (const page of params.upserts) {
|
|
315
|
+
await this.upsertPage(page);
|
|
316
|
+
}
|
|
317
|
+
for (const page of params.deletes) {
|
|
318
|
+
await this.deleteByKey(page.scope, page.scopeId, page.pageKey);
|
|
319
|
+
}
|
|
320
|
+
}
|
|
321
|
+
async findPageByKey(scope, scopeId, pageKey) {
|
|
322
|
+
const path = this.pathForPage(scope, scopeId, pageKey);
|
|
323
|
+
const row = this.db.prepare('SELECT path, key FROM knowledge_pages WHERE path = ?').get(path);
|
|
324
|
+
return row ? { id: row.path, page_key: row.key } : null;
|
|
325
|
+
}
|
|
326
|
+
async listPagesForUser(userId) {
|
|
327
|
+
const rows = this.db
|
|
328
|
+
.prepare(`
|
|
329
|
+
SELECT path, key, scope, scope_id, summary, tags
|
|
330
|
+
FROM knowledge_pages
|
|
331
|
+
WHERE scope = 'GLOBAL'
|
|
332
|
+
OR (scope = 'USER' AND scope_id = ?)
|
|
333
|
+
ORDER BY scope ASC, key ASC
|
|
334
|
+
`)
|
|
335
|
+
.all(userId);
|
|
336
|
+
return rows.map((row) => ({
|
|
337
|
+
id: row.path,
|
|
338
|
+
page_key: row.key,
|
|
339
|
+
summary: row.summary,
|
|
340
|
+
scope: row.scope,
|
|
341
|
+
scope_id: row.scope_id,
|
|
342
|
+
tags: row.tags.split(/\s+/).filter(Boolean),
|
|
343
|
+
}));
|
|
344
|
+
}
|
|
345
|
+
async getUserPageCount(userId) {
|
|
346
|
+
const row = this.db
|
|
347
|
+
.prepare("SELECT COUNT(*) AS count FROM knowledge_pages WHERE scope = 'USER' AND scope_id = ?")
|
|
348
|
+
.get(userId);
|
|
349
|
+
return row.count;
|
|
350
|
+
}
|
|
351
|
+
async incrementUsageCount() { }
|
|
352
|
+
async searchRRF(userId, _embedding, queryText, limit) {
|
|
353
|
+
const allowedPages = new Map((await this.listPagesForUser(userId)).map((page) => [page.id, page]));
|
|
354
|
+
return this.search(queryText, limit)
|
|
355
|
+
.map((row) => {
|
|
356
|
+
const page = allowedPages.get(row.path);
|
|
357
|
+
return page ? { pageKey: page.page_key, summary: page.summary, rrfScore: row.score } : null;
|
|
358
|
+
})
|
|
359
|
+
.filter((row) => row !== null);
|
|
360
|
+
}
|
|
208
361
|
}
|
|
@@ -54,6 +54,32 @@ describe('SqliteKnowledgeIndex', () => {
|
|
|
54
54
|
index.rebuild([page()]);
|
|
55
55
|
expect(index.search('churn', 10)).toEqual([]);
|
|
56
56
|
});
|
|
57
|
+
it('clear removes one wiki scope and leaves other scopes intact', async () => {
|
|
58
|
+
const index = new SqliteKnowledgeIndex({ dbPath });
|
|
59
|
+
index.sync([
|
|
60
|
+
page({ path: 'wiki/global/revenue.md', key: 'revenue', scope: 'GLOBAL', scopeId: null }),
|
|
61
|
+
page({
|
|
62
|
+
path: 'wiki/user/local/revenue.md',
|
|
63
|
+
key: 'revenue',
|
|
64
|
+
scope: 'USER',
|
|
65
|
+
scopeId: 'local',
|
|
66
|
+
summary: 'Local revenue',
|
|
67
|
+
content: 'Local revenue notes.',
|
|
68
|
+
}),
|
|
69
|
+
page({
|
|
70
|
+
path: 'wiki/user/alex/revenue.md',
|
|
71
|
+
key: 'revenue',
|
|
72
|
+
scope: 'USER',
|
|
73
|
+
scopeId: 'alex',
|
|
74
|
+
summary: 'Alex revenue',
|
|
75
|
+
content: 'Alex revenue notes.',
|
|
76
|
+
}),
|
|
77
|
+
]);
|
|
78
|
+
expect(index.clear('USER', 'local')).toBe(1);
|
|
79
|
+
expect(index.search('Local', 10)).toEqual([]);
|
|
80
|
+
expect(index.search('Alex', 10)).toEqual([expect.objectContaining({ path: 'wiki/user/alex/revenue.md' })]);
|
|
81
|
+
expect(index.search('definition', 10)).toEqual([expect.objectContaining({ path: 'wiki/global/revenue.md' })]);
|
|
82
|
+
});
|
|
57
83
|
it('exposes existing search text and embedding state for incremental refresh', () => {
|
|
58
84
|
const index = new SqliteKnowledgeIndex({ dbPath });
|
|
59
85
|
index.sync([page({ path: 'wiki/global/revenue.md', key: 'revenue', embedding: [1, 0] })]);
|
|
@@ -46,6 +46,11 @@
|
|
|
46
46
|
"import": "./dist/ingest/metabase-mapping.js",
|
|
47
47
|
"default": "./dist/ingest/metabase-mapping.js"
|
|
48
48
|
},
|
|
49
|
+
"./index-sync": {
|
|
50
|
+
"types": "./dist/index-sync/index.d.ts",
|
|
51
|
+
"import": "./dist/index-sync/index.js",
|
|
52
|
+
"default": "./dist/index-sync/index.js"
|
|
53
|
+
},
|
|
49
54
|
"./scan": {
|
|
50
55
|
"types": "./dist/scan/index.d.ts",
|
|
51
56
|
"import": "./dist/scan/index.js",
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
import type { KtxEmbeddingConfig, KtxEmbeddingProvider } from './types.js';
|
|
2
2
|
type FetchFn = typeof fetch;
|
|
3
|
-
type SentenceTransformersCommand = 'embedding-compute' | 'embedding-compute-bulk';
|
|
4
|
-
type SentenceTransformersJsonRunner = (subcommand: SentenceTransformersCommand, payload: Record<string, unknown>) => Promise<Record<string, unknown>>;
|
|
5
3
|
export interface KtxEmbeddingProviderDeps {
|
|
6
4
|
createOpenAIClient?: (options: {
|
|
7
5
|
apiKey?: string;
|
|
@@ -25,11 +23,6 @@ export interface KtxEmbeddingProviderDeps {
|
|
|
25
23
|
};
|
|
26
24
|
};
|
|
27
25
|
fetch?: FetchFn;
|
|
28
|
-
runSentenceTransformersJson?: SentenceTransformersJsonRunner;
|
|
29
|
-
sentenceTransformersCommand?: string;
|
|
30
|
-
sentenceTransformersArgs?: string[];
|
|
31
|
-
sentenceTransformersCwd?: string;
|
|
32
|
-
sentenceTransformersEnv?: NodeJS.ProcessEnv;
|
|
33
26
|
}
|
|
34
27
|
export declare function createKtxEmbeddingProvider(config: KtxEmbeddingConfig, deps?: KtxEmbeddingProviderDeps): KtxEmbeddingProvider;
|
|
35
28
|
export {};
|
|
@@ -1,15 +1,6 @@
|
|
|
1
|
-
import { createHash } from 'node:crypto';
|
|
2
|
-
import { spawn } from 'node:child_process';
|
|
3
|
-
import { join } from 'node:path';
|
|
4
1
|
import OpenAI from 'openai';
|
|
5
2
|
const DEFAULT_BATCH_SIZE = 100;
|
|
6
|
-
|
|
7
|
-
const digest = createHash('sha256').update(text).digest();
|
|
8
|
-
return Array.from({ length: dimensions }, (_, index) => {
|
|
9
|
-
const byte = digest[index % digest.length];
|
|
10
|
-
return Number(((byte / 255) * 2 - 1).toFixed(6));
|
|
11
|
-
});
|
|
12
|
-
}
|
|
3
|
+
const HTTP_ERROR_BODY_MAX_LENGTH = 2_000;
|
|
13
4
|
function assertNonEmptyText(text) {
|
|
14
5
|
if (!text.trim()) {
|
|
15
6
|
throw new Error('Embedding text must be non-empty');
|
|
@@ -41,102 +32,12 @@ function joinUrl(baseURL, pathPrefix, path) {
|
|
|
41
32
|
const suffix = path.replace(/^\/+/, '');
|
|
42
33
|
return prefix ? `${base}/${prefix}/${suffix}` : `${base}/${suffix}`;
|
|
43
34
|
}
|
|
44
|
-
function
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
: `${error.name}: ${error.message}`;
|
|
49
|
-
}
|
|
50
|
-
return String(error);
|
|
51
|
-
}
|
|
52
|
-
function parseJsonObject(raw, subcommand) {
|
|
53
|
-
const parsed = JSON.parse(raw);
|
|
54
|
-
if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
|
|
55
|
-
throw new Error(`ktx-daemon ${subcommand} returned non-object JSON`);
|
|
56
|
-
}
|
|
57
|
-
return parsed;
|
|
58
|
-
}
|
|
59
|
-
function isCommandNotFound(error) {
|
|
60
|
-
return (error instanceof Error &&
|
|
61
|
-
('code' in error || 'errno' in error) &&
|
|
62
|
-
(error.code === 'ENOENT' || error.errno === 'ENOENT'));
|
|
63
|
-
}
|
|
64
|
-
function defaultSentenceTransformersProcessCommands() {
|
|
65
|
-
const venvBin = process.platform === 'win32' ? join('.venv', 'Scripts', 'ktx-daemon.exe') : join('.venv', 'bin', 'ktx-daemon');
|
|
66
|
-
const repoVenvBin = process.platform === 'win32'
|
|
67
|
-
? join('ktx', '.venv', 'Scripts', 'ktx-daemon.exe')
|
|
68
|
-
: join('ktx', '.venv', 'bin', 'ktx-daemon');
|
|
69
|
-
return [
|
|
70
|
-
{ command: 'ktx-daemon', args: [] },
|
|
71
|
-
{ command: venvBin, args: [] },
|
|
72
|
-
{ command: repoVenvBin, args: [] },
|
|
73
|
-
];
|
|
74
|
-
}
|
|
75
|
-
function runSentenceTransformersProcessCommand(options) {
|
|
76
|
-
return async (subcommand, payload) => new Promise((resolve, reject) => {
|
|
77
|
-
const child = spawn(options.command, [...options.args, subcommand], {
|
|
78
|
-
cwd: options.cwd,
|
|
79
|
-
env: { ...process.env, ...options.env },
|
|
80
|
-
stdio: ['pipe', 'pipe', 'pipe'],
|
|
81
|
-
});
|
|
82
|
-
const stdout = [];
|
|
83
|
-
const stderr = [];
|
|
84
|
-
child.stdout.on('data', (chunk) => stdout.push(chunk));
|
|
85
|
-
child.stderr.on('data', (chunk) => stderr.push(chunk));
|
|
86
|
-
child.on('error', reject);
|
|
87
|
-
child.on('close', (code) => {
|
|
88
|
-
const stdoutText = Buffer.concat(stdout).toString('utf8').trim();
|
|
89
|
-
const stderrText = Buffer.concat(stderr).toString('utf8').trim();
|
|
90
|
-
if (code !== 0) {
|
|
91
|
-
reject(new Error(`ktx-daemon ${subcommand} failed: ${stderrText || `exit code ${code}`}`));
|
|
92
|
-
return;
|
|
93
|
-
}
|
|
94
|
-
try {
|
|
95
|
-
resolve(parseJsonObject(stdoutText, subcommand));
|
|
96
|
-
}
|
|
97
|
-
catch (error) {
|
|
98
|
-
reject(error);
|
|
99
|
-
}
|
|
100
|
-
});
|
|
101
|
-
child.stdin.end(`${JSON.stringify(payload)}\n`);
|
|
102
|
-
});
|
|
103
|
-
}
|
|
104
|
-
function runSentenceTransformersProcessJson(options) {
|
|
105
|
-
return async (subcommand, payload) => {
|
|
106
|
-
const errors = [];
|
|
107
|
-
for (const command of options.commands) {
|
|
108
|
-
try {
|
|
109
|
-
return await runSentenceTransformersProcessCommand({
|
|
110
|
-
...command,
|
|
111
|
-
cwd: options.cwd,
|
|
112
|
-
env: options.env,
|
|
113
|
-
})(subcommand, payload);
|
|
114
|
-
}
|
|
115
|
-
catch (error) {
|
|
116
|
-
errors.push(`${command.command}: ${errorText(error)}`);
|
|
117
|
-
if (!isCommandNotFound(error)) {
|
|
118
|
-
break;
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
}
|
|
122
|
-
throw new Error(`ktx-daemon ${subcommand} failed: ${errors.join('; ')}`);
|
|
123
|
-
};
|
|
124
|
-
}
|
|
125
|
-
class DeterministicEmbeddingProvider {
|
|
126
|
-
dimensions;
|
|
127
|
-
maxBatchSize;
|
|
128
|
-
constructor(dimensions, batchSize = DEFAULT_BATCH_SIZE) {
|
|
129
|
-
this.dimensions = dimensions;
|
|
130
|
-
this.maxBatchSize = batchSize;
|
|
131
|
-
}
|
|
132
|
-
async embed(text) {
|
|
133
|
-
assertNonEmptyText(text);
|
|
134
|
-
return deterministicVector(text, this.dimensions);
|
|
135
|
-
}
|
|
136
|
-
async embedMany(texts) {
|
|
137
|
-
assertBatchSize(texts, this.maxBatchSize);
|
|
138
|
-
return texts.map((text) => deterministicVector(text, this.dimensions));
|
|
35
|
+
function boundedHttpBody(text) {
|
|
36
|
+
const normalized = text.trim();
|
|
37
|
+
if (normalized.length <= HTTP_ERROR_BODY_MAX_LENGTH) {
|
|
38
|
+
return normalized;
|
|
139
39
|
}
|
|
40
|
+
return `${normalized.slice(0, HTTP_ERROR_BODY_MAX_LENGTH)}...`;
|
|
140
41
|
}
|
|
141
42
|
class OpenAIEmbeddingProvider {
|
|
142
43
|
config;
|
|
@@ -186,9 +87,7 @@ class SentenceTransformersEmbeddingProvider {
|
|
|
186
87
|
fetch;
|
|
187
88
|
baseURL;
|
|
188
89
|
pathPrefix;
|
|
189
|
-
runJson;
|
|
190
90
|
startupProbe;
|
|
191
|
-
useProcessRunner = false;
|
|
192
91
|
constructor(config, deps) {
|
|
193
92
|
if (!config.sentenceTransformers?.baseURL) {
|
|
194
93
|
throw new Error('sentenceTransformers.baseURL is required when KTX embedding backend is sentence-transformers');
|
|
@@ -198,15 +97,6 @@ class SentenceTransformersEmbeddingProvider {
|
|
|
198
97
|
this.fetch = deps.fetch ?? fetch;
|
|
199
98
|
this.baseURL = config.sentenceTransformers.baseURL;
|
|
200
99
|
this.pathPrefix = config.sentenceTransformers.pathPrefix ?? '/api';
|
|
201
|
-
this.runJson =
|
|
202
|
-
deps.runSentenceTransformersJson ??
|
|
203
|
-
runSentenceTransformersProcessJson({
|
|
204
|
-
commands: deps.sentenceTransformersCommand
|
|
205
|
-
? [{ command: deps.sentenceTransformersCommand, args: deps.sentenceTransformersArgs ?? [] }]
|
|
206
|
-
: defaultSentenceTransformersProcessCommands(),
|
|
207
|
-
cwd: deps.sentenceTransformersCwd,
|
|
208
|
-
env: deps.sentenceTransformersEnv,
|
|
209
|
-
});
|
|
210
100
|
this.startupProbe = this.requestSingle('__ktx_embedding_probe__').then((embedding) => {
|
|
211
101
|
assertVectorDimensions(embedding, this.dimensions, 'sentence-transformers');
|
|
212
102
|
});
|
|
@@ -219,7 +109,7 @@ class SentenceTransformersEmbeddingProvider {
|
|
|
219
109
|
async embedMany(texts) {
|
|
220
110
|
assertBatchSize(texts, this.maxBatchSize);
|
|
221
111
|
await this.startupProbe;
|
|
222
|
-
const response = await this.requestJson('
|
|
112
|
+
const response = await this.requestJson('/embeddings/compute-bulk', { texts });
|
|
223
113
|
if (!response ||
|
|
224
114
|
typeof response !== 'object' ||
|
|
225
115
|
!('embeddings' in response) ||
|
|
@@ -233,29 +123,14 @@ class SentenceTransformersEmbeddingProvider {
|
|
|
233
123
|
return response.embeddings.map((embedding) => assertVectorDimensions(embedding, this.dimensions, 'sentence-transformers'));
|
|
234
124
|
}
|
|
235
125
|
async requestSingle(text) {
|
|
236
|
-
const response = await this.requestJson('
|
|
126
|
+
const response = await this.requestJson('/embeddings/compute', { text });
|
|
237
127
|
if (!response || typeof response !== 'object' || !('embedding' in response) || !Array.isArray(response.embedding)) {
|
|
238
128
|
throw new Error('Embedding provider sentence-transformers returned malformed single response');
|
|
239
129
|
}
|
|
240
130
|
return response.embedding;
|
|
241
131
|
}
|
|
242
|
-
async requestJson(
|
|
243
|
-
|
|
244
|
-
return this.runJson(command, body);
|
|
245
|
-
}
|
|
246
|
-
try {
|
|
247
|
-
return await this.postJson(path, body);
|
|
248
|
-
}
|
|
249
|
-
catch (httpError) {
|
|
250
|
-
try {
|
|
251
|
-
const response = await this.runJson(command, body);
|
|
252
|
-
this.useProcessRunner = true;
|
|
253
|
-
return response;
|
|
254
|
-
}
|
|
255
|
-
catch (processError) {
|
|
256
|
-
throw new Error(`Embedding provider sentence-transformers local HTTP request failed (${errorText(httpError)}) and ktx-daemon fallback failed (${errorText(processError)})`);
|
|
257
|
-
}
|
|
258
|
-
}
|
|
132
|
+
async requestJson(path, body) {
|
|
133
|
+
return await this.postJson(path, body);
|
|
259
134
|
}
|
|
260
135
|
async postJson(path, body) {
|
|
261
136
|
const response = await this.fetch(joinUrl(this.baseURL, this.pathPrefix, path), {
|
|
@@ -264,7 +139,8 @@ class SentenceTransformersEmbeddingProvider {
|
|
|
264
139
|
body: JSON.stringify(body),
|
|
265
140
|
});
|
|
266
141
|
if (!response.ok) {
|
|
267
|
-
|
|
142
|
+
const bodyText = boundedHttpBody(await response.text());
|
|
143
|
+
throw new Error(`Embedding provider sentence-transformers request failed with HTTP ${response.status}${bodyText ? `: ${bodyText}` : ''}`);
|
|
268
144
|
}
|
|
269
145
|
const parsed = (await response.json());
|
|
270
146
|
if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
|
|
@@ -275,8 +151,6 @@ class SentenceTransformersEmbeddingProvider {
|
|
|
275
151
|
}
|
|
276
152
|
export function createKtxEmbeddingProvider(config, deps = {}) {
|
|
277
153
|
switch (config.backend) {
|
|
278
|
-
case 'deterministic':
|
|
279
|
-
return new DeterministicEmbeddingProvider(config.dimensions, config.batchSize);
|
|
280
154
|
case 'openai':
|
|
281
155
|
return new OpenAIEmbeddingProvider(config, deps);
|
|
282
156
|
case 'sentence-transformers':
|
|
@@ -1,18 +1,13 @@
|
|
|
1
1
|
import { describe, expect, it, vi } from 'vitest';
|
|
2
2
|
import { createKtxEmbeddingProvider } from './embedding-provider.js';
|
|
3
3
|
describe('createKtxEmbeddingProvider', () => {
|
|
4
|
-
it('
|
|
5
|
-
const
|
|
4
|
+
it('rejects deterministic embeddings', () => {
|
|
5
|
+
const config = JSON.parse(JSON.stringify({
|
|
6
6
|
backend: 'deterministic',
|
|
7
7
|
model: 'sha256',
|
|
8
8
|
dimensions: 6,
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
await expect(provider.embed('Revenue policy')).resolves.toHaveLength(6);
|
|
12
|
-
await expect(provider.embed('Revenue policy')).resolves.toEqual(await provider.embed('Revenue policy'));
|
|
13
|
-
await expect(provider.embed('Revenue policy')).resolves.not.toEqual(await provider.embed('Approval policy'));
|
|
14
|
-
await expect(provider.embedMany(['a', 'b'])).resolves.toHaveLength(2);
|
|
15
|
-
expect(provider.maxBatchSize).toBe(4);
|
|
9
|
+
}));
|
|
10
|
+
expect(() => createKtxEmbeddingProvider(config)).toThrow('Unsupported KTX embedding backend: deterministic');
|
|
16
11
|
});
|
|
17
12
|
it('rejects gateway embeddings', () => {
|
|
18
13
|
const config = JSON.parse(JSON.stringify({
|
|
@@ -72,28 +67,18 @@ describe('createKtxEmbeddingProvider', () => {
|
|
|
72
67
|
expect(daemonFetch).toHaveBeenNthCalledWith(1, 'https://daemon.test/base/embeddings/compute', expect.objectContaining({ method: 'POST' }));
|
|
73
68
|
expect(daemonFetch).toHaveBeenNthCalledWith(2, 'https://daemon.test/base/embeddings/compute-bulk', expect.objectContaining({ method: 'POST' }));
|
|
74
69
|
});
|
|
75
|
-
it('
|
|
76
|
-
const fetch = vi
|
|
77
|
-
const runSentenceTransformersJson = vi
|
|
70
|
+
it('reports local HTTP daemon failures without a ktx-daemon spawn fallback cascade', async () => {
|
|
71
|
+
const fetch = vi
|
|
78
72
|
.fn()
|
|
79
|
-
.
|
|
80
|
-
.mockResolvedValueOnce({ embeddings: [[0.3, 0.4], [0.5, 0.6]] });
|
|
73
|
+
.mockResolvedValue(new Response('Embedding compute failed: httpx.InvalidURL: Invalid port', { status: 500 }));
|
|
81
74
|
const provider = createKtxEmbeddingProvider({
|
|
82
75
|
backend: 'sentence-transformers',
|
|
83
76
|
model: 'all-MiniLM-L6-v2',
|
|
84
77
|
dimensions: 2,
|
|
85
78
|
sentenceTransformers: { baseURL: 'http://127.0.0.1:8765', pathPrefix: '' },
|
|
86
|
-
}, { fetch
|
|
87
|
-
await expect(provider.
|
|
88
|
-
|
|
89
|
-
[0.5, 0.6],
|
|
90
|
-
]);
|
|
79
|
+
}, { fetch });
|
|
80
|
+
await expect(provider.embed('hello')).rejects.toThrow('Embedding provider sentence-transformers request failed with HTTP 500: Embedding compute failed: httpx.InvalidURL: Invalid port');
|
|
81
|
+
await expect(provider.embed('hello')).rejects.not.toThrow('ktx-daemon fallback failed');
|
|
91
82
|
expect(fetch).toHaveBeenCalledTimes(1);
|
|
92
|
-
expect(runSentenceTransformersJson).toHaveBeenNthCalledWith(1, 'embedding-compute', {
|
|
93
|
-
text: '__ktx_embedding_probe__',
|
|
94
|
-
});
|
|
95
|
-
expect(runSentenceTransformersJson).toHaveBeenNthCalledWith(2, 'embedding-compute-bulk', {
|
|
96
|
-
texts: ['hello', 'world'],
|
|
97
|
-
});
|
|
98
83
|
});
|
|
99
84
|
});
|
|
@@ -68,7 +68,7 @@ export interface KtxLlmProvider {
|
|
|
68
68
|
promptCachingConfig(): KtxPromptCachingConfig;
|
|
69
69
|
activeBackend(): KtxLlmBackend;
|
|
70
70
|
}
|
|
71
|
-
export type KtxEmbeddingBackend = 'openai' | '
|
|
71
|
+
export type KtxEmbeddingBackend = 'openai' | 'sentence-transformers';
|
|
72
72
|
export interface KtxEmbeddingTokenUsageEvent {
|
|
73
73
|
backend: KtxEmbeddingBackend;
|
|
74
74
|
model: string;
|
package/package.json
CHANGED
|
File without changes
|