@kaelio/ktx 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/python/{kaelio_ktx-0.1.0-py3-none-any.whl → kaelio_ktx-0.1.1-py3-none-any.whl} +0 -0
- package/assets/python/manifest.json +4 -4
- package/dist/commands/setup-commands.js +14 -26
- package/dist/doctor.test.js +3 -4
- package/dist/index.test.js +26 -10
- package/dist/ingest-depth.js +0 -1
- package/dist/ingest.test-utils.js +2 -2
- package/dist/ingest.test.js +4 -4
- package/dist/managed-local-embeddings.d.ts +2 -0
- package/dist/managed-local-embeddings.js +2 -0
- package/dist/managed-local-embeddings.test.js +2 -0
- package/dist/managed-mcp-daemon.js +3 -2
- package/dist/managed-mcp-daemon.test.js +25 -0
- package/dist/managed-python-command.test.js +1 -0
- package/dist/managed-python-daemon.js +3 -2
- package/dist/managed-python-daemon.test.js +20 -0
- package/dist/managed-python-runtime.d.ts +4 -0
- package/dist/managed-python-runtime.js +47 -3
- package/dist/managed-python-runtime.test.js +51 -21
- package/dist/proxy-env.d.ts +1 -0
- package/dist/proxy-env.js +23 -0
- package/dist/proxy-env.test.d.ts +1 -0
- package/dist/proxy-env.test.js +17 -0
- package/dist/runtime.test.js +1 -0
- package/dist/setup-agents.js +3 -1
- package/dist/setup-agents.test.js +34 -0
- package/dist/setup-embeddings.d.ts +1 -0
- package/dist/setup-embeddings.js +28 -6
- package/dist/setup-embeddings.test.js +46 -4
- package/dist/setup-models.d.ts +0 -1
- package/dist/setup-models.js +2 -3
- package/dist/setup-models.test.js +8 -10
- package/dist/setup-project.d.ts +9 -1
- package/dist/setup-project.js +52 -25
- package/dist/setup-project.test.js +8 -8
- package/dist/setup-runtime.test.js +2 -0
- package/dist/setup.d.ts +1 -2
- package/dist/setup.js +21 -5
- package/dist/setup.test.js +160 -43
- package/dist/sl.test.js +2 -1
- package/dist/standalone-smoke.test.js +2 -3
- package/dist/status-project.js +1 -10
- package/node_modules/@ktx/context/dist/ingest/adapters/historic-sql/local-ingest-acceptance.test.js +1 -1
- package/node_modules/@ktx/context/dist/ingest/local-bundle-ingest.test.js +8 -8
- package/node_modules/@ktx/context/dist/ingest/local-bundle-runtime.js +1 -1
- package/node_modules/@ktx/context/dist/ingest/local-bundle-runtime.test.js +3 -3
- package/node_modules/@ktx/context/dist/ingest/local-embedding-provider.integration.test.js +9 -10
- package/node_modules/@ktx/context/dist/llm/local-config.js +2 -15
- package/node_modules/@ktx/context/dist/llm/local-config.test.js +3 -7
- package/node_modules/@ktx/context/dist/project/config.d.ts +0 -5
- package/node_modules/@ktx/context/dist/project/config.js +5 -5
- package/node_modules/@ktx/context/dist/project/config.test.js +4 -7
- package/node_modules/@ktx/context/dist/scan/enrichment-state.test.js +4 -4
- package/node_modules/@ktx/context/dist/scan/index.d.ts +1 -1
- package/node_modules/@ktx/context/dist/scan/local-enrichment.d.ts +2 -6
- package/node_modules/@ktx/context/dist/scan/local-enrichment.js +31 -47
- package/node_modules/@ktx/context/dist/scan/local-enrichment.test.js +35 -18
- package/node_modules/@ktx/context/dist/scan/local-scan.test.js +2 -3
- package/node_modules/@ktx/llm/dist/embedding-provider.d.ts +0 -7
- package/node_modules/@ktx/llm/dist/embedding-provider.js +12 -138
- package/node_modules/@ktx/llm/dist/embedding-provider.test.js +10 -25
- package/node_modules/@ktx/llm/dist/types.d.ts +1 -1
- package/package.json +1 -1
|
@@ -1,15 +1,6 @@
|
|
|
1
|
-
import { createHash } from 'node:crypto';
|
|
2
|
-
import { spawn } from 'node:child_process';
|
|
3
|
-
import { join } from 'node:path';
|
|
4
1
|
import OpenAI from 'openai';
|
|
5
2
|
const DEFAULT_BATCH_SIZE = 100;
|
|
6
|
-
|
|
7
|
-
const digest = createHash('sha256').update(text).digest();
|
|
8
|
-
return Array.from({ length: dimensions }, (_, index) => {
|
|
9
|
-
const byte = digest[index % digest.length];
|
|
10
|
-
return Number(((byte / 255) * 2 - 1).toFixed(6));
|
|
11
|
-
});
|
|
12
|
-
}
|
|
3
|
+
const HTTP_ERROR_BODY_MAX_LENGTH = 2_000;
|
|
13
4
|
function assertNonEmptyText(text) {
|
|
14
5
|
if (!text.trim()) {
|
|
15
6
|
throw new Error('Embedding text must be non-empty');
|
|
@@ -41,102 +32,12 @@ function joinUrl(baseURL, pathPrefix, path) {
|
|
|
41
32
|
const suffix = path.replace(/^\/+/, '');
|
|
42
33
|
return prefix ? `${base}/${prefix}/${suffix}` : `${base}/${suffix}`;
|
|
43
34
|
}
|
|
44
|
-
function
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
: `${error.name}: ${error.message}`;
|
|
49
|
-
}
|
|
50
|
-
return String(error);
|
|
51
|
-
}
|
|
52
|
-
function parseJsonObject(raw, subcommand) {
|
|
53
|
-
const parsed = JSON.parse(raw);
|
|
54
|
-
if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
|
|
55
|
-
throw new Error(`ktx-daemon ${subcommand} returned non-object JSON`);
|
|
56
|
-
}
|
|
57
|
-
return parsed;
|
|
58
|
-
}
|
|
59
|
-
function isCommandNotFound(error) {
|
|
60
|
-
return (error instanceof Error &&
|
|
61
|
-
('code' in error || 'errno' in error) &&
|
|
62
|
-
(error.code === 'ENOENT' || error.errno === 'ENOENT'));
|
|
63
|
-
}
|
|
64
|
-
function defaultSentenceTransformersProcessCommands() {
|
|
65
|
-
const venvBin = process.platform === 'win32' ? join('.venv', 'Scripts', 'ktx-daemon.exe') : join('.venv', 'bin', 'ktx-daemon');
|
|
66
|
-
const repoVenvBin = process.platform === 'win32'
|
|
67
|
-
? join('ktx', '.venv', 'Scripts', 'ktx-daemon.exe')
|
|
68
|
-
: join('ktx', '.venv', 'bin', 'ktx-daemon');
|
|
69
|
-
return [
|
|
70
|
-
{ command: 'ktx-daemon', args: [] },
|
|
71
|
-
{ command: venvBin, args: [] },
|
|
72
|
-
{ command: repoVenvBin, args: [] },
|
|
73
|
-
];
|
|
74
|
-
}
|
|
75
|
-
function runSentenceTransformersProcessCommand(options) {
|
|
76
|
-
return async (subcommand, payload) => new Promise((resolve, reject) => {
|
|
77
|
-
const child = spawn(options.command, [...options.args, subcommand], {
|
|
78
|
-
cwd: options.cwd,
|
|
79
|
-
env: { ...process.env, ...options.env },
|
|
80
|
-
stdio: ['pipe', 'pipe', 'pipe'],
|
|
81
|
-
});
|
|
82
|
-
const stdout = [];
|
|
83
|
-
const stderr = [];
|
|
84
|
-
child.stdout.on('data', (chunk) => stdout.push(chunk));
|
|
85
|
-
child.stderr.on('data', (chunk) => stderr.push(chunk));
|
|
86
|
-
child.on('error', reject);
|
|
87
|
-
child.on('close', (code) => {
|
|
88
|
-
const stdoutText = Buffer.concat(stdout).toString('utf8').trim();
|
|
89
|
-
const stderrText = Buffer.concat(stderr).toString('utf8').trim();
|
|
90
|
-
if (code !== 0) {
|
|
91
|
-
reject(new Error(`ktx-daemon ${subcommand} failed: ${stderrText || `exit code ${code}`}`));
|
|
92
|
-
return;
|
|
93
|
-
}
|
|
94
|
-
try {
|
|
95
|
-
resolve(parseJsonObject(stdoutText, subcommand));
|
|
96
|
-
}
|
|
97
|
-
catch (error) {
|
|
98
|
-
reject(error);
|
|
99
|
-
}
|
|
100
|
-
});
|
|
101
|
-
child.stdin.end(`${JSON.stringify(payload)}\n`);
|
|
102
|
-
});
|
|
103
|
-
}
|
|
104
|
-
function runSentenceTransformersProcessJson(options) {
|
|
105
|
-
return async (subcommand, payload) => {
|
|
106
|
-
const errors = [];
|
|
107
|
-
for (const command of options.commands) {
|
|
108
|
-
try {
|
|
109
|
-
return await runSentenceTransformersProcessCommand({
|
|
110
|
-
...command,
|
|
111
|
-
cwd: options.cwd,
|
|
112
|
-
env: options.env,
|
|
113
|
-
})(subcommand, payload);
|
|
114
|
-
}
|
|
115
|
-
catch (error) {
|
|
116
|
-
errors.push(`${command.command}: ${errorText(error)}`);
|
|
117
|
-
if (!isCommandNotFound(error)) {
|
|
118
|
-
break;
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
}
|
|
122
|
-
throw new Error(`ktx-daemon ${subcommand} failed: ${errors.join('; ')}`);
|
|
123
|
-
};
|
|
124
|
-
}
|
|
125
|
-
class DeterministicEmbeddingProvider {
|
|
126
|
-
dimensions;
|
|
127
|
-
maxBatchSize;
|
|
128
|
-
constructor(dimensions, batchSize = DEFAULT_BATCH_SIZE) {
|
|
129
|
-
this.dimensions = dimensions;
|
|
130
|
-
this.maxBatchSize = batchSize;
|
|
131
|
-
}
|
|
132
|
-
async embed(text) {
|
|
133
|
-
assertNonEmptyText(text);
|
|
134
|
-
return deterministicVector(text, this.dimensions);
|
|
135
|
-
}
|
|
136
|
-
async embedMany(texts) {
|
|
137
|
-
assertBatchSize(texts, this.maxBatchSize);
|
|
138
|
-
return texts.map((text) => deterministicVector(text, this.dimensions));
|
|
35
|
+
function boundedHttpBody(text) {
|
|
36
|
+
const normalized = text.trim();
|
|
37
|
+
if (normalized.length <= HTTP_ERROR_BODY_MAX_LENGTH) {
|
|
38
|
+
return normalized;
|
|
139
39
|
}
|
|
40
|
+
return `${normalized.slice(0, HTTP_ERROR_BODY_MAX_LENGTH)}...`;
|
|
140
41
|
}
|
|
141
42
|
class OpenAIEmbeddingProvider {
|
|
142
43
|
config;
|
|
@@ -186,9 +87,7 @@ class SentenceTransformersEmbeddingProvider {
|
|
|
186
87
|
fetch;
|
|
187
88
|
baseURL;
|
|
188
89
|
pathPrefix;
|
|
189
|
-
runJson;
|
|
190
90
|
startupProbe;
|
|
191
|
-
useProcessRunner = false;
|
|
192
91
|
constructor(config, deps) {
|
|
193
92
|
if (!config.sentenceTransformers?.baseURL) {
|
|
194
93
|
throw new Error('sentenceTransformers.baseURL is required when KTX embedding backend is sentence-transformers');
|
|
@@ -198,15 +97,6 @@ class SentenceTransformersEmbeddingProvider {
|
|
|
198
97
|
this.fetch = deps.fetch ?? fetch;
|
|
199
98
|
this.baseURL = config.sentenceTransformers.baseURL;
|
|
200
99
|
this.pathPrefix = config.sentenceTransformers.pathPrefix ?? '/api';
|
|
201
|
-
this.runJson =
|
|
202
|
-
deps.runSentenceTransformersJson ??
|
|
203
|
-
runSentenceTransformersProcessJson({
|
|
204
|
-
commands: deps.sentenceTransformersCommand
|
|
205
|
-
? [{ command: deps.sentenceTransformersCommand, args: deps.sentenceTransformersArgs ?? [] }]
|
|
206
|
-
: defaultSentenceTransformersProcessCommands(),
|
|
207
|
-
cwd: deps.sentenceTransformersCwd,
|
|
208
|
-
env: deps.sentenceTransformersEnv,
|
|
209
|
-
});
|
|
210
100
|
this.startupProbe = this.requestSingle('__ktx_embedding_probe__').then((embedding) => {
|
|
211
101
|
assertVectorDimensions(embedding, this.dimensions, 'sentence-transformers');
|
|
212
102
|
});
|
|
@@ -219,7 +109,7 @@ class SentenceTransformersEmbeddingProvider {
|
|
|
219
109
|
async embedMany(texts) {
|
|
220
110
|
assertBatchSize(texts, this.maxBatchSize);
|
|
221
111
|
await this.startupProbe;
|
|
222
|
-
const response = await this.requestJson('
|
|
112
|
+
const response = await this.requestJson('/embeddings/compute-bulk', { texts });
|
|
223
113
|
if (!response ||
|
|
224
114
|
typeof response !== 'object' ||
|
|
225
115
|
!('embeddings' in response) ||
|
|
@@ -233,29 +123,14 @@ class SentenceTransformersEmbeddingProvider {
|
|
|
233
123
|
return response.embeddings.map((embedding) => assertVectorDimensions(embedding, this.dimensions, 'sentence-transformers'));
|
|
234
124
|
}
|
|
235
125
|
async requestSingle(text) {
|
|
236
|
-
const response = await this.requestJson('
|
|
126
|
+
const response = await this.requestJson('/embeddings/compute', { text });
|
|
237
127
|
if (!response || typeof response !== 'object' || !('embedding' in response) || !Array.isArray(response.embedding)) {
|
|
238
128
|
throw new Error('Embedding provider sentence-transformers returned malformed single response');
|
|
239
129
|
}
|
|
240
130
|
return response.embedding;
|
|
241
131
|
}
|
|
242
|
-
async requestJson(
|
|
243
|
-
|
|
244
|
-
return this.runJson(command, body);
|
|
245
|
-
}
|
|
246
|
-
try {
|
|
247
|
-
return await this.postJson(path, body);
|
|
248
|
-
}
|
|
249
|
-
catch (httpError) {
|
|
250
|
-
try {
|
|
251
|
-
const response = await this.runJson(command, body);
|
|
252
|
-
this.useProcessRunner = true;
|
|
253
|
-
return response;
|
|
254
|
-
}
|
|
255
|
-
catch (processError) {
|
|
256
|
-
throw new Error(`Embedding provider sentence-transformers local HTTP request failed (${errorText(httpError)}) and ktx-daemon fallback failed (${errorText(processError)})`);
|
|
257
|
-
}
|
|
258
|
-
}
|
|
132
|
+
async requestJson(path, body) {
|
|
133
|
+
return await this.postJson(path, body);
|
|
259
134
|
}
|
|
260
135
|
async postJson(path, body) {
|
|
261
136
|
const response = await this.fetch(joinUrl(this.baseURL, this.pathPrefix, path), {
|
|
@@ -264,7 +139,8 @@ class SentenceTransformersEmbeddingProvider {
|
|
|
264
139
|
body: JSON.stringify(body),
|
|
265
140
|
});
|
|
266
141
|
if (!response.ok) {
|
|
267
|
-
|
|
142
|
+
const bodyText = boundedHttpBody(await response.text());
|
|
143
|
+
throw new Error(`Embedding provider sentence-transformers request failed with HTTP ${response.status}${bodyText ? `: ${bodyText}` : ''}`);
|
|
268
144
|
}
|
|
269
145
|
const parsed = (await response.json());
|
|
270
146
|
if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
|
|
@@ -275,8 +151,6 @@ class SentenceTransformersEmbeddingProvider {
|
|
|
275
151
|
}
|
|
276
152
|
export function createKtxEmbeddingProvider(config, deps = {}) {
|
|
277
153
|
switch (config.backend) {
|
|
278
|
-
case 'deterministic':
|
|
279
|
-
return new DeterministicEmbeddingProvider(config.dimensions, config.batchSize);
|
|
280
154
|
case 'openai':
|
|
281
155
|
return new OpenAIEmbeddingProvider(config, deps);
|
|
282
156
|
case 'sentence-transformers':
|
|
@@ -1,18 +1,13 @@
|
|
|
1
1
|
import { describe, expect, it, vi } from 'vitest';
|
|
2
2
|
import { createKtxEmbeddingProvider } from './embedding-provider.js';
|
|
3
3
|
describe('createKtxEmbeddingProvider', () => {
|
|
4
|
-
it('
|
|
5
|
-
const
|
|
4
|
+
it('rejects deterministic embeddings', () => {
|
|
5
|
+
const config = JSON.parse(JSON.stringify({
|
|
6
6
|
backend: 'deterministic',
|
|
7
7
|
model: 'sha256',
|
|
8
8
|
dimensions: 6,
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
await expect(provider.embed('Revenue policy')).resolves.toHaveLength(6);
|
|
12
|
-
await expect(provider.embed('Revenue policy')).resolves.toEqual(await provider.embed('Revenue policy'));
|
|
13
|
-
await expect(provider.embed('Revenue policy')).resolves.not.toEqual(await provider.embed('Approval policy'));
|
|
14
|
-
await expect(provider.embedMany(['a', 'b'])).resolves.toHaveLength(2);
|
|
15
|
-
expect(provider.maxBatchSize).toBe(4);
|
|
9
|
+
}));
|
|
10
|
+
expect(() => createKtxEmbeddingProvider(config)).toThrow('Unsupported KTX embedding backend: deterministic');
|
|
16
11
|
});
|
|
17
12
|
it('rejects gateway embeddings', () => {
|
|
18
13
|
const config = JSON.parse(JSON.stringify({
|
|
@@ -72,28 +67,18 @@ describe('createKtxEmbeddingProvider', () => {
|
|
|
72
67
|
expect(daemonFetch).toHaveBeenNthCalledWith(1, 'https://daemon.test/base/embeddings/compute', expect.objectContaining({ method: 'POST' }));
|
|
73
68
|
expect(daemonFetch).toHaveBeenNthCalledWith(2, 'https://daemon.test/base/embeddings/compute-bulk', expect.objectContaining({ method: 'POST' }));
|
|
74
69
|
});
|
|
75
|
-
it('
|
|
76
|
-
const fetch = vi
|
|
77
|
-
const runSentenceTransformersJson = vi
|
|
70
|
+
it('reports local HTTP daemon failures without a ktx-daemon spawn fallback cascade', async () => {
|
|
71
|
+
const fetch = vi
|
|
78
72
|
.fn()
|
|
79
|
-
.
|
|
80
|
-
.mockResolvedValueOnce({ embeddings: [[0.3, 0.4], [0.5, 0.6]] });
|
|
73
|
+
.mockResolvedValue(new Response('Embedding compute failed: httpx.InvalidURL: Invalid port', { status: 500 }));
|
|
81
74
|
const provider = createKtxEmbeddingProvider({
|
|
82
75
|
backend: 'sentence-transformers',
|
|
83
76
|
model: 'all-MiniLM-L6-v2',
|
|
84
77
|
dimensions: 2,
|
|
85
78
|
sentenceTransformers: { baseURL: 'http://127.0.0.1:8765', pathPrefix: '' },
|
|
86
|
-
}, { fetch
|
|
87
|
-
await expect(provider.
|
|
88
|
-
|
|
89
|
-
[0.5, 0.6],
|
|
90
|
-
]);
|
|
79
|
+
}, { fetch });
|
|
80
|
+
await expect(provider.embed('hello')).rejects.toThrow('Embedding provider sentence-transformers request failed with HTTP 500: Embedding compute failed: httpx.InvalidURL: Invalid port');
|
|
81
|
+
await expect(provider.embed('hello')).rejects.not.toThrow('ktx-daemon fallback failed');
|
|
91
82
|
expect(fetch).toHaveBeenCalledTimes(1);
|
|
92
|
-
expect(runSentenceTransformersJson).toHaveBeenNthCalledWith(1, 'embedding-compute', {
|
|
93
|
-
text: '__ktx_embedding_probe__',
|
|
94
|
-
});
|
|
95
|
-
expect(runSentenceTransformersJson).toHaveBeenNthCalledWith(2, 'embedding-compute-bulk', {
|
|
96
|
-
texts: ['hello', 'world'],
|
|
97
|
-
});
|
|
98
83
|
});
|
|
99
84
|
});
|
|
@@ -68,7 +68,7 @@ export interface KtxLlmProvider {
|
|
|
68
68
|
promptCachingConfig(): KtxPromptCachingConfig;
|
|
69
69
|
activeBackend(): KtxLlmBackend;
|
|
70
70
|
}
|
|
71
|
-
export type KtxEmbeddingBackend = 'openai' | '
|
|
71
|
+
export type KtxEmbeddingBackend = 'openai' | 'sentence-transformers';
|
|
72
72
|
export interface KtxEmbeddingTokenUsageEvent {
|
|
73
73
|
backend: KtxEmbeddingBackend;
|
|
74
74
|
model: string;
|