@hasna/knowledge 0.2.27 → 0.2.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -0
- package/bin/open-knowledge-mcp.js +15 -7
- package/bin/open-knowledge.js +17 -17
- package/dist/agent.d.ts +35 -0
- package/dist/artifact-store.d.ts +63 -0
- package/dist/auth.d.ts +35 -0
- package/dist/embeddings.d.ts +77 -0
- package/dist/index.d.ts +20 -0
- package/dist/index.js +5709 -0
- package/dist/knowledge-db.d.ts +27 -0
- package/dist/manifest-ingest.d.ts +35 -0
- package/dist/outbox-consume.d.ts +25 -0
- package/dist/provenance.d.ts +50 -0
- package/dist/providers.d.ts +89 -0
- package/dist/reindex.d.ts +37 -0
- package/dist/remote-client.d.ts +108 -0
- package/dist/retrieval.d.ts +71 -0
- package/dist/safety.d.ts +70 -0
- package/dist/sdk.d.ts +72 -0
- package/dist/search.d.ts +65 -0
- package/dist/service.d.ts +117 -0
- package/dist/source-ingest.d.ts +18 -0
- package/dist/source-ref.d.ts +30 -0
- package/dist/source-resolver.d.ts +92 -0
- package/dist/storage-contract.d.ts +106 -0
- package/dist/web-search.d.ts +40 -0
- package/dist/wiki-compiler.d.ts +67 -0
- package/dist/wiki-layout.d.ts +23 -0
- package/dist/workspace.d.ts +111 -0
- package/package.json +15 -7
- package/src/agent.ts +0 -367
- package/src/artifact-store.ts +0 -184
- package/src/auth.ts +0 -123
- package/src/cli.ts +0 -1184
- package/src/embeddings.ts +0 -516
- package/src/knowledge-db.ts +0 -354
- package/src/manifest-ingest.ts +0 -515
- package/src/mcp-http.js +0 -110
- package/src/mcp.js +0 -1503
- package/src/outbox-consume.ts +0 -463
- package/src/provenance.ts +0 -93
- package/src/providers.ts +0 -308
- package/src/reindex.ts +0 -260
- package/src/remote-client.ts +0 -268
- package/src/retrieval.ts +0 -326
- package/src/safety.ts +0 -265
- package/src/schema.js +0 -25
- package/src/search.ts +0 -510
- package/src/service.ts +0 -443
- package/src/source-ingest.ts +0 -268
- package/src/source-ref.ts +0 -104
- package/src/source-resolver.ts +0 -436
- package/src/storage-contract.ts +0 -346
- package/src/store.ts +0 -113
- package/src/web-search.ts +0 -330
- package/src/wiki-compiler.ts +0 -711
- package/src/wiki-layout.ts +0 -251
- package/src/workspace.ts +0 -251
package/src/providers.ts
DELETED
|
@@ -1,308 +0,0 @@
|
|
|
1
|
-
import { randomUUID } from 'node:crypto';
|
|
2
|
-
import type { Database } from 'bun:sqlite';
|
|
3
|
-
import type { KnowledgeConfig } from './workspace';
|
|
4
|
-
|
|
5
|
-
/** Identifiers of the AI providers this module can configure. */
export type AiProviderId =
  | 'openai'
  | 'anthropic'
  | 'deepseek';
|
|
6
|
-
|
|
7
|
-
/** Connection settings for a single AI provider. */
export interface AiProviderSettings {
  /** Name of the environment variable that holds the provider's API key. */
  api_key_env: string;

  /** Optional base URL override for the provider's endpoint. */
  base_url?: string;

  /** Model name recorded as this provider's default. */
  default_model: string;
}
|
|
12
|
-
|
|
13
|
-
/** Shape of the optional `providers` section of the knowledge config. */
export interface AiProvidersConfig {
  /** Model ref that replaces the built-in `default` alias when set. */
  default_model?: string;

  /** Additional alias -> model ref mappings. */
  aliases?: Record<string, string>;

  /** Partial overrides of the OpenAI settings. */
  openai?: Partial<AiProviderSettings>;

  /** Partial overrides of the Anthropic settings. */
  anthropic?: Partial<AiProviderSettings>;

  /** Partial overrides of the DeepSeek settings. */
  deepseek?: Partial<AiProviderSettings>;
}
|
|
20
|
-
|
|
21
|
-
/** Boolean capability flags attached to a model registry entry. */
export interface ModelCapabilities {
  text_generation: boolean;
  structured_output: boolean;
  tool_usage: boolean;
  tool_streaming: boolean;
  image_input: boolean;
  native_web_search: boolean;
  reasoning: boolean;
  embeddings: boolean;
}
|
|
31
|
-
|
|
32
|
-
/** One alias in the model registry together with the model it points at. */
export interface ModelRegistryEntry {
  /** Alias name, e.g. `default`. */
  alias: string;

  /** Full `provider:model` reference the alias maps to. */
  model_ref: string;

  /** Provider part of `model_ref`. */
  provider: AiProviderId;

  /** Model part of `model_ref`. */
  model: string;

  /** True when this entry is the `default` alias. */
  default: boolean;

  /** Capability flags of the entry's provider. */
  capabilities: ModelCapabilities;
}
|
|
40
|
-
|
|
41
|
-
/** Credential and settings summary for one provider. */
export interface ProviderCredentialStatus {
  provider: AiProviderId;

  /** Name of the environment variable consulted for the API key. */
  api_key_env: string;

  /** Whether a non-empty API key was found. */
  configured: boolean;

  /** Where the key came from: `env` when found, `missing` otherwise. */
  source: 'env' | 'missing';

  /** Configured base URL, or `null` when none is set. */
  base_url: string | null;

  default_model: string;
}
|
|
49
|
-
|
|
50
|
-
/** Combined provider report: resolved default model, credentials and aliases. */
export interface ProviderStatusResult {
  /** Model ref that the `default` alias resolves to. */
  default_model: string;

  /** One credential status per known provider. */
  providers: ProviderCredentialStatus[];

  /** One registry entry per model alias. */
  models: ModelRegistryEntry[];
}
|
|
55
|
-
|
|
56
|
-
/** Provider usage reduced to a common set of fields. */
export interface NormalizedProviderUsage {
  provider: string;
  model: string;

  /** Input (prompt) token count. */
  input_tokens: number;

  /** Output (completion) token count. */
  output_tokens: number;

  /** Cost of the call in US dollars. */
  cost_usd: number;

  /** Free-form extra data kept alongside the normalized numbers. */
  metadata: Record<string, unknown>;
}
|
|
64
|
-
|
|
65
|
-
/** Builds a provider instance from an API key and an optional base URL. */
type ProviderFactory = (
  settings: { apiKey: string; baseURL?: string },
) => unknown;
|
|
66
|
-
|
|
67
|
-
/** Optional inputs for building provider registries and language models. */
export interface AiProviderRuntimeOptions {
  /** Knowledge config that may carry provider overrides. */
  config?: KnowledgeConfig;

  /** Environment to read API keys from. */
  env?: Record<string, string | undefined>;

  /** Per-provider factory overrides. */
  factories?: Partial<Record<AiProviderId, ProviderFactory>>;
}
|
|
72
|
-
|
|
73
|
-
/**
 * Built-in settings for every supported provider. The key order of this table
 * is the order in which providers are enumerated elsewhere in the module.
 */
const DEFAULT_PROVIDER_SETTINGS: Record<AiProviderId, AiProviderSettings> = {
  openai: { api_key_env: 'OPENAI_API_KEY', default_model: 'gpt-5.2' },
  anthropic: { api_key_env: 'ANTHROPIC_API_KEY', default_model: 'claude-sonnet-4-6' },
  deepseek: { api_key_env: 'DEEPSEEK_API_KEY', default_model: 'deepseek-chat' },
};
|
|
87
|
-
|
|
88
|
-
/**
 * Capability flags per provider. The flags are keyed by provider, not by
 * individual model, so every model of a provider reports the same set.
 */
const PROVIDER_CAPABILITIES: Record<AiProviderId, ModelCapabilities> = {
  // OpenAI: every flag enabled.
  openai: {
    text_generation: true,
    structured_output: true,
    tool_usage: true,
    tool_streaming: true,
    image_input: true,
    native_web_search: true,
    reasoning: true,
    embeddings: true,
  },

  // Anthropic: no native web search and no embeddings.
  anthropic: {
    text_generation: true,
    structured_output: true,
    tool_usage: true,
    tool_streaming: true,
    image_input: true,
    native_web_search: false,
    reasoning: true,
    embeddings: false,
  },

  // DeepSeek: additionally no image input.
  deepseek: {
    text_generation: true,
    structured_output: true,
    tool_usage: true,
    tool_streaming: true,
    image_input: false,
    native_web_search: false,
    reasoning: true,
    embeddings: false,
  },
};
|
|
120
|
-
|
|
121
|
-
/** Built-in alias -> `provider:model` table; config values are layered on top of it. */
const BUILTIN_ALIASES: Record<string, string> = {
  // OpenAI
  default: 'openai:gpt-5.2',
  fast: 'openai:gpt-5-mini',
  // Anthropic
  reasoning: 'anthropic:claude-opus-4-6',
  sonnet: 'anthropic:claude-sonnet-4-6',
  // DeepSeek
  deepseek: 'deepseek:deepseek-chat',
  'deepseek-reasoning': 'deepseek:deepseek-reasoner',
};
|
|
129
|
-
|
|
130
|
-
/**
 * Reads the optional `providers` section from a knowledge config.
 *
 * @param config Knowledge config, possibly undefined.
 * @returns The `providers` section, or an empty object when it is absent.
 */
function providerConfig(config?: KnowledgeConfig): AiProvidersConfig {
  // `providers` is not part of the KnowledgeConfig type as seen here, hence the widening cast.
  const withProviders = config as (KnowledgeConfig & { providers?: AiProvidersConfig }) | undefined;
  const section = withProviders?.providers;
  return section ?? {};
}
|
|
133
|
-
|
|
134
|
-
/**
 * Resolves the effective settings of one provider.
 *
 * @param config Knowledge config that may carry per-provider overrides.
 * @param provider Provider whose settings are wanted.
 * @returns The built-in defaults with any configured fields layered on top.
 */
export function providerSettings(config: KnowledgeConfig | undefined, provider: AiProviderId): AiProviderSettings {
  const defaults = DEFAULT_PROVIDER_SETTINGS[provider];
  const overrides = providerConfig(config)[provider] ?? {};
  return { ...defaults, ...overrides };
}
|
|
141
|
-
|
|
142
|
-
/**
 * Builds the effective alias table.
 *
 * Precedence, lowest to highest: built-in aliases, the configured
 * `default_model` (as the `default` alias), then the configured `aliases`.
 *
 * @param config Knowledge config that may carry provider overrides.
 * @returns Alias -> model ref mapping.
 */
export function modelAliases(config?: KnowledgeConfig): Record<string, string> {
  const { default_model: defaultModel, aliases: userAliases } = providerConfig(config);
  const merged: Record<string, string> = { ...BUILTIN_ALIASES };
  if (defaultModel) {
    merged.default = defaultModel;
  }
  return { ...merged, ...(userAliases ?? {}) };
}
|
|
150
|
-
|
|
151
|
-
/**
 * Splits a `provider:model` reference at its first colon.
 *
 * @param modelRef Reference such as `openai:gpt-5.2`; the model part may itself contain colons.
 * @returns The provider id and the model name.
 * @throws Error when the provider is not supported, or when the model part is empty.
 */
export function parseModelRef(modelRef: string): { provider: AiProviderId; model: string } {
  const separator = modelRef.indexOf(':');
  const provider = separator === -1 ? modelRef : modelRef.slice(0, separator);
  const model = separator === -1 ? '' : modelRef.slice(separator + 1);

  switch (provider) {
    case 'openai':
    case 'anthropic':
    case 'deepseek':
      // The provider is validated before the model, so an unknown provider wins over an empty model.
      if (!model) throw new Error(`Invalid model ref: ${modelRef}. Expected provider:model.`);
      return { provider, model };
    default:
      throw new Error(`Unsupported AI provider: ${provider}`);
  }
}
|
|
160
|
-
|
|
161
|
-
/**
 * Resolves an alias to its model ref; anything that is not a known alias is
 * returned unchanged.
 *
 * @param aliasOrRef Alias name (e.g. `default`) or a `provider:model` ref.
 * @param config Knowledge config that may define extra aliases.
 * @returns The aliased model ref, or `aliasOrRef` itself.
 */
export function resolveModelRef(aliasOrRef: string, config?: KnowledgeConfig): string {
  const aliases = modelAliases(config);
  // Own-property check: a plain object also answers for inherited members such
  // as "constructor" or "toString", which would hand back a function instead of a string.
  if (!Object.prototype.hasOwnProperty.call(aliases, aliasOrRef)) {
    return aliasOrRef;
  }
  return aliases[aliasOrRef] ?? aliasOrRef;
}
|
|
165
|
-
|
|
166
|
-
/**
 * Lists every model alias with its parsed provider, model and capabilities.
 *
 * @param config Knowledge config that may define extra aliases.
 * @returns One registry entry per alias, in alias-table order.
 * @throws Error when an alias maps to a ref that `parseModelRef` rejects.
 */
export function listModelRegistry(config?: KnowledgeConfig): ModelRegistryEntry[] {
  const entries: ModelRegistryEntry[] = [];
  for (const [alias, modelRef] of Object.entries(modelAliases(config))) {
    const { provider, model } = parseModelRef(modelRef);
    entries.push({
      alias,
      model_ref: modelRef,
      provider,
      model,
      default: alias === 'default',
      capabilities: PROVIDER_CAPABILITIES[provider],
    });
  }
  return entries;
}
|
|
180
|
-
|
|
181
|
-
/**
 * Reports, for every known provider, whether its API key is present.
 *
 * @param config Knowledge config that may override provider settings.
 * @param env Environment to inspect; defaults to `process.env`.
 * @returns One status per provider, in the order of the default settings table.
 */
export function providerCredentialStatus(config: KnowledgeConfig | undefined, env: Record<string, string | undefined> = process.env): ProviderCredentialStatus[] {
  const statuses: ProviderCredentialStatus[] = [];
  for (const provider of Object.keys(DEFAULT_PROVIDER_SETTINGS) as AiProviderId[]) {
    const { api_key_env, base_url, default_model } = providerSettings(config, provider);
    // An empty string counts as "not configured".
    const configured = Boolean(env[api_key_env]);
    statuses.push({
      provider,
      api_key_env,
      configured,
      source: configured ? 'env' : 'missing',
      base_url: base_url ?? null,
      default_model,
    });
  }
  return statuses;
}
|
|
195
|
-
|
|
196
|
-
/**
 * Assembles the full provider report.
 *
 * @param config Knowledge config that may override provider settings.
 * @param env Environment to inspect for API keys; defaults to `process.env`.
 * @returns The resolved default model, per-provider credential status and the model registry.
 */
export function providerStatus(config?: KnowledgeConfig, env: Record<string, string | undefined> = process.env): ProviderStatusResult {
  const defaultModel = resolveModelRef('default', config);
  const providers = providerCredentialStatus(config, env);
  const models = listModelRegistry(config);
  return { default_model: defaultModel, providers, models };
}
|
|
203
|
-
|
|
204
|
-
/**
 * Ensures the given provider has an API key available.
 *
 * @param provider Provider to check.
 * @param config Knowledge config that may override provider settings.
 * @param env Environment to inspect; defaults to `process.env`.
 * @returns The provider's credential status when a key is configured.
 * @throws Error when the provider is unknown or its API key env var is not set.
 */
export function assertProviderCredentials(provider: AiProviderId, config?: KnowledgeConfig, env: Record<string, string | undefined> = process.env): ProviderCredentialStatus {
  const statuses = providerCredentialStatus(config, env);
  const match = statuses.find((candidate) => candidate.provider === provider);
  if (match === undefined) {
    throw new Error(`Unsupported AI provider: ${provider}`);
  }
  if (match.configured) {
    return match;
  }
  throw new Error(`Missing ${match.api_key_env} for ${provider}. Set the env var to use this provider.`);
}
|
|
210
|
-
|
|
211
|
-
/**
 * Lazily loads the AI SDK package of a provider and returns its factory.
 * Dynamic imports mean only the SDK that is actually requested gets loaded.
 *
 * @param provider Provider whose SDK factory is wanted.
 * @returns The provider's `create*` function.
 */
async function defaultFactory(provider: AiProviderId): Promise<ProviderFactory> {
  switch (provider) {
    case 'openai': {
      const sdk = await import('@ai-sdk/openai');
      return sdk.createOpenAI as ProviderFactory;
    }
    case 'anthropic': {
      const sdk = await import('@ai-sdk/anthropic');
      return sdk.createAnthropic as ProviderFactory;
    }
    default: {
      // Any remaining provider id falls through to DeepSeek.
      const sdk = await import('@ai-sdk/deepseek');
      return sdk.createDeepSeek as ProviderFactory;
    }
  }
}
|
|
223
|
-
|
|
224
|
-
/**
 * Creates an AI SDK provider registry that contains every provider for which
 * an API key is available. Providers without a key are left out.
 *
 * @param options Optional config, environment and factory overrides.
 * @returns The registry produced by the `ai` package's `createProviderRegistry`.
 */
export async function createAiSdkProviderRegistry(options: AiProviderRuntimeOptions = {}) {
  const { createProviderRegistry } = await import('ai');
  const env = options.env ?? process.env;
  const registered: Record<string, unknown> = {};

  const providerIds = Object.keys(DEFAULT_PROVIDER_SETTINGS) as AiProviderId[];
  for (const id of providerIds) {
    const { api_key_env: keyVar, base_url: baseURL } = providerSettings(options.config, id);
    const apiKey = env[keyVar];
    if (apiKey) {
      // An injected factory takes precedence over the lazily imported SDK one.
      const build = options.factories?.[id] ?? await defaultFactory(id);
      registered[id] = build({ apiKey, baseURL });
    }
  }

  return createProviderRegistry(registered as never);
}
|
|
237
|
-
|
|
238
|
-
/**
 * Resolves an alias or model ref to a language model from the provider registry.
 *
 * @param aliasOrRef Alias name or `provider:model` ref.
 * @param options Optional config, environment and factory overrides.
 * @returns The language model the registry returns for the resolved ref.
 * @throws Error when the ref is invalid or the provider's API key is missing.
 */
export async function languageModelFor(aliasOrRef: string, options: AiProviderRuntimeOptions = {}) {
  const { config, env } = options;
  const modelRef = resolveModelRef(aliasOrRef, config);
  const { provider } = parseModelRef(modelRef);
  // Fail with a clear message before any SDK is loaded.
  assertProviderCredentials(provider, config, env);
  const registry = await createAiSdkProviderRegistry(options);
  return registry.languageModel(modelRef as `${string}:${string}`);
}
|
|
245
|
-
|
|
246
|
-
/**
 * Picks the first finite number found under any of the given keys.
 *
 * @param usage Raw usage object.
 * @param keys Candidate keys, in priority order.
 * @returns The first finite numeric value, or 0 when none of the keys has one.
 */
function usageNumber(usage: Record<string, unknown>, keys: string[]): number {
  const firstFinite = keys.find((key) => {
    const candidate = usage[key];
    return typeof candidate === 'number' && Number.isFinite(candidate);
  });
  return firstFinite === undefined ? 0 : (usage[firstFinite] as number);
}
|
|
253
|
-
|
|
254
|
-
/**
 * Normalizes usage reported by the AI SDK into a provider-independent record.
 *
 * Token counts are read from the first finite value among the camelCase and
 * snake_case spellings of the input/prompt and output/completion fields.
 *
 * @param input Provider, model, raw usage, provider metadata and optional cost.
 * @returns The normalized record; the raw usage and provider metadata are kept under `metadata`.
 */
export function normalizeAiSdkUsage(input: {
  provider: string;
  model: string;
  usage?: Record<string, unknown> | null;
  providerMetadata?: Record<string, unknown> | null;
  costUsd?: number;
}): NormalizedProviderUsage {
  const usage = input.usage ?? {};
  // Token counts are already restricted to finite numbers; apply the same rule
  // to the cost so NaN or Infinity never reaches the stored record.
  const costUsd = typeof input.costUsd === 'number' && Number.isFinite(input.costUsd) ? input.costUsd : 0;
  return {
    provider: input.provider,
    model: input.model,
    input_tokens: usageNumber(usage, ['inputTokens', 'promptTokens', 'input_tokens', 'prompt_tokens']),
    output_tokens: usageNumber(usage, ['outputTokens', 'completionTokens', 'output_tokens', 'completion_tokens']),
    cost_usd: costUsd,
    metadata: {
      usage,
      provider_metadata: input.providerMetadata ?? {},
    },
  };
}
|
|
274
|
-
|
|
275
|
-
/**
 * Inserts one row into the `provider_usage` table.
 *
 * @param db Open database handle.
 * @param input Normalized usage plus an optional run id and creation timestamp.
 * @returns The generated row id (`usage_` followed by a random UUID).
 */
export function recordProviderUsage(db: Database, input: NormalizedProviderUsage & { run_id?: string | null; created_at?: string }): string {
  const id = `usage_${randomUUID()}`;
  const runId = input.run_id ?? null;
  const metadataJson = JSON.stringify(input.metadata);
  // Falls back to the current time when the caller supplies no timestamp.
  const createdAt = input.created_at ?? new Date().toISOString();

  const values = [
    id,
    runId,
    input.provider,
    input.model,
    input.input_tokens,
    input.output_tokens,
    input.cost_usd,
    metadataJson,
    createdAt,
  ];
  db.run(
    `INSERT INTO provider_usage (id, run_id, provider, model, input_tokens, output_tokens, cost_usd, metadata_json, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
    values,
  );
  return id;
}
|
|
294
|
-
|
|
295
|
-
/**
 * Creates a provider factory that does no network work: the provider it
 * builds returns a plain descriptor for any requested model id.
 *
 * @param provider Provider id echoed into every descriptor.
 * @returns A factory whose provider exposes `languageModel` and `chat`.
 */
export function createDeterministicFakeProvider(provider: AiProviderId): ProviderFactory {
  return function fakeFactory() {
    // Both entry points return the same descriptor shape; the factory settings are ignored.
    const describeModel = (modelId: string) => ({
      provider,
      modelId,
      specificationVersion: 'v3',
    });
    return { languageModel: describeModel, chat: describeModel };
  };
}
|
package/src/reindex.ts
DELETED
|
@@ -1,260 +0,0 @@
|
|
|
1
|
-
import { createHash, randomUUID } from 'node:crypto';
|
|
2
|
-
import { indexKnowledgeEmbeddings, resolveEmbeddingModelRef, type EmbeddingRuntimeOptions } from './embeddings';
|
|
3
|
-
import { migrateKnowledgeDb, openKnowledgeDb } from './knowledge-db';
|
|
4
|
-
import { parseModelRef } from './providers';
|
|
5
|
-
import type { KnowledgeConfig } from './workspace';
|
|
6
|
-
|
|
7
|
-
/** Options shared by the reindex entry points, on top of the embedding runtime options. */
export interface ReindexRuntimeOptions extends EmbeddingRuntimeOptions {
  /** Path of the knowledge database to operate on. */
  dbPath: string;

  /** Knowledge config used to resolve the embedding model. */
  config?: KnowledgeConfig;

  /** Clock override; the current time is used when omitted. */
  now?: Date;
}
|
|
12
|
-
|
|
13
|
-
/** Snapshot of index health counters. */
export interface ReindexHealthResult {
  /** Highest version recorded in `schema_versions` (0 when none). */
  schema_version: number;

  /** Total number of rows in `chunks`. */
  chunks: number;

  /** Total number of rows in `vector_index_entries`. */
  vector_entries: number;

  /** Chunks without a vector entry for the resolved embedding model. */
  missing_embeddings: number;

  /** Reindex queue row counts keyed by status. */
  queued: Record<string, number>;

  /** Source revisions whose metadata marks them as stale or needing a reindex. */
  stale_revisions: number;
}
|
|
21
|
-
|
|
22
|
-
/** Outcome of an enqueue pass over chunks that lack embeddings. */
export interface ReindexEnqueueResult {
  /** Number of queue rows added by this pass. */
  enqueued: number;

  /** Number of chunks that already had a matching queue row. */
  already_queued: number;

  /** Reason recorded on the queue rows. */
  reason: string;
}
|
|
27
|
-
|
|
28
|
-
/** Summary returned by an embedding refresh run. */
export interface ReindexEmbeddingsResult {
  /** Id of the `runs` row created for this refresh. */
  run_id: string;

  /** Whether the existing index was cleared first. */
  full: boolean;

  /** Rows counted in `chunk_embeddings` before a full clear (0 otherwise). */
  deleted_embeddings: number;

  /** Rows counted in `vector_index_entries` before a full clear (0 otherwise). */
  deleted_vector_entries: number;

  /** Result of the enqueue pass. */
  queued: ReindexEnqueueResult;

  /** Whatever `indexKnowledgeEmbeddings` resolved with. */
  indexed: Awaited<ReturnType<typeof indexKnowledgeEmbeddings>>;

  /** Number of queue rows moved from pending to completed. */
  completed_queue_items: number;
}
|
|
37
|
-
|
|
38
|
-
/** Row shape returned by the missing-embedding query. */
interface MissingChunkRow {
  /** Id of the chunk that has no vector entry. */
  chunk_id: string;

  /** Revision the chunk belongs to, when it has one. */
  source_revision_id: string | null;

  /** URI of the owning source; null when the LEFT JOINs find no source. */
  source_uri: string | null;
}
|
|
43
|
-
|
|
44
|
-
/**
 * Derives a deterministic id from a value.
 *
 * @param prefix Text placed before the underscore.
 * @param value Input that is hashed with SHA-256.
 * @returns `<prefix>_` followed by the first 20 hex characters of the digest.
 */
function stableId(prefix: string, value: string): string {
  const digest = createHash('sha256').update(value).digest('hex');
  return [prefix, digest.slice(0, 20)].join('_');
}
|
|
47
|
-
|
|
48
|
-
/**
 * Counts reindex queue rows per status.
 *
 * @param dbPath Path of the knowledge database; a connection is opened and closed here.
 * @returns Status -> row count.
 */
function queueCounts(dbPath: string): Record<string, number> {
  const db = openKnowledgeDb(dbPath);
  try {
    const statement = db.query<{ status: string; n: number }, []>(
      `SELECT status, COUNT(*) AS n FROM reindex_queue GROUP BY status ORDER BY status`,
    );
    const pairs = statement.all().map(({ status, n }) => [status, n]);
    return Object.fromEntries(pairs);
  } finally {
    db.close();
  }
}
|
|
59
|
-
|
|
60
|
-
/**
 * Finds chunks that have no vector index entry for the resolved embedding model.
 *
 * @param dbPath Path of the knowledge database; a connection is opened and closed here.
 * @param options Supplies the model ref and config used to resolve the embedding model.
 * @returns Matching chunks ordered by creation time, then ordinal.
 */
function missingEmbeddingRows(dbPath: string, options: ReindexRuntimeOptions): MissingChunkRow[] {
  const { provider, model } = parseModelRef(resolveEmbeddingModelRef(options.modelRef, options.config));
  const db = openKnowledgeDb(dbPath);
  try {
    // Anti-join: the LEFT JOIN on vector_index_entries yields NULL for chunks without an entry.
    const statement = db.query<MissingChunkRow, [string, string]>(
      `SELECT c.id AS chunk_id, c.source_revision_id, s.uri AS source_uri
FROM chunks c
LEFT JOIN source_revisions sr ON sr.id = c.source_revision_id
LEFT JOIN sources s ON s.id = sr.source_id
LEFT JOIN vector_index_entries v ON v.chunk_id = c.id AND v.provider = ? AND v.model = ?
WHERE v.id IS NULL
ORDER BY c.created_at ASC, c.ordinal ASC`,
    );
    return statement.all(provider, model);
  } finally {
    db.close();
  }
}
|
|
78
|
-
|
|
79
|
-
/**
 * Collects index health counters after making sure the schema is migrated.
 *
 * @param options Database path plus the model/config used to detect missing embeddings.
 * @returns Schema version, row counts, missing embeddings, queue counts and stale revisions.
 */
export function reindexHealth(options: ReindexRuntimeOptions): ReindexHealthResult {
  const { dbPath } = options;
  migrateKnowledgeDb(dbPath);
  const db = openKnowledgeDb(dbPath);
  try {
    // Runs a single-row `COUNT(*) AS n` query, treating a missing row as 0.
    const countOf = (sql: string): number => db.query<{ n: number }, []>(sql).get()?.n ?? 0;

    const versionRow = db.query<{ version: number }, []>('SELECT MAX(version) AS version FROM schema_versions').get();
    const schemaVersion = versionRow?.version ?? 0;
    const chunks = countOf('SELECT COUNT(*) AS n FROM chunks');
    const vectorEntries = countOf('SELECT COUNT(*) AS n FROM vector_index_entries');
    const missingEmbeddings = missingEmbeddingRows(dbPath, options).length;
    // Staleness is detected by substring match on the stored JSON text.
    const staleRevisions = countOf(
      `SELECT COUNT(*) AS n FROM source_revisions
WHERE metadata_json LIKE '%"reindex_required":true%' OR metadata_json LIKE '%"status":"stale"%'`,
    );
    const queued = queueCounts(dbPath);

    return {
      schema_version: schemaVersion,
      chunks,
      vector_entries: vectorEntries,
      missing_embeddings: missingEmbeddings,
      queued,
      stale_revisions: staleRevisions,
    };
  } finally {
    db.close();
  }
}
|
|
103
|
-
|
|
104
|
-
/**
 * Queues an embedding job for every chunk that has no vector entry for the
 * resolved embedding model.
 *
 * Queue rows are keyed by (kind, target, reason). A chunk whose row is still
 * open is reported as already queued. A chunk whose row was previously marked
 * `completed` is missing its embedding again, so that row is reset to
 * `pending` and counted as enqueued instead of being skipped.
 *
 * @param options Database path, model/config, optional clock and optional `reason`
 *   (defaults to `missing_embedding`).
 * @returns Counts of enqueued and already-queued chunks plus the reason used.
 */
export function enqueueMissingEmbeddings(options: ReindexRuntimeOptions & { reason?: string }): ReindexEnqueueResult {
  migrateKnowledgeDb(options.dbPath);
  const now = (options.now ?? new Date()).toISOString();
  const reason = options.reason ?? 'missing_embedding';
  const rows = missingEmbeddingRows(options.dbPath, options);
  const db = openKnowledgeDb(options.dbPath);
  let enqueued = 0;
  let alreadyQueued = 0;
  try {
    // Prepared once; the original re-created this statement for every row.
    const findExisting = db.query<{ id: string; status: string }, [string, string, string]>(
      'SELECT id, status FROM reindex_queue WHERE kind = ? AND target_id = ? AND reason = ?',
    );
    const write = db.transaction(() => {
      for (const row of rows) {
        const existing = findExisting.get('embedding', row.chunk_id, reason);
        if (existing) {
          if (existing.status === 'completed') {
            // Every row in `rows` lacks an embedding right now, so a completed
            // queue item is stale and has to be reopened.
            db.run(
              'UPDATE reindex_queue SET status = ?, updated_at = ? WHERE id = ?',
              ['pending', now, existing.id],
            );
            enqueued += 1;
          } else {
            alreadyQueued += 1;
          }
          continue;
        }
        // Deterministic id: the same chunk and reason always map to the same queue row id.
        const id = stableId('rq', `embedding\u0000${row.chunk_id}\u0000${reason}`);
        db.run(
          `INSERT INTO reindex_queue (id, kind, target_id, source_uri, reason, status, metadata_json, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
          [
            id,
            'embedding',
            row.chunk_id,
            row.source_uri,
            reason,
            'pending',
            JSON.stringify({ source_revision_id: row.source_revision_id }),
            now,
            now,
          ],
        );
        enqueued += 1;
      }
    });
    write();
  } finally {
    db.close();
  }
  return { enqueued, already_queued: alreadyQueued, reason };
}
|
|
147
|
-
|
|
148
|
-
/**
 * Deletes every stored embedding and vector index entry.
 *
 * The counts and both deletes run inside one transaction, so a failure cannot
 * leave one table emptied and the other intact, and the reported counts
 * describe exactly the rows that were removed.
 *
 * @param dbPath Path of the knowledge database; a connection is opened and closed here.
 * @returns How many rows each table held before it was cleared.
 */
function clearEmbeddingIndex(dbPath: string): { embeddings: number; vectorEntries: number } {
  const db = openKnowledgeDb(dbPath);
  try {
    let embeddings = 0;
    let vectorEntries = 0;
    const clear = db.transaction(() => {
      embeddings = db.query<{ n: number }, []>('SELECT COUNT(*) AS n FROM chunk_embeddings').get()?.n ?? 0;
      vectorEntries = db.query<{ n: number }, []>('SELECT COUNT(*) AS n FROM vector_index_entries').get()?.n ?? 0;
      // Vector entries are removed before the embeddings, matching the original order.
      db.run('DELETE FROM vector_index_entries');
      db.run('DELETE FROM chunk_embeddings');
    });
    clear();
    return { embeddings, vectorEntries };
  } finally {
    db.close();
  }
}
|
|
160
|
-
|
|
161
|
-
/**
 * Marks pending embedding queue rows as completed once their chunk has a
 * vector entry for the resolved embedding model.
 *
 * @param dbPath Path of the knowledge database; a connection is opened and closed here.
 * @param options Supplies the model ref and config used to resolve the embedding model.
 * @param now ISO timestamp written to `updated_at`.
 * @returns Number of queue rows that were updated.
 */
function completeIndexedQueueItems(dbPath: string, options: ReindexRuntimeOptions, now: string): number {
  const { provider, model } = parseModelRef(resolveEmbeddingModelRef(options.modelRef, options.config));
  // Positional parameters, in the order the placeholders appear in the statement.
  const params = ['completed', now, 'embedding', 'pending', provider, model];
  const db = openKnowledgeDb(dbPath);
  try {
    const { changes } = db.run(
      `UPDATE reindex_queue
SET status = ?, updated_at = ?
WHERE kind = ?
AND status = ?
AND EXISTS (
SELECT 1 FROM vector_index_entries v
WHERE v.chunk_id = reindex_queue.target_id
AND v.provider = ?
AND v.model = ?
)`,
      params,
    );
    return changes;
  } finally {
    db.close();
  }
}
|
|
184
|
-
|
|
185
|
-
/**
 * Refreshes the embedding index and records the work as a run.
 *
 * Steps: migrate the schema, optionally clear the whole index (`full`),
 * enqueue chunks without embeddings, insert a `running` row into `runs`,
 * index embeddings, complete the satisfied queue rows, then mark the run
 * `completed` and append a `run_events` row.
 *
 * If indexing or queue completion throws, the run row is marked `failed`
 * (best effort) so it does not stay `running` forever, and the original
 * error is rethrown.
 *
 * @param options Reindex options plus `full` (clear before indexing) and `limit`
 *   (forwarded to `indexKnowledgeEmbeddings`).
 * @returns Run id, deletion counts, enqueue result, indexing result and completed queue rows.
 */
export async function refreshEmbeddingIndex(options: ReindexRuntimeOptions & { full?: boolean; limit?: number }): Promise<ReindexEmbeddingsResult> {
  migrateKnowledgeDb(options.dbPath);
  const now = (options.now ?? new Date()).toISOString();
  const runId = `run_${randomUUID()}`;
  const deleted = options.full ? clearEmbeddingIndex(options.dbPath) : { embeddings: 0, vectorEntries: 0 };
  const queued = enqueueMissingEmbeddings({ ...options, reason: options.full ? 'full_embedding_rebuild' : 'missing_embedding' });
  const db = openKnowledgeDb(options.dbPath);
  try {
    db.run(
      `INSERT INTO runs (id, type, prompt, status, provider, model, metadata_json, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
      [
        runId,
        'embedding-refresh',
        options.full ? 'full' : 'incremental',
        'running',
        'local',
        resolveEmbeddingModelRef(options.modelRef, options.config),
        JSON.stringify({ full: options.full === true, queued }),
        now,
        now,
      ],
    );
  } finally {
    // The connection is released before the long-running indexing step.
    db.close();
  }

  let indexed: Awaited<ReturnType<typeof indexKnowledgeEmbeddings>>;
  let completedQueueItems: number;
  try {
    indexed = await indexKnowledgeEmbeddings({
      dbPath: options.dbPath,
      config: options.config,
      env: options.env,
      modelRef: options.modelRef,
      dimensions: options.dimensions,
      fake: options.fake,
      limit: options.limit,
      now: options.now,
    });
    completedQueueItems = completeIndexedQueueItems(options.dbPath, options, now);
  } catch (error: unknown) {
    // Best effort only: a failure while recording the failure must not hide the real error.
    // NOTE(review): assumes `runs.status` accepts 'failed' - confirm against the schema.
    try {
      const failedDb = openKnowledgeDb(options.dbPath);
      try {
        failedDb.run(
          `UPDATE runs SET status = ?, metadata_json = ?, updated_at = ? WHERE id = ?`,
          [
            'failed',
            JSON.stringify({
              full: options.full === true,
              queued,
              error: error instanceof Error ? error.message : String(error),
            }),
            now,
            runId,
          ],
        );
      } finally {
        failedDb.close();
      }
    } catch {
      // Intentionally ignored; the original error is rethrown below.
    }
    throw error;
  }

  const doneDb = openKnowledgeDb(options.dbPath);
  try {
    doneDb.run(
      `UPDATE runs SET status = ?, metadata_json = ?, updated_at = ? WHERE id = ?`,
      [
        'completed',
        JSON.stringify({ full: options.full === true, queued, indexed, completed_queue_items: completedQueueItems }),
        now,
        runId,
      ],
    );
    doneDb.run(
      `INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
VALUES (?, ?, ?, ?, ?, ?)`,
      [
        `evt_${randomUUID()}`,
        runId,
        'info',
        'embedding_refresh_completed',
        JSON.stringify({ queued, indexed, completed_queue_items: completedQueueItems }),
        now,
      ],
    );
  } finally {
    doneDb.close();
  }

  return {
    run_id: runId,
    full: options.full === true,
    deleted_embeddings: deleted.embeddings,
    deleted_vector_entries: deleted.vectorEntries,
    queued,
    indexed,
    completed_queue_items: completedQueueItems,
  };
}
|