@hasna/knowledge 0.2.27 → 0.2.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -0
- package/bin/open-knowledge-mcp.js +15 -7
- package/bin/open-knowledge.js +17 -17
- package/dist/agent.d.ts +35 -0
- package/dist/artifact-store.d.ts +63 -0
- package/dist/auth.d.ts +35 -0
- package/dist/embeddings.d.ts +77 -0
- package/dist/index.d.ts +20 -0
- package/dist/index.js +5709 -0
- package/dist/knowledge-db.d.ts +27 -0
- package/dist/manifest-ingest.d.ts +35 -0
- package/dist/outbox-consume.d.ts +25 -0
- package/dist/provenance.d.ts +50 -0
- package/dist/providers.d.ts +89 -0
- package/dist/reindex.d.ts +37 -0
- package/dist/remote-client.d.ts +108 -0
- package/dist/retrieval.d.ts +71 -0
- package/dist/safety.d.ts +70 -0
- package/dist/sdk.d.ts +72 -0
- package/dist/search.d.ts +65 -0
- package/dist/service.d.ts +117 -0
- package/dist/source-ingest.d.ts +18 -0
- package/dist/source-ref.d.ts +30 -0
- package/dist/source-resolver.d.ts +92 -0
- package/dist/storage-contract.d.ts +106 -0
- package/dist/web-search.d.ts +40 -0
- package/dist/wiki-compiler.d.ts +67 -0
- package/dist/wiki-layout.d.ts +23 -0
- package/dist/workspace.d.ts +111 -0
- package/package.json +15 -7
- package/src/agent.ts +0 -367
- package/src/artifact-store.ts +0 -184
- package/src/auth.ts +0 -123
- package/src/cli.ts +0 -1184
- package/src/embeddings.ts +0 -516
- package/src/knowledge-db.ts +0 -354
- package/src/manifest-ingest.ts +0 -515
- package/src/mcp-http.js +0 -110
- package/src/mcp.js +0 -1503
- package/src/outbox-consume.ts +0 -463
- package/src/provenance.ts +0 -93
- package/src/providers.ts +0 -308
- package/src/reindex.ts +0 -260
- package/src/remote-client.ts +0 -268
- package/src/retrieval.ts +0 -326
- package/src/safety.ts +0 -265
- package/src/schema.js +0 -25
- package/src/search.ts +0 -510
- package/src/service.ts +0 -443
- package/src/source-ingest.ts +0 -268
- package/src/source-ref.ts +0 -104
- package/src/source-resolver.ts +0 -436
- package/src/storage-contract.ts +0 -346
- package/src/store.ts +0 -113
- package/src/web-search.ts +0 -330
- package/src/wiki-compiler.ts +0 -711
- package/src/wiki-layout.ts +0 -251
- package/src/workspace.ts +0 -251
package/src/service.ts
DELETED
|
@@ -1,443 +0,0 @@
|
|
|
1
|
-
import { createArtifactStore } from './artifact-store';
|
|
2
|
-
import {
|
|
3
|
-
clearKnowledgeAuth,
|
|
4
|
-
knowledgeAuthStatus,
|
|
5
|
-
normalizeKnowledgeApiOrigin,
|
|
6
|
-
saveKnowledgeAuth,
|
|
7
|
-
type KnowledgeAuthStatus,
|
|
8
|
-
} from './auth';
|
|
9
|
-
import { runKnowledgePrompt, type KnowledgePromptOptions } from './agent';
|
|
10
|
-
import {
|
|
11
|
-
embeddingIndexStatus,
|
|
12
|
-
indexKnowledgeEmbeddings,
|
|
13
|
-
searchVectorIndex,
|
|
14
|
-
type EmbeddingIndexOptions,
|
|
15
|
-
type EmbeddingSearchOptions,
|
|
16
|
-
} from './embeddings';
|
|
17
|
-
import { consumeOpenFilesOutbox } from './outbox-consume';
|
|
18
|
-
import { getKnowledgeDbStats, migrateKnowledgeDb, openKnowledgeDb } from './knowledge-db';
|
|
19
|
-
import { ingestOpenFilesManifest } from './manifest-ingest';
|
|
20
|
-
import { ingestSourceRef } from './source-ingest';
|
|
21
|
-
import { resolveOpenFilesSource } from './source-resolver';
|
|
22
|
-
import { providerStatus, listModelRegistry, type ProviderStatusResult, type ModelRegistryEntry } from './providers';
|
|
23
|
-
import { enqueueMissingEmbeddings, refreshEmbeddingIndex, reindexHealth, type ReindexRuntimeOptions } from './reindex';
|
|
24
|
-
import { knowledgeRegistryContract, RemoteKnowledgeClient, type RemoteKnowledgeRegistryContract } from './remote-client';
|
|
25
|
-
import { retrieveKnowledgeContext, type RetrievalOptions } from './retrieval';
|
|
26
|
-
import { hybridSearch, type HybridSearchOptions } from './search';
|
|
27
|
-
import { resolveSafetyPolicy } from './safety';
|
|
28
|
-
import { runProviderWebSearch, type WebSearchOptions } from './web-search';
|
|
29
|
-
import { compileWikiPage, fileAnswerToWiki, lintWiki, type WikiCompileOptions } from './wiki-compiler';
|
|
30
|
-
import {
|
|
31
|
-
recordStorageObjects,
|
|
32
|
-
resolveStorageContract,
|
|
33
|
-
validateStorageConfig,
|
|
34
|
-
type StorageContract,
|
|
35
|
-
type StorageValidationResult,
|
|
36
|
-
} from './storage-contract';
|
|
37
|
-
import { initializeWikiLayout, recordWikiLayoutCatalog } from './wiki-layout';
|
|
38
|
-
import {
|
|
39
|
-
canonicalHasnaXyzKnowledgeStorage,
|
|
40
|
-
ensureKnowledgeWorkspace,
|
|
41
|
-
readKnowledgeConfig,
|
|
42
|
-
resolveScopedWorkspace,
|
|
43
|
-
writeKnowledgeConfig,
|
|
44
|
-
type KnowledgeConfig,
|
|
45
|
-
type KnowledgeWorkspace,
|
|
46
|
-
} from './workspace';
|
|
47
|
-
|
|
48
|
-
/**
 * Construction options for {@link KnowledgeService}.
 */
export interface KnowledgeServiceOptions {
  /** Workspace scope name; `KnowledgeService.scope` reports `'global'` when this is omitted. */
  scope?: string;

  /** Forwarded together with `scope` to `resolveScopedWorkspace` when locating the workspace. */
  cwd?: string;
}
|
|
52
|
-
|
|
53
|
-
/**
 * Result of `KnowledgeService.paths()`: the service scope, the locations taken
 * from the ensured workspace, and the loaded configuration.
 */
export interface KnowledgePathsResult {
  ok: true;
  /** Scope of the service that produced this result. */
  scope: string;
  /** Workspace home. */
  home: string;

  // Individual workspace locations, copied from the workspace object.
  config_path: string;
  json_store_path: string;
  knowledge_db_path: string;
  artifacts_dir: string;
  indexes_dir: string;
  logs_dir: string;
  runs_dir: string;
  schemas_dir: string;
  wiki_dir: string;

  /** Configuration as returned by `KnowledgeService.config()`. */
  config: KnowledgeConfig;
  /** Set to the workspace home by `paths()`. */
  message: string;
}
|
|
69
|
-
|
|
70
|
-
/**
 * Result of `KnowledgeService.setup()`, describing the configuration that was
 * just written.
 */
export interface KnowledgeSetupResult {
  ok: true;
  /** Mode stored in the written configuration. */
  mode: KnowledgeConfig['mode'];
  /** Hosted API URL from the written configuration, or `null` when none is set. */
  api_url: string | null;
  /** Storage type from the written configuration. */
  storage_type: KnowledgeConfig['storage']['type'];
  /** `artifact_store.uri_prefix` of the storage contract resolved for the new configuration. */
  artifact_uri_prefix: string;
  /** `canonical_hasna_xyz` value of that same storage contract. */
  canonical_hasna_xyz: StorageContract['canonical_hasna_xyz'];
  /** Path of the configuration file that was written. */
  config_path: string;
  /** Suggested follow-up commands; the list depends on `mode`. */
  next: string[];
  /** Human-readable summary of the change. */
  message: string;
}
|
|
81
|
-
|
|
82
|
-
/**
 * Maps a user-supplied setup mode string onto the configured mode values.
 *
 * Matching ignores case and surrounding whitespace. `local` and `offline` map
 * to `'local'`; `hosted`, `remote` and `knowledge.hasna.xyz` map to `'hosted'`.
 *
 * @param value Raw mode text; `undefined` or an empty string means "not provided".
 * @returns The normalized mode, or `undefined` when no value was provided.
 * @throws Error when a non-empty value matches none of the accepted aliases.
 */
function normalizeMode(value: string | undefined): KnowledgeConfig['mode'] | undefined {
  if (!value) return undefined;

  switch (value.trim().toLowerCase()) {
    case 'local':
    case 'offline':
      return 'local';
    case 'hosted':
    case 'remote':
    case 'knowledge.hasna.xyz':
      return 'hosted';
    default:
      throw new Error('Invalid setup mode. Use hosted or local.');
  }
}
|
|
89
|
-
|
|
90
|
-
/**
 * Facade that binds one scoped knowledge workspace and its configuration to the
 * operations imported by this module (setup, auth, storage, wiki, ingest,
 * indexing, search and prompting).
 *
 * The workspace is ensured lazily on first use and the configuration is read
 * once and then cached; `setup()` replaces the cached configuration after it
 * has written the new one.
 */
export class KnowledgeService {
  /** Workspace returned by `ensureKnowledgeWorkspace`, kept after the first `ensureWorkspace()` call. */
  private ensuredWorkspace?: KnowledgeWorkspace;
  /** Configuration kept after the first `config()` read or the most recent `setup()` write. */
  private cachedConfig?: KnowledgeConfig;

  constructor(private readonly options: KnowledgeServiceOptions = {}) {}

  /** Scope from the constructor options, or `'global'` when none was given. */
  get scope(): string {
    const { scope } = this.options;
    return scope ?? 'global';
  }

  /**
   * The ensured workspace when one exists; otherwise the workspace resolved from
   * the constructor options (resolved again on every access until ensured).
   */
  get workspace(): KnowledgeWorkspace {
    if (this.ensuredWorkspace) return this.ensuredWorkspace;
    const { scope, cwd } = this.options;
    return resolveScopedWorkspace(scope, cwd);
  }

  /** Ensures the workspace at the resolved home once and returns the remembered result afterwards. */
  ensureWorkspace(): KnowledgeWorkspace {
    const existing = this.ensuredWorkspace;
    if (existing) return existing;

    const created = ensureKnowledgeWorkspace(this.workspace.home);
    this.ensuredWorkspace = created;
    return created;
  }

  /** Path of the JSON store inside the ensured workspace. */
  jsonStorePath(): string {
    const { jsonStorePath } = this.ensureWorkspace();
    return jsonStorePath;
  }

  /** Reads the configuration from the workspace config path on first call and caches it. */
  config(): KnowledgeConfig {
    if (this.cachedConfig) return this.cachedConfig;

    const { configPath } = this.ensureWorkspace();
    const loaded = readKnowledgeConfig(configPath);
    this.cachedConfig = loaded;
    return loaded;
  }

  /** Safety policy resolved from the current configuration and workspace. */
  safetyPolicy() {
    const config = this.config();
    return resolveSafetyPolicy(config, this.ensureWorkspace());
  }

  /** Artifact store created from the current configuration and workspace. */
  artifactStore() {
    const config = this.config();
    return createArtifactStore(config, this.ensureWorkspace());
  }

  /** Storage contract resolved for the current configuration, workspace and scope. */
  storageContract(): StorageContract {
    const config = this.config();
    return resolveStorageContract(config, this.ensureWorkspace(), this.scope);
  }

  /** Validation result for the storage section of the current configuration. */
  validateStorage(): StorageValidationResult {
    const config = this.config();
    return validateStorageConfig(config, this.ensureWorkspace());
  }

  /**
   * Updates mode, hosted API URL and (optionally) storage, writes the resulting
   * configuration to the workspace config path and caches it.
   *
   * @param options.mode Mode alias accepted by `normalizeMode`; the current mode is kept when omitted.
   * @param options.apiUrl API URL to store; when omitted or empty the currently configured URL, if any, is re-normalized.
   * @param options.canonicalHasnaXyz When truthy, storage is replaced with `canonicalHasnaXyzKnowledgeStorage()`.
   * @returns A summary of the written configuration plus suggested follow-up commands.
   * @throws Error when `options.mode` is not a recognized alias.
   */
  setup(options: { mode?: string; apiUrl?: string; canonicalHasnaXyz?: boolean } = {}): KnowledgeSetupResult {
    const workspace = this.ensureWorkspace();
    const current = this.config();
    const mode = normalizeMode(options.mode) ?? current.mode;

    // An explicit URL wins; otherwise fall back to whatever is already configured.
    const rawApiUrl = options.apiUrl || current.hosted?.api_url;
    const apiUrl = rawApiUrl ? normalizeKnowledgeApiOrigin(rawApiUrl) : null;

    const storageConfig = options.canonicalHasnaXyz ? canonicalHasnaXyzKnowledgeStorage() : current.storage;
    const nextConfig: KnowledgeConfig = {
      ...current,
      mode,
      hosted: {
        ...(current.hosted ?? {}),
        ...(apiUrl ? { api_url: apiUrl } : {}),
      },
      storage: storageConfig,
    };

    writeKnowledgeConfig(workspace.configPath, nextConfig);
    this.cachedConfig = nextConfig;

    const storage = resolveStorageContract(nextConfig, workspace, this.scope);
    const next =
      mode === 'hosted'
        ? ['open-knowledge auth login --api-key <key>', 'open-knowledge storage status --json', 'open-knowledge remote contracts --json']
        : ['open-knowledge search <query>', 'knowledge <prompt>'];

    return {
      ok: true,
      mode,
      api_url: nextConfig.hosted?.api_url ?? null,
      storage_type: nextConfig.storage.type,
      artifact_uri_prefix: storage.artifact_store.uri_prefix,
      canonical_hasna_xyz: storage.canonical_hasna_xyz,
      config_path: workspace.configPath,
      next,
      message: `Set knowledge mode to ${mode}`,
    };
  }

  /** Auth status for the current configuration and the given environment. */
  authStatus(env: Record<string, string | undefined> = process.env): KnowledgeAuthStatus {
    const config = this.config();
    return knowledgeAuthStatus(config, env);
  }

  /**
   * Saves credentials through `saveKnowledgeAuth`, translating the camelCase
   * input into the snake_case record it expects. When `input.apiUrl` is absent
   * the hosted API URL from the current configuration is used.
   */
  saveAuth(input: {
    apiKey: string;
    email?: string;
    orgId?: string;
    orgSlug?: string;
    userId?: string;
    apiUrl?: string;
  }, env: Record<string, string | undefined> = process.env) {
    const apiUrl = input.apiUrl ?? this.config().hosted?.api_url;
    const { apiKey, email, orgId, orgSlug, userId } = input;
    return saveKnowledgeAuth({
      api_key: apiKey,
      email,
      org_id: orgId,
      org_slug: orgSlug,
      user_id: userId,
      api_url: apiUrl,
    }, env);
  }

  /** Clears saved credentials through `clearKnowledgeAuth`. */
  clearAuth(env: Record<string, string | undefined> = process.env) {
    const outcome = clearKnowledgeAuth(env);
    return outcome;
  }

  /** Registry contract built from the current mode, allowed source schemes and artifact store. */
  remoteContract(): RemoteKnowledgeRegistryContract {
    const { artifact_store: artifactStore } = this.storageContract();
    const config = this.config();
    return knowledgeRegistryContract({
      mode: config.mode,
      sourceSchemes: config.sources.allowed_schemes,
      storageType: artifactStore.type,
      artifactUriPrefix: artifactStore.uri_prefix,
    });
  }

  /** Remote client built by `RemoteKnowledgeClient.fromConfig`; may be `null`. */
  remoteClient(env: Record<string, string | undefined> = process.env): RemoteKnowledgeClient | null {
    const config = this.config();
    return RemoteKnowledgeClient.fromConfig(config, env);
  }

  /** Reports the scope, every workspace location and the loaded configuration. */
  paths(): KnowledgePathsResult {
    const ws = this.ensureWorkspace();
    return {
      ok: true,
      scope: this.scope,
      home: ws.home,
      config_path: ws.configPath,
      json_store_path: ws.jsonStorePath,
      knowledge_db_path: ws.knowledgeDbPath,
      artifacts_dir: ws.artifactsDir,
      indexes_dir: ws.indexesDir,
      logs_dir: ws.logsDir,
      runs_dir: ws.runsDir,
      schemas_dir: ws.schemasDir,
      wiki_dir: ws.wikiDir,
      config: this.config(),
      message: ws.home,
    };
  }

  /** Runs `migrateKnowledgeDb` on the workspace database. */
  initDb() {
    const { knowledgeDbPath } = this.ensureWorkspace();
    return migrateKnowledgeDb(knowledgeDbPath);
  }

  /** Migrates the workspace database, then returns its stats. */
  dbStats() {
    const { knowledgeDbPath } = this.ensureWorkspace();
    migrateKnowledgeDb(knowledgeDbPath);
    return getKnowledgeDbStats(knowledgeDbPath);
  }

  /**
   * Migrates the database, initializes the wiki layout in the artifact store and
   * records the resulting artifacts in the database. The database handle is
   * closed even when recording fails.
   */
  async initWiki() {
    const { knowledgeDbPath } = this.ensureWorkspace();
    migrateKnowledgeDb(knowledgeDbPath);

    const layout = await initializeWikiLayout(this.artifactStore());
    const db = openKnowledgeDb(knowledgeDbPath);
    try {
      recordStorageObjects(db, layout.artifacts);
      recordWikiLayoutCatalog(db, layout.artifacts);
    } finally {
      db.close();
    }
    return layout;
  }

  /** Calls `compileWikiPage` with the workspace database and artifact store filled in. */
  async compileWiki(options: Omit<WikiCompileOptions, 'dbPath' | 'store'> = {}) {
    const { knowledgeDbPath: dbPath } = this.ensureWorkspace();
    return compileWikiPage({
      ...options,
      dbPath,
      store: this.artifactStore(),
    });
  }

  /**
   * Retrieves context for `options.prompt` and then hands prompt, answer and
   * context to `fileAnswerToWiki`.
   */
  async fileAnswer(options: {
    prompt: string;
    answer: string;
    approveWrite?: boolean;
    limit?: number;
    semantic?: boolean;
    modelRef?: string;
    dimensions?: number;
    fake?: boolean;
  }) {
    const { knowledgeDbPath: dbPath } = this.ensureWorkspace();
    const { prompt, answer, approveWrite, limit, semantic, modelRef, dimensions, fake } = options;

    const context = await this.retrieveContext({
      query: prompt,
      limit,
      semantic,
      modelRef,
      dimensions,
      fake,
    });

    return fileAnswerToWiki({
      dbPath,
      store: this.artifactStore(),
      prompt,
      answer,
      context,
      approveWrite,
    });
  }

  /** Runs the module-level `lintWiki` against the workspace database. */
  lintWiki() {
    const { knowledgeDbPath: dbPath } = this.ensureWorkspace();
    return lintWiki({ dbPath });
  }

  /** Ingests an open-files manifest using the current configuration and safety policy. */
  async ingestManifest(input: string) {
    const { knowledgeDbPath: dbPath } = this.ensureWorkspace();
    return ingestOpenFilesManifest({
      dbPath,
      input,
      config: this.config(),
      safetyPolicy: this.safetyPolicy(),
    });
  }

  /** Ingests a single source reference using the current configuration and safety policy. */
  async ingestSource(sourceRef: string, purpose?: string) {
    const { knowledgeDbPath: dbPath } = this.ensureWorkspace();
    return ingestSourceRef({
      dbPath,
      sourceRef,
      purpose,
      config: this.config(),
      safetyPolicy: this.safetyPolicy(),
    });
  }

  /** Resolves a source reference against the workspace database under the current safety policy. */
  async resolveSource(sourceRef: string, options: { purpose?: string; limit?: number } = {}) {
    const { knowledgeDbPath: dbPath } = this.ensureWorkspace();
    const { purpose, limit } = options;
    return resolveOpenFilesSource({
      dbPath,
      sourceRef,
      purpose,
      limit,
      safetyPolicy: this.safetyPolicy(),
    });
  }

  /** Consumes an open-files outbox using the current configuration and safety policy. */
  async consumeOutbox(input: string) {
    const { knowledgeDbPath: dbPath } = this.ensureWorkspace();
    return consumeOpenFilesOutbox({
      dbPath,
      input,
      config: this.config(),
      safetyPolicy: this.safetyPolicy(),
    });
  }

  /** Calls the module-level `reindexHealth` with the workspace database and configuration filled in. */
  reindexHealth(options: Omit<ReindexRuntimeOptions, 'dbPath' | 'config'> = {}) {
    const { knowledgeDbPath: dbPath } = this.ensureWorkspace();
    return reindexHealth({
      ...options,
      dbPath,
      config: this.config(),
    });
  }

  /** Calls `enqueueMissingEmbeddings` with the workspace database and configuration filled in. */
  enqueueReindex(options: Omit<ReindexRuntimeOptions, 'dbPath' | 'config'> = {}) {
    const { knowledgeDbPath: dbPath } = this.ensureWorkspace();
    return enqueueMissingEmbeddings({
      ...options,
      dbPath,
      config: this.config(),
    });
  }

  /** Calls `refreshEmbeddingIndex` with the workspace database and configuration filled in. */
  async refreshEmbeddings(options: Omit<ReindexRuntimeOptions & { full?: boolean; limit?: number }, 'dbPath' | 'config'> = {}) {
    const { knowledgeDbPath: dbPath } = this.ensureWorkspace();
    return refreshEmbeddingIndex({
      ...options,
      dbPath,
      config: this.config(),
    });
  }

  /** Provider status for the current configuration and the given environment. */
  providerStatus(env: Record<string, string | undefined> = process.env): ProviderStatusResult {
    const config = this.config();
    return providerStatus(config, env);
  }

  /** Model registry entries listed for the current configuration. */
  modelRegistry(): ModelRegistryEntry[] {
    const config = this.config();
    return listModelRegistry(config);
  }

  /** Embedding index status for the workspace database. */
  embeddingStatus() {
    const { knowledgeDbPath } = this.ensureWorkspace();
    return embeddingIndexStatus(knowledgeDbPath);
  }

  /** Calls `indexKnowledgeEmbeddings` with the workspace database and configuration filled in. */
  async indexEmbeddings(options: Omit<EmbeddingIndexOptions, 'dbPath' | 'config'> = {}) {
    const { knowledgeDbPath: dbPath } = this.ensureWorkspace();
    return indexKnowledgeEmbeddings({
      ...options,
      dbPath,
      config: this.config(),
    });
  }

  /** Calls `searchVectorIndex` with the workspace database and configuration filled in. */
  async semanticSearch(options: Omit<EmbeddingSearchOptions, 'dbPath' | 'config'>) {
    const { knowledgeDbPath: dbPath } = this.ensureWorkspace();
    return searchVectorIndex({
      ...options,
      dbPath,
      config: this.config(),
    });
  }

  /** Calls `hybridSearch` with the workspace database and configuration filled in. */
  async search(options: Omit<HybridSearchOptions, 'dbPath' | 'config'>) {
    const { knowledgeDbPath: dbPath } = this.ensureWorkspace();
    return hybridSearch({
      ...options,
      dbPath,
      config: this.config(),
    });
  }

  /** Calls `retrieveKnowledgeContext` with the workspace database and configuration filled in. */
  async retrieveContext(options: Omit<RetrievalOptions, 'dbPath' | 'config'>) {
    const { knowledgeDbPath: dbPath } = this.ensureWorkspace();
    return retrieveKnowledgeContext({
      ...options,
      dbPath,
      config: this.config(),
    });
  }

  /** Calls `runKnowledgePrompt` with the workspace database and configuration filled in. */
  async runPrompt(options: Omit<KnowledgePromptOptions, 'dbPath' | 'config'>) {
    const { knowledgeDbPath: dbPath } = this.ensureWorkspace();
    return runKnowledgePrompt({
      ...options,
      dbPath,
      config: this.config(),
    });
  }

  /** Calls `runProviderWebSearch` with the workspace database, configuration and safety policy filled in. */
  async webSearch(options: Omit<WebSearchOptions, 'dbPath' | 'config' | 'safetyPolicy'>) {
    const { knowledgeDbPath: dbPath } = this.ensureWorkspace();
    return runProviderWebSearch({
      ...options,
      dbPath,
      config: this.config(),
      safetyPolicy: this.safetyPolicy(),
    });
  }
}
|
|
440
|
-
|
|
441
|
-
/**
 * Factory for {@link KnowledgeService}.
 *
 * @param options Forwarded unchanged to the `KnowledgeService` constructor.
 * @returns A new service instance.
 */
export function createKnowledgeService(options: KnowledgeServiceOptions = {}): KnowledgeService {
  const service = new KnowledgeService(options);
  return service;
}
|
package/src/source-ingest.ts
DELETED
|
@@ -1,268 +0,0 @@
|
|
|
1
|
-
import { createHash } from 'node:crypto';
|
|
2
|
-
import { existsSync, readFileSync } from 'node:fs';
|
|
3
|
-
import { basename } from 'node:path';
|
|
4
|
-
import { ingestOpenFilesManifestItems, type ManifestIngestResult, type ManifestObject } from './manifest-ingest';
|
|
5
|
-
import { parseSourceRef, type SourceRef } from './source-ref';
|
|
6
|
-
import { resolveOpenFilesSource } from './source-resolver';
|
|
7
|
-
import type { KnowledgeConfig } from './workspace';
|
|
8
|
-
import { assertS3ReadAllowed, assertWebSearchAllowed, type SafetyPolicy } from './safety';
|
|
9
|
-
|
|
10
|
-
/**
 * Input for `ingestSourceRef`.
 */
export interface SourceIngestOptions {
  /** Knowledge database path handed to the resolver and to manifest ingest. */
  dbPath: string;
  /** Source reference to ingest; parsed with `parseSourceRef`. */
  sourceRef: string;
  /** Purpose of the read; `'knowledge_index'` is used when omitted. */
  purpose?: string;
  /** Configuration forwarded to the direct source readers. */
  config?: KnowledgeConfig;
  /** Safety policy forwarded to the readers, the resolver and manifest ingest. */
  safetyPolicy?: SafetyPolicy;
  /** Clock value forwarded to the resolver and to manifest ingest. */
  now?: Date;
}
|
|
18
|
-
|
|
19
|
-
/**
 * Result of `ingestSourceRef`: the manifest ingest result extended with details
 * about the source that was read.
 */
export interface SourceIngestResult extends ManifestIngestResult {
  /** The source reference exactly as it was passed in. */
  source_ref: string;
  /** Where the ingested text was obtained from. */
  content_source: 'catalog_chunks' | 'extracted_text_ref' | 'file' | 's3' | 'web';
  read_only: true;
  /** Hash recorded on the ingested manifest item. */
  hash: string;
}
|
|
25
|
-
|
|
26
|
-
/**
 * Text read for a source reference together with the descriptive fields that
 * `manifestItemForSource` copies onto the manifest item.
 */
interface ResolvedText {
  /** The text content that was read. */
  text: string;
  /** Where `text` was obtained from. */
  contentSource: SourceIngestResult['content_source'];
  title: string | null;
  mime: string | null;
  size: number | null;
  hash: string | null;
  revision: string | null;
  /** URI of the extracted-text reference, when one is known. */
  extractedTextRef: string | null;
  metadata: Record<string, unknown>;
  permissions: Record<string, unknown>;
}
|
|
38
|
-
|
|
39
|
-
/**
 * Hashes text with SHA-256.
 *
 * @param text Text to hash.
 * @returns The lowercase hex digest prefixed with `sha256:`.
 */
function sha256Text(text: string): string {
  const digest = createHash('sha256').update(text).digest('hex');
  return 'sha256:' + digest;
}
|
|
42
|
-
|
|
43
|
-
/**
 * Reduces an HTML document to plain text.
 *
 * Steps, in order:
 * 1. `<script>` and `<style>` elements are removed together with their content.
 * 2. Every remaining tag is replaced by a space.
 * 3. The entities `&nbsp;`, `&lt;`, `&gt;` and `&amp;` are decoded.
 * 4. Whitespace around line breaks is dropped, runs of spaces/tabs collapse to
 *    one space, and the result is trimmed.
 *
 * Only the four entities listed above are decoded; any other entity is left
 * in the text as written.
 *
 * @param html Raw HTML markup.
 * @returns The visible text content.
 */
function stripHtml(html: string): string {
  return html
    .replace(/<script[\s\S]*?<\/script>/gi, ' ')
    .replace(/<style[\s\S]*?<\/style>/gi, ' ')
    .replace(/<[^>]+>/g, ' ')
    .replace(/&nbsp;/g, ' ')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    // `&amp;` must be decoded last: decoding it first would turn an escaped
    // entity such as `&amp;lt;` into `&lt;` and then, wrongly, into `<`.
    .replace(/&amp;/g, '&')
    .replace(/\s+\n/g, '\n')
    .replace(/\n\s+/g, '\n')
    .replace(/[ \t]{2,}/g, ' ')
    .trim();
}
|
|
57
|
-
|
|
58
|
-
/**
 * Downloads an S3 object and returns its body as a string.
 *
 * The bucket is the URI host and the key is the URI path, with leading slashes
 * removed and percent-escapes decoded. Region, credentials profile and retry
 * count come from `config.storage.s3`, but only when the configured storage is
 * S3 and names the same bucket; otherwise the client is built with all three
 * left `undefined`.
 *
 * @param uri Source URI from which bucket and key are taken.
 * @param config Optional knowledge configuration supplying S3 client settings.
 * @param safetyPolicy When given, `assertS3ReadAllowed` is called before the request.
 * @returns The object body, or `''` when the response carries no body.
 * @throws Error when the URI yields an empty bucket or key.
 */
async function readS3Text(uri: string, config?: KnowledgeConfig, safetyPolicy?: SafetyPolicy): Promise<string> {
  const { hostname: bucket, pathname } = new URL(uri);
  const key = decodeURIComponent(pathname.replace(/^\/+/, ''));
  if (!bucket || !key) throw new Error(`Invalid S3 source URI: ${uri}`);
  if (safetyPolicy) assertS3ReadAllowed(uri, safetyPolicy);

  // The AWS SDK packages are imported only when an S3 read actually happens.
  const [s3Module, credentialsModule] = await Promise.all([
    import('@aws-sdk/client-s3'),
    import('@aws-sdk/credential-providers'),
  ]);
  const { S3Client, GetObjectCommand } = s3Module;
  const { fromIni } = credentialsModule;

  const s3Config =
    config?.storage.type === 's3' && config.storage.s3?.bucket === bucket
      ? config.storage.s3
      : undefined;

  const client = new S3Client({
    region: s3Config?.region,
    credentials: s3Config?.profile ? fromIni({ profile: s3Config.profile }) : undefined,
    maxAttempts: s3Config?.max_attempts,
  });

  const { Body: body } = await client.send(new GetObjectCommand({ Bucket: bucket, Key: key }));
  if (!body) return '';
  return await body.transformToString();
}
|
|
78
|
-
|
|
79
|
-
/**
 * Fetches a web resource and returns its text together with the reported
 * content type.
 *
 * When the `content-type` header contains `html`, the body is passed through
 * `stripHtml`; otherwise the body is returned unchanged.
 *
 * @param uri URL to fetch.
 * @param safetyPolicy When given, `assertWebSearchAllowed` is called before the request.
 * @returns The text and the raw `content-type` header value (`null` when absent).
 * @throws Error when the response status is not OK.
 */
async function readWebText(uri: string, safetyPolicy?: SafetyPolicy): Promise<{ text: string; mime: string | null }> {
  if (safetyPolicy) assertWebSearchAllowed(safetyPolicy);

  const headers = {
    accept: 'text/markdown,text/plain,text/html,application/json;q=0.8,*/*;q=0.5',
    'user-agent': '@hasna/knowledge source-ingest',
  };
  const response = await fetch(uri, { headers });
  if (!response.ok) throw new Error(`Web source read failed ${response.status}: ${uri}`);

  const mime = response.headers.get('content-type');
  const body = await response.text();
  const text = mime?.includes('html') ? stripHtml(body) : body;
  return { text, mime };
}
|
|
92
|
-
|
|
93
|
-
/**
 * Derives a display title from a parsed source reference.
 *
 * - `file`: base name of the path.
 * - `s3`: base name of the object key.
 * - `web`: base name of the URL path, or the whole URL when that is empty.
 * - any other kind: base name of `path` when present, otherwise `id`.
 */
function titleForRef(parsed: SourceRef): string | null {
  switch (parsed.kind) {
    case 'file':
      return basename(parsed.path);
    case 's3':
      return basename(parsed.key);
    case 'web':
      return basename(new URL(parsed.url).pathname) || parsed.url;
    default:
      return parsed.path ? basename(parsed.path) : parsed.id;
  }
}
|
|
99
|
-
|
|
100
|
-
/**
 * Reads the text of a `file`, `s3` or `web` source directly.
 *
 * File and S3 sources are reported with mime `text/plain`; web sources carry
 * the mime returned by `readWebText`. Every result has a freshly computed
 * SHA-256 hash, no revision, no extracted-text reference and read-only
 * permissions.
 *
 * NOTE(review): `size` is `text.length` (a character count of the decoded
 * string), not a byte count — confirm that is what consumers of `size` expect.
 *
 * @param parsed Parsed source reference.
 * @param config Forwarded to `readS3Text` for S3 sources.
 * @param safetyPolicy Forwarded to the S3 and web readers.
 * @throws Error when a file source does not exist, or when the reference kind
 *   is none of `file`, `s3`, `web`.
 */
async function readDirectSourceText(parsed: SourceRef, config?: KnowledgeConfig, safetyPolicy?: SafetyPolicy): Promise<ResolvedText> {
  // Builds the result; only the four arguments differ between source kinds.
  const describe = (
    text: string,
    contentSource: ResolvedText['contentSource'],
    mime: string | null,
    metadata: Record<string, unknown>,
  ): ResolvedText => ({
    text,
    contentSource,
    title: titleForRef(parsed),
    mime,
    size: text.length,
    hash: sha256Text(text),
    revision: null,
    extractedTextRef: null,
    metadata,
    permissions: { mode: 'read_only' },
  });

  if (parsed.kind === 'file') {
    if (!existsSync(parsed.path)) throw new Error(`Source file not found: ${parsed.path}`);
    const fileText = readFileSync(parsed.path, 'utf8');
    return describe(fileText, 'file', 'text/plain', { path: parsed.path });
  }

  if (parsed.kind === 's3') {
    const objectText = await readS3Text(parsed.uri, config, safetyPolicy);
    return describe(objectText, 's3', 'text/plain', { bucket: parsed.bucket, key: parsed.key });
  }

  if (parsed.kind === 'web') {
    const page = await readWebText(parsed.url, safetyPolicy);
    return describe(page.text, 'web', page.mime, { url: parsed.url });
  }

  throw new Error(`Direct source reading is not available for ${parsed.uri}`);
}
|
|
152
|
-
|
|
153
|
-
/**
 * Reads the text behind an extracted-text reference URI.
 *
 * The URI is parsed with `parseSourceRef` and read with `readDirectSourceText`;
 * the content source is always reported as `'extracted_text_ref'`.
 *
 * @param uri Extracted-text reference URI.
 * @param config Forwarded to `readDirectSourceText`.
 * @param safetyPolicy Forwarded to `readDirectSourceText`.
 * @throws Error when the URI starts with `open-files://`, which is rejected before parsing.
 */
async function readTextRef(uri: string, config?: KnowledgeConfig, safetyPolicy?: SafetyPolicy): Promise<{ text: string; contentSource: SourceIngestResult['content_source'] }> {
  const isOpenFilesRef = uri.startsWith('open-files://');
  if (isOpenFilesRef) {
    throw new Error('Open-files extracted text refs require an open-files resolver API. Ingest an open-files manifest with extracted_text or an extracted_text_ref using file://, s3://, or https://.');
  }

  const { text } = await readDirectSourceText(parseSourceRef(uri), config, safetyPolicy);
  return { text, contentSource: 'extracted_text_ref' };
}
|
|
161
|
-
|
|
162
|
-
/**
 * Obtains the text of an open-files source via `resolveOpenFilesSource`.
 *
 * Two paths produce a result:
 * - the revision has an extracted-text URI and the resolver reports no text
 *   available: the text is read through `readTextRef`;
 * - otherwise the resolved chunks are joined with blank lines.
 *
 * In both paths the revision hash is used when present and a SHA-256 of the
 * text otherwise. Title, metadata and permissions come from the resolved
 * source, with permissions defaulting to read-only.
 *
 * NOTE(review): the resolver is called with `limit: 100`; presumably that caps
 * the number of chunks joined here — confirm whether larger sources end up
 * truncated.
 *
 * @throws Error when the source is not resolved, or when there is neither a
 *   readable extracted-text reference nor any chunk.
 */
async function readOpenFilesSourceText(options: SourceIngestOptions): Promise<ResolvedText> {
  const resolved = await resolveOpenFilesSource({
    dbPath: options.dbPath,
    sourceRef: options.sourceRef,
    purpose: options.purpose ?? 'knowledge_index',
    limit: 100,
    safetyPolicy: options.safetyPolicy,
    now: options.now,
  });
  if (!resolved.resolved) {
    throw new Error('Open-files source is not in the local knowledge catalog. Ingest an open-files manifest first or use the open-files resolver API.');
  }

  // Descriptive fields shared by both result shapes below.
  const title = resolved.source?.title ?? null;
  const metadata = resolved.source?.metadata ?? {};
  const permissions = resolved.source?.permissions ?? { mode: 'read_only' };

  const revision = resolved.revision;
  const textRefUri = revision?.extracted_text_uri;
  if (revision && textRefUri && !resolved.content.text_available) {
    const textRef = await readTextRef(textRefUri, options.config, options.safetyPolicy);
    return {
      text: textRef.text,
      contentSource: textRef.contentSource,
      title,
      mime: resolved.content.mime,
      size: textRef.text.length,
      hash: revision.hash ?? sha256Text(textRef.text),
      revision: revision.revision,
      extractedTextRef: textRefUri,
      metadata,
      permissions,
    };
  }

  if (resolved.chunks.length === 0) {
    throw new Error('Open-files source has no extracted text chunks yet. Ingest an open-files manifest with extracted_text or extracted_text_ref first.');
  }

  const joined = resolved.chunks.map((chunk) => chunk.text).join('\n\n');
  return {
    text: joined,
    contentSource: 'catalog_chunks',
    title,
    mime: resolved.content.mime,
    size: joined.length,
    hash: revision?.hash ?? sha256Text(joined),
    revision: revision?.revision ?? null,
    extractedTextRef: revision?.extracted_text_uri ?? null,
    metadata,
    permissions,
  };
}
|
|
206
|
-
|
|
207
|
-
/**
 * Builds the manifest item that represents one resolved source.
 *
 * Defaults are filled from `resolved`: missing hash → SHA-256 of the text,
 * missing title → `titleForRef(parsed)`, missing mime → `text/plain`, missing
 * size → text length, missing revision → the hash. `resolved.permissions` is
 * spread after the `mode` / `allowed_purposes` defaults, so any of those keys
 * it carries replace the defaults.
 *
 * @param sourceRef Source reference as given by the caller.
 * @param parsed Parsed form of `sourceRef`; decides which id/path/url field is set.
 * @param resolved Text and descriptive fields read for the source.
 * @param purpose Purpose recorded as the default `allowed_purposes` entry.
 * @param now Timestamp written to `updated_at`; defaults to the current time.
 */
function manifestItemForSource(
  sourceRef: string,
  parsed: SourceRef,
  resolved: ResolvedText,
  purpose: string,
  now: Date = new Date(),
): ManifestObject {
  const hash = resolved.hash ?? sha256Text(resolved.text);
  const metadata = {
    ...resolved.metadata,
    source_ref: sourceRef,
    content_source: resolved.contentSource,
    read_only: true,
  };
  const item: ManifestObject = {
    source_ref: sourceRef,
    name: resolved.title ?? titleForRef(parsed),
    mime: resolved.mime ?? 'text/plain',
    size: resolved.size ?? resolved.text.length,
    hash,
    revision: resolved.revision ?? hash,
    status: 'active',
    // Use the caller's clock so the item agrees with the other timestamps of the same ingest.
    updated_at: now.toISOString(),
    permissions: {
      mode: 'read_only',
      allowed_purposes: [purpose],
      ...resolved.permissions,
    },
    metadata,
    extracted_text_ref: resolved.extractedTextRef,
    extracted_text: resolved.text,
  };
  if (parsed.kind === 'open-files') {
    if (parsed.entity === 'file') item.file_id = parsed.id;
    if (parsed.entity === 'source') {
      item.source_id = parsed.id;
      item.path = parsed.path;
    }
  }
  if (parsed.kind === 'file') item.path = parsed.path;
  if (parsed.kind === 's3') item.path = parsed.key;
  if (parsed.kind === 'web') item.url = parsed.url;
  return item;
}

/**
 * Reads one source reference and ingests it as a single manifest item.
 *
 * `open-files` references are read through `readOpenFilesSourceText`; every
 * other kind is read through `readDirectSourceText`. The purpose defaults to
 * `'knowledge_index'`. `options.now`, when supplied, is used for the item's
 * `updated_at` and is also forwarded to manifest ingest.
 *
 * @param options Database path, source reference and optional purpose,
 *   configuration, safety policy and clock.
 * @returns The manifest ingest result plus the source reference, the content
 *   source, a `read_only` marker and the item hash.
 */
export async function ingestSourceRef(options: SourceIngestOptions): Promise<SourceIngestResult> {
  const purpose = options.purpose ?? 'knowledge_index';
  const parsed = parseSourceRef(options.sourceRef);
  const resolved = parsed.kind === 'open-files'
    ? await readOpenFilesSourceText(options)
    : await readDirectSourceText(parsed, options.config, options.safetyPolicy);
  // Passing `undefined` for `now` falls back to the current time inside the helper.
  const item = manifestItemForSource(options.sourceRef, parsed, resolved, purpose, options.now);
  const result = await ingestOpenFilesManifestItems({
    dbPath: options.dbPath,
    items: [item],
    sourceLabel: options.sourceRef,
    readAction: 'source_ref_ingest_read',
    safetyPolicy: options.safetyPolicy,
    now: options.now,
  });
  return {
    ...result,
    source_ref: options.sourceRef,
    content_source: resolved.contentSource,
    read_only: true,
    hash: String(item.hash),
  };
}
|