@hasna/knowledge 0.2.26 → 0.2.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +61 -0
- package/bin/open-knowledge-mcp.js +85 -9
- package/bin/open-knowledge.js +86 -86
- package/dist/agent.d.ts +35 -0
- package/dist/artifact-store.d.ts +63 -0
- package/dist/auth.d.ts +35 -0
- package/dist/embeddings.d.ts +77 -0
- package/dist/index.d.ts +20 -0
- package/dist/index.js +5709 -0
- package/dist/knowledge-db.d.ts +27 -0
- package/dist/manifest-ingest.d.ts +35 -0
- package/dist/outbox-consume.d.ts +25 -0
- package/dist/provenance.d.ts +50 -0
- package/dist/providers.d.ts +89 -0
- package/dist/reindex.d.ts +37 -0
- package/dist/remote-client.d.ts +108 -0
- package/dist/retrieval.d.ts +71 -0
- package/dist/safety.d.ts +70 -0
- package/dist/sdk.d.ts +72 -0
- package/dist/search.d.ts +65 -0
- package/dist/service.d.ts +117 -0
- package/dist/source-ingest.d.ts +18 -0
- package/dist/source-ref.d.ts +30 -0
- package/dist/source-resolver.d.ts +92 -0
- package/dist/storage-contract.d.ts +106 -0
- package/dist/web-search.d.ts +40 -0
- package/dist/wiki-compiler.d.ts +67 -0
- package/dist/wiki-layout.d.ts +23 -0
- package/dist/workspace.d.ts +111 -0
- package/docs/architecture/ai-native-knowledge-base.md +24 -0
- package/docs/architecture/hosted-wrapper-responsibilities.md +8 -0
- package/docs/canonical-secrets-bootstrap-2026-06-08.md +127 -0
- package/package.json +15 -7
- package/src/agent.ts +0 -367
- package/src/artifact-store.ts +0 -184
- package/src/auth.ts +0 -123
- package/src/cli.ts +0 -1181
- package/src/embeddings.ts +0 -516
- package/src/knowledge-db.ts +0 -354
- package/src/manifest-ingest.ts +0 -515
- package/src/mcp-http.js +0 -110
- package/src/mcp.js +0 -1503
- package/src/outbox-consume.ts +0 -463
- package/src/provenance.ts +0 -93
- package/src/providers.ts +0 -308
- package/src/reindex.ts +0 -260
- package/src/remote-client.ts +0 -268
- package/src/retrieval.ts +0 -326
- package/src/safety.ts +0 -265
- package/src/schema.js +0 -25
- package/src/search.ts +0 -510
- package/src/service.ts +0 -432
- package/src/source-ingest.ts +0 -268
- package/src/source-ref.ts +0 -104
- package/src/source-resolver.ts +0 -436
- package/src/storage-contract.ts +0 -293
- package/src/store.ts +0 -113
- package/src/web-search.ts +0 -330
- package/src/wiki-compiler.ts +0 -711
- package/src/wiki-layout.ts +0 -251
- package/src/workspace.ts +0 -213
package/src/manifest-ingest.ts
DELETED
|
@@ -1,515 +0,0 @@
|
|
|
1
|
-
import { createHash } from 'node:crypto';
|
|
2
|
-
import { existsSync, readFileSync } from 'node:fs';
|
|
3
|
-
import { basename } from 'node:path';
|
|
4
|
-
import type { Database } from 'bun:sqlite';
|
|
5
|
-
import { migrateKnowledgeDb, openKnowledgeDb } from './knowledge-db';
|
|
6
|
-
import { parseSourceRef, type SourceRef } from './source-ref';
|
|
7
|
-
import { sourceProvenance, withProvenance } from './provenance';
|
|
8
|
-
import type { KnowledgeConfig } from './workspace';
|
|
9
|
-
import {
|
|
10
|
-
assertS3ReadAllowed,
|
|
11
|
-
assertWriteAllowed,
|
|
12
|
-
recordAuditEvent,
|
|
13
|
-
recordRedactionFindings,
|
|
14
|
-
redactSecrets,
|
|
15
|
-
type SafetyPolicy,
|
|
16
|
-
} from './safety';
|
|
17
|
-
|
|
18
|
-
/**
 * Options for `ingestOpenFilesManifest`, which reads a manifest from a local
 * path or an `s3://` URI and ingests its items into the knowledge database.
 */
export interface ManifestIngestOptions {
  /** Path of the knowledge database that is migrated and written to. */
  dbPath: string;
  /** Manifest location: an `s3://` URI or a local file path. */
  input: string;
  /** Workspace configuration; forwarded to the S3 reader when `input` is an `s3://` URI. */
  config?: KnowledgeConfig;
  /** When provided, write and S3-read permissions are asserted against it and it is passed to secret redaction. */
  safetyPolicy?: SafetyPolicy;
  /** Clock override; the current time is used when omitted. */
  now?: Date;
  /** Maximum characters per chunk; forwarded to `ingestOpenFilesManifestItems`. */
  maxChunkChars?: number;
  /** Characters of overlap between consecutive chunks; forwarded to `ingestOpenFilesManifestItems`. */
  chunkOverlapChars?: number;
}
|
|
27
|
-
|
|
28
|
-
/**
 * Options for `ingestOpenFilesManifestItems`, which ingests manifest items that
 * have already been parsed into objects.
 */
export interface ManifestItemsIngestOptions {
  /** Path of the knowledge database that is migrated and written to. */
  dbPath: string;
  /** Manifest items to normalize and ingest. */
  items: ManifestObject[];
  /** Label recorded as the audit target of the read event and returned as `path` in the result. */
  sourceLabel: string;
  /** Audit action for the read event; when omitted it is derived from whether `sourceLabel` starts with `s3://`. */
  readAction?: string;
  /** When provided, write permission is asserted against it and it is passed to secret redaction. */
  safetyPolicy?: SafetyPolicy;
  /** Clock override; the current time is used when omitted. */
  now?: Date;
  /** Maximum characters per chunk (defaults to 4000; values below 500 are rejected). */
  maxChunkChars?: number;
  /** Characters of overlap between consecutive chunks (defaults to 200; must be >= 0 and < `maxChunkChars`). */
  chunkOverlapChars?: number;
}
|
|
38
|
-
|
|
39
|
-
/** Summary counters returned by a manifest ingest run. */
export interface ManifestIngestResult {
  /** The manifest label or input location that was ingested. */
  path: string;
  /** Path of the knowledge database that was written. */
  db_path: string;
  /** Number of manifest items processed. */
  items_seen: number;
  /** Number of distinct source rows touched. */
  sources_upserted: number;
  /** Number of distinct source-revision rows touched. */
  revisions_upserted: number;
  /** Number of chunk rows inserted. */
  chunks_inserted: number;
  /** Number of pre-existing chunk rows removed before re-insertion or deletion. */
  chunks_deleted: number;
  /** Total number of redaction findings across all items. */
  redactions: number;
  /** Number of skipped items. */
  skipped: number;
}
|
|
50
|
-
|
|
51
|
-
/** A loosely typed manifest record: string keys mapped to values that must be narrowed before use. */
export type ManifestObject = Record<string, unknown>;
|
|
52
|
-
|
|
53
|
-
/** A manifest item after its fields have been resolved into a uniform shape for persistence. */
interface NormalizedManifestItem {
  /** The manifest object exactly as it was supplied. */
  raw: ManifestObject;
  /** Full source reference, possibly including a revision suffix. */
  sourceRef: string;
  /** Source reference with any trailing open-files file revision segment removed. */
  sourceUri: string;
  /** Source kind as reported by the parsed source reference. */
  kind: SourceRef['kind'];
  /** Title, name, or path basename; `null` when none is available. */
  title: string | null;
  /** Revision identifier; falls back to `'current'` when nothing else identifies it. */
  revision: string;
  /** Content hash/checksum from the item, or `null`. */
  hash: string | null;
  /** Reference to externally stored extracted text, or `null`. */
  extractedTextUri: string | null;
  /** Inline text content, or `null` when the item carries none. */
  text: string | null;
  /** Item fields minus inline text, plus the resolved source_ref/source_uri/status. */
  metadata: ManifestObject;
  /** Permissions/ACL payload copied from the item (`{}` when absent). */
  acl: unknown;
  /** Item status; `'active'` when the item does not specify one. */
  status: string;
  /** Item `updated_at`, or the ingest timestamp when absent. */
  updatedAt: string;
}
|
|
68
|
-
|
|
69
|
-
/**
 * Derives a deterministic identifier from `value`.
 *
 * @param prefix - Label placed before the underscore.
 * @param value - Text whose SHA-256 digest seeds the identifier.
 * @returns `<prefix>_` followed by the first 20 hex characters of the digest.
 */
function stableId(prefix: string, value: string): string {
  const digest = createHash('sha256').update(value).digest('hex');
  const shortDigest = digest.slice(0, 20);
  return `${prefix}_${shortDigest}`;
}
|
|
72
|
-
|
|
73
|
-
/**
 * Narrows an unknown value to a manifest object.
 *
 * @returns The value itself when it is a non-null, non-array object; otherwise `undefined`.
 */
function asObject(value: unknown): ManifestObject | undefined {
  if (value === null || typeof value !== 'object') return undefined;
  if (Array.isArray(value)) return undefined;
  return value as ManifestObject;
}
|
|
76
|
-
|
|
77
|
-
/**
 * Narrows an unknown value to a non-empty string.
 *
 * @returns The string when it has at least one character; otherwise `undefined`.
 */
function asString(value: unknown): string | undefined {
  if (typeof value !== 'string') return undefined;
  return value.length === 0 ? undefined : value;
}
|
|
80
|
-
|
|
81
|
-
/**
 * Narrows an unknown value to a finite number.
 *
 * @returns The number when it is finite; `undefined` for non-numbers, `NaN`, and infinities.
 */
function asNumber(value: unknown): number | undefined {
  if (typeof value !== 'number') return undefined;
  if (!Number.isFinite(value)) return undefined;
  return value;
}
|
|
84
|
-
|
|
85
|
-
/**
 * Resolves the source reference for a manifest item.
 *
 * Precedence: an explicit `source_ref`, `source_uri`, or `uri`; then an
 * `open-files://file/...` reference built from `file_id` (with an optional
 * revision segment); then an `open-files://source/.../path/...` reference built
 * from `source_id` and `path`.
 *
 * @throws Error when none of the identifying fields are present.
 */
function buildSourceRefFromItem(item: ManifestObject): string {
  for (const candidate of [item.source_ref, item.source_uri, item.uri]) {
    const explicitRef = asString(candidate);
    if (explicitRef !== undefined) return explicitRef;
  }

  const fileId = asString(item.file_id);
  if (fileId !== undefined) {
    const fileRef = `open-files://file/${encodeURIComponent(fileId)}`;
    const revision = asString(item.revision_id) ?? asString(item.revision);
    if (revision === undefined) return fileRef;
    return `${fileRef}/revision/${encodeURIComponent(revision)}`;
  }

  const sourceId = asString(item.source_id);
  const itemPath = asString(item.path);
  if (sourceId !== undefined && itemPath !== undefined) {
    return `open-files://source/${encodeURIComponent(sourceId)}/path/${encodeURIComponent(itemPath)}`;
  }

  throw new Error('Manifest item is missing source_ref, file_id, or source_id/path.');
}
|
|
104
|
-
|
|
105
|
-
/**
 * Returns the revision-independent URI for a source reference.
 *
 * For open-files file references that carry a revision id, the trailing
 * `/revision/<id>` segment is removed; every other reference is returned as-is.
 */
function baseSourceUri(sourceRef: string, parsed: SourceRef): string {
  const hasFileRevision =
    parsed.kind === 'open-files' && parsed.entity === 'file' && Boolean(parsed.revision_id);
  if (!hasFileRevision) return sourceRef;
  return sourceRef.replace(/\/revision\/[^/]+$/, '');
}
|
|
111
|
-
|
|
112
|
-
/**
 * Extracts inline text from a manifest item.
 *
 * The first non-empty string among `extracted_text`, `text`, `content_text`,
 * and `markdown` wins. Failing that, `content` is returned when it is a string
 * (including an empty one); otherwise `null`.
 */
function textFromItem(item: ManifestObject): string | null {
  for (const key of ['extracted_text', 'text', 'content_text', 'markdown']) {
    const candidate = asString(item[key]);
    if (candidate !== undefined) return candidate;
  }
  const fallback = item.content;
  if (typeof fallback === 'string') return fallback;
  return null;
}
|
|
122
|
-
|
|
123
|
-
/**
 * Finds a reference to externally stored extracted text.
 *
 * Top-level `extracted_text_ref`, `extracted_text_uri`, and `text_ref` are
 * checked first, then `extracted_text_ref` / `extracted_text_uri` nested inside
 * an object-valued `content`. Returns `null` when none is a non-empty string.
 */
function extractedTextUriFromItem(item: ManifestObject): string | null {
  const topLevel =
    asString(item.extracted_text_ref) ?? asString(item.extracted_text_uri) ?? asString(item.text_ref);
  if (topLevel !== undefined) return topLevel;

  const nested = asObject(item.content);
  const nestedRef = asString(nested?.extracted_text_ref) ?? asString(nested?.extracted_text_uri);
  return nestedRef ?? null;
}
|
|
129
|
-
|
|
130
|
-
/**
 * Picks a display title for a manifest item: `title`, then `name`, then the
 * basename of `path`; `null` when none of them is a non-empty string.
 */
function titleFromItem(item: ManifestObject): string | null {
  const itemPath = asString(item.path);
  const named = asString(item.title) ?? asString(item.name);
  if (named !== undefined) return named;
  return itemPath ? basename(itemPath) : null;
}
|
|
134
|
-
|
|
135
|
-
/**
 * Returns the first non-empty string among `hash`, `checksum`, and `sha256`,
 * or `null` when the item carries none of them.
 */
function hashFromItem(item: ManifestObject): string | null {
  for (const candidate of [item.hash, item.checksum, item.sha256]) {
    const digest = asString(candidate);
    if (digest !== undefined) return digest;
  }
  return null;
}
|
|
138
|
-
|
|
139
|
-
/**
 * Chooses the revision identifier for a manifest item.
 *
 * Precedence: item `revision_id`, `revision`, `version_id`; the revision id of
 * an open-files source reference; the content hash; item `updated_at`; and
 * finally the literal `'current'`.
 */
function revisionFromItem(item: ManifestObject, parsed: SourceRef, hash: string | null): string {
  const declared = asString(item.revision_id) ?? asString(item.revision) ?? asString(item.version_id);
  if (declared !== undefined) return declared;

  const fromRef = parsed.kind === 'open-files' ? parsed.revision_id : undefined;
  if (fromRef != null) return fromRef;

  if (hash != null) return hash;

  return asString(item.updated_at) ?? 'current';
}
|
|
149
|
-
|
|
150
|
-
/**
 * Builds the metadata payload stored alongside a source or revision.
 *
 * Every item field is copied except the inline-text fields (`text`, `content`,
 * `content_text`, `extracted_text`, `markdown`). The resolved `source_ref`,
 * `source_uri`, and `status` then overwrite any same-named item fields.
 */
function metadataFromItem(item: ManifestObject, normalized: {
  sourceRef: string;
  sourceUri: string;
  status: string;
}): ManifestObject {
  const inlineTextKeys = new Set(['text', 'content', 'content_text', 'extracted_text', 'markdown']);
  const metadata: ManifestObject = {};
  for (const key of Object.keys(item)) {
    if (!inlineTextKeys.has(key)) {
      metadata[key] = item[key];
    }
  }
  metadata.source_ref = normalized.sourceRef;
  metadata.source_uri = normalized.sourceUri;
  metadata.status = normalized.status;
  return metadata;
}
|
|
165
|
-
|
|
166
|
-
/**
 * Converts a raw manifest object into the uniform shape used for persistence.
 *
 * @param item - Raw manifest object.
 * @param now - Timestamp used as `updatedAt` when the item has no `updated_at`.
 * @throws Error when the item has no usable source identification (raised by `buildSourceRefFromItem`).
 */
function normalizeManifestItem(item: ManifestObject, now: string): NormalizedManifestItem {
  const sourceRef = buildSourceRefFromItem(item);
  const parsed = parseSourceRef(sourceRef);
  const sourceUri = baseSourceUri(sourceRef, parsed);
  const hash = hashFromItem(item);
  const status = asString(item.status) ?? 'active';

  const title = titleFromItem(item);
  const revision = revisionFromItem(item, parsed, hash);
  const extractedTextUri = extractedTextUriFromItem(item);
  const text = textFromItem(item);
  const metadata = metadataFromItem(item, { sourceRef, sourceUri, status });
  // `permissions` takes precedence over `acl`; an empty object stands in when both are absent.
  const acl = item.permissions ?? item.acl ?? {};
  const updatedAt = asString(item.updated_at) ?? now;

  return {
    raw: item,
    sourceRef,
    sourceUri,
    kind: parsed.kind,
    title,
    revision,
    hash,
    extractedTextUri,
    text,
    metadata,
    acl,
    status,
    updatedAt,
  };
}
|
|
188
|
-
|
|
189
|
-
/**
 * Parses manifest text into manifest objects.
 *
 * Accepted shapes:
 * - a JSON array of objects;
 * - a JSON object with an `items` array of objects;
 * - a single JSON object, treated as one item (it may span several lines);
 * - JSON Lines, one object per non-blank line.
 *
 * @param text - Raw manifest contents.
 * @returns The manifest objects in document order; `[]` for blank input.
 * @throws Error when an entry is not a plain object.
 * @throws SyntaxError (from `JSON.parse`) when the text is neither valid JSON nor valid JSON Lines.
 */
function parseManifestText(text: string): ManifestObject[] {
  const trimmed = text.trim();
  if (!trimmed) return [];

  // Same plain-object check as `asObject`, but throws with a caller-supplied message.
  const requireObject = (value: unknown, message: string): ManifestObject => {
    if (value === null || typeof value !== 'object' || Array.isArray(value)) {
      throw new Error(message);
    }
    return value as ManifestObject;
  };

  const nonBlankLines = (): string[] => trimmed.split(/\r?\n/).filter((line) => line.trim().length > 0);

  const parseJsonLines = (lines: string[]): ManifestObject[] =>
    lines.map((line) => requireObject(JSON.parse(line), 'Manifest JSONL entries must be objects.'));

  if (trimmed.startsWith('[')) {
    const parsed: unknown = JSON.parse(trimmed);
    if (!Array.isArray(parsed)) throw new Error('Manifest array parse failed.');
    return parsed.map((entry) => requireObject(entry, 'Manifest array entries must be objects.'));
  }

  if (trimmed.startsWith('{')) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(trimmed);
    } catch (error) {
      // Only a JSON syntax failure reaches this point. Multi-line text may still be
      // JSON Lines; a single line cannot be, so the original error is rethrown.
      const lines = nonBlankLines();
      if (lines.length <= 1) throw error;
      return parseJsonLines(lines);
    }

    // Validation happens outside the try block so that its errors are never
    // mistaken for a syntax failure and replaced by a misleading JSONL error.
    const object = requireObject(parsed, 'Manifest object parse failed.');
    if (Array.isArray(object.items)) {
      return object.items.map((entry) => requireObject(entry, 'Manifest items entries must be objects.'));
    }
    // The whole document is one object: treat it as a single item regardless of which
    // identifying keys it uses. Items without usable identification are rejected later
    // during normalization with a descriptive error.
    return [object];
  }

  return parseJsonLines(nonBlankLines());
}
|
|
233
|
-
|
|
234
|
-
/**
 * Downloads an `s3://bucket/key` object and returns its body as text.
 *
 * @param uri - S3 URI; the host is the bucket and the URL-decoded path is the key.
 * @param config - Optional workspace config; its S3 settings (region, profile,
 *   max attempts) are used only when storage is S3 and the configured bucket matches.
 * @param safetyPolicy - When provided, the read is first checked with `assertS3ReadAllowed`.
 * @returns The object body as a string, or `''` when the response has no body.
 * @throws Error when the URI has no bucket or key.
 */
async function readS3Text(uri: string, config?: KnowledgeConfig, safetyPolicy?: SafetyPolicy): Promise<string> {
  const { hostname: bucket, pathname } = new URL(uri);
  const key = decodeURIComponent(pathname.replace(/^\/+/, ''));
  if (!bucket || !key) {
    throw new Error(`Invalid S3 manifest URI: ${uri}`);
  }
  if (safetyPolicy) {
    assertS3ReadAllowed(uri, safetyPolicy);
  }

  // The AWS SDK is loaded lazily, only when an S3 read is actually requested.
  const [s3Sdk, credentialProviders] = await Promise.all([
    import('@aws-sdk/client-s3'),
    import('@aws-sdk/credential-providers'),
  ]);
  const { S3Client, GetObjectCommand } = s3Sdk;
  const { fromIni } = credentialProviders;

  const matchesConfiguredBucket = config?.storage.type === 's3' && config.storage.s3?.bucket === bucket;
  const s3Config = matchesConfiguredBucket ? config.storage.s3 : undefined;

  const client = new S3Client({
    region: s3Config?.region,
    credentials: s3Config?.profile ? fromIni({ profile: s3Config.profile }) : undefined,
    maxAttempts: s3Config?.max_attempts,
  });
  const response = await client.send(new GetObjectCommand({ Bucket: bucket, Key: key }));
  const body = response.Body;
  if (!body) return '';
  return await body.transformToString();
}
|
|
254
|
-
|
|
255
|
-
/**
 * Loads manifest text from an `s3://` URI (via `readS3Text`) or from a local
 * UTF-8 file.
 *
 * @throws Error when a local path does not exist.
 */
async function readManifestInput(input: string, config?: KnowledgeConfig, safetyPolicy?: SafetyPolicy): Promise<string> {
  const isS3 = input.startsWith('s3://');
  if (isS3) {
    return readS3Text(input, config, safetyPolicy);
  }
  if (existsSync(input)) {
    return readFileSync(input, 'utf8');
  }
  throw new Error(`Manifest not found: ${input}`);
}
|
|
260
|
-
|
|
261
|
-
/** One slice of a document produced by `chunkText`. */
interface TextChunk {
  /** Zero-based position of the chunk in emission order. */
  ordinal: number;
  /** Chunk contents with surrounding whitespace trimmed. */
  text: string;
  /** Start index (inclusive) of the untrimmed slice in the newline-normalized text. */
  startOffset: number;
  /** End index (exclusive) of the untrimmed slice in the newline-normalized text. */
  endOffset: number;
}
|
|
267
|
-
|
|
268
|
-
/**
 * Splits text into overlapping chunks of roughly `maxChars` characters.
 *
 * `\r\n` is normalized to `\n` first. When a chunk would end mid-text, the cut
 * is moved back to the last paragraph break (`\n\n`) or sentence break (`. `)
 * as long as that break lies beyond the first half of the window. Each chunk's
 * text is trimmed; offsets refer to the untrimmed slice of the normalized text.
 *
 * @param text - Text to split.
 * @param maxChars - Window size in characters; must be greater than zero.
 * @param overlapChars - Characters re-read at the start of the next chunk.
 * @returns Chunks in order; `[]` when the text is blank.
 * @throws RangeError when `maxChars` is not greater than zero (the loop could never advance).
 */
function chunkText(text: string, maxChars: number, overlapChars: number): TextChunk[] {
  if (!(maxChars > 0)) {
    throw new RangeError('maxChars must be greater than zero.');
  }
  const normalized = text.replace(/\r\n/g, '\n');
  if (!normalized.trim()) return [];

  const chunks: TextChunk[] = [];
  const minBreakDistance = Math.floor(maxChars * 0.5);
  let start = 0;
  while (start < normalized.length) {
    const hardEnd = Math.min(normalized.length, start + maxChars);
    let end = hardEnd;
    if (hardEnd < normalized.length) {
      const paragraphBreak = normalized.lastIndexOf('\n\n', hardEnd);
      const sentenceBreak = normalized.lastIndexOf('. ', hardEnd);
      const candidate = Math.max(paragraphBreak, sentenceBreak);
      // Keep both newlines of a paragraph break, but only the period of a sentence break.
      if (candidate > start + minBreakDistance) end = candidate + (candidate === paragraphBreak ? 2 : 1);
    }
    const chunk = normalized.slice(start, end).trim();
    if (chunk) {
      chunks.push({
        ordinal: chunks.length,
        text: chunk,
        startOffset: start,
        endOffset: end,
      });
    }
    if (end >= normalized.length) break;
    // A chunk cut short at a break point can be shorter than the overlap. Stepping back
    // by the full overlap would then land at or before `start` and the loop would never
    // terminate, so in that case the next chunk begins at `end` with no overlap.
    const overlappedStart = end - overlapChars;
    start = overlappedStart > start ? overlappedStart : end;
  }
  return chunks;
}
|
|
296
|
-
|
|
297
|
-
/**
 * Rough token estimate: 1.25 tokens per whitespace-separated word, rounded up,
 * and never less than 1.
 */
function estimateTokenCount(text: string): number {
  const wordCount = text.match(/\S+/g)?.length ?? 0;
  const estimate = Math.ceil(wordCount * 1.25);
  return estimate < 1 ? 1 : estimate;
}
|
|
301
|
-
|
|
302
|
-
/**
 * Removes every chunk of a source revision, together with its full-text index rows.
 *
 * @returns The number of chunk rows that existed for the revision.
 */
function deleteChunksForRevision(db: Database, sourceRevisionId: string): number {
  const existing = db
    .query<{ id: string }, [string]>('SELECT id FROM chunks WHERE source_revision_id = ?')
    .all(sourceRevisionId);
  // Index rows are keyed by chunk id, so they are removed one chunk at a time first.
  existing.forEach(({ id }) => {
    db.run('DELETE FROM chunks_fts WHERE chunk_id = ?', [id]);
  });
  db.run('DELETE FROM chunks WHERE source_revision_id = ?', [sourceRevisionId]);
  return existing.length;
}
|
|
310
|
-
|
|
311
|
-
/**
 * Inserts the source row for an item, or refreshes it when the URI already exists.
 *
 * On conflict the kind, title, metadata, ACL, and `updated_at` are overwritten;
 * the original id and `created_at` are kept.
 *
 * @returns The id stored for the source URI (the pre-existing id on conflict).
 * @throws Error when the row cannot be read back after the upsert.
 */
function upsertSource(db: Database, item: NormalizedManifestItem, now: string): string {
  const upsertSql = [
    'INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)',
    'VALUES (?, ?, ?, ?, ?, ?, ?, ?)',
    'ON CONFLICT(uri) DO UPDATE SET',
    'kind = excluded.kind,',
    'title = excluded.title,',
    'metadata_json = excluded.metadata_json,',
    'acl_json = excluded.acl_json,',
    'updated_at = excluded.updated_at',
  ].join('\n');

  const candidateId = stableId('src', item.sourceUri);
  const metadataJson = JSON.stringify(item.metadata);
  const aclJson = JSON.stringify(item.acl ?? {});
  db.run(upsertSql, [
    candidateId,
    item.sourceUri,
    item.kind,
    item.title,
    metadataJson,
    aclJson,
    now,
    item.updatedAt,
  ]);

  // Read the id back instead of trusting `candidateId`: on conflict the existing row keeps its id.
  const stored = db.query<{ id: string }, [string]>('SELECT id FROM sources WHERE uri = ?').get(item.sourceUri);
  if (!stored) {
    throw new Error(`Failed to upsert source: ${item.sourceUri}`);
  }
  return stored.id;
}
|
|
337
|
-
|
|
338
|
-
/**
 * Inserts the revision row for an item, or refreshes it when the
 * (source, revision) pair already exists.
 *
 * On conflict the hash, extracted-text URI, and metadata are overwritten.
 *
 * @returns The id stored for the (source, revision) pair.
 * @throws Error when the row cannot be read back after the upsert.
 */
function upsertRevision(db: Database, sourceId: string, item: NormalizedManifestItem, now: string): string {
  const upsertSql = [
    'INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)',
    'VALUES (?, ?, ?, ?, ?, ?, ?)',
    'ON CONFLICT(source_id, revision) DO UPDATE SET',
    'hash = excluded.hash,',
    'extracted_text_uri = excluded.extracted_text_uri,',
    'metadata_json = excluded.metadata_json',
  ].join('\n');

  // A NUL separator keeps the (sourceId, revision) pair unambiguous when hashed.
  const candidateId = stableId('rev', `${sourceId}\u0000${item.revision}`);
  const metadataJson = JSON.stringify(item.metadata);
  db.run(upsertSql, [
    candidateId,
    sourceId,
    item.revision,
    item.hash,
    item.extractedTextUri,
    metadataJson,
    now,
  ]);

  const lookup = db.query<{ id: string }, [string, string]>(
    'SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?',
  );
  const stored = lookup.get(sourceId, item.revision);
  if (!stored) {
    throw new Error(`Failed to upsert source revision: ${item.sourceRef}`);
  }
  return stored.id;
}
|
|
363
|
-
|
|
364
|
-
/**
 * Redacts, chunks, and stores the inline text of an item for one source revision.
 *
 * Nothing is written when the item has no text or its status is `deleted`
 * (case-insensitive). When redaction reports findings, they are recorded along
 * with a `redaction` audit event. Each chunk is written to `chunks` and to the
 * `chunks_fts` index.
 *
 * @returns How many chunks were inserted and how many redaction findings were reported.
 */
function insertChunks(db: Database, sourceRevisionId: string, item: NormalizedManifestItem, now: string, maxChars: number, overlapChars: number, safetyPolicy?: SafetyPolicy): { chunksInserted: number; redactions: number } {
  const isDeleted = item.status.toLowerCase() === 'deleted';
  if (!item.text || isDeleted) {
    return { chunksInserted: 0, redactions: 0 };
  }

  const { text: safeText, findings } = redactSecrets(item.text, safetyPolicy);
  const redactionCount = findings.length;
  if (redactionCount > 0) {
    recordRedactionFindings(db, {
      source_uri: item.sourceUri,
      findings,
      metadata: { source_ref: item.sourceRef, revision: item.revision },
      created_at: now,
    });
    recordAuditEvent(db, {
      event_type: 'redaction',
      action: 'source_text_redact',
      target_uri: item.sourceUri,
      decision: 'redacted',
      metadata: { findings: redactionCount, source_ref: item.sourceRef, revision: item.revision },
      created_at: now,
    });
  }

  // Fields shared, in this order, by the provenance record and the chunk metadata.
  const sourceFields = {
    source_ref: item.sourceRef,
    source_uri: item.sourceUri,
    source_kind: item.kind,
    source_revision_id: sourceRevisionId,
    revision: item.revision,
    hash: item.hash,
  };
  const fileFields = {
    path: asString(item.raw.path) ?? null,
    mime: asString(item.raw.mime) ?? asString(item.raw.content_type) ?? null,
    size: asNumber(item.raw.size) ?? null,
  };
  const insertChunkSql = [
    'INSERT INTO chunks (id, source_revision_id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json, created_at)',
    'VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
  ].join('\n');
  const ftsTitle = item.title ?? '';

  const pieces = chunkText(safeText, maxChars, overlapChars);
  for (const piece of pieces) {
    // The id covers revision, position, and content, so identical re-ingests yield identical ids.
    const chunkId = stableId('chk', `${sourceRevisionId}\u0000${piece.ordinal}\u0000${piece.text}`);
    const provenance = sourceProvenance({
      ...sourceFields,
      chunk_id: chunkId,
      start_offset: piece.startOffset,
      end_offset: piece.endOffset,
      status: item.status,
      resolver: 'open-files-read-only',
    });
    const metadata = withProvenance({ ...sourceFields, status: item.status, ...fileFields }, provenance);

    db.run(insertChunkSql, [
      chunkId,
      sourceRevisionId,
      'source',
      piece.ordinal,
      piece.text,
      estimateTokenCount(piece.text),
      piece.startOffset,
      piece.endOffset,
      JSON.stringify(metadata),
      now,
    ]);
    db.run(
      'INSERT INTO chunks_fts (chunk_id, text, title, source_uri) VALUES (?, ?, ?, ?)',
      [chunkId, piece.text, ftsTitle, item.sourceUri],
    );
  }

  return { chunksInserted: pieces.length, redactions: redactionCount };
}
|
|
434
|
-
|
|
435
|
-
/**
 * Reads a manifest from a local path or `s3://` URI, parses it, and ingests
 * its items into the knowledge database.
 *
 * When a safety policy is supplied, write access to the database is asserted
 * before anything is read. The database is migrated before the manifest is
 * loaded. Chunking and persistence are delegated to
 * `ingestOpenFilesManifestItems`, with `input` used as the source label.
 *
 * @returns Summary counters for the ingest run.
 */
export async function ingestOpenFilesManifest(options: ManifestIngestOptions): Promise<ManifestIngestResult> {
  const { dbPath, input, config, safetyPolicy, maxChunkChars, chunkOverlapChars } = options;
  const now = options.now ?? new Date();

  if (safetyPolicy) {
    assertWriteAllowed(dbPath, safetyPolicy);
  }
  migrateKnowledgeDb(dbPath);

  const manifestText = await readManifestInput(input, config, safetyPolicy);
  const items = parseManifestText(manifestText);

  return ingestOpenFilesManifestItems({
    dbPath,
    items,
    sourceLabel: input,
    safetyPolicy,
    now,
    maxChunkChars,
    chunkOverlapChars,
  });
}
|
|
451
|
-
|
|
452
|
-
/**
 * Ingests already-parsed manifest items into the knowledge database.
 *
 * All writes run inside one database transaction: a `source_read` audit event,
 * then for each item a source upsert, a revision upsert, removal of that
 * revision's existing chunks (only when the item carries text or is marked
 * deleted), and insertion of fresh chunks, and finally a `write` audit event.
 * The database handle is always closed.
 *
 * @returns Summary counters for the ingest run.
 * @throws Error when `maxChunkChars` is below 500, or `chunkOverlapChars` is
 *   negative or not smaller than `maxChunkChars`.
 */
export async function ingestOpenFilesManifestItems(options: ManifestItemsIngestOptions): Promise<ManifestIngestResult> {
  const timestamp = (options.now ?? new Date()).toISOString();
  const maxChunkChars = options.maxChunkChars ?? 4000;
  const chunkOverlapChars = options.chunkOverlapChars ?? 200;
  if (maxChunkChars < 500) {
    throw new Error('maxChunkChars must be at least 500.');
  }
  if (chunkOverlapChars < 0 || chunkOverlapChars >= maxChunkChars) {
    throw new Error('chunkOverlapChars must be less than maxChunkChars.');
  }

  if (options.safetyPolicy) {
    assertWriteAllowed(options.dbPath, options.safetyPolicy);
  }
  migrateKnowledgeDb(options.dbPath);
  const db = openKnowledgeDb(options.dbPath);
  try {
    const runIngest = db.transaction(() => {
      const sourceIds = new Set<string>();
      const revisionIds = new Set<string>();
      let chunksInserted = 0;
      let chunksDeleted = 0;
      let redactions = 0;
      let skipped = 0;

      const defaultReadAction = options.sourceLabel.startsWith('s3://') ? 's3_manifest_read' : 'local_manifest_read';
      recordAuditEvent(db, {
        event_type: 'source_read',
        action: options.readAction ?? defaultReadAction,
        target_uri: options.sourceLabel,
        decision: 'allow',
        metadata: { items: options.items.length, read_only: true },
        created_at: timestamp,
      });

      for (const rawItem of options.items) {
        const item = normalizeManifestItem(rawItem, timestamp);
        const sourceId = upsertSource(db, item, timestamp);
        const revisionId = upsertRevision(db, sourceId, item, timestamp);
        sourceIds.add(sourceId);
        revisionIds.add(revisionId);

        // Existing chunks are replaced only when new text arrives or the item is deleted;
        // metadata-only items leave previously stored chunks in place.
        const replacesChunks = Boolean(item.text) || item.status.toLowerCase() === 'deleted';
        if (replacesChunks) {
          chunksDeleted += deleteChunksForRevision(db, revisionId);
        }

        const outcome = insertChunks(db, revisionId, item, timestamp, maxChunkChars, chunkOverlapChars, options.safetyPolicy);
        chunksInserted += outcome.chunksInserted;
        redactions += outcome.redactions;
      }

      recordAuditEvent(db, {
        event_type: 'write',
        action: 'knowledge_manifest_ingest',
        target_uri: options.dbPath,
        decision: 'allow',
        metadata: { items: options.items.length, sources: sourceIds.size, revisions: revisionIds.size, chunks_inserted: chunksInserted, redactions },
        created_at: timestamp,
      });

      return {
        path: options.sourceLabel,
        db_path: options.dbPath,
        items_seen: options.items.length,
        sources_upserted: sourceIds.size,
        revisions_upserted: revisionIds.size,
        chunks_inserted: chunksInserted,
        chunks_deleted: chunksDeleted,
        redactions,
        skipped,
      };
    });
    return runIngest();
  } finally {
    db.close();
  }
}
|
package/src/mcp-http.js
DELETED
|
@@ -1,110 +0,0 @@
|
|
|
1
|
-
import { createServer } from 'node:http';
|
|
2
|
-
import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
|
|
3
|
-
|
|
4
|
-
export const MCP_HTTP_SERVICE_NAME = 'knowledge';
|
|
5
|
-
export const DEFAULT_MCP_HTTP_PORT = 8819;
|
|
6
|
-
|
|
7
|
-
/**
 * Reports whether the MCP server should run over HTTP.
 *
 * @param argv - Command-line arguments (defaults to `process.argv`).
 * @param env - Environment variables (defaults to `process.env`).
 * @returns `true` when `--http` is among the arguments or `MCP_HTTP` equals `'1'`.
 */
export function isHttpMode(argv = process.argv, env = process.env) {
  if (argv.includes('--http')) return true;
  return env.MCP_HTTP === '1';
}
|
|
10
|
-
|
|
11
|
-
/**
 * Determines the HTTP port for the MCP server.
 *
 * Precedence: the value following `--port`, then `MCP_HTTP_PORT`, then
 * `DEFAULT_MCP_HTTP_PORT`. A `--port` flag with no value after it is ignored.
 *
 * @param argv - Command-line arguments (defaults to `process.argv`).
 * @param env - Environment variables (defaults to `process.env`).
 * @throws Error when the chosen value is not a valid port (raised by `parsePort`).
 */
export function resolveMcpHttpPort(argv = process.argv, env = process.env) {
  const flagIndex = argv.indexOf('--port');
  const flagValue = flagIndex === -1 ? undefined : argv[flagIndex + 1];
  if (flagValue) {
    return parsePort(flagValue, '--port');
  }

  const envValue = env.MCP_HTTP_PORT;
  return envValue ? parsePort(envValue, 'MCP_HTTP_PORT') : DEFAULT_MCP_HTTP_PORT;
}
|
|
21
|
-
|
|
22
|
-
/**
 * Parses a TCP port number given as decimal text.
 *
 * Only plain decimal digits (optionally surrounded by whitespace) are accepted.
 * Relying on `Number()` alone would turn a whitespace-only value into port 0
 * and would accept hexadecimal or exponent notation.
 *
 * @param raw - Port text taken from a CLI argument or environment variable.
 * @param source - Name of the origin, used in the error message.
 * @returns The port as an integer in the range 0-65535.
 * @throws Error when the value is not a decimal integer in that range.
 */
function parsePort(raw, source) {
  const text = String(raw).trim();
  const parsed = /^\d+$/.test(text) ? Number(text) : Number.NaN;
  if (!Number.isInteger(parsed) || parsed < 0 || parsed > 65535) {
    throw new Error(`Invalid ${source} value "${raw}". Expected 0-65535.`);
  }
  return parsed;
}
|
|
29
|
-
|
|
30
|
-
/**
 * Collects a request body and parses it as JSON.
 *
 * @param req - Async iterable yielding string or Buffer pieces of the body.
 * @returns The parsed JSON value, or `undefined` when the body is empty.
 * @throws SyntaxError (from `JSON.parse`) when the body is not valid JSON.
 */
async function readJsonBody(req) {
  const pieces = [];
  for await (const piece of req) {
    const buffer = typeof piece === 'string' ? Buffer.from(piece) : piece;
    pieces.push(buffer);
  }
  const raw = Buffer.concat(pieces).toString('utf8');
  return raw ? JSON.parse(raw) : undefined;
}
|
|
39
|
-
|
|
40
|
-
/**
 * Starts a stateless Streamable HTTP endpoint for an MCP server.
 *
 * Routes:
 * - `GET /health` returns `{ status: 'ok', name }`.
 * - `/mcp` builds a fresh server and transport for each request and hands the
 *   request to the transport.
 * - Anything else returns 404.
 *
 * @param buildServer - Factory returning a new MCP server instance for each request.
 * @param options - Optional `host` (default `127.0.0.1`), `port` (default from
 *   `resolveMcpHttpPort()`), and `serviceName` (default `MCP_HTTP_SERVICE_NAME`).
 * @returns The bound `port`, the `host`, and a `close()` function that stops the listener.
 */
export async function startMcpHttpServer(buildServer, options = {}) {
  const host = options.host ?? '127.0.0.1';
  const requestedPort = options.port ?? resolveMcpHttpPort();
  const serviceName = options.serviceName ?? MCP_HTTP_SERVICE_NAME;

  const httpServer = createServer(async (req, res) => {
    try {
      const url = new URL(req.url ?? '/', `http://${req.headers.host ?? 'localhost'}`);

      if (req.method === 'GET' && url.pathname === '/health') {
        res.writeHead(200, { 'Content-Type': 'application/json' });
        res.end(JSON.stringify({ status: 'ok', name: serviceName }));
        return;
      }

      if (url.pathname !== '/mcp') {
        res.writeHead(404, { 'Content-Type': 'text/plain' });
        res.end('Not Found');
        return;
      }

      // Read the body before any per-request server exists, so a malformed payload
      // cannot leave a connected server/transport pair behind.
      let parsedBody;
      if (req.method === 'POST') {
        try {
          parsedBody = await readJsonBody(req);
        } catch (error) {
          if (!(error instanceof SyntaxError)) throw error;
          // Invalid JSON is a client error: answer with the JSON-RPC "Parse error" code.
          res.writeHead(400, { 'Content-Type': 'application/json' });
          res.end(JSON.stringify({
            jsonrpc: '2.0',
            error: { code: -32700, message: 'Parse error' },
            id: null,
          }));
          return;
        }
      }

      const server = buildServer();
      const transport = new StreamableHTTPServerTransport({
        sessionIdGenerator: undefined,
      });

      // Register cleanup before handling the request. If the response finishes while
      // `handleRequest` is still awaited, a listener attached afterwards would miss the
      // 'close' event and the transport and server would never be closed.
      res.on('close', () => {
        void transport.close();
        void server.close();
      });

      await server.connect(transport);
      await transport.handleRequest(req, res, parsedBody);
    } catch (error) {
      console.error(`[${serviceName}-mcp] HTTP error:`, error);
      if (!res.headersSent) {
        res.writeHead(500, { 'Content-Type': 'application/json' });
        res.end(JSON.stringify({
          jsonrpc: '2.0',
          error: { code: -32603, message: 'Internal server error' },
          id: null,
        }));
      }
    }
  });

  await new Promise((resolve, reject) => {
    httpServer.once('error', reject);
    httpServer.listen(requestedPort, host, () => resolve());
  });

  // When port 0 was requested the OS picks the port, so report the bound one.
  const addr = httpServer.address();
  const port = typeof addr === 'object' && addr ? addr.port : requestedPort;

  console.error(`[${serviceName}-mcp] Streamable HTTP listening on http://${host}:${port}/mcp`);

  return {
    port,
    host,
    close: () =>
      new Promise((resolve, reject) => {
        httpServer.close((err) => (err ? reject(err) : resolve()));
      }),
  };
}
|