@hasna/knowledge 0.2.7 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -1
- package/bin/open-knowledge-mcp.js +624 -5
- package/bin/open-knowledge.js +47 -25
- package/docs/architecture/ai-native-knowledge-base.md +24 -0
- package/package.json +1 -1
- package/src/cli.ts +61 -13
- package/src/manifest-ingest.ts +36 -10
- package/src/mcp.js +25 -0
- package/src/source-ingest.ts +268 -0
- package/src/source-ref.ts +12 -0
- package/src/source-resolver.ts +418 -0
|
@@ -0,0 +1,418 @@
|
|
|
1
|
+
import type { Database } from 'bun:sqlite';
|
|
2
|
+
import { migrateKnowledgeDb, openKnowledgeDb } from './knowledge-db';
|
|
3
|
+
import { catalogSourceUriForRef, parseSourceRef, revisionIdForSourceRef } from './source-ref';
|
|
4
|
+
import { assertWriteAllowed, recordAuditEvent, type SafetyPolicy } from './safety';
|
|
5
|
+
|
|
6
|
+
/**
 * Input accepted by `resolveOpenFilesSource`.
 */
export interface SourceResolveOptions {
  /** Path of the knowledge database that is migrated and opened for the lookup. */
  dbPath: string;

  /** Source reference to resolve; it is parsed with `parseSourceRef`. */
  sourceRef: string;

  /** Purpose checked against the source ACL; the resolver uses 'knowledge_answer' when omitted. */
  purpose?: string;

  /** Maximum number of chunks to return; the resolver defaults to 10 and clamps to 0..100. */
  limit?: number;

  /** Clock override for `resolved_at` and audit timestamps; the current time is used when omitted. */
  now?: Date;

  /** Optional policy; when supplied it must permit read-only source access and writes to `dbPath`. */
  safetyPolicy?: SafetyPolicy;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Provenance record attached to every resolved chunk and citation.
 */
export interface SourceResolverEvidence {
  /** Fixed resolver identifier. */
  resolver: 'open-files-read-only';

  /** Fixed resolution mode. */
  mode: 'local_catalog';

  /** Purpose the resolution was performed for. */
  purpose: string;

  /** Always `true`. */
  read_only: true;

  /** The chunk's own `source_ref` metadata when it is a non-empty string, otherwise the effective source ref. */
  source_ref: string;

  /** URI of the catalog source row. */
  source_uri: string;

  /** Id of the revision row the chunk was read from, or `null` when no revision was selected. */
  source_revision_id: string | null;

  /** Revision label of the selected revision, or `null`. */
  revision: string | null;

  /** Revision hash, falling back to the chunk metadata `hash` string, or `null`. */
  hash: string | null;

  /** Id of the chunk this evidence describes. */
  chunk_id?: string;

  /** Start offset copied from the chunk row. */
  start_offset?: number | null;

  /** End offset copied from the chunk row. */
  end_offset?: number | null;

  /** ISO-8601 timestamp of the resolution. */
  resolved_at: string;
}
|
|
30
|
+
|
|
31
|
+
/**
 * A chunk row returned by the resolver, with parsed metadata and provenance.
 */
export interface ResolvedSourceChunk {
  /** Chunk row id. */
  id: string;

  /** Chunk kind as stored in the catalog. */
  kind: string;

  /** Position of the chunk within its revision; chunks are returned in ascending ordinal order. */
  ordinal: number;

  /** Chunk text. */
  text: string;

  /** Stored token count, if any. */
  token_count: number | null;

  /** Stored start offset, if any. */
  start_offset: number | null;

  /** Stored end offset, if any. */
  end_offset: number | null;

  /** Parsed `metadata_json`; an empty object when the stored value is missing or not a JSON object. */
  metadata: Record<string, unknown>;

  /** Provenance for this chunk. */
  evidence: SourceResolverEvidence;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Citation derived from a resolved chunk.
 */
export interface ResolvedSourceCitation {
  /** Source ref taken from the chunk's evidence. */
  source_ref: string;

  /** URI of the catalog source row. */
  source_uri: string;

  /** Id of the cited chunk. */
  chunk_id: string;

  /** Leading excerpt of the chunk text (the resolver keeps at most the first 500 characters). */
  quote: string;

  /** Start offset of the cited chunk, if stored. */
  start_offset: number | null;

  /** End offset of the cited chunk, if stored. */
  end_offset: number | null;

  /** Same evidence object as the cited chunk. */
  evidence: SourceResolverEvidence;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Full answer produced by `resolveOpenFilesSource`.
 */
export interface SourceResolveResult {
  /**
   * The requested ref when nothing was resolved; otherwise the effective ref,
   * which may carry a `/revision/<revision>` suffix.
   */
  source_ref: string;

  /** Catalog URI: the derived one when unresolved, the stored row URI when resolved. */
  source_uri: string;

  /** Purpose the resolution was performed for. */
  purpose: string;

  /** Always `true`. */
  read_only: true;

  /** `false` when no catalog source row matched the reference. */
  resolved: boolean;

  /** Fixed description of the resolver that produced this result. */
  resolver: {
    name: 'open-files-read-only';
    mode: 'local_catalog';
    contract: 'open-files-knowledge-source-v1';
  };

  /** Matched catalog source, or `null` when unresolved. */
  source: {
    id: string;
    uri: string;
    kind: string;
    title: string | null;
    /** Parsed `metadata_json` of the source row. */
    metadata: Record<string, unknown>;
    /** Parsed `acl_json` of the source row. */
    permissions: Record<string, unknown>;
    updated_at: string;
  } | null;

  /** Selected revision, or `null` when the source has no matching revision. */
  revision: {
    id: string;
    revision: string;
    hash: string | null;
    extracted_text_uri: string | null;
    /** Parsed `metadata_json` of the revision row. */
    metadata: Record<string, unknown>;
    created_at: string;
    /** `true` only when the revision metadata has `reindex_required === true`. */
    reindex_required: boolean;
  } | null;

  /** Summary of the content that was (or could be) returned. */
  content: {
    /** From source metadata `mime`/`content_type`, then revision metadata. */
    mime: string | null;
    /** From source metadata `size`/`size_bytes`, then revision metadata. */
    size: number | null;
    /** Revision hash, else source metadata `hash`/`checksum`/`sha256`. */
    hash: string | null;
    /** `true` when the selected revision has at least one chunk. */
    text_available: boolean;
    /** Number of chunks stored for the selected revision. */
    chunks_total: number;
    /** Number of chunks included in `chunks`. */
    chunks_returned: number;
    /** Sum of the `text.length` of the returned chunks. */
    char_count_returned: number;
    /** Revision `extracted_text_uri`, else revision metadata `extracted_text_ref`/`extracted_text_uri`. */
    extracted_text_ref: string | null;
    /** Always `false`. */
    bytes_available: false;
    /** Always `false`. */
    bytes_exposed: false;
  };

  /** Returned chunks in ascending ordinal order. */
  chunks: ResolvedSourceChunk[];

  /** One citation per returned chunk. */
  citations: ResolvedSourceCitation[];
}
|
|
97
|
+
|
|
98
|
+
/**
 * Columns read from the `sources` table by `selectSource`.
 */
interface DbSourceRow {
  id: string;
  uri: string;
  kind: string;
  title: string | null;
  /** JSON text; decoded with `parseJsonObject`. */
  metadata_json: string;
  /** JSON text holding the permissions checked by `assertPurposeAllowed`. */
  acl_json: string;
  updated_at: string;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Columns read from the `source_revisions` table by `selectRevision`.
 */
interface DbRevisionRow {
  id: string;
  /** Revision label; matched against the revision requested in the source ref. */
  revision: string;
  hash: string | null;
  extracted_text_uri: string | null;
  /** JSON text; decoded with `parseJsonObject`. */
  metadata_json: string;
  /** Used to pick the newest revision when none is requested. */
  created_at: string;
}
|
|
116
|
+
|
|
117
|
+
/**
 * Columns read from the `chunks` table by `selectChunks`.
 */
interface DbChunkRow {
  id: string;
  kind: string;
  /** Sort key; chunks are fetched in ascending ordinal order. */
  ordinal: number;
  text: string;
  token_count: number | null;
  start_offset: number | null;
  end_offset: number | null;
  /** JSON text; decoded with `parseJsonObject`. */
  metadata_json: string;
}
|
|
127
|
+
|
|
128
|
+
/**
 * Decodes a JSON column into a plain object.
 *
 * @param value Raw JSON text, or a null/undefined/empty value.
 * @returns The decoded object, or `{}` when the input is empty, is not valid
 *   JSON, or decodes to anything other than a non-array object.
 */
function parseJsonObject(value: string | null | undefined): Record<string, unknown> {
  if (!value) return {};

  let decoded: unknown;
  try {
    decoded = JSON.parse(value);
  } catch {
    // Malformed JSON is treated the same as "no data".
    return {};
  }

  const isPlainObject = typeof decoded === 'object' && decoded !== null && !Array.isArray(decoded);
  return isPlainObject ? (decoded as Record<string, unknown>) : {};
}
|
|
137
|
+
|
|
138
|
+
/**
 * Looks up the first non-empty string stored under any of the given keys.
 *
 * @param metadata Decoded metadata object.
 * @param keys Candidate keys, tried in order.
 * @returns The first value that is a string of length > 0, or `null`.
 */
function metadataString(metadata: Record<string, unknown>, keys: string[]): string | null {
  const isNonEmptyString = (candidate: unknown): candidate is string =>
    typeof candidate === 'string' && candidate.length > 0;

  const matchingKey = keys.find((key) => isNonEmptyString(metadata[key]));
  if (matchingKey === undefined) return null;

  const found = metadata[matchingKey];
  return isNonEmptyString(found) ? found : null;
}
|
|
145
|
+
|
|
146
|
+
/**
 * Looks up the first finite number stored under any of the given keys.
 *
 * @param metadata Decoded metadata object.
 * @param keys Candidate keys, tried in order.
 * @returns The first value that is a finite `number`, or `null`. Numeric
 *   strings, `NaN` and infinities are skipped.
 */
function metadataNumber(metadata: Record<string, unknown>, keys: string[]): number | null {
  for (const key of keys) {
    const candidate = metadata[key];
    if (typeof candidate !== 'number') continue;
    if (Number.isFinite(candidate)) return candidate;
  }
  return null;
}
|
|
153
|
+
|
|
154
|
+
/**
 * Enforces the ACL stored with a source for the requested purpose.
 *
 * Checks run in this order and the first failure wins:
 * 1. a string `mode` other than 'read_only' is rejected;
 * 2. a purpose listed in the `denied_purposes` array is rejected;
 * 3. a non-empty `allowed_purposes` array must contain the purpose.
 * Fields that are absent or of another type impose no restriction.
 *
 * @param permissions Decoded ACL object of the source.
 * @param purpose Purpose the caller wants to read the source for.
 * @throws Error describing why the purpose was denied.
 */
function assertPurposeAllowed(permissions: Record<string, unknown>, purpose: string): void {
  const { mode, denied_purposes: blocked, allowed_purposes: permitted } = permissions;

  const modeIsWrong = typeof mode === 'string' && mode !== 'read_only';
  if (modeIsWrong) {
    throw new Error(`Source resolver denied ${purpose}. Permission mode is ${mode}, expected read_only.`);
  }

  const explicitlyBlocked = Array.isArray(blocked) && blocked.includes(purpose);
  if (explicitlyBlocked) {
    throw new Error(`Source resolver denied ${purpose}. Purpose is explicitly denied.`);
  }

  // An empty allow-list means "no allow-list", not "nothing is allowed".
  if (!Array.isArray(permitted) || permitted.length === 0) return;
  if (permitted.includes(purpose)) return;
  throw new Error(`Source resolver denied ${purpose}. Allowed purposes: ${permitted.join(', ')}`);
}
|
|
170
|
+
|
|
171
|
+
/**
 * Builds the source ref that pins a resolved revision.
 *
 * @param sourceUri URI of the catalog source row.
 * @param revision Selected revision row, or `null` when none was found.
 * @param fallback Ref to return when no pinned ref can be built.
 * @returns `<sourceUri>/revision/<encoded revision>` when `sourceUri` parses as
 *   an open-files file ref and a revision exists; otherwise `fallback`. Any
 *   error raised while parsing or encoding also yields `fallback`.
 */
function sourceRevisionRef(sourceUri: string, revision: DbRevisionRow | null, fallback: string): string {
  if (!revision) return fallback;

  let pinnedRef = fallback;
  try {
    const descriptor = parseSourceRef(sourceUri);
    if (descriptor.kind === 'open-files' && descriptor.entity === 'file') {
      pinnedRef = `${sourceUri}/revision/${encodeURIComponent(revision.revision)}`;
    }
  } catch {
    // Unparseable URIs and unencodable revisions keep the fallback.
    pinnedRef = fallback;
  }
  return pinnedRef;
}
|
|
183
|
+
|
|
184
|
+
/**
 * Finds the catalog source row for a reference.
 *
 * A row matches when its `uri` equals either the derived catalog URI or the
 * ref exactly as requested; a match on the catalog URI is preferred.
 *
 * @param db Open knowledge database.
 * @param sourceUri Catalog URI derived from the requested ref.
 * @param requestedRef Source ref exactly as the caller supplied it.
 * @returns The matching row, or `null` when there is none.
 */
function selectSource(db: Database, sourceUri: string, requestedRef: string): DbSourceRow | null {
  const lookup = db.query<DbSourceRow, [string, string, string]>(
    `SELECT id, uri, kind, title, metadata_json, acl_json, updated_at
     FROM sources
     WHERE uri = ? OR uri = ?
     ORDER BY CASE WHEN uri = ? THEN 0 ELSE 1 END
     LIMIT 1`,
  );
  // The catalog URI is bound twice: once for matching, once for ranking.
  const match = lookup.get(sourceUri, requestedRef, sourceUri);
  return match ?? null;
}
|
|
193
|
+
|
|
194
|
+
/**
 * Loads one revision of a source.
 *
 * @param db Open knowledge database.
 * @param sourceId Id of the source row.
 * @param revisionId Revision label to match against the `revision` column; when
 *   empty or `null` the newest revision (by `created_at`, then `revision`) is
 *   returned instead.
 * @returns The revision row, or `null` when nothing matches.
 */
function selectRevision(db: Database, sourceId: string, revisionId: string | null): DbRevisionRow | null {
  if (!revisionId) {
    const newest = db.query<DbRevisionRow, [string]>(
      `SELECT id, revision, hash, extracted_text_uri, metadata_json, created_at
       FROM source_revisions
       WHERE source_id = ?
       ORDER BY created_at DESC, revision DESC
       LIMIT 1`,
    ).get(sourceId);
    return newest ?? null;
  }

  const pinned = db.query<DbRevisionRow, [string, string]>(
    `SELECT id, revision, hash, extracted_text_uri, metadata_json, created_at
     FROM source_revisions
     WHERE source_id = ? AND revision = ?
     LIMIT 1`,
  ).get(sourceId, revisionId);
  return pinned ?? null;
}
|
|
211
|
+
|
|
212
|
+
/**
 * Counts the chunks stored for a revision.
 *
 * @param db Open knowledge database.
 * @param revisionId Id of the revision row, or `null` when there is none.
 * @returns The chunk count; 0 when `revisionId` is empty or the query yields no row.
 */
function countChunks(db: Database, revisionId: string | null): number {
  if (!revisionId) return 0;

  const counter = db.query<{ n: number }, [string]>('SELECT COUNT(*) AS n FROM chunks WHERE source_revision_id = ?');
  const tally = counter.get(revisionId);
  if (tally === null || tally === undefined) return 0;
  return tally.n ?? 0;
}
|
|
217
|
+
|
|
218
|
+
/**
 * Reads the leading chunks of a revision in ascending ordinal order.
 *
 * @param db Open knowledge database.
 * @param revisionId Id of the revision row, or `null` when there is none.
 * @param limit Maximum number of rows to fetch.
 * @returns Up to `limit` chunk rows; an empty array (without querying) when
 *   `revisionId` is empty or `limit` is zero or negative.
 */
function selectChunks(db: Database, revisionId: string | null, limit: number): DbChunkRow[] {
  if (!revisionId) return [];
  if (limit <= 0) return [];

  const reader = db.query<DbChunkRow, [string, number]>(
    `SELECT id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json
     FROM chunks
     WHERE source_revision_id = ?
     ORDER BY ordinal ASC
     LIMIT ?`,
  );
  return reader.all(revisionId, limit);
}
|
|
228
|
+
|
|
229
|
+
/**
 * What the resolution transaction produced: either a result to hand back, or
 * the ACL error to raise once the transaction has committed.
 */
type ResolveTransactionOutcome =
  | { denied: false; result: SourceResolveResult }
  | { denied: true; error: unknown };

/**
 * Turns the caller-supplied chunk limit into a whole number in 0..100.
 *
 * A missing or NaN limit falls back to 10; fractional values are truncated so
 * that only integers are ever bound to the SQL `LIMIT` parameter.
 */
function normalizeChunkLimit(requested: number | undefined): number {
  const value = requested ?? 10;
  if (Number.isNaN(value)) return 10;
  return Math.max(0, Math.min(Math.trunc(value), 100));
}

/**
 * Resolves a source reference against the local knowledge catalog and returns
 * the stored source, the selected revision and its leading text chunks.
 *
 * Every call records a `source_read` audit event (allow, deny, or missing).
 * All reads and the audit write run in one database transaction, and the
 * connection is always closed before returning.
 *
 * @param options Database path, source ref and optional purpose, limit, clock
 *   and safety policy.
 * @returns A result with `resolved: false` when no catalog row matches the
 *   ref; otherwise the source, revision, content summary, chunks and citations.
 * @throws Error when the safety policy forbids read-only source access, when
 *   the source ACL denies the purpose, or when a called helper throws
 *   (for example `parseSourceRef` or `assertWriteAllowed`).
 */
export async function resolveOpenFilesSource(options: SourceResolveOptions): Promise<SourceResolveResult> {
  const purpose = options.purpose ?? 'knowledge_answer';
  const limit = normalizeChunkLimit(options.limit);
  const resolvedAt = (options.now ?? new Date()).toISOString();
  const parsed = parseSourceRef(options.sourceRef);
  const sourceUri = catalogSourceUriForRef(options.sourceRef, parsed);
  const requestedRevision = revisionIdForSourceRef(options.sourceRef);

  if (options.safetyPolicy) {
    if (!options.safetyPolicy.readOnlySourceAccess) throw new Error('Safety policy denied source resolution.');
    // Resolution itself only reads sources, but it writes audit events to the database.
    assertWriteAllowed(options.dbPath, options.safetyPolicy);
  }

  migrateKnowledgeDb(options.dbPath);
  const db = openKnowledgeDb(options.dbPath);
  try {
    const outcome = db.transaction((): ResolveTransactionOutcome => {
      const source = selectSource(db, sourceUri, options.sourceRef);
      if (!source) {
        recordAuditEvent(db, {
          event_type: 'source_read',
          action: 'open_files_resolve_missing',
          target_uri: options.sourceRef,
          decision: 'allow',
          metadata: { purpose, read_only: true, source_uri: sourceUri },
          created_at: resolvedAt,
        });
        return {
          denied: false,
          result: {
            source_ref: options.sourceRef,
            source_uri: sourceUri,
            purpose,
            read_only: true,
            resolved: false,
            resolver: {
              name: 'open-files-read-only',
              mode: 'local_catalog',
              contract: 'open-files-knowledge-source-v1',
            },
            source: null,
            revision: null,
            content: {
              mime: null,
              size: null,
              hash: null,
              text_available: false,
              chunks_total: 0,
              chunks_returned: 0,
              char_count_returned: 0,
              extracted_text_ref: null,
              bytes_available: false,
              bytes_exposed: false,
            },
            chunks: [],
            citations: [],
          } satisfies SourceResolveResult,
        };
      }

      const sourceMetadata = parseJsonObject(source.metadata_json);
      const permissions = parseJsonObject(source.acl_json);
      try {
        assertPurposeAllowed(permissions, purpose);
      } catch (error) {
        recordAuditEvent(db, {
          event_type: 'source_read',
          action: 'open_files_resolve',
          target_uri: options.sourceRef,
          decision: 'deny',
          metadata: {
            purpose,
            read_only: true,
            source_uri: source.uri,
            error: error instanceof Error ? error.message : String(error),
          },
          created_at: resolvedAt,
        });
        // Do not throw here: an exception leaving the transaction callback rolls
        // the transaction back, which would discard the audit event recorded
        // just above (assuming recordAuditEvent writes through this connection).
        // The error is raised after the transaction has committed.
        return { denied: true, error };
      }

      const revision = selectRevision(db, source.id, requestedRevision);
      const revisionMetadata = parseJsonObject(revision?.metadata_json);
      const totalChunks = countChunks(db, revision?.id ?? null);
      const rows = selectChunks(db, revision?.id ?? null, limit);
      const effectiveSourceRef = sourceRevisionRef(source.uri, revision, options.sourceRef);
      const chunks = rows.map((row) => {
        const metadata = parseJsonObject(row.metadata_json);
        const evidence: SourceResolverEvidence = {
          resolver: 'open-files-read-only',
          mode: 'local_catalog',
          purpose,
          read_only: true,
          // A ref stored with the chunk wins over the one derived from the request.
          source_ref: metadataString(metadata, ['source_ref']) ?? effectiveSourceRef,
          source_uri: source.uri,
          source_revision_id: revision?.id ?? null,
          revision: revision?.revision ?? null,
          hash: revision?.hash ?? metadataString(metadata, ['hash']),
          chunk_id: row.id,
          start_offset: row.start_offset,
          end_offset: row.end_offset,
          resolved_at: resolvedAt,
        };
        return {
          id: row.id,
          kind: row.kind,
          ordinal: row.ordinal,
          text: row.text,
          token_count: row.token_count,
          start_offset: row.start_offset,
          end_offset: row.end_offset,
          metadata,
          evidence,
        };
      });

      const citations = chunks.map((chunk) => ({
        source_ref: chunk.evidence.source_ref,
        source_uri: source.uri,
        chunk_id: chunk.id,
        // Citations quote at most the first 500 characters of the chunk.
        quote: chunk.text.slice(0, 500),
        start_offset: chunk.start_offset,
        end_offset: chunk.end_offset,
        evidence: chunk.evidence,
      }));

      recordAuditEvent(db, {
        event_type: 'source_read',
        action: 'open_files_resolve',
        target_uri: options.sourceRef,
        decision: 'allow',
        metadata: {
          purpose,
          read_only: true,
          source_uri: source.uri,
          revision: revision?.revision ?? null,
          chunks_returned: chunks.length,
          chunks_total: totalChunks,
        },
        created_at: resolvedAt,
      });

      // Source-level metadata takes precedence over revision-level metadata.
      const mime = metadataString(sourceMetadata, ['mime', 'content_type']) ?? metadataString(revisionMetadata, ['mime', 'content_type']);
      const size = metadataNumber(sourceMetadata, ['size', 'size_bytes']) ?? metadataNumber(revisionMetadata, ['size', 'size_bytes']);
      return {
        denied: false,
        result: {
          source_ref: effectiveSourceRef,
          source_uri: source.uri,
          purpose,
          read_only: true,
          resolved: true,
          resolver: {
            name: 'open-files-read-only',
            mode: 'local_catalog',
            contract: 'open-files-knowledge-source-v1',
          },
          source: {
            id: source.id,
            uri: source.uri,
            kind: source.kind,
            title: source.title,
            metadata: sourceMetadata,
            permissions,
            updated_at: source.updated_at,
          },
          revision: revision ? {
            id: revision.id,
            revision: revision.revision,
            hash: revision.hash,
            extracted_text_uri: revision.extracted_text_uri,
            metadata: revisionMetadata,
            created_at: revision.created_at,
            reindex_required: revisionMetadata.reindex_required === true,
          } : null,
          content: {
            mime,
            size,
            hash: revision?.hash ?? metadataString(sourceMetadata, ['hash', 'checksum', 'sha256']),
            text_available: totalChunks > 0,
            chunks_total: totalChunks,
            chunks_returned: chunks.length,
            char_count_returned: chunks.reduce((sum, chunk) => sum + chunk.text.length, 0),
            extracted_text_ref: revision?.extracted_text_uri ?? metadataString(revisionMetadata, ['extracted_text_ref', 'extracted_text_uri']),
            bytes_available: false,
            bytes_exposed: false,
          },
          chunks,
          citations,
        },
      };
    })();

    // Raised only after the transaction (and its deny audit event) has committed.
    if (outcome.denied) throw outcome.error;
    return outcome.result;
  } finally {
    db.close();
  }
}
|