@hasna/knowledge 0.2.7 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,418 @@
1
+ import type { Database } from 'bun:sqlite';
2
+ import { migrateKnowledgeDb, openKnowledgeDb } from './knowledge-db';
3
+ import { catalogSourceUriForRef, parseSourceRef, revisionIdForSourceRef } from './source-ref';
4
+ import { assertWriteAllowed, recordAuditEvent, type SafetyPolicy } from './safety';
5
+
6
/**
 * Input accepted by `resolveOpenFilesSource`.
 */
export interface SourceResolveOptions {
  /** Path of the knowledge database that is migrated and then opened for the lookup. */
  dbPath: string;
  /** Source reference to resolve; it is parsed and mapped to a catalog URI. */
  sourceRef: string;
  /** Purpose checked against the source permissions; the resolver defaults it to `'knowledge_answer'`. */
  purpose?: string;
  /** Maximum number of chunks to return; the resolver defaults to 10 and clamps to the range 0–100. */
  limit?: number;
  /** Clock override used for `resolved_at` and audit timestamps; defaults to the current time. */
  now?: Date;
  /** Optional safety policy; when present it must permit read-only source access. */
  safetyPolicy?: SafetyPolicy;
}
14
+
15
/**
 * Provenance record attached to every resolved chunk and citation.
 */
export interface SourceResolverEvidence {
  /** Fixed resolver name. */
  resolver: 'open-files-read-only';
  /** Fixed resolver mode. */
  mode: 'local_catalog';
  /** Purpose the resolution was performed for. */
  purpose: string;
  /** Always `true`. */
  read_only: true;
  /** Chunk-level `source_ref` from chunk metadata when present, otherwise the effective source reference. */
  source_ref: string;
  /** URI of the catalog source row. */
  source_uri: string;
  /** Database id of the revision row, or `null` when no revision was found. */
  source_revision_id: string | null;
  /** Revision label, or `null` when no revision was found. */
  revision: string | null;
  /** Revision hash, falling back to a `hash` entry in the chunk metadata. */
  hash: string | null;
  /** Id of the chunk this evidence describes. */
  chunk_id?: string;
  /** Start offset copied from the chunk row. */
  start_offset?: number | null;
  /** End offset copied from the chunk row. */
  end_offset?: number | null;
  /** ISO-8601 timestamp of the resolution. */
  resolved_at: string;
}
30
+
31
/**
 * A chunk row returned to the caller, with its metadata parsed and evidence attached.
 */
export interface ResolvedSourceChunk {
  /** Chunk id. */
  id: string;
  /** Chunk kind as stored in the catalog. */
  kind: string;
  /** Position of the chunk; chunks are returned in ascending ordinal order. */
  ordinal: number;
  /** Chunk text. */
  text: string;
  /** Stored token count, if any. */
  token_count: number | null;
  /** Stored start offset, if any. */
  start_offset: number | null;
  /** Stored end offset, if any. */
  end_offset: number | null;
  /** Parsed `metadata_json` of the chunk row (empty object when missing or invalid). */
  metadata: Record<string, unknown>;
  /** Provenance for this chunk. */
  evidence: SourceResolverEvidence;
}
42
+
43
/**
 * Citation derived from a resolved chunk.
 */
export interface ResolvedSourceCitation {
  /** Source reference taken from the chunk evidence. */
  source_ref: string;
  /** URI of the catalog source row. */
  source_uri: string;
  /** Id of the cited chunk. */
  chunk_id: string;
  /** Leading excerpt of the chunk text (the resolver takes at most the first 500 characters). */
  quote: string;
  /** Start offset of the cited chunk, if stored. */
  start_offset: number | null;
  /** End offset of the cited chunk, if stored. */
  end_offset: number | null;
  /** Provenance shared with the cited chunk. */
  evidence: SourceResolverEvidence;
}
52
+
53
/**
 * Result of resolving a source reference against the local catalog.
 */
export interface SourceResolveResult {
  /** Requested reference, or the revision-qualified reference when one could be derived. */
  source_ref: string;
  /** Catalog URI used for (or found by) the lookup. */
  source_uri: string;
  /** Purpose the resolution was performed for. */
  purpose: string;
  /** Always `true`. */
  read_only: true;
  /** `false` when no matching source row exists; `source` and `revision` are then `null`. */
  resolved: boolean;
  /** Fixed description of the resolver that produced this result. */
  resolver: {
    name: 'open-files-read-only';
    mode: 'local_catalog';
    contract: 'open-files-knowledge-source-v1';
  };
  /** The matched source row, with its metadata and ACL JSON parsed into objects. */
  source: {
    id: string;
    uri: string;
    kind: string;
    title: string | null;
    metadata: Record<string, unknown>;
    /** Parsed `acl_json` of the source row. */
    permissions: Record<string, unknown>;
    updated_at: string;
  } | null;
  /** The selected revision row, or `null` when the source has no matching revision. */
  revision: {
    id: string;
    revision: string;
    hash: string | null;
    extracted_text_uri: string | null;
    metadata: Record<string, unknown>;
    created_at: string;
    /** `true` only when the revision metadata has `reindex_required === true`. */
    reindex_required: boolean;
  } | null;
  /** Summary of what content is available and how much of it was returned. */
  content: {
    mime: string | null;
    size: number | null;
    hash: string | null;
    /** `true` when the selected revision has at least one chunk. */
    text_available: boolean;
    chunks_total: number;
    chunks_returned: number;
    /** Sum of the text lengths of the returned chunks. */
    char_count_returned: number;
    extracted_text_ref: string | null;
    /** Always `false`. */
    bytes_available: false;
    /** Always `false`. */
    bytes_exposed: false;
  };
  /** Returned chunks, in ascending ordinal order. */
  chunks: ResolvedSourceChunk[];
  /** One citation per returned chunk. */
  citations: ResolvedSourceCitation[];
}
97
+
98
/** Column projection read from the `sources` table. */
interface DbSourceRow {
  id: string;
  uri: string;
  kind: string;
  title: string | null;
  /** JSON text; parsed into the source metadata object. */
  metadata_json: string;
  /** JSON text; parsed into the source permissions object. */
  acl_json: string;
  updated_at: string;
}
107
+
108
/** Column projection read from the `source_revisions` table. */
interface DbRevisionRow {
  id: string;
  /** Revision label; this is the column matched when a specific revision is requested. */
  revision: string;
  hash: string | null;
  extracted_text_uri: string | null;
  /** JSON text; parsed into the revision metadata object. */
  metadata_json: string;
  created_at: string;
}
116
+
117
/** Column projection read from the `chunks` table. */
interface DbChunkRow {
  id: string;
  kind: string;
  /** Sort key; chunks are fetched in ascending ordinal order. */
  ordinal: number;
  text: string;
  token_count: number | null;
  start_offset: number | null;
  end_offset: number | null;
  /** JSON text; parsed into the chunk metadata object. */
  metadata_json: string;
}
127
+
128
/**
 * Decodes a JSON string that is expected to hold a plain object.
 *
 * @param value JSON text, or a nullish/empty value.
 * @returns The decoded object, or `{}` when the input is empty, is not valid
 *   JSON, or decodes to anything other than a non-array object.
 */
function parseJsonObject(value: string | null | undefined): Record<string, unknown> {
  if (value === null || value === undefined || value === '') return {};

  let decoded: unknown;
  try {
    decoded = JSON.parse(value);
  } catch {
    // Malformed JSON is treated the same as "no data".
    return {};
  }

  const isPlainObject = decoded !== null && typeof decoded === 'object' && !Array.isArray(decoded);
  if (!isPlainObject) return {};
  return decoded as Record<string, unknown>;
}
137
+
138
/**
 * Looks up the first usable string among several candidate metadata keys.
 *
 * @param metadata Parsed metadata object.
 * @param keys Keys to try, in priority order.
 * @returns The first value that is a non-empty string, or `null` if none is.
 */
function metadataString(metadata: Record<string, unknown>, keys: string[]): string | null {
  for (const name of keys) {
    const candidate = metadata[name];
    if (typeof candidate !== 'string') continue;
    if (candidate !== '') return candidate;
  }
  return null;
}
145
+
146
/**
 * Looks up the first usable number among several candidate metadata keys.
 *
 * @param metadata Parsed metadata object.
 * @param keys Keys to try, in priority order.
 * @returns The first value that is a finite number, or `null` if none is.
 */
function metadataNumber(metadata: Record<string, unknown>, keys: string[]): number | null {
  for (const name of keys) {
    const candidate = metadata[name];
    if (typeof candidate !== 'number') continue;
    // NaN and ±Infinity are skipped so a later key can still supply a value.
    if (Number.isFinite(candidate)) return candidate;
  }
  return null;
}
153
+
154
/**
 * Enforces the purpose rules stored in a source's permissions object.
 *
 * Checks run in this order, and the first failing one throws:
 *  1. `mode`, when it is a string, must be `'read_only'`;
 *  2. `denied_purposes`, when it is an array, must not contain the purpose;
 *  3. `allowed_purposes`, when it is a non-empty array, must contain the purpose.
 *
 * Fields that are missing or have another type are ignored.
 *
 * @param permissions Parsed permissions object of the source.
 * @param purpose Purpose being requested.
 * @throws Error describing which rule rejected the purpose.
 */
function assertPurposeAllowed(permissions: Record<string, unknown>, purpose: string): void {
  const reject = (reason: string): never => {
    throw new Error(`Source resolver denied ${purpose}. ${reason}`);
  };

  const mode = permissions.mode;
  const wrongMode = typeof mode === 'string' && mode !== 'read_only';
  if (wrongMode) reject(`Permission mode is ${mode}, expected read_only.`);

  const denyList = permissions.denied_purposes;
  if (Array.isArray(denyList)) {
    if (denyList.includes(purpose)) reject('Purpose is explicitly denied.');
  }

  const allowList = permissions.allowed_purposes;
  if (!Array.isArray(allowList) || allowList.length === 0) return;
  if (!allowList.includes(purpose)) reject(`Allowed purposes: ${allowList.join(', ')}`);
}
170
+
171
/**
 * Produces a revision-qualified reference for a source when possible.
 *
 * @param sourceUri URI of the catalog source row.
 * @param revision Selected revision row, or `null`.
 * @param fallback Reference returned whenever a qualified one cannot be built.
 * @returns `<sourceUri>/revision/<encoded revision>` when a revision exists and
 *   `sourceUri` parses as an `open-files` reference to a `file` entity;
 *   otherwise `fallback` (also when parsing or encoding throws).
 */
function sourceRevisionRef(sourceUri: string, revision: DbRevisionRow | null, fallback: string): string {
  if (revision == null) return fallback;

  let qualified: string | null = null;
  try {
    const ref = parseSourceRef(sourceUri);
    const isOpenFilesFile = ref.kind === 'open-files' && ref.entity === 'file';
    if (isOpenFilesFile) {
      qualified = `${sourceUri}/revision/${encodeURIComponent(revision.revision)}`;
    }
  } catch {
    // Any failure while parsing or encoding degrades to the fallback reference.
    qualified = null;
  }
  return qualified ?? fallback;
}
183
+
184
/**
 * Fetches the catalog row for a source.
 *
 * A row matches when its `uri` equals either the catalog URI or the reference
 * as originally requested; when both exist, the catalog-URI match is preferred.
 *
 * @param db Open knowledge database.
 * @param sourceUri Catalog URI derived from the reference.
 * @param requestedRef Reference exactly as the caller supplied it.
 * @returns The matching row, or `null` when there is none.
 */
function selectSource(db: Database, sourceUri: string, requestedRef: string): DbSourceRow | null {
  const lookup = db.query<DbSourceRow, [string, string, string]>(
    `SELECT id, uri, kind, title, metadata_json, acl_json, updated_at
     FROM sources
     WHERE uri = ? OR uri = ?
     ORDER BY CASE WHEN uri = ? THEN 0 ELSE 1 END
     LIMIT 1`,
  );
  // The third binding repeats sourceUri for the ORDER BY preference.
  const match = lookup.get(sourceUri, requestedRef, sourceUri);
  return match ?? null;
}
193
+
194
/**
 * Fetches a revision row for a source.
 *
 * @param db Open knowledge database.
 * @param sourceId Id of the source row.
 * @param revisionId Requested revision, matched against the `revision` column;
 *   when empty or `null`, the newest revision is chosen instead
 *   (ordered by `created_at`, then `revision`, both descending).
 * @returns The selected row, or `null` when none matches.
 */
function selectRevision(db: Database, sourceId: string, revisionId: string | null): DbRevisionRow | null {
  if (!revisionId) {
    const newest = db.query<DbRevisionRow, [string]>(
      `SELECT id, revision, hash, extracted_text_uri, metadata_json, created_at
       FROM source_revisions
       WHERE source_id = ?
       ORDER BY created_at DESC, revision DESC
       LIMIT 1`,
    ).get(sourceId);
    return newest ?? null;
  }

  const pinned = db.query<DbRevisionRow, [string, string]>(
    `SELECT id, revision, hash, extracted_text_uri, metadata_json, created_at
     FROM source_revisions
     WHERE source_id = ? AND revision = ?
     LIMIT 1`,
  ).get(sourceId, revisionId);
  return pinned ?? null;
}
211
+
212
/**
 * Counts the chunks stored for a revision.
 *
 * @param db Open knowledge database.
 * @param revisionId Id of the revision row, or `null`.
 * @returns The chunk count; `0` when no revision id is given or no count row comes back.
 */
function countChunks(db: Database, revisionId: string | null): number {
  if (!revisionId) return 0;
  const counter = db.query<{ n: number }, [string]>('SELECT COUNT(*) AS n FROM chunks WHERE source_revision_id = ?');
  const tally = counter.get(revisionId);
  return tally?.n ?? 0;
}
217
+
218
/**
 * Reads the leading chunks of a revision in ascending ordinal order.
 *
 * @param db Open knowledge database.
 * @param revisionId Id of the revision row, or `null`.
 * @param limit Maximum number of rows to read.
 * @returns Up to `limit` chunk rows; an empty array when there is no revision
 *   id or `limit` is zero or negative.
 */
function selectChunks(db: Database, revisionId: string | null, limit: number): DbChunkRow[] {
  if (!revisionId) return [];
  if (limit <= 0) return [];

  const reader = db.query<DbChunkRow, [string, number]>(
    `SELECT id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json
     FROM chunks
     WHERE source_revision_id = ?
     ORDER BY ordinal ASC
     LIMIT ?`,
  );
  return reader.all(revisionId, limit);
}
228
+
229
/**
 * Value handed out of the resolver transaction. A denial is returned instead
 * of thrown so the transaction commits (keeping the audit row) before the
 * error is raised to the caller.
 */
type ResolveOutcome =
  | { ok: true; result: SourceResolveResult }
  | { ok: false; error: unknown };

/** Turns a requested chunk limit into a whole number in [0, 100]; a missing or NaN value yields the default of 10. */
function normalizeChunkLimit(requested: number | undefined): number {
  const value = requested ?? 10;
  if (Number.isNaN(value)) return 10;
  // Truncate after clamping so ±Infinity is handled by the clamp and the
  // value bound to SQL `LIMIT ?` is always an integer.
  return Math.trunc(Math.max(0, Math.min(value, 100)));
}

/** Builds the `resolved: false` payload returned when no catalog row matches the reference. */
function unresolvedSourceResult(sourceRef: string, sourceUri: string, purpose: string): SourceResolveResult {
  return {
    source_ref: sourceRef,
    source_uri: sourceUri,
    purpose,
    read_only: true,
    resolved: false,
    resolver: {
      name: 'open-files-read-only',
      mode: 'local_catalog',
      contract: 'open-files-knowledge-source-v1',
    },
    source: null,
    revision: null,
    content: {
      mime: null,
      size: null,
      hash: null,
      text_available: false,
      chunks_total: 0,
      chunks_returned: 0,
      char_count_returned: 0,
      extracted_text_ref: null,
      bytes_available: false,
      bytes_exposed: false,
    },
    chunks: [],
    citations: [],
  } satisfies SourceResolveResult;
}

/**
 * Resolves a source reference against the local knowledge catalog and returns
 * the source row, the selected revision, and up to `limit` text chunks with
 * citations. Every outcome (missing, denied, allowed) is recorded as a
 * `source_read` audit event.
 *
 * @param options Lookup parameters; see `SourceResolveOptions`.
 * @returns The resolution result. `resolved` is `false` when no source row
 *   matches the reference.
 * @throws Error when a supplied safety policy does not permit read-only source
 *   access, and when the source's permissions deny the purpose. The denial is
 *   thrown only after its audit event has been committed. Errors raised by the
 *   imported helpers (reference parsing, write check, migration, database
 *   access) propagate unchanged.
 */
export async function resolveOpenFilesSource(options: SourceResolveOptions): Promise<SourceResolveResult> {
  const purpose = options.purpose ?? 'knowledge_answer';
  const limit = normalizeChunkLimit(options.limit);
  const resolvedAt = (options.now ?? new Date()).toISOString();
  const parsed = parseSourceRef(options.sourceRef);
  const sourceUri = catalogSourceUriForRef(options.sourceRef, parsed);
  const requestedRevision = revisionIdForSourceRef(options.sourceRef);

  if (options.safetyPolicy) {
    if (!options.safetyPolicy.readOnlySourceAccess) throw new Error('Safety policy denied source resolution.');
    // The database is written to (audit events) even though the source itself is only read.
    assertWriteAllowed(options.dbPath, options.safetyPolicy);
  }

  migrateKnowledgeDb(options.dbPath);
  const db = openKnowledgeDb(options.dbPath);
  try {
    const outcome: ResolveOutcome = db.transaction((): ResolveOutcome => {
      const source = selectSource(db, sourceUri, options.sourceRef);
      if (!source) {
        recordAuditEvent(db, {
          event_type: 'source_read',
          action: 'open_files_resolve_missing',
          target_uri: options.sourceRef,
          decision: 'allow',
          metadata: { purpose, read_only: true, source_uri: sourceUri },
          created_at: resolvedAt,
        });
        return { ok: true, result: unresolvedSourceResult(options.sourceRef, sourceUri, purpose) };
      }

      const sourceMetadata = parseJsonObject(source.metadata_json);
      const permissions = parseJsonObject(source.acl_json);
      try {
        assertPurposeAllowed(permissions, purpose);
      } catch (error) {
        recordAuditEvent(db, {
          event_type: 'source_read',
          action: 'open_files_resolve',
          target_uri: options.sourceRef,
          decision: 'deny',
          metadata: {
            purpose,
            read_only: true,
            source_uri: source.uri,
            error: error instanceof Error ? error.message : String(error),
          },
          created_at: resolvedAt,
        });
        // Do not rethrow here: throwing inside the transaction would roll it
        // back and discard the deny audit event that was just recorded through
        // this connection. The error is raised after the commit instead.
        return { ok: false, error };
      }

      const revision = selectRevision(db, source.id, requestedRevision);
      const revisionMetadata = parseJsonObject(revision?.metadata_json);
      const totalChunks = countChunks(db, revision?.id ?? null);
      const rows = selectChunks(db, revision?.id ?? null, limit);
      const effectiveSourceRef = sourceRevisionRef(source.uri, revision, options.sourceRef);
      const chunks = rows.map((row) => {
        const metadata = parseJsonObject(row.metadata_json);
        const evidence: SourceResolverEvidence = {
          resolver: 'open-files-read-only',
          mode: 'local_catalog',
          purpose,
          read_only: true,
          // A chunk may carry its own source_ref in metadata; it wins over the source-level one.
          source_ref: metadataString(metadata, ['source_ref']) ?? effectiveSourceRef,
          source_uri: source.uri,
          source_revision_id: revision?.id ?? null,
          revision: revision?.revision ?? null,
          hash: revision?.hash ?? metadataString(metadata, ['hash']),
          chunk_id: row.id,
          start_offset: row.start_offset,
          end_offset: row.end_offset,
          resolved_at: resolvedAt,
        };
        return {
          id: row.id,
          kind: row.kind,
          ordinal: row.ordinal,
          text: row.text,
          token_count: row.token_count,
          start_offset: row.start_offset,
          end_offset: row.end_offset,
          metadata,
          evidence,
        };
      });

      const citations = chunks.map((chunk) => ({
        source_ref: chunk.evidence.source_ref,
        source_uri: source.uri,
        chunk_id: chunk.id,
        // Quotes are capped at the first 500 characters of the chunk text.
        quote: chunk.text.slice(0, 500),
        start_offset: chunk.start_offset,
        end_offset: chunk.end_offset,
        evidence: chunk.evidence,
      }));

      recordAuditEvent(db, {
        event_type: 'source_read',
        action: 'open_files_resolve',
        target_uri: options.sourceRef,
        decision: 'allow',
        metadata: {
          purpose,
          read_only: true,
          source_uri: source.uri,
          revision: revision?.revision ?? null,
          chunks_returned: chunks.length,
          chunks_total: totalChunks,
        },
        created_at: resolvedAt,
      });

      // Source-level metadata takes precedence over revision-level metadata.
      const mime = metadataString(sourceMetadata, ['mime', 'content_type']) ?? metadataString(revisionMetadata, ['mime', 'content_type']);
      const size = metadataNumber(sourceMetadata, ['size', 'size_bytes']) ?? metadataNumber(revisionMetadata, ['size', 'size_bytes']);
      const result: SourceResolveResult = {
        source_ref: effectiveSourceRef,
        source_uri: source.uri,
        purpose,
        read_only: true,
        resolved: true,
        resolver: {
          name: 'open-files-read-only',
          mode: 'local_catalog',
          contract: 'open-files-knowledge-source-v1',
        },
        source: {
          id: source.id,
          uri: source.uri,
          kind: source.kind,
          title: source.title,
          metadata: sourceMetadata,
          permissions,
          updated_at: source.updated_at,
        },
        revision: revision ? {
          id: revision.id,
          revision: revision.revision,
          hash: revision.hash,
          extracted_text_uri: revision.extracted_text_uri,
          metadata: revisionMetadata,
          created_at: revision.created_at,
          reindex_required: revisionMetadata.reindex_required === true,
        } : null,
        content: {
          mime,
          size,
          hash: revision?.hash ?? metadataString(sourceMetadata, ['hash', 'checksum', 'sha256']),
          text_available: totalChunks > 0,
          chunks_total: totalChunks,
          chunks_returned: chunks.length,
          char_count_returned: chunks.reduce((sum, chunk) => sum + chunk.text.length, 0),
          extracted_text_ref: revision?.extracted_text_uri ?? metadataString(revisionMetadata, ['extracted_text_ref', 'extracted_text_uri']),
          bytes_available: false,
          bytes_exposed: false,
        },
        chunks,
        citations,
      };
      return { ok: true, result };
    })();

    // The transaction has committed at this point, so the deny audit row is durable.
    if (!outcome.ok) throw outcome.error;
    return outcome.result;
  } finally {
    db.close();
  }
}