@hasna/knowledge 0.2.6 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/safety.ts ADDED
@@ -0,0 +1,265 @@
1
import { createHash, randomUUID } from 'node:crypto';
import { isAbsolute, relative, resolve, sep } from 'node:path';
import type { Database } from 'bun:sqlite';
import type { KnowledgeConfig, KnowledgeWorkspace } from './workspace';
5
+
6
/** Outcome of evaluating an action against the safety policy. */
export type SafetyDecision = 'allow' | 'deny' | 'requires_approval';

/** Effective safety settings, as assembled by `resolveSafetyPolicy`. */
export interface SafetyPolicy {
  mode: 'local' | 'hosted';
  /** Directories under which writes are permitted; checked by `assertWriteAllowed`. */
  allowWriteRoots: string[];
  readOnlySourceAccess: boolean;
  network: {
    /** Gate consulted by `assertWebSearchAllowed`. */
    webSearchEnabled: boolean;
    /** Gate consulted by `assertS3ReadAllowed`. */
    s3ReadsEnabled: boolean;
    /** Bucket names accepted by `assertS3ReadAllowed`. */
    allowedS3Buckets: string[];
  };
  redaction: {
    /** When false, `redactSecrets` returns its input untouched. */
    enabled: boolean;
  };
  approvals: {
    /** When true, `approvalStatus` requires a stored approval for the 'generated_write' action. */
    generatedWritesRequireApproval: boolean;
  };
}

/** Fields persisted to the `audit_events` table by `recordAuditEvent`. */
export interface SafetyAuditInput {
  event_type: string;
  action: string;
  target_uri?: string | null;
  decision: SafetyDecision | 'redacted' | 'info';
  /** Serialized with `JSON.stringify`; an empty object is stored when omitted. */
  metadata?: Record<string, unknown>;
  /** Timestamp string; `recordAuditEvent` substitutes the current ISO time when omitted. */
  created_at?: string;
}

/** One secret match reported by `redactSecrets`. */
export interface RedactionFinding {
  /** The `type` of the pattern that matched. */
  type: string;
  severity: 'low' | 'medium' | 'high';
  /** Offset of the match as reported by the replace callback in `redactSecrets`. */
  start: number;
  /** `start` plus the length of the matched text. */
  end: number;
}

/** Redacted text together with the findings that produced it. */
export interface RedactionResult {
  text: string;
  findings: RedactionFinding[];
}

/**
 * `KnowledgeConfig` widened with an optional snake_case `safety` section.
 * `resolveSafetyPolicy` casts the config to this shape to read the overrides.
 */
type ConfigWithSafety = KnowledgeConfig & {
  safety?: {
    network?: {
      web_search_enabled?: boolean;
      s3_reads_enabled?: boolean;
      allowed_s3_buckets?: string[];
    };
    redaction?: {
      enabled?: boolean;
    };
    approvals?: {
      generated_writes_require_approval?: boolean;
    };
  };
};
61
+
62
/**
 * Reports whether an environment flag is switched on.
 *
 * A flag counts as enabled when its value is `1`, `true` or `yes`. The
 * comparison ignores surrounding whitespace and letter case, so `TRUE` or
 * ` Yes ` no longer leave the feature silently disabled.
 *
 * @param name - Name of the environment variable to inspect.
 * @returns `true` when the variable holds an enabling value, otherwise `false`.
 */
function envEnabled(name: string): boolean {
  const raw = process.env[name];
  if (raw === undefined) return false;
  const normalized = raw.trim().toLowerCase();
  return normalized === '1' || normalized === 'true' || normalized === 'yes';
}
66
+
67
/**
 * Builds the effective safety policy from the config file, the workspace
 * layout and environment overrides.
 *
 * Allowed S3 buckets are the union of the configured list, the storage bucket
 * (when storage is S3) and the comma-separated
 * `HASNA_KNOWLEDGE_ALLOWED_S3_BUCKETS` variable. Network switches prefer the
 * config value and fall back to their environment flag.
 *
 * @param config - Loaded knowledge configuration.
 * @param workspace - Workspace whose directories become the writable roots.
 * @returns The resolved policy.
 */
export function resolveSafetyPolicy(config: KnowledgeConfig, workspace: KnowledgeWorkspace): SafetyPolicy {
  const safety = (config as ConfigWithSafety).safety;

  const buckets = new Set<string>(safety?.network?.allowed_s3_buckets ?? []);
  if (config.storage.type === 's3' && config.storage.s3?.bucket) {
    buckets.add(config.storage.s3.bucket);
  }
  const bucketsFromEnv = process.env.HASNA_KNOWLEDGE_ALLOWED_S3_BUCKETS;
  if (bucketsFromEnv) {
    bucketsFromEnv.split(',').forEach((entry) => {
      const bucketName = entry.trim();
      if (bucketName) buckets.add(bucketName);
    });
  }

  const writableDirs = [
    workspace.home,
    workspace.artifactsDir,
    workspace.cacheDir,
    workspace.exportsDir,
    workspace.indexesDir,
    workspace.logsDir,
    workspace.runsDir,
    workspace.schemasDir,
    workspace.wikiDir,
  ];

  const policy: SafetyPolicy = {
    mode: config.mode,
    allowWriteRoots: writableDirs.map((dir) => resolve(dir)),
    readOnlySourceAccess: true,
    network: {
      webSearchEnabled: safety?.network?.web_search_enabled ?? envEnabled('HASNA_KNOWLEDGE_WEB_SEARCH'),
      s3ReadsEnabled: safety?.network?.s3_reads_enabled ?? envEnabled('HASNA_KNOWLEDGE_ALLOW_S3_READS'),
      allowedS3Buckets: Array.from(buckets).sort(),
    },
    redaction: {
      enabled: safety?.redaction?.enabled ?? true,
    },
    approvals: {
      generatedWritesRequireApproval: safety?.approvals?.generated_writes_require_approval ?? true,
    },
  };
  return policy;
}
103
+
104
/**
 * Tells whether `target` is `root` itself or lies somewhere beneath it.
 *
 * The decision is made on the path from `root` to `target`:
 * - an empty relative path means both are the same location;
 * - an absolute relative path (e.g. a different drive on Windows) can never
 *   be inside `root`;
 * - only a leading `..` *segment* escapes the root, so names that merely start
 *   with two dots (such as `..cache`) are still treated as children.
 *
 * @param root - Directory that acts as the boundary.
 * @param target - Path to test against the boundary.
 * @returns `true` when `target` is inside (or equal to) `root`.
 */
function isInside(root: string, target: string): boolean {
  const rel = relative(root, target);
  if (rel === '') return true;
  if (isAbsolute(rel)) return false;
  return rel !== '..' && !rel.startsWith(`..${sep}`);
}
108
+
109
/**
 * Guards a filesystem write: the resolved target must sit inside one of the
 * policy's writable roots.
 *
 * @param targetPath - Path about to be written; resolved before checking.
 * @param policy - Policy supplying `allowWriteRoots`.
 * @throws Error when no writable root contains the target.
 */
export function assertWriteAllowed(targetPath: string, policy: SafetyPolicy): void {
  const absoluteTarget = resolve(targetPath);
  for (const root of policy.allowWriteRoots) {
    if (isInside(root, absoluteTarget)) return;
  }
  throw new Error(`Safety policy denied write outside .hasna/apps/knowledge: ${targetPath}`);
}
115
+
116
/**
 * Guards an S3 read. The URI is parsed with `URL` and its hostname is taken as
 * the bucket name; reads must be enabled and the bucket must be allow-listed.
 *
 * @param uri - Object URI whose hostname names the bucket.
 * @param policy - Policy supplying the S3 switches and bucket allow-list.
 * @throws TypeError when `uri` cannot be parsed by `URL`.
 * @throws Error when S3 reads are disabled or the bucket is not allowed.
 */
export function assertS3ReadAllowed(uri: string, policy: SafetyPolicy): void {
  const { hostname: bucket } = new URL(uri);
  const { s3ReadsEnabled, allowedS3Buckets } = policy.network;

  if (!s3ReadsEnabled) {
    throw new Error('Safety policy denied S3 read. Set safety.network.s3_reads_enabled=true or HASNA_KNOWLEDGE_ALLOW_S3_READS=1.');
  }

  const bucketPermitted = allowedS3Buckets.includes(bucket);
  if (bucketPermitted) return;
  throw new Error(`Safety policy denied S3 bucket "${bucket}". Add it to safety.network.allowed_s3_buckets or HASNA_KNOWLEDGE_ALLOWED_S3_BUCKETS.`);
}
126
+
127
/**
 * Guards a web search request.
 *
 * @param policy - Policy supplying `network.webSearchEnabled`.
 * @throws Error when web search is not enabled.
 */
export function assertWebSearchAllowed(policy: SafetyPolicy): void {
  if (policy.network.webSearchEnabled) return;
  throw new Error('Safety policy denied web search. Set safety.network.web_search_enabled=true or HASNA_KNOWLEDGE_WEB_SEARCH=1.');
}
132
+
133
/**
 * Secret detectors, applied in array order.
 *
 * Order matters: each pattern rewrites the text before the next one runs, so a
 * more specific pattern must precede any broader pattern that would also match
 * the same text. `sk-ant-…` keys also satisfy the generic `sk-…` expression,
 * therefore the Anthropic detector is listed before the OpenAI one.
 */
const REDACTION_PATTERNS: Array<{ type: string; severity: RedactionFinding['severity']; regex: RegExp; replacement: string }> = [
  { type: 'private_key_block', severity: 'high', regex: /-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/g, replacement: '[REDACTED:private_key_block]' },
  { type: 'secret_assignment', severity: 'high', regex: /\b(?:api[_-]?key|secret|token|password)\s*[:=]\s*['"]?[^'"\s]{8,}/gi, replacement: '[REDACTED:secret_assignment]' },
  { type: 'anthropic_api_key', severity: 'high', regex: /\bsk-ant-[A-Za-z0-9_-]{20,}\b/g, replacement: '[REDACTED:anthropic_api_key]' },
  { type: 'openai_api_key', severity: 'high', regex: /\bsk-[A-Za-z0-9_-]{20,}\b/g, replacement: '[REDACTED:openai_api_key]' },
  { type: 'aws_access_key_id', severity: 'high', regex: /\bA(?:KIA|SIA)[A-Z0-9]{16}\b/g, replacement: '[REDACTED:aws_access_key_id]' },
];

/**
 * Replaces recognised secrets in `text` with `[REDACTED:<type>]` markers.
 *
 * Patterns run one after another over the progressively redacted text. The
 * `start`/`end` of a finding are therefore offsets into the text as it stood
 * when that pattern ran; they equal offsets into the original input only when
 * no earlier pattern replaced something before the match.
 *
 * @param text - Text to scan.
 * @param policy - Optional policy; when its `redaction.enabled` is false the
 *   text is returned unchanged with no findings.
 * @returns The redacted text and one finding per replaced match.
 */
export function redactSecrets(text: string, policy?: Pick<SafetyPolicy, 'redaction'>): RedactionResult {
  if (policy && !policy.redaction.enabled) return { text, findings: [] };

  const findings: RedactionFinding[] = [];
  let output = text;
  for (const pattern of REDACTION_PATTERNS) {
    const scanned = output;
    output = scanned.replace(pattern.regex, (match: string, ...args: unknown[]) => {
      // Replacer arguments are (match, ...captures, offset, input[, groups]);
      // captures are strings or undefined, so the first number is the offset.
      const reported = args.find((value): value is number => typeof value === 'number');
      const offset = reported ?? scanned.indexOf(match);
      findings.push({
        type: pattern.type,
        severity: pattern.severity,
        start: Math.max(0, offset),
        end: Math.max(0, offset + match.length),
      });
      return pattern.replacement;
    });
  }
  return { text: output, findings };
}
159
+
160
/**
 * Produces an identifier for an audit event: `audit_` followed by the first
 * 24 hex characters of a SHA-256 digest over the event fields and a fresh
 * random UUID. Because of the UUID, two calls with the same input yield
 * different identifiers.
 *
 * @param input - Audit event whose fields feed the digest.
 * @returns The generated identifier.
 */
export function auditId(input: SafetyAuditInput): string {
  const material = [
    input.event_type,
    input.action,
    input.target_uri ?? '',
    input.created_at ?? '',
    JSON.stringify(input.metadata ?? {}),
    randomUUID(),
  ].join('\u0000');
  const digest = createHash('sha256').update(material).digest('hex');
  return `audit_${digest.slice(0, 24)}`;
}
166
+
167
/**
 * Inserts one row into `audit_events` and returns its generated id.
 *
 * @param db - Open database handle.
 * @param input - Event to store; `created_at` defaults to the current ISO
 *   timestamp, `target_uri` to `null` and `metadata` to an empty object.
 * @returns The id of the inserted row.
 */
export function recordAuditEvent(db: Database, input: SafetyAuditInput): string {
  const timestamp = input.created_at ?? new Date().toISOString();
  const eventId = auditId({ ...input, created_at: timestamp });
  const targetUri = input.target_uri ?? null;
  const metadataJson = JSON.stringify(input.metadata ?? {});

  db.run(
    `INSERT INTO audit_events (id, event_type, action, target_uri, decision, metadata_json, created_at)
     VALUES (?, ?, ?, ?, ?, ?, ?)`,
    [eventId, input.event_type, input.action, targetUri, input.decision, metadataJson, timestamp],
  );
  return eventId;
}
185
+
186
/**
 * Stores each redaction finding as a row in `redaction_findings`.
 *
 * Every row gets a fresh `redact_<uuid>` id and shares one `created_at`
 * value. The finding's `start`/`end` are merged into the caller's metadata
 * before serialisation.
 *
 * @param db - Open database handle.
 * @param input - Findings plus optional source, run, metadata and timestamp.
 * @returns The number of findings supplied.
 */
export function recordRedactionFindings(db: Database, input: {
  source_uri?: string | null;
  run_id?: string | null;
  findings: RedactionFinding[];
  metadata?: Record<string, unknown>;
  created_at?: string;
}): number {
  const timestamp = input.created_at ?? new Date().toISOString();
  const sourceUri = input.source_uri ?? null;
  const runId = input.run_id ?? null;

  input.findings.forEach((finding) => {
    const metadataJson = JSON.stringify({ ...(input.metadata ?? {}), start: finding.start, end: finding.end });
    db.run(
      `INSERT INTO redaction_findings (id, source_uri, run_id, severity, finding_type, metadata_json, created_at)
       VALUES (?, ?, ?, ?, ?, ?, ?)`,
      [`redact_${randomUUID()}`, sourceUri, runId, finding.severity, finding.type, metadataJson, timestamp],
    );
  });
  return input.findings.length;
}
211
+
212
/**
 * Inserts an already-approved row into `approval_gates`.
 *
 * The row is stored with status `approved`, a fresh `approval_<uuid>` id and
 * identical `created_at`/`updated_at` values. `approved_by` falls back to
 * `'local-cli'`.
 *
 * @param db - Open database handle.
 * @param input - Action being approved plus optional target, reason, approver,
 *   metadata and timestamp.
 * @returns The new row's id together with its status.
 */
export function createApprovalGate(db: Database, input: {
  action: string;
  target_uri?: string | null;
  reason?: string | null;
  approved_by?: string | null;
  metadata?: Record<string, unknown>;
  created_at?: string;
}): { id: string; status: 'approved' } {
  const timestamp = input.created_at ?? new Date().toISOString();
  const gateId = `approval_${randomUUID()}`;
  const row = [
    gateId,
    input.action,
    input.target_uri ?? null,
    'approved',
    input.reason ?? null,
    input.approved_by ?? 'local-cli',
    JSON.stringify(input.metadata ?? {}),
    timestamp,
    timestamp,
  ];

  db.run(
    `INSERT INTO approval_gates (id, action, target_uri, status, reason, approved_by, metadata_json, created_at, updated_at)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
    row,
  );
  return { id: gateId, status: 'approved' };
}
239
+
240
/**
 * Checks whether an approved gate exists for `action`.
 *
 * A gate matches when its `target_uri` is NULL, equals `targetUri`, or when no
 * `targetUri` was supplied at all.
 *
 * NOTE(review): because of the `? IS NULL` clause, a lookup without a target
 * is satisfied by an approval that was granted for any specific target —
 * confirm this breadth is intended.
 *
 * @param db - Open database handle.
 * @param action - Action name to look up.
 * @param targetUri - Optional target the approval should cover.
 * @returns `true` when at least one matching approved gate exists.
 */
export function hasApproval(db: Database, action: string, targetUri?: string | null): boolean {
  const target = targetUri ?? null;
  const statement = db.query<{ id: string }, [string, string | null, string | null]>(
    `SELECT id FROM approval_gates
     WHERE action = ? AND status = 'approved' AND (target_uri IS NULL OR target_uri = ? OR ? IS NULL)
     ORDER BY updated_at DESC LIMIT 1`,
  );
  const match = statement.get(action, target, target);
  return match ? true : false;
}
248
+
249
/**
 * Summarises whether `action` may proceed under the approval rules.
 *
 * Approval is only required for the `generated_write` action, and only when
 * the policy asks for it. In that case the database is consulted through
 * `hasApproval`; every other action is treated as approved without a lookup.
 *
 * @param db - Open database handle.
 * @param policy - Policy supplying `approvals.generatedWritesRequireApproval`.
 * @param action - Action being evaluated.
 * @param targetUri - Optional target of the action.
 * @returns The inputs echoed back with the requirement, approval state and
 *   resulting decision (`allow` or `requires_approval`).
 */
export function approvalStatus(db: Database, policy: SafetyPolicy, action: string, targetUri?: string | null): {
  action: string;
  target_uri: string | null;
  approval_required: boolean;
  approved: boolean;
  decision: SafetyDecision;
} {
  const approvalRequired = action === 'generated_write' && policy.approvals.generatedWritesRequireApproval;

  let approved = true;
  if (approvalRequired) {
    approved = hasApproval(db, action, targetUri);
  }

  const decision: SafetyDecision = approved ? 'allow' : 'requires_approval';
  return {
    action,
    target_uri: targetUri ?? null,
    approval_required: approvalRequired,
    approved,
    decision,
  };
}
package/src/source-ref.ts CHANGED
@@ -82,6 +82,18 @@ export function parseSourceRef(uri: string): SourceRef {
82
82
  throw new Error(`Unsupported source ref scheme: ${uri}`);
83
83
  }
84
84
 
85
/**
 * Maps a source ref to the URI form with any trailing revision removed.
 *
 * For an open-files file ref that carries a revision id, the trailing
 * `/revision/<id>` segment is stripped; every other ref is returned unchanged.
 *
 * @param uri - The source ref as given by the caller.
 * @param parsed - Pre-parsed form of `uri`; parsed on demand when omitted.
 * @returns The URI without a trailing revision segment.
 */
export function catalogSourceUriForRef(uri: string, parsed = parseSourceRef(uri)): string {
  const isPinnedFile = parsed.kind === 'open-files' && parsed.entity === 'file' && Boolean(parsed.revision_id);
  return isPinnedFile ? uri.replace(/\/revision\/[^/]+$/, '') : uri;
}
91
+
92
/**
 * Extracts the revision id carried by an open-files file ref.
 *
 * @param uri - Source ref to parse.
 * @returns The revision id, or `null` when the ref is not an open-files file
 *   ref or carries no revision.
 */
export function revisionIdForSourceRef(uri: string): string | null {
  const ref = parseSourceRef(uri);
  if (ref.kind !== 'open-files' || ref.entity !== 'file') return null;
  return ref.revision_id ?? null;
}
96
+
85
97
  export function isSupportedSourceRef(uri: string): boolean {
86
98
  try {
87
99
  parseSourceRef(uri);
@@ -0,0 +1,418 @@
1
+ import type { Database } from 'bun:sqlite';
2
+ import { migrateKnowledgeDb, openKnowledgeDb } from './knowledge-db';
3
+ import { catalogSourceUriForRef, parseSourceRef, revisionIdForSourceRef } from './source-ref';
4
+ import { assertWriteAllowed, recordAuditEvent, type SafetyPolicy } from './safety';
5
+
6
/** Input accepted by `resolveOpenFilesSource`. */
export interface SourceResolveOptions {
  /** Path of the knowledge database that is migrated and opened for the lookup. */
  dbPath: string;
  /** Source ref to resolve; may carry a trailing revision. */
  sourceRef: string;
  /** Reason for the read; defaults to 'knowledge_answer'. */
  purpose?: string;
  /** Maximum chunks to return; defaults to 10 and is clamped to 0..100. */
  limit?: number;
  /** Clock override used for `resolved_at` and audit timestamps. */
  now?: Date;
  /** When present, source access and the database path are checked against it. */
  safetyPolicy?: SafetyPolicy;
}

/** Provenance record attached to every resolved chunk and citation. */
export interface SourceResolverEvidence {
  resolver: 'open-files-read-only';
  mode: 'local_catalog';
  purpose: string;
  read_only: true;
  source_ref: string;
  source_uri: string;
  source_revision_id: string | null;
  revision: string | null;
  hash: string | null;
  chunk_id?: string;
  start_offset?: number | null;
  end_offset?: number | null;
  /** ISO timestamp taken once per resolve call. */
  resolved_at: string;
}

/** A chunk row from the catalog, with parsed metadata and its evidence. */
export interface ResolvedSourceChunk {
  id: string;
  kind: string;
  ordinal: number;
  text: string;
  token_count: number | null;
  start_offset: number | null;
  end_offset: number | null;
  metadata: Record<string, unknown>;
  evidence: SourceResolverEvidence;
}

/** Citation derived from a resolved chunk. */
export interface ResolvedSourceCitation {
  source_ref: string;
  source_uri: string;
  chunk_id: string;
  /** Leading portion of the chunk text (at most 500 characters). */
  quote: string;
  start_offset: number | null;
  end_offset: number | null;
  evidence: SourceResolverEvidence;
}

/** Full payload returned by `resolveOpenFilesSource`. */
export interface SourceResolveResult {
  source_ref: string;
  source_uri: string;
  purpose: string;
  read_only: true;
  /** False when no catalog row matched the ref; `source` and `revision` are then null. */
  resolved: boolean;
  resolver: {
    name: 'open-files-read-only';
    mode: 'local_catalog';
    contract: 'open-files-knowledge-source-v1';
  };
  source: {
    id: string;
    uri: string;
    kind: string;
    title: string | null;
    metadata: Record<string, unknown>;
    /** Parsed from the source row's `acl_json` column. */
    permissions: Record<string, unknown>;
    updated_at: string;
  } | null;
  revision: {
    id: string;
    revision: string;
    hash: string | null;
    extracted_text_uri: string | null;
    metadata: Record<string, unknown>;
    created_at: string;
    /** True only when the revision metadata has `reindex_required === true`. */
    reindex_required: boolean;
  } | null;
  content: {
    mime: string | null;
    size: number | null;
    hash: string | null;
    /** True when the selected revision has at least one chunk. */
    text_available: boolean;
    chunks_total: number;
    chunks_returned: number;
    /** Sum of the `text` lengths of the returned chunks. */
    char_count_returned: number;
    extracted_text_ref: string | null;
    bytes_available: false;
    bytes_exposed: false;
  };
  chunks: ResolvedSourceChunk[];
  citations: ResolvedSourceCitation[];
}

/** Columns read from the `sources` table. */
interface DbSourceRow {
  id: string;
  uri: string;
  kind: string;
  title: string | null;
  metadata_json: string;
  acl_json: string;
  updated_at: string;
}

/** Columns read from the `source_revisions` table. */
interface DbRevisionRow {
  id: string;
  revision: string;
  hash: string | null;
  extracted_text_uri: string | null;
  metadata_json: string;
  created_at: string;
}

/** Columns read from the `chunks` table. */
interface DbChunkRow {
  id: string;
  kind: string;
  ordinal: number;
  text: string;
  token_count: number | null;
  start_offset: number | null;
  end_offset: number | null;
  metadata_json: string;
}
127
+
128
/**
 * Parses a JSON column that is expected to hold an object.
 *
 * @param value - Raw JSON text, possibly missing.
 * @returns The parsed object, or an empty object when the value is empty,
 *   malformed, or decodes to anything other than a plain (non-array) object.
 */
function parseJsonObject(value: string | null | undefined): Record<string, unknown> {
  if (!value) return {};

  let decoded: unknown;
  try {
    decoded = JSON.parse(value);
  } catch {
    return {};
  }

  const isPlainObject = typeof decoded === 'object' && decoded !== null && !Array.isArray(decoded);
  return isPlainObject ? (decoded as Record<string, unknown>) : {};
}
137
+
138
/**
 * Looks up the first non-empty string stored under any of `keys`.
 *
 * @param metadata - Parsed metadata object.
 * @param keys - Candidate keys, tried in order.
 * @returns The first non-empty string value, or `null` when none exists.
 */
function metadataString(metadata: Record<string, unknown>, keys: string[]): string | null {
  const found = keys
    .map((key) => metadata[key])
    .find((candidate): candidate is string => typeof candidate === 'string' && candidate.length > 0);
  return found ?? null;
}
145
+
146
/**
 * Looks up the first finite number stored under any of `keys`.
 *
 * @param metadata - Parsed metadata object.
 * @param keys - Candidate keys, tried in order.
 * @returns The first finite numeric value, or `null` when none exists.
 */
function metadataNumber(metadata: Record<string, unknown>, keys: string[]): number | null {
  const found = keys
    .map((key) => metadata[key])
    .find((candidate): candidate is number => typeof candidate === 'number' && Number.isFinite(candidate));
  return found ?? null;
}
153
+
154
/**
 * Enforces a source's permission object for the requested purpose.
 *
 * Checks run in this order: a string `mode` other than `read_only` is
 * rejected; a purpose listed in `denied_purposes` is rejected; when
 * `allowed_purposes` is a non-empty array the purpose must appear in it.
 * Fields of any other shape are ignored.
 *
 * @param permissions - Parsed permission object of the source.
 * @param purpose - Purpose of the current read.
 * @throws Error describing the first rule that rejects the purpose.
 */
function assertPurposeAllowed(permissions: Record<string, unknown>, purpose: string): void {
  const { mode, denied_purposes: denied, allowed_purposes: allowed } = permissions;

  const modeIsWrong = typeof mode === 'string' && mode !== 'read_only';
  if (modeIsWrong) {
    throw new Error(`Source resolver denied ${purpose}. Permission mode is ${mode}, expected read_only.`);
  }

  if (Array.isArray(denied) && denied.includes(purpose)) {
    throw new Error(`Source resolver denied ${purpose}. Purpose is explicitly denied.`);
  }

  if (!Array.isArray(allowed) || allowed.length === 0) return;
  if (!allowed.includes(purpose)) {
    throw new Error(`Source resolver denied ${purpose}. Allowed purposes: ${allowed.join(', ')}`);
  }
}
170
+
171
/**
 * Builds the revision-qualified ref for a source.
 *
 * For an open-files file source with a known revision the result is
 * `<sourceUri>/revision/<encoded revision>`. In every other case — no
 * revision, a different kind of ref, or any error while parsing or encoding —
 * `fallback` is returned.
 *
 * @param sourceUri - Catalog URI of the source.
 * @param revision - Selected revision row, if any.
 * @param fallback - Ref to return when no revision-qualified ref applies.
 * @returns The revision-qualified ref or `fallback`.
 */
function sourceRevisionRef(sourceUri: string, revision: DbRevisionRow | null, fallback: string): string {
  if (!revision) return fallback;
  try {
    const ref = parseSourceRef(sourceUri);
    const isOpenFilesFile = ref.kind === 'open-files' && ref.entity === 'file';
    return isOpenFilesFile ? `${sourceUri}/revision/${encodeURIComponent(revision.revision)}` : fallback;
  } catch {
    return fallback;
  }
}
183
+
184
/**
 * Fetches the source row whose URI equals the catalog URI or the requested
 * ref, preferring the catalog URI when both exist.
 *
 * @param db - Open database handle.
 * @param sourceUri - Catalog URI (revision stripped).
 * @param requestedRef - Ref exactly as requested by the caller.
 * @returns The matching row, or `null` when none exists.
 */
function selectSource(db: Database, sourceUri: string, requestedRef: string): DbSourceRow | null {
  const statement = db.query<DbSourceRow, [string, string, string]>(
    `SELECT id, uri, kind, title, metadata_json, acl_json, updated_at
     FROM sources
     WHERE uri = ? OR uri = ?
     ORDER BY CASE WHEN uri = ? THEN 0 ELSE 1 END
     LIMIT 1`,
  );
  const row = statement.get(sourceUri, requestedRef, sourceUri);
  return row ?? null;
}
193
+
194
/**
 * Fetches a revision row for a source.
 *
 * With a `revisionId` the row whose `revision` column equals it is returned;
 * without one the newest row (by `created_at`, then `revision`, descending) is
 * returned.
 *
 * @param db - Open database handle.
 * @param sourceId - Id of the owning source row.
 * @param revisionId - Requested revision value, or `null` for the latest.
 * @returns The matching row, or `null` when none exists.
 */
function selectRevision(db: Database, sourceId: string, revisionId: string | null): DbRevisionRow | null {
  if (!revisionId) {
    const latest = db.query<DbRevisionRow, [string]>(
      `SELECT id, revision, hash, extracted_text_uri, metadata_json, created_at
       FROM source_revisions
       WHERE source_id = ?
       ORDER BY created_at DESC, revision DESC
       LIMIT 1`,
    ).get(sourceId);
    return latest ?? null;
  }

  const pinned = db.query<DbRevisionRow, [string, string]>(
    `SELECT id, revision, hash, extracted_text_uri, metadata_json, created_at
     FROM source_revisions
     WHERE source_id = ? AND revision = ?
     LIMIT 1`,
  ).get(sourceId, revisionId);
  return pinned ?? null;
}
211
+
212
/**
 * Counts the chunks stored for a revision.
 *
 * @param db - Open database handle.
 * @param revisionId - Id of the revision row, or `null` when there is none.
 * @returns The chunk count; `0` when no revision id is given or no count row
 *   comes back.
 */
function countChunks(db: Database, revisionId: string | null): number {
  if (!revisionId) return 0;
  const statement = db.query<{ n: number }, [string]>('SELECT COUNT(*) AS n FROM chunks WHERE source_revision_id = ?');
  const tally = statement.get(revisionId);
  return tally?.n ?? 0;
}
217
+
218
/**
 * Reads up to `limit` chunks of a revision in ascending `ordinal` order.
 *
 * @param db - Open database handle.
 * @param revisionId - Id of the revision row, or `null` when there is none.
 * @param limit - Maximum number of rows to return.
 * @returns The chunk rows; empty when there is no revision id or `limit` is
 *   zero or negative.
 */
function selectChunks(db: Database, revisionId: string | null, limit: number): DbChunkRow[] {
  if (!revisionId) return [];
  if (limit <= 0) return [];

  const statement = db.query<DbChunkRow, [string, number]>(
    `SELECT id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json
     FROM chunks
     WHERE source_revision_id = ?
     ORDER BY ordinal ASC
     LIMIT ?`,
  );
  return statement.all(revisionId, limit);
}
228
+
229
/**
 * What the transactional part of a resolve produced: either the payload to
 * return, or the permission error to raise once the transaction has committed.
 */
type ResolveOutcome =
  | { ok: true; result: SourceResolveResult }
  | { ok: false; error: unknown };

/** Builds the `resolved: false` payload returned when no catalog row matches the ref. */
function unresolvedSourceResult(sourceRef: string, sourceUri: string, purpose: string): SourceResolveResult {
  return {
    source_ref: sourceRef,
    source_uri: sourceUri,
    purpose,
    read_only: true,
    resolved: false,
    resolver: {
      name: 'open-files-read-only',
      mode: 'local_catalog',
      contract: 'open-files-knowledge-source-v1',
    },
    source: null,
    revision: null,
    content: {
      mime: null,
      size: null,
      hash: null,
      text_available: false,
      chunks_total: 0,
      chunks_returned: 0,
      char_count_returned: 0,
      extracted_text_ref: null,
      bytes_available: false,
      bytes_exposed: false,
    },
    chunks: [],
    citations: [],
  } satisfies SourceResolveResult;
}

/**
 * Resolves a source ref against the local catalog and returns its metadata,
 * selected revision, up to `limit` text chunks and matching citations.
 *
 * Every call writes one `source_read` audit event: `allow` for a successful or
 * missing lookup, `deny` when the source's permissions reject the purpose.
 * All reads and the audit insert share one transaction. A denial is carried
 * out of the transaction as a value and thrown only after the commit, so the
 * `deny` audit row is kept; throwing inside the transaction would roll the
 * audit row back together with everything else.
 *
 * @param options - Database path, source ref and optional purpose, limit,
 *   clock and safety policy.
 * @returns The resolve payload; `resolved` is `false` when the ref is unknown.
 * @throws Error when the safety policy forbids source access or the database
 *   path, or when the source's permissions deny the purpose.
 */
export async function resolveOpenFilesSource(options: SourceResolveOptions): Promise<SourceResolveResult> {
  const purpose = options.purpose ?? 'knowledge_answer';
  const limit = Math.max(0, Math.min(options.limit ?? 10, 100));
  const resolvedAt = (options.now ?? new Date()).toISOString();
  const parsed = parseSourceRef(options.sourceRef);
  const sourceUri = catalogSourceUriForRef(options.sourceRef, parsed);
  const requestedRevision = revisionIdForSourceRef(options.sourceRef);

  if (options.safetyPolicy) {
    if (!options.safetyPolicy.readOnlySourceAccess) throw new Error('Safety policy denied source resolution.');
    // The database file is written to (migration, audit rows), so it must sit in a writable root.
    assertWriteAllowed(options.dbPath, options.safetyPolicy);
  }

  migrateKnowledgeDb(options.dbPath);
  const db = openKnowledgeDb(options.dbPath);
  try {
    const outcome = db.transaction((): ResolveOutcome => {
      const source = selectSource(db, sourceUri, options.sourceRef);
      if (!source) {
        recordAuditEvent(db, {
          event_type: 'source_read',
          action: 'open_files_resolve_missing',
          target_uri: options.sourceRef,
          decision: 'allow',
          metadata: { purpose, read_only: true, source_uri: sourceUri },
          created_at: resolvedAt,
        });
        return { ok: true, result: unresolvedSourceResult(options.sourceRef, sourceUri, purpose) };
      }

      const sourceMetadata = parseJsonObject(source.metadata_json);
      const permissions = parseJsonObject(source.acl_json);
      try {
        assertPurposeAllowed(permissions, purpose);
      } catch (error) {
        recordAuditEvent(db, {
          event_type: 'source_read',
          action: 'open_files_resolve',
          target_uri: options.sourceRef,
          decision: 'deny',
          metadata: {
            purpose,
            read_only: true,
            source_uri: source.uri,
            error: error instanceof Error ? error.message : String(error),
          },
          created_at: resolvedAt,
        });
        // Return instead of throwing so the transaction commits the audit row.
        return { ok: false, error };
      }

      const revision = selectRevision(db, source.id, requestedRevision);
      const revisionMetadata = parseJsonObject(revision?.metadata_json);
      const totalChunks = countChunks(db, revision?.id ?? null);
      const rows = selectChunks(db, revision?.id ?? null, limit);
      const effectiveSourceRef = sourceRevisionRef(source.uri, revision, options.sourceRef);

      const chunks = rows.map((row) => {
        const metadata = parseJsonObject(row.metadata_json);
        const evidence: SourceResolverEvidence = {
          resolver: 'open-files-read-only',
          mode: 'local_catalog',
          purpose,
          read_only: true,
          source_ref: metadataString(metadata, ['source_ref']) ?? effectiveSourceRef,
          source_uri: source.uri,
          source_revision_id: revision?.id ?? null,
          revision: revision?.revision ?? null,
          hash: revision?.hash ?? metadataString(metadata, ['hash']),
          chunk_id: row.id,
          start_offset: row.start_offset,
          end_offset: row.end_offset,
          resolved_at: resolvedAt,
        };
        return {
          id: row.id,
          kind: row.kind,
          ordinal: row.ordinal,
          text: row.text,
          token_count: row.token_count,
          start_offset: row.start_offset,
          end_offset: row.end_offset,
          metadata,
          evidence,
        };
      });

      const citations = chunks.map((chunk) => ({
        source_ref: chunk.evidence.source_ref,
        source_uri: source.uri,
        chunk_id: chunk.id,
        quote: chunk.text.slice(0, 500),
        start_offset: chunk.start_offset,
        end_offset: chunk.end_offset,
        evidence: chunk.evidence,
      }));

      recordAuditEvent(db, {
        event_type: 'source_read',
        action: 'open_files_resolve',
        target_uri: options.sourceRef,
        decision: 'allow',
        metadata: {
          purpose,
          read_only: true,
          source_uri: source.uri,
          revision: revision?.revision ?? null,
          chunks_returned: chunks.length,
          chunks_total: totalChunks,
        },
        created_at: resolvedAt,
      });

      // Source-level metadata wins; revision metadata is the fallback.
      const mime = metadataString(sourceMetadata, ['mime', 'content_type']) ?? metadataString(revisionMetadata, ['mime', 'content_type']);
      const size = metadataNumber(sourceMetadata, ['size', 'size_bytes']) ?? metadataNumber(revisionMetadata, ['size', 'size_bytes']);
      const result: SourceResolveResult = {
        source_ref: effectiveSourceRef,
        source_uri: source.uri,
        purpose,
        read_only: true,
        resolved: true,
        resolver: {
          name: 'open-files-read-only',
          mode: 'local_catalog',
          contract: 'open-files-knowledge-source-v1',
        },
        source: {
          id: source.id,
          uri: source.uri,
          kind: source.kind,
          title: source.title,
          metadata: sourceMetadata,
          permissions,
          updated_at: source.updated_at,
        },
        revision: revision ? {
          id: revision.id,
          revision: revision.revision,
          hash: revision.hash,
          extracted_text_uri: revision.extracted_text_uri,
          metadata: revisionMetadata,
          created_at: revision.created_at,
          reindex_required: revisionMetadata.reindex_required === true,
        } : null,
        content: {
          mime,
          size,
          hash: revision?.hash ?? metadataString(sourceMetadata, ['hash', 'checksum', 'sha256']),
          text_available: totalChunks > 0,
          chunks_total: totalChunks,
          chunks_returned: chunks.length,
          char_count_returned: chunks.reduce((sum, chunk) => sum + chunk.text.length, 0),
          extracted_text_ref: revision?.extracted_text_uri ?? metadataString(revisionMetadata, ['extracted_text_ref', 'extracted_text_uri']),
          bytes_available: false,
          bytes_exposed: false,
        },
        chunks,
        citations,
      };
      return { ok: true, result };
    })();

    // Raised only after the transaction above has committed.
    if (!outcome.ok) throw outcome.error;
    return outcome.result;
  } finally {
    db.close();
  }
}