@hasna/knowledge 0.2.26 → 0.2.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +61 -0
- package/bin/open-knowledge-mcp.js +85 -9
- package/bin/open-knowledge.js +86 -86
- package/dist/agent.d.ts +35 -0
- package/dist/artifact-store.d.ts +63 -0
- package/dist/auth.d.ts +35 -0
- package/dist/embeddings.d.ts +77 -0
- package/dist/index.d.ts +20 -0
- package/dist/index.js +5709 -0
- package/dist/knowledge-db.d.ts +27 -0
- package/dist/manifest-ingest.d.ts +35 -0
- package/dist/outbox-consume.d.ts +25 -0
- package/dist/provenance.d.ts +50 -0
- package/dist/providers.d.ts +89 -0
- package/dist/reindex.d.ts +37 -0
- package/dist/remote-client.d.ts +108 -0
- package/dist/retrieval.d.ts +71 -0
- package/dist/safety.d.ts +70 -0
- package/dist/sdk.d.ts +72 -0
- package/dist/search.d.ts +65 -0
- package/dist/service.d.ts +117 -0
- package/dist/source-ingest.d.ts +18 -0
- package/dist/source-ref.d.ts +30 -0
- package/dist/source-resolver.d.ts +92 -0
- package/dist/storage-contract.d.ts +106 -0
- package/dist/web-search.d.ts +40 -0
- package/dist/wiki-compiler.d.ts +67 -0
- package/dist/wiki-layout.d.ts +23 -0
- package/dist/workspace.d.ts +111 -0
- package/docs/architecture/ai-native-knowledge-base.md +24 -0
- package/docs/architecture/hosted-wrapper-responsibilities.md +8 -0
- package/docs/canonical-secrets-bootstrap-2026-06-08.md +127 -0
- package/package.json +15 -7
- package/src/agent.ts +0 -367
- package/src/artifact-store.ts +0 -184
- package/src/auth.ts +0 -123
- package/src/cli.ts +0 -1181
- package/src/embeddings.ts +0 -516
- package/src/knowledge-db.ts +0 -354
- package/src/manifest-ingest.ts +0 -515
- package/src/mcp-http.js +0 -110
- package/src/mcp.js +0 -1503
- package/src/outbox-consume.ts +0 -463
- package/src/provenance.ts +0 -93
- package/src/providers.ts +0 -308
- package/src/reindex.ts +0 -260
- package/src/remote-client.ts +0 -268
- package/src/retrieval.ts +0 -326
- package/src/safety.ts +0 -265
- package/src/schema.js +0 -25
- package/src/search.ts +0 -510
- package/src/service.ts +0 -432
- package/src/source-ingest.ts +0 -268
- package/src/source-ref.ts +0 -104
- package/src/source-resolver.ts +0 -436
- package/src/storage-contract.ts +0 -293
- package/src/store.ts +0 -113
- package/src/web-search.ts +0 -330
- package/src/wiki-compiler.ts +0 -711
- package/src/wiki-layout.ts +0 -251
- package/src/workspace.ts +0 -213
package/src/source-ref.ts
DELETED
|
@@ -1,104 +0,0 @@
|
|
|
1
|
-
/** Discriminator naming which family of source reference a URI belongs to. */
export type SourceRefKind =
  | 'open-files'
  | 's3'
  | 'file'
  | 'web';

/** Fields shared by every parsed source reference. */
export interface BaseSourceRef {
  /** Selects the concrete variant of {@link SourceRef}. */
  kind: SourceRefKind;
  /** The URI string the reference was parsed from, carried through unchanged. */
  uri: string;
}

/** Reference using the `open-files://` scheme. */
export interface OpenFilesSourceRef extends BaseSourceRef {
  kind: 'open-files';
  /** First path segment of the URI: either a single file or a source. */
  entity: 'file' | 'source';
  /** Second path segment of the URI (not percent-decoded by the parser). */
  id: string;
  /** Percent-decoded revision from `open-files://file/<id>/revision/<revision_id>`. */
  revision_id?: string;
  /** Percent-decoded remainder after the `path` segment of a source reference. */
  path?: string;
}

/** Reference using the `s3://` scheme. */
export interface S3SourceRef extends BaseSourceRef {
  kind: 's3';
  /** Host component of the URI. */
  bucket: string;
  /** Percent-decoded URI path with leading slashes removed. */
  key: string;
}

/** Reference using the `file://` scheme. */
export interface FileSourceRef extends BaseSourceRef {
  kind: 'file';
  /** Percent-decoded path component of the URI. */
  path: string;
}

/** Reference using the `http://` or `https://` scheme. */
export interface WebSourceRef extends BaseSourceRef {
  kind: 'web';
  /** The URI after normalisation by the `URL` parser. */
  url: string;
}

/** Any source reference this module knows how to parse, discriminated by `kind`. */
export type SourceRef =
  | OpenFilesSourceRef
  | S3SourceRef
  | FileSourceRef
  | WebSourceRef;
|
|
33
|
-
|
|
34
|
-
/**
 * Returns `value` when it is a non-empty string.
 *
 * @param value - Candidate string; may be missing.
 * @param message - Text of the error raised when the value is missing or empty.
 * @returns The same string that was passed in.
 * @throws Error carrying `message` when `value` is falsy.
 */
function assertNonEmpty(value: string | undefined, message: string): string {
  if (value) {
    return value;
  }
  throw new Error(message);
}
|
|
38
|
-
|
|
39
|
-
/**
 * Parses an `open-files://` URI into a structured reference.
 *
 * Accepted shapes:
 * - `open-files://file/<id>`
 * - `open-files://file/<id>/revision/<revision_id>`
 * - `open-files://source/<id>` optionally followed by `/path/<path>`
 *
 * Empty path segments (doubled or trailing slashes) are ignored. The
 * revision id and the path are percent-decoded; the id is returned as written.
 *
 * @param uri - A URI that begins with `open-files://`.
 * @returns The parsed reference. `path` is `undefined` for a source reference
 *   that has no `path` segment.
 * @throws Error when the entity is neither `file` nor `source`, when the id is
 *   missing, or when a file reference has segments other than `revision/<id>`.
 */
function parseOpenFilesRef(uri: string): OpenFilesSourceRef {
  const segments = uri.slice('open-files://'.length).split('/').filter(Boolean);
  const entity = segments[0];
  if (entity !== 'file' && entity !== 'source') {
    throw new Error("Invalid open-files ref. Expected open-files://file/<id>, open-files://file/<id>/revision/<revision_id>, or open-files://source/<id>/path/<path>.");
  }

  const id = segments[1];
  if (!id) throw new Error('Invalid open-files ref. Missing id.');

  if (entity === 'file') {
    if (segments.length === 2) return { kind: 'open-files', uri, entity, id };
    const revisionSegment = segments[3];
    if (segments[2] === 'revision' && revisionSegment && segments.length === 4) {
      return { kind: 'open-files', uri, entity, id, revision_id: decodeURIComponent(revisionSegment) };
    }
    throw new Error('Invalid open-files file ref. Expected open-files://file/<id>/revision/<revision_id>.');
  }

  // Start the search after the entity and id segments: a source whose id is
  // literally "path" must not be mistaken for the path marker.
  const pathIndex = segments.indexOf('path', 2);
  const path = pathIndex >= 0 ? decodeURIComponent(segments.slice(pathIndex + 1).join('/')) : undefined;
  return { kind: 'open-files', uri, entity, id, path };
}
|
|
58
|
-
|
|
59
|
-
/**
 * Parses an `s3://<bucket>/<key>` URI.
 *
 * The bucket is the URI host; the key is the URI path with leading slashes
 * removed and percent-escapes decoded.
 *
 * @param uri - A URI that begins with `s3://`.
 * @throws Error when the bucket or the object key is empty. `new URL` and
 *   `decodeURIComponent` may also throw on malformed input.
 */
function parseS3Ref(uri: string): S3SourceRef {
  const { hostname, pathname } = new URL(uri);
  const bucket = assertNonEmpty(hostname, 'Invalid s3 ref. Missing bucket.');
  const key = decodeURIComponent(pathname.replace(/^\/+/, ''));
  if (key === '') {
    throw new Error('Invalid s3 ref. Missing object key.');
  }
  return { kind: 's3', uri, bucket, key };
}
|
|
66
|
-
|
|
67
|
-
/**
 * Parses a `file://` URI, exposing its percent-decoded path component.
 *
 * @param uri - A URI that begins with `file://`.
 * @throws When `new URL` rejects the URI or the path contains a malformed escape.
 */
function parseFileRef(uri: string): FileSourceRef {
  const { pathname } = new URL(uri);
  const path = decodeURIComponent(pathname);
  return { kind: 'file', uri, path };
}
|
|
71
|
-
|
|
72
|
-
/**
 * Parses an `http(s)://` URI. The original string is kept in `uri`, and the
 * form serialised by the `URL` parser is exposed as `url`.
 *
 * @param uri - A URI that begins with `http://` or `https://`.
 * @throws When `new URL` rejects the URI.
 */
function parseWebRef(uri: string): WebSourceRef {
  const url = new URL(uri).toString();
  return { kind: 'web', uri, url };
}
|
|
76
|
-
|
|
77
|
-
/**
 * Parses a source reference URI by dispatching on its scheme prefix.
 *
 * Supported prefixes, checked in order: `open-files://`, `s3://`, `file://`,
 * `https://`, `http://`.
 *
 * @param uri - The reference to parse.
 * @returns The parsed reference for the matching scheme.
 * @throws Error when no supported prefix matches, or whatever the
 *   scheme-specific parser throws for a malformed URI.
 */
export function parseSourceRef(uri: string): SourceRef {
  const parsersByPrefix: ReadonlyArray<readonly [string, (value: string) => SourceRef]> = [
    ['open-files://', parseOpenFilesRef],
    ['s3://', parseS3Ref],
    ['file://', parseFileRef],
    ['https://', parseWebRef],
    ['http://', parseWebRef],
  ];
  for (const [prefix, parse] of parsersByPrefix) {
    if (uri.startsWith(prefix)) {
      return parse(uri);
    }
  }
  throw new Error(`Unsupported source ref scheme: ${uri}`);
}
|
|
84
|
-
|
|
85
|
-
/**
 * Returns the URI under which a reference is stored in the catalog.
 *
 * For an `open-files` file reference that pins a revision, the trailing
 * `/revision/<revision_id>` suffix is removed. Every other reference is
 * returned unchanged.
 *
 * @param uri - The reference as supplied by the caller.
 * @param parsed - The parsed form of `uri`; computed from `uri` when omitted.
 * @returns The catalog URI.
 */
export function catalogSourceUriForRef(uri: string, parsed = parseSourceRef(uri)): string {
  if (parsed.kind === 'open-files' && parsed.entity === 'file' && parsed.revision_id) {
    // The parser ignores empty path segments, so a revision ref may carry
    // doubled or trailing slashes; the suffix match has to tolerate them too,
    // otherwise the revision would stay in the catalog URI.
    return uri.replace(/\/+revision\/+[^/]+\/*$/, '');
  }
  return uri;
}
|
|
91
|
-
|
|
92
|
-
/**
 * Extracts the revision pinned by a source reference.
 *
 * @param uri - The reference to inspect.
 * @returns The revision id of an `open-files` file reference, or `null` when
 *   the reference is of another kind or pins no revision.
 * @throws Whatever `parseSourceRef` throws for an unsupported or malformed URI.
 */
export function revisionIdForSourceRef(uri: string): string | null {
  const ref = parseSourceRef(uri);
  if (ref.kind !== 'open-files' || ref.entity !== 'file') {
    return null;
  }
  return ref.revision_id ?? null;
}
|
|
96
|
-
|
|
97
|
-
/**
 * Reports whether `parseSourceRef` accepts the given URI.
 *
 * @param uri - The reference to test.
 * @returns `true` when parsing succeeds, `false` when it throws for any reason.
 */
export function isSupportedSourceRef(uri: string): boolean {
  let supported = true;
  try {
    parseSourceRef(uri);
  } catch {
    supported = false;
  }
  return supported;
}
|
package/src/source-resolver.ts
DELETED
|
@@ -1,436 +0,0 @@
|
|
|
1
|
-
import type { Database } from 'bun:sqlite';
|
|
2
|
-
import { migrateKnowledgeDb, openKnowledgeDb } from './knowledge-db';
|
|
3
|
-
import { sourceProvenance, type KnowledgeProvenance } from './provenance';
|
|
4
|
-
import { catalogSourceUriForRef, parseSourceRef, revisionIdForSourceRef } from './source-ref';
|
|
5
|
-
import { assertWriteAllowed, recordAuditEvent, type SafetyPolicy } from './safety';
|
|
6
|
-
|
|
7
|
-
/** Inputs accepted by `resolveOpenFilesSource`. */
export interface SourceResolveOptions {
  /** Path of the knowledge database to migrate, open and query. */
  dbPath: string;
  /** Source reference URI to resolve. */
  sourceRef: string;
  /** Why the source is being read; the resolver defaults it to `knowledge_answer`. */
  purpose?: string;
  /** Maximum number of chunks to return; the resolver defaults it to 10 and caps it at 100. */
  limit?: number;
  /** Clock override used for the `resolved_at` / audit timestamps. */
  now?: Date;
  /** Optional policy checked before the database is touched. */
  safetyPolicy?: SafetyPolicy;
}
|
|
15
|
-
|
|
16
|
-
/** Record describing how, when and from which revision a chunk was resolved. */
export interface SourceResolverEvidence {
  /** Name of the resolver that produced the evidence. */
  resolver: 'open-files-read-only';
  /** Resolution mode. */
  mode: 'local_catalog';
  /** Purpose the read was performed for. */
  purpose: string;
  /** Always `true`. */
  read_only: true;
  /** Reference the chunk is attributed to. */
  source_ref: string;
  /** URI of the catalog source row. */
  source_uri: string;
  /** Id of the revision row the chunk belongs to, when one was found. */
  source_revision_id: string | null;
  /** Revision label of that row, when one was found. */
  revision: string | null;
  /** Content hash, when known. */
  hash: string | null;
  /** Id of the chunk this evidence describes. */
  chunk_id?: string;
  /** Start offset recorded for the chunk. */
  start_offset?: number | null;
  /** End offset recorded for the chunk. */
  end_offset?: number | null;
  /** ISO-8601 timestamp of the resolution. */
  resolved_at: string;
}
|
|
31
|
-
|
|
32
|
-
/** One chunk of source text returned by the resolver, with its evidence and provenance. */
export interface ResolvedSourceChunk {
  /** Chunk row id. */
  id: string;
  /** Chunk kind as stored in the catalog. */
  kind: string;
  /** Position of the chunk within its revision; chunks are returned in ascending order. */
  ordinal: number;
  /** Chunk text. */
  text: string;
  /** Stored token count, if any. */
  token_count: number | null;
  /** Stored start offset, if any. */
  start_offset: number | null;
  /** Stored end offset, if any. */
  end_offset: number | null;
  /** Chunk metadata decoded from its JSON column (empty object when absent or invalid). */
  metadata: Record<string, unknown>;
  /** How this chunk was resolved. */
  evidence: SourceResolverEvidence;
  /** Provenance record built for this chunk. */
  provenance: KnowledgeProvenance;
}
|
|
44
|
-
|
|
45
|
-
/** Citation derived from a resolved chunk. */
export interface ResolvedSourceCitation {
  /** Reference the cited chunk is attributed to. */
  source_ref: string;
  /** URI of the catalog source row. */
  source_uri: string;
  /** Id of the cited chunk. */
  chunk_id: string;
  /** Leading excerpt of the chunk text (the resolver keeps at most 500 characters). */
  quote: string;
  /** Start offset of the cited chunk, if stored. */
  start_offset: number | null;
  /** End offset of the cited chunk, if stored. */
  end_offset: number | null;
  /** Evidence shared with the cited chunk. */
  evidence: SourceResolverEvidence;
  /** Provenance shared with the cited chunk. */
  provenance: KnowledgeProvenance;
}
|
|
55
|
-
|
|
56
|
-
/** Full result of resolving a source reference against the local catalog. */
export interface SourceResolveResult {
  /** Reference the result is attributed to. */
  source_ref: string;
  /** Catalog URI used for, or found by, the lookup. */
  source_uri: string;
  /** Purpose the read was performed for. */
  purpose: string;
  /** Always `true`. */
  read_only: true;
  /** `false` when no catalog source matched the reference. */
  resolved: boolean;

  /** Fixed description of the resolver that produced the result. */
  resolver: {
    name: 'open-files-read-only';
    mode: 'local_catalog';
    contract: 'open-files-knowledge-source-v1';
  };

  /** The matched catalog source, or `null` when nothing matched. */
  source: {
    id: string;
    uri: string;
    kind: string;
    title: string | null;
    /** Decoded from the row's `metadata_json` column. */
    metadata: Record<string, unknown>;
    /** Decoded from the row's `acl_json` column. */
    permissions: Record<string, unknown>;
    updated_at: string;
  } | null;

  /** The selected revision of the source, or `null` when none was found. */
  revision: {
    id: string;
    revision: string;
    hash: string | null;
    extracted_text_uri: string | null;
    metadata: Record<string, unknown>;
    created_at: string;
    /** `true` only when the revision metadata sets `reindex_required` to `true`. */
    reindex_required: boolean;
  } | null;

  /** Summary of the content that was (or could be) returned. */
  content: {
    mime: string | null;
    size: number | null;
    hash: string | null;
    /** `true` when the selected revision has at least one chunk. */
    text_available: boolean;
    /** Number of chunks stored for the selected revision. */
    chunks_total: number;
    /** Number of chunks included in `chunks`. */
    chunks_returned: number;
    /** Sum of the lengths of the returned chunk texts. */
    char_count_returned: number;
    extracted_text_ref: string | null;
    /** Always `false`. */
    bytes_available: false;
    /** Always `false`. */
    bytes_exposed: false;
  };

  /** Returned chunks in ascending ordinal order. */
  chunks: ResolvedSourceChunk[];
  /** One citation per returned chunk. */
  citations: ResolvedSourceCitation[];
}
|
|
100
|
-
|
|
101
|
-
/** Columns read from the `sources` table. */
interface DbSourceRow {
  id: string;
  uri: string;
  kind: string;
  title: string | null;
  /** JSON text; decoded with `parseJsonObject`. */
  metadata_json: string;
  /** JSON text holding the source permissions; decoded with `parseJsonObject`. */
  acl_json: string;
  updated_at: string;
}

/** Columns read from the `source_revisions` table. */
interface DbRevisionRow {
  id: string;
  /** Revision label matched against the revision requested in a reference. */
  revision: string;
  hash: string | null;
  extracted_text_uri: string | null;
  /** JSON text; decoded with `parseJsonObject`. */
  metadata_json: string;
  created_at: string;
}

/** Columns read from the `chunks` table. */
interface DbChunkRow {
  id: string;
  kind: string;
  /** Sort key used when listing the chunks of a revision. */
  ordinal: number;
  text: string;
  token_count: number | null;
  start_offset: number | null;
  end_offset: number | null;
  /** JSON text; decoded with `parseJsonObject`. */
  metadata_json: string;
}
|
|
130
|
-
|
|
131
|
-
/**
 * Decodes a JSON column that is expected to hold an object.
 *
 * @param value - Raw JSON text, possibly missing.
 * @returns The decoded object, or an empty object when the input is missing,
 *   empty, not valid JSON, or decodes to anything other than a non-array object.
 */
function parseJsonObject(value: string | null | undefined): Record<string, unknown> {
  if (!value) return {};

  let decoded: unknown;
  try {
    decoded = JSON.parse(value);
  } catch {
    return {};
  }

  const isPlainObject = typeof decoded === 'object' && decoded !== null && !Array.isArray(decoded);
  return isPlainObject ? (decoded as Record<string, unknown>) : {};
}
|
|
140
|
-
|
|
141
|
-
/**
 * Looks up the first non-empty string stored under any of the given keys.
 *
 * @param metadata - Decoded metadata object.
 * @param keys - Candidate keys, tried in order.
 * @returns The first value that is a non-empty string, or `null` when none is.
 */
function metadataString(metadata: Record<string, unknown>, keys: string[]): string | null {
  let match: string | null = null;
  // `some` stops at the first key that yields a usable value.
  keys.some((key) => {
    const candidate = metadata[key];
    if (typeof candidate !== 'string' || candidate.length === 0) return false;
    match = candidate;
    return true;
  });
  return match;
}
|
|
148
|
-
|
|
149
|
-
/**
 * Looks up the first finite number stored under any of the given keys.
 *
 * @param metadata - Decoded metadata object.
 * @param keys - Candidate keys, tried in order.
 * @returns The first value that is a finite number, or `null` when none is.
 */
function metadataNumber(metadata: Record<string, unknown>, keys: string[]): number | null {
  let match: number | null = null;
  // `some` stops at the first key that yields a usable value.
  keys.some((key) => {
    const candidate = metadata[key];
    if (typeof candidate !== 'number' || !Number.isFinite(candidate)) return false;
    match = candidate;
    return true;
  });
  return match;
}
|
|
156
|
-
|
|
157
|
-
/**
 * Enforces a source's permissions for the requested purpose.
 *
 * Checks run in this order, and the first failure throws:
 * 1. `mode`, when it is a string, must be `read_only`.
 * 2. `denied_purposes`, when it is an array, must not contain the purpose.
 * 3. `allowed_purposes`, when it is a non-empty array, must contain the purpose.
 *
 * Fields that are absent or of another type impose no restriction.
 *
 * @param permissions - Decoded permissions object of the source.
 * @param purpose - Purpose the caller wants to read the source for.
 * @throws Error describing which rule denied the purpose.
 */
function assertPurposeAllowed(permissions: Record<string, unknown>, purpose: string): void {
  const {
    mode,
    denied_purposes: deniedPurposes,
    allowed_purposes: allowedPurposes,
  } = permissions;

  const modeIsWritable = typeof mode === 'string' && mode !== 'read_only';
  if (modeIsWritable) {
    throw new Error(`Source resolver denied ${purpose}. Permission mode is ${mode}, expected read_only.`);
  }

  const explicitlyDenied = Array.isArray(deniedPurposes) && deniedPurposes.includes(purpose);
  if (explicitlyDenied) {
    throw new Error(`Source resolver denied ${purpose}. Purpose is explicitly denied.`);
  }

  if (!Array.isArray(allowedPurposes) || allowedPurposes.length === 0) {
    return;
  }
  if (!allowedPurposes.includes(purpose)) {
    throw new Error(`Source resolver denied ${purpose}. Allowed purposes: ${allowedPurposes.join(', ')}`);
  }
}
|
|
173
|
-
|
|
174
|
-
/**
 * Builds the revision-pinned reference for a catalog source.
 *
 * @param sourceUri - URI of the catalog source row.
 * @param revision - Selected revision row, or `null` when none was found.
 * @param fallback - Reference to return when no pinned form can be built.
 * @returns `<sourceUri>/revision/<encoded revision>` when a revision exists and
 *   `sourceUri` is an `open-files` file reference; otherwise `fallback`
 *   (including when `sourceUri` cannot be parsed).
 */
function sourceRevisionRef(sourceUri: string, revision: DbRevisionRow | null, fallback: string): string {
  if (!revision) return fallback;
  try {
    const ref = parseSourceRef(sourceUri);
    const isFileRef = ref.kind === 'open-files' && ref.entity === 'file';
    return isFileRef
      ? `${sourceUri}/revision/${encodeURIComponent(revision.revision)}`
      : fallback;
  } catch {
    return fallback;
  }
}
|
|
186
|
-
|
|
187
|
-
/**
 * Finds the catalog source for a reference.
 *
 * A row matches when its URI equals either the catalog URI or the reference
 * exactly as requested; a match on the catalog URI is preferred.
 *
 * @param db - Open knowledge database.
 * @param sourceUri - Catalog URI derived from the reference.
 * @param requestedRef - The reference exactly as the caller supplied it.
 * @returns The matching row, or `null` when there is none.
 */
function selectSource(db: Database, sourceUri: string, requestedRef: string): DbSourceRow | null {
  const statement = db.query<DbSourceRow, [string, string, string]>(
    `SELECT id, uri, kind, title, metadata_json, acl_json, updated_at
     FROM sources
     WHERE uri = ? OR uri = ?
     ORDER BY CASE WHEN uri = ? THEN 0 ELSE 1 END
     LIMIT 1`,
  );
  // The catalog URI is bound twice: once for matching, once for ranking.
  const row = statement.get(sourceUri, requestedRef, sourceUri);
  return row ?? null;
}
|
|
196
|
-
|
|
197
|
-
/**
 * Picks the revision of a source to read.
 *
 * @param db - Open knowledge database.
 * @param sourceId - Id of the catalog source row.
 * @param revisionId - Revision label to pin, or `null` to take the newest one.
 * @returns The revision whose `revision` column equals `revisionId`; or, when
 *   no revision is requested, the one with the greatest `created_at` (ties
 *   broken by greatest `revision`). `null` when nothing matches.
 */
function selectRevision(db: Database, sourceId: string, revisionId: string | null): DbRevisionRow | null {
  if (!revisionId) {
    const latest = db.query<DbRevisionRow, [string]>(
      `SELECT id, revision, hash, extracted_text_uri, metadata_json, created_at
       FROM source_revisions
       WHERE source_id = ?
       ORDER BY created_at DESC, revision DESC
       LIMIT 1`,
    );
    return latest.get(sourceId) ?? null;
  }

  const pinned = db.query<DbRevisionRow, [string, string]>(
    `SELECT id, revision, hash, extracted_text_uri, metadata_json, created_at
     FROM source_revisions
     WHERE source_id = ? AND revision = ?
     LIMIT 1`,
  );
  return pinned.get(sourceId, revisionId) ?? null;
}
|
|
214
|
-
|
|
215
|
-
/**
 * Counts the chunks stored for a revision.
 *
 * @param db - Open knowledge database.
 * @param revisionId - Id of the revision row, or `null` when there is none.
 * @returns The chunk count; `0` when no revision id is given.
 */
function countChunks(db: Database, revisionId: string | null): number {
  if (!revisionId) {
    return 0;
  }
  const counted = db
    .query<{ n: number }, [string]>('SELECT COUNT(*) AS n FROM chunks WHERE source_revision_id = ?')
    .get(revisionId);
  const total = counted?.n;
  return total ?? 0;
}
|
|
220
|
-
|
|
221
|
-
/**
 * Loads the first chunks of a revision in ascending `ordinal` order.
 *
 * @param db - Open knowledge database.
 * @param revisionId - Id of the revision row, or `null` when there is none.
 * @param limit - Maximum number of rows to load.
 * @returns Up to `limit` chunk rows; an empty list when there is no revision
 *   id or `limit` is zero or negative.
 */
function selectChunks(db: Database, revisionId: string | null, limit: number): DbChunkRow[] {
  if (!revisionId) return [];
  if (limit <= 0) return [];

  const statement = db.query<DbChunkRow, [string, number]>(
    `SELECT id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json
     FROM chunks
     WHERE source_revision_id = ?
     ORDER BY ordinal ASC
     LIMIT ?`,
  );
  return statement.all(revisionId, limit);
}
|
|
231
|
-
|
|
232
|
-
/**
 * Resolves a source reference against the local knowledge catalog and returns
 * its metadata, the selected revision, and up to `limit` text chunks with
 * per-chunk evidence, provenance and citations.
 *
 * Steps:
 * 1. Parse the reference and derive the catalog URI and any pinned revision.
 * 2. When a safety policy is supplied, require `readOnlySourceAccess` and run
 *    `assertWriteAllowed` for the database path.
 * 3. Migrate and open the database, then look everything up in one transaction.
 * 4. Record a `source_read` audit event for every outcome (missing, denied,
 *    allowed).
 *
 * @param options - See `SourceResolveOptions`. `purpose` defaults to
 *   `knowledge_answer`; `limit` defaults to 10, is truncated to an integer and
 *   clamped to the range 0..100 (`NaN` falls back to 10).
 * @returns A result with `resolved: false` and empty content when no catalog
 *   source matches; otherwise the resolved source, revision, chunks and
 *   citations.
 * @throws Error when the reference cannot be parsed, when the safety policy
 *   denies resolution, or when the source's permissions deny the purpose.
 */
export async function resolveOpenFilesSource(options: SourceResolveOptions): Promise<SourceResolveResult> {
  // Outcome of the lookup transaction. A permission denial is carried out of
  // the transaction as data instead of being thrown inside it.
  type Outcome =
    | { denied: false; result: SourceResolveResult }
    | { denied: true; error: unknown; sourceUri: string };

  const purpose = options.purpose ?? 'knowledge_answer';

  // Normalise the limit to an integer in 0..100. NaN would otherwise slip past
  // every comparison and reach the SQL LIMIT parameter.
  const requestedLimit = options.limit ?? 10;
  const limit = Number.isNaN(requestedLimit)
    ? 10
    : Math.max(0, Math.min(Math.trunc(requestedLimit), 100));

  const resolvedAt = (options.now ?? new Date()).toISOString();
  const parsed = parseSourceRef(options.sourceRef);
  const sourceUri = catalogSourceUriForRef(options.sourceRef, parsed);
  const requestedRevision = revisionIdForSourceRef(options.sourceRef);

  if (options.safetyPolicy) {
    if (!options.safetyPolicy.readOnlySourceAccess) throw new Error('Safety policy denied source resolution.');
    // NOTE(review): presumably required because audit events are written to
    // this database even though the source itself is only read — confirm.
    assertWriteAllowed(options.dbPath, options.safetyPolicy);
  }

  migrateKnowledgeDb(options.dbPath);
  const db = openKnowledgeDb(options.dbPath);
  try {
    const outcome = db.transaction((): Outcome => {
      const source = selectSource(db, sourceUri, options.sourceRef);
      if (!source) {
        recordAuditEvent(db, {
          event_type: 'source_read',
          action: 'open_files_resolve_missing',
          target_uri: options.sourceRef,
          decision: 'allow',
          metadata: { purpose, read_only: true, source_uri: sourceUri },
          created_at: resolvedAt,
        });
        const unresolved: SourceResolveResult = {
          source_ref: options.sourceRef,
          source_uri: sourceUri,
          purpose,
          read_only: true,
          resolved: false,
          resolver: {
            name: 'open-files-read-only',
            mode: 'local_catalog',
            contract: 'open-files-knowledge-source-v1',
          },
          source: null,
          revision: null,
          content: {
            mime: null,
            size: null,
            hash: null,
            text_available: false,
            chunks_total: 0,
            chunks_returned: 0,
            char_count_returned: 0,
            extracted_text_ref: null,
            bytes_available: false,
            bytes_exposed: false,
          },
          chunks: [],
          citations: [],
        };
        return { denied: false, result: unresolved };
      }

      const sourceMetadata = parseJsonObject(source.metadata_json);
      const permissions = parseJsonObject(source.acl_json);
      try {
        assertPurposeAllowed(permissions, purpose);
      } catch (error) {
        // Leave the transaction normally. Throwing here would roll it back,
        // and an audit row written inside it would be lost with it.
        return { denied: true, error, sourceUri: source.uri };
      }

      const revision = selectRevision(db, source.id, requestedRevision);
      const revisionMetadata = parseJsonObject(revision?.metadata_json);
      const totalChunks = countChunks(db, revision?.id ?? null);
      const rows = selectChunks(db, revision?.id ?? null, limit);
      const effectiveSourceRef = sourceRevisionRef(source.uri, revision, options.sourceRef);

      const chunks = rows.map((row) => {
        const metadata = parseJsonObject(row.metadata_json);
        const evidence: SourceResolverEvidence = {
          resolver: 'open-files-read-only',
          mode: 'local_catalog',
          purpose,
          read_only: true,
          // A reference stored on the chunk itself wins over the computed one.
          source_ref: metadataString(metadata, ['source_ref']) ?? effectiveSourceRef,
          source_uri: source.uri,
          source_revision_id: revision?.id ?? null,
          revision: revision?.revision ?? null,
          hash: revision?.hash ?? metadataString(metadata, ['hash']),
          chunk_id: row.id,
          start_offset: row.start_offset,
          end_offset: row.end_offset,
          resolved_at: resolvedAt,
        };
        const provenance = sourceProvenance({
          source_ref: evidence.source_ref,
          source_uri: evidence.source_uri,
          source_kind: source.kind,
          source_revision_id: evidence.source_revision_id,
          revision: evidence.revision,
          hash: evidence.hash,
          chunk_id: row.id,
          start_offset: row.start_offset,
          end_offset: row.end_offset,
          status: metadataString(metadata, ['status']),
          resolver: evidence.resolver,
        });
        return {
          id: row.id,
          kind: row.kind,
          ordinal: row.ordinal,
          text: row.text,
          token_count: row.token_count,
          start_offset: row.start_offset,
          end_offset: row.end_offset,
          metadata,
          evidence,
          provenance,
        };
      });

      const citations = chunks.map((chunk) => ({
        source_ref: chunk.evidence.source_ref,
        source_uri: source.uri,
        chunk_id: chunk.id,
        // Quotes are capped at the first 500 characters of the chunk.
        quote: chunk.text.slice(0, 500),
        start_offset: chunk.start_offset,
        end_offset: chunk.end_offset,
        evidence: chunk.evidence,
        provenance: chunk.provenance,
      }));

      recordAuditEvent(db, {
        event_type: 'source_read',
        action: 'open_files_resolve',
        target_uri: options.sourceRef,
        decision: 'allow',
        metadata: {
          purpose,
          read_only: true,
          source_uri: source.uri,
          revision: revision?.revision ?? null,
          chunks_returned: chunks.length,
          chunks_total: totalChunks,
        },
        created_at: resolvedAt,
      });

      // Source-level metadata takes precedence over revision-level metadata.
      const mime = metadataString(sourceMetadata, ['mime', 'content_type']) ?? metadataString(revisionMetadata, ['mime', 'content_type']);
      const size = metadataNumber(sourceMetadata, ['size', 'size_bytes']) ?? metadataNumber(revisionMetadata, ['size', 'size_bytes']);
      const resolved: SourceResolveResult = {
        source_ref: effectiveSourceRef,
        source_uri: source.uri,
        purpose,
        read_only: true,
        resolved: true,
        resolver: {
          name: 'open-files-read-only',
          mode: 'local_catalog',
          contract: 'open-files-knowledge-source-v1',
        },
        source: {
          id: source.id,
          uri: source.uri,
          kind: source.kind,
          title: source.title,
          metadata: sourceMetadata,
          permissions,
          updated_at: source.updated_at,
        },
        revision: revision ? {
          id: revision.id,
          revision: revision.revision,
          hash: revision.hash,
          extracted_text_uri: revision.extracted_text_uri,
          metadata: revisionMetadata,
          created_at: revision.created_at,
          reindex_required: revisionMetadata.reindex_required === true,
        } : null,
        content: {
          mime,
          size,
          hash: revision?.hash ?? metadataString(sourceMetadata, ['hash', 'checksum', 'sha256']),
          text_available: totalChunks > 0,
          chunks_total: totalChunks,
          chunks_returned: chunks.length,
          char_count_returned: chunks.reduce((sum, chunk) => sum + chunk.text.length, 0),
          extracted_text_ref: revision?.extracted_text_uri ?? metadataString(revisionMetadata, ['extracted_text_ref', 'extracted_text_uri']),
          bytes_available: false,
          bytes_exposed: false,
        },
        chunks,
        citations,
      };
      return { denied: false, result: resolved };
    })();

    if (!outcome.denied) return outcome.result;

    // Record the denial only after the transaction has finished, so that
    // rethrowing the error cannot undo the audit record.
    recordAuditEvent(db, {
      event_type: 'source_read',
      action: 'open_files_resolve',
      target_uri: options.sourceRef,
      decision: 'deny',
      metadata: {
        purpose,
        read_only: true,
        source_uri: outcome.sourceUri,
        error: outcome.error instanceof Error ? outcome.error.message : String(outcome.error),
      },
      created_at: resolvedAt,
    });
    throw outcome.error;
  } finally {
    db.close();
  }
}
|