@hasna/knowledge 0.2.7 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -1
- package/bin/open-knowledge-mcp.js +624 -5
- package/bin/open-knowledge.js +42 -25
- package/docs/architecture/ai-native-knowledge-base.md +18 -0
- package/package.json +1 -1
- package/src/cli.ts +35 -4
- package/src/mcp.js +25 -0
- package/src/source-ref.ts +12 -0
- package/src/source-resolver.ts +418 -0
package/src/mcp.js
CHANGED
|
@@ -7,6 +7,8 @@ import pkg from '../package.json' with { type: 'json' };
|
|
|
7
7
|
import { defaultStorePath, loadStore, saveStore, makeId, withLock } from './store.ts';
|
|
8
8
|
import { ensureKnowledgeWorkspace, readKnowledgeConfig, resolveScopedWorkspace } from './workspace.ts';
|
|
9
9
|
import { parseSourceRef } from './source-ref.ts';
|
|
10
|
+
import { resolveOpenFilesSource } from './source-resolver.ts';
|
|
11
|
+
import { resolveSafetyPolicy } from './safety.ts';
|
|
10
12
|
|
|
11
13
|
const storePathField = z.string().optional().describe('Path to the JSON store file');
|
|
12
14
|
const scopeField = z.enum(['local', 'global', 'project']).optional().describe('Workspace scope');
|
|
@@ -102,6 +104,29 @@ export function buildServer() {
|
|
|
102
104
|
}
|
|
103
105
|
});
|
|
104
106
|
|
|
107
|
+
// Tool: ok_resolve_source
// Resolves a source ref through resolveOpenFilesSource using the knowledge DB and
// safety policy of the workspace selected by `scope`. Resolver failures are
// reported through errorText rather than thrown.
registerTool(
  server,
  'ok_resolve_source',
  'Resolve source content',
  'Resolve an indexed source ref through the read-only open-files boundary and return chunk citation evidence',
  {
    source_ref: z.string().describe('Source reference URI, preferably open-files://...'),
    purpose: z.string().optional().describe('Read-only purpose label, default knowledge_answer'),
    limit: z.number().optional().describe('Maximum chunks to return, default 10'),
    scope: scopeField,
  },
  async (args) => {
    const { source_ref, purpose, limit, scope } = args;

    // Workspace, config and policy are resolved before the guarded section, so a
    // failure here propagates to the caller instead of becoming an errorText reply.
    const scoped = resolveScopedWorkspace(scope);
    const workspace = ensureKnowledgeWorkspace(scoped.home);
    const config = readKnowledgeConfig(workspace.configPath);
    const safetyPolicy = resolveSafetyPolicy(config, workspace);

    try {
      const resolved = await resolveOpenFilesSource({
        dbPath: workspace.knowledgeDbPath,
        sourceRef: source_ref,
        purpose,
        limit,
        safetyPolicy,
      });
      return jsonText({ ok: true, ...resolved });
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      return errorText(message);
    }
  },
);
|
|
129
|
+
|
|
105
130
|
registerTool(server, 'ok_add', 'Add a knowledge item', 'Add a new item to the knowledge store', {
|
|
106
131
|
title: z.string().describe('Item title'),
|
|
107
132
|
content: z.string().describe('Item content/body'),
|
package/src/source-ref.ts
CHANGED
|
@@ -82,6 +82,18 @@ export function parseSourceRef(uri: string): SourceRef {
|
|
|
82
82
|
throw new Error(`Unsupported source ref scheme: ${uri}`);
|
|
83
83
|
}
|
|
84
84
|
|
|
85
|
+
/**
 * Map a source ref to the URI it is looked up under in the catalog.
 *
 * For an open-files file ref that carries a revision id, the trailing
 * `/revision/<id>` segment is removed; every other ref is returned unchanged.
 *
 * @param uri - The source reference as supplied by the caller.
 * @param parsed - Pre-parsed form of `uri`; computed with `parseSourceRef` when omitted.
 * @returns The URI without a trailing revision segment.
 */
export function catalogSourceUriForRef(uri: string, parsed = parseSourceRef(uri)): string {
  const pinsFileRevision =
    parsed.kind === 'open-files' && parsed.entity === 'file' && Boolean(parsed.revision_id);
  return pinsFileRevision ? uri.replace(/\/revision\/[^/]+$/, '') : uri;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Extract the revision id pinned by a source ref.
 *
 * @param uri - The source reference to inspect; parsed with `parseSourceRef`,
 *   whose errors propagate to the caller.
 * @returns The revision id for an open-files file ref that carries one, otherwise `null`.
 */
export function revisionIdForSourceRef(uri: string): string | null {
  const ref = parseSourceRef(uri);
  if (ref.kind !== 'open-files' || ref.entity !== 'file') return null;
  return ref.revision_id ?? null;
}
|
|
96
|
+
|
|
85
97
|
export function isSupportedSourceRef(uri: string): boolean {
|
|
86
98
|
try {
|
|
87
99
|
parseSourceRef(uri);
|
|
@@ -0,0 +1,418 @@
|
|
|
1
|
+
import type { Database } from 'bun:sqlite';
|
|
2
|
+
import { migrateKnowledgeDb, openKnowledgeDb } from './knowledge-db';
|
|
3
|
+
import { catalogSourceUriForRef, parseSourceRef, revisionIdForSourceRef } from './source-ref';
|
|
4
|
+
import { assertWriteAllowed, recordAuditEvent, type SafetyPolicy } from './safety';
|
|
5
|
+
|
|
6
|
+
/** Input accepted by `resolveOpenFilesSource`. */
export interface SourceResolveOptions {
  /** Path of the knowledge database to migrate, open and query. */
  dbPath: string;
  /** Source reference to resolve. */
  sourceRef: string;
  /** Purpose label checked against the source ACL; the resolver defaults it to `knowledge_answer`. */
  purpose?: string;
  /** Maximum number of chunks to return; the resolver defaults it to 10 and caps it at 100. */
  limit?: number;
  /** Clock override used for the `resolved_at` / audit timestamps; defaults to the current time. */
  now?: Date;
  /** Optional safety policy consulted before the database is touched. */
  safetyPolicy?: SafetyPolicy;
}
|
|
14
|
+
|
|
15
|
+
/** Provenance record attached to every resolved chunk and citation. */
export interface SourceResolverEvidence {
  /** Fixed resolver identifier. */
  resolver: 'open-files-read-only';
  /** Fixed resolution mode. */
  mode: 'local_catalog';
  /** Purpose label the resolution was performed for. */
  purpose: string;
  /** Always `true`. */
  read_only: true;
  /** Ref of the cited content: the chunk's own `source_ref` metadata when present, else the resolved ref. */
  source_ref: string;
  /** URI of the catalog source row. */
  source_uri: string;
  /** Id of the revision row the chunk belongs to, when a revision was found. */
  source_revision_id: string | null;
  /** Revision label of that row, when a revision was found. */
  revision: string | null;
  /** Revision hash, falling back to a `hash` entry in the chunk metadata. */
  hash: string | null;
  /** Id of the chunk this evidence describes. */
  chunk_id?: string;
  /** Start offset stored on the chunk row. */
  start_offset?: number | null;
  /** End offset stored on the chunk row. */
  end_offset?: number | null;
  /** ISO-8601 timestamp of the resolution. */
  resolved_at: string;
}
|
|
30
|
+
|
|
31
|
+
/** One chunk row returned by the resolver, with its decoded metadata and evidence. */
export interface ResolvedSourceChunk {
  /** Chunk row id. */
  id: string;
  /** Chunk kind as stored on the row. */
  kind: string;
  /** Position of the chunk within its revision; results are ordered by this value. */
  ordinal: number;
  /** Chunk text. */
  text: string;
  /** Token count stored on the row, if any. */
  token_count: number | null;
  /** Start offset stored on the row, if any. */
  start_offset: number | null;
  /** End offset stored on the row, if any. */
  end_offset: number | null;
  /** Decoded `metadata_json` of the row (`{}` when absent or not a JSON object). */
  metadata: Record<string, unknown>;
  /** Provenance record for this chunk. */
  evidence: SourceResolverEvidence;
}
|
|
42
|
+
|
|
43
|
+
/** Citation derived from a resolved chunk. */
export interface ResolvedSourceCitation {
  /** Ref of the cited content, copied from the chunk evidence. */
  source_ref: string;
  /** URI of the catalog source row. */
  source_uri: string;
  /** Id of the cited chunk. */
  chunk_id: string;
  /** Leading excerpt of the chunk text (the resolver keeps the first 500 characters). */
  quote: string;
  /** Start offset of the cited chunk, if any. */
  start_offset: number | null;
  /** End offset of the cited chunk, if any. */
  end_offset: number | null;
  /** Provenance record shared with the cited chunk. */
  evidence: SourceResolverEvidence;
}
|
|
52
|
+
|
|
53
|
+
/** Full result of `resolveOpenFilesSource`. */
export interface SourceResolveResult {
  /** Resolved ref; the requested ref when nothing more specific could be derived. */
  source_ref: string;
  /** Catalog URI used for, or found by, the lookup. */
  source_uri: string;
  /** Purpose label the resolution was performed for. */
  purpose: string;
  /** Always `true`. */
  read_only: true;
  /** `false` when no matching source row exists. */
  resolved: boolean;
  /** Fixed description of the resolver that produced this result. */
  resolver: {
    name: 'open-files-read-only';
    mode: 'local_catalog';
    contract: 'open-files-knowledge-source-v1';
  };
  /** The catalog source row, or `null` when unresolved. */
  source: {
    id: string;
    uri: string;
    kind: string;
    title: string | null;
    /** Decoded `metadata_json` of the source row. */
    metadata: Record<string, unknown>;
    /** Decoded `acl_json` of the source row. */
    permissions: Record<string, unknown>;
    updated_at: string;
  } | null;
  /** The selected revision row, or `null` when none was found. */
  revision: {
    id: string;
    revision: string;
    hash: string | null;
    extracted_text_uri: string | null;
    /** Decoded `metadata_json` of the revision row. */
    metadata: Record<string, unknown>;
    created_at: string;
    /** `true` only when the revision metadata has `reindex_required === true`. */
    reindex_required: boolean;
  } | null;
  /** Summary of the content that was (or could be) returned. */
  content: {
    mime: string | null;
    size: number | null;
    hash: string | null;
    /** `true` when the revision has at least one chunk. */
    text_available: boolean;
    /** Number of chunks stored for the revision. */
    chunks_total: number;
    /** Number of chunks included in `chunks`. */
    chunks_returned: number;
    /** Sum of the text lengths of the returned chunks. */
    char_count_returned: number;
    extracted_text_ref: string | null;
    /** Always `false`. */
    bytes_available: false;
    /** Always `false`. */
    bytes_exposed: false;
  };
  /** Returned chunks, in ordinal order. */
  chunks: ResolvedSourceChunk[];
  /** One citation per returned chunk. */
  citations: ResolvedSourceCitation[];
}
|
|
97
|
+
|
|
98
|
+
/** Columns read from the `sources` table. */
interface DbSourceRow {
  id: string;
  uri: string;
  kind: string;
  title: string | null;
  /** JSON text; decoded with `parseJsonObject`. */
  metadata_json: string;
  /** JSON text holding the permissions checked by `assertPurposeAllowed`. */
  acl_json: string;
  updated_at: string;
}
|
|
107
|
+
|
|
108
|
+
/** Columns read from the `source_revisions` table. */
interface DbRevisionRow {
  id: string;
  /** Revision label matched against the revision id of a requested ref. */
  revision: string;
  hash: string | null;
  extracted_text_uri: string | null;
  /** JSON text; decoded with `parseJsonObject`. */
  metadata_json: string;
  /** Primary sort key when the latest revision is selected. */
  created_at: string;
}
|
|
116
|
+
|
|
117
|
+
/** Columns read from the `chunks` table. */
interface DbChunkRow {
  id: string;
  kind: string;
  /** Sort key used when chunks are fetched. */
  ordinal: number;
  text: string;
  token_count: number | null;
  start_offset: number | null;
  end_offset: number | null;
  /** JSON text; decoded with `parseJsonObject`. */
  metadata_json: string;
}
|
|
127
|
+
|
|
128
|
+
/**
 * Decode a JSON column into a plain object, never throwing.
 *
 * @param value - JSON text, or an empty/absent value.
 * @returns The decoded object; `{}` when the input is empty, is not valid JSON,
 *   or decodes to anything other than a non-array object.
 */
function parseJsonObject(value: string | null | undefined): Record<string, unknown> {
  if (value === null || value === undefined || value === '') return {};

  let decoded: unknown;
  try {
    decoded = JSON.parse(value);
  } catch {
    return {};
  }

  const isPlainObject = typeof decoded === 'object' && decoded !== null && !Array.isArray(decoded);
  return isPlainObject ? (decoded as Record<string, unknown>) : {};
}
|
|
137
|
+
|
|
138
|
+
/**
 * Look up the first non-empty string stored under any of the given keys.
 *
 * @param metadata - Decoded metadata object.
 * @param keys - Candidate keys, tried in order.
 * @returns The first value that is a non-empty string, or `null` when none qualifies.
 */
function metadataString(metadata: Record<string, unknown>, keys: string[]): string | null {
  const match = keys
    .map((key) => metadata[key])
    .find((candidate): candidate is string => typeof candidate === 'string' && candidate.length > 0);
  return match ?? null;
}
|
|
145
|
+
|
|
146
|
+
/**
 * Look up the first finite number stored under any of the given keys.
 *
 * @param metadata - Decoded metadata object.
 * @param keys - Candidate keys, tried in order.
 * @returns The first value that is a finite number, or `null` when none qualifies.
 */
function metadataNumber(metadata: Record<string, unknown>, keys: string[]): number | null {
  const match = keys
    .map((key) => metadata[key])
    .find((candidate): candidate is number => typeof candidate === 'number' && Number.isFinite(candidate));
  return match ?? null;
}
|
|
153
|
+
|
|
154
|
+
/**
 * Enforce a source's ACL for the requested purpose.
 *
 * Checks run in this order and the first failure wins:
 * 1. a string `mode` other than `read_only`;
 * 2. `purpose` listed in the `denied_purposes` array;
 * 3. a non-empty `allowed_purposes` array that does not list `purpose`.
 * Entries of the wrong type are ignored.
 *
 * @param permissions - Decoded ACL object of the source.
 * @param purpose - Purpose label being requested.
 * @throws Error when any of the checks above fails.
 */
function assertPurposeAllowed(permissions: Record<string, unknown>, purpose: string): void {
  const {
    mode,
    denied_purposes: deniedPurposes,
    allowed_purposes: allowedPurposes,
  } = permissions;

  const modeRejected = typeof mode === 'string' && mode !== 'read_only';
  if (modeRejected) {
    throw new Error(`Source resolver denied ${purpose}. Permission mode is ${mode}, expected read_only.`);
  }

  if (Array.isArray(deniedPurposes) && deniedPurposes.includes(purpose)) {
    throw new Error(`Source resolver denied ${purpose}. Purpose is explicitly denied.`);
  }

  // An empty allow-list places no restriction on the purpose.
  if (Array.isArray(allowedPurposes) && allowedPurposes.length > 0) {
    if (!allowedPurposes.includes(purpose)) {
      throw new Error(`Source resolver denied ${purpose}. Allowed purposes: ${allowedPurposes.join(', ')}`);
    }
  }
}
|
|
170
|
+
|
|
171
|
+
/**
 * Build the revision-qualified ref for a resolved source.
 *
 * For an open-files file source the result is `<catalog uri>/revision/<revision>`,
 * with the revision label URI-encoded. Any `/revision/<id>` suffix already on
 * `sourceUri` is removed first: the source lookup may match a row stored under a
 * revision-qualified URI, and appending to it unchanged would produce a doubled
 * `/revision/a/revision/b` ref.
 *
 * @param sourceUri - URI of the catalog source row.
 * @param revision - The selected revision row, or `null` when none was found.
 * @param fallback - Ref returned when no revision-qualified ref can be built.
 * @returns The revision-qualified ref, or `fallback` when there is no revision,
 *   the URI is not an open-files file ref, or the URI cannot be parsed.
 */
function sourceRevisionRef(sourceUri: string, revision: DbRevisionRow | null, fallback: string): string {
  if (!revision) return fallback;
  try {
    const parsed = parseSourceRef(sourceUri);
    if (parsed.kind === 'open-files' && parsed.entity === 'file') {
      // Normalise to the catalog URI so exactly one revision segment is emitted.
      const baseUri = catalogSourceUriForRef(sourceUri, parsed);
      return `${baseUri}/revision/${encodeURIComponent(revision.revision)}`;
    }
  } catch {
    // An unparseable URI is not an error here; the caller's ref is used as-is.
    return fallback;
  }
  return fallback;
}
|
|
183
|
+
|
|
184
|
+
/**
 * Fetch the catalog row for a source.
 *
 * A row matches when its `uri` equals either the catalog URI or the exact requested
 * ref; when both exist, the row stored under the catalog URI is preferred.
 *
 * @param db - Open knowledge database.
 * @param sourceUri - Catalog URI of the source.
 * @param requestedRef - The ref exactly as requested by the caller.
 * @returns The matching row, or `null` when none exists.
 */
function selectSource(db: Database, sourceUri: string, requestedRef: string): DbSourceRow | null {
  const statement = db.query<DbSourceRow, [string, string, string]>(
    `SELECT id, uri, kind, title, metadata_json, acl_json, updated_at
     FROM sources
     WHERE uri = ? OR uri = ?
     ORDER BY CASE WHEN uri = ? THEN 0 ELSE 1 END
     LIMIT 1`,
  );
  // The catalog URI is bound twice: once for the match, once for the ordering.
  const row = statement.get(sourceUri, requestedRef, sourceUri);
  return row ?? null;
}
|
|
193
|
+
|
|
194
|
+
/**
 * Fetch a revision row of a source.
 *
 * @param db - Open knowledge database.
 * @param sourceId - Id of the source row.
 * @param revisionId - Revision label to match exactly; when empty or `null`, the
 *   latest revision (by `created_at`, then `revision`, descending) is selected.
 * @returns The selected row, or `null` when the source has no matching revision.
 */
function selectRevision(db: Database, sourceId: string, revisionId: string | null): DbRevisionRow | null {
  if (!revisionId) {
    const latest = db.query<DbRevisionRow, [string]>(
      `SELECT id, revision, hash, extracted_text_uri, metadata_json, created_at
       FROM source_revisions
       WHERE source_id = ?
       ORDER BY created_at DESC, revision DESC
       LIMIT 1`,
    ).get(sourceId);
    return latest ?? null;
  }

  const pinned = db.query<DbRevisionRow, [string, string]>(
    `SELECT id, revision, hash, extracted_text_uri, metadata_json, created_at
     FROM source_revisions
     WHERE source_id = ? AND revision = ?
     LIMIT 1`,
  ).get(sourceId, revisionId);
  return pinned ?? null;
}
|
|
211
|
+
|
|
212
|
+
/**
 * Count the chunks stored for a revision.
 *
 * @param db - Open knowledge database.
 * @param revisionId - Id of the revision row; the database is not queried when empty or `null`.
 * @returns The number of chunks, or 0 when there is no revision.
 */
function countChunks(db: Database, revisionId: string | null): number {
  const tally = revisionId
    ? db.query<{ n: number }, [string]>('SELECT COUNT(*) AS n FROM chunks WHERE source_revision_id = ?').get(revisionId)
    : null;
  return tally?.n ?? 0;
}
|
|
217
|
+
|
|
218
|
+
/**
 * Fetch the first chunks of a revision in ordinal order.
 *
 * @param db - Open knowledge database.
 * @param revisionId - Id of the revision row.
 * @param limit - Maximum number of rows to fetch.
 * @returns Up to `limit` chunk rows; an empty array, without querying, when there
 *   is no revision or `limit` is not positive.
 */
function selectChunks(db: Database, revisionId: string | null, limit: number): DbChunkRow[] {
  if (limit <= 0 || !revisionId) return [];

  const statement = db.query<DbChunkRow, [string, number]>(
    `SELECT id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json
     FROM chunks
     WHERE source_revision_id = ?
     ORDER BY ordinal ASC
     LIMIT ?`,
  );
  return statement.all(revisionId, limit);
}
|
|
228
|
+
|
|
229
|
+
/**
 * Resolve a source reference against the local knowledge catalog.
 *
 * The source row is looked up by catalog URI (or by the exact requested ref), its
 * ACL is checked against `purpose`, the requested revision (or the latest one) is
 * selected, and up to `limit` chunks are returned in ordinal order together with
 * one citation per chunk. An audit event is recorded for every outcome: missing
 * source, denial, and success.
 *
 * @param options - See {@link SourceResolveOptions}. `purpose` defaults to
 *   `knowledge_answer`. `limit` defaults to 10, is truncated to an integer and
 *   clamped to the range 0..100; `NaN` falls back to the default.
 * @returns The resolution result. `resolved` is `false` (with empty `chunks` and
 *   `citations`) when no matching source row exists.
 * @throws Error when the ref cannot be parsed, when the safety policy disallows
 *   source access or writes to `dbPath`, or when the source ACL denies `purpose`.
 */
export async function resolveOpenFilesSource(options: SourceResolveOptions): Promise<SourceResolveResult> {
  // Outcome of the transactional section. A denial is carried out as a value so
  // the transaction can commit the audit row before the error is raised.
  type ResolveOutcome =
    | { ok: true; result: SourceResolveResult }
    | { ok: false; error: unknown };

  const purpose = options.purpose ?? 'knowledge_answer';

  // SQLite rejects a LIMIT that is not losslessly an integer, so the limit is
  // normalised before it reaches the query: NaN -> default, fractions truncated.
  const requestedLimit = options.limit ?? 10;
  const limit = Number.isNaN(requestedLimit)
    ? 10
    : Math.max(0, Math.min(Math.trunc(requestedLimit), 100));

  const resolvedAt = (options.now ?? new Date()).toISOString();
  const parsed = parseSourceRef(options.sourceRef);
  const sourceUri = catalogSourceUriForRef(options.sourceRef, parsed);
  const requestedRevision = revisionIdForSourceRef(options.sourceRef);

  if (options.safetyPolicy) {
    if (!options.safetyPolicy.readOnlySourceAccess) throw new Error('Safety policy denied source resolution.');
    // Resolution itself only reads sources, but audit events are written to the DB.
    assertWriteAllowed(options.dbPath, options.safetyPolicy);
  }

  migrateKnowledgeDb(options.dbPath);
  const db = openKnowledgeDb(options.dbPath);
  try {
    const outcome = db.transaction((): ResolveOutcome => {
      const source = selectSource(db, sourceUri, options.sourceRef);
      if (!source) {
        recordAuditEvent(db, {
          event_type: 'source_read',
          action: 'open_files_resolve_missing',
          target_uri: options.sourceRef,
          decision: 'allow',
          metadata: { purpose, read_only: true, source_uri: sourceUri },
          created_at: resolvedAt,
        });
        return {
          ok: true,
          result: {
            source_ref: options.sourceRef,
            source_uri: sourceUri,
            purpose,
            read_only: true,
            resolved: false,
            resolver: {
              name: 'open-files-read-only',
              mode: 'local_catalog',
              contract: 'open-files-knowledge-source-v1',
            },
            source: null,
            revision: null,
            content: {
              mime: null,
              size: null,
              hash: null,
              text_available: false,
              chunks_total: 0,
              chunks_returned: 0,
              char_count_returned: 0,
              extracted_text_ref: null,
              bytes_available: false,
              bytes_exposed: false,
            },
            chunks: [],
            citations: [],
          } satisfies SourceResolveResult,
        };
      }

      const sourceMetadata = parseJsonObject(source.metadata_json);
      const permissions = parseJsonObject(source.acl_json);
      try {
        assertPurposeAllowed(permissions, purpose);
      } catch (error) {
        recordAuditEvent(db, {
          event_type: 'source_read',
          action: 'open_files_resolve',
          target_uri: options.sourceRef,
          decision: 'deny',
          metadata: {
            purpose,
            read_only: true,
            source_uri: source.uri,
            error: error instanceof Error ? error.message : String(error),
          },
          created_at: resolvedAt,
        });
        // Throwing here would roll the transaction back and discard the audit row
        // just written; return the denial so it is raised after the commit.
        return { ok: false, error };
      }

      const revision = selectRevision(db, source.id, requestedRevision);
      const revisionMetadata = parseJsonObject(revision?.metadata_json);
      const totalChunks = countChunks(db, revision?.id ?? null);
      const rows = selectChunks(db, revision?.id ?? null, limit);
      const effectiveSourceRef = sourceRevisionRef(source.uri, revision, options.sourceRef);

      const chunks = rows.map((row) => {
        const metadata = parseJsonObject(row.metadata_json);
        const evidence: SourceResolverEvidence = {
          resolver: 'open-files-read-only',
          mode: 'local_catalog',
          purpose,
          read_only: true,
          // A chunk may carry its own ref; otherwise it inherits the resolved one.
          source_ref: metadataString(metadata, ['source_ref']) ?? effectiveSourceRef,
          source_uri: source.uri,
          source_revision_id: revision?.id ?? null,
          revision: revision?.revision ?? null,
          hash: revision?.hash ?? metadataString(metadata, ['hash']),
          chunk_id: row.id,
          start_offset: row.start_offset,
          end_offset: row.end_offset,
          resolved_at: resolvedAt,
        };
        return {
          id: row.id,
          kind: row.kind,
          ordinal: row.ordinal,
          text: row.text,
          token_count: row.token_count,
          start_offset: row.start_offset,
          end_offset: row.end_offset,
          metadata,
          evidence,
        };
      });

      const citations = chunks.map((chunk) => ({
        source_ref: chunk.evidence.source_ref,
        source_uri: source.uri,
        chunk_id: chunk.id,
        quote: chunk.text.slice(0, 500),
        start_offset: chunk.start_offset,
        end_offset: chunk.end_offset,
        evidence: chunk.evidence,
      }));

      recordAuditEvent(db, {
        event_type: 'source_read',
        action: 'open_files_resolve',
        target_uri: options.sourceRef,
        decision: 'allow',
        metadata: {
          purpose,
          read_only: true,
          source_uri: source.uri,
          revision: revision?.revision ?? null,
          chunks_returned: chunks.length,
          chunks_total: totalChunks,
        },
        created_at: resolvedAt,
      });

      // Source-level metadata wins over revision-level metadata for mime and size.
      const mime = metadataString(sourceMetadata, ['mime', 'content_type']) ?? metadataString(revisionMetadata, ['mime', 'content_type']);
      const size = metadataNumber(sourceMetadata, ['size', 'size_bytes']) ?? metadataNumber(revisionMetadata, ['size', 'size_bytes']);
      return {
        ok: true,
        result: {
          source_ref: effectiveSourceRef,
          source_uri: source.uri,
          purpose,
          read_only: true,
          resolved: true,
          resolver: {
            name: 'open-files-read-only',
            mode: 'local_catalog',
            contract: 'open-files-knowledge-source-v1',
          },
          source: {
            id: source.id,
            uri: source.uri,
            kind: source.kind,
            title: source.title,
            metadata: sourceMetadata,
            permissions,
            updated_at: source.updated_at,
          },
          revision: revision ? {
            id: revision.id,
            revision: revision.revision,
            hash: revision.hash,
            extracted_text_uri: revision.extracted_text_uri,
            metadata: revisionMetadata,
            created_at: revision.created_at,
            reindex_required: revisionMetadata.reindex_required === true,
          } : null,
          content: {
            mime,
            size,
            hash: revision?.hash ?? metadataString(sourceMetadata, ['hash', 'checksum', 'sha256']),
            text_available: totalChunks > 0,
            chunks_total: totalChunks,
            chunks_returned: chunks.length,
            char_count_returned: chunks.reduce((sum, chunk) => sum + chunk.text.length, 0),
            extracted_text_ref: revision?.extracted_text_uri ?? metadataString(revisionMetadata, ['extracted_text_ref', 'extracted_text_uri']),
            bytes_available: false,
            bytes_exposed: false,
          },
          chunks,
          citations,
        } satisfies SourceResolveResult,
      };
    })();

    // The transaction has committed; surface a recorded denial to the caller.
    if (!outcome.ok) throw outcome.error;
    return outcome.result;
  } finally {
    db.close();
  }
}
|