@hasna/knowledge 0.2.26 → 0.2.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/README.md +61 -0
  2. package/bin/open-knowledge-mcp.js +85 -9
  3. package/bin/open-knowledge.js +86 -86
  4. package/dist/agent.d.ts +35 -0
  5. package/dist/artifact-store.d.ts +63 -0
  6. package/dist/auth.d.ts +35 -0
  7. package/dist/embeddings.d.ts +77 -0
  8. package/dist/index.d.ts +20 -0
  9. package/dist/index.js +5709 -0
  10. package/dist/knowledge-db.d.ts +27 -0
  11. package/dist/manifest-ingest.d.ts +35 -0
  12. package/dist/outbox-consume.d.ts +25 -0
  13. package/dist/provenance.d.ts +50 -0
  14. package/dist/providers.d.ts +89 -0
  15. package/dist/reindex.d.ts +37 -0
  16. package/dist/remote-client.d.ts +108 -0
  17. package/dist/retrieval.d.ts +71 -0
  18. package/dist/safety.d.ts +70 -0
  19. package/dist/sdk.d.ts +72 -0
  20. package/dist/search.d.ts +65 -0
  21. package/dist/service.d.ts +117 -0
  22. package/dist/source-ingest.d.ts +18 -0
  23. package/dist/source-ref.d.ts +30 -0
  24. package/dist/source-resolver.d.ts +92 -0
  25. package/dist/storage-contract.d.ts +106 -0
  26. package/dist/web-search.d.ts +40 -0
  27. package/dist/wiki-compiler.d.ts +67 -0
  28. package/dist/wiki-layout.d.ts +23 -0
  29. package/dist/workspace.d.ts +111 -0
  30. package/docs/architecture/ai-native-knowledge-base.md +24 -0
  31. package/docs/architecture/hosted-wrapper-responsibilities.md +8 -0
  32. package/docs/canonical-secrets-bootstrap-2026-06-08.md +127 -0
  33. package/package.json +15 -7
  34. package/src/agent.ts +0 -367
  35. package/src/artifact-store.ts +0 -184
  36. package/src/auth.ts +0 -123
  37. package/src/cli.ts +0 -1181
  38. package/src/embeddings.ts +0 -516
  39. package/src/knowledge-db.ts +0 -354
  40. package/src/manifest-ingest.ts +0 -515
  41. package/src/mcp-http.js +0 -110
  42. package/src/mcp.js +0 -1503
  43. package/src/outbox-consume.ts +0 -463
  44. package/src/provenance.ts +0 -93
  45. package/src/providers.ts +0 -308
  46. package/src/reindex.ts +0 -260
  47. package/src/remote-client.ts +0 -268
  48. package/src/retrieval.ts +0 -326
  49. package/src/safety.ts +0 -265
  50. package/src/schema.js +0 -25
  51. package/src/search.ts +0 -510
  52. package/src/service.ts +0 -432
  53. package/src/source-ingest.ts +0 -268
  54. package/src/source-ref.ts +0 -104
  55. package/src/source-resolver.ts +0 -436
  56. package/src/storage-contract.ts +0 -293
  57. package/src/store.ts +0 -113
  58. package/src/web-search.ts +0 -330
  59. package/src/wiki-compiler.ts +0 -711
  60. package/src/wiki-layout.ts +0 -251
  61. package/src/workspace.ts +0 -213
package/src/outbox-consume.ts DELETED
@@ -1,463 +0,0 @@
1
- import { createHash, randomUUID } from 'node:crypto';
2
- import { existsSync, readFileSync } from 'node:fs';
3
- import { basename } from 'node:path';
4
- import type { Database } from 'bun:sqlite';
5
- import { migrateKnowledgeDb, openKnowledgeDb } from './knowledge-db';
6
- import { parseSourceRef, type SourceRef } from './source-ref';
7
- import type { KnowledgeConfig } from './workspace';
8
- import { assertS3ReadAllowed, assertWriteAllowed, recordAuditEvent, type SafetyPolicy } from './safety';
9
-
/** Loosely typed JSON object taken from an outbox payload; values are narrowed where they are read. */
type OutboxObject = Record<string, unknown>;
11
-
/** Inputs accepted by `consumeOpenFilesOutbox`. */
export interface OutboxConsumeOptions {
  /** Path of the knowledge database that is migrated, opened, and written to. */
  dbPath: string;
  /** Outbox location: an `s3://` URI or a local file path. */
  input: string;
  /** Optional workspace config; its S3 storage settings are used when the configured bucket matches the input URI. */
  config?: KnowledgeConfig;
  /** Optional safety policy; when present, the write and S3-read assertions run before any work is done. */
  safetyPolicy?: SafetyPolicy;
  /** Timestamp used for the run; defaults to the current time when omitted. */
  now?: Date;
}
19
-
/** Summary returned by `consumeOpenFilesOutbox` after all events have been applied. */
export interface OutboxConsumeResult {
  /** The outbox input (path or URI) that was consumed. */
  path: string;
  /** The knowledge database path that was written to. */
  db_path: string;
  /** Identifier of the run row recorded for this consumption. */
  run_id: string;
  /** Number of events parsed from the outbox. */
  events_seen: number;
  /** Number of distinct source ids touched. */
  sources_touched: number;
  /** Number of distinct revision ids touched. */
  revisions_touched: number;
  /** Total chunk rows found on invalidated revisions. */
  chunks_deleted: number;
  /** Total chunk embedding rows counted before deletion. */
  embeddings_deleted: number;
  /** Number of revision invalidations performed (one per affected revision per event). */
  stale_revisions: number;
  /** Number of events classified as deletions. */
  deleted_sources: number;
  /** Number of events classified as moves or renames. */
  moved_sources: number;
  /** Number of events that were permission events or carried ACL data. */
  permission_updates: number;
  /** Total vector index entries counted before deletion. */
  vector_entries_deleted: number;
}
35
-
/** Canonical view of one outbox event, as produced by `normalizeEvent`. */
interface NormalizedOutboxEvent {
  /** The original event object, kept for fields that are read later (e.g. `path`). */
  raw: OutboxObject;
  /** Lower-cased event name; `'changed'` when the event names none. */
  eventType: string;
  /** Full source reference, possibly including a revision segment. */
  sourceRef: string;
  /** Source reference with any trailing file-revision segment removed. */
  sourceUri: string;
  /** Kind reported by the parsed source reference. */
  kind: SourceRef['kind'];
  /** Title, name, or base name of the path; `null` when none is available. */
  title: string | null;
  /** Revision identifier, falling back to the content hash; `null` when neither exists. */
  revision: string | null;
  /** Content hash taken from the event, if any. */
  hash: string | null;
  /** Lower-cased status string, if any. */
  status: string | null;
  /** Event timestamp, or the run timestamp when the event has none. */
  updatedAt: string;
  /** Permissions/ACL payload from the event; `undefined` when absent. */
  acl: unknown;
}
49
-
/**
 * Derive a deterministic identifier from a prefix and an arbitrary value.
 *
 * @param prefix Label placed before the underscore.
 * @param value Text that is hashed with SHA-256.
 * @returns `<prefix>_<first 20 hex characters of the digest>`.
 */
function stableId(prefix: string, value: string): string {
  const digest = createHash('sha256').update(value).digest('hex');
  const shortDigest = digest.slice(0, 20);
  return `${prefix}_${shortDigest}`;
}
53
-
/**
 * Narrow an unknown value to a plain (non-array) object.
 *
 * @returns The same value typed as an object, or `undefined` for falsy values, primitives, and arrays.
 */
function asObject(value: unknown): OutboxObject | undefined {
  if (!value) return undefined;
  if (typeof value !== 'object') return undefined;
  if (Array.isArray(value)) return undefined;
  return value as OutboxObject;
}
57
-
/**
 * Narrow an unknown value to a non-empty string.
 *
 * @returns The string itself, or `undefined` for non-strings and the empty string.
 */
function asString(value: unknown): string | undefined {
  if (typeof value !== 'string') return undefined;
  return value.length > 0 ? value : undefined;
}
61
-
/**
 * Work out the source reference for a raw outbox event.
 *
 * Preference order: an explicit `source_ref`, `source_uri`, or `uri`; then a
 * reference built from `file_id` (plus `revision_id`/`revision` when given);
 * then one built from `source_id` and `path`.
 *
 * @throws Error when the event carries none of those identifiers.
 */
function buildSourceRef(event: OutboxObject): string {
  for (const key of ['source_ref', 'source_uri', 'uri'] as const) {
    const explicit = asString(event[key]);
    if (explicit) return explicit;
  }

  const fileId = asString(event.file_id);
  if (fileId) {
    const fileRef = `open-files://file/${encodeURIComponent(fileId)}`;
    const revision = asString(event.revision_id) ?? asString(event.revision);
    if (!revision) return fileRef;
    return `${fileRef}/revision/${encodeURIComponent(revision)}`;
  }

  const sourceId = asString(event.source_id);
  const path = asString(event.path);
  if (!sourceId || !path) {
    throw new Error('Outbox event is missing source_ref, file_id, or source_id/path.');
  }
  return `open-files://source/${encodeURIComponent(sourceId)}/path/${encodeURIComponent(path)}`;
}
78
-
/**
 * Reduce a source reference to the URI that identifies the source itself.
 *
 * For an open-files file reference that carries a revision id, the trailing
 * `/revision/<id>` segment is removed; any other reference is returned as is.
 */
function baseSourceUri(sourceRef: string, parsed: SourceRef): string {
  const isRevisionedFile =
    parsed.kind === 'open-files' && parsed.entity === 'file' && Boolean(parsed.revision_id);
  if (!isRevisionedFile) return sourceRef;
  return sourceRef.replace(/\/revision\/[^/]+$/, '');
}
85
-
/** Return the first non-empty string among `hash`, `checksum`, and `sha256`, or `null` when none is set. */
function hashFromEvent(event: OutboxObject): string | null {
  for (const key of ['hash', 'checksum', 'sha256'] as const) {
    const candidate = asString(event[key]);
    if (candidate !== undefined) return candidate;
  }
  return null;
}
89
-
/**
 * Pick the revision identifier for an event.
 *
 * Order of preference: `revision_id`, `revision`, `version_id` on the event;
 * the revision id of an open-files source reference; the content hash; `null`.
 */
function revisionFromEvent(event: OutboxObject, parsed: SourceRef, hash: string | null): string | null {
  const explicit =
    asString(event.revision_id) ?? asString(event.revision) ?? asString(event.version_id);
  if (explicit !== undefined) return explicit;

  const fromRef = parsed.kind === 'open-files' ? parsed.revision_id : undefined;
  return fromRef ?? hash ?? null;
}
100
-
/**
 * Read the event name from `event`, `type`, `action`, or `change_type`
 * (first non-empty string wins), defaulting to `'changed'`, and lower-case it.
 */
function eventType(event: OutboxObject): string {
  let name = 'changed';
  for (const key of ['event', 'type', 'action', 'change_type'] as const) {
    const candidate = asString(event[key]);
    if (candidate !== undefined) {
      name = candidate;
      break;
    }
  }
  return name.toLowerCase();
}
104
-
/** Title for an event: `title`, then `name`, then the base name of `path`; `null` when none is present. */
function titleFromEvent(event: OutboxObject): string | null {
  const explicitTitle = asString(event.title) ?? asString(event.name);
  if (explicitTitle !== undefined) return explicitTitle;

  const path = asString(event.path);
  return path === undefined ? null : basename(path);
}
109
-
/**
 * Convert a raw outbox event into its normalized form.
 *
 * @param event Raw event object from the outbox.
 * @param now Timestamp used when the event has no `updated_at`.
 * @throws Error when no source reference can be built for the event.
 */
function normalizeEvent(event: OutboxObject, now: string): NormalizedOutboxEvent {
  const sourceRef = buildSourceRef(event);
  const parsed = parseSourceRef(sourceRef);
  const hash = hashFromEvent(event);
  const status = asString(event.status);
  const updatedAt = asString(event.updated_at);

  const normalized: NormalizedOutboxEvent = {
    raw: event,
    eventType: eventType(event),
    sourceRef,
    sourceUri: baseSourceUri(sourceRef, parsed),
    kind: parsed.kind,
    title: titleFromEvent(event),
    revision: revisionFromEvent(event, parsed, hash),
    hash,
    status: status === undefined ? null : status.toLowerCase(),
    updatedAt: updatedAt === undefined ? now : updatedAt,
    // `permissions` takes precedence over `acl`; absent on both means "no ACL supplied".
    acl: event.permissions ?? event.acl ?? undefined,
  };
  return normalized;
}
128
-
/**
 * Parse the text of an outbox into a list of event objects.
 *
 * Accepted shapes:
 * - a JSON array of event objects;
 * - a JSON object with an `events` array of event objects;
 * - a single JSON event object (single-line or pretty-printed);
 * - JSON Lines, one event object per non-blank line.
 *
 * Validation failures are reported with their own message; only a failure to
 * parse the whole text as one JSON document triggers the JSON Lines fallback.
 * A lone object is returned as a single event whichever identifying keys it
 * uses, so events keyed by `uri` or `source_id`/`path` are handled the same
 * way as those keyed by `source_ref`, `source_uri`, or `file_id`.
 *
 * @param text Raw outbox contents.
 * @returns The parsed events; an empty list for blank input.
 * @throws SyntaxError when the text is not valid JSON or JSON Lines.
 * @throws Error when an entry is not a plain object.
 */
function parseOutboxText(text: string): OutboxObject[] {
  const trimmed = text.trim();
  if (!trimmed) return [];

  // Same acceptance rule as the file's object narrowing: truthy, typeof 'object', not an array.
  const toEvent = (value: unknown, message: string): OutboxObject => {
    if (!value || typeof value !== 'object' || Array.isArray(value)) throw new Error(message);
    return value as OutboxObject;
  };
  const nonBlankLines = (): string[] =>
    trimmed.split(/\r?\n/).filter((line) => line.trim().length > 0);
  const parseJsonLines = (lines: string[]): OutboxObject[] =>
    lines.map((line) => toEvent(JSON.parse(line), 'Outbox JSONL entries must be objects.'));

  if (trimmed.startsWith('[')) {
    const parsed: unknown = JSON.parse(trimmed);
    if (!Array.isArray(parsed)) throw new Error('Outbox array parse failed.');
    return parsed.map((entry) => toEvent(entry, 'Outbox array entries must be objects.'));
  }

  if (trimmed.startsWith('{')) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(trimmed);
    } catch (error) {
      // Not a single JSON document: treat it as JSON Lines when there is more than one line.
      const lines = nonBlankLines();
      if (lines.length <= 1) throw error;
      return parseJsonLines(lines);
    }
    // Validation happens outside the try so its errors are never mistaken for a parse failure.
    const document = toEvent(parsed, 'Outbox object parse failed.');
    if (Array.isArray(document.events)) {
      return document.events.map((entry) => toEvent(entry, 'Outbox events entries must be objects.'));
    }
    return [document];
  }

  return parseJsonLines(nonBlankLines());
}
170
-
/**
 * Fetch an S3 object as text.
 *
 * The bucket is taken from the URI host and the key from the URL-decoded path.
 * When a safety policy is supplied, the read is checked before the AWS SDK is
 * loaded. Region, profile, and retry settings come from `config.storage.s3`
 * only when that config targets the same bucket.
 *
 * @returns The object body as a string, or `''` when the response has no body.
 * @throws Error when the URI has no bucket or key.
 */
async function readS3Text(uri: string, config?: KnowledgeConfig, safetyPolicy?: SafetyPolicy): Promise<string> {
  const location = new URL(uri);
  const bucket = location.hostname;
  const key = decodeURIComponent(location.pathname.replace(/^\/+/, ''));
  if (!bucket || !key) throw new Error(`Invalid S3 outbox URI: ${uri}`);
  if (safetyPolicy) assertS3ReadAllowed(uri, safetyPolicy);

  // Loaded lazily so the AWS SDK is only required when an S3 outbox is actually read.
  const [s3Module, credentialsModule] = await Promise.all([
    import('@aws-sdk/client-s3'),
    import('@aws-sdk/credential-providers'),
  ]);

  const matchingConfig =
    config?.storage.type === 's3' && config.storage.s3?.bucket === bucket ? config.storage.s3 : undefined;
  const credentials = matchingConfig?.profile
    ? credentialsModule.fromIni({ profile: matchingConfig.profile })
    : undefined;
  const client = new s3Module.S3Client({
    region: matchingConfig?.region,
    credentials,
    maxAttempts: matchingConfig?.max_attempts,
  });

  const { Body: body } = await client.send(new s3Module.GetObjectCommand({ Bucket: bucket, Key: key }));
  if (!body) return '';
  return await body.transformToString();
}
191
-
/**
 * Load the outbox text from an `s3://` URI or a local file.
 *
 * @throws Error when a local path does not exist.
 */
async function readOutboxInput(input: string, config?: KnowledgeConfig, safetyPolicy?: SafetyPolicy): Promise<string> {
  const isRemote = input.startsWith('s3://');
  if (isRemote) return readS3Text(input, config, safetyPolicy);
  if (existsSync(input)) return readFileSync(input, 'utf8');
  throw new Error(`Outbox not found: ${input}`);
}
197
-
/**
 * Shallow-merge `patch` over a stored JSON object and serialize the result.
 *
 * Missing, unparsable, or non-object stored JSON is treated as an empty object.
 */
function mergeJson(existing: string | null | undefined, patch: OutboxObject): string {
  const readBase = (): OutboxObject => {
    if (!existing) return {};
    try {
      return asObject(JSON.parse(existing)) ?? {};
    } catch {
      // Corrupt stored metadata is discarded rather than failing the merge.
      return {};
    }
  };
  return JSON.stringify({ ...readBase(), ...patch });
}
209
-
/**
 * Upsert the `sources` row for an event and refresh its metadata.
 *
 * The row is keyed by URI. On conflict the kind, title (when the event supplies
 * one), and `updated_at` are refreshed. Afterwards the stored metadata is
 * merged with the latest outbox details, and the ACL is replaced only when the
 * event carries one.
 *
 * @returns The id of the stored source row.
 * @throws Error when the row cannot be read back after the upsert.
 */
function ensureSource(db: Database, event: NormalizedOutboxEvent, now: string): string {
  const { sourceUri, sourceRef, status, updatedAt } = event;
  const lastEvent = event.eventType;

  const initialMetadata = JSON.stringify({
    source_ref: sourceRef,
    source_uri: sourceUri,
    status,
    last_outbox_event: lastEvent,
  });
  db.run(
    `INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?)
     ON CONFLICT(uri) DO UPDATE SET
       kind = excluded.kind,
       title = COALESCE(excluded.title, sources.title),
       updated_at = excluded.updated_at`,
    [
      stableId('src', sourceUri),
      sourceUri,
      event.kind,
      event.title,
      initialMetadata,
      JSON.stringify(event.acl ?? {}),
      now,
      updatedAt,
    ],
  );

  // Read the row back: on conflict the stored id may differ from the one just computed.
  const stored = db
    .query<{ id: string; metadata_json: string; acl_json: string }, [string]>(
      'SELECT id, metadata_json, acl_json FROM sources WHERE uri = ?',
    )
    .get(sourceUri);
  if (!stored) throw new Error(`Failed to upsert source for outbox event: ${event.sourceUri}`);

  const patch: OutboxObject = {
    source_ref: sourceRef,
    source_uri: sourceUri,
    last_outbox_event: lastEvent,
    last_outbox_at: updatedAt,
  };
  if (status) patch.status = status;
  if (asString(event.raw.path)) patch.path = event.raw.path;

  // A NULL ACL parameter tells the UPDATE to keep the existing acl_json.
  const aclJson = event.acl === undefined ? null : JSON.stringify(event.acl);
  db.run(
    'UPDATE sources SET metadata_json = ?, acl_json = CASE WHEN ? IS NULL THEN acl_json ELSE ? END, updated_at = ? WHERE id = ?',
    [mergeJson(stored.metadata_json, patch), aclJson, aclJson, updatedAt, stored.id],
  );
  return stored.id;
}
252
-
/**
 * Upsert the `source_revisions` row for an event's revision.
 *
 * The row is keyed by (source_id, revision). On conflict the hash is refreshed
 * when the event supplies one and the metadata is replaced; the metadata always
 * flags the revision as needing a reindex.
 *
 * @returns The stored revision id, or `null` when the event has no revision.
 */
function ensureRevision(db: Database, sourceId: string, event: NormalizedOutboxEvent, now: string): string | null {
  const { revision } = event;
  if (!revision) return null;

  const metadataJson = JSON.stringify({
    source_ref: event.sourceRef,
    source_uri: event.sourceUri,
    status: event.status,
    last_outbox_event: event.eventType,
    reindex_required: true,
  });
  const extractedTextUri = asString(event.raw.extracted_text_ref) ?? null;

  db.run(
    `INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)
     VALUES (?, ?, ?, ?, ?, ?, ?)
     ON CONFLICT(source_id, revision) DO UPDATE SET
       hash = COALESCE(excluded.hash, source_revisions.hash),
       metadata_json = excluded.metadata_json`,
    [
      stableId('rev', `${sourceId}\u0000${revision}`),
      sourceId,
      revision,
      event.hash,
      extractedTextUri,
      metadataJson,
      now,
    ],
  );

  const stored = db
    .query<{ id: string }, [string, string]>(
      'SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?',
    )
    .get(sourceId, revision);
  return stored?.id ?? null;
}
276
-
/**
 * List the revision ids of a source that an event applies to.
 *
 * Matches by revision when the event has one, otherwise by hash, otherwise
 * every revision of the source.
 */
function revisionIdsForEvent(db: Database, sourceId: string, event: NormalizedOutboxEvent): string[] {
  const { revision, hash } = event;
  let rows: Array<{ id: string }>;

  if (revision) {
    rows = db
      .query<{ id: string }, [string, string]>(
        'SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?',
      )
      .all(sourceId, revision);
  } else if (hash) {
    rows = db
      .query<{ id: string }, [string, string]>(
        'SELECT id FROM source_revisions WHERE source_id = ? AND hash = ?',
      )
      .all(sourceId, hash);
  } else {
    rows = db
      .query<{ id: string }, [string]>(
        'SELECT id FROM source_revisions WHERE source_id = ?',
      )
      .all(sourceId);
  }

  return rows.map(({ id }) => id);
}
292
-
/**
 * Remove all indexed data derived from one revision and flag it for reindexing.
 *
 * For every chunk of the revision, its embeddings and vector index entries are
 * counted and then deleted together with its full-text row. The chunks are
 * then deleted, and the revision metadata gains `reindex_required` plus an
 * `invalidated_at` timestamp taken from the current clock.
 *
 * @returns Counts of the chunks found and of the embedding and vector rows counted before deletion.
 */
function invalidateRevision(db: Database, revisionId: string): { chunksDeleted: number; embeddingsDeleted: number; vectorEntriesDeleted: number } {
  const chunkRows = db
    .query<{ id: string }, [string]>('SELECT id FROM chunks WHERE source_revision_id = ?')
    .all(revisionId);
  const countFor = (sql: string, chunkId: string): number =>
    db.query<{ n: number }, [string]>(sql).get(chunkId)?.n ?? 0;

  let embeddingsDeleted = 0;
  let vectorEntriesDeleted = 0;
  chunkRows.forEach(({ id: chunkId }) => {
    // Count first: the totals describe the rows that are about to be removed.
    embeddingsDeleted += countFor('SELECT COUNT(*) AS n FROM chunk_embeddings WHERE chunk_id = ?', chunkId);
    vectorEntriesDeleted += countFor('SELECT COUNT(*) AS n FROM vector_index_entries WHERE chunk_id = ?', chunkId);
    db.run('DELETE FROM vector_index_entries WHERE chunk_id = ?', [chunkId]);
    db.run('DELETE FROM chunk_embeddings WHERE chunk_id = ?', [chunkId]);
    db.run('DELETE FROM chunks_fts WHERE chunk_id = ?', [chunkId]);
  });
  db.run('DELETE FROM chunks WHERE source_revision_id = ?', [revisionId]);

  const current = db
    .query<{ metadata_json: string }, [string]>('SELECT metadata_json FROM source_revisions WHERE id = ?')
    .get(revisionId);
  const nextMetadata = mergeJson(current?.metadata_json, {
    reindex_required: true,
    invalidated_at: new Date().toISOString(),
  });
  db.run('UPDATE source_revisions SET metadata_json = ? WHERE id = ?', [nextMetadata, revisionId]);

  return { chunksDeleted: chunkRows.length, embeddingsDeleted, vectorEntriesDeleted };
}
314
-
/** True when the status is `'deleted'` or the event type is one of the delete/remove names. */
function isDeleteEvent(eventType: string, status: string | null): boolean {
  if (status === 'deleted') return true;
  switch (eventType) {
    case 'delete':
    case 'deleted':
    case 'remove':
    case 'removed':
      return true;
    default:
      return false;
  }
}
318
-
/** True when the event type is one of the move/rename names. */
function isMoveEvent(eventType: string): boolean {
  switch (eventType) {
    case 'move':
    case 'moved':
    case 'rename':
    case 'renamed':
    case 'path_changed':
      return true;
    default:
      return false;
  }
}
322
-
/** True when the event type is one of the permission/ACL change names. */
function isPermissionEvent(eventType: string): boolean {
  switch (eventType) {
    case 'permission':
    case 'permissions':
    case 'permission_changed':
    case 'acl_changed':
      return true;
    default:
      return false;
  }
}
326
-
/**
 * Apply an open-files outbox to the knowledge database.
 *
 * Steps: assert the write is allowed (when a policy is given), migrate the
 * database, read and parse the outbox, then inside one transaction record the
 * run, an audit entry for the read, and for every event upsert its source and
 * revision, invalidate the affected revisions, and log a run event. A
 * zero-cost provider usage row and a write audit entry close the run. The
 * database handle is always closed.
 *
 * @param options Database path, outbox input, and optional config, policy, and clock.
 * @returns Counters describing what was touched and deleted.
 * @throws Error when the outbox cannot be read or parsed, or an event has no source reference.
 */
export async function consumeOpenFilesOutbox(options: OutboxConsumeOptions): Promise<OutboxConsumeResult> {
  const { dbPath, input, config, safetyPolicy } = options;
  const now = (options.now ?? new Date()).toISOString();

  if (safetyPolicy) assertWriteAllowed(dbPath, safetyPolicy);
  migrateKnowledgeDb(dbPath);
  const events = parseOutboxText(await readOutboxInput(input, config, safetyPolicy));
  const db = openKnowledgeDb(dbPath);
  const runId = `run_${randomUUID()}`;

  try {
    const applyEvents = db.transaction((): OutboxConsumeResult => {
      db.run(
        `INSERT INTO runs (id, type, prompt, status, provider, model, metadata_json, created_at, updated_at)
         VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
        [
          runId,
          'open-files-outbox',
          input,
          'completed',
          'local',
          'open-files-outbox',
          JSON.stringify({ path: input, events: events.length }),
          now,
          now,
        ],
      );

      const sourcesTouched = new Set<string>();
      const revisionsTouched = new Set<string>();
      const tally = {
        chunks: 0,
        embeddings: 0,
        vectorEntries: 0,
        staleRevisions: 0,
        deleted: 0,
        moved: 0,
        permissions: 0,
      };

      recordAuditEvent(db, {
        event_type: 'source_read',
        action: input.startsWith('s3://') ? 's3_outbox_read' : 'local_outbox_read',
        target_uri: input,
        decision: 'allow',
        metadata: { events: events.length, read_only: true },
        created_at: now,
      });

      for (const [index, raw] of events.entries()) {
        const normalized = normalizeEvent(raw, now);
        const sourceId = ensureSource(db, normalized, now);
        sourcesTouched.add(sourceId);

        const upsertedRevisionId = ensureRevision(db, sourceId, normalized, now);
        if (upsertedRevisionId) revisionsTouched.add(upsertedRevisionId);

        const affectedRevisionIds = revisionIdsForEvent(db, sourceId, normalized);
        for (const revisionId of affectedRevisionIds) {
          revisionsTouched.add(revisionId);
          const removed = invalidateRevision(db, revisionId);
          tally.chunks += removed.chunksDeleted;
          tally.embeddings += removed.embeddingsDeleted;
          tally.vectorEntries += removed.vectorEntriesDeleted;
          tally.staleRevisions += 1;
        }

        // These counters are per event, not per distinct source.
        if (isDeleteEvent(normalized.eventType, normalized.status)) tally.deleted += 1;
        if (isMoveEvent(normalized.eventType)) tally.moved += 1;
        if (isPermissionEvent(normalized.eventType) || normalized.acl !== undefined) tally.permissions += 1;

        db.run(
          `INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
           VALUES (?, ?, ?, ?, ?, ?)`,
          [
            stableId('evt', `${runId}\u0000${index}\u0000${normalized.sourceRef}\u0000${normalized.eventType}`),
            runId,
            'info',
            normalized.eventType,
            JSON.stringify({
              source_ref: normalized.sourceRef,
              source_uri: normalized.sourceUri,
              revision: normalized.revision,
              hash: normalized.hash,
              status: normalized.status,
              affected_revisions: affectedRevisionIds.length,
            }),
            normalized.updatedAt,
          ],
        );
      }

      db.run(
        `INSERT INTO provider_usage (id, run_id, provider, model, input_tokens, output_tokens, cost_usd, metadata_json, created_at)
         VALUES (?, ?, ?, ?, 0, 0, 0, ?, ?)`,
        [
          stableId('usage', runId),
          runId,
          'local',
          'open-files-outbox',
          JSON.stringify({ note: 'No model provider used for outbox invalidation.' }),
          now,
        ],
      );

      recordAuditEvent(db, {
        event_type: 'write',
        action: 'knowledge_outbox_invalidation',
        target_uri: dbPath,
        decision: 'allow',
        metadata: {
          run_id: runId,
          events: events.length,
          sources: sourcesTouched.size,
          revisions: revisionsTouched.size,
          chunks_deleted: tally.chunks,
          embeddings_deleted: tally.embeddings,
          vector_entries_deleted: tally.vectorEntries,
        },
        created_at: now,
      });

      return {
        path: input,
        db_path: dbPath,
        run_id: runId,
        events_seen: events.length,
        sources_touched: sourcesTouched.size,
        revisions_touched: revisionsTouched.size,
        chunks_deleted: tally.chunks,
        embeddings_deleted: tally.embeddings,
        vector_entries_deleted: tally.vectorEntries,
        stale_revisions: tally.staleRevisions,
        deleted_sources: tally.deleted,
        moved_sources: tally.moved,
        permission_updates: tally.permissions,
      };
    });
    return applyEvents();
  } finally {
    db.close();
  }
}
package/src/provenance.ts DELETED
@@ -1,93 +0,0 @@
/** Provenance record attached to knowledge derived from a source, as built by `sourceProvenance`. */
export interface KnowledgeProvenance {
  /** Always `'open-files'`. */
  source_owner: 'open-files';
  source_ref: string | null;
  source_uri: string | null;
  source_kind: string | null;
  source_revision_id: string | null;
  revision: string | null;
  hash: string | null;
  chunk_id: string | null;
  start_offset: number | null;
  end_offset: number | null;
  status: string | null;
  /** Always `true`. */
  read_only: true;
  citation_required: boolean;
  resolver: string | null;
  /** Whether `status` is one of the stale statuses recognised by `isStaleStatus`. */
  stale: boolean;
}
18
-
/** Provenance record for a generated artifact, as built by `generatedArtifactProvenance`. */
export interface GeneratedArtifactProvenance {
  /** Always `'open-files'`. */
  source_owner: 'open-files';
  generated_from: string;
  artifact_key: string;
  /** Source references the artifact was derived from; may be empty. */
  source_refs: string[];
  /** Always `true`. */
  read_only_sources: true;
  citation_required: boolean;
  /** Always `false`. */
  raw_source_bytes_stored_in_open_knowledge: false;
}
28
-
/** Optional fields accepted by `sourceProvenance`; anything omitted or `null` becomes `null` in the result. */
export interface SourceProvenanceInput {
  source_ref?: string | null;
  source_uri?: string | null;
  source_kind?: string | null;
  source_revision_id?: string | null;
  revision?: string | null;
  hash?: string | null;
  chunk_id?: string | null;
  start_offset?: number | null;
  end_offset?: number | null;
  /** Also used to compute the `stale` flag of the resulting provenance. */
  status?: string | null;
  resolver?: string | null;
}
42
-
/**
 * Report whether a status marks content as stale.
 *
 * The comparison is case-insensitive; `null` and `undefined` are not stale.
 */
export function isStaleStatus(status: string | null | undefined): boolean {
  switch ((status ?? '').toLowerCase()) {
    case 'deleted':
    case 'stale':
    case 'invalidated':
    case 'reindex_required':
      return true;
    default:
      return false;
  }
}
46
-
/**
 * Build the provenance record for source-derived knowledge.
 *
 * Every missing or `null` input field becomes `null`. The record is always
 * read-only and citation-required, and `stale` is derived from the status.
 */
export function sourceProvenance(input: SourceProvenanceInput): KnowledgeProvenance {
  const {
    source_ref = null,
    source_uri = null,
    source_kind = null,
    source_revision_id = null,
    revision = null,
    hash = null,
    chunk_id = null,
    start_offset = null,
    end_offset = null,
    status = null,
    resolver = null,
  } = input;

  return {
    source_owner: 'open-files',
    source_ref,
    source_uri,
    source_kind,
    source_revision_id,
    revision,
    hash,
    chunk_id,
    start_offset,
    end_offset,
    status,
    read_only: true,
    citation_required: true,
    resolver,
    stale: isStaleStatus(status),
  };
}
67
-
/**
 * Build the provenance record for a generated artifact.
 *
 * `source_refs` defaults to an empty list and `citation_required` to `true`.
 * The record always marks sources as read-only and states that no raw source
 * bytes are stored.
 */
export function generatedArtifactProvenance(input: {
  generated_from: string;
  artifact_key: string;
  source_refs?: string[];
  citation_required?: boolean;
}): GeneratedArtifactProvenance {
  const sourceRefs = input.source_refs ?? [];
  const citationRequired = input.citation_required ?? true;
  const record: GeneratedArtifactProvenance = {
    source_owner: 'open-files',
    generated_from: input.generated_from,
    artifact_key: input.artifact_key,
    source_refs: sourceRefs,
    read_only_sources: true,
    citation_required: citationRequired,
    raw_source_bytes_stored_in_open_knowledge: false,
  };
  return record;
}
84
-
/**
 * Return a shallow copy of `metadata` with a `provenance` property set.
 *
 * The input object is not modified; an existing `provenance` key is overwritten in the copy.
 */
export function withProvenance<T extends Record<string, unknown>>(
  metadata: T,
  provenance: KnowledgeProvenance | GeneratedArtifactProvenance,
): T & { provenance: KnowledgeProvenance | GeneratedArtifactProvenance } {
  const annotated = { ...metadata, provenance };
  return annotated;
}