@hasna/knowledge 0.2.5 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -0
- package/bin/open-knowledge-mcp.js +14 -1
- package/bin/open-knowledge.js +66 -16
- package/package.json +1 -1
- package/src/cli.ts +157 -5
- package/src/knowledge-db.ts +41 -1
- package/src/manifest-ingest.ts +58 -9
- package/src/outbox-consume.ts +454 -0
- package/src/safety.ts +265 -0
- package/src/workspace.ts +26 -0
package/src/cli.ts
CHANGED
|
@@ -6,10 +6,12 @@
|
|
|
6
6
|
*/
|
|
7
7
|
import { defaultStorePath, loadStore, saveStore, withLock, makeId, makeShortId, ensureStore, type KnowledgeItem } from './store';
|
|
8
8
|
import { ensureKnowledgeWorkspace, readKnowledgeConfig, resolveScopedWorkspace } from './workspace';
|
|
9
|
-
import { getKnowledgeDbStats, migrateKnowledgeDb } from './knowledge-db';
|
|
9
|
+
import { getKnowledgeDbStats, migrateKnowledgeDb, openKnowledgeDb } from './knowledge-db';
|
|
10
10
|
import { createArtifactStore } from './artifact-store';
|
|
11
11
|
import { initializeWikiLayout } from './wiki-layout';
|
|
12
12
|
import { ingestOpenFilesManifest } from './manifest-ingest';
|
|
13
|
+
import { consumeOpenFilesOutbox } from './outbox-consume';
|
|
14
|
+
import { approvalStatus, assertS3ReadAllowed, assertWebSearchAllowed, createApprovalGate, recordAuditEvent, recordRedactionFindings, redactSecrets, resolveSafetyPolicy } from './safety';
|
|
13
15
|
import pkg from '../package.json' with { type: 'json' };
|
|
14
16
|
|
|
15
17
|
type LogLevel = 'debug' | 'info' | 'warn' | 'error';
|
|
@@ -60,7 +62,7 @@ interface ParseResult {
|
|
|
60
62
|
flags: Flags;
|
|
61
63
|
}
|
|
62
64
|
|
|
63
|
-
const COMMANDS = ['add', 'list', 'get', 'delete', 'update', 'archive', 'restore', 'upsert', 'untag', 'export', 'prune', 'dedupe', 'stats', 'paths', 'db', 'wiki', 'ingest', 'help'];
|
|
65
|
+
const COMMANDS = ['add', 'list', 'get', 'delete', 'update', 'archive', 'restore', 'upsert', 'untag', 'export', 'prune', 'dedupe', 'stats', 'paths', 'db', 'wiki', 'ingest', 'reindex', 'safety', 'help'];
|
|
64
66
|
const COMMAND_ALIASES: Record<string, string> = {
|
|
65
67
|
ls: 'list',
|
|
66
68
|
rm: 'delete',
|
|
@@ -164,6 +166,8 @@ Commands:
|
|
|
164
166
|
db init|stats Initialize or inspect local knowledge.db
|
|
165
167
|
wiki init Initialize scalable wiki/schema/index/log artifacts
|
|
166
168
|
ingest manifest <file|s3://> Ingest an open-files manifest into knowledge.db
|
|
169
|
+
reindex outbox <file|s3://> Consume open-files change events and invalidate chunks
|
|
170
|
+
safety status|check|approve|audit|redact
|
|
167
171
|
help [command] Show help
|
|
168
172
|
|
|
169
173
|
Global Options:
|
|
@@ -226,6 +230,8 @@ function printCommandHelp(command: string): void {
|
|
|
226
230
|
if (command === 'db') { console.log('Usage: open-knowledge db init|stats [--scope local|global|project] [--json]'); return; }
|
|
227
231
|
if (command === 'wiki') { console.log('Usage: open-knowledge wiki init [--scope local|global|project] [--json]'); return; }
|
|
228
232
|
if (command === 'ingest') { console.log('Usage: open-knowledge ingest manifest <file|s3://bucket/key> [--scope local|global|project] [--json]'); return; }
|
|
233
|
+
if (command === 'reindex') { console.log('Usage: open-knowledge reindex outbox <file|s3://bucket/key> [--scope local|global|project] [--json]'); return; }
|
|
234
|
+
if (command === 'safety') { console.log('Usage: open-knowledge safety status|check|approve|audit|redact [args] [--scope local|global|project] [--json]'); return; }
|
|
229
235
|
printGlobalHelp();
|
|
230
236
|
}
|
|
231
237
|
|
|
@@ -270,11 +276,11 @@ async function run(argv: string[]): Promise<void> {
|
|
|
270
276
|
if (flags.completions) {
|
|
271
277
|
const shell = flags.completions;
|
|
272
278
|
if (shell === 'bash') {
|
|
273
|
-
console.log(`_open_knowledge() { local cur; cur="${"$"}{COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge`);
|
|
279
|
+
console.log(`_open_knowledge() { local cur; cur="${"$"}{COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest reindex safety help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge`);
|
|
274
280
|
} else if (shell === 'zsh') {
|
|
275
|
-
console.log(`#compdef open-knowledge\n_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"\{created,title\}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--no-color)--no-color[disable color]" "(--scope)--scope"\{local,global,project\}:" }; _open_knowledge`);
|
|
281
|
+
console.log(`#compdef open-knowledge\n_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest reindex safety help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"\{created,title\}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--no-color)--no-color[disable color]" "(--scope)--scope"\{local,global,project\}:" }; _open_knowledge`);
|
|
276
282
|
} else if (shell === 'fish') {
|
|
277
|
-
console.log(`complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"`);
|
|
283
|
+
console.log(`complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest reindex safety help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"`);
|
|
278
284
|
} else {
|
|
279
285
|
throw new Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");
|
|
280
286
|
}
|
|
@@ -344,6 +350,132 @@ async function run(argv: string[]): Promise<void> {
|
|
|
344
350
|
return;
|
|
345
351
|
}
|
|
346
352
|
|
|
353
|
+
if (command === 'safety') {
|
|
354
|
+
const action = positional[1] ?? 'status';
|
|
355
|
+
const resolvedWorkspace = ensureKnowledgeWorkspace(workspace.home);
|
|
356
|
+
const config = readKnowledgeConfig(resolvedWorkspace.configPath);
|
|
357
|
+
const policy = resolveSafetyPolicy(config, resolvedWorkspace);
|
|
358
|
+
migrateKnowledgeDb(resolvedWorkspace.knowledgeDbPath);
|
|
359
|
+
const db = openKnowledgeDb(resolvedWorkspace.knowledgeDbPath);
|
|
360
|
+
try {
|
|
361
|
+
if (action === 'status') {
|
|
362
|
+
output({
|
|
363
|
+
ok: true,
|
|
364
|
+
mode: policy.mode,
|
|
365
|
+
workspace: resolvedWorkspace.home,
|
|
366
|
+
allow_write_roots: policy.allowWriteRoots,
|
|
367
|
+
read_only_source_access: policy.readOnlySourceAccess,
|
|
368
|
+
network: policy.network,
|
|
369
|
+
redaction: policy.redaction,
|
|
370
|
+
approvals: policy.approvals,
|
|
371
|
+
message: `Safety policy: ${policy.mode}`,
|
|
372
|
+
}, flags.json);
|
|
373
|
+
return;
|
|
374
|
+
}
|
|
375
|
+
if (action === 'check') {
|
|
376
|
+
const checkAction = positional[2] ?? 'generated_write';
|
|
377
|
+
const target = positional[3] ?? null;
|
|
378
|
+
let decision: ReturnType<typeof approvalStatus> | { action: string; target_uri: string | null; approval_required: false; approved: boolean; decision: string };
|
|
379
|
+
try {
|
|
380
|
+
if (checkAction === 'web_search') {
|
|
381
|
+
assertWebSearchAllowed(policy);
|
|
382
|
+
decision = { action: checkAction, target_uri: target, approval_required: false, approved: true, decision: 'allow' };
|
|
383
|
+
} else if (checkAction === 's3_read') {
|
|
384
|
+
if (!target) throw new Error('safety check s3_read requires an s3:// target.');
|
|
385
|
+
assertS3ReadAllowed(target, policy);
|
|
386
|
+
decision = { action: checkAction, target_uri: target, approval_required: false, approved: true, decision: 'allow' };
|
|
387
|
+
} else {
|
|
388
|
+
decision = approvalStatus(db, policy, checkAction, target);
|
|
389
|
+
}
|
|
390
|
+
recordAuditEvent(db, {
|
|
391
|
+
event_type: 'safety_check',
|
|
392
|
+
action: checkAction,
|
|
393
|
+
target_uri: target,
|
|
394
|
+
decision: decision.decision === 'allow' ? 'allow' : 'requires_approval',
|
|
395
|
+
metadata: decision,
|
|
396
|
+
});
|
|
397
|
+
output({ ok: true, ...decision, message: `Safety check ${decision.decision}` }, flags.json);
|
|
398
|
+
return;
|
|
399
|
+
} catch (error) {
|
|
400
|
+
recordAuditEvent(db, {
|
|
401
|
+
event_type: 'safety_check',
|
|
402
|
+
action: checkAction,
|
|
403
|
+
target_uri: target,
|
|
404
|
+
decision: 'deny',
|
|
405
|
+
metadata: { error: error instanceof Error ? error.message : String(error) },
|
|
406
|
+
});
|
|
407
|
+
throw error;
|
|
408
|
+
}
|
|
409
|
+
}
|
|
410
|
+
if (action === 'approve') {
|
|
411
|
+
const approveAction = positional[2] ?? 'generated_write';
|
|
412
|
+
const target = positional[3] ?? null;
|
|
413
|
+
const approval = createApprovalGate(db, {
|
|
414
|
+
action: approveAction,
|
|
415
|
+
target_uri: target,
|
|
416
|
+
reason: 'local-cli approval',
|
|
417
|
+
metadata: { scope: flags.scope ?? 'global' },
|
|
418
|
+
});
|
|
419
|
+
recordAuditEvent(db, {
|
|
420
|
+
event_type: 'approval',
|
|
421
|
+
action: approveAction,
|
|
422
|
+
target_uri: target,
|
|
423
|
+
decision: 'allow',
|
|
424
|
+
metadata: { approval_id: approval.id },
|
|
425
|
+
});
|
|
426
|
+
output({ ok: true, ...approval, action: approveAction, target_uri: target, message: `Approved ${approveAction}` }, flags.json);
|
|
427
|
+
return;
|
|
428
|
+
}
|
|
429
|
+
if (action === 'audit') {
|
|
430
|
+
const rows = db.query<{
|
|
431
|
+
id: string;
|
|
432
|
+
event_type: string;
|
|
433
|
+
action: string;
|
|
434
|
+
target_uri: string | null;
|
|
435
|
+
decision: string;
|
|
436
|
+
metadata_json: string;
|
|
437
|
+
created_at: string;
|
|
438
|
+
}, []>(
|
|
439
|
+
'SELECT id, event_type, action, target_uri, decision, metadata_json, created_at FROM audit_events ORDER BY created_at DESC LIMIT 50',
|
|
440
|
+
).all().map((row) => ({
|
|
441
|
+
id: row.id,
|
|
442
|
+
event_type: row.event_type,
|
|
443
|
+
action: row.action,
|
|
444
|
+
target_uri: row.target_uri,
|
|
445
|
+
decision: row.decision,
|
|
446
|
+
metadata: JSON.parse(row.metadata_json),
|
|
447
|
+
created_at: row.created_at,
|
|
448
|
+
}));
|
|
449
|
+
output({ ok: true, events: rows, message: `${rows.length} audit event(s)` }, flags.json);
|
|
450
|
+
return;
|
|
451
|
+
}
|
|
452
|
+
if (action === 'redact') {
|
|
453
|
+
const text = positional.slice(2).join(' ');
|
|
454
|
+
if (!text) throw new Error('Usage: open-knowledge safety redact <text>');
|
|
455
|
+
const result = redactSecrets(text, policy);
|
|
456
|
+
if (result.findings.length > 0) {
|
|
457
|
+
recordRedactionFindings(db, {
|
|
458
|
+
source_uri: 'safety://redact',
|
|
459
|
+
findings: result.findings,
|
|
460
|
+
metadata: { command: 'safety redact' },
|
|
461
|
+
});
|
|
462
|
+
}
|
|
463
|
+
recordAuditEvent(db, {
|
|
464
|
+
event_type: 'redaction',
|
|
465
|
+
action: 'safety_redact',
|
|
466
|
+
target_uri: 'safety://redact',
|
|
467
|
+
decision: result.findings.length > 0 ? 'redacted' : 'allow',
|
|
468
|
+
metadata: { findings: result.findings.length },
|
|
469
|
+
});
|
|
470
|
+
output({ ok: true, text: result.text, findings: result.findings, message: `Redacted ${result.findings.length} finding(s)` }, flags.json);
|
|
471
|
+
return;
|
|
472
|
+
}
|
|
473
|
+
throw new Error("Invalid safety action. Use 'status', 'check', 'approve', 'audit', or 'redact'.");
|
|
474
|
+
} finally {
|
|
475
|
+
db.close();
|
|
476
|
+
}
|
|
477
|
+
}
|
|
478
|
+
|
|
347
479
|
if (command === 'ingest') {
|
|
348
480
|
const action = positional[1] ?? '';
|
|
349
481
|
if (action !== 'manifest') throw new Error("Invalid ingest action. Use 'manifest'.");
|
|
@@ -351,15 +483,35 @@ async function run(argv: string[]): Promise<void> {
|
|
|
351
483
|
if (!input) throw new Error('Usage: open-knowledge ingest manifest <file|s3://bucket/key>');
|
|
352
484
|
const resolvedWorkspace = ensureKnowledgeWorkspace(workspace.home);
|
|
353
485
|
const config = readKnowledgeConfig(resolvedWorkspace.configPath);
|
|
486
|
+
const safetyPolicy = resolveSafetyPolicy(config, resolvedWorkspace);
|
|
354
487
|
const result = await ingestOpenFilesManifest({
|
|
355
488
|
dbPath: resolvedWorkspace.knowledgeDbPath,
|
|
356
489
|
input,
|
|
357
490
|
config,
|
|
491
|
+
safetyPolicy,
|
|
358
492
|
});
|
|
359
493
|
output({ ok: true, ...result, message: `Ingested ${result.items_seen} manifest item(s)` }, flags.json);
|
|
360
494
|
return;
|
|
361
495
|
}
|
|
362
496
|
|
|
497
|
+
if (command === 'reindex') {
|
|
498
|
+
const action = positional[1] ?? '';
|
|
499
|
+
if (action !== 'outbox') throw new Error("Invalid reindex action. Use 'outbox'.");
|
|
500
|
+
const input = positional[2];
|
|
501
|
+
if (!input) throw new Error('Usage: open-knowledge reindex outbox <file|s3://bucket/key>');
|
|
502
|
+
const resolvedWorkspace = ensureKnowledgeWorkspace(workspace.home);
|
|
503
|
+
const config = readKnowledgeConfig(resolvedWorkspace.configPath);
|
|
504
|
+
const safetyPolicy = resolveSafetyPolicy(config, resolvedWorkspace);
|
|
505
|
+
const result = await consumeOpenFilesOutbox({
|
|
506
|
+
dbPath: resolvedWorkspace.knowledgeDbPath,
|
|
507
|
+
input,
|
|
508
|
+
config,
|
|
509
|
+
safetyPolicy,
|
|
510
|
+
});
|
|
511
|
+
output({ ok: true, ...result, message: `Consumed ${result.events_seen} outbox event(s)` }, flags.json);
|
|
512
|
+
return;
|
|
513
|
+
}
|
|
514
|
+
|
|
363
515
|
ensureStore(storePath);
|
|
364
516
|
|
|
365
517
|
if (command === 'add') {
|
package/src/knowledge-db.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { Database } from 'bun:sqlite';
|
|
2
2
|
import { ensureParentDir } from './workspace';
|
|
3
3
|
|
|
4
|
-
export const CURRENT_SCHEMA_VERSION = 2;
|
|
4
|
+
export const CURRENT_SCHEMA_VERSION = 3;
|
|
5
5
|
|
|
6
6
|
export interface KnowledgeDbStats {
|
|
7
7
|
schema_version: number;
|
|
@@ -13,6 +13,9 @@ export interface KnowledgeDbStats {
|
|
|
13
13
|
indexes: number;
|
|
14
14
|
runs: number;
|
|
15
15
|
run_events: number;
|
|
16
|
+
redaction_findings: number;
|
|
17
|
+
audit_events: number;
|
|
18
|
+
approval_gates: number;
|
|
16
19
|
}
|
|
17
20
|
|
|
18
21
|
const MIGRATION_1 = `
|
|
@@ -199,6 +202,39 @@ INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
|
199
202
|
VALUES (2, datetime('now'));
|
|
200
203
|
`;
|
|
201
204
|
|
|
205
|
+
const MIGRATION_3 = `
|
|
206
|
+
CREATE TABLE IF NOT EXISTS audit_events (
|
|
207
|
+
id TEXT PRIMARY KEY,
|
|
208
|
+
event_type TEXT NOT NULL,
|
|
209
|
+
action TEXT NOT NULL,
|
|
210
|
+
target_uri TEXT,
|
|
211
|
+
decision TEXT NOT NULL,
|
|
212
|
+
metadata_json TEXT NOT NULL DEFAULT '{}',
|
|
213
|
+
created_at TEXT NOT NULL
|
|
214
|
+
);
|
|
215
|
+
|
|
216
|
+
CREATE TABLE IF NOT EXISTS approval_gates (
|
|
217
|
+
id TEXT PRIMARY KEY,
|
|
218
|
+
action TEXT NOT NULL,
|
|
219
|
+
target_uri TEXT,
|
|
220
|
+
status TEXT NOT NULL,
|
|
221
|
+
reason TEXT,
|
|
222
|
+
approved_by TEXT,
|
|
223
|
+
metadata_json TEXT NOT NULL DEFAULT '{}',
|
|
224
|
+
created_at TEXT NOT NULL,
|
|
225
|
+
updated_at TEXT NOT NULL
|
|
226
|
+
);
|
|
227
|
+
|
|
228
|
+
CREATE INDEX IF NOT EXISTS idx_audit_events_action ON audit_events(action);
|
|
229
|
+
CREATE INDEX IF NOT EXISTS idx_audit_events_target ON audit_events(target_uri);
|
|
230
|
+
CREATE INDEX IF NOT EXISTS idx_audit_events_created ON audit_events(created_at);
|
|
231
|
+
CREATE INDEX IF NOT EXISTS idx_approval_gates_action ON approval_gates(action);
|
|
232
|
+
CREATE INDEX IF NOT EXISTS idx_approval_gates_status ON approval_gates(status);
|
|
233
|
+
|
|
234
|
+
INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
235
|
+
VALUES (3, datetime('now'));
|
|
236
|
+
`;
|
|
237
|
+
|
|
202
238
|
export function openKnowledgeDb(path: string): Database {
|
|
203
239
|
ensureParentDir(path);
|
|
204
240
|
const db = new Database(path);
|
|
@@ -211,6 +247,7 @@ export function migrateKnowledgeDb(path: string): { path: string; schema_version
|
|
|
211
247
|
try {
|
|
212
248
|
db.exec(MIGRATION_1);
|
|
213
249
|
if (getSchemaVersion(db) < 2) db.exec(MIGRATION_2);
|
|
250
|
+
if (getSchemaVersion(db) < 3) db.exec(MIGRATION_3);
|
|
214
251
|
return { path, schema_version: getSchemaVersion(db) };
|
|
215
252
|
} finally {
|
|
216
253
|
db.close();
|
|
@@ -240,6 +277,9 @@ export function getKnowledgeDbStats(path: string): KnowledgeDbStats {
|
|
|
240
277
|
indexes: count(db, 'knowledge_indexes'),
|
|
241
278
|
runs: count(db, 'runs'),
|
|
242
279
|
run_events: count(db, 'run_events'),
|
|
280
|
+
redaction_findings: count(db, 'redaction_findings'),
|
|
281
|
+
audit_events: count(db, 'audit_events'),
|
|
282
|
+
approval_gates: count(db, 'approval_gates'),
|
|
243
283
|
};
|
|
244
284
|
} finally {
|
|
245
285
|
db.close();
|
package/src/manifest-ingest.ts
CHANGED
|
@@ -5,11 +5,20 @@ import type { Database } from 'bun:sqlite';
|
|
|
5
5
|
import { migrateKnowledgeDb, openKnowledgeDb } from './knowledge-db';
|
|
6
6
|
import { parseSourceRef, type SourceRef } from './source-ref';
|
|
7
7
|
import type { KnowledgeConfig } from './workspace';
|
|
8
|
+
import {
|
|
9
|
+
assertS3ReadAllowed,
|
|
10
|
+
assertWriteAllowed,
|
|
11
|
+
recordAuditEvent,
|
|
12
|
+
recordRedactionFindings,
|
|
13
|
+
redactSecrets,
|
|
14
|
+
type SafetyPolicy,
|
|
15
|
+
} from './safety';
|
|
8
16
|
|
|
9
17
|
export interface ManifestIngestOptions {
|
|
10
18
|
dbPath: string;
|
|
11
19
|
input: string;
|
|
12
20
|
config?: KnowledgeConfig;
|
|
21
|
+
safetyPolicy?: SafetyPolicy;
|
|
13
22
|
now?: Date;
|
|
14
23
|
maxChunkChars?: number;
|
|
15
24
|
chunkOverlapChars?: number;
|
|
@@ -23,6 +32,7 @@ export interface ManifestIngestResult {
|
|
|
23
32
|
revisions_upserted: number;
|
|
24
33
|
chunks_inserted: number;
|
|
25
34
|
chunks_deleted: number;
|
|
35
|
+
redactions: number;
|
|
26
36
|
skipped: number;
|
|
27
37
|
}
|
|
28
38
|
|
|
@@ -209,11 +219,12 @@ function parseManifestText(text: string): ManifestObject[] {
|
|
|
209
219
|
});
|
|
210
220
|
}
|
|
211
221
|
|
|
212
|
-
async function readS3Text(uri: string, config?: KnowledgeConfig): Promise<string> {
|
|
222
|
+
async function readS3Text(uri: string, config?: KnowledgeConfig, safetyPolicy?: SafetyPolicy): Promise<string> {
|
|
213
223
|
const parsed = new URL(uri);
|
|
214
224
|
const bucket = parsed.hostname;
|
|
215
225
|
const key = decodeURIComponent(parsed.pathname.replace(/^\/+/, ''));
|
|
216
226
|
if (!bucket || !key) throw new Error(`Invalid S3 manifest URI: ${uri}`);
|
|
227
|
+
if (safetyPolicy) assertS3ReadAllowed(uri, safetyPolicy);
|
|
217
228
|
const [{ S3Client, GetObjectCommand }, { fromIni }] = await Promise.all([
|
|
218
229
|
import('@aws-sdk/client-s3'),
|
|
219
230
|
import('@aws-sdk/credential-providers'),
|
|
@@ -229,8 +240,8 @@ async function readS3Text(uri: string, config?: KnowledgeConfig): Promise<string
|
|
|
229
240
|
return await response.Body.transformToString();
|
|
230
241
|
}
|
|
231
242
|
|
|
232
|
-
async function readManifestInput(input: string, config?: KnowledgeConfig): Promise<string> {
|
|
233
|
-
if (input.startsWith('s3://')) return readS3Text(input, config);
|
|
243
|
+
async function readManifestInput(input: string, config?: KnowledgeConfig, safetyPolicy?: SafetyPolicy): Promise<string> {
|
|
244
|
+
if (input.startsWith('s3://')) return readS3Text(input, config, safetyPolicy);
|
|
234
245
|
if (!existsSync(input)) throw new Error(`Manifest not found: ${input}`);
|
|
235
246
|
return readFileSync(input, 'utf8');
|
|
236
247
|
}
|
|
@@ -338,9 +349,26 @@ function upsertRevision(db: Database, sourceId: string, item: NormalizedManifest
|
|
|
338
349
|
return row.id;
|
|
339
350
|
}
|
|
340
351
|
|
|
341
|
-
function insertChunks(db: Database, sourceRevisionId: string, item: NormalizedManifestItem, now: string, maxChars: number, overlapChars: number): number {
|
|
342
|
-
if (!item.text || item.status.toLowerCase() === 'deleted') return 0;
|
|
343
|
-
const chunks = chunkText(item.text, maxChars, overlapChars);
|
|
352
|
+
function insertChunks(db: Database, sourceRevisionId: string, item: NormalizedManifestItem, now: string, maxChars: number, overlapChars: number, safetyPolicy?: SafetyPolicy): { chunksInserted: number; redactions: number } {
|
|
353
|
+
if (!item.text || item.status.toLowerCase() === 'deleted') return { chunksInserted: 0, redactions: 0 };
|
|
354
|
+
const redacted = redactSecrets(item.text, safetyPolicy);
|
|
355
|
+
if (redacted.findings.length > 0) {
|
|
356
|
+
recordRedactionFindings(db, {
|
|
357
|
+
source_uri: item.sourceUri,
|
|
358
|
+
findings: redacted.findings,
|
|
359
|
+
metadata: { source_ref: item.sourceRef, revision: item.revision },
|
|
360
|
+
created_at: now,
|
|
361
|
+
});
|
|
362
|
+
recordAuditEvent(db, {
|
|
363
|
+
event_type: 'redaction',
|
|
364
|
+
action: 'source_text_redact',
|
|
365
|
+
target_uri: item.sourceUri,
|
|
366
|
+
decision: 'redacted',
|
|
367
|
+
metadata: { findings: redacted.findings.length, source_ref: item.sourceRef, revision: item.revision },
|
|
368
|
+
created_at: now,
|
|
369
|
+
});
|
|
370
|
+
}
|
|
371
|
+
const chunks = chunkText(redacted.text, maxChars, overlapChars);
|
|
344
372
|
for (const chunk of chunks) {
|
|
345
373
|
const chunkId = stableId('chk', `${sourceRevisionId}\u0000${chunk.ordinal}\u0000${chunk.text}`);
|
|
346
374
|
const metadata = {
|
|
@@ -373,7 +401,7 @@ function insertChunks(db: Database, sourceRevisionId: string, item: NormalizedMa
|
|
|
373
401
|
[chunkId, chunk.text, item.title ?? '', item.sourceUri],
|
|
374
402
|
);
|
|
375
403
|
}
|
|
376
|
-
return chunks.length;
|
|
404
|
+
return { chunksInserted: chunks.length, redactions: redacted.findings.length };
|
|
377
405
|
}
|
|
378
406
|
|
|
379
407
|
export async function ingestOpenFilesManifest(options: ManifestIngestOptions): Promise<ManifestIngestResult> {
|
|
@@ -383,8 +411,9 @@ export async function ingestOpenFilesManifest(options: ManifestIngestOptions): P
|
|
|
383
411
|
if (maxChunkChars < 500) throw new Error('maxChunkChars must be at least 500.');
|
|
384
412
|
if (chunkOverlapChars < 0 || chunkOverlapChars >= maxChunkChars) throw new Error('chunkOverlapChars must be less than maxChunkChars.');
|
|
385
413
|
|
|
414
|
+
if (options.safetyPolicy) assertWriteAllowed(options.dbPath, options.safetyPolicy);
|
|
386
415
|
migrateKnowledgeDb(options.dbPath);
|
|
387
|
-
const text = await readManifestInput(options.input, options.config);
|
|
416
|
+
const text = await readManifestInput(options.input, options.config, options.safetyPolicy);
|
|
388
417
|
const items = parseManifestText(text);
|
|
389
418
|
const db = openKnowledgeDb(options.dbPath);
|
|
390
419
|
try {
|
|
@@ -393,7 +422,16 @@ export async function ingestOpenFilesManifest(options: ManifestIngestOptions): P
|
|
|
393
422
|
const seenRevisions = new Set<string>();
|
|
394
423
|
let chunksInserted = 0;
|
|
395
424
|
let chunksDeleted = 0;
|
|
425
|
+
let redactions = 0;
|
|
396
426
|
let skipped = 0;
|
|
427
|
+
recordAuditEvent(db, {
|
|
428
|
+
event_type: 'source_read',
|
|
429
|
+
action: options.input.startsWith('s3://') ? 's3_manifest_read' : 'local_manifest_read',
|
|
430
|
+
target_uri: options.input,
|
|
431
|
+
decision: 'allow',
|
|
432
|
+
metadata: { items: items.length, read_only: true },
|
|
433
|
+
created_at: now,
|
|
434
|
+
});
|
|
397
435
|
for (const raw of items) {
|
|
398
436
|
const item = normalizeManifestItem(raw, now);
|
|
399
437
|
const sourceId = upsertSource(db, item, now);
|
|
@@ -403,8 +441,18 @@ export async function ingestOpenFilesManifest(options: ManifestIngestOptions): P
|
|
|
403
441
|
if (item.text || item.status.toLowerCase() === 'deleted') {
|
|
404
442
|
chunksDeleted += deleteChunksForRevision(db, revisionId);
|
|
405
443
|
}
|
|
406
|
-
chunksInserted += insertChunks(db, revisionId, item, now, maxChunkChars, chunkOverlapChars);
|
444
|
+
const inserted = insertChunks(db, revisionId, item, now, maxChunkChars, chunkOverlapChars, options.safetyPolicy);
|
|
445
|
+
chunksInserted += inserted.chunksInserted;
|
|
446
|
+
redactions += inserted.redactions;
|
|
407
447
|
}
|
|
448
|
+
recordAuditEvent(db, {
|
|
449
|
+
event_type: 'write',
|
|
450
|
+
action: 'knowledge_manifest_ingest',
|
|
451
|
+
target_uri: options.dbPath,
|
|
452
|
+
decision: 'allow',
|
|
453
|
+
metadata: { items: items.length, sources: seenSources.size, revisions: seenRevisions.size, chunks_inserted: chunksInserted, redactions },
|
|
454
|
+
created_at: now,
|
|
455
|
+
});
|
|
408
456
|
return {
|
|
409
457
|
path: options.input,
|
|
410
458
|
db_path: options.dbPath,
|
|
@@ -413,6 +461,7 @@ export async function ingestOpenFilesManifest(options: ManifestIngestOptions): P
|
|
|
413
461
|
revisions_upserted: seenRevisions.size,
|
|
414
462
|
chunks_inserted: chunksInserted,
|
|
415
463
|
chunks_deleted: chunksDeleted,
|
|
464
|
+
redactions,
|
|
416
465
|
skipped,
|
|
417
466
|
};
|
|
418
467
|
})();
|