@hasna/knowledge 0.2.7 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -1
- package/bin/open-knowledge-mcp.js +624 -5
- package/bin/open-knowledge.js +47 -25
- package/docs/architecture/ai-native-knowledge-base.md +24 -0
- package/package.json +1 -1
- package/src/cli.ts +61 -13
- package/src/manifest-ingest.ts +36 -10
- package/src/mcp.js +25 -0
- package/src/source-ingest.ts +268 -0
- package/src/source-ref.ts +12 -0
- package/src/source-resolver.ts +418 -0
package/src/cli.ts
CHANGED
|
@@ -10,7 +10,9 @@ import { getKnowledgeDbStats, migrateKnowledgeDb, openKnowledgeDb } from './know
|
|
|
10
10
|
import { createArtifactStore } from './artifact-store';
|
|
11
11
|
import { initializeWikiLayout } from './wiki-layout';
|
|
12
12
|
import { ingestOpenFilesManifest } from './manifest-ingest';
|
|
13
|
+
import { ingestSourceRef } from './source-ingest';
|
|
13
14
|
import { consumeOpenFilesOutbox } from './outbox-consume';
|
|
15
|
+
import { resolveOpenFilesSource } from './source-resolver';
|
|
14
16
|
import { approvalStatus, assertS3ReadAllowed, assertWebSearchAllowed, createApprovalGate, recordAuditEvent, recordRedactionFindings, redactSecrets, resolveSafetyPolicy } from './safety';
|
|
15
17
|
import pkg from '../package.json' with { type: 'json' };
|
|
16
18
|
|
|
@@ -49,6 +51,7 @@ interface Flags {
|
|
|
49
51
|
tag?: string;
|
|
50
52
|
format?: string;
|
|
51
53
|
completions?: string;
|
|
54
|
+
purpose?: string;
|
|
52
55
|
noColor?: boolean;
|
|
53
56
|
scope?: string;
|
|
54
57
|
olderThan?: number;
|
|
@@ -62,7 +65,7 @@ interface ParseResult {
|
|
|
62
65
|
flags: Flags;
|
|
63
66
|
}
|
|
64
67
|
|
|
65
|
-
const COMMANDS = ['add', 'list', 'get', 'delete', 'update', 'archive', 'restore', 'upsert', 'untag', 'export', 'prune', 'dedupe', 'stats', 'paths', 'db', 'wiki', 'ingest', 'reindex', 'safety', 'help'];
|
|
68
|
+
const COMMANDS = ['add', 'list', 'get', 'delete', 'update', 'archive', 'restore', 'upsert', 'untag', 'export', 'prune', 'dedupe', 'stats', 'paths', 'db', 'wiki', 'source', 'ingest', 'reindex', 'safety', 'help'];
|
|
66
69
|
const COMMAND_ALIASES: Record<string, string> = {
|
|
67
70
|
ls: 'list',
|
|
68
71
|
rm: 'delete',
|
|
@@ -97,6 +100,7 @@ function parseArgs(argv: string[]): ParseResult {
|
|
|
97
100
|
case '--tag': case '-t': flags.tag = argv[i + 1]; i += 1; break;
|
|
98
101
|
case '--format': flags.format = argv[i + 1]; i += 1; break;
|
|
99
102
|
case '--completions': flags.completions = argv[i + 1]; i += 1; break;
|
|
103
|
+
case '--purpose': flags.purpose = argv[i + 1]; i += 1; break;
|
|
100
104
|
case '--no-color': flags.noColor = true; break;
|
|
101
105
|
case '--scope': flags.scope = argv[i + 1]; i += 1; break;
|
|
102
106
|
case '--older-than': flags.olderThan = Number(argv[i + 1]); i += 1; break;
|
|
@@ -165,7 +169,9 @@ Commands:
|
|
|
165
169
|
paths Show resolved workspace/store paths
|
|
166
170
|
db init|stats Initialize or inspect local knowledge.db
|
|
167
171
|
wiki init Initialize scalable wiki/schema/index/log artifacts
|
|
172
|
+
source resolve <source-ref> Resolve read-only source content and citation evidence
|
|
168
173
|
ingest manifest <file|s3://> Ingest an open-files manifest into knowledge.db
|
|
174
|
+
ingest source <source-ref> Ingest a read-only source ref into knowledge.db
|
|
169
175
|
reindex outbox <file|s3://> Consume open-files change events and invalidate chunks
|
|
170
176
|
safety status|check|approve|audit|redact
|
|
171
177
|
help [command] Show help
|
|
@@ -173,6 +179,7 @@ Commands:
|
|
|
173
179
|
Global Options:
|
|
174
180
|
--json Output JSON
|
|
175
181
|
--store <path> Override store path
|
|
182
|
+
--purpose <name> Read-only source purpose (default: knowledge_answer)
|
|
176
183
|
--scope local|global|project Store scope (default: global ~/.hasna/apps/knowledge/)
|
|
177
184
|
--no-color Disable color output
|
|
178
185
|
--completions <shell> Output completions for bash|zsh|fish
|
|
@@ -229,7 +236,8 @@ function printCommandHelp(command: string): void {
|
|
|
229
236
|
if (command === 'paths') { console.log('Usage: open-knowledge paths [--scope local|global|project] [--json]'); return; }
|
|
230
237
|
if (command === 'db') { console.log('Usage: open-knowledge db init|stats [--scope local|global|project] [--json]'); return; }
|
|
231
238
|
if (command === 'wiki') { console.log('Usage: open-knowledge wiki init [--scope local|global|project] [--json]'); return; }
|
|
232
|
-
if (command === '
|
|
239
|
+
if (command === 'source') { console.log('Usage: open-knowledge source resolve <source-ref> [--purpose knowledge_answer|knowledge_index] [--limit <n>] [--scope local|global|project] [--json]'); return; }
|
|
240
|
+
if (command === 'ingest') { console.log('Usage: open-knowledge ingest manifest <file|s3://bucket/key> | source <source-ref> [--purpose knowledge_index] [--scope local|global|project] [--json]'); return; }
|
|
233
241
|
if (command === 'reindex') { console.log('Usage: open-knowledge reindex outbox <file|s3://bucket/key> [--scope local|global|project] [--json]'); return; }
|
|
234
242
|
if (command === 'safety') { console.log('Usage: open-knowledge safety status|check|approve|audit|redact [args] [--scope local|global|project] [--json]'); return; }
|
|
235
243
|
printGlobalHelp();
|
|
@@ -276,11 +284,11 @@ async function run(argv: string[]): Promise<void> {
|
|
|
276
284
|
if (flags.completions) {
|
|
277
285
|
const shell = flags.completions;
|
|
278
286
|
if (shell === 'bash') {
|
|
279
|
-
console.log(`_open_knowledge() { local cur; cur="${"$"}{COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest reindex safety help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge`);
|
|
287
|
+
console.log(`_open_knowledge() { local cur; cur="${"$"}{COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki source ingest reindex safety help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --purpose --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge`);
|
|
280
288
|
} else if (shell === 'zsh') {
|
|
281
|
-
console.log(`#compdef open-knowledge\n_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest reindex safety help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"\{created,title\}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--no-color)--no-color[disable color]" "(--scope)--scope"\{local,global,project\}:" }; _open_knowledge`);
|
|
289
|
+
console.log(`#compdef open-knowledge\n_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki source ingest reindex safety help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"\{created,title\}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--purpose)--purpose[purpose]:" "(--no-color)--no-color[disable color]" "(--scope)--scope"\{local,global,project\}:" }; _open_knowledge`);
|
|
282
290
|
} else if (shell === 'fish') {
|
|
283
|
-
console.log(`complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest reindex safety help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"`);
|
|
291
|
+
console.log(`complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki source ingest reindex safety help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l purpose; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"`);
|
|
284
292
|
} else {
|
|
285
293
|
throw new Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");
|
|
286
294
|
}
|
|
@@ -476,24 +484,64 @@ async function run(argv: string[]): Promise<void> {
|
|
|
476
484
|
}
|
|
477
485
|
}
|
|
478
486
|
|
|
479
|
-
if (command === '
|
|
487
|
+
if (command === 'source') {
|
|
480
488
|
const action = positional[1] ?? '';
|
|
481
|
-
if (action !== '
|
|
482
|
-
const
|
|
483
|
-
if (!
|
|
489
|
+
if (action !== 'resolve') throw new Error("Invalid source action. Use 'resolve'.");
|
|
490
|
+
const sourceRef = positional[2];
|
|
491
|
+
if (!sourceRef) throw new Error('Usage: open-knowledge source resolve <source-ref>');
|
|
484
492
|
const resolvedWorkspace = ensureKnowledgeWorkspace(workspace.home);
|
|
485
493
|
const config = readKnowledgeConfig(resolvedWorkspace.configPath);
|
|
486
494
|
const safetyPolicy = resolveSafetyPolicy(config, resolvedWorkspace);
|
|
487
|
-
const result = await
|
|
495
|
+
const result = await resolveOpenFilesSource({
|
|
488
496
|
dbPath: resolvedWorkspace.knowledgeDbPath,
|
|
489
|
-
|
|
490
|
-
|
|
497
|
+
sourceRef,
|
|
498
|
+
purpose: flags.purpose,
|
|
499
|
+
limit: flags.limit,
|
|
491
500
|
safetyPolicy,
|
|
492
501
|
});
|
|
493
|
-
output({
|
|
502
|
+
output({
|
|
503
|
+
ok: true,
|
|
504
|
+
...result,
|
|
505
|
+
message: result.resolved
|
|
506
|
+
? `Resolved ${result.source_ref} (${result.content.chunks_returned}/${result.content.chunks_total} chunks)`
|
|
507
|
+
: `Source not indexed: ${sourceRef}`,
|
|
508
|
+
}, flags.json);
|
|
494
509
|
return;
|
|
495
510
|
}
|
|
496
511
|
|
|
512
|
+
if (command === 'ingest') {
|
|
513
|
+
const action = positional[1] ?? '';
|
|
514
|
+
const resolvedWorkspace = ensureKnowledgeWorkspace(workspace.home);
|
|
515
|
+
const config = readKnowledgeConfig(resolvedWorkspace.configPath);
|
|
516
|
+
const safetyPolicy = resolveSafetyPolicy(config, resolvedWorkspace);
|
|
517
|
+
if (action === 'manifest') {
|
|
518
|
+
const input = positional[2];
|
|
519
|
+
if (!input) throw new Error('Usage: open-knowledge ingest manifest <file|s3://bucket/key>');
|
|
520
|
+
const result = await ingestOpenFilesManifest({
|
|
521
|
+
dbPath: resolvedWorkspace.knowledgeDbPath,
|
|
522
|
+
input,
|
|
523
|
+
config,
|
|
524
|
+
safetyPolicy,
|
|
525
|
+
});
|
|
526
|
+
output({ ok: true, ...result, message: `Ingested ${result.items_seen} manifest item(s)` }, flags.json);
|
|
527
|
+
return;
|
|
528
|
+
}
|
|
529
|
+
if (action === 'source') {
|
|
530
|
+
const sourceRef = positional[2];
|
|
531
|
+
if (!sourceRef) throw new Error('Usage: open-knowledge ingest source <source-ref>');
|
|
532
|
+
const result = await ingestSourceRef({
|
|
533
|
+
dbPath: resolvedWorkspace.knowledgeDbPath,
|
|
534
|
+
sourceRef,
|
|
535
|
+
purpose: flags.purpose,
|
|
536
|
+
config,
|
|
537
|
+
safetyPolicy,
|
|
538
|
+
});
|
|
539
|
+
output({ ok: true, ...result, message: `Ingested source ${result.source_ref} (${result.chunks_inserted} chunks)` }, flags.json);
|
|
540
|
+
return;
|
|
541
|
+
}
|
|
542
|
+
throw new Error("Invalid ingest action. Use 'manifest' or 'source'.");
|
|
543
|
+
}
|
|
544
|
+
|
|
497
545
|
if (command === 'reindex') {
|
|
498
546
|
const action = positional[1] ?? '';
|
|
499
547
|
if (action !== 'outbox') throw new Error("Invalid reindex action. Use 'outbox'.");
|
package/src/manifest-ingest.ts
CHANGED
|
@@ -24,6 +24,17 @@ export interface ManifestIngestOptions {
|
|
|
24
24
|
chunkOverlapChars?: number;
|
|
25
25
|
}
|
|
26
26
|
|
|
27
|
+
/**
 * Options for `ingestOpenFilesManifestItems`, which ingests manifest items
 * that have already been parsed (or synthesized) by the caller.
 */
export interface ManifestItemsIngestOptions {
  /** Path of the knowledge database that is migrated, opened and written to. */
  dbPath: string;
  /** Manifest items to ingest; each one is normalized before being upserted. */
  items: ManifestObject[];
  /** Label recorded as the audit `target_uri` and returned as the result `path`. */
  sourceLabel: string;
  /**
   * Audit action recorded for the read. When omitted it is derived from
   * `sourceLabel`: 's3_manifest_read' for `s3://` labels, otherwise
   * 'local_manifest_read'.
   */
  readAction?: string;
  /** When present, the write to `dbPath` is checked against this policy first. */
  safetyPolicy?: SafetyPolicy;
  /** Clock override; the current time is used when omitted. */
  now?: Date;
  /** Maximum characters per chunk (defaults to 4000). */
  maxChunkChars?: number;
  /** Character overlap between adjacent chunks (defaults to 200). */
  chunkOverlapChars?: number;
}
|
|
37
|
+
|
|
27
38
|
export interface ManifestIngestResult {
|
|
28
39
|
path: string;
|
|
29
40
|
db_path: string;
|
|
@@ -36,7 +47,7 @@ export interface ManifestIngestResult {
|
|
|
36
47
|
skipped: number;
|
|
37
48
|
}
|
|
38
49
|
|
|
39
|
-
type ManifestObject = Record<string, unknown>;
|
|
50
|
+
/** A raw, not-yet-normalized manifest entry: arbitrary keys with unvalidated values. */
export type ManifestObject = Record<string, unknown>;
|
|
40
51
|
|
|
41
52
|
interface NormalizedManifestItem {
|
|
42
53
|
raw: ManifestObject;
|
|
@@ -405,6 +416,23 @@ function insertChunks(db: Database, sourceRevisionId: string, item: NormalizedMa
|
|
|
405
416
|
}
|
|
406
417
|
|
|
407
418
|
/**
 * Read an open-files manifest from `options.input`, parse it, and ingest the
 * resulting items into the knowledge database.
 *
 * The write permission check and database migration run before the manifest
 * is read, so a disallowed write fails before any source I/O happens. The
 * actual ingestion is delegated to `ingestOpenFilesManifestItems`, with the
 * input string used as the source label.
 *
 * @param options - Database path, manifest input, and optional config, safety
 *   policy, clock override and chunking settings.
 * @returns The ingest result produced by `ingestOpenFilesManifestItems`.
 */
export async function ingestOpenFilesManifest(options: ManifestIngestOptions): Promise<ManifestIngestResult> {
  const now = options.now ?? new Date();
  const { dbPath, input, config, safetyPolicy, maxChunkChars, chunkOverlapChars } = options;

  if (safetyPolicy) {
    assertWriteAllowed(dbPath, safetyPolicy);
  }
  migrateKnowledgeDb(dbPath);

  const manifestText = await readManifestInput(input, config, safetyPolicy);
  const items = parseManifestText(manifestText);

  return ingestOpenFilesManifestItems({
    dbPath,
    items,
    sourceLabel: input,
    safetyPolicy,
    now,
    maxChunkChars,
    chunkOverlapChars,
  });
}
|
|
434
|
+
|
|
435
|
+
export async function ingestOpenFilesManifestItems(options: ManifestItemsIngestOptions): Promise<ManifestIngestResult> {
|
|
408
436
|
const now = (options.now ?? new Date()).toISOString();
|
|
409
437
|
const maxChunkChars = options.maxChunkChars ?? 4000;
|
|
410
438
|
const chunkOverlapChars = options.chunkOverlapChars ?? 200;
|
|
@@ -413,8 +441,6 @@ export async function ingestOpenFilesManifest(options: ManifestIngestOptions): P
|
|
|
413
441
|
|
|
414
442
|
if (options.safetyPolicy) assertWriteAllowed(options.dbPath, options.safetyPolicy);
|
|
415
443
|
migrateKnowledgeDb(options.dbPath);
|
|
416
|
-
const text = await readManifestInput(options.input, options.config, options.safetyPolicy);
|
|
417
|
-
const items = parseManifestText(text);
|
|
418
444
|
const db = openKnowledgeDb(options.dbPath);
|
|
419
445
|
try {
|
|
420
446
|
const result = db.transaction(() => {
|
|
@@ -426,13 +452,13 @@ export async function ingestOpenFilesManifest(options: ManifestIngestOptions): P
|
|
|
426
452
|
let skipped = 0;
|
|
427
453
|
recordAuditEvent(db, {
|
|
428
454
|
event_type: 'source_read',
|
|
429
|
-
action: options.
|
|
430
|
-
target_uri: options.
|
|
455
|
+
action: options.readAction ?? (options.sourceLabel.startsWith('s3://') ? 's3_manifest_read' : 'local_manifest_read'),
|
|
456
|
+
target_uri: options.sourceLabel,
|
|
431
457
|
decision: 'allow',
|
|
432
|
-
metadata: { items: items.length, read_only: true },
|
|
458
|
+
metadata: { items: options.items.length, read_only: true },
|
|
433
459
|
created_at: now,
|
|
434
460
|
});
|
|
435
|
-
for (const raw of items) {
|
|
461
|
+
for (const raw of options.items) {
|
|
436
462
|
const item = normalizeManifestItem(raw, now);
|
|
437
463
|
const sourceId = upsertSource(db, item, now);
|
|
438
464
|
const revisionId = upsertRevision(db, sourceId, item, now);
|
|
@@ -450,13 +476,13 @@ export async function ingestOpenFilesManifest(options: ManifestIngestOptions): P
|
|
|
450
476
|
action: 'knowledge_manifest_ingest',
|
|
451
477
|
target_uri: options.dbPath,
|
|
452
478
|
decision: 'allow',
|
|
453
|
-
metadata: { items: items.length, sources: seenSources.size, revisions: seenRevisions.size, chunks_inserted: chunksInserted, redactions },
|
|
479
|
+
metadata: { items: options.items.length, sources: seenSources.size, revisions: seenRevisions.size, chunks_inserted: chunksInserted, redactions },
|
|
454
480
|
created_at: now,
|
|
455
481
|
});
|
|
456
482
|
return {
|
|
457
|
-
path: options.
|
|
483
|
+
path: options.sourceLabel,
|
|
458
484
|
db_path: options.dbPath,
|
|
459
|
-
items_seen: items.length,
|
|
485
|
+
items_seen: options.items.length,
|
|
460
486
|
sources_upserted: seenSources.size,
|
|
461
487
|
revisions_upserted: seenRevisions.size,
|
|
462
488
|
chunks_inserted: chunksInserted,
|
package/src/mcp.js
CHANGED
|
@@ -7,6 +7,8 @@ import pkg from '../package.json' with { type: 'json' };
|
|
|
7
7
|
import { defaultStorePath, loadStore, saveStore, makeId, withLock } from './store.ts';
|
|
8
8
|
import { ensureKnowledgeWorkspace, readKnowledgeConfig, resolveScopedWorkspace } from './workspace.ts';
|
|
9
9
|
import { parseSourceRef } from './source-ref.ts';
|
|
10
|
+
import { resolveOpenFilesSource } from './source-resolver.ts';
|
|
11
|
+
import { resolveSafetyPolicy } from './safety.ts';
|
|
10
12
|
|
|
11
13
|
const storePathField = z.string().optional().describe('Path to the JSON store file');
|
|
12
14
|
const scopeField = z.enum(['local', 'global', 'project']).optional().describe('Workspace scope');
|
|
@@ -102,6 +104,29 @@ export function buildServer() {
|
|
|
102
104
|
}
|
|
103
105
|
});
|
|
104
106
|
|
|
107
|
+
registerTool(server, 'ok_resolve_source', 'Resolve source content', 'Resolve an indexed source ref through the read-only open-files boundary and return chunk citation evidence', {
|
|
108
|
+
source_ref: z.string().describe('Source reference URI, preferably open-files://...'),
|
|
109
|
+
purpose: z.string().optional().describe('Read-only purpose label, default knowledge_answer'),
|
|
110
|
+
limit: z.number().optional().describe('Maximum chunks to return, default 10'),
|
|
111
|
+
scope: scopeField,
|
|
112
|
+
}, async ({ source_ref, purpose, limit, scope }) => {
|
|
113
|
+
const workspace = ensureKnowledgeWorkspace(resolveScopedWorkspace(scope).home);
|
|
114
|
+
const config = readKnowledgeConfig(workspace.configPath);
|
|
115
|
+
const safetyPolicy = resolveSafetyPolicy(config, workspace);
|
|
116
|
+
try {
|
|
117
|
+
const result = await resolveOpenFilesSource({
|
|
118
|
+
dbPath: workspace.knowledgeDbPath,
|
|
119
|
+
sourceRef: source_ref,
|
|
120
|
+
purpose,
|
|
121
|
+
limit,
|
|
122
|
+
safetyPolicy,
|
|
123
|
+
});
|
|
124
|
+
return jsonText({ ok: true, ...result });
|
|
125
|
+
} catch (error) {
|
|
126
|
+
return errorText(error instanceof Error ? error.message : String(error));
|
|
127
|
+
}
|
|
128
|
+
});
|
|
129
|
+
|
|
105
130
|
registerTool(server, 'ok_add', 'Add a knowledge item', 'Add a new item to the knowledge store', {
|
|
106
131
|
title: z.string().describe('Item title'),
|
|
107
132
|
content: z.string().describe('Item content/body'),
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
import { createHash } from 'node:crypto';
|
|
2
|
+
import { existsSync, readFileSync } from 'node:fs';
|
|
3
|
+
import { basename } from 'node:path';
|
|
4
|
+
import { ingestOpenFilesManifestItems, type ManifestIngestResult, type ManifestObject } from './manifest-ingest';
|
|
5
|
+
import { parseSourceRef, type SourceRef } from './source-ref';
|
|
6
|
+
import { resolveOpenFilesSource } from './source-resolver';
|
|
7
|
+
import type { KnowledgeConfig } from './workspace';
|
|
8
|
+
import { assertS3ReadAllowed, assertWebSearchAllowed, type SafetyPolicy } from './safety';
|
|
9
|
+
|
|
10
|
+
/** Options accepted by `ingestSourceRef`. */
export interface SourceIngestOptions {
  /** Path of the knowledge database used for catalog lookups and for the ingest write. */
  dbPath: string;
  /** Source reference URI to ingest; it is parsed with `parseSourceRef`. */
  sourceRef: string;
  /** Read purpose label; 'knowledge_index' is used when omitted. */
  purpose?: string;
  /** Workspace configuration; consulted for S3 client settings when reading `s3://` content. */
  config?: KnowledgeConfig;
  /** When present, S3 reads and web reads are checked against this policy. */
  safetyPolicy?: SafetyPolicy;
  /** Clock override forwarded to the source resolver and the manifest-item ingester. */
  now?: Date;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Result of `ingestSourceRef`: the manifest ingest counters plus provenance
 * for the text that was ingested.
 */
export interface SourceIngestResult extends ManifestIngestResult {
  /** The source ref exactly as supplied by the caller. */
  source_ref: string;
  /** Where the ingested text was obtained from. */
  content_source: 'catalog_chunks' | 'extracted_text_ref' | 'file' | 's3' | 'web';
  /** Always `true`. */
  read_only: true;
  /** The `hash` value recorded on the ingested manifest item, as a string. */
  hash: string;
}
|
|
25
|
+
|
|
26
|
+
/** Text obtained for a source ref, together with the descriptive fields used to build its manifest item. */
interface ResolvedText {
  /** The text content to ingest. */
  text: string;
  /** Which kind of read produced `text`. */
  contentSource: SourceIngestResult['content_source'];
  /** Display title, or `null` when none is known. */
  title: string | null;
  /** MIME type, or `null` when none is known. */
  mime: string | null;
  /** Size value recorded for the content, or `null` when none is known. */
  size: number | null;
  /** Content hash, or `null` when it still has to be computed from `text`. */
  hash: string | null;
  /** Revision identifier, or `null` when the source carries none. */
  revision: string | null;
  /** URI of the extracted-text reference the text relates to, if any. */
  extractedTextRef: string | null;
  /** Free-form metadata copied onto the manifest item. */
  metadata: Record<string, unknown>;
  /** Permission fields merged into the manifest item's permissions. */
  permissions: Record<string, unknown>;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Compute the SHA-256 digest of a string.
 *
 * @param text - Text to hash.
 * @returns The lowercase hex digest prefixed with `sha256:`.
 */
function sha256Text(text: string): string {
  const hasher = createHash('sha256');
  hasher.update(text);
  const hexDigest = hasher.digest('hex');
  return `sha256:${hexDigest}`;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Reduce an HTML document to rough plain text.
 *
 * Removes `<script>` and `<style>` elements with their content, replaces every
 * remaining tag with a space, decodes the `&nbsp;`, `&lt;`, `&gt;` and `&amp;`
 * entities, and then tidies whitespace around line breaks and collapses runs
 * of spaces/tabs.
 *
 * This is a best-effort text extraction for indexing, not an HTML sanitizer:
 * the result must not be re-inserted into markup.
 *
 * @param html - Raw HTML markup.
 * @returns The trimmed plain-text approximation of the markup.
 */
function stripHtml(html: string): string {
  return html
    .replace(/<script[\s\S]*?<\/script>/gi, ' ')
    .replace(/<style[\s\S]*?<\/style>/gi, ' ')
    .replace(/<[^>]+>/g, ' ')
    // Treat both the entity and a literal no-break space as an ordinary space.
    .replace(/&nbsp;|\u00a0/g, ' ')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    // `&amp;` is decoded last so that `&amp;lt;` yields the text `&lt;`, not `<`.
    .replace(/&amp;/g, '&')
    .replace(/\s+\n/g, '\n')
    .replace(/\n\s+/g, '\n')
    .replace(/[ \t]{2,}/g, ' ')
    .trim();
}
|
|
57
|
+
|
|
58
|
+
/**
 * Download an S3 object and return its body as a string.
 *
 * The bucket is taken from the URI host and the key from the URI path with
 * leading slashes removed and percent-escapes decoded. Client settings
 * (region, profile, max attempts) are taken from `config` only when the
 * configured storage is S3 and targets the same bucket.
 *
 * @param uri - `s3://bucket/key` style URI.
 * @param config - Optional workspace configuration providing S3 client settings.
 * @param safetyPolicy - When present, the read is checked with `assertS3ReadAllowed`.
 * @returns The object body as text, or an empty string when the response has no body.
 * @throws Error when the URI has no bucket or no key.
 */
async function readS3Text(uri: string, config?: KnowledgeConfig, safetyPolicy?: SafetyPolicy): Promise<string> {
  const location = new URL(uri);
  const bucket = location.hostname;
  const key = decodeURIComponent(location.pathname.replace(/^\/+/, ''));
  if (!(bucket && key)) {
    throw new Error(`Invalid S3 source URI: ${uri}`);
  }
  if (safetyPolicy) {
    assertS3ReadAllowed(uri, safetyPolicy);
  }

  // The AWS SDK is loaded lazily, only when an S3 read is actually performed.
  const [s3Module, credentialsModule] = await Promise.all([
    import('@aws-sdk/client-s3'),
    import('@aws-sdk/credential-providers'),
  ]);
  const { S3Client, GetObjectCommand } = s3Module;
  const { fromIni } = credentialsModule;

  const bucketSettings = config?.storage.type === 's3' && config.storage.s3?.bucket === bucket
    ? config.storage.s3
    : undefined;
  const client = new S3Client({
    region: bucketSettings?.region,
    credentials: bucketSettings?.profile ? fromIni({ profile: bucketSettings.profile }) : undefined,
    maxAttempts: bucketSettings?.max_attempts,
  });

  const response = await client.send(new GetObjectCommand({ Bucket: bucket, Key: key }));
  const body = response.Body;
  if (!body) {
    return '';
  }
  return await body.transformToString();
}
|
|
78
|
+
|
|
79
|
+
/**
 * Fetch a web resource and return its text together with the reported content type.
 *
 * Bodies whose `content-type` header contains `html` are passed through
 * `stripHtml`; every other body is returned unchanged.
 *
 * @param uri - URL to fetch.
 * @param safetyPolicy - When present, the read is checked with `assertWebSearchAllowed`.
 * @returns The text and the raw `content-type` header value (or `null`).
 * @throws Error when the response status is not OK.
 */
async function readWebText(uri: string, safetyPolicy?: SafetyPolicy): Promise<{ text: string; mime: string | null }> {
  if (safetyPolicy) {
    assertWebSearchAllowed(safetyPolicy);
  }

  const requestHeaders = {
    accept: 'text/markdown,text/plain,text/html,application/json;q=0.8,*/*;q=0.5',
    'user-agent': '@hasna/knowledge source-ingest',
  };
  const response = await fetch(uri, { headers: requestHeaders });
  if (!response.ok) {
    throw new Error(`Web source read failed ${response.status}: ${uri}`);
  }

  const mime = response.headers.get('content-type');
  const body = await response.text();
  const text = mime?.includes('html') ? stripHtml(body) : body;
  return { text, mime };
}
|
|
92
|
+
|
|
93
|
+
/**
 * Derive a display title from a parsed source ref.
 *
 * - file: base name of the path
 * - s3: base name of the object key
 * - web: base name of the URL path, or the full URL when that is empty
 * - anything else: base name of `path` when set, otherwise the ref `id`
 */
function titleForRef(parsed: SourceRef): string | null {
  switch (parsed.kind) {
    case 'file':
      return basename(parsed.path);
    case 's3':
      return basename(parsed.key);
    case 'web': {
      const lastSegment = basename(new URL(parsed.url).pathname);
      return lastSegment || parsed.url;
    }
    default:
      return parsed.path ? basename(parsed.path) : parsed.id;
  }
}
|
|
99
|
+
|
|
100
|
+
/**
 * Read the text of a source that can be fetched directly: a local file, an S3
 * object, or a web URL.
 *
 * File and S3 content is labelled `text/plain`; web content carries the
 * `content-type` reported by the server. `size` is the length of the text in
 * UTF-16 code units (`String.prototype.length`), and `hash` is computed from
 * the text with `sha256Text`.
 *
 * @param parsed - Parsed source ref.
 * @param config - Optional workspace configuration, used for S3 reads.
 * @param safetyPolicy - Optional policy applied to S3 and web reads.
 * @returns The text plus descriptive fields for building a manifest item.
 * @throws Error when a local file does not exist, or when the ref kind is not
 *   file, s3 or web.
 */
async function readDirectSourceText(parsed: SourceRef, config?: KnowledgeConfig, safetyPolicy?: SafetyPolicy): Promise<ResolvedText> {
  switch (parsed.kind) {
    case 'file': {
      const filePath = parsed.path;
      if (!existsSync(filePath)) {
        throw new Error(`Source file not found: ${filePath}`);
      }
      const contents = readFileSync(filePath, 'utf8');
      return {
        text: contents,
        contentSource: 'file',
        title: titleForRef(parsed),
        mime: 'text/plain',
        size: contents.length,
        hash: sha256Text(contents),
        revision: null,
        extractedTextRef: null,
        metadata: { path: filePath },
        permissions: { mode: 'read_only' },
      };
    }

    case 's3': {
      const contents = await readS3Text(parsed.uri, config, safetyPolicy);
      return {
        text: contents,
        contentSource: 's3',
        title: titleForRef(parsed),
        mime: 'text/plain',
        size: contents.length,
        hash: sha256Text(contents),
        revision: null,
        extractedTextRef: null,
        metadata: { bucket: parsed.bucket, key: parsed.key },
        permissions: { mode: 'read_only' },
      };
    }

    case 'web': {
      const { text: contents, mime } = await readWebText(parsed.url, safetyPolicy);
      return {
        text: contents,
        contentSource: 'web',
        title: titleForRef(parsed),
        mime,
        size: contents.length,
        hash: sha256Text(contents),
        revision: null,
        extractedTextRef: null,
        metadata: { url: parsed.url },
        permissions: { mode: 'read_only' },
      };
    }

    default:
      throw new Error(`Direct source reading is not available for ${parsed.uri}`);
  }
}
|
|
152
|
+
|
|
153
|
+
/**
 * Read the text behind an extracted-text reference URI.
 *
 * `open-files://` references are rejected because they cannot be read
 * directly here; any other URI is parsed and read with `readDirectSourceText`.
 *
 * @param uri - Extracted-text reference URI.
 * @param config - Optional workspace configuration, forwarded to the reader.
 * @param safetyPolicy - Optional policy, forwarded to the reader.
 * @returns The text, always labelled with content source `extracted_text_ref`.
 * @throws Error for `open-files://` URIs, and whatever parsing or reading throws.
 */
async function readTextRef(uri: string, config?: KnowledgeConfig, safetyPolicy?: SafetyPolicy): Promise<{ text: string; contentSource: SourceIngestResult['content_source'] }> {
  const isOpenFilesRef = uri.startsWith('open-files://');
  if (isOpenFilesRef) {
    throw new Error('Open-files extracted text refs require an open-files resolver API. Ingest an open-files manifest with extracted_text or an extracted_text_ref using file://, s3://, or https://.');
  }

  const { text } = await readDirectSourceText(parseSourceRef(uri), config, safetyPolicy);
  return { text, contentSource: 'extracted_text_ref' };
}
|
|
161
|
+
|
|
162
|
+
/**
 * Obtain the text for an `open-files://` source from the local knowledge catalog.
 *
 * Resolution order:
 * 1. The ref must resolve in the catalog, otherwise an error is thrown.
 * 2. If the revision has an extracted-text URI and the catalog reports no text
 *    available, the text is read from that URI.
 * 3. Otherwise the returned catalog chunks are joined with blank lines.
 *
 * @param options - Ingest options; `purpose` defaults to 'knowledge_index'.
 * @returns The text plus the title, MIME type, hash, revision, metadata and
 *   permissions taken from the catalog where available.
 * @throws Error when the source is not in the catalog or has no chunks.
 */
async function readOpenFilesSourceText(options: SourceIngestOptions): Promise<ResolvedText> {
  const resolved = await resolveOpenFilesSource({
    dbPath: options.dbPath,
    sourceRef: options.sourceRef,
    purpose: options.purpose ?? 'knowledge_index',
    // NOTE(review): at most 100 chunks are requested; if a source can have more,
    // the joined text below would be partial — confirm against the resolver.
    limit: 100,
    safetyPolicy: options.safetyPolicy,
    now: options.now,
  });
  if (!resolved.resolved) {
    throw new Error('Open-files source is not in the local knowledge catalog. Ingest an open-files manifest first or use the open-files resolver API.');
  }

  const { revision, source, content, chunks } = resolved;

  if (revision?.extracted_text_uri && !content.text_available) {
    const fetched = await readTextRef(revision.extracted_text_uri, options.config, options.safetyPolicy);
    return {
      text: fetched.text,
      contentSource: fetched.contentSource,
      title: source?.title ?? null,
      mime: content.mime,
      size: fetched.text.length,
      hash: revision.hash ?? sha256Text(fetched.text),
      revision: revision.revision,
      extractedTextRef: revision.extracted_text_uri,
      metadata: source?.metadata ?? {},
      permissions: source?.permissions ?? { mode: 'read_only' },
    };
  }

  if (chunks.length === 0) {
    throw new Error('Open-files source has no extracted text chunks yet. Ingest an open-files manifest with extracted_text or extracted_text_ref first.');
  }

  const joined = chunks.map((chunk) => chunk.text).join('\n\n');
  return {
    text: joined,
    contentSource: 'catalog_chunks',
    title: source?.title ?? null,
    mime: content.mime,
    size: joined.length,
    hash: revision?.hash ?? sha256Text(joined),
    revision: revision?.revision ?? null,
    extractedTextRef: revision?.extracted_text_uri ?? null,
    metadata: source?.metadata ?? {},
    permissions: source?.permissions ?? { mode: 'read_only' },
  };
}
|
|
206
|
+
|
|
207
|
+
/**
 * Build the single manifest item that represents a resolved source.
 *
 * @param sourceRef - The source ref string as supplied by the caller.
 * @param parsed - The parsed form of `sourceRef`.
 * @param resolved - Text and descriptive fields obtained for the source.
 * @param purpose - Purpose label written to `permissions.allowed_purposes`.
 * @param now - Timestamp used for `updated_at`; defaults to the current time.
 * @returns A manifest object carrying the text inline as `extracted_text`.
 */
function manifestItemForSource(
  sourceRef: string,
  parsed: SourceRef,
  resolved: ResolvedText,
  purpose: string,
  now: Date = new Date(),
): ManifestObject {
  const hash = resolved.hash ?? sha256Text(resolved.text);
  const metadata = {
    ...resolved.metadata,
    source_ref: sourceRef,
    content_source: resolved.contentSource,
    read_only: true,
  };
  const item: ManifestObject = {
    source_ref: sourceRef,
    name: resolved.title ?? titleForRef(parsed),
    mime: resolved.mime ?? 'text/plain',
    size: resolved.size ?? resolved.text.length,
    hash,
    // Without an explicit revision the content hash doubles as the revision id.
    revision: resolved.revision ?? hash,
    status: 'active',
    updated_at: now.toISOString(),
    permissions: {
      mode: 'read_only',
      allowed_purposes: [purpose],
      // Spread last: permission fields supplied with the resolved text
      // override the two defaults above.
      ...resolved.permissions,
    },
    metadata,
    extracted_text_ref: resolved.extractedTextRef,
    extracted_text: resolved.text,
  };
  if (parsed.kind === 'open-files') {
    if (parsed.entity === 'file') item.file_id = parsed.id;
    if (parsed.entity === 'source') {
      item.source_id = parsed.id;
      item.path = parsed.path;
    }
  }
  if (parsed.kind === 'file') item.path = parsed.path;
  if (parsed.kind === 's3') item.path = parsed.key;
  if (parsed.kind === 'web') item.url = parsed.url;
  return item;
}

/**
 * Ingest one source ref into the knowledge database.
 *
 * `open-files://` refs are read through the local catalog; every other
 * supported ref is read directly. The text is wrapped in a single manifest
 * item and handed to `ingestOpenFilesManifestItems` with the audit read
 * action 'source_ref_ingest_read'.
 *
 * `options.now`, when given, is also used for the item's `updated_at`, so all
 * timestamps written by one call come from the same clock value.
 *
 * @param options - Database path, source ref, and optional purpose, config,
 *   safety policy and clock override. `purpose` defaults to 'knowledge_index'.
 * @returns The manifest ingest result extended with the source ref, content
 *   source, `read_only: true` and the item hash.
 */
export async function ingestSourceRef(options: SourceIngestOptions): Promise<SourceIngestResult> {
  const purpose = options.purpose ?? 'knowledge_index';
  const parsed = parseSourceRef(options.sourceRef);
  const resolved = parsed.kind === 'open-files'
    ? await readOpenFilesSourceText(options)
    : await readDirectSourceText(parsed, options.config, options.safetyPolicy);
  const item = manifestItemForSource(options.sourceRef, parsed, resolved, purpose, options.now);
  const result = await ingestOpenFilesManifestItems({
    dbPath: options.dbPath,
    items: [item],
    sourceLabel: options.sourceRef,
    readAction: 'source_ref_ingest_read',
    safetyPolicy: options.safetyPolicy,
    now: options.now,
  });
  return {
    ...result,
    source_ref: options.sourceRef,
    content_source: resolved.contentSource,
    read_only: true,
    hash: String(item.hash),
  };
}
|
package/src/source-ref.ts
CHANGED
|
@@ -82,6 +82,18 @@ export function parseSourceRef(uri: string): SourceRef {
|
|
|
82
82
|
throw new Error(`Unsupported source ref scheme: ${uri}`);
|
|
83
83
|
}
|
|
84
84
|
|
|
85
|
+
/**
 * Map a source ref URI to the URI form used for the catalog entry.
 *
 * For an open-files file ref that carries a revision id, a trailing
 * `/revision/<id>` segment is removed; every other URI is returned unchanged.
 *
 * @param uri - Source ref URI.
 * @param parsed - Parsed form of `uri`; parsed on demand when omitted.
 * @returns The URI without its trailing revision segment, or `uri` itself.
 */
export function catalogSourceUriForRef(uri: string, parsed = parseSourceRef(uri)): string {
  const isRevisionedFile = parsed.kind === 'open-files' && parsed.entity === 'file' && Boolean(parsed.revision_id);
  return isRevisionedFile ? uri.replace(/\/revision\/[^/]+$/, '') : uri;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Extract the revision id from a source ref URI.
 *
 * @param uri - Source ref URI; it is parsed with `parseSourceRef`.
 * @returns The revision id of an open-files file ref, or `null` when the ref
 *   is of another kind or carries no revision id.
 */
export function revisionIdForSourceRef(uri: string): string | null {
  const parsed = parseSourceRef(uri);
  if (parsed.kind !== 'open-files' || parsed.entity !== 'file') {
    return null;
  }
  return parsed.revision_id ?? null;
}
|
|
96
|
+
|
|
85
97
|
export function isSupportedSourceRef(uri: string): boolean {
|
|
86
98
|
try {
|
|
87
99
|
parseSourceRef(uri);
|