@hasna/knowledge 0.2.9 → 0.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,6 +15,7 @@ var __export = (target, all) => {
15
15
  });
16
16
  };
17
17
  var __esm = (fn, res) => () => (fn && (res = fn(fn = 0)), res);
18
+ var __require = import.meta.require;
18
19
 
19
20
  // src/mcp-http.js
20
21
  var exports_mcp_http = {};
@@ -13655,11 +13656,11 @@ function date4(params) {
13655
13656
  // node_modules/zod/v4/classic/external.js
13656
13657
  config(en_default());
13657
13658
  // src/mcp.js
13658
- import { existsSync as existsSync3, readFileSync as readFileSync3, writeFileSync as writeFileSync3 } from "fs";
13659
+ import { existsSync as existsSync7, readFileSync as readFileSync7, writeFileSync as writeFileSync4 } from "fs";
13659
13660
  // package.json
13660
13661
  var package_default = {
13661
13662
  name: "@hasna/knowledge",
13662
- version: "0.2.9",
13663
+ version: "0.2.11",
13663
13664
  description: "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
13664
13665
  type: "module",
13665
13666
  bin: {
@@ -13676,7 +13677,7 @@ var package_default = {
13676
13677
  scripts: {
13677
13678
  test: "bun test",
13678
13679
  "test:cli": "bun test tests/cli.test.ts",
13679
- build: "bun build --target=bun --outfile=bin/open-knowledge.js --minify --external @aws-sdk/client-s3 --external @aws-sdk/credential-providers src/cli.ts && bun build --target=bun --outfile=bin/open-knowledge-mcp.js --external @modelcontextprotocol/sdk src/mcp.js",
13680
+ build: "bun build --target=bun --outfile=bin/open-knowledge.js --minify --external @aws-sdk/client-s3 --external @aws-sdk/credential-providers --external ai --external @ai-sdk/openai --external @ai-sdk/anthropic --external @ai-sdk/deepseek src/cli.ts && bun build --target=bun --outfile=bin/open-knowledge-mcp.js --external @modelcontextprotocol/sdk --external @aws-sdk/client-s3 --external @aws-sdk/credential-providers --external ai --external @ai-sdk/openai --external @ai-sdk/anthropic --external @ai-sdk/deepseek src/mcp.js",
13680
13681
  prepublishOnly: "bun run build",
13681
13682
  postinstall: "bun run build"
13682
13683
  },
@@ -13709,7 +13710,11 @@ var package_default = {
13709
13710
  dependencies: {
13710
13711
  "@aws-sdk/client-s3": "^3.1063.0",
13711
13712
  "@aws-sdk/credential-providers": "^3.1063.0",
13713
+ "@ai-sdk/anthropic": "^3.0.81",
13714
+ "@ai-sdk/deepseek": "^2.0.35",
13715
+ "@ai-sdk/openai": "^3.0.68",
13712
13716
  "@modelcontextprotocol/sdk": "^1.29.0",
13717
+ ai: "^6.0.197",
13713
13718
  zod: "^4.3.6"
13714
13719
  },
13715
13720
  devDependencies: {
@@ -13763,6 +13768,28 @@ function defaultKnowledgeConfig() {
13763
13768
  preferred_ref: "open-files",
13764
13769
  allowed_schemes: ["open-files", "s3", "file", "https", "http"]
13765
13770
  },
13771
+ providers: {
13772
+ default_model: "openai:gpt-5.2",
13773
+ aliases: {
13774
+ fast: "openai:gpt-5-mini",
13775
+ reasoning: "anthropic:claude-opus-4-6",
13776
+ sonnet: "anthropic:claude-sonnet-4-6",
13777
+ deepseek: "deepseek:deepseek-chat",
13778
+ "deepseek-reasoning": "deepseek:deepseek-reasoner"
13779
+ },
13780
+ openai: {
13781
+ api_key_env: "OPENAI_API_KEY",
13782
+ default_model: "gpt-5.2"
13783
+ },
13784
+ anthropic: {
13785
+ api_key_env: "ANTHROPIC_API_KEY",
13786
+ default_model: "claude-sonnet-4-6"
13787
+ },
13788
+ deepseek: {
13789
+ api_key_env: "DEEPSEEK_API_KEY",
13790
+ default_model: "deepseek-chat"
13791
+ }
13792
+ },
13766
13793
  safety: {
13767
13794
  network: {
13768
13795
  web_search_enabled: false,
@@ -13952,6 +13979,160 @@ function revisionIdForSourceRef(uri) {
13952
13979
  return parsed.kind === "open-files" && parsed.entity === "file" ? parsed.revision_id ?? null : null;
13953
13980
  }
13954
13981
 
13982
+ // src/artifact-store.ts
13983
+ import { existsSync as existsSync3, mkdirSync as mkdirSync2, readFileSync as readFileSync3, writeFileSync as writeFileSync3 } from "fs";
13984
+ import { dirname as dirname2, join as join2, relative, sep } from "path";
13985
/**
 * Canonicalises an artifact key into a relative, forward-slash separated path.
 *
 * Backslashes are treated as separators, surrounding whitespace is trimmed and
 * empty segments (doubled or trailing slashes) are dropped.
 *
 * @param {string} key - Caller-supplied artifact key.
 * @returns {string} The cleaned key, segments joined with "/".
 * @throws {Error} When the key is blank, starts with "/", or contains a "." or ".." segment.
 */
function normalizeArtifactKey(key) {
  const invalid = () => new Error(`Invalid artifact key: ${key}`);
  const unified = key.replace(/\\/g, "/").trim();
  if (unified === "" || unified[0] === "/") {
    throw invalid();
  }
  const parts = [];
  for (const part of unified.split("/")) {
    if (part === "") {
      continue;
    }
    if (part === "." || part === "..") {
      throw invalid();
    }
    parts.push(part);
  }
  if (parts.length === 0) {
    throw invalid();
  }
  return parts.join("/");
}
13996
/**
 * Throws when `target` is not located at or below `root`.
 *
 * @param {string} root - Directory that must contain the target.
 * @param {string} target - Path to check.
 * @throws {Error} When the relative path from root to target climbs out of root.
 */
function assertInside(root, target) {
  const rel = relative(root, target);
  // Only a leading ".." *segment* leaves the root. A name that merely begins
  // with two dots (e.g. "..cache/file") is a regular entry inside it, so a bare
  // startsWith("..") check would reject valid keys.
  if (rel === ".." || rel.startsWith(`..${sep}`)) {
    throw new Error(`Artifact path escapes root: ${target}`);
  }
}
14002
+
14003
/**
 * Artifact store backed by a directory on the local filesystem.
 *
 * The root directory is created (recursively) when the store is constructed.
 * Every key goes through normalizeArtifactKey and the resulting path is checked
 * with assertInside before the disk is touched.
 */
class LocalArtifactStore {
  root;
  type = "local";
  canRead = true;
  canWrite = true;
  constructor(root) {
    this.root = root;
    mkdirSync2(root, { recursive: true });
  }
  /** Resolves a caller-supplied key to its normalised form and on-disk path. */
  #locate(key) {
    const normalized = normalizeArtifactKey(key);
    const path = join2(this.root, normalized);
    assertInside(this.root, path);
    return { normalized, path };
  }
  /**
   * Writes `entry.body` to the file for `entry.key`, creating parent directories.
   * @returns {Promise<{key: string, uri: string}>} Normalised key plus a `file://` URI built from the path.
   */
  async put(entry) {
    const { normalized, path } = this.#locate(entry.key);
    mkdirSync2(dirname2(path), { recursive: true });
    writeFileSync3(path, entry.body);
    return { key: normalized, uri: `file://${path}` };
  }
  /** Reads the artifact stored under `key` as UTF-8 text. */
  async getText(key) {
    const { path } = this.#locate(key);
    return readFileSync3(path, "utf8");
  }
  /** Reports whether a file exists for `key`. */
  async exists(key) {
    const { path } = this.#locate(key);
    return existsSync3(path);
  }
}
14033
+
14034
/**
 * Artifact store backed by an S3 bucket.
 *
 * The AWS SDK modules are imported lazily, so they are only loaded when the
 * store actually talks to S3. A pre-built client may be supplied through
 * `options.client`; otherwise one is created on first use and cached.
 */
class S3ArtifactStore {
  options;
  type = "s3";
  canRead = true;
  canWrite = true;
  client;
  constructor(options) {
    this.options = options;
    this.client = options.client;
  }
  /** Returns the cached S3 client, building it from the store options when absent. */
  async getClient() {
    if (!this.client) {
      const [s3Module, credentialModule] = await Promise.all([
        import("@aws-sdk/client-s3"),
        import("@aws-sdk/credential-providers")
      ]);
      const { S3Client } = s3Module;
      const { fromIni } = credentialModule;
      const { region, profile, max_attempts: maxAttempts } = this.options;
      this.client = new S3Client({
        region,
        credentials: profile ? fromIni({ profile }) : undefined,
        maxAttempts
      });
    }
    return this.client;
  }
  /** Builds the bucket object key: normalised key, prefixed by the normalised `options.prefix` when set. */
  objectKey(key) {
    const normalizedKey = normalizeArtifactKey(key);
    const { prefix } = this.options;
    if (!prefix) {
      return normalizedKey;
    }
    const normalizedPrefix = normalizeArtifactKey(prefix);
    return normalizedPrefix ? `${normalizedPrefix}/${normalizedKey}` : normalizedKey;
  }
  /** Loads one command class from the S3 SDK together with the client. */
  async #load(commandName) {
    const [s3Module, client] = await Promise.all([
      import("@aws-sdk/client-s3"),
      this.getClient()
    ]);
    return { Command: s3Module[commandName], client };
  }
  /**
   * Uploads `entry.body` under the object key derived from `entry.key`.
   * @returns {Promise<{key: string, uri: string}>} Object key and its `s3://` URI.
   */
  async put(entry) {
    const { Command, client } = await this.#load("PutObjectCommand");
    const key = this.objectKey(entry.key);
    const { bucket, server_side_encryption: sse, kms_key_id: kmsKeyId } = this.options;
    await client.send(new Command({
      Bucket: bucket,
      Key: key,
      Body: entry.body,
      ContentType: entry.content_type,
      Metadata: entry.metadata,
      ServerSideEncryption: sse,
      SSEKMSKeyId: kmsKeyId
    }));
    return { key, uri: `s3://${this.options.bucket}/${key}` };
  }
  /** Downloads the object for `key` as text; resolves to "" when the response has no body. */
  async getText(key) {
    const { Command, client } = await this.#load("GetObjectCommand");
    const objectKey = this.objectKey(key);
    const response = await client.send(new Command({
      Bucket: this.options.bucket,
      Key: objectKey
    }));
    return response.Body ? await response.Body.transformToString() : "";
  }
  /** Issues a HEAD request for `key`; not-found style errors resolve to false, anything else is rethrown. */
  async exists(key) {
    const { Command, client } = await this.#load("HeadObjectCommand");
    const objectKey = this.objectKey(key);
    try {
      await client.send(new Command({
        Bucket: this.options.bucket,
        Key: objectKey
      }));
      return true;
    } catch (error48) {
      const name = error48 instanceof Error ? error48.name : "";
      if (["NotFound", "NoSuchKey", "NotFoundError"].includes(name)) {
        return false;
      }
      throw error48;
    }
  }
}
14114
/**
 * Picks the artifact store implementation described by the configuration.
 *
 * @param {object} config2 - Knowledge config; `storage.type === "s3"` selects S3.
 * @param {object} workspace - Workspace descriptor; `artifactsDir` is the local store root.
 * @returns {S3ArtifactStore|LocalArtifactStore}
 * @throws {Error} When S3 storage is selected without `storage.s3.bucket`.
 */
function createArtifactStore(config2, workspace) {
  const { storage } = config2;
  if (storage.type !== "s3") {
    return new LocalArtifactStore(workspace.artifactsDir);
  }
  const s3 = storage.s3;
  if (!s3?.bucket) {
    throw new Error("S3 artifact storage requires storage.s3.bucket");
  }
  const { bucket, prefix, region, profile, max_attempts, server_side_encryption, kms_key_id } = s3;
  return new S3ArtifactStore({
    bucket,
    prefix,
    region,
    profile,
    max_attempts,
    server_side_encryption,
    kms_key_id
  });
}
14130
+
14131
+ // src/outbox-consume.ts
14132
+ import { createHash as createHash2, randomUUID as randomUUID3 } from "crypto";
14133
+ import { existsSync as existsSync4, readFileSync as readFileSync4 } from "fs";
14134
+ import { basename } from "path";
14135
+
13955
14136
  // src/knowledge-db.ts
13956
14137
  import { Database } from "bun:sqlite";
13957
14138
  var MIGRATION_1 = `
@@ -14191,10 +14372,35 @@ function getSchemaVersion(db) {
14191
14372
  const row = db.query("SELECT MAX(version) AS version FROM schema_versions").get();
14192
14373
  return row?.version ?? 0;
14193
14374
  }
14375
/**
 * Counts the rows of one table.
 *
 * The table name is interpolated into the SQL text, so callers must pass a
 * trusted identifier, never user input.
 *
 * @param {object} db - Database handle exposing `query(sql).get()`.
 * @param {string} table - Table name.
 * @returns {number} Row count, or 0 when the query yields no row.
 */
function count(db, table) {
  const statement = db.query(`SELECT COUNT(*) AS n FROM ${table}`);
  const result = statement.get();
  return result?.n ?? 0;
}
14379
/**
 * Opens the knowledge database at `path` and reports its schema version plus
 * the row count of each tracked table. The handle is always closed afterwards.
 *
 * @param {string} path - Database file path.
 * @returns {object} `schema_version` followed by one count per table.
 */
function getKnowledgeDbStats(path) {
  // [result field, table name] pairs, in the order they appear in the result.
  const countedTables = [
    ["sources", "sources"],
    ["source_revisions", "source_revisions"],
    ["chunks", "chunks"],
    ["wiki_pages", "wiki_pages"],
    ["citations", "citations"],
    ["indexes", "knowledge_indexes"],
    ["runs", "runs"],
    ["run_events", "run_events"],
    ["redaction_findings", "redaction_findings"],
    ["audit_events", "audit_events"],
    ["approval_gates", "approval_gates"]
  ];
  const db = openKnowledgeDb(path);
  try {
    const stats = { schema_version: getSchemaVersion(db) };
    for (const [field, table] of countedTables) {
      stats[field] = count(db, table);
    }
    return stats;
  } finally {
    db.close();
  }
}
14194
14400
 
14195
14401
  // src/safety.ts
14196
14402
  import { createHash, randomUUID as randomUUID2 } from "crypto";
14197
- import { relative, resolve as resolve2, sep } from "path";
14403
+ import { relative as relative2, resolve as resolve2, sep as sep2 } from "path";
14198
14404
  function envEnabled(name) {
14199
14405
  const value = process.env[name];
14200
14406
  return value === "1" || value === "true" || value === "yes";
@@ -14237,8 +14443,8 @@ function resolveSafetyPolicy(config2, workspace) {
14237
14443
  };
14238
14444
  }
14239
14445
  function isInside(root, target) {
14240
- const rel = relative(root, target);
14241
- return rel === "" || !rel.startsWith("..") && rel !== ".." && !rel.startsWith(`..${sep}`);
14446
+ const rel = relative2(root, target);
14447
+ return rel === "" || !rel.startsWith("..") && rel !== ".." && !rel.startsWith(`..${sep2}`);
14242
14448
  }
14243
14449
  function assertWriteAllowed(targetPath, policy) {
14244
14450
  const resolved = resolve2(targetPath);
@@ -14246,6 +14452,47 @@ function assertWriteAllowed(targetPath, policy) {
14246
14452
  throw new Error(`Safety policy denied write outside .hasna/apps/knowledge: ${targetPath}`);
14247
14453
  }
14248
14454
  }
14455
/**
 * Enforces the safety policy for reading an `s3://` URI.
 *
 * @param {string} uri - S3 URI; its host component is taken as the bucket name.
 * @param {object} policy - Resolved safety policy (`network.s3ReadsEnabled`, `network.allowedS3Buckets`).
 * @throws {TypeError} When `uri` cannot be parsed as a URL.
 * @throws {Error} When S3 reads are disabled or the bucket is not allow-listed.
 */
function assertS3ReadAllowed(uri, policy) {
  const { hostname: bucket } = new URL(uri);
  const { network } = policy;
  if (!network.s3ReadsEnabled) {
    throw new Error("Safety policy denied S3 read. Set safety.network.s3_reads_enabled=true or HASNA_KNOWLEDGE_ALLOW_S3_READS=1.");
  }
  const allowed = network.allowedS3Buckets.includes(bucket);
  if (!allowed) {
    throw new Error(`Safety policy denied S3 bucket "${bucket}". Add it to safety.network.allowed_s3_buckets or HASNA_KNOWLEDGE_ALLOWED_S3_BUCKETS.`);
  }
}
14465
/**
 * Enforces the safety policy for web search.
 *
 * @param {object} policy - Resolved safety policy; `network.webSearchEnabled` must be truthy.
 * @throws {Error} When web search is not enabled.
 */
function assertWebSearchAllowed(policy) {
  if (policy.network.webSearchEnabled) {
    return;
  }
  throw new Error("Safety policy denied web search. Set safety.network.web_search_enabled=true or HASNA_KNOWLEDGE_WEB_SEARCH=1.");
}
14470
// Secret detectors, applied in array order. Order matters: a more specific
// pattern must run before any broader pattern that would also match its input.
// In particular "sk-ant-..." has to be handled before the generic "sk-..."
// rule, otherwise every Anthropic key is consumed (and labelled) as an OpenAI key.
var REDACTION_PATTERNS = [
  { type: "private_key_block", severity: "high", regex: /-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/g, replacement: "[REDACTED:private_key_block]" },
  { type: "secret_assignment", severity: "high", regex: /\b(?:api[_-]?key|secret|token|password)\s*[:=]\s*['"]?[^'"\s]{8,}/gi, replacement: "[REDACTED:secret_assignment]" },
  { type: "anthropic_api_key", severity: "high", regex: /\bsk-ant-[A-Za-z0-9_-]{20,}\b/g, replacement: "[REDACTED:anthropic_api_key]" },
  { type: "openai_api_key", severity: "high", regex: /\bsk-[A-Za-z0-9_-]{20,}\b/g, replacement: "[REDACTED:openai_api_key]" },
  { type: "aws_access_key_id", severity: "high", regex: /\bA(?:KIA|SIA)[A-Z0-9]{16}\b/g, replacement: "[REDACTED:aws_access_key_id]" }
];
/**
 * Replaces every secret recognised by REDACTION_PATTERNS with its placeholder.
 *
 * @param {string} text - Text to scrub.
 * @param {object} [policy] - Safety policy; when given with `redaction.enabled`
 *   falsy the text is returned untouched.
 * @returns {{text: string, findings: Array<{type: string, severity: string, start: number, end: number}>}}
 *   The scrubbed text and one finding per replacement. `start`/`end` index into
 *   the text as it stood when that pattern ran (i.e. after the replacements of
 *   earlier patterns), not necessarily into the original input.
 */
function redactSecrets(text, policy) {
  if (policy && !policy.redaction.enabled)
    return { text, findings: [] };
  let output = text;
  const findings = [];
  for (const pattern of REDACTION_PATTERNS) {
    output = output.replace(pattern.regex, (match, ...args) => {
      // Replacer arguments are (...captures, offset, string[, groups]). Captures
      // are strings or undefined, so the first numeric argument is the offset
      // regardless of how many groups a pattern declares.
      const located = args.find((arg) => typeof arg === "number");
      const offset = located ?? output.indexOf(match);
      findings.push({
        type: pattern.type,
        severity: pattern.severity,
        start: Math.max(0, offset),
        end: Math.max(0, offset + match.length)
      });
      return pattern.replacement;
    });
  }
  return { text: output, findings };
}
14249
14496
  function auditId(input) {
14250
14497
  return `audit_${createHash("sha256").update(`${input.event_type}\x00${input.action}\x00${input.target_uri ?? ""}\x00${input.created_at ?? ""}\x00${JSON.stringify(input.metadata ?? {})}\x00${randomUUID2()}`).digest("hex").slice(0, 24)}`;
14251
14498
  }
@@ -14264,6 +14511,795 @@ function recordAuditEvent(db, input) {
14264
14511
  ]);
14265
14512
  return id;
14266
14513
  }
14514
/**
 * Persists one `redaction_findings` row per finding.
 *
 * @param {object} db - Database handle exposing `run(sql, params)`.
 * @param {object} input - `findings` plus optional `source_uri`, `run_id`,
 *   `metadata` and `created_at` (defaults to the current time).
 * @returns {number} Number of findings supplied.
 */
function recordRedactionFindings(db, input) {
  const createdAt = input.created_at ?? new Date().toISOString();
  const insertSql = `INSERT INTO redaction_findings (id, source_uri, run_id, severity, finding_type, metadata_json, created_at)
    VALUES (?, ?, ?, ?, ?, ?, ?)`;
  input.findings.forEach((finding) => {
    const { severity, type, start, end } = finding;
    // Finding offsets are merged over the caller's metadata so they always win.
    const metadataJson = JSON.stringify({ ...input.metadata ?? {}, start, end });
    db.run(insertSql, [
      `redact_${randomUUID2()}`,
      input.source_uri ?? null,
      input.run_id ?? null,
      severity,
      type,
      metadataJson,
      createdAt
    ]);
  });
  return input.findings.length;
}
14530
+
14531
+ // src/outbox-consume.ts
14532
/**
 * Derives a deterministic identifier from `value`.
 *
 * @param {string} prefix - Label placed before the underscore.
 * @param {string} value - Input hashed with SHA-256.
 * @returns {string} `<prefix>_` followed by the first 20 hex characters of the digest.
 */
function stableId(prefix, value) {
  const digest = createHash2("sha256").update(value).digest("hex");
  return `${prefix}_${digest.slice(0, 20)}`;
}
14535
/**
 * Narrows a value to a non-array object.
 *
 * @param {*} value - Anything.
 * @returns {object|undefined} The same value when it is a non-null, non-array object; otherwise undefined.
 */
function asObject(value) {
  if (value === null || typeof value !== "object" || Array.isArray(value)) {
    return undefined;
  }
  return value;
}
14538
/**
 * Narrows a value to a non-empty string.
 *
 * @param {*} value - Anything.
 * @returns {string|undefined} The value when it is a string of length > 0; otherwise undefined.
 */
function asString(value) {
  if (typeof value !== "string" || value === "") {
    return undefined;
  }
  return value;
}
14541
/**
 * Works out the source reference an outbox event points at.
 *
 * Precedence: an explicit `source_ref` / `source_uri` / `uri`; then a
 * `file_id` (optionally pinned by `revision_id` / `revision`); then a
 * `source_id` + `path` pair. Identifier components are URI-encoded.
 *
 * @param {object} event - Raw outbox event.
 * @returns {string} The explicit reference or a synthesised `open-files://` reference.
 * @throws {Error} When none of the identifying fields is present.
 */
function buildSourceRef(event) {
  for (const field of ["source_ref", "source_uri", "uri"]) {
    const explicit = asString(event[field]);
    if (explicit) {
      return explicit;
    }
  }
  const fileId = asString(event.file_id);
  if (fileId) {
    const fileRef = `open-files://file/${encodeURIComponent(fileId)}`;
    const revision = asString(event.revision_id) ?? asString(event.revision);
    if (!revision) {
      return fileRef;
    }
    return `${fileRef}/revision/${encodeURIComponent(revision)}`;
  }
  const sourceId = asString(event.source_id);
  const path = asString(event.path);
  if (!sourceId || !path) {
    throw new Error("Outbox event is missing source_ref, file_id, or source_id/path.");
  }
  return `open-files://source/${encodeURIComponent(sourceId)}/path/${encodeURIComponent(path)}`;
}
14558
/**
 * Returns the revision-independent URI for a source reference.
 *
 * @param {string} sourceRef - Reference as received.
 * @param {object} parsed - Parsed form of `sourceRef` (`kind`, `entity`, `revision_id`).
 * @returns {string} `sourceRef` without a trailing `/revision/<id>` when it is a
 *   revision-pinned open-files file reference; otherwise `sourceRef` unchanged.
 */
function baseSourceUri(sourceRef, parsed) {
  const isPinnedFile = parsed.kind === "open-files" && parsed.entity === "file" && Boolean(parsed.revision_id);
  return isPinnedFile ? sourceRef.replace(/\/revision\/[^/]+$/, "") : sourceRef;
}
14564
/**
 * Picks the content hash carried by an outbox event.
 *
 * @param {object} event - Raw outbox event.
 * @returns {string|null} First non-empty value among `hash`, `checksum`, `sha256`; null when none is set.
 */
function hashFromEvent(event) {
  for (const field of ["hash", "checksum", "sha256"]) {
    const candidate = asString(event[field]);
    if (candidate !== undefined) {
      return candidate;
    }
  }
  return null;
}
14567
/**
 * Determines the revision label for an outbox event.
 *
 * Precedence: `revision_id`, `revision`, `version_id` on the event; then the
 * revision embedded in an open-files reference; then the content hash.
 *
 * @param {object} event - Raw outbox event.
 * @param {object} parsed - Parsed source reference.
 * @param {string|null} hash2 - Content hash, if any.
 * @returns {string|null} The chosen revision, or null when nothing identifies one.
 */
function revisionFromEvent(event, parsed, hash2) {
  for (const field of ["revision_id", "revision", "version_id"]) {
    const candidate = asString(event[field]);
    if (candidate !== undefined) {
      return candidate;
    }
  }
  const fromReference = parsed.kind === "open-files" ? parsed.revision_id : undefined;
  return fromReference ?? hash2 ?? null;
}
14570
/**
 * Extracts the lower-cased event type of an outbox event.
 *
 * @param {object} event - Raw outbox event.
 * @returns {string} First non-empty value among `event`, `type`, `action`,
 *   `change_type`, lower-cased; "changed" when none is set.
 */
function eventType(event) {
  let label = "changed";
  for (const field of ["event", "type", "action", "change_type"]) {
    const candidate = asString(event[field]);
    if (candidate !== undefined) {
      label = candidate;
      break;
    }
  }
  return label.toLowerCase();
}
14573
/**
 * Chooses a display title for the source an outbox event refers to.
 *
 * @param {object} event - Raw outbox event.
 * @returns {string|null} `title`, else `name`, else the base name of `path`; null when none is available.
 */
function titleFromEvent(event) {
  const explicit = asString(event.title) ?? asString(event.name);
  if (explicit !== undefined) {
    return explicit;
  }
  const path = asString(event.path);
  return path ? basename(path) : null;
}
14577
/**
 * Converts a raw outbox event into the normalised shape used by the consumer.
 *
 * @param {object} event - Raw outbox event.
 * @param {string} now - Timestamp used when the event carries no `updated_at`.
 * @returns {object} Record with `raw`, `eventType`, `sourceRef`, `sourceUri`,
 *   `kind`, `title`, `revision`, `hash`, `status`, `updatedAt` and `acl`.
 */
function normalizeEvent(event, now) {
  const sourceRef = buildSourceRef(event);
  const parsed = parseSourceRef(sourceRef);
  const hash2 = hashFromEvent(event);
  const rawStatus = asString(event.status);
  const status = rawStatus === undefined ? null : rawStatus.toLowerCase();
  // `permissions` takes precedence over `acl`; a null/absent pair means "no ACL change".
  const acl = event.permissions ?? event.acl ?? undefined;
  return {
    raw: event,
    eventType: eventType(event),
    sourceRef,
    sourceUri: baseSourceUri(sourceRef, parsed),
    kind: parsed.kind,
    title: titleFromEvent(event),
    revision: revisionFromEvent(event, parsed, hash2),
    hash: hash2,
    status,
    updatedAt: asString(event.updated_at) ?? now,
    acl
  };
}
14595
/**
 * Parses outbox content into a list of event objects.
 *
 * Accepted layouts:
 *  - a JSON array of event objects;
 *  - a JSON object with an `events` array (envelope);
 *  - a single JSON event object (single-line or pretty-printed);
 *  - JSON Lines, one event object per non-blank line.
 *
 * @param {string} text - Raw outbox content.
 * @returns {object[]} Parsed events; empty when `text` is blank.
 * @throws {SyntaxError} When the content is not valid JSON / JSON Lines.
 * @throws {Error} When an entry is not an object.
 */
function parseOutboxText(text) {
  const trimmed = text.trim();
  if (!trimmed)
    return [];
  // Validates one parsed entry, failing with the layout-specific message.
  const toEvent = (entry, message) => {
    const event = asObject(entry);
    if (!event)
      throw new Error(message);
    return event;
  };
  const nonBlankLines = () => trimmed.split(/\r?\n/).filter((line) => line.trim().length > 0);
  const parseJsonLines = (lines) => lines.map((line) => toEvent(JSON.parse(line), "Outbox JSONL entries must be objects."));
  if (trimmed.startsWith("[")) {
    const parsed = JSON.parse(trimmed);
    if (!Array.isArray(parsed))
      throw new Error("Outbox array parse failed.");
    return parsed.map((entry) => toEvent(entry, "Outbox array entries must be objects."));
  }
  if (trimmed.startsWith("{")) {
    let parsed;
    try {
      parsed = JSON.parse(trimmed);
    } catch (error48) {
      // Not one JSON document. Multi-line content is treated as JSON Lines; a
      // single malformed line has no fallback, so surface the parse error.
      const lines = nonBlankLines();
      if (lines.length <= 1)
        throw error48;
      return parseJsonLines(lines);
    }
    // Validation happens outside the try block on purpose: a structural error
    // in a well-formed document must be reported as such rather than being
    // swallowed and retried as JSON Lines.
    const document = toEvent(parsed, "Outbox object parse failed.");
    if (Array.isArray(document.events)) {
      return document.events.map((entry) => toEvent(entry, "Outbox events entries must be objects."));
    }
    // Any other object is one event, whichever identifying fields it uses
    // (source_ref, source_uri, uri, file_id, or source_id + path).
    return [document];
  }
  return parseJsonLines(nonBlankLines());
}
14645
/**
 * Downloads an `s3://bucket/key` object and returns it as text.
 *
 * The AWS SDK is imported lazily. Region, profile and retry settings are taken
 * from `config2.storage.s3` only when that configuration targets the same bucket.
 *
 * @param {string} uri - S3 URI of the object.
 * @param {object} [config2] - Knowledge config.
 * @param {object} [safetyPolicy] - When given, the read is checked with assertS3ReadAllowed first.
 * @returns {Promise<string>} Object body as text, or "" when the response has no body.
 * @throws {Error} When the URI lacks a bucket or key, or the policy denies the read.
 */
async function readS3Text(uri, config2, safetyPolicy) {
  const { hostname: bucket, pathname } = new URL(uri);
  const key = decodeURIComponent(pathname.replace(/^\/+/, ""));
  if (!(bucket && key)) {
    throw new Error(`Invalid S3 outbox URI: ${uri}`);
  }
  if (safetyPolicy) {
    assertS3ReadAllowed(uri, safetyPolicy);
  }
  const [s3Module, credentialModule] = await Promise.all([
    import("@aws-sdk/client-s3"),
    import("@aws-sdk/credential-providers")
  ]);
  const { S3Client, GetObjectCommand } = s3Module;
  const { fromIni } = credentialModule;
  let s3Config;
  if (config2?.storage.type === "s3" && config2.storage.s3?.bucket === bucket) {
    s3Config = config2.storage.s3;
  }
  const client = new S3Client({
    region: s3Config?.region,
    credentials: s3Config?.profile ? fromIni({ profile: s3Config.profile }) : undefined,
    maxAttempts: s3Config?.max_attempts
  });
  const response = await client.send(new GetObjectCommand({ Bucket: bucket, Key: key }));
  return response.Body ? await response.Body.transformToString() : "";
}
14668
/**
 * Loads outbox content from an `s3://` URI or a local file path.
 *
 * @param {string} input - S3 URI or filesystem path.
 * @param {object} [config2] - Knowledge config, forwarded to the S3 reader.
 * @param {object} [safetyPolicy] - Safety policy, forwarded to the S3 reader.
 * @returns {Promise<string>} The outbox text (local files are read as UTF-8).
 * @throws {Error} When a local path does not exist.
 */
async function readOutboxInput(input, config2, safetyPolicy) {
  const isRemote = input.startsWith("s3://");
  if (isRemote) {
    return readS3Text(input, config2, safetyPolicy);
  }
  if (existsSync4(input)) {
    return readFileSync4(input, "utf8");
  }
  throw new Error(`Outbox not found: ${input}`);
}
14675
/**
 * Shallow-merges `patch` over a stored JSON object and re-serialises it.
 *
 * Stored text that is missing, unparseable, or not a JSON object is treated as
 * an empty object (best effort by design: corrupt metadata must not block the merge).
 *
 * @param {string|null|undefined} existing - Previously stored JSON text.
 * @param {object} patch - Keys to set; they override stored keys.
 * @returns {string} JSON text of the merged object.
 */
function mergeJson(existing, patch) {
  const readBase = () => {
    if (!existing) {
      return {};
    }
    try {
      return asObject(JSON.parse(existing)) ?? {};
    } catch {
      return {};
    }
  };
  return JSON.stringify({ ...readBase(), ...patch });
}
14686
/**
 * Upserts the `sources` row for an event and refreshes its metadata and ACL.
 *
 * The row is inserted (or, on a URI conflict, has kind/title/updated_at
 * refreshed), then re-read so that the outbox fields can be merged into the
 * stored metadata. The ACL column is only overwritten when the event carries one.
 *
 * @param {object} db - Database handle (`run`, `query`).
 * @param {object} event - Normalised outbox event.
 * @param {string} now - Creation timestamp for a newly inserted row.
 * @returns {string} Id of the source row.
 * @throws {Error} When the row cannot be read back after the upsert.
 */
function ensureSource(db, event, now) {
  const { sourceUri, sourceRef, status, updatedAt, acl } = event;
  const lastEvent = event.eventType;
  const insertMetadata = JSON.stringify({
    source_ref: sourceRef,
    source_uri: sourceUri,
    status,
    last_outbox_event: lastEvent
  });
  db.run(`INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    ON CONFLICT(uri) DO UPDATE SET
      kind = excluded.kind,
      title = COALESCE(excluded.title, sources.title),
      updated_at = excluded.updated_at`, [
    stableId("src", sourceUri),
    sourceUri,
    event.kind,
    event.title,
    insertMetadata,
    JSON.stringify(acl ?? {}),
    now,
    updatedAt
  ]);
  const stored = db.query("SELECT id, metadata_json, acl_json FROM sources WHERE uri = ?").get(sourceUri);
  if (!stored) {
    throw new Error(`Failed to upsert source for outbox event: ${sourceUri}`);
  }
  const patch = {
    source_ref: sourceRef,
    source_uri: sourceUri,
    last_outbox_event: lastEvent,
    last_outbox_at: updatedAt
  };
  if (status) {
    patch.status = status;
  }
  if (asString(event.raw.path)) {
    patch.path = event.raw.path;
  }
  // NULL means "keep the stored ACL"; it is bound twice because the CASE
  // expression references the parameter in both its test and its result.
  const aclJson = acl === undefined ? null : JSON.stringify(acl);
  db.run("UPDATE sources SET metadata_json = ?, acl_json = CASE WHEN ? IS NULL THEN acl_json ELSE ? END, updated_at = ? WHERE id = ?", [
    mergeJson(stored.metadata_json, patch),
    aclJson,
    aclJson,
    updatedAt,
    stored.id
  ]);
  return stored.id;
}
14725
/**
 * Upserts the `source_revisions` row for the revision named by an event and
 * flags it as needing a re-index.
 *
 * @param {object} db - Database handle (`run`, `query`).
 * @param {string} sourceId - Id of the owning source row.
 * @param {object} event - Normalised outbox event.
 * @param {string} now - Creation timestamp for a newly inserted row.
 * @returns {string|null} Id of the revision row; null when the event names no
 *   revision or the row cannot be read back.
 */
function ensureRevision(db, sourceId, event, now) {
  const { revision } = event;
  if (!revision) {
    return null;
  }
  const metadataJson = JSON.stringify({
    source_ref: event.sourceRef,
    source_uri: event.sourceUri,
    status: event.status,
    last_outbox_event: event.eventType,
    reindex_required: true
  });
  const extractedTextUri = asString(event.raw.extracted_text_ref) ?? null;
  const revisionId = stableId("rev", `${sourceId}\x00${revision}`);
  db.run(`INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)
    VALUES (?, ?, ?, ?, ?, ?, ?)
    ON CONFLICT(source_id, revision) DO UPDATE SET
      hash = COALESCE(excluded.hash, source_revisions.hash),
      metadata_json = excluded.metadata_json`, [revisionId, sourceId, revision, event.hash, extractedTextUri, metadataJson, now]);
  const stored = db.query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?").get(sourceId, revision);
  return stored?.id ?? null;
}
14744
/**
 * Lists the revision rows of a source that an event affects.
 *
 * Matches by revision label when the event has one, otherwise by content
 * hash, otherwise every revision of the source.
 *
 * @param {object} db - Database handle (`query`).
 * @param {string} sourceId - Id of the source row.
 * @param {object} event - Normalised outbox event.
 * @returns {string[]} Ids of the matching `source_revisions` rows.
 */
function revisionIdsForEvent(db, sourceId, event) {
  let rows;
  if (event.revision) {
    rows = db.query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?").all(sourceId, event.revision);
  } else if (event.hash) {
    rows = db.query("SELECT id FROM source_revisions WHERE source_id = ? AND hash = ?").all(sourceId, event.hash);
  } else {
    rows = db.query("SELECT id FROM source_revisions WHERE source_id = ?").all(sourceId);
  }
  return rows.map(({ id }) => id);
}
14753
/**
 * Removes all indexed data derived from a revision and flags it for re-indexing.
 *
 * For each chunk of the revision its embeddings and full-text rows are deleted;
 * the chunks follow, and the revision metadata gains `reindex_required` and an
 * `invalidated_at` timestamp.
 *
 * @param {object} db - Database handle (`run`, `query`).
 * @param {string} revisionId - Id of the `source_revisions` row.
 * @returns {{chunksDeleted: number, embeddingsDeleted: number}} Counts taken before deletion.
 */
function invalidateRevision(db, revisionId) {
  const chunks = db.query("SELECT id FROM chunks WHERE source_revision_id = ?").all(revisionId);
  let embeddingsDeleted = 0;
  chunks.forEach(({ id: chunkId }) => {
    // Count first: the figure is only available before the rows are removed.
    const counted = db.query("SELECT COUNT(*) AS n FROM chunk_embeddings WHERE chunk_id = ?").get(chunkId);
    embeddingsDeleted += counted?.n ?? 0;
    db.run("DELETE FROM chunk_embeddings WHERE chunk_id = ?", [chunkId]);
    db.run("DELETE FROM chunks_fts WHERE chunk_id = ?", [chunkId]);
  });
  db.run("DELETE FROM chunks WHERE source_revision_id = ?", [revisionId]);
  const stored = db.query("SELECT metadata_json FROM source_revisions WHERE id = ?").get(revisionId);
  const invalidation = { reindex_required: true, invalidated_at: new Date().toISOString() };
  db.run("UPDATE source_revisions SET metadata_json = ? WHERE id = ?", [mergeJson(stored?.metadata_json, invalidation), revisionId]);
  return { chunksDeleted: chunks.length, embeddingsDeleted };
}
14767
/**
 * Tells whether an event represents a deletion.
 *
 * @param {string} eventType2 - Lower-cased event type.
 * @param {string|null} status - Lower-cased status, if any.
 * @returns {boolean} True for status "deleted" or a delete/remove style event type.
 */
function isDeleteEvent(eventType2, status) {
  if (status === "deleted") {
    return true;
  }
  switch (eventType2) {
    case "delete":
    case "deleted":
    case "remove":
    case "removed":
      return true;
    default:
      return false;
  }
}
14770
/**
 * Tells whether an event type denotes a move or rename.
 *
 * @param {string} eventType2 - Lower-cased event type.
 * @returns {boolean}
 */
function isMoveEvent(eventType2) {
  switch (eventType2) {
    case "move":
    case "moved":
    case "rename":
    case "renamed":
    case "path_changed":
      return true;
    default:
      return false;
  }
}
14773
/**
 * Tells whether an event type denotes a permission / ACL change.
 *
 * @param {string} eventType2 - Lower-cased event type.
 * @returns {boolean}
 */
function isPermissionEvent(eventType2) {
  switch (eventType2) {
    case "permission":
    case "permissions":
    case "permission_changed":
    case "acl_changed":
      return true;
    default:
      return false;
  }
}
14776
/**
 * Applies an open-files outbox to the knowledge database.
 *
 * The outbox is read (local path or `s3://` URI) and parsed before the
 * database is opened for writing. All writes then happen in one transaction:
 * a `runs` row, an audit record for the read, one source/revision upsert plus
 * invalidation and `run_events` row per event, a zero-cost `provider_usage`
 * row, and a closing audit record. The database handle is always closed.
 *
 * @param {object} options - `input`, `dbPath`, and optional `config`,
 *   `safetyPolicy` and `now` (a Date; defaults to the current time).
 * @returns {Promise<object>} Summary counters for the run.
 */
async function consumeOpenFilesOutbox(options) {
  const { input, dbPath, safetyPolicy } = options;
  const now = (options.now ?? new Date()).toISOString();
  if (safetyPolicy) {
    assertWriteAllowed(dbPath, safetyPolicy);
  }
  migrateKnowledgeDb(dbPath);
  const text = await readOutboxInput(input, options.config, safetyPolicy);
  const events = parseOutboxText(text);
  const db = openKnowledgeDb(dbPath);
  const runId = `run_${randomUUID3()}`;
  try {
    const applyOutbox = db.transaction(() => {
      db.run(`INSERT INTO runs (id, type, prompt, status, provider, model, metadata_json, created_at, updated_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`, [
        runId,
        "open-files-outbox",
        input,
        "completed",
        "local",
        "open-files-outbox",
        JSON.stringify({ path: input, events: events.length }),
        now,
        now
      ]);
      const sourcesTouched = new Set();
      const revisionsTouched = new Set();
      const totals = {
        chunksDeleted: 0,
        embeddingsDeleted: 0,
        staleRevisions: 0,
        deletedSources: 0,
        movedSources: 0,
        permissionUpdates: 0
      };
      recordAuditEvent(db, {
        event_type: "source_read",
        action: input.startsWith("s3://") ? "s3_outbox_read" : "local_outbox_read",
        target_uri: input,
        decision: "allow",
        metadata: { events: events.length, read_only: true },
        created_at: now
      });
      for (const [index, raw] of events.entries()) {
        const event = normalizeEvent(raw, now);
        const sourceId = ensureSource(db, event, now);
        sourcesTouched.add(sourceId);
        const createdRevisionId = ensureRevision(db, sourceId, event, now);
        if (createdRevisionId) {
          revisionsTouched.add(createdRevisionId);
        }
        const affectedRevisionIds = revisionIdsForEvent(db, sourceId, event);
        for (const revisionId of affectedRevisionIds) {
          revisionsTouched.add(revisionId);
          const invalidation = invalidateRevision(db, revisionId);
          totals.chunksDeleted += invalidation.chunksDeleted;
          totals.embeddingsDeleted += invalidation.embeddingsDeleted;
          totals.staleRevisions += 1;
        }
        if (isDeleteEvent(event.eventType, event.status)) {
          totals.deletedSources += 1;
        }
        if (isMoveEvent(event.eventType)) {
          totals.movedSources += 1;
        }
        if (isPermissionEvent(event.eventType) || event.acl !== undefined) {
          totals.permissionUpdates += 1;
        }
        db.run(`INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
          VALUES (?, ?, ?, ?, ?, ?)`, [
          stableId("evt", `${runId}\x00${index}\x00${event.sourceRef}\x00${event.eventType}`),
          runId,
          "info",
          event.eventType,
          JSON.stringify({
            source_ref: event.sourceRef,
            source_uri: event.sourceUri,
            revision: event.revision,
            hash: event.hash,
            status: event.status,
            affected_revisions: affectedRevisionIds.length
          }),
          event.updatedAt
        ]);
      }
      // Outbox processing never calls a model; the usage row records that explicitly.
      db.run(`INSERT INTO provider_usage (id, run_id, provider, model, input_tokens, output_tokens, cost_usd, metadata_json, created_at)
        VALUES (?, ?, ?, ?, 0, 0, 0, ?, ?)`, [
        stableId("usage", runId),
        runId,
        "local",
        "open-files-outbox",
        JSON.stringify({ note: "No model provider used for outbox invalidation." }),
        now
      ]);
      recordAuditEvent(db, {
        event_type: "write",
        action: "knowledge_outbox_invalidation",
        target_uri: dbPath,
        decision: "allow",
        metadata: {
          run_id: runId,
          events: events.length,
          sources: sourcesTouched.size,
          revisions: revisionsTouched.size,
          chunks_deleted: totals.chunksDeleted,
          embeddings_deleted: totals.embeddingsDeleted
        },
        created_at: now
      });
      return {
        path: input,
        db_path: dbPath,
        run_id: runId,
        events_seen: events.length,
        sources_touched: sourcesTouched.size,
        revisions_touched: revisionsTouched.size,
        chunks_deleted: totals.chunksDeleted,
        embeddings_deleted: totals.embeddingsDeleted,
        stale_revisions: totals.staleRevisions,
        deleted_sources: totals.deletedSources,
        moved_sources: totals.movedSources,
        permission_updates: totals.permissionUpdates
      };
    });
    return applyOutbox();
  } finally {
    db.close();
  }
}
14896
+
14897
+ // src/manifest-ingest.ts
14898
+ import { createHash as createHash3 } from "crypto";
14899
+ import { existsSync as existsSync5, readFileSync as readFileSync5 } from "fs";
14900
+ import { basename as basename2 } from "path";
14901
/**
 * Builds a deterministic identifier for manifest ingestion.
 *
 * @param {string} prefix - Label placed before the underscore.
 * @param {string} value - Input hashed with SHA-256.
 * @returns {string} `<prefix>_` plus the first 20 hex characters of the digest.
 */
function stableId2(prefix, value) {
  const hasher = createHash3("sha256");
  hasher.update(value);
  const shortDigest = hasher.digest("hex").slice(0, 20);
  return `${prefix}_${shortDigest}`;
}
14904
/**
 * Returns `value` when it is a non-null, non-array object.
 *
 * @param {*} value - Anything.
 * @returns {object|undefined} The value itself, or undefined for everything else.
 */
function asObject2(value) {
  const isRecord = value !== null && typeof value === "object" && !Array.isArray(value);
  return isRecord ? value : undefined;
}
14907
/**
 * Return `value` when it is a non-empty string; otherwise `undefined`.
 *
 * @param {unknown} value - Candidate value.
 * @returns {string | undefined} The string, or `undefined` for empty strings and non-strings.
 */
function asString2(value) {
  if (typeof value !== "string") {
    return undefined;
  }
  return value.length > 0 ? value : undefined;
}
14910
/**
 * Return `value` when it is a finite number; otherwise `undefined`.
 *
 * @param {unknown} value - Candidate value.
 * @returns {number | undefined} The number, or `undefined` for NaN, infinities and non-numbers.
 */
function asNumber(value) {
  const isFiniteNumber = typeof value === "number" && Number.isFinite(value);
  if (isFiniteNumber) {
    return value;
  }
  return undefined;
}
14913
/**
 * Derive the source reference string for a manifest item.
 *
 * Resolution order:
 * 1. an explicit `source_ref`, `source_uri` or `uri` field;
 * 2. `file_id` (optionally with `revision_id` / `revision`) as an
 *    `open-files://file/...` reference;
 * 3. `source_id` plus `path` as an `open-files://source/.../path/...` reference.
 *
 * @param {object} item - Raw manifest item.
 * @returns {string} The source reference.
 * @throws {Error} When none of the supported field combinations is present.
 */
function buildSourceRefFromItem(item) {
  // An explicit reference wins; the fields are tried in this fixed order.
  for (const field of ["source_ref", "source_uri", "uri"]) {
    const explicit = asString2(item[field]);
    if (explicit !== undefined) {
      return explicit;
    }
  }

  const fileId = asString2(item.file_id);
  if (fileId) {
    const fileRef = `open-files://file/${encodeURIComponent(fileId)}`;
    const revision = asString2(item.revision_id) ?? asString2(item.revision);
    if (!revision) {
      return fileRef;
    }
    return `${fileRef}/revision/${encodeURIComponent(revision)}`;
  }

  const sourceId = asString2(item.source_id);
  const path = asString2(item.path);
  if (!sourceId || !path) {
    throw new Error("Manifest item is missing source_ref, file_id, or source_id/path.");
  }
  return `open-files://source/${encodeURIComponent(sourceId)}/path/${encodeURIComponent(path)}`;
}
14930
/**
 * Strip a trailing `/revision/<id>` segment from an open-files file reference.
 *
 * Only references whose parsed form is an open-files `file` entity with a
 * truthy `revision_id` are rewritten; everything else is returned unchanged.
 *
 * @param {string} sourceRef - Full source reference.
 * @param {object} parsed - Parsed form of `sourceRef` (reads `kind`, `entity`, `revision_id`).
 * @returns {string} The revision-independent source URI.
 */
function baseSourceUri2(sourceRef, parsed) {
  const isFileRevisionRef =
    parsed.kind === "open-files" && parsed.entity === "file" && Boolean(parsed.revision_id);
  if (!isFileRevisionRef) {
    return sourceRef;
  }
  return sourceRef.replace(/\/revision\/[^/]+$/, "");
}
14936
/**
 * Pick the inline text carried by a manifest item, if any.
 *
 * The first non-empty string among `extracted_text`, `text`, `content_text`
 * and `markdown` is used. Failing that, `content` is returned when it is a
 * string (even an empty one); otherwise the result is `null`.
 *
 * @param {object} item - Raw manifest item.
 * @returns {string | null} Inline text or `null`.
 */
function textFromItem(item) {
  for (const field of ["extracted_text", "text", "content_text", "markdown"]) {
    const candidate = asString2(item[field]);
    if (candidate !== undefined) {
      return candidate;
    }
  }
  if (typeof item.content === "string") {
    return item.content;
  }
  return null;
}
14943
/**
 * Find the reference to externally stored extracted text for a manifest item.
 *
 * Top-level `extracted_text_ref`, `extracted_text_uri` and `text_ref` are
 * checked first; then the same `extracted_text_ref` / `extracted_text_uri`
 * fields nested under an object-valued `content`.
 *
 * @param {object} item - Raw manifest item.
 * @returns {string | null} The reference, or `null` when none is present.
 */
function extractedTextUriFromItem(item) {
  const topLevel =
    asString2(item.extracted_text_ref) ??
    asString2(item.extracted_text_uri) ??
    asString2(item.text_ref);
  if (topLevel) {
    return topLevel;
  }

  const nested = asObject2(item.content);
  const nestedRef =
    asString2(nested?.extracted_text_ref) ?? asString2(nested?.extracted_text_uri);
  return nestedRef ?? null;
}
14950
/**
 * Choose a display title for a manifest item.
 *
 * Preference order: `title`, then `name`, then the base name of `path`.
 *
 * @param {object} item - Raw manifest item.
 * @returns {string | null} The title, or `null` when nothing usable exists.
 */
function titleFromItem(item) {
  const explicit = asString2(item.title) ?? asString2(item.name);
  if (explicit !== undefined) {
    return explicit;
  }
  const path = asString2(item.path);
  return path ? basename2(path) : null;
}
14954
/**
 * Read the content hash declared on a manifest item.
 *
 * The first non-empty string among `hash`, `checksum` and `sha256` is used.
 *
 * @param {object} item - Raw manifest item.
 * @returns {string | null} The declared hash, or `null`.
 */
function hashFromItem(item) {
  for (const field of ["hash", "checksum", "sha256"]) {
    const declared = asString2(item[field]);
    if (declared !== undefined) {
      return declared;
    }
  }
  return null;
}
14957
/**
 * Determine the revision label for a manifest item.
 *
 * Candidates, in order: `revision_id`, `revision`, `version_id` on the item;
 * the `revision_id` of an open-files parsed reference; the content hash; the
 * item's `updated_at`. When all are missing the literal "current" is used.
 *
 * @param {object} item - Raw manifest item.
 * @param {object} parsed - Parsed source reference (reads `kind`, `revision_id`).
 * @param {string | null} hash2 - Content hash of the item, if known.
 * @returns {string} The revision label.
 */
function revisionFromItem(item, parsed, hash2) {
  const declared =
    asString2(item.revision_id) ?? asString2(item.revision) ?? asString2(item.version_id);
  if (declared != null) {
    return declared;
  }

  const fromRef = parsed.kind === "open-files" ? parsed.revision_id : undefined;
  if (fromRef != null) {
    return fromRef;
  }

  if (hash2 != null) {
    return hash2;
  }
  return asString2(item.updated_at) ?? "current";
}
14961
/**
 * Build the metadata record stored alongside a source or revision.
 *
 * Every own enumerable field of `item` is copied except the inline text
 * fields (`text`, `content`, `content_text`, `extracted_text`, `markdown`).
 * `source_ref`, `source_uri` and `status` are then set from `normalized`,
 * overriding any same-named fields on the item.
 *
 * @param {object} item - Raw manifest item.
 * @param {{ sourceRef: string, sourceUri: string, status: string }} normalized - Normalized values.
 * @returns {object} A new metadata object.
 */
function metadataFromItem(item, normalized) {
  const inlineTextKeys = new Set(["text", "content", "content_text", "extracted_text", "markdown"]);
  const metadata = {};
  Object.entries(item)
    .filter(([key]) => !inlineTextKeys.has(key))
    .forEach(([key, value]) => {
      metadata[key] = value;
    });
  return Object.assign(metadata, {
    source_ref: normalized.sourceRef,
    source_uri: normalized.sourceUri,
    status: normalized.status
  });
}
14973
/**
 * Convert a raw manifest item into the normalized record used by the ingest code.
 *
 * @param {object} item - Raw manifest item.
 * @param {string} now - Timestamp used as `updatedAt` when the item has no `updated_at`.
 * @returns {object} Normalized item with `raw`, `sourceRef`, `sourceUri`, `kind`,
 *   `title`, `revision`, `hash`, `extractedTextUri`, `text`, `metadata`, `acl`,
 *   `status` and `updatedAt`.
 * @throws {Error} When no source reference can be derived from the item.
 */
function normalizeManifestItem(item, now) {
  // Identity: full reference, its parsed form, and the revision-independent URI.
  const sourceRef = buildSourceRefFromItem(item);
  const parsed = parseSourceRef(sourceRef);
  const sourceUri = baseSourceUri2(sourceRef, parsed);

  const hash2 = hashFromItem(item);
  const status = asString2(item.status) ?? "active";
  const title = titleFromItem(item);
  const revision = revisionFromItem(item, parsed, hash2);
  const extractedTextUri = extractedTextUriFromItem(item);
  const text = textFromItem(item);
  const metadata = metadataFromItem(item, { sourceRef, sourceUri, status });
  // `permissions` takes precedence over `acl`; an empty object is the fallback.
  const acl = item.permissions ?? item.acl ?? {};
  const updatedAt = asString2(item.updated_at) ?? now;

  return {
    raw: item,
    sourceRef,
    sourceUri,
    kind: parsed.kind,
    title,
    revision,
    hash: hash2,
    extractedTextUri,
    text,
    metadata,
    acl,
    status,
    updatedAt
  };
}
14995
/**
 * Parse manifest text into an array of item objects.
 *
 * Accepted shapes:
 * - a JSON array of objects;
 * - a JSON object with an `items` array of objects;
 * - a single JSON object, treated as one item;
 * - JSON Lines: one JSON object per non-empty line.
 *
 * Only JSON syntax errors trigger the JSON Lines fallback, so validation
 * errors (for example a non-object entry inside `items`) are reported as-is
 * instead of being replaced by an unrelated SyntaxError. A single object is
 * accepted whether it is written on one line or pretty-printed across several.
 *
 * @param {string} text - Raw manifest contents.
 * @returns {object[]} Parsed manifest items; empty for blank input.
 * @throws {SyntaxError} When the text is not valid JSON or JSON Lines.
 * @throws {Error} When an entry is not a JSON object.
 */
function parseManifestText(text) {
  const trimmed = text.trim();
  if (!trimmed) {
    return [];
  }

  const requireObject = (entry, message) => {
    const item = asObject2(entry);
    if (!item) {
      throw new Error(message);
    }
    return item;
  };
  const nonEmptyLines = () => trimmed.split(/\r?\n/).filter((line) => line.trim().length > 0);
  const parseJsonLines = (lines) =>
    lines.map((line) => requireObject(JSON.parse(line), "Manifest JSONL entries must be objects."));

  if (trimmed.startsWith("[")) {
    const parsed = JSON.parse(trimmed);
    if (!Array.isArray(parsed)) {
      throw new Error("Manifest array parse failed.");
    }
    return parsed.map((entry) => requireObject(entry, "Manifest array entries must be objects."));
  }

  if (trimmed.startsWith("{")) {
    let parsed;
    try {
      parsed = JSON.parse(trimmed);
    } catch (error) {
      // Not one JSON document. With several lines it may still be JSON Lines;
      // with a single line there is nothing else to try.
      const lines = nonEmptyLines();
      if (lines.length <= 1) {
        throw error;
      }
      return parseJsonLines(lines);
    }
    const object2 = requireObject(parsed, "Manifest object parse failed.");
    if (Array.isArray(object2.items)) {
      return object2.items.map((entry) => requireObject(entry, "Manifest items entries must be objects."));
    }
    // Any other object is a single item; whether it identifies a source is
    // decided later, when the item is normalized.
    return [object2];
  }

  return parseJsonLines(nonEmptyLines());
}
15045
/**
 * Download an S3 object named by an `s3://bucket/key` URI and return it as text.
 *
 * The AWS SDK modules are imported lazily. Region, profile credentials and
 * retry count are taken from `config2.storage.s3` only when storage is of
 * type "s3" and its bucket matches the URI's bucket.
 *
 * @param {string} uri - `s3://` URI of the manifest.
 * @param {object} [config2] - Configuration; only `storage` is read.
 * @param {object} [safetyPolicy] - When given, the read is first checked with `assertS3ReadAllowed`.
 * @returns {Promise<string>} Object body as a string, or "" when the response has no body.
 * @throws {Error} When the URI lacks a bucket or key.
 */
async function readS3Text2(uri, config2, safetyPolicy) {
  const { hostname: bucket, pathname } = new URL(uri);
  const key = decodeURIComponent(pathname.replace(/^\/+/, ""));
  if (!bucket || !key) {
    throw new Error(`Invalid S3 manifest URI: ${uri}`);
  }
  if (safetyPolicy) {
    assertS3ReadAllowed(uri, safetyPolicy);
  }

  const [s3Module, credentialsModule] = await Promise.all([
    import("@aws-sdk/client-s3"),
    import("@aws-sdk/credential-providers")
  ]);
  const { S3Client, GetObjectCommand } = s3Module;
  const { fromIni } = credentialsModule;

  // Configured S3 settings apply only to the configured bucket.
  let s3Config;
  if (config2?.storage.type === "s3" && config2.storage.s3?.bucket === bucket) {
    s3Config = config2.storage.s3;
  }
  const credentials = s3Config?.profile ? fromIni({ profile: s3Config.profile }) : undefined;
  const client = new S3Client({
    region: s3Config?.region,
    credentials,
    maxAttempts: s3Config?.max_attempts
  });

  const response = await client.send(new GetObjectCommand({ Bucket: bucket, Key: key }));
  const body = response.Body;
  return body ? await body.transformToString() : "";
}
15068
/**
 * Read manifest text from an `s3://` URI or from a local file path.
 *
 * @param {string} input - `s3://` URI or local path.
 * @param {object} [config2] - Configuration forwarded to the S3 reader.
 * @param {object} [safetyPolicy] - Safety policy forwarded to the S3 reader.
 * @returns {Promise<string>} The manifest contents (local files are read as UTF-8).
 * @throws {Error} When a local path does not exist.
 */
async function readManifestInput(input, config2, safetyPolicy) {
  const isS3Uri = input.startsWith("s3://");
  if (isS3Uri) {
    return readS3Text2(input, config2, safetyPolicy);
  }
  if (existsSync5(input)) {
    return readFileSync5(input, "utf8");
  }
  throw new Error(`Manifest not found: ${input}`);
}
15075
/**
 * Split text into overlapping chunks of roughly `maxChars` characters.
 *
 * CRLF line endings are normalized to LF first. A chunk normally ends at
 * `start + maxChars`, but when a paragraph break ("\n\n") or sentence break
 * (". ") lies past the midpoint of that window the chunk ends just after the
 * break instead. Consecutive chunks overlap by up to `overlapChars`.
 *
 * The start of each chunk always moves forward by at least one character.
 * Without that guarantee, a chunk shortened by a break combined with a large
 * overlap could put the next start at or before the current one, and the
 * loop would never finish.
 *
 * @param {string} text - Text to split.
 * @param {number} maxChars - Target maximum chunk length in characters.
 * @param {number} overlapChars - Characters shared between neighbouring chunks.
 * @returns {{ ordinal: number, text: string, startOffset: number, endOffset: number }[]}
 *   Trimmed, non-empty chunks with their offsets into the normalized text.
 */
function chunkText(text, maxChars, overlapChars) {
  const normalized = text.replace(/\r\n/g, "\n");
  if (!normalized.trim()) {
    return [];
  }

  const chunks = [];
  let start = 0;
  while (start < normalized.length) {
    const hardEnd = Math.min(normalized.length, start + maxChars);
    let end = hardEnd;
    if (hardEnd < normalized.length) {
      const paragraphBreak = normalized.lastIndexOf("\n\n", hardEnd);
      const sentenceBreak = normalized.lastIndexOf(". ", hardEnd);
      const candidate = Math.max(paragraphBreak, sentenceBreak);
      // Only honour a break in the second half of the window, so chunks do not become tiny.
      if (candidate > start + Math.floor(maxChars * 0.5)) {
        end = candidate + (candidate === paragraphBreak ? 2 : 1);
      }
    }

    const chunk = normalized.slice(start, end).trim();
    if (chunk) {
      chunks.push({
        ordinal: chunks.length,
        text: chunk,
        startOffset: start,
        endOffset: end
      });
    }
    if (end >= normalized.length) {
      break;
    }
    // Step back by the overlap, but never to or before the current start.
    start = Math.max(start + 1, end - overlapChars);
  }
  return chunks;
}
15109
/**
 * Estimate a token count from whitespace-separated words.
 *
 * The estimate is 1.25 tokens per word, rounded up, with a minimum of 1.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated token count (always at least 1).
 */
function estimateTokenCount(text) {
  const words = text.match(/\S+/g);
  const wordCount = words === null ? 0 : words.length;
  const estimate = Math.ceil(wordCount * 1.25);
  return estimate < 1 ? 1 : estimate;
}
15113
/**
 * Delete every chunk of a source revision, including its `chunks_fts` rows.
 *
 * @param {object} db - Database handle exposing `query(sql).all(...)` and `run(sql, params)`.
 * @param {string} sourceRevisionId - Revision whose chunks are removed.
 * @returns {number} Number of chunk rows that were selected for deletion.
 */
function deleteChunksForRevision(db, sourceRevisionId) {
  const chunkRows = db
    .query("SELECT id FROM chunks WHERE source_revision_id = ?")
    .all(sourceRevisionId);
  // The FTS rows are removed per chunk id before the chunk rows themselves.
  chunkRows.forEach(({ id }) => {
    db.run("DELETE FROM chunks_fts WHERE chunk_id = ?", [id]);
  });
  db.run("DELETE FROM chunks WHERE source_revision_id = ?", [sourceRevisionId]);
  return chunkRows.length;
}
15121
/**
 * Insert a source row, or update the existing row with the same URI.
 *
 * On conflict the kind, title, metadata, ACL and `updated_at` are replaced.
 * The id is then read back by URI and returned.
 *
 * @param {object} db - Database handle exposing `run(sql, params)` and `query(sql).get(...)`.
 * @param {object} item - Normalized manifest item.
 * @param {string} now - Timestamp stored as `created_at` for new rows.
 * @returns {string} Id of the stored source row.
 * @throws {Error} When the row cannot be read back after the upsert.
 */
function upsertSource(db, item, now) {
  const upsertSql = [
    "INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)",
    "VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
    "ON CONFLICT(uri) DO UPDATE SET",
    "kind = excluded.kind,",
    "title = excluded.title,",
    "metadata_json = excluded.metadata_json,",
    "acl_json = excluded.acl_json,",
    "updated_at = excluded.updated_at"
  ].join("\n");
  const params = [
    stableId2("src", item.sourceUri),
    item.sourceUri,
    item.kind,
    item.title,
    JSON.stringify(item.metadata),
    JSON.stringify(item.acl ?? {}),
    now,
    item.updatedAt
  ];
  db.run(upsertSql, params);

  // Read the id back by URI rather than reusing the computed one.
  const stored = db.query("SELECT id FROM sources WHERE uri = ?").get(item.sourceUri);
  if (stored) {
    return stored.id;
  }
  throw new Error(`Failed to upsert source: ${item.sourceUri}`);
}
15145
/**
 * Insert a source revision row, or update the row with the same source and revision.
 *
 * On conflict the hash, extracted-text URI and metadata are replaced. The id
 * is then read back by `(source_id, revision)` and returned.
 *
 * @param {object} db - Database handle exposing `run(sql, params)` and `query(sql).get(...)`.
 * @param {string} sourceId - Id of the owning source row.
 * @param {object} item - Normalized manifest item.
 * @param {string} now - Timestamp stored as `created_at` for new rows.
 * @returns {string} Id of the stored revision row.
 * @throws {Error} When the row cannot be read back after the upsert.
 */
function upsertRevision(db, sourceId, item, now) {
  const upsertSql = [
    "INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)",
    "VALUES (?, ?, ?, ?, ?, ?, ?)",
    "ON CONFLICT(source_id, revision) DO UPDATE SET",
    "hash = excluded.hash,",
    "extracted_text_uri = excluded.extracted_text_uri,",
    "metadata_json = excluded.metadata_json"
  ].join("\n");
  // The NUL separator keeps the source id and revision parts unambiguous.
  const revisionId = stableId2("rev", `${sourceId}\x00${item.revision}`);
  db.run(upsertSql, [
    revisionId,
    sourceId,
    item.revision,
    item.hash,
    item.extractedTextUri,
    JSON.stringify(item.metadata),
    now
  ]);

  const stored = db
    .query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?")
    .get(sourceId, item.revision);
  if (stored) {
    return stored.id;
  }
  throw new Error(`Failed to upsert source revision: ${item.sourceRef}`);
}
15166
/**
 * Redact, chunk and store the inline text of a manifest item.
 *
 * Nothing is stored when the item has no text or its status is "deleted"
 * (case-insensitive). Otherwise the text is passed through `redactSecrets`;
 * any findings are recorded together with an audit event. The redacted text
 * is split with `chunkText`, and each chunk is written to both `chunks` and
 * `chunks_fts`.
 *
 * @param {object} db - Database handle exposing `run(sql, params)`.
 * @param {string} sourceRevisionId - Revision that owns the chunks.
 * @param {object} item - Normalized manifest item.
 * @param {string} now - Timestamp stored on the rows and events.
 * @param {number} maxChars - Maximum chunk size passed to `chunkText`.
 * @param {number} overlapChars - Chunk overlap passed to `chunkText`.
 * @param {object} [safetyPolicy] - Policy passed to `redactSecrets`.
 * @returns {{ chunksInserted: number, redactions: number }} Counts for this item.
 */
function insertChunks(db, sourceRevisionId, item, now, maxChars, overlapChars, safetyPolicy) {
  if (!item.text) {
    return { chunksInserted: 0, redactions: 0 };
  }
  if (item.status.toLowerCase() === "deleted") {
    return { chunksInserted: 0, redactions: 0 };
  }

  const { text: redactedText, findings } = redactSecrets(item.text, safetyPolicy);
  if (findings.length > 0) {
    recordRedactionFindings(db, {
      source_uri: item.sourceUri,
      findings,
      metadata: { source_ref: item.sourceRef, revision: item.revision },
      created_at: now
    });
    recordAuditEvent(db, {
      event_type: "redaction",
      action: "source_text_redact",
      target_uri: item.sourceUri,
      decision: "redacted",
      metadata: { findings: findings.length, source_ref: item.sourceRef, revision: item.revision },
      created_at: now
    });
  }

  const insertChunkSql = [
    "INSERT INTO chunks (id, source_revision_id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json, created_at)",
    "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
  ].join("\n");
  const insertFtsSql = "INSERT INTO chunks_fts (chunk_id, text, title, source_uri) VALUES (?, ?, ?, ?)";

  // The per-chunk metadata depends only on the item, so it is serialized once,
  // and only if there is at least one chunk to store.
  let metadataJson;
  const chunks = chunkText(redactedText, maxChars, overlapChars);
  chunks.forEach((chunk) => {
    if (metadataJson === undefined) {
      metadataJson = JSON.stringify({
        source_ref: item.sourceRef,
        source_uri: item.sourceUri,
        hash: item.hash,
        status: item.status,
        path: asString2(item.raw.path) ?? null,
        mime: asString2(item.raw.mime) ?? asString2(item.raw.content_type) ?? null,
        size: asNumber(item.raw.size) ?? null
      });
    }
    const chunkId = stableId2("chk", `${sourceRevisionId}\x00${chunk.ordinal}\x00${chunk.text}`);
    db.run(insertChunkSql, [
      chunkId,
      sourceRevisionId,
      "source",
      chunk.ordinal,
      chunk.text,
      estimateTokenCount(chunk.text),
      chunk.startOffset,
      chunk.endOffset,
      metadataJson,
      now
    ]);
    db.run(insertFtsSql, [chunkId, chunk.text, item.title ?? "", item.sourceUri]);
  });

  return { chunksInserted: chunks.length, redactions: findings.length };
}
15215
/**
 * Read a manifest from a local path or `s3://` URI and ingest its items.
 *
 * The write permission check and the database migration run before the
 * manifest is read. The parsed items are then handed to
 * `ingestOpenFilesManifestItems`, with the input used as the source label.
 *
 * @param {object} options - Reads `dbPath`, `input`, `config`, `safetyPolicy`,
 *   `now`, `maxChunkChars` and `chunkOverlapChars`.
 * @returns {Promise<object>} Result of `ingestOpenFilesManifestItems`.
 */
async function ingestOpenFilesManifest(options) {
  const { dbPath, input, safetyPolicy } = options;
  const now = options.now ?? new Date();
  if (safetyPolicy) {
    assertWriteAllowed(dbPath, safetyPolicy);
  }
  migrateKnowledgeDb(dbPath);

  const manifestText = await readManifestInput(input, options.config, safetyPolicy);
  return ingestOpenFilesManifestItems({
    dbPath,
    items: parseManifestText(manifestText),
    sourceLabel: input,
    safetyPolicy,
    now,
    maxChunkChars: options.maxChunkChars,
    chunkOverlapChars: options.chunkOverlapChars
  });
}
15232
/**
 * Ingest already-parsed manifest items into the knowledge database.
 *
 * Runs inside a single database transaction: records a read audit event,
 * upserts each item's source and revision, replaces the revision's chunks
 * when the item carries text or is marked deleted, and records a write audit
 * event. The database handle is closed in all cases.
 *
 * @param {object} options - Reads `dbPath`, `items`, `sourceLabel`, `readAction`,
 *   `safetyPolicy`, `now`, `maxChunkChars` (default 4000) and `chunkOverlapChars`
 *   (default 200).
 * @returns {Promise<object>} Summary with `path`, `db_path`, `items_seen`,
 *   `sources_upserted`, `revisions_upserted`, `chunks_inserted`, `chunks_deleted`,
 *   `redactions` and `skipped`.
 * @throws {Error} When `maxChunkChars` is below 500, or `chunkOverlapChars` is
 *   negative or not smaller than `maxChunkChars`.
 */
async function ingestOpenFilesManifestItems(options) {
  const now = (options.now ?? new Date()).toISOString();
  const maxChunkChars = options.maxChunkChars ?? 4000;
  const chunkOverlapChars = options.chunkOverlapChars ?? 200;
  if (maxChunkChars < 500) {
    throw new Error("maxChunkChars must be at least 500.");
  }
  if (chunkOverlapChars < 0 || chunkOverlapChars >= maxChunkChars) {
    throw new Error("chunkOverlapChars must be less than maxChunkChars.");
  }
  if (options.safetyPolicy) {
    assertWriteAllowed(options.dbPath, options.safetyPolicy);
  }
  migrateKnowledgeDb(options.dbPath);

  const db = openKnowledgeDb(options.dbPath);
  try {
    const runIngest = db.transaction(() => {
      const sourceIds = new Set();
      const revisionIds = new Set();
      const totals = { chunksInserted: 0, chunksDeleted: 0, redactions: 0 };
      // Reported in the result; nothing in this function increments it.
      const skipped = 0;

      const defaultReadAction = options.sourceLabel.startsWith("s3://")
        ? "s3_manifest_read"
        : "local_manifest_read";
      recordAuditEvent(db, {
        event_type: "source_read",
        action: options.readAction ?? defaultReadAction,
        target_uri: options.sourceLabel,
        decision: "allow",
        metadata: { items: options.items.length, read_only: true },
        created_at: now
      });

      for (const raw of options.items) {
        const item = normalizeManifestItem(raw, now);
        const sourceId = upsertSource(db, item, now);
        const revisionId = upsertRevision(db, sourceId, item, now);
        sourceIds.add(sourceId);
        revisionIds.add(revisionId);

        // Existing chunks are only cleared when they will be replaced (new
        // text) or must disappear (deleted item).
        const replacesChunks = item.text || item.status.toLowerCase() === "deleted";
        if (replacesChunks) {
          totals.chunksDeleted += deleteChunksForRevision(db, revisionId);
        }
        const inserted = insertChunks(
          db,
          revisionId,
          item,
          now,
          maxChunkChars,
          chunkOverlapChars,
          options.safetyPolicy
        );
        totals.chunksInserted += inserted.chunksInserted;
        totals.redactions += inserted.redactions;
      }

      recordAuditEvent(db, {
        event_type: "write",
        action: "knowledge_manifest_ingest",
        target_uri: options.dbPath,
        decision: "allow",
        metadata: {
          items: options.items.length,
          sources: sourceIds.size,
          revisions: revisionIds.size,
          chunks_inserted: totals.chunksInserted,
          redactions: totals.redactions
        },
        created_at: now
      });

      return {
        path: options.sourceLabel,
        db_path: options.dbPath,
        items_seen: options.items.length,
        sources_upserted: sourceIds.size,
        revisions_upserted: revisionIds.size,
        chunks_inserted: totals.chunksInserted,
        chunks_deleted: totals.chunksDeleted,
        redactions: totals.redactions,
        skipped
      };
    });
    return runIngest();
  } finally {
    db.close();
  }
}
15298
+
15299
+ // src/source-ingest.ts
15300
+ import { createHash as createHash4 } from "crypto";
15301
+ import { existsSync as existsSync6, readFileSync as readFileSync6 } from "fs";
15302
+ import { basename as basename3 } from "path";
14267
15303
 
14268
15304
  // src/source-resolver.ts
14269
15305
  function parseJsonObject(value) {
@@ -14539,6 +15575,558 @@ async function resolveOpenFilesSource(options) {
14539
15575
  }
14540
15576
  }
14541
15577
 
15578
+ // src/source-ingest.ts
15579
/**
 * Hash text with SHA-256 and return it in `sha256:<hex>` form.
 *
 * @param {string} text - Text to hash.
 * @returns {string} The prefixed hex digest.
 */
function sha256Text(text) {
  const hexDigest = createHash4("sha256").update(text).digest("hex");
  return `sha256:${hexDigest}`;
}
15582
/**
 * Reduce an HTML document to plain text.
 *
 * Script and style elements are dropped with their contents, remaining tags
 * become spaces, the entities `&nbsp;`, `&amp;`, `&lt;` and `&gt;` are
 * decoded, whitespace around newlines is collapsed, runs of spaces/tabs
 * become a single space, and the result is trimmed.
 *
 * Entities are decoded in one pass, so each is decoded exactly once. Decoding
 * `&amp;` in a separate earlier step would turn an escaped entity such as
 * `&amp;lt;` into `<` instead of the literal text `&lt;`.
 *
 * @param {string} html - HTML markup.
 * @returns {string} Plain-text approximation of the markup.
 */
function stripHtml(html) {
  const entityText = { nbsp: " ", amp: "&", lt: "<", gt: ">" };
  return html
    .replace(/<script[\s\S]*?<\/script>/gi, " ")
    .replace(/<style[\s\S]*?<\/style>/gi, " ")
    .replace(/<[^>]+>/g, " ")
    .replace(/&(nbsp|amp|lt|gt);/g, (_match, name) => entityText[name])
    .replace(/\s+\n/g, "\n")
    .replace(/\n\s+/g, "\n")
    .replace(/[ \t]{2,}/g, " ")
    .trim();
}
15587
/**
 * Download an S3 object named by an `s3://bucket/key` URI and return it as text.
 *
 * The AWS SDK modules are imported lazily. Region, profile credentials and
 * retry count are taken from `config2.storage.s3` only when storage is of
 * type "s3" and its bucket matches the URI's bucket.
 *
 * @param {string} uri - `s3://` URI of the source object.
 * @param {object} [config2] - Configuration; only `storage` is read.
 * @param {object} [safetyPolicy] - When given, the read is first checked with `assertS3ReadAllowed`.
 * @returns {Promise<string>} Object body as a string, or "" when the response has no body.
 * @throws {Error} When the URI lacks a bucket or key.
 */
async function readS3Text3(uri, config2, safetyPolicy) {
  const location = new URL(uri);
  const bucket = location.hostname;
  const key = decodeURIComponent(location.pathname.replace(/^\/+/, ""));
  if (!bucket || !key) {
    throw new Error(`Invalid S3 source URI: ${uri}`);
  }
  if (safetyPolicy) {
    assertS3ReadAllowed(uri, safetyPolicy);
  }

  const [s3Module, credentialsModule] = await Promise.all([
    import("@aws-sdk/client-s3"),
    import("@aws-sdk/credential-providers")
  ]);

  // Configured S3 settings apply only to the configured bucket.
  const usesConfiguredBucket =
    config2?.storage.type === "s3" && config2.storage.s3?.bucket === bucket;
  const s3Config = usesConfiguredBucket ? config2.storage.s3 : undefined;
  const clientOptions = {
    region: s3Config?.region,
    credentials: s3Config?.profile
      ? credentialsModule.fromIni({ profile: s3Config.profile })
      : undefined,
    maxAttempts: s3Config?.max_attempts
  };
  const client = new s3Module.S3Client(clientOptions);

  const command = new s3Module.GetObjectCommand({ Bucket: bucket, Key: key });
  const response = await client.send(command);
  if (response.Body) {
    return await response.Body.transformToString();
  }
  return "";
}
15610
/**
 * Fetch a web resource and return its text together with its content type.
 *
 * Responses whose content type contains "html" are converted to plain text
 * with `stripHtml`; everything else is returned verbatim.
 *
 * @param {string} uri - URL to fetch.
 * @param {object} [safetyPolicy] - When given, checked with `assertWebSearchAllowed` first.
 * @returns {Promise<{ text: string, mime: string | null }>} Text and the `content-type` header value.
 * @throws {Error} When the response status is not OK.
 */
async function readWebText(uri, safetyPolicy) {
  if (safetyPolicy) {
    assertWebSearchAllowed(safetyPolicy);
  }

  const headers = {
    accept: "text/markdown,text/plain,text/html,application/json;q=0.8,*/*;q=0.5",
    "user-agent": "@hasna/knowledge source-ingest"
  };
  const response = await fetch(uri, { headers });
  if (!response.ok) {
    throw new Error(`Web source read failed ${response.status}: ${uri}`);
  }

  const mime = response.headers.get("content-type");
  const body = await response.text();
  const text = mime?.includes("html") ? stripHtml(body) : body;
  return { text, mime };
}
15625
/**
 * Derive a display title from a parsed source reference.
 *
 * - `file`: base name of `path`
 * - `s3`: base name of `key`
 * - `web`: base name of the URL path, or the whole URL when that is empty
 * - anything else: base name of `path` when present, otherwise `id`
 *
 * @param {object} parsed - Parsed source reference.
 * @returns {string} The title.
 */
function titleForRef(parsed) {
  switch (parsed.kind) {
    case "file":
      return basename3(parsed.path);
    case "s3":
      return basename3(parsed.key);
    case "web": {
      const leaf = basename3(new URL(parsed.url).pathname);
      return leaf || parsed.url;
    }
    default:
      return parsed.path ? basename3(parsed.path) : parsed.id;
  }
}
15634
/**
 * Read the text of a `file`, `s3` or `web` source reference.
 *
 * Local files and S3 objects are reported with mime "text/plain"; web
 * sources use the mime returned by `readWebText`. `size` is the length of
 * the text, and `hash` is computed from the text with `sha256Text`.
 *
 * @param {object} parsed - Parsed source reference.
 * @param {object} [config2] - Configuration forwarded to the S3 reader.
 * @param {object} [safetyPolicy] - Safety policy forwarded to the S3 and web readers.
 * @returns {Promise<object>} Record with `text`, `contentSource`, `title`, `mime`, `size`,
 *   `hash`, `revision`, `extractedTextRef`, `metadata` and `permissions`.
 * @throws {Error} When a local file is missing or the reference kind is unsupported.
 */
async function readDirectSourceText(parsed, config2, safetyPolicy) {
  // All supported kinds produce the same record shape; only these four inputs vary.
  const describe = (text, contentSource, mime, metadata) => ({
    text,
    contentSource,
    title: titleForRef(parsed),
    mime,
    size: text.length,
    hash: sha256Text(text),
    revision: null,
    extractedTextRef: null,
    metadata,
    permissions: { mode: "read_only" }
  });

  switch (parsed.kind) {
    case "file": {
      if (!existsSync6(parsed.path)) {
        throw new Error(`Source file not found: ${parsed.path}`);
      }
      const text = readFileSync6(parsed.path, "utf8");
      return describe(text, "file", "text/plain", { path: parsed.path });
    }
    case "s3": {
      const text = await readS3Text3(parsed.uri, config2, safetyPolicy);
      return describe(text, "s3", "text/plain", { bucket: parsed.bucket, key: parsed.key });
    }
    case "web": {
      const web = await readWebText(parsed.url, safetyPolicy);
      return describe(web.text, "web", web.mime, { url: parsed.url });
    }
    default:
      throw new Error(`Direct source reading is not available for ${parsed.uri}`);
  }
}
15684
/**
 * Read the text behind an extracted-text reference.
 *
 * `open-files://` references are rejected. Every other reference is parsed
 * with `parseSourceRef` and read with `readDirectSourceText`.
 *
 * @param {string} uri - Extracted-text reference.
 * @param {object} [config2] - Configuration forwarded to the reader.
 * @param {object} [safetyPolicy] - Safety policy forwarded to the reader.
 * @returns {Promise<{ text: string, contentSource: string }>} Text tagged as "extracted_text_ref".
 * @throws {Error} When the reference uses the `open-files://` scheme.
 */
async function readTextRef(uri, config2, safetyPolicy) {
  const isOpenFilesRef = uri.startsWith("open-files://");
  if (isOpenFilesRef) {
    throw new Error("Open-files extracted text refs require an open-files resolver API. Ingest an open-files manifest with extracted_text or an extracted_text_ref using file://, s3://, or https://.");
  }
  const { text } = await readDirectSourceText(parseSourceRef(uri), config2, safetyPolicy);
  return { text, contentSource: "extracted_text_ref" };
}
15692
/**
 * Obtain the text of an open-files source from the local knowledge catalog.
 *
 * The source is resolved with `resolveOpenFilesSource` (up to 100 chunks).
 * When the revision has an extracted-text URI and the catalog reports no text
 * as available, the text is read through that URI. Otherwise the catalog
 * chunks are joined with blank lines.
 *
 * @param {object} options - Reads `dbPath`, `sourceRef`, `purpose` (default
 *   "knowledge_index"), `safetyPolicy`, `now` and `config`.
 * @returns {Promise<object>} Record with `text`, `contentSource`, `title`, `mime`, `size`,
 *   `hash`, `revision`, `extractedTextRef`, `metadata` and `permissions`.
 * @throws {Error} When the source is not in the catalog, or has neither a
 *   readable extracted-text reference nor any chunks.
 */
async function readOpenFilesSourceText(options) {
  const resolved = await resolveOpenFilesSource({
    dbPath: options.dbPath,
    sourceRef: options.sourceRef,
    purpose: options.purpose ?? "knowledge_index",
    limit: 100,
    safetyPolicy: options.safetyPolicy,
    now: options.now
  });
  if (!resolved.resolved) {
    throw new Error("Open-files source is not in the local knowledge catalog. Ingest an open-files manifest first or use the open-files resolver API.");
  }

  const { revision, source, content, chunks } = resolved;
  const title = source?.title ?? null;
  const metadata = source?.metadata ?? {};
  const permissions = source?.permissions ?? { mode: "read_only" };

  const shouldReadTextRef = revision?.extracted_text_uri && !content.text_available;
  if (shouldReadTextRef) {
    const textRef = await readTextRef(revision.extracted_text_uri, options.config, options.safetyPolicy);
    return {
      text: textRef.text,
      contentSource: textRef.contentSource,
      title,
      mime: content.mime,
      size: textRef.text.length,
      hash: revision.hash ?? sha256Text(textRef.text),
      revision: revision.revision,
      extractedTextRef: revision.extracted_text_uri,
      metadata,
      permissions
    };
  }

  if (chunks.length === 0) {
    throw new Error("Open-files source has no extracted text chunks yet. Ingest an open-files manifest with extracted_text or extracted_text_ref first.");
  }
  const text = chunks.map((chunk) => chunk.text).join("\n\n");
  return {
    text,
    contentSource: "catalog_chunks",
    title,
    mime: content.mime,
    size: text.length,
    hash: revision?.hash ?? sha256Text(text),
    revision: revision?.revision ?? null,
    extractedTextRef: revision?.extracted_text_uri ?? null,
    metadata,
    permissions
  };
}
15738
/**
 * Build a manifest item describing a source whose text has just been read.
 *
 * The item is always marked "active" and read-only, is stamped with the
 * current time, and carries the text inline as `extracted_text`. Locator
 * fields (`file_id`, `source_id`, `path`, `url`) are added according to the
 * kind of the parsed reference.
 *
 * @param {string} sourceRef - Original source reference.
 * @param {object} parsed - Parsed form of `sourceRef`.
 * @param {object} resolved - Read result (`text`, `contentSource`, `title`, `mime`,
 *   `size`, `hash`, `revision`, `extractedTextRef`, `metadata`, `permissions`).
 * @param {string} purpose - Purpose recorded in `permissions.allowed_purposes`.
 * @returns {object} Manifest item for `ingestOpenFilesManifestItems`.
 */
function manifestItemForSource(sourceRef, parsed, resolved, purpose) {
  const hash2 = resolved.hash ?? sha256Text(resolved.text);

  // Locator fields differ by reference kind; they are appended after the common fields.
  const locator = {};
  switch (parsed.kind) {
    case "open-files":
      if (parsed.entity === "file") {
        locator.file_id = parsed.id;
      }
      if (parsed.entity === "source") {
        locator.source_id = parsed.id;
        locator.path = parsed.path;
      }
      break;
    case "file":
      locator.path = parsed.path;
      break;
    case "s3":
      locator.path = parsed.key;
      break;
    case "web":
      locator.url = parsed.url;
      break;
    default:
      break;
  }

  return {
    source_ref: sourceRef,
    name: resolved.title ?? titleForRef(parsed),
    mime: resolved.mime ?? "text/plain",
    size: resolved.size ?? resolved.text.length,
    hash: hash2,
    // Without a revision the content hash doubles as the revision label.
    revision: resolved.revision ?? hash2,
    status: "active",
    updated_at: new Date().toISOString(),
    permissions: {
      mode: "read_only",
      allowed_purposes: [purpose],
      ...resolved.permissions
    },
    metadata: {
      ...resolved.metadata,
      source_ref: sourceRef,
      content_source: resolved.contentSource,
      read_only: true
    },
    extracted_text_ref: resolved.extractedTextRef,
    extracted_text: resolved.text,
    ...locator
  };
}
15780
/**
 * Read a single source reference and ingest it as a one-item manifest.
 *
 * Open-files references are read from the local catalog; every other kind is
 * read directly. The resulting item is ingested with read action
 * "source_ref_ingest_read".
 *
 * @param {object} options - Reads `sourceRef`, `dbPath`, `purpose` (default
 *   "knowledge_index"), `config`, `safetyPolicy` and `now`.
 * @returns {Promise<object>} The ingest summary extended with `source_ref`,
 *   `content_source`, `read_only` and `hash`.
 */
async function ingestSourceRef(options) {
  const { sourceRef, dbPath, safetyPolicy } = options;
  const purpose = options.purpose ?? "knowledge_index";
  const parsed = parseSourceRef(sourceRef);

  let resolved;
  if (parsed.kind === "open-files") {
    resolved = await readOpenFilesSourceText(options);
  } else {
    resolved = await readDirectSourceText(parsed, options.config, safetyPolicy);
  }

  const item = manifestItemForSource(sourceRef, parsed, resolved, purpose);
  const summary = await ingestOpenFilesManifestItems({
    dbPath,
    items: [item],
    sourceLabel: sourceRef,
    readAction: "source_ref_ingest_read",
    safetyPolicy,
    now: options.now
  });
  return {
    ...summary,
    source_ref: sourceRef,
    content_source: resolved.contentSource,
    read_only: true,
    hash: String(item.hash)
  };
}
15801
+
15802
+ // src/providers.ts
15803
/**
 * Built-in settings per AI provider: the environment variable that holds the
 * API key and the model used by default.
 *
 * Declared with `var`, like the other top-level bindings in this bundle.
 */
var DEFAULT_PROVIDER_SETTINGS = Object.fromEntries(
  [
    ["openai", "OPENAI_API_KEY", "gpt-5.2"],
    ["anthropic", "ANTHROPIC_API_KEY", "claude-sonnet-4-6"],
    ["deepseek", "DEEPSEEK_API_KEY", "deepseek-chat"]
  ].map(([provider, api_key_env, default_model]) => [provider, { api_key_env, default_model }])
);
15817
/**
 * Capability flags declared for each supported AI provider.
 *
 * Every provider is listed with text generation, structured output, tool
 * usage, tool streaming and reasoning enabled; image input, native web
 * search and embeddings vary per provider.
 */
var PROVIDER_CAPABILITIES = (() => {
  const shared = {
    text_generation: true,
    structured_output: true,
    tool_usage: true,
    tool_streaming: true
  };
  const describe = (image_input, native_web_search, embeddings) => ({
    ...shared,
    image_input,
    native_web_search,
    reasoning: true,
    embeddings
  });
  return {
    openai: describe(true, true, true),
    anthropic: describe(true, false, false),
    deepseek: describe(false, false, false)
  };
})();
15849
/**
 * Built-in model aliases, each mapping a short name to a `provider:model` reference.
 */
var BUILTIN_ALIASES = Object.fromEntries([
  ["default", "openai:gpt-5.2"],
  ["fast", "openai:gpt-5-mini"],
  ["reasoning", "anthropic:claude-opus-4-6"],
  ["sonnet", "anthropic:claude-sonnet-4-6"],
  ["deepseek", "deepseek:deepseek-chat"],
  ["deepseek-reasoning", "deepseek:deepseek-reasoner"]
]);
15857
/**
 * Return the `providers` section of the configuration, or an empty object when it is absent.
 *
 * @param {object} config2 - Configuration object.
 * @returns {object} The providers section.
 */
function providerConfig(config2) {
  const { providers } = config2;
  return providers ?? {};
}
15860
/**
 * Compute the effective settings for one provider.
 *
 * Built-in defaults are overlaid with the provider's entry from the
 * configuration; configured values win.
 *
 * @param {object} config2 - Configuration object.
 * @param {string} provider - Provider name.
 * @returns {object} Merged settings for the provider.
 */
function providerSettings(config2, provider) {
  const builtIn = DEFAULT_PROVIDER_SETTINGS[provider];
  const overrides = providerConfig(config2)[provider] ?? {};
  return { ...builtIn, ...overrides };
}
15867
/**
 * Build the alias table that maps alias names to model references.
 *
 * Precedence, lowest to highest: built-in aliases, the configured
 * `default_model` (as the "default" alias), then configured `aliases`.
 *
 * @param {object} config2 - Configuration object.
 * @returns {object} Alias name to model reference.
 */
function modelAliases(config2) {
  const { default_model: configuredDefault, aliases: configuredAliases } = providerConfig(config2);
  const merged = { ...BUILTIN_ALIASES };
  if (configuredDefault) {
    merged.default = configuredDefault;
  }
  return { ...merged, ...(configuredAliases ?? {}) };
}
15875
/**
 * Split a `provider:model` reference into its parts.
 *
 * Everything after the first colon is the model name, so model names may
 * themselves contain colons.
 *
 * @param {string} modelRef - Reference such as "openai:gpt-5.2".
 * @returns {{ provider: string, model: string }} The parsed parts.
 * @throws {Error} When the provider is not openai, anthropic or deepseek, or the model part is empty.
 */
function parseModelRef(modelRef) {
  const separatorIndex = modelRef.indexOf(":");
  const hasSeparator = separatorIndex !== -1;
  const provider = hasSeparator ? modelRef.slice(0, separatorIndex) : modelRef;
  const model = hasSeparator ? modelRef.slice(separatorIndex + 1) : "";

  const supportedProviders = ["openai", "anthropic", "deepseek"];
  if (!supportedProviders.includes(provider)) {
    throw new Error(`Unsupported AI provider: ${provider}`);
  }
  if (!model) {
    throw new Error(`Invalid model ref: ${modelRef}. Expected provider:model.`);
  }
  return { provider, model };
}
15885
/**
 * Resolve an alias to its model reference; non-alias input is returned unchanged.
 *
 * Only the alias table's own entries are consulted. A plain property lookup
 * would also find members inherited from `Object.prototype`, so input such
 * as "constructor" or "toString" would resolve to a function instead of a
 * string.
 *
 * @param {string} aliasOrRef - Alias name or `provider:model` reference.
 * @param {object} config2 - Configuration object used to build the alias table.
 * @returns {string} The resolved model reference, or `aliasOrRef` itself.
 */
function resolveModelRef(aliasOrRef, config2) {
  const aliases = modelAliases(config2);
  if (!Object.hasOwn(aliases, aliasOrRef)) {
    return aliasOrRef;
  }
  return aliases[aliasOrRef] ?? aliasOrRef;
}
15889
/**
 * List every model alias together with its parsed reference and provider capabilities.
 *
 * @param {object} config2 - Configuration object used to build the alias table.
 * @returns {object[]} One entry per alias with `alias`, `model_ref`, `provider`,
 *   `model`, `default` (true only for the "default" alias) and `capabilities`.
 * @throws {Error} When an alias maps to a reference that `parseModelRef` rejects.
 */
function listModelRegistry(config2) {
  const registry = [];
  for (const [alias, modelRef] of Object.entries(modelAliases(config2))) {
    const { provider, model } = parseModelRef(modelRef);
    registry.push({
      alias,
      model_ref: modelRef,
      provider,
      model,
      default: alias === "default",
      capabilities: PROVIDER_CAPABILITIES[provider]
    });
  }
  return registry;
}
15903
/**
 * Report, for each built-in provider, whether its API key is set in the environment.
 *
 * @param {object} config2 - Configuration object.
 * @param {object} [env=process.env] - Environment variables to inspect.
 * @returns {object[]} One entry per provider with `provider`, `api_key_env`,
 *   `configured`, `source` ("env" or "missing"), `base_url` and `default_model`.
 */
function providerCredentialStatus(config2, env = process.env) {
  const statuses = [];
  for (const provider of Object.keys(DEFAULT_PROVIDER_SETTINGS)) {
    const settings = providerSettings(config2, provider);
    // Only whether the variable is set is reported, never its value.
    const configured = Boolean(env[settings.api_key_env]);
    statuses.push({
      provider,
      api_key_env: settings.api_key_env,
      configured,
      source: configured ? "env" : "missing",
      base_url: settings.base_url ?? null,
      default_model: settings.default_model
    });
  }
  return statuses;
}
15917
/**
 * Summarize provider configuration: the default model, credential status and model registry.
 *
 * @param {object} config2 - Configuration object.
 * @param {object} [env=process.env] - Environment variables used for credential checks.
 * @returns {{ default_model: string, providers: object[], models: object[] }} The summary.
 */
function providerStatus(config2, env = process.env) {
  const default_model = resolveModelRef("default", config2);
  const providers = providerCredentialStatus(config2, env);
  const models = listModelRegistry(config2);
  return { default_model, providers, models };
}
15924
+
15925
+ // src/wiki-layout.ts
15926
/**
 * Split a date into UTC year, month and day strings.
 *
 * Month and day are zero-padded to two digits.
 *
 * @param {Date} now - Date to split.
 * @returns {{ year: string, month: string, day: string }} The UTC date parts.
 */
function todayParts(now) {
  const twoDigits = (value) => String(value).padStart(2, "0");
  return {
    year: String(now.getUTCFullYear()),
    // getUTCMonth() is zero-based.
    month: twoDigits(now.getUTCMonth() + 1),
    day: twoDigits(now.getUTCDate())
  };
}
15932
/**
 * Markdown text of the agent schema document (source, wiki, query and lint rules).
 *
 * @returns {string} The document, ending with a newline.
 */
function agentSchemaTemplate() {
  const lines = [
    "# Knowledge Agent Schema v1",
    "",
    "## Source Rules",
    "",
    "- Treat open-files source references as the preferred source of truth.",
    "- Do not copy raw source files into open-knowledge.",
    "- Cite every durable fact with a source URI, revision/hash when available, and optional span.",
    "- Mark uncertainty explicitly when sources disagree or are incomplete.",
    "",
    "## Wiki Rules",
    "",
    "- Write generated knowledge as Markdown pages under wiki/.",
    "- Keep root indexes small; use topic, team, project, and machine-readable shards for scale.",
    "- Preserve backlinks between related pages and decisions.",
    "- Prefer updating existing pages over creating near-duplicates.",
    "",
    "## Query Rules",
    "",
    "- Search wiki pages first, then source chunks, then deeper read-only source refs.",
    "- Use web search only when requested or when current external context is required.",
    "- File useful answers back into the wiki only after approval or approved auto-write mode.",
    "",
    "## Lint Rules",
    "",
    "- Flag stale pages, missing citations, contradictions, orphan pages, duplicate pages, and unresolved source refs."
  ];
  return `${lines.join("\n")}\n`;
}
15960
/**
 * Markdown text of the root orientation index.
 *
 * @returns {string} The document, ending with a newline.
 */
function rootIndexTemplate() {
  const lines = [
    "# Knowledge Index",
    "",
    "This is a compact orientation index for agents. It is not the full search index.",
    "",
    "## Shards",
    "",
    "- wiki/",
    "- indexes/",
    "- schemas/",
    "- logs/",
    "",
    "## Source Ownership",
    "",
    "Raw source files are resolved through open-files. This app stores source refs,",
    "citations, chunks, generated wiki artifacts, indexes, and run records."
  ];
  return `${lines.join("\n")}\n`;
}
15978
/**
 * Produce the Markdown text of the wiki README page.
 *
 * @returns {string} The README document, newline-separated and ending with a
 *   trailing newline.
 */
function wikiReadmeTemplate() {
  const lines = [
    "# Wiki",
    "",
    "Generated durable knowledge pages live here.",
    "",
    "Pages should be concise, cited, and organized for both humans and agents.",
    // Final empty entry yields the trailing newline after join.
    ""
  ];
  return lines.join("\n");
}
15986
/**
 * Write the initial wiki layout artifacts through the given store.
 *
 * Four `store.put` calls are issued (schema, root index, wiki README, and a
 * one-line JSONL log entry keyed by the UTC date of `now`) and awaited together.
 *
 * @param {{put: Function}} store - Artifact store; only its `put({ key, body, content_type })`
 *   method is used here.
 * @param {Date} [now=new Date] - Timestamp used for the log key and the event `ts`.
 * @returns {Promise<{schema_key: string, root_index_key: string, wiki_readme_key: string, log_key: string, written: string[]}>}
 *   The keys that were written.
 */
async function initializeWikiLayout(store, now = new Date) {
  const date = todayParts(now);
  const layoutKeys = {
    schema_key: "schemas/v1.md",
    root_index_key: "indexes/root.md",
    wiki_readme_key: "wiki/README.md"
  };
  const logKey = `logs/${date.year}/${date.month}/${date.day}.jsonl`;
  // Event fields are spread in the same order as the layout keys so the
  // serialized JSON line keeps a stable property order.
  const logLine = JSON.stringify({
    ts: now.toISOString(),
    event: "wiki_layout_initialized",
    ...layoutKeys
  });
  const markdown = "text/markdown";
  const artifacts = [
    { key: layoutKeys.schema_key, body: agentSchemaTemplate(), content_type: markdown },
    { key: layoutKeys.root_index_key, body: rootIndexTemplate(), content_type: markdown },
    { key: layoutKeys.wiki_readme_key, body: wikiReadmeTemplate(), content_type: markdown },
    { key: logKey, body: `${logLine}\n`, content_type: "application/x-ndjson" }
  ];
  // Call put as a method (not an unbound reference) so the store keeps its `this`.
  await Promise.all(artifacts.map((artifact) => store.put(artifact)));
  return {
    ...layoutKeys,
    log_key: logKey,
    written: [layoutKeys.schema_key, layoutKeys.root_index_key, layoutKeys.wiki_readme_key, logKey]
  };
}
16015
+
16016
+ // src/service.ts
16017
/**
 * Facade over the knowledge workspace helpers for a single scope.
 *
 * The workspace and the config are each resolved on first use and then kept
 * on the instance (`ensuredWorkspace`, `cachedConfig`) for later calls.
 */
class KnowledgeService {
  options;
  ensuredWorkspace;
  cachedConfig;

  /**
   * @param {object} [options={}] - Service options; `scope` and `cwd` are read from it.
   */
  constructor(options = {}) {
    this.options = options;
  }

  /** Configured scope, falling back to "global" when none was given. */
  get scope() {
    const { scope } = this.options;
    return scope ?? "global";
  }

  /** The ensured workspace if one exists, otherwise a freshly resolved scoped workspace. */
  get workspace() {
    if (this.ensuredWorkspace != null) {
      return this.ensuredWorkspace;
    }
    const { scope, cwd } = this.options;
    return resolveScopedWorkspace(scope, cwd);
  }

  /** Ensure the workspace for this scope once and return it on every call. */
  ensureWorkspace() {
    if (this.ensuredWorkspace) {
      return this.ensuredWorkspace;
    }
    this.ensuredWorkspace = ensureKnowledgeWorkspace(this.workspace.home);
    return this.ensuredWorkspace;
  }

  /** Path of the JSON store inside the ensured workspace. */
  jsonStorePath() {
    const { jsonStorePath } = this.ensureWorkspace();
    return jsonStorePath;
  }

  /** Knowledge config read from the workspace config path; read once, then cached. */
  config() {
    if (this.cachedConfig) {
      return this.cachedConfig;
    }
    const { configPath } = this.ensureWorkspace();
    this.cachedConfig = readKnowledgeConfig(configPath);
    return this.cachedConfig;
  }

  /** Safety policy resolved from the config and the ensured workspace. */
  safetyPolicy() {
    const currentConfig = this.config();
    return resolveSafetyPolicy(currentConfig, this.ensureWorkspace());
  }

  /** Artifact store created from the config and the ensured workspace. */
  artifactStore() {
    const currentConfig = this.config();
    return createArtifactStore(currentConfig, this.ensureWorkspace());
  }

  /** Summary of the resolved workspace paths plus the config, shaped for JSON output. */
  paths() {
    const {
      home,
      configPath,
      jsonStorePath,
      knowledgeDbPath,
      artifactsDir,
      indexesDir,
      logsDir,
      runsDir,
      schemasDir,
      wikiDir
    } = this.ensureWorkspace();
    return {
      ok: true,
      scope: this.scope,
      home,
      config_path: configPath,
      json_store_path: jsonStorePath,
      knowledge_db_path: knowledgeDbPath,
      artifacts_dir: artifactsDir,
      indexes_dir: indexesDir,
      logs_dir: logsDir,
      runs_dir: runsDir,
      schemas_dir: schemasDir,
      wiki_dir: wikiDir,
      config: this.config(),
      message: home
    };
  }

  /** Run the knowledge DB migration against the workspace database path. */
  initDb() {
    const { knowledgeDbPath } = this.ensureWorkspace();
    return migrateKnowledgeDb(knowledgeDbPath);
  }

  /** Migrate the knowledge DB, then return its stats. */
  dbStats() {
    const { knowledgeDbPath } = this.ensureWorkspace();
    migrateKnowledgeDb(knowledgeDbPath);
    return getKnowledgeDbStats(knowledgeDbPath);
  }

  /** Write the initial wiki layout through this service's artifact store. */
  async initWiki() {
    const store = this.artifactStore();
    return initializeWikiLayout(store);
  }

  /**
   * Ingest an open-files manifest into the workspace database.
   * @param {*} input - Forwarded unchanged to the ingest helper.
   */
  async ingestManifest(input) {
    const { knowledgeDbPath: dbPath } = this.ensureWorkspace();
    return ingestOpenFilesManifest({
      dbPath,
      input,
      config: this.config(),
      safetyPolicy: this.safetyPolicy()
    });
  }

  /**
   * Ingest a single source reference.
   * @param {*} sourceRef - Source reference forwarded to the ingest helper.
   * @param {*} purpose - Purpose value forwarded to the ingest helper.
   */
  async ingestSource(sourceRef, purpose) {
    const { knowledgeDbPath: dbPath } = this.ensureWorkspace();
    return ingestSourceRef({
      dbPath,
      sourceRef,
      purpose,
      config: this.config(),
      safetyPolicy: this.safetyPolicy()
    });
  }

  /**
   * Resolve a source reference through open-files.
   * @param {*} sourceRef - Source reference to resolve.
   * @param {{purpose?: *, limit?: *}} [options={}] - Optional purpose and limit.
   */
  async resolveSource(sourceRef, options = {}) {
    const { knowledgeDbPath: dbPath } = this.ensureWorkspace();
    const { purpose, limit } = options;
    return resolveOpenFilesSource({
      dbPath,
      sourceRef,
      purpose,
      limit,
      safetyPolicy: this.safetyPolicy()
    });
  }

  /**
   * Consume an open-files outbox.
   * @param {*} input - Forwarded unchanged to the outbox consumer.
   */
  async consumeOutbox(input) {
    const { knowledgeDbPath: dbPath } = this.ensureWorkspace();
    return consumeOpenFilesOutbox({
      dbPath,
      input,
      config: this.config(),
      safetyPolicy: this.safetyPolicy()
    });
  }

  /**
   * Provider status for this service's config.
   * @param {object} [env=process.env] - Environment map used for the credential check.
   */
  providerStatus(env = process.env) {
    const currentConfig = this.config();
    return providerStatus(currentConfig, env);
  }

  /** Model registry listing for this service's config. */
  modelRegistry() {
    const currentConfig = this.config();
    return listModelRegistry(currentConfig);
  }
}
16126
/**
 * Factory for KnowledgeService instances.
 *
 * @param {object} [options={}] - Options passed straight to the constructor.
 * @returns {KnowledgeService} A new service instance.
 */
function createKnowledgeService(options = {}) {
  const service = new KnowledgeService(options);
  return service;
}
16129
+
14542
16130
  // src/mcp.js
14543
16131
  var storePathField = exports_external.string().optional().describe("Path to the JSON store file");
14544
16132
  var scopeField = exports_external.enum(["local", "global", "project"]).optional().describe("Workspace scope");
@@ -14555,7 +16143,7 @@ function resolveStorePath(storePath, scope) {
14555
16143
  if (storePath)
14556
16144
  return storePath;
14557
16145
  if (scope === "project" || scope === "local") {
14558
- return ensureKnowledgeWorkspace(resolveScopedWorkspace(scope).home).jsonStorePath;
16146
+ return createKnowledgeService({ scope }).jsonStorePath();
14559
16147
  }
14560
16148
  return defaultStorePath();
14561
16149
  }
@@ -14597,22 +16185,7 @@ function buildServer() {
14597
16185
  registerTool(server, "ok_paths", "Knowledge workspace paths", "Show resolved workspace and store paths", {
14598
16186
  scope: scopeField
14599
16187
  }, async ({ scope }) => {
14600
- const workspace = ensureKnowledgeWorkspace(resolveScopedWorkspace(scope).home);
14601
- return jsonText({
14602
- ok: true,
14603
- scope: scope ?? "global",
14604
- home: workspace.home,
14605
- config_path: workspace.configPath,
14606
- json_store_path: workspace.jsonStorePath,
14607
- knowledge_db_path: workspace.knowledgeDbPath,
14608
- artifacts_dir: workspace.artifactsDir,
14609
- indexes_dir: workspace.indexesDir,
14610
- logs_dir: workspace.logsDir,
14611
- runs_dir: workspace.runsDir,
14612
- schemas_dir: workspace.schemasDir,
14613
- wiki_dir: workspace.wikiDir,
14614
- config: readKnowledgeConfig(workspace.configPath)
14615
- });
16188
+ return jsonText(createKnowledgeService({ scope }).paths());
14616
16189
  });
14617
16190
  registerTool(server, "ok_parse_source_ref", "Parse source reference", "Parse and validate an open-files, S3, file, or web source ref", {
14618
16191
  uri: exports_external.string().describe("Source reference URI")
@@ -14629,22 +16202,29 @@ function buildServer() {
14629
16202
  limit: exports_external.number().optional().describe("Maximum chunks to return, default 10"),
14630
16203
  scope: scopeField
14631
16204
  }, async ({ source_ref, purpose, limit, scope }) => {
14632
- const workspace = ensureKnowledgeWorkspace(resolveScopedWorkspace(scope).home);
14633
- const config2 = readKnowledgeConfig(workspace.configPath);
14634
- const safetyPolicy = resolveSafetyPolicy(config2, workspace);
16205
+ const service = createKnowledgeService({ scope });
14635
16206
  try {
14636
- const result = await resolveOpenFilesSource({
14637
- dbPath: workspace.knowledgeDbPath,
14638
- sourceRef: source_ref,
16207
+ const result = await service.resolveSource(source_ref, {
14639
16208
  purpose,
14640
- limit,
14641
- safetyPolicy
16209
+ limit
14642
16210
  });
14643
16211
  return jsonText({ ok: true, ...result });
14644
16212
  } catch (error48) {
14645
16213
  return errorText(error48 instanceof Error ? error48.message : String(error48));
14646
16214
  }
14647
16215
  });
16216
+ registerTool(server, "ok_provider_status", "AI provider status", "Inspect configured AI SDK providers, model aliases, and BYOK credential availability", {
16217
+ scope: scopeField
16218
+ }, async ({ scope }) => {
16219
+ const service = createKnowledgeService({ scope });
16220
+ return jsonText({ ok: true, ...service.providerStatus() });
16221
+ });
16222
+ registerTool(server, "ok_provider_models", "AI provider models", "List AI SDK model aliases and capability metadata", {
16223
+ scope: scopeField
16224
+ }, async ({ scope }) => {
16225
+ const service = createKnowledgeService({ scope });
16226
+ return jsonText({ ok: true, models: service.modelRegistry() });
16227
+ });
14648
16228
  registerTool(server, "ok_add", "Add a knowledge item", "Add a new item to the knowledge store", {
14649
16229
  title: exports_external.string().describe("Item title"),
14650
16230
  content: exports_external.string().describe("Item content/body"),
@@ -14969,7 +16549,7 @@ function buildServer() {
14969
16549
  const storePath = resolveStorePath(store_path, scope);
14970
16550
  return readStoreLocked(storePath, (db) => {
14971
16551
  const filePath = file2 || "./knowledge-export.json";
14972
- writeFileSync3(filePath, JSON.stringify(db, null, 2));
16552
+ writeFileSync4(filePath, JSON.stringify(db, null, 2));
14973
16553
  return jsonText({ ok: true, file: filePath, count: db.items.length });
14974
16554
  });
14975
16555
  });
@@ -14978,9 +16558,9 @@ function buildServer() {
14978
16558
  store_path: storePathField,
14979
16559
  scope: scopeField
14980
16560
  }, async ({ file: file2, store_path, scope }) => {
14981
- if (!existsSync3(file2))
16561
+ if (!existsSync7(file2))
14982
16562
  return errorText(`File not found: ${file2}`);
14983
- const imported = JSON.parse(readFileSync3(file2, "utf8"));
16563
+ const imported = JSON.parse(readFileSync7(file2, "utf8"));
14984
16564
  if (!imported || !Array.isArray(imported.items))
14985
16565
  return errorText('Invalid import file: expected {"items": [...]}');
14986
16566
  const storePath = resolveStorePath(store_path, scope);