@hasna/knowledge 0.2.12 → 0.2.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -0
- package/bin/open-knowledge-mcp.js +153 -5
- package/bin/open-knowledge.js +44 -31
- package/docs/architecture/ai-native-knowledge-base.md +15 -0
- package/package.json +1 -1
- package/src/knowledge-db.ts +1 -0
- package/src/manifest-ingest.ts +19 -2
- package/src/provenance.ts +93 -0
- package/src/service.ts +2 -1
- package/src/source-resolver.ts +18 -0
- package/src/wiki-layout.ts +91 -0
package/README.md
CHANGED
|
@@ -314,6 +314,12 @@ source ref. It does not copy raw files into the knowledge workspace; local file,
|
|
|
314
314
|
S3, web, and open-files inputs are converted into redacted chunks with offsets,
|
|
315
315
|
hashes, revision metadata, and FTS rows.
|
|
316
316
|
|
|
317
|
+
Chunks, resolver results, generated wiki pages, and index records carry
|
|
318
|
+
provenance metadata: source owner, source ref/URI, revision/hash, chunk offsets,
|
|
319
|
+
read-only status, citation requirements, and stale-source status. This keeps
|
|
320
|
+
future semantic search and wiki compile flows tied back to `open-files` instead
|
|
321
|
+
of detached Markdown.
|
|
322
|
+
|
|
317
323
|
AI provider configuration is local/BYOK by default. `open-knowledge` declares
|
|
318
324
|
AI SDK v6 provider support through `ai`, `@ai-sdk/openai`,
|
|
319
325
|
`@ai-sdk/anthropic`, and `@ai-sdk/deepseek`, but does not call providers until a
|
|
@@ -13660,7 +13660,7 @@ import { existsSync as existsSync7, readFileSync as readFileSync7, writeFileSync
|
|
|
13660
13660
|
// package.json
|
|
13661
13661
|
var package_default = {
|
|
13662
13662
|
name: "@hasna/knowledge",
|
|
13663
|
-
version: "0.2.12",
|
|
13663
|
+
version: "0.2.13",
|
|
13664
13664
|
description: "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
|
|
13665
13665
|
type: "module",
|
|
13666
13666
|
bin: {
|
|
@@ -14353,6 +14353,7 @@ function openKnowledgeDb(path) {
|
|
|
14353
14353
|
ensureParentDir(path);
|
|
14354
14354
|
const db = new Database(path);
|
|
14355
14355
|
db.exec("PRAGMA foreign_keys = ON;");
|
|
14356
|
+
db.exec("PRAGMA busy_timeout = 5000;");
|
|
14356
14357
|
return db;
|
|
14357
14358
|
}
|
|
14358
14359
|
function migrateKnowledgeDb(path) {
|
|
@@ -14899,6 +14900,50 @@ async function consumeOpenFilesOutbox(options) {
|
|
|
14899
14900
|
import { createHash as createHash3 } from "crypto";
|
|
14900
14901
|
import { existsSync as existsSync5, readFileSync as readFileSync5 } from "fs";
|
|
14901
14902
|
import { basename as basename2 } from "path";
|
|
14903
|
+
|
|
14904
|
+
// src/provenance.ts
|
|
14905
|
+
function isStaleStatus(status) {
|
|
14906
|
+
return ["deleted", "stale", "invalidated", "reindex_required"].includes((status ?? "").toLowerCase());
|
|
14907
|
+
}
|
|
14908
|
+
function sourceProvenance(input) {
|
|
14909
|
+
const status = input.status ?? null;
|
|
14910
|
+
return {
|
|
14911
|
+
source_owner: "open-files",
|
|
14912
|
+
source_ref: input.source_ref ?? null,
|
|
14913
|
+
source_uri: input.source_uri ?? null,
|
|
14914
|
+
source_kind: input.source_kind ?? null,
|
|
14915
|
+
source_revision_id: input.source_revision_id ?? null,
|
|
14916
|
+
revision: input.revision ?? null,
|
|
14917
|
+
hash: input.hash ?? null,
|
|
14918
|
+
chunk_id: input.chunk_id ?? null,
|
|
14919
|
+
start_offset: input.start_offset ?? null,
|
|
14920
|
+
end_offset: input.end_offset ?? null,
|
|
14921
|
+
status,
|
|
14922
|
+
read_only: true,
|
|
14923
|
+
citation_required: true,
|
|
14924
|
+
resolver: input.resolver ?? null,
|
|
14925
|
+
stale: isStaleStatus(status)
|
|
14926
|
+
};
|
|
14927
|
+
}
|
|
14928
|
+
function generatedArtifactProvenance(input) {
|
|
14929
|
+
return {
|
|
14930
|
+
source_owner: "open-files",
|
|
14931
|
+
generated_from: input.generated_from,
|
|
14932
|
+
artifact_key: input.artifact_key,
|
|
14933
|
+
source_refs: input.source_refs ?? [],
|
|
14934
|
+
read_only_sources: true,
|
|
14935
|
+
citation_required: input.citation_required ?? true,
|
|
14936
|
+
raw_source_bytes_stored_in_open_knowledge: false
|
|
14937
|
+
};
|
|
14938
|
+
}
|
|
14939
|
+
function withProvenance(metadata, provenance) {
|
|
14940
|
+
return {
|
|
14941
|
+
...metadata,
|
|
14942
|
+
provenance
|
|
14943
|
+
};
|
|
14944
|
+
}
|
|
14945
|
+
|
|
14946
|
+
// src/manifest-ingest.ts
|
|
14902
14947
|
function stableId2(prefix, value) {
|
|
14903
14948
|
return `${prefix}_${createHash3("sha256").update(value).digest("hex").slice(0, 20)}`;
|
|
14904
14949
|
}
|
|
@@ -15187,15 +15232,31 @@ function insertChunks(db, sourceRevisionId, item, now, maxChars, overlapChars, s
|
|
|
15187
15232
|
const chunks = chunkText(redacted.text, maxChars, overlapChars);
|
|
15188
15233
|
for (const chunk of chunks) {
|
|
15189
15234
|
const chunkId = stableId2("chk", `${sourceRevisionId}\x00${chunk.ordinal}\x00${chunk.text}`);
|
|
15190
|
-
const metadata = {
|
|
15235
|
+
const provenance = sourceProvenance({
|
|
15236
|
+
source_ref: item.sourceRef,
|
|
15237
|
+
source_uri: item.sourceUri,
|
|
15238
|
+
source_kind: item.kind,
|
|
15239
|
+
source_revision_id: sourceRevisionId,
|
|
15240
|
+
revision: item.revision,
|
|
15241
|
+
hash: item.hash,
|
|
15242
|
+
chunk_id: chunkId,
|
|
15243
|
+
start_offset: chunk.startOffset,
|
|
15244
|
+
end_offset: chunk.endOffset,
|
|
15245
|
+
status: item.status,
|
|
15246
|
+
resolver: "open-files-read-only"
|
|
15247
|
+
});
|
|
15248
|
+
const metadata = withProvenance({
|
|
15191
15249
|
source_ref: item.sourceRef,
|
|
15192
15250
|
source_uri: item.sourceUri,
|
|
15251
|
+
source_kind: item.kind,
|
|
15252
|
+
source_revision_id: sourceRevisionId,
|
|
15253
|
+
revision: item.revision,
|
|
15193
15254
|
hash: item.hash,
|
|
15194
15255
|
status: item.status,
|
|
15195
15256
|
path: asString2(item.raw.path) ?? null,
|
|
15196
15257
|
mime: asString2(item.raw.mime) ?? asString2(item.raw.content_type) ?? null,
|
|
15197
15258
|
size: asNumber(item.raw.size) ?? null
|
|
15198
|
-
};
|
|
15259
|
+
}, provenance);
|
|
15199
15260
|
db.run(`INSERT INTO chunks (id, source_revision_id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json, created_at)
|
|
15200
15261
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, [
|
|
15201
15262
|
chunkId,
|
|
@@ -15488,6 +15549,19 @@ async function resolveOpenFilesSource(options) {
|
|
|
15488
15549
|
end_offset: row.end_offset,
|
|
15489
15550
|
resolved_at: resolvedAt
|
|
15490
15551
|
};
|
|
15552
|
+
const provenance = sourceProvenance({
|
|
15553
|
+
source_ref: evidence.source_ref,
|
|
15554
|
+
source_uri: evidence.source_uri,
|
|
15555
|
+
source_kind: source.kind,
|
|
15556
|
+
source_revision_id: evidence.source_revision_id,
|
|
15557
|
+
revision: evidence.revision,
|
|
15558
|
+
hash: evidence.hash,
|
|
15559
|
+
chunk_id: row.id,
|
|
15560
|
+
start_offset: row.start_offset,
|
|
15561
|
+
end_offset: row.end_offset,
|
|
15562
|
+
status: metadataString(metadata, ["status"]),
|
|
15563
|
+
resolver: evidence.resolver
|
|
15564
|
+
});
|
|
15491
15565
|
return {
|
|
15492
15566
|
id: row.id,
|
|
15493
15567
|
kind: row.kind,
|
|
@@ -15497,7 +15571,8 @@ async function resolveOpenFilesSource(options) {
|
|
|
15497
15571
|
start_offset: row.start_offset,
|
|
15498
15572
|
end_offset: row.end_offset,
|
|
15499
15573
|
metadata,
|
|
15500
|
-
evidence
|
|
15574
|
+
evidence,
|
|
15575
|
+
provenance
|
|
15501
15576
|
};
|
|
15502
15577
|
});
|
|
15503
15578
|
const citations = chunks.map((chunk) => ({
|
|
@@ -15507,7 +15582,8 @@ async function resolveOpenFilesSource(options) {
|
|
|
15507
15582
|
quote: chunk.text.slice(0, 500),
|
|
15508
15583
|
start_offset: chunk.start_offset,
|
|
15509
15584
|
end_offset: chunk.end_offset,
|
|
15510
|
-
evidence: chunk.evidence
|
|
15585
|
+
evidence: chunk.evidence,
|
|
15586
|
+
provenance: chunk.provenance
|
|
15511
15587
|
}));
|
|
15512
15588
|
recordAuditEvent(db, {
|
|
15513
15589
|
event_type: "source_read",
|
|
@@ -16095,12 +16171,16 @@ function recordStorageObjects(db, objects, now = new Date) {
|
|
|
16095
16171
|
}
|
|
16096
16172
|
|
|
16097
16173
|
// src/wiki-layout.ts
|
|
16174
|
+
import { createHash as createHash6 } from "crypto";
|
|
16098
16175
|
function todayParts(now) {
|
|
16099
16176
|
const year = String(now.getUTCFullYear());
|
|
16100
16177
|
const month = String(now.getUTCMonth() + 1).padStart(2, "0");
|
|
16101
16178
|
const day = String(now.getUTCDate()).padStart(2, "0");
|
|
16102
16179
|
return { year, month, day };
|
|
16103
16180
|
}
|
|
16181
|
+
function stableId3(prefix, value) {
|
|
16182
|
+
return `${prefix}_${createHash6("sha256").update(value).digest("hex").slice(0, 20)}`;
|
|
16183
|
+
}
|
|
16104
16184
|
function agentSchemaTemplate() {
|
|
16105
16185
|
return `# Knowledge Agent Schema v1
|
|
16106
16186
|
|
|
@@ -16182,6 +16262,13 @@ async function initializeWikiLayout(store, now = new Date) {
|
|
|
16182
16262
|
uri: result.uri,
|
|
16183
16263
|
kind: artifactKindForKey(entry.key),
|
|
16184
16264
|
content_type: entry.content_type,
|
|
16265
|
+
metadata: {
|
|
16266
|
+
provenance: generatedArtifactProvenance({
|
|
16267
|
+
generated_from: "wiki_layout_init",
|
|
16268
|
+
artifact_key: entry.key,
|
|
16269
|
+
citation_required: entry.key.startsWith("wiki/") || entry.key.startsWith("indexes/")
|
|
16270
|
+
})
|
|
16271
|
+
},
|
|
16185
16272
|
...hashArtifactBody(entry.body)
|
|
16186
16273
|
};
|
|
16187
16274
|
}));
|
|
@@ -16194,6 +16281,66 @@ async function initializeWikiLayout(store, now = new Date) {
|
|
|
16194
16281
|
written: [schemaKey, rootIndexKey, wikiReadmeKey, logKey]
|
|
16195
16282
|
};
|
|
16196
16283
|
}
|
|
16284
|
+
function provenanceFor(artifact) {
|
|
16285
|
+
const existing = artifact.metadata?.provenance;
|
|
16286
|
+
if (existing && typeof existing === "object" && !Array.isArray(existing)) {
|
|
16287
|
+
return existing;
|
|
16288
|
+
}
|
|
16289
|
+
return generatedArtifactProvenance({
|
|
16290
|
+
generated_from: "wiki_layout_init",
|
|
16291
|
+
artifact_key: artifact.key
|
|
16292
|
+
});
|
|
16293
|
+
}
|
|
16294
|
+
function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
|
|
16295
|
+
const timestamp = now.toISOString();
|
|
16296
|
+
const rootIndex = artifacts.find((artifact) => artifact.key.endsWith("indexes/root.md"));
|
|
16297
|
+
const wikiReadme = artifacts.find((artifact) => artifact.key.endsWith("wiki/README.md"));
|
|
16298
|
+
if (rootIndex) {
|
|
16299
|
+
db.run(`INSERT INTO knowledge_indexes (id, kind, name, artifact_uri, shard_key, metadata_json, created_at, updated_at)
|
|
16300
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
16301
|
+
ON CONFLICT(kind, name, shard_key) DO UPDATE SET
|
|
16302
|
+
artifact_uri = excluded.artifact_uri,
|
|
16303
|
+
metadata_json = excluded.metadata_json,
|
|
16304
|
+
updated_at = excluded.updated_at`, [
|
|
16305
|
+
stableId3("idx", "root:indexes/root.md"),
|
|
16306
|
+
"root",
|
|
16307
|
+
"root",
|
|
16308
|
+
rootIndex.uri,
|
|
16309
|
+
"root",
|
|
16310
|
+
JSON.stringify({
|
|
16311
|
+
artifact_key: rootIndex.key,
|
|
16312
|
+
content_hash: rootIndex.hash ?? null,
|
|
16313
|
+
provenance: provenanceFor(rootIndex)
|
|
16314
|
+
}),
|
|
16315
|
+
timestamp,
|
|
16316
|
+
timestamp
|
|
16317
|
+
]);
|
|
16318
|
+
}
|
|
16319
|
+
if (wikiReadme) {
|
|
16320
|
+
db.run(`INSERT INTO wiki_pages (id, path, title, artifact_uri, content_hash, status, metadata_json, created_at, updated_at)
|
|
16321
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
16322
|
+
ON CONFLICT(path) DO UPDATE SET
|
|
16323
|
+
title = excluded.title,
|
|
16324
|
+
artifact_uri = excluded.artifact_uri,
|
|
16325
|
+
content_hash = excluded.content_hash,
|
|
16326
|
+
status = excluded.status,
|
|
16327
|
+
metadata_json = excluded.metadata_json,
|
|
16328
|
+
updated_at = excluded.updated_at`, [
|
|
16329
|
+
stableId3("wiki", "wiki/README.md"),
|
|
16330
|
+
"wiki/README.md",
|
|
16331
|
+
"Wiki",
|
|
16332
|
+
wikiReadme.uri,
|
|
16333
|
+
wikiReadme.hash ?? null,
|
|
16334
|
+
"active",
|
|
16335
|
+
JSON.stringify({
|
|
16336
|
+
artifact_key: wikiReadme.key,
|
|
16337
|
+
provenance: provenanceFor(wikiReadme)
|
|
16338
|
+
}),
|
|
16339
|
+
timestamp,
|
|
16340
|
+
timestamp
|
|
16341
|
+
]);
|
|
16342
|
+
}
|
|
16343
|
+
}
|
|
16197
16344
|
|
|
16198
16345
|
// src/service.ts
|
|
16199
16346
|
class KnowledgeService {
|
|
@@ -16270,6 +16417,7 @@ class KnowledgeService {
|
|
|
16270
16417
|
const db = openKnowledgeDb(workspace.knowledgeDbPath);
|
|
16271
16418
|
try {
|
|
16272
16419
|
recordStorageObjects(db, result.artifacts);
|
|
16420
|
+
recordWikiLayoutCatalog(db, result.artifacts);
|
|
16273
16421
|
} finally {
|
|
16274
16422
|
db.close();
|
|
16275
16423
|
}
|
package/bin/open-knowledge.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env bun
|
|
2
2
|
// @bun
|
|
3
|
-
var D=import.meta.require;import{readFileSync as
|
|
4
|
-
`);return t}function
|
|
3
|
+
var D=import.meta.require;import{readFileSync as Z,writeFileSync as V,existsSync as Q,renameSync as xt,unlinkSync as Ae}from"fs";import{randomUUID as Le}from"crypto";import{existsSync as Et,mkdirSync as de,readFileSync as mt,writeFileSync as Tt}from"fs";import{homedir as Re}from"os";import{dirname as kt,join as x,resolve as bt}from"path";var q=x(".hasna","apps","knowledge");function le(){return x(Re(),".open-knowledge","db.json")}function _e(){return x(Re(),".hasna","apps","knowledge")}function wt(e=process.cwd()){return bt(e,q)}function z(e){return{home:e,configPath:x(e,"config.json"),jsonStorePath:x(e,"db.json"),knowledgeDbPath:x(e,"knowledge.db"),artifactsDir:x(e,"artifacts"),cacheDir:x(e,"cache"),exportsDir:x(e,"exports"),indexesDir:x(e,"indexes"),logsDir:x(e,"logs"),runsDir:x(e,"runs"),schemasDir:x(e,"schemas"),wikiDir:x(e,"wiki")}}function St(){return{version:1,mode:"local",storage:{type:"local",artifacts_root:"artifacts"},sources:{preferred_ref:"open-files",allowed_schemes:["open-files","s3","file","https","http"]},providers:{default_model:"openai:gpt-5.2",aliases:{fast:"openai:gpt-5-mini",reasoning:"anthropic:claude-opus-4-6",sonnet:"anthropic:claude-sonnet-4-6",deepseek:"deepseek:deepseek-chat","deepseek-reasoning":"deepseek:deepseek-reasoner"},openai:{api_key_env:"OPENAI_API_KEY",default_model:"gpt-5.2"},anthropic:{api_key_env:"ANTHROPIC_API_KEY",default_model:"claude-sonnet-4-6"},deepseek:{api_key_env:"DEEPSEEK_API_KEY",default_model:"deepseek-chat"}},safety:{network:{web_search_enabled:!1,s3_reads_enabled:!1,allowed_s3_buckets:[]},redaction:{enabled:!0},approvals:{generated_writes_require_approval:!0}}}}function ve(e){let t=z(e);de(t.home,{recursive:!0});for(let r of[t.artifactsDir,t.cacheDir,t.exportsDir,t.indexesDir,t.logsDir,t.runsDir,t.schemasDir,t.wikiDir])de(r,{recursive:!0});if(!Et(t.configPath))Tt(t.configPath,`${JSON.stringify(St(),null,2)}
|
|
4
|
+
`);return t}function Oe(e,t=process.cwd()){if(e==="project"||e==="local")return z(wt(t));return z(_e())}function J(e){de(kt(e),{recursive:!0})}function Ne(e){let t=mt(e,"utf8");return JSON.parse(t)}function fe(){return z(_e()).jsonStorePath}function ge(e){if(!Q(e))if(J(e),e===fe()&&Q(le()))V(e,Z(le(),"utf8"));else V(e,JSON.stringify({items:[]},null,2))}function Rt(e){return`${e}.lock`}function vt(e,t){let i=Date.now();while(Date.now()-i<5000){try{if(!Q(e)){V(e,JSON.stringify({owner:t,ts:Date.now()}));return}let _=JSON.parse(Z(e,"utf8"));if(Date.now()-_.ts>1e4)Ae(e)}catch{}let s=Date.now();while(Date.now()-s<50);}throw Error(`Could not acquire lock on ${e} after 5000ms`)}function Ot(e,t){try{if(Q(e)){if(JSON.parse(Z(e,"utf8")).owner===t)Ae(e)}}catch{}}function v(e){ge(e);let t=Z(e,"utf8"),r=JSON.parse(t);if(!r||!Array.isArray(r.items))return{items:[]};return r}function U(e,t){let r=`${e}.tmp.${Le()}`;V(r,JSON.stringify(t,null,2)),xt(r,e)}function O(e,t){let r=Le(),n=Rt(e);vt(n,r);try{return t()}finally{Ot(n,r)}}function pe(){return`k_${Date.now().toString(36)}_${Math.random().toString(36).slice(2,8)}`}function Ie(e){return e.replace(/^k_/,"").slice(0,12)}import{Database as Nt}from"bun:sqlite";var At=`
|
|
5
5
|
PRAGMA journal_mode = WAL;
|
|
6
6
|
PRAGMA foreign_keys = ON;
|
|
7
7
|
|
|
@@ -168,7 +168,7 @@ CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
|
|
|
168
168
|
|
|
169
169
|
INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
170
170
|
VALUES (1, datetime('now'));
|
|
171
|
-
`,
|
|
171
|
+
`,Lt=`
|
|
172
172
|
DROP TABLE IF EXISTS chunks_fts;
|
|
173
173
|
|
|
174
174
|
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
|
|
@@ -181,7 +181,7 @@ CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
|
|
|
181
181
|
|
|
182
182
|
INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
183
183
|
VALUES (2, datetime('now'));
|
|
184
|
-
`,
|
|
184
|
+
`,It=`
|
|
185
185
|
CREATE TABLE IF NOT EXISTS audit_events (
|
|
186
186
|
id TEXT PRIMARY KEY,
|
|
187
187
|
event_type TEXT NOT NULL,
|
|
@@ -212,59 +212,59 @@ CREATE INDEX IF NOT EXISTS idx_approval_gates_status ON approval_gates(status);
|
|
|
212
212
|
|
|
213
213
|
INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
214
214
|
VALUES (3, datetime('now'));
|
|
215
|
-
`;function A(e){
|
|
216
|
-
VALUES (?, ?, ?, ?, ?, ?, ?)`,[n,t.event_type,t.action,t.target_uri??null,t.decision,JSON.stringify(t.metadata??{}),r]),n}function
|
|
217
|
-
VALUES (?, ?, ?, ?, ?, ?, ?)`,[`redact_${Ee()}`,t.source_uri??null,t.run_id??null,n.severity,n.type,JSON.stringify({...t.metadata??{},start:n.start,end:n.end}),r]);return t.findings.length}function
|
|
218
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,[n,t.action,t.target_uri??null,"approved",t.reason??null,t.approved_by??"local-cli",JSON.stringify(t.metadata??{}),r,r]),{id:n,status:"approved"}}function
|
|
215
|
+
`;function A(e){J(e);let t=new Nt(e);return t.exec("PRAGMA foreign_keys = ON;"),t.exec("PRAGMA busy_timeout = 5000;"),t}function C(e){let t=A(e);try{if(t.exec(At),ee(t)<2)t.exec(Lt);if(ee(t)<3)t.exec(It);return{path:e,schema_version:ee(t)}}finally{t.close()}}function ee(e){return e.query("SELECT MAX(version) AS version FROM schema_versions").get()?.version??0}function N(e,t){return e.query(`SELECT COUNT(*) AS n FROM ${t}`).get()?.n??0}function De(e){let t=A(e);try{return{schema_version:ee(t),sources:N(t,"sources"),source_revisions:N(t,"source_revisions"),chunks:N(t,"chunks"),wiki_pages:N(t,"wiki_pages"),citations:N(t,"citations"),indexes:N(t,"knowledge_indexes"),runs:N(t,"runs"),run_events:N(t,"run_events"),redaction_findings:N(t,"redaction_findings"),audit_events:N(t,"audit_events"),approval_gates:N(t,"approval_gates"),storage_objects:N(t,"storage_objects")}}finally{t.close()}}import{existsSync as Dt,mkdirSync as Ce,readFileSync as Ct,writeFileSync as Ut}from"fs";import{dirname as Pt,join as he,relative as jt,sep as Kt}from"path";function H(e){let t=e.replace(/\\/g,"/").trim();if(!t||t.startsWith("/"))throw Error(`Invalid artifact key: ${e}`);let r=t.split("/").filter(Boolean);if(r.length===0||r.some((n)=>n==="."||n===".."))throw Error(`Invalid artifact key: ${e}`);return r.join("/")}function ye(e,t){let r=jt(e,t);if(r.startsWith("..")||r===".."||r.startsWith(`..${Kt}`))throw Error(`Artifact path escapes root: ${t}`)}class Ue{root;type="local";canRead=!0;canWrite=!0;constructor(e){this.root=e;Ce(e,{recursive:!0})}async put(e){let t=H(e.key),r=he(this.root,t);return ye(this.root,r),Ce(Pt(r),{recursive:!0}),Ut(r,e.body),{key:t,uri:`file://${r}`}}async getText(e){let t=H(e),r=he(this.root,t);return ye(this.root,r),Ct(r,"utf8")}async exists(e){let t=H(e),r=he(this.root,t);return ye(this.root,r),Dt(r)}}class Pe{options;type="s3";canRead=!0;canWrite=!0;client;constructor(e){this.options=e;this.client=e.client}async getClient(){if(this.client)return 
this.client;let[{S3Client:e},{fromIni:t}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]);return this.client=new e({region:this.options.region,credentials:this.options.profile?t({profile:this.options.profile}):void 0,maxAttempts:this.options.max_attempts}),this.client}objectKey(e){let t=H(e),r=this.options.prefix?H(this.options.prefix):"";return r?`${r}/${t}`:t}async put(e){let[{PutObjectCommand:t},r]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),n=this.objectKey(e.key);return await r.send(new t({Bucket:this.options.bucket,Key:n,Body:e.body,ContentType:e.content_type,Metadata:e.metadata,ServerSideEncryption:this.options.server_side_encryption,SSEKMSKeyId:this.options.kms_key_id})),{key:n,uri:`s3://${this.options.bucket}/${n}`}}async getText(e){let[{GetObjectCommand:t},r]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),n=this.objectKey(e),i=await r.send(new t({Bucket:this.options.bucket,Key:n}));if(!i.Body)return"";return await i.Body.transformToString()}async exists(e){let[{HeadObjectCommand:t},r]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),n=this.objectKey(e);try{return await r.send(new t({Bucket:this.options.bucket,Key:n})),!0}catch(i){let s=i instanceof Error?i.name:"";if(s==="NotFound"||s==="NoSuchKey"||s==="NotFoundError")return!1;throw i}}}function je(e,t){if(e.storage.type==="s3"){if(!e.storage.s3?.bucket)throw Error("S3 artifact storage requires storage.s3.bucket");return new Pe({bucket:e.storage.s3.bucket,prefix:e.storage.s3.prefix,region:e.storage.s3.region,profile:e.storage.s3.profile,max_attempts:e.storage.s3.max_attempts,server_side_encryption:e.storage.s3.server_side_encryption,kms_key_id:e.storage.s3.kms_key_id})}return new Ue(t.artifactsDir)}import{createHash as Jt,randomUUID as Vt}from"crypto";import{existsSync as Qt,readFileSync as Zt}from"fs";import{basename as er}from"path";function Ke(e,t){if(!e)throw Error(t);return e}function Ft(e){let 
r=e.slice(13).split("/").filter(Boolean),n=r[0];if(n!=="file"&&n!=="source")throw Error("Invalid open-files ref. Expected open-files://file/<id>, open-files://file/<id>/revision/<revision_id>, or open-files://source/<id>/path/<path>.");let i=Ke(r[1],"Invalid open-files ref. Missing id.");if(n==="file"){if(r.length===2)return{kind:"open-files",uri:e,entity:n,id:i};if(r[2]==="revision"&&r[3]&&r.length===4)return{kind:"open-files",uri:e,entity:n,id:i,revision_id:decodeURIComponent(r[3])};throw Error("Invalid open-files file ref. Expected open-files://file/<id>/revision/<revision_id>.")}let s=r.indexOf("path"),_=s>=0?decodeURIComponent(r.slice(s+1).join("/")):void 0;return{kind:"open-files",uri:e,entity:n,id:i,path:_}}function Mt(e){let t=new URL(e),r=Ke(t.hostname,"Invalid s3 ref. Missing bucket."),n=decodeURIComponent(t.pathname.replace(/^\/+/,""));if(!n)throw Error("Invalid s3 ref. Missing object key.");return{kind:"s3",uri:e,bucket:r,key:n}}function Wt(e){let t=new URL(e);return{kind:"file",uri:e,path:decodeURIComponent(t.pathname)}}function Xt(e){let t=new URL(e);return{kind:"web",uri:e,url:t.toString()}}function L(e){if(e.startsWith("open-files://"))return Ft(e);if(e.startsWith("s3://"))return Mt(e);if(e.startsWith("file://"))return Wt(e);if(e.startsWith("https://")||e.startsWith("http://"))return Xt(e);throw Error(`Unsupported source ref scheme: ${e}`)}function Fe(e,t=L(e)){if(t.kind==="open-files"&&t.entity==="file"&&t.revision_id)return e.replace(/\/revision\/[^/]+$/,"");return e}function Me(e){let t=L(e);return t.kind==="open-files"&&t.entity==="file"?t.revision_id??null:null}import{createHash as $t,randomUUID as Ee}from"crypto";import{relative as Bt,resolve as Xe,sep as zt}from"path";function We(e){let t=process.env[e];return t==="1"||t==="true"||t==="yes"}function $e(e,t){let r=e,n=new 
Set(r.safety?.network?.allowed_s3_buckets??[]);if(e.storage.type==="s3"&&e.storage.s3?.bucket)n.add(e.storage.s3.bucket);if(process.env.HASNA_KNOWLEDGE_ALLOWED_S3_BUCKETS)for(let i of process.env.HASNA_KNOWLEDGE_ALLOWED_S3_BUCKETS.split(",").map((s)=>s.trim()).filter(Boolean))n.add(i);return{mode:e.mode,allowWriteRoots:[t.home,t.artifactsDir,t.cacheDir,t.exportsDir,t.indexesDir,t.logsDir,t.runsDir,t.schemasDir,t.wikiDir].map((i)=>Xe(i)),readOnlySourceAccess:!0,network:{webSearchEnabled:r.safety?.network?.web_search_enabled??We("HASNA_KNOWLEDGE_WEB_SEARCH"),s3ReadsEnabled:r.safety?.network?.s3_reads_enabled??We("HASNA_KNOWLEDGE_ALLOW_S3_READS"),allowedS3Buckets:[...n].sort()},redaction:{enabled:r.safety?.redaction?.enabled??!0},approvals:{generatedWritesRequireApproval:r.safety?.approvals?.generated_writes_require_approval??!0}}}function qt(e,t){let r=Bt(e,t);return r===""||!r.startsWith("..")&&r!==".."&&!r.startsWith(`..${zt}`)}function K(e,t){let r=Xe(e);if(!t.allowWriteRoots.some((n)=>qt(n,r)))throw Error(`Safety policy denied write outside .hasna/apps/knowledge: ${e}`)}function j(e,t){let n=new URL(e).hostname;if(!t.network.s3ReadsEnabled)throw Error("Safety policy denied S3 read. Set safety.network.s3_reads_enabled=true or HASNA_KNOWLEDGE_ALLOW_S3_READS=1.");if(!t.network.allowedS3Buckets.includes(n))throw Error(`Safety policy denied S3 bucket "${n}". Add it to safety.network.allowed_s3_buckets or HASNA_KNOWLEDGE_ALLOWED_S3_BUCKETS.`)}function te(e){if(!e.network.webSearchEnabled)throw Error("Safety policy denied web search. 
Set safety.network.web_search_enabled=true or HASNA_KNOWLEDGE_WEB_SEARCH=1.")}var Ht=[{type:"private_key_block",severity:"high",regex:/-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/g,replacement:"[REDACTED:private_key_block]"},{type:"secret_assignment",severity:"high",regex:/\b(?:api[_-]?key|secret|token|password)\s*[:=]\s*['"]?[^'"\s]{8,}/gi,replacement:"[REDACTED:secret_assignment]"},{type:"openai_api_key",severity:"high",regex:/\bsk-[A-Za-z0-9_-]{20,}\b/g,replacement:"[REDACTED:openai_api_key]"},{type:"anthropic_api_key",severity:"high",regex:/\bsk-ant-[A-Za-z0-9_-]{20,}\b/g,replacement:"[REDACTED:anthropic_api_key]"},{type:"aws_access_key_id",severity:"high",regex:/\bA(?:KIA|SIA)[A-Z0-9]{16}\b/g,replacement:"[REDACTED:aws_access_key_id]"}];function re(e,t){if(t&&!t.redaction.enabled)return{text:e,findings:[]};let r=e,n=[];for(let i of Ht)r=r.replace(i.regex,(s,..._)=>{let f=typeof _.at(-2)==="number"?_.at(-2):r.indexOf(s);return n.push({type:i.type,severity:i.severity,start:Math.max(0,f),end:Math.max(0,f+s.length)}),i.replacement});return{text:r,findings:n}}function Gt(e){return`audit_${$t("sha256").update(`${e.event_type}\x00${e.action}\x00${e.target_uri??""}\x00${e.created_at??""}\x00${JSON.stringify(e.metadata??{})}\x00${Ee()}`).digest("hex").slice(0,24)}`}function w(e,t){let r=t.created_at??new Date().toISOString(),n=Gt({...t,created_at:r});return e.run(`INSERT INTO audit_events (id, event_type, action, target_uri, decision, metadata_json, created_at)
|
|
216
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)`,[n,t.event_type,t.action,t.target_uri??null,t.decision,JSON.stringify(t.metadata??{}),r]),n}function ne(e,t){let r=t.created_at??new Date().toISOString();for(let n of t.findings)e.run(`INSERT INTO redaction_findings (id, source_uri, run_id, severity, finding_type, metadata_json, created_at)
|
|
217
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)`,[`redact_${Ee()}`,t.source_uri??null,t.run_id??null,n.severity,n.type,JSON.stringify({...t.metadata??{},start:n.start,end:n.end}),r]);return t.findings.length}function Be(e,t){let r=t.created_at??new Date().toISOString(),n=`approval_${Ee()}`;return e.run(`INSERT INTO approval_gates (id, action, target_uri, status, reason, approved_by, metadata_json, created_at, updated_at)
|
|
218
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,[n,t.action,t.target_uri??null,"approved",t.reason??null,t.approved_by??"local-cli",JSON.stringify(t.metadata??{}),r,r]),{id:n,status:"approved"}}function Yt(e,t,r){let n=e.query(`SELECT id FROM approval_gates
|
|
219
219
|
WHERE action = ? AND status = 'approved' AND (target_uri IS NULL OR target_uri = ? OR ? IS NULL)
|
|
220
|
-
ORDER BY updated_at DESC LIMIT 1`).get(t,r??null,r??null);return Boolean(n)}function
|
|
220
|
+
ORDER BY updated_at DESC LIMIT 1`).get(t,r??null,r??null);return Boolean(n)}function ze(e,t,r,n){let i=r==="generated_write"&&t.approvals.generatedWritesRequireApproval,s=!i||Yt(e,r,n);return{action:r,target_uri:n??null,approval_required:i,approved:s,decision:s?"allow":"requires_approval"}}function ie(e,t){return`${e}_${Jt("sha256").update(t).digest("hex").slice(0,20)}`}function W(e){return e&&typeof e==="object"&&!Array.isArray(e)?e:void 0}function b(e){return typeof e==="string"&&e.length>0?e:void 0}function tr(e){let t=b(e.source_ref)??b(e.source_uri)??b(e.uri);if(t)return t;let r=b(e.file_id);if(r){let s=b(e.revision_id)??b(e.revision),_=`open-files://file/${encodeURIComponent(r)}`;return s?`${_}/revision/${encodeURIComponent(s)}`:_}let n=b(e.source_id),i=b(e.path);if(n&&i)return`open-files://source/${encodeURIComponent(n)}/path/${encodeURIComponent(i)}`;throw Error("Outbox event is missing source_ref, file_id, or source_id/path.")}function rr(e,t){if(t.kind==="open-files"&&t.entity==="file"&&t.revision_id)return e.replace(/\/revision\/[^/]+$/,"");return e}function nr(e){return b(e.hash)??b(e.checksum)??b(e.sha256)??null}function ir(e,t,r){return b(e.revision_id)??b(e.revision)??b(e.version_id)??(t.kind==="open-files"?t.revision_id:void 0)??r??null}function sr(e){return(b(e.event)??b(e.type)??b(e.action)??b(e.change_type)??"changed").toLowerCase()}function or(e){let t=b(e.path);return b(e.title)??b(e.name)??(t?er(t):null)}function ar(e,t){let r=tr(e),n=L(r),i=nr(e);return{raw:e,eventType:sr(e),sourceRef:r,sourceUri:rr(r,n),kind:n.kind,title:or(e),revision:ir(e,n,i),hash:i,status:b(e.status)?.toLowerCase()??null,updatedAt:b(e.updated_at)??t,acl:e.permissions??e.acl??void 0}}function cr(e){let t=e.trim();if(!t)return[];if(t.startsWith("[")){let r=JSON.parse(t);if(!Array.isArray(r))throw Error("Outbox array parse failed.");return r.map((n)=>{let i=W(n);if(!i)throw Error("Outbox array entries must be objects.");return i})}if(t.startsWith("{"))try{let 
r=JSON.parse(t),n=W(r);if(!n)throw Error("Outbox object parse failed.");if(Array.isArray(n.events))return n.events.map((i)=>{let s=W(i);if(!s)throw Error("Outbox events entries must be objects.");return s});if("source_ref"in n||"source_uri"in n||"file_id"in n)return[n]}catch(r){let n=t.split(/\r?\n/).filter((i)=>i.trim().length>0);if(n.length<=1)throw r;return n.map((i)=>{let s=W(JSON.parse(i));if(!s)throw Error("Outbox JSONL entries must be objects.");return s})}return t.split(/\r?\n/).filter((r)=>r.trim().length>0).map((r)=>{let n=W(JSON.parse(r));if(!n)throw Error("Outbox JSONL entries must be objects.");return n})}async function ur(e,t,r){let n=new URL(e),i=n.hostname,s=decodeURIComponent(n.pathname.replace(/^\/+/,""));if(!i||!s)throw Error(`Invalid S3 outbox URI: ${e}`);if(r)j(e,r);let[{S3Client:_,GetObjectCommand:f},{fromIni:a}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]),o=t?.storage.type==="s3"&&t.storage.s3?.bucket===i?t.storage.s3:void 0,u=await new _({region:o?.region,credentials:o?.profile?a({profile:o.profile}):void 0,maxAttempts:o?.max_attempts}).send(new f({Bucket:i,Key:s}));if(!u.Body)return"";return await u.Body.transformToString()}async function dr(e,t,r){if(e.startsWith("s3://"))return ur(e,t,r);if(!Qt(e))throw Error(`Outbox not found: ${e}`);return Zt(e,"utf8")}function qe(e,t){let r={};if(e)try{r=W(JSON.parse(e))??{}}catch{r={}}return JSON.stringify({...r,...t})}function lr(e,t,r){let n=ie("src",t.sourceUri);e.run(`INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
|
|
221
221
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
222
222
|
ON CONFLICT(uri) DO UPDATE SET
|
|
223
223
|
kind = excluded.kind,
|
|
224
224
|
title = COALESCE(excluded.title, sources.title),
|
|
225
|
-
updated_at = excluded.updated_at`,[n,t.sourceUri,t.kind,t.title,JSON.stringify({source_ref:t.sourceRef,source_uri:t.sourceUri,status:t.status,last_outbox_event:t.eventType}),JSON.stringify(t.acl??{}),r,t.updatedAt]);let i=e.query("SELECT id, metadata_json, acl_json FROM sources WHERE uri = ?").get(t.sourceUri);if(!i)throw Error(`Failed to upsert source for outbox event: ${t.sourceUri}`);let s={source_ref:t.sourceRef,source_uri:t.sourceUri,last_outbox_event:t.eventType,last_outbox_at:t.updatedAt};if(t.status)s.status=t.status;if(b(t.raw.path))s.path=t.raw.path;return e.run("UPDATE sources SET metadata_json = ?, acl_json = CASE WHEN ? IS NULL THEN acl_json ELSE ? END, updated_at = ? WHERE id = ?",[
|
|
225
|
+
updated_at = excluded.updated_at`,[n,t.sourceUri,t.kind,t.title,JSON.stringify({source_ref:t.sourceRef,source_uri:t.sourceUri,status:t.status,last_outbox_event:t.eventType}),JSON.stringify(t.acl??{}),r,t.updatedAt]);let i=e.query("SELECT id, metadata_json, acl_json FROM sources WHERE uri = ?").get(t.sourceUri);if(!i)throw Error(`Failed to upsert source for outbox event: ${t.sourceUri}`);let s={source_ref:t.sourceRef,source_uri:t.sourceUri,last_outbox_event:t.eventType,last_outbox_at:t.updatedAt};if(t.status)s.status=t.status;if(b(t.raw.path))s.path=t.raw.path;return e.run("UPDATE sources SET metadata_json = ?, acl_json = CASE WHEN ? IS NULL THEN acl_json ELSE ? END, updated_at = ? WHERE id = ?",[qe(i.metadata_json,s),t.acl===void 0?null:JSON.stringify(t.acl),t.acl===void 0?null:JSON.stringify(t.acl),t.updatedAt,i.id]),i.id}function _r(e,t,r,n){if(!r.revision)return null;let i=ie("rev",`${t}\x00${r.revision}`),s={source_ref:r.sourceRef,source_uri:r.sourceUri,status:r.status,last_outbox_event:r.eventType,reindex_required:!0};return e.run(`INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)
|
|
226
226
|
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
227
227
|
ON CONFLICT(source_id, revision) DO UPDATE SET
|
|
228
228
|
hash = COALESCE(excluded.hash, source_revisions.hash),
|
|
229
|
-
metadata_json = excluded.metadata_json`,[i,t,r.revision,r.hash,b(r.raw.extracted_text_ref)??null,JSON.stringify(s),n]),e.query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?").get(t,r.revision)?.id??null}function
|
|
230
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,[s,"open-files-outbox",e.input,"completed","local","open-files-outbox",JSON.stringify({path:e.input,events:n.length}),t,t]);let
|
|
231
|
-
VALUES (?, ?, ?, ?, ?, ?)`,[
|
|
232
|
-
VALUES (?, ?, ?, ?, 0, 0, 0, ?, ?)`,[
|
|
233
|
-
`);if(!n.trim())return[];let i=[],s=0;while(s<n.length){let
|
|
229
|
+
metadata_json = excluded.metadata_json`,[i,t,r.revision,r.hash,b(r.raw.extracted_text_ref)??null,JSON.stringify(s),n]),e.query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?").get(t,r.revision)?.id??null}function fr(e,t,r){if(r.revision)return e.query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?").all(t,r.revision).map((n)=>n.id);if(r.hash)return e.query("SELECT id FROM source_revisions WHERE source_id = ? AND hash = ?").all(t,r.hash).map((n)=>n.id);return e.query("SELECT id FROM source_revisions WHERE source_id = ?").all(t).map((n)=>n.id)}function gr(e,t){let r=e.query("SELECT id FROM chunks WHERE source_revision_id = ?").all(t),n=0;for(let s of r){let _=e.query("SELECT COUNT(*) AS n FROM chunk_embeddings WHERE chunk_id = ?").get(s.id);n+=_?.n??0,e.run("DELETE FROM chunk_embeddings WHERE chunk_id = ?",[s.id]),e.run("DELETE FROM chunks_fts WHERE chunk_id = ?",[s.id])}e.run("DELETE FROM chunks WHERE source_revision_id = ?",[t]);let i=e.query("SELECT metadata_json FROM source_revisions WHERE id = ?").get(t);return e.run("UPDATE source_revisions SET metadata_json = ? WHERE id = ?",[qe(i?.metadata_json,{reindex_required:!0,invalidated_at:new Date().toISOString()}),t]),{chunksDeleted:r.length,embeddingsDeleted:n}}function pr(e,t){return t==="deleted"||["delete","deleted","remove","removed"].includes(e)}function hr(e){return["move","moved","rename","renamed","path_changed"].includes(e)}function yr(e){return["permission","permissions","permission_changed","acl_changed"].includes(e)}async function He(e){let t=(e.now??new Date).toISOString();if(e.safetyPolicy)K(e.dbPath,e.safetyPolicy);C(e.dbPath);let r=await dr(e.input,e.config,e.safetyPolicy),n=cr(r),i=A(e.dbPath),s=`run_${Vt()}`;try{return i.transaction(()=>{i.run(`INSERT INTO runs (id, type, prompt, status, provider, model, metadata_json, created_at, updated_at)
|
|
230
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,[s,"open-files-outbox",e.input,"completed","local","open-files-outbox",JSON.stringify({path:e.input,events:n.length}),t,t]);let _=new Set,f=new Set,a=0,o=0,c=0,u=0,d=0,l=0;return w(i,{event_type:"source_read",action:e.input.startsWith("s3://")?"s3_outbox_read":"local_outbox_read",target_uri:e.input,decision:"allow",metadata:{events:n.length,read_only:!0},created_at:t}),n.forEach((m,T)=>{let p=ar(m,t),I=lr(i,p,t);_.add(I);let k=_r(i,I,p,t);if(k)f.add(k);let S=fr(i,I,p);for(let g of S){f.add(g);let R=gr(i,g);a+=R.chunksDeleted,o+=R.embeddingsDeleted,c+=1}if(pr(p.eventType,p.status))u+=1;if(hr(p.eventType))d+=1;if(yr(p.eventType)||p.acl!==void 0)l+=1;i.run(`INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
|
|
231
|
+
VALUES (?, ?, ?, ?, ?, ?)`,[ie("evt",`${s}\x00${T}\x00${p.sourceRef}\x00${p.eventType}`),s,"info",p.eventType,JSON.stringify({source_ref:p.sourceRef,source_uri:p.sourceUri,revision:p.revision,hash:p.hash,status:p.status,affected_revisions:S.length}),p.updatedAt])}),i.run(`INSERT INTO provider_usage (id, run_id, provider, model, input_tokens, output_tokens, cost_usd, metadata_json, created_at)
|
|
232
|
+
VALUES (?, ?, ?, ?, 0, 0, 0, ?, ?)`,[ie("usage",s),s,"local","open-files-outbox",JSON.stringify({note:"No model provider used for outbox invalidation."}),t]),w(i,{event_type:"write",action:"knowledge_outbox_invalidation",target_uri:e.dbPath,decision:"allow",metadata:{run_id:s,events:n.length,sources:_.size,revisions:f.size,chunks_deleted:a,embeddings_deleted:o},created_at:t}),{path:e.input,db_path:e.dbPath,run_id:s,events_seen:n.length,sources_touched:_.size,revisions_touched:f.size,chunks_deleted:a,embeddings_deleted:o,stale_revisions:c,deleted_sources:u,moved_sources:d,permission_updates:l}})()}finally{i.close()}}import{createHash as mr}from"crypto";import{existsSync as Tr,readFileSync as kr}from"fs";import{basename as br}from"path";function Er(e){return["deleted","stale","invalidated","reindex_required"].includes((e??"").toLowerCase())}function se(e){let t=e.status??null;return{source_owner:"open-files",source_ref:e.source_ref??null,source_uri:e.source_uri??null,source_kind:e.source_kind??null,source_revision_id:e.source_revision_id??null,revision:e.revision??null,hash:e.hash??null,chunk_id:e.chunk_id??null,start_offset:e.start_offset??null,end_offset:e.end_offset??null,status:t,read_only:!0,citation_required:!0,resolver:e.resolver??null,stale:Er(t)}}function me(e){return{source_owner:"open-files",generated_from:e.generated_from,artifact_key:e.artifact_key,source_refs:e.source_refs??[],read_only_sources:!0,citation_required:e.citation_required??!0,raw_source_bytes_stored_in_open_knowledge:!1}}function Ge(e,t){return{...e,provenance:t}}function Te(e,t){return`${e}_${mr("sha256").update(t).digest("hex").slice(0,20)}`}function X(e){return e&&typeof e==="object"&&!Array.isArray(e)?e:void 0}function y(e){return typeof e==="string"&&e.length>0?e:void 0}function wr(e){return typeof e==="number"&&Number.isFinite(e)?e:void 0}function Sr(e){let t=y(e.source_ref)??y(e.source_uri)??y(e.uri);if(t)return t;let r=y(e.file_id);if(r){let 
s=y(e.revision_id)??y(e.revision),_=`open-files://file/${encodeURIComponent(r)}`;return s?`${_}/revision/${encodeURIComponent(s)}`:_}let n=y(e.source_id),i=y(e.path);if(n&&i)return`open-files://source/${encodeURIComponent(n)}/path/${encodeURIComponent(i)}`;throw Error("Manifest item is missing source_ref, file_id, or source_id/path.")}function xr(e,t){if(t.kind==="open-files"&&t.entity==="file"&&t.revision_id)return e.replace(/\/revision\/[^/]+$/,"");return e}function Rr(e){let t=y(e.extracted_text)??y(e.text)??y(e.content_text)??y(e.markdown);if(t!==void 0)return t;let r=e.content;return typeof r==="string"?r:null}function vr(e){let t=y(e.extracted_text_ref)??y(e.extracted_text_uri)??y(e.text_ref);if(t)return t;let r=X(e.content);return y(r?.extracted_text_ref)??y(r?.extracted_text_uri)??null}function Or(e){let t=y(e.path);return y(e.title)??y(e.name)??(t?br(t):null)}function Nr(e){return y(e.hash)??y(e.checksum)??y(e.sha256)??null}function Ar(e,t,r){return y(e.revision_id)??y(e.revision)??y(e.version_id)??(t.kind==="open-files"?t.revision_id:void 0)??r??y(e.updated_at)??"current"}function Lr(e,t){let r={};for(let[n,i]of Object.entries(e)){if(["text","content","content_text","extracted_text","markdown"].includes(n))continue;r[n]=i}return r.source_ref=t.sourceRef,r.source_uri=t.sourceUri,r.status=t.status,r}function Ir(e,t){let r=Sr(e),n=L(r),i=xr(r,n),s=Nr(e),_=y(e.status)??"active";return{raw:e,sourceRef:r,sourceUri:i,kind:n.kind,title:Or(e),revision:Ar(e,n,s),hash:s,extractedTextUri:vr(e),text:Rr(e),metadata:Lr(e,{sourceRef:r,sourceUri:i,status:_}),acl:e.permissions??e.acl??{},status:_,updatedAt:y(e.updated_at)??t}}function Dr(e){let t=e.trim();if(!t)return[];if(t.startsWith("[")){let r=JSON.parse(t);if(!Array.isArray(r))throw Error("Manifest array parse failed.");return r.map((n)=>{let i=X(n);if(!i)throw Error("Manifest array entries must be objects.");return i})}if(t.startsWith("{"))try{let r=JSON.parse(t),n=X(r);if(!n)throw Error("Manifest object parse 
failed.");if(Array.isArray(n.items))return n.items.map((i)=>{let s=X(i);if(!s)throw Error("Manifest items entries must be objects.");return s});if("source_ref"in n||"source_uri"in n||"file_id"in n)return[n]}catch(r){let n=t.split(/\r?\n/).filter((i)=>i.trim().length>0);if(n.length<=1)throw r;return n.map((i)=>{let s=X(JSON.parse(i));if(!s)throw Error("Manifest JSONL entries must be objects.");return s})}return t.split(/\r?\n/).filter((r)=>r.trim().length>0).map((r)=>{let n=X(JSON.parse(r));if(!n)throw Error("Manifest JSONL entries must be objects.");return n})}async function Cr(e,t,r){let n=new URL(e),i=n.hostname,s=decodeURIComponent(n.pathname.replace(/^\/+/,""));if(!i||!s)throw Error(`Invalid S3 manifest URI: ${e}`);if(r)j(e,r);let[{S3Client:_,GetObjectCommand:f},{fromIni:a}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]),o=t?.storage.type==="s3"&&t.storage.s3?.bucket===i?t.storage.s3:void 0,u=await new _({region:o?.region,credentials:o?.profile?a({profile:o.profile}):void 0,maxAttempts:o?.max_attempts}).send(new f({Bucket:i,Key:s}));if(!u.Body)return"";return await u.Body.transformToString()}async function Ur(e,t,r){if(e.startsWith("s3://"))return Cr(e,t,r);if(!Tr(e))throw Error(`Manifest not found: ${e}`);return kr(e,"utf8")}function Pr(e,t,r){let n=e.replace(/\r\n/g,`
|
|
233
|
+
`);if(!n.trim())return[];let i=[],s=0;while(s<n.length){let _=Math.min(n.length,s+t),f=_;if(_<n.length){let o=n.lastIndexOf(`
|
|
234
234
|
|
|
235
|
-
`,
|
|
235
|
+
`,_),c=n.lastIndexOf(". ",_),u=Math.max(o,c);if(u>s+Math.floor(t*0.5))f=u+(u===o?2:1)}let a=n.slice(s,f).trim();if(a)i.push({ordinal:i.length,text:a,startOffset:s,endOffset:f});if(f>=n.length)break;s=Math.max(0,f-r)}return i}function jr(e){let t=e.trim().split(/\s+/).filter(Boolean).length;return Math.max(1,Math.ceil(t*1.25))}function Kr(e,t){let r=e.query("SELECT id FROM chunks WHERE source_revision_id = ?").all(t);for(let n of r)e.run("DELETE FROM chunks_fts WHERE chunk_id = ?",[n.id]);return e.run("DELETE FROM chunks WHERE source_revision_id = ?",[t]),r.length}function Fr(e,t,r){let n=Te("src",t.sourceUri);e.run(`INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
|
|
236
236
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
237
237
|
ON CONFLICT(uri) DO UPDATE SET
|
|
238
238
|
kind = excluded.kind,
|
|
239
239
|
title = excluded.title,
|
|
240
240
|
metadata_json = excluded.metadata_json,
|
|
241
241
|
acl_json = excluded.acl_json,
|
|
242
|
-
updated_at = excluded.updated_at`,[n,t.sourceUri,t.kind,t.title,JSON.stringify(t.metadata),JSON.stringify(t.acl??{}),r,t.updatedAt]);let i=e.query("SELECT id FROM sources WHERE uri = ?").get(t.sourceUri);if(!i)throw Error(`Failed to upsert source: ${t.sourceUri}`);return i.id}function
|
|
242
|
+
updated_at = excluded.updated_at`,[n,t.sourceUri,t.kind,t.title,JSON.stringify(t.metadata),JSON.stringify(t.acl??{}),r,t.updatedAt]);let i=e.query("SELECT id FROM sources WHERE uri = ?").get(t.sourceUri);if(!i)throw Error(`Failed to upsert source: ${t.sourceUri}`);return i.id}function Mr(e,t,r,n){let i=Te("rev",`${t}\x00${r.revision}`);e.run(`INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)
|
|
243
243
|
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
244
244
|
ON CONFLICT(source_id, revision) DO UPDATE SET
|
|
245
245
|
hash = excluded.hash,
|
|
246
246
|
extracted_text_uri = excluded.extracted_text_uri,
|
|
247
|
-
metadata_json = excluded.metadata_json`,[i,t,r.revision,r.hash,r.extractedTextUri,JSON.stringify(r.metadata),n]);let s=e.query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?").get(t,r.revision);if(!s)throw Error(`Failed to upsert source revision: ${r.sourceRef}`);return s.id}function
|
|
248
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,[c,t,"source",o.ordinal,o.text,
|
|
247
|
+
metadata_json = excluded.metadata_json`,[i,t,r.revision,r.hash,r.extractedTextUri,JSON.stringify(r.metadata),n]);let s=e.query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?").get(t,r.revision);if(!s)throw Error(`Failed to upsert source revision: ${r.sourceRef}`);return s.id}function Wr(e,t,r,n,i,s,_){if(!r.text||r.status.toLowerCase()==="deleted")return{chunksInserted:0,redactions:0};let f=re(r.text,_);if(f.findings.length>0)ne(e,{source_uri:r.sourceUri,findings:f.findings,metadata:{source_ref:r.sourceRef,revision:r.revision},created_at:n}),w(e,{event_type:"redaction",action:"source_text_redact",target_uri:r.sourceUri,decision:"redacted",metadata:{findings:f.findings.length,source_ref:r.sourceRef,revision:r.revision},created_at:n});let a=Pr(f.text,i,s);for(let o of a){let c=Te("chk",`${t}\x00${o.ordinal}\x00${o.text}`),u=se({source_ref:r.sourceRef,source_uri:r.sourceUri,source_kind:r.kind,source_revision_id:t,revision:r.revision,hash:r.hash,chunk_id:c,start_offset:o.startOffset,end_offset:o.endOffset,status:r.status,resolver:"open-files-read-only"}),d=Ge({source_ref:r.sourceRef,source_uri:r.sourceUri,source_kind:r.kind,source_revision_id:t,revision:r.revision,hash:r.hash,status:r.status,path:y(r.raw.path)??null,mime:y(r.raw.mime)??y(r.raw.content_type)??null,size:wr(r.raw.size)??null},u);e.run(`INSERT INTO chunks (id, source_revision_id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json, created_at)
|
|
248
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,[c,t,"source",o.ordinal,o.text,jr(o.text),o.startOffset,o.endOffset,JSON.stringify(d),n]),e.run("INSERT INTO chunks_fts (chunk_id, text, title, source_uri) VALUES (?, ?, ?, ?)",[c,o.text,r.title??"",r.sourceUri])}return{chunksInserted:a.length,redactions:f.findings.length}}async function Ye(e){let t=e.now??new Date;if(e.safetyPolicy)K(e.dbPath,e.safetyPolicy);C(e.dbPath);let r=await Ur(e.input,e.config,e.safetyPolicy),n=Dr(r);return ke({dbPath:e.dbPath,items:n,sourceLabel:e.input,safetyPolicy:e.safetyPolicy,now:t,maxChunkChars:e.maxChunkChars,chunkOverlapChars:e.chunkOverlapChars})}async function ke(e){let t=(e.now??new Date).toISOString(),r=e.maxChunkChars??4000,n=e.chunkOverlapChars??200;if(r<500)throw Error("maxChunkChars must be at least 500.");if(n<0||n>=r)throw Error("chunkOverlapChars must be less than maxChunkChars.");if(e.safetyPolicy)K(e.dbPath,e.safetyPolicy);C(e.dbPath);let i=A(e.dbPath);try{return i.transaction(()=>{let _=new Set,f=new Set,a=0,o=0,c=0,u=0;w(i,{event_type:"source_read",action:e.readAction??(e.sourceLabel.startsWith("s3://")?"s3_manifest_read":"local_manifest_read"),target_uri:e.sourceLabel,decision:"allow",metadata:{items:e.items.length,read_only:!0},created_at:t});for(let d of e.items){let l=Ir(d,t),m=Fr(i,l,t),T=Mr(i,m,l,t);if(_.add(m),f.add(T),l.text||l.status.toLowerCase()==="deleted")o+=Kr(i,T);let p=Wr(i,T,l,t,r,n,e.safetyPolicy);a+=p.chunksInserted,c+=p.redactions}return w(i,{event_type:"write",action:"knowledge_manifest_ingest",target_uri:e.dbPath,decision:"allow",metadata:{items:e.items.length,sources:_.size,revisions:f.size,chunks_inserted:a,redactions:c},created_at:t}),{path:e.sourceLabel,db_path:e.dbPath,items_seen:e.items.length,sources_upserted:_.size,revisions_upserted:f.size,chunks_inserted:a,chunks_deleted:o,redactions:c,skipped:u}})()}finally{i.close()}}import{createHash as Gr}from"crypto";import{existsSync as Yr,readFileSync as Jr}from"fs";import{basename as 
ce}from"path";function oe(e){if(!e)return{};try{let t=JSON.parse(e);return t&&typeof t==="object"&&!Array.isArray(t)?t:{}}catch{return{}}}function F(e,t){for(let r of t){let n=e[r];if(typeof n==="string"&&n.length>0)return n}return null}function Je(e,t){for(let r of t){let n=e[r];if(typeof n==="number"&&Number.isFinite(n))return n}return null}function Xr(e,t){let r=e.mode;if(typeof r==="string"&&r!=="read_only")throw Error(`Source resolver denied ${t}. Permission mode is ${r}, expected read_only.`);let n=e.denied_purposes;if(Array.isArray(n)&&n.includes(t))throw Error(`Source resolver denied ${t}. Purpose is explicitly denied.`);let i=e.allowed_purposes;if(Array.isArray(i)&&i.length>0&&!i.includes(t))throw Error(`Source resolver denied ${t}. Allowed purposes: ${i.join(", ")}`)}function $r(e,t,r){if(!t)return r;try{let n=L(e);if(n.kind==="open-files"&&n.entity==="file")return`${e}/revision/${encodeURIComponent(t.revision)}`}catch{return r}return r}function Br(e,t,r){return e.query(`SELECT id, uri, kind, title, metadata_json, acl_json, updated_at
|
|
249
249
|
FROM sources
|
|
250
250
|
WHERE uri = ? OR uri = ?
|
|
251
251
|
ORDER BY CASE WHEN uri = ? THEN 0 ELSE 1 END
|
|
252
|
-
LIMIT 1`).get(t,r,t)??null}function
|
|
252
|
+
LIMIT 1`).get(t,r,t)??null}function zr(e,t,r){if(r)return e.query(`SELECT id, revision, hash, extracted_text_uri, metadata_json, created_at
|
|
253
253
|
FROM source_revisions
|
|
254
254
|
WHERE source_id = ? AND revision = ?
|
|
255
255
|
LIMIT 1`).get(t,r)??null;return e.query(`SELECT id, revision, hash, extracted_text_uri, metadata_json, created_at
|
|
256
256
|
FROM source_revisions
|
|
257
257
|
WHERE source_id = ?
|
|
258
258
|
ORDER BY created_at DESC, revision DESC
|
|
259
|
-
LIMIT 1`).get(t)??null}function
|
|
259
|
+
LIMIT 1`).get(t)??null}function qr(e,t){if(!t)return 0;return e.query("SELECT COUNT(*) AS n FROM chunks WHERE source_revision_id = ?").get(t)?.n??0}function Hr(e,t,r){if(!t||r<=0)return[];return e.query(`SELECT id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json
|
|
260
260
|
FROM chunks
|
|
261
261
|
WHERE source_revision_id = ?
|
|
262
262
|
ORDER BY ordinal ASC
|
|
263
|
-
LIMIT ?`).all(t,r)}async function
|
|
263
|
+
LIMIT ?`).all(t,r)}async function ae(e){let t=e.purpose??"knowledge_answer",r=Math.max(0,Math.min(e.limit??10,100)),n=(e.now??new Date).toISOString(),i=L(e.sourceRef),s=Fe(e.sourceRef,i),_=Me(e.sourceRef);if(e.safetyPolicy){if(!e.safetyPolicy.readOnlySourceAccess)throw Error("Safety policy denied source resolution.");K(e.dbPath,e.safetyPolicy)}C(e.dbPath);let f=A(e.dbPath);try{return f.transaction(()=>{let a=Br(f,s,e.sourceRef);if(!a)return w(f,{event_type:"source_read",action:"open_files_resolve_missing",target_uri:e.sourceRef,decision:"allow",metadata:{purpose:t,read_only:!0,source_uri:s},created_at:n}),{source_ref:e.sourceRef,source_uri:s,purpose:t,read_only:!0,resolved:!1,resolver:{name:"open-files-read-only",mode:"local_catalog",contract:"open-files-knowledge-source-v1"},source:null,revision:null,content:{mime:null,size:null,hash:null,text_available:!1,chunks_total:0,chunks_returned:0,char_count_returned:0,extracted_text_ref:null,bytes_available:!1,bytes_exposed:!1},chunks:[],citations:[]};let o=oe(a.metadata_json),c=oe(a.acl_json);try{Xr(c,t)}catch(g){throw w(f,{event_type:"source_read",action:"open_files_resolve",target_uri:e.sourceRef,decision:"deny",metadata:{purpose:t,read_only:!0,source_uri:a.uri,error:g instanceof Error?g.message:String(g)},created_at:n}),g}let u=zr(f,a.id,_),d=oe(u?.metadata_json),l=qr(f,u?.id??null),m=Hr(f,u?.id??null,r),T=$r(a.uri,u,e.sourceRef),p=m.map((g)=>{let 
R=oe(g.metadata_json),h={resolver:"open-files-read-only",mode:"local_catalog",purpose:t,read_only:!0,source_ref:F(R,["source_ref"])??T,source_uri:a.uri,source_revision_id:u?.id??null,revision:u?.revision??null,hash:u?.hash??F(R,["hash"]),chunk_id:g.id,start_offset:g.start_offset,end_offset:g.end_offset,resolved_at:n},B=se({source_ref:h.source_ref,source_uri:h.source_uri,source_kind:a.kind,source_revision_id:h.source_revision_id,revision:h.revision,hash:h.hash,chunk_id:g.id,start_offset:g.start_offset,end_offset:g.end_offset,status:F(R,["status"]),resolver:h.resolver});return{id:g.id,kind:g.kind,ordinal:g.ordinal,text:g.text,token_count:g.token_count,start_offset:g.start_offset,end_offset:g.end_offset,metadata:R,evidence:h,provenance:B}}),I=p.map((g)=>({source_ref:g.evidence.source_ref,source_uri:a.uri,chunk_id:g.id,quote:g.text.slice(0,500),start_offset:g.start_offset,end_offset:g.end_offset,evidence:g.evidence,provenance:g.provenance}));w(f,{event_type:"source_read",action:"open_files_resolve",target_uri:e.sourceRef,decision:"allow",metadata:{purpose:t,read_only:!0,source_uri:a.uri,revision:u?.revision??null,chunks_returned:p.length,chunks_total:l},created_at:n});let 
k=F(o,["mime","content_type"])??F(d,["mime","content_type"]),S=Je(o,["size","size_bytes"])??Je(d,["size","size_bytes"]);return{source_ref:T,source_uri:a.uri,purpose:t,read_only:!0,resolved:!0,resolver:{name:"open-files-read-only",mode:"local_catalog",contract:"open-files-knowledge-source-v1"},source:{id:a.id,uri:a.uri,kind:a.kind,title:a.title,metadata:o,permissions:c,updated_at:a.updated_at},revision:u?{id:u.id,revision:u.revision,hash:u.hash,extracted_text_uri:u.extracted_text_uri,metadata:d,created_at:u.created_at,reindex_required:d.reindex_required===!0}:null,content:{mime:k,size:S,hash:u?.hash??F(o,["hash","checksum","sha256"]),text_available:l>0,chunks_total:l,chunks_returned:p.length,char_count_returned:p.reduce((g,R)=>g+R.text.length,0),extracted_text_ref:u?.extracted_text_uri??F(d,["extracted_text_ref","extracted_text_uri"]),bytes_available:!1,bytes_exposed:!1},chunks:p,citations:I}})()}finally{f.close()}}function $(e){return`sha256:${Gr("sha256").update(e).digest("hex")}`}function Vr(e){return e.replace(/<script[\s\S]*?<\/script>/gi," ").replace(/<style[\s\S]*?<\/style>/gi," ").replace(/<[^>]+>/g," ").replace(/ /g," ").replace(/&/g,"&").replace(/</g,"<").replace(/>/g,">").replace(/\s+\n/g,`
|
|
264
264
|
`).replace(/\n\s+/g,`
|
|
265
|
-
`).replace(/[ \t]{2,}/g," ").trim()}async function
|
|
265
|
+
`).replace(/[ \t]{2,}/g," ").trim()}async function Qr(e,t,r){let n=new URL(e),i=n.hostname,s=decodeURIComponent(n.pathname.replace(/^\/+/,""));if(!i||!s)throw Error(`Invalid S3 source URI: ${e}`);if(r)j(e,r);let[{S3Client:_,GetObjectCommand:f},{fromIni:a}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]),o=t?.storage.type==="s3"&&t.storage.s3?.bucket===i?t.storage.s3:void 0,u=await new _({region:o?.region,credentials:o?.profile?a({profile:o.profile}):void 0,maxAttempts:o?.max_attempts}).send(new f({Bucket:i,Key:s}));if(!u.Body)return"";return await u.Body.transformToString()}async function Zr(e,t){if(t)te(t);let r=await fetch(e,{headers:{accept:"text/markdown,text/plain,text/html,application/json;q=0.8,*/*;q=0.5","user-agent":"@hasna/knowledge source-ingest"}});if(!r.ok)throw Error(`Web source read failed ${r.status}: ${e}`);let n=r.headers.get("content-type"),i=await r.text();return{text:n?.includes("html")?Vr(i):i,mime:n}}function ue(e){if(e.kind==="file")return ce(e.path);if(e.kind==="s3")return ce(e.key);if(e.kind==="web")return ce(new URL(e.url).pathname)||e.url;return e.path?ce(e.path):e.id}async function Ve(e,t,r){if(e.kind==="file"){if(!Yr(e.path))throw Error(`Source file not found: ${e.path}`);let n=Jr(e.path,"utf8");return{text:n,contentSource:"file",title:ue(e),mime:"text/plain",size:n.length,hash:$(n),revision:null,extractedTextRef:null,metadata:{path:e.path},permissions:{mode:"read_only"}}}if(e.kind==="s3"){let n=await Qr(e.uri,t,r);return{text:n,contentSource:"s3",title:ue(e),mime:"text/plain",size:n.length,hash:$(n),revision:null,extractedTextRef:null,metadata:{bucket:e.bucket,key:e.key},permissions:{mode:"read_only"}}}if(e.kind==="web"){let n=await Zr(e.url,r);return{text:n.text,contentSource:"web",title:ue(e),mime:n.mime,size:n.text.length,hash:$(n.text),revision:null,extractedTextRef:null,metadata:{url:e.url},permissions:{mode:"read_only"}}}throw Error(`Direct source reading is not available for 
${e.uri}`)}async function en(e,t,r){if(e.startsWith("open-files://"))throw Error("Open-files extracted text refs require an open-files resolver API. Ingest an open-files manifest with extracted_text or an extracted_text_ref using file://, s3://, or https://.");let n=L(e);return{text:(await Ve(n,t,r)).text,contentSource:"extracted_text_ref"}}async function tn(e){let t=await ae({dbPath:e.dbPath,sourceRef:e.sourceRef,purpose:e.purpose??"knowledge_index",limit:100,safetyPolicy:e.safetyPolicy,now:e.now});if(!t.resolved)throw Error("Open-files source is not in the local knowledge catalog. Ingest an open-files manifest first or use the open-files resolver API.");if(t.revision?.extracted_text_uri&&!t.content.text_available){let n=await en(t.revision.extracted_text_uri,e.config,e.safetyPolicy);return{text:n.text,contentSource:n.contentSource,title:t.source?.title??null,mime:t.content.mime,size:n.text.length,hash:t.revision.hash??$(n.text),revision:t.revision.revision,extractedTextRef:t.revision.extracted_text_uri,metadata:t.source?.metadata??{},permissions:t.source?.permissions??{mode:"read_only"}}}if(t.chunks.length===0)throw Error("Open-files source has no extracted text chunks yet. Ingest an open-files manifest with extracted_text or extracted_text_ref first.");let r=t.chunks.map((n)=>n.text).join(`
|
|
266
266
|
|
|
267
|
-
`);return{text:r,contentSource:"catalog_chunks",title:t.source?.title??null,mime:t.content.mime,size:r.length,hash:t.revision?.hash??$(r),revision:t.revision?.revision??null,extractedTextRef:t.revision?.extracted_text_uri??null,metadata:t.source?.metadata??{},permissions:t.source?.permissions??{mode:"read_only"}}}function
|
|
267
|
+
`);return{text:r,contentSource:"catalog_chunks",title:t.source?.title??null,mime:t.content.mime,size:r.length,hash:t.revision?.hash??$(r),revision:t.revision?.revision??null,extractedTextRef:t.revision?.extracted_text_uri??null,metadata:t.source?.metadata??{},permissions:t.source?.permissions??{mode:"read_only"}}}function rn(e,t,r,n){let i=r.hash??$(r.text),s={...r.metadata,source_ref:e,content_source:r.contentSource,read_only:!0},_={source_ref:e,name:r.title??ue(t),mime:r.mime??"text/plain",size:r.size??r.text.length,hash:i,revision:r.revision??i,status:"active",updated_at:new Date().toISOString(),permissions:{mode:"read_only",allowed_purposes:[n],...r.permissions},metadata:s,extracted_text_ref:r.extractedTextRef,extracted_text:r.text};if(t.kind==="open-files"){if(t.entity==="file")_.file_id=t.id;if(t.entity==="source")_.source_id=t.id,_.path=t.path}if(t.kind==="file")_.path=t.path;if(t.kind==="s3")_.path=t.key;if(t.kind==="web")_.url=t.url;return _}async function Qe(e){let t=e.purpose??"knowledge_index",r=L(e.sourceRef),n=r.kind==="open-files"?await tn(e):await Ve(r,e.config,e.safetyPolicy),i=rn(e.sourceRef,r,n,t);return{...await ke({dbPath:e.dbPath,items:[i],sourceLabel:e.sourceRef,readAction:"source_ref_ingest_read",safetyPolicy:e.safetyPolicy,now:e.now}),source_ref:e.sourceRef,content_source:n.contentSource,read_only:!0,hash:String(i.hash)}}var 
Ze={openai:{api_key_env:"OPENAI_API_KEY",default_model:"gpt-5.2"},anthropic:{api_key_env:"ANTHROPIC_API_KEY",default_model:"claude-sonnet-4-6"},deepseek:{api_key_env:"DEEPSEEK_API_KEY",default_model:"deepseek-chat"}},nn={openai:{text_generation:!0,structured_output:!0,tool_usage:!0,tool_streaming:!0,image_input:!0,native_web_search:!0,reasoning:!0,embeddings:!0},anthropic:{text_generation:!0,structured_output:!0,tool_usage:!0,tool_streaming:!0,image_input:!0,native_web_search:!1,reasoning:!0,embeddings:!1},deepseek:{text_generation:!0,structured_output:!0,tool_usage:!0,tool_streaming:!0,image_input:!1,native_web_search:!1,reasoning:!0,embeddings:!1}},sn={default:"openai:gpt-5.2",fast:"openai:gpt-5-mini",reasoning:"anthropic:claude-opus-4-6",sonnet:"anthropic:claude-sonnet-4-6",deepseek:"deepseek:deepseek-chat","deepseek-reasoning":"deepseek:deepseek-reasoner"};function et(e){return e.providers??{}}function on(e,t){let r=et(e)[t]??{};return{...Ze[t],...r}}function tt(e){let t=et(e);return{...sn,...t.default_model?{default:t.default_model}:{},...t.aliases??{}}}function be(e){let[t,...r]=e.split(":"),n=r.join(":");if(t!=="openai"&&t!=="anthropic"&&t!=="deepseek")throw Error(`Unsupported AI provider: ${t}`);if(!n)throw Error(`Invalid model ref: ${e}. 
Expected provider:model.`);return{provider:t,model:n}}function we(e,t){return tt(t)[e]??e}function Se(e){let t=tt(e);return Object.entries(t).map(([r,n])=>{let i=be(n);return{alias:r,model_ref:n,provider:i.provider,model:i.model,default:r==="default",capabilities:nn[i.provider]}})}function rt(e,t=process.env){return Object.keys(Ze).map((r)=>{let n=on(e,r),i=Boolean(t[n.api_key_env]);return{provider:r,api_key_env:n.api_key_env,configured:i,source:i?"env":"missing",base_url:n.base_url??null,default_model:n.default_model}})}function nt(e,t=process.env){return{default_model:we("default",e),providers:rt(e,t),models:Se(e)}}function it(e,t,r=process.env){let n=rt(t,r).find((i)=>i.provider===e);if(!n)throw Error(`Unsupported AI provider: ${e}`);if(!n.configured)throw Error(`Missing ${n.api_key_env} for ${e}. Set the env var to use this provider.`);return n}import{createHash as an,randomUUID as cn}from"crypto";var st=[{kind:"schema",prefix:"schemas/",description:"Machine-readable agent schemas and source rules."},{kind:"index",prefix:"indexes/",description:"Small orientation indexes and future shard manifests."},{kind:"log",prefix:"logs/",description:"Append-only JSONL run and wiki-maintenance log partitions."},{kind:"run",prefix:"runs/",description:"Prompt/tool/cost ledgers and generated output records."},{kind:"wiki_page",prefix:"wiki/",description:"Generated cited Markdown pages, not raw source files."},{kind:"export",prefix:"exports/",description:"Portable exports and snapshots of derived knowledge state."}];function ot(e){let t=typeof e==="string"?Buffer.from(e):Buffer.from(e);return{hash:`sha256:${an("sha256").update(t).digest("hex")}`,size_bytes:t.byteLength}}function at(e){return st.find((r)=>e.startsWith(r.prefix))?.kind??"artifact"}function ct(e,t,r="global"){let 
n=xe(e,t),i=e.storage.s3??null,s=i?.prefix?.replace(/^\/+|\/+$/g,"")??"",_=i?`s3://${i.bucket}/${s?`${s}/`:""}`:"";return{scope:r,mode:e.mode,storage_type:e.storage.type,workspace_home:t.home,local_layout:{app_path:q,config_path:t.configPath,json_store_path:t.jsonStorePath,knowledge_db_path:t.knowledgeDbPath,directories:{artifacts:t.artifactsDir,cache:t.cacheDir,exports:t.exportsDir,indexes:t.indexesDir,logs:t.logsDir,runs:t.runsDir,schemas:t.schemasDir,wiki:t.wikiDir}},artifact_store:{type:e.storage.type,artifacts_root:e.storage.artifacts_root,uri_prefix:e.storage.type==="s3"?_:`file://${t.artifactsDir}/`,s3:i?{bucket:i.bucket,prefix:s,region:i.region??null,profile:i.profile??null,server_side_encryption:i.server_side_encryption??null,kms_key_configured:Boolean(i.kms_key_id)}:null},source_ownership:{owner:"open-files",preferred_ref:e.sources.preferred_ref,allowed_schemes:e.sources.allowed_schemes,raw_source_bytes_stored_in_open_knowledge:!1,stores:["source refs","source revisions and hashes","citation spans","redacted extracted chunks","embeddings","generated wiki artifacts","indexes","run ledgers"],does_not_store:["raw open-files bytes","S3 object credentials","connector secrets","hosted tenant ownership state"]},generated_artifacts:st,scalability:{catalog:"knowledge.db tracks sources, revisions, chunks, citations, indexes, runs, and storage_objects.",indexes:"Indexes are cataloged DB rows plus sharded artifacts, not one giant index.md.",logs:"Logs use dated JSONL partitions under logs/yyyy/mm/dd.jsonl.",markdown:"Markdown pages are the readable wiki layer over DB/object-store state."},warnings:n.warnings}}function xe(e,t){let r=[],n=[];if(!t.home.endsWith(q))n.push(`Workspace home does not end with ${q}: ${t.home}`);if(e.storage.type==="s3"){if(!e.storage.s3?.bucket)r.push("storage.s3.bucket is required when storage.type is s3.");if(!e.storage.s3?.prefix)n.push("storage.s3.prefix is empty; generated knowledge artifacts will be written at the bucket 
root.");if(e.mode==="local")n.push("storage.type is s3 while mode is local; this is valid for BYO S3, but hosted wrappers should set mode to hosted.")}if(e.storage.type==="local"&&e.storage.s3)n.push("storage.s3 is configured but ignored while storage.type is local.");if(e.sources.preferred_ref!=="open-files")n.push("sources.preferred_ref should stay open-files for durable company knowledge.");if(!e.sources.allowed_schemes.includes("open-files"))r.push("sources.allowed_schemes must include open-files.");return{ok:r.length===0,errors:r,warnings:n}}function ut(e,t,r=new Date){let n=r.toISOString(),i=e.prepare(`
|
|
268
268
|
INSERT INTO storage_objects (
|
|
269
269
|
id, artifact_uri, kind, content_type, hash, size_bytes, metadata_json, created_at, updated_at
|
|
270
270
|
)
|
|
@@ -276,7 +276,7 @@ VALUES (3, datetime('now'));
|
|
|
276
276
|
size_bytes = excluded.size_bytes,
|
|
277
277
|
metadata_json = excluded.metadata_json,
|
|
278
278
|
updated_at = excluded.updated_at
|
|
279
|
-
`);e.transaction((
|
|
279
|
+
`);e.transaction((_)=>{for(let f of _)i.run(cn(),f.uri,f.kind,f.content_type??null,f.hash??null,f.size_bytes??null,JSON.stringify({key:f.key,...f.metadata??{}}),n,n)})(t)}import{createHash as un}from"crypto";function dn(e){let t=String(e.getUTCFullYear()),r=String(e.getUTCMonth()+1).padStart(2,"0"),n=String(e.getUTCDate()).padStart(2,"0");return{year:t,month:r,day:n}}function dt(e,t){return`${e}_${un("sha256").update(t).digest("hex").slice(0,20)}`}function ln(){return`# Knowledge Agent Schema v1
|
|
280
280
|
|
|
281
281
|
## Source Rules
|
|
282
282
|
|
|
@@ -301,7 +301,7 @@ VALUES (3, datetime('now'));
|
|
|
301
301
|
## Lint Rules
|
|
302
302
|
|
|
303
303
|
- Flag stale pages, missing citations, contradictions, orphan pages, duplicate pages, and unresolved source refs.
|
|
304
|
-
`}function
|
|
304
|
+
`}function _n(){return`# Knowledge Index
|
|
305
305
|
|
|
306
306
|
This is a compact orientation index for agents. It is not the full search index.
|
|
307
307
|
|
|
@@ -316,13 +316,26 @@ This is a compact orientation index for agents. It is not the full search index.
|
|
|
316
316
|
|
|
317
317
|
Raw source files are resolved through open-files. This app stores source refs,
|
|
318
318
|
citations, chunks, generated wiki artifacts, indexes, and run records.
|
|
319
|
-
`}function
|
|
319
|
+
`}function fn(){return`# Wiki
|
|
320
320
|
|
|
321
321
|
Generated durable knowledge pages live here.
|
|
322
322
|
|
|
323
323
|
Pages should be concise, cited, and organized for both humans and agents.
|
|
324
|
-
`}async function
|
|
325
|
-
`,content_type:"application/x-ndjson"}],u=await Promise.all(c.map(async(
|
|
324
|
+
`}async function _t(e,t=new Date){let{year:r,month:n,day:i}=dn(t),s="schemas/v1.md",_="indexes/root.md",f="wiki/README.md",a=`logs/${r}/${n}/${i}.jsonl`,o={ts:t.toISOString(),event:"wiki_layout_initialized",schema_key:"schemas/v1.md",root_index_key:"indexes/root.md",wiki_readme_key:"wiki/README.md"},c=[{key:"schemas/v1.md",body:ln(),content_type:"text/markdown"},{key:"indexes/root.md",body:_n(),content_type:"text/markdown"},{key:"wiki/README.md",body:fn(),content_type:"text/markdown"},{key:a,body:`${JSON.stringify(o)}
|
|
325
|
+
`,content_type:"application/x-ndjson"}],u=await Promise.all(c.map(async(d)=>{let l=await e.put(d);return{key:l.key,uri:l.uri,kind:at(d.key),content_type:d.content_type,metadata:{provenance:me({generated_from:"wiki_layout_init",artifact_key:d.key,citation_required:d.key.startsWith("wiki/")||d.key.startsWith("indexes/")})},...ot(d.body)}}));return{schema_key:"schemas/v1.md",root_index_key:"indexes/root.md",wiki_readme_key:"wiki/README.md",log_key:a,artifacts:u,written:["schemas/v1.md","indexes/root.md","wiki/README.md",a]}}function lt(e){let t=e.metadata?.provenance;if(t&&typeof t==="object"&&!Array.isArray(t))return t;return me({generated_from:"wiki_layout_init",artifact_key:e.key})}function ft(e,t,r=new Date){let n=r.toISOString(),i=t.find((_)=>_.key.endsWith("indexes/root.md")),s=t.find((_)=>_.key.endsWith("wiki/README.md"));if(i)e.run(`INSERT INTO knowledge_indexes (id, kind, name, artifact_uri, shard_key, metadata_json, created_at, updated_at)
|
|
326
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
327
|
+
ON CONFLICT(kind, name, shard_key) DO UPDATE SET
|
|
328
|
+
artifact_uri = excluded.artifact_uri,
|
|
329
|
+
metadata_json = excluded.metadata_json,
|
|
330
|
+
updated_at = excluded.updated_at`,[dt("idx","root:indexes/root.md"),"root","root",i.uri,"root",JSON.stringify({artifact_key:i.key,content_hash:i.hash??null,provenance:lt(i)}),n,n]);if(s)e.run(`INSERT INTO wiki_pages (id, path, title, artifact_uri, content_hash, status, metadata_json, created_at, updated_at)
|
|
331
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
332
|
+
ON CONFLICT(path) DO UPDATE SET
|
|
333
|
+
title = excluded.title,
|
|
334
|
+
artifact_uri = excluded.artifact_uri,
|
|
335
|
+
content_hash = excluded.content_hash,
|
|
336
|
+
status = excluded.status,
|
|
337
|
+
metadata_json = excluded.metadata_json,
|
|
338
|
+
updated_at = excluded.updated_at`,[dt("wiki","wiki/README.md"),"wiki/README.md","Wiki",s.uri,s.hash??null,"active",JSON.stringify({artifact_key:s.key,provenance:lt(s)}),n,n])}class gt{options;ensuredWorkspace;cachedConfig;constructor(e={}){this.options=e}get scope(){return this.options.scope??"global"}get workspace(){return this.ensuredWorkspace??Oe(this.options.scope,this.options.cwd)}ensureWorkspace(){if(!this.ensuredWorkspace)this.ensuredWorkspace=ve(this.workspace.home);return this.ensuredWorkspace}jsonStorePath(){return this.ensureWorkspace().jsonStorePath}config(){if(!this.cachedConfig){let e=this.ensureWorkspace();this.cachedConfig=Ne(e.configPath)}return this.cachedConfig}safetyPolicy(){return $e(this.config(),this.ensureWorkspace())}artifactStore(){return je(this.config(),this.ensureWorkspace())}storageContract(){return ct(this.config(),this.ensureWorkspace(),this.scope)}validateStorage(){return xe(this.config(),this.ensureWorkspace())}paths(){let e=this.ensureWorkspace();return{ok:!0,scope:this.scope,home:e.home,config_path:e.configPath,json_store_path:e.jsonStorePath,knowledge_db_path:e.knowledgeDbPath,artifacts_dir:e.artifactsDir,indexes_dir:e.indexesDir,logs_dir:e.logsDir,runs_dir:e.runsDir,schemas_dir:e.schemasDir,wiki_dir:e.wikiDir,config:this.config(),message:e.home}}initDb(){return C(this.ensureWorkspace().knowledgeDbPath)}dbStats(){let e=this.ensureWorkspace();return C(e.knowledgeDbPath),De(e.knowledgeDbPath)}async initWiki(){let e=this.ensureWorkspace();C(e.knowledgeDbPath);let t=await _t(this.artifactStore()),r=A(e.knowledgeDbPath);try{ut(r,t.artifacts),ft(r,t.artifacts)}finally{r.close()}return t}async ingestManifest(e){let t=this.ensureWorkspace();return Ye({dbPath:t.knowledgeDbPath,input:e,config:this.config(),safetyPolicy:this.safetyPolicy()})}async ingestSource(e,t){let r=this.ensureWorkspace();return Qe({dbPath:r.knowledgeDbPath,sourceRef:e,purpose:t,config:this.config(),safetyPolicy:this.safetyPolicy()})}async resolveSource(e,t={}){let 
r=this.ensureWorkspace();return ae({dbPath:r.knowledgeDbPath,sourceRef:e,purpose:t.purpose,limit:t.limit,safetyPolicy:this.safetyPolicy()})}async consumeOutbox(e){let t=this.ensureWorkspace();return He({dbPath:t.knowledgeDbPath,input:e,config:this.config(),safetyPolicy:this.safetyPolicy()})}providerStatus(e=process.env){return nt(this.config(),e)}modelRegistry(){return Se(this.config())}}function pt(e={}){return new gt(e)}var G={name:"@hasna/knowledge",version:"0.2.13",description:"Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",type:"module",bin:{"open-knowledge":"bin/open-knowledge.js","open-knowledge-mcp":"bin/open-knowledge-mcp.js"},files:["bin","src","docs","LICENSE","README.md"],scripts:{test:"bun test","test:cli":"bun test tests/cli.test.ts",build:"bun build --target=bun --outfile=bin/open-knowledge.js --minify --external @aws-sdk/client-s3 --external @aws-sdk/credential-providers --external ai --external @ai-sdk/openai --external @ai-sdk/anthropic --external @ai-sdk/deepseek src/cli.ts && bun build --target=bun --outfile=bin/open-knowledge-mcp.js --external @modelcontextprotocol/sdk --external @aws-sdk/client-s3 --external @aws-sdk/credential-providers --external ai --external @ai-sdk/openai --external @ai-sdk/anthropic --external @ai-sdk/deepseek src/mcp.js",prepublishOnly:"bun run build",postinstall:"bun run build"},keywords:["knowledge","cli","agents","json","notes","local","store"],license:"Apache-2.0",publishConfig:{registry:"https://registry.npmjs.org",access:"public"},repository:{type:"git",url:"git+https://github.com/hasna/knowledge.git"},bugs:{url:"https://github.com/hasna/knowledge/issues"},author:"Hasna Inc. 
<hasna@example.com>",engines:{bun:">=1.0",node:">=18"},dependencies:{"@aws-sdk/client-s3":"^3.1063.0","@aws-sdk/credential-providers":"^3.1063.0","@ai-sdk/anthropic":"^3.0.81","@ai-sdk/deepseek":"^2.0.35","@ai-sdk/openai":"^3.0.68","@modelcontextprotocol/sdk":"^1.29.0",ai:"^6.0.197",zod:"^4.3.6"},devDependencies:{"@types/bun":"^1.3.14"}};var ht={debug:0,info:1,warn:2,error:3},pn=()=>{if(process.env.DEBUG)return"debug";if(process.env.LOG_LEVEL==="debug")return"debug";if(process.env.LOG_LEVEL==="warn")return"warn";if(process.env.LOG_LEVEL==="error")return"error";return"info"};function M(e,t,r){if(ht[e]<ht[pn()])return;let n={debug:"[DEBUG]",info:"[INFO]",warn:"[WARN]",error:"[ERROR]"}[e],i=r?`${n} ${t} ${JSON.stringify(r)}`:`${n} ${t}`;if(e==="error")console.error(i);else console.error(i)}var hn=["add","list","get","delete","update","archive","restore","upsert","untag","export","prune","dedupe","stats","paths","storage","db","wiki","source","ingest","reindex","providers","safety","help"],yt={ls:"list",rm:"delete",edit:"update",unarchive:"restore"};function yn(e){let t=[],r={};for(let n=0;n<e.length;n+=1){let 
i=e[n];if(!i.startsWith("-")){t.push(i);continue}switch(i){case"--json":r.json=!0;break;case"--yes":case"-y":r.yes=!0;break;case"--help":case"-h":r.help=!0;break;case"--version":case"-v":r.version=!0;break;case"--desc":r.desc=!0;break;case"--page":case"-p":r.page=Number(e[n+1]),n+=1;break;case"--limit":case"-l":r.limit=Number(e[n+1]),n+=1;break;case"--search":case"-s":r.search=e[n+1],n+=1;break;case"--sort":r.sort=e[n+1],n+=1;break;case"--id":r.id=e[n+1],n+=1;break;case"--store":r.store=e[n+1],n+=1;break;case"--title":r.title=e[n+1],n+=1;break;case"--content":r.content=e[n+1],n+=1;break;case"--url":r.url=e[n+1],n+=1;break;case"--tag":case"-t":r.tag=e[n+1],n+=1;break;case"--format":r.format=e[n+1],n+=1;break;case"--completions":r.completions=e[n+1],n+=1;break;case"--purpose":r.purpose=e[n+1],n+=1;break;case"--no-color":r.noColor=!0;break;case"--scope":r.scope=e[n+1],n+=1;break;case"--older-than":r.olderThan=Number(e[n+1]),n+=1;break;case"--empty":r.empty=!0;break;case"--archived":r.archived=!0;break;case"--include-archived":r.includeArchived=!0;break;default:throw Error(`Unknown flag: ${i}. Run 'open-knowledge --help' for valid options.`)}}return{positional:t,flags:r}}function En(e){if(!e)return"";return yt[e]??e}function mn(e,t){let r=Array.from({length:e.length+1},()=>Array(t.length+1).fill(0));for(let n=0;n<=e.length;n+=1)r[n][0]=n;for(let n=0;n<=t.length;n+=1)r[0][n]=n;for(let n=1;n<=e.length;n+=1)for(let i=1;i<=t.length;i+=1){let s=e[n-1]===t[i-1]?0:1;r[n][i]=Math.min(r[n-1][i]+1,r[n][i-1]+1,r[n-1][i-1]+s)}return r[e.length][t.length]}function Tn(e){if(!e)return"";let t=[...hn,...Object.keys(yt)],r="",n=Number.POSITIVE_INFINITY;for(let i of t){let s=mn(e,i);if(s<n)n=s,r=i}return n<=3?r:""}function kn(){console.log(`open-knowledge - local agent knowledge store
|
|
326
339
|
|
|
327
340
|
Usage:
|
|
328
341
|
open-knowledge <command> [options]
|
|
@@ -393,5 +406,5 @@ Export Options:
|
|
|
393
406
|
|
|
394
407
|
Prune Options:
|
|
395
408
|
--older-than <days> Remove items older than N days
|
|
396
|
-
--empty Remove items with empty content`)}function
|
|
397
|
-
_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex providers safety help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"{created,title}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--purpose)--purpose[purpose]:" "(--no-color)--no-color[disable color]" "(--scope)--scope"{local,global,project}:" }; _open_knowledge`);else if(a==="fish")console.log('complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex providers safety help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l 
completions; complete -c open-knowledge -l purpose; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"');else throw Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");return}let n=dn(t[0]);if(!n||r.help||n==="help"){pn(t[1]);return}let i=ut({scope:r.scope}),s=r.store;if(!s)if(r.scope==="project"||r.scope==="local")s=i.jsonStorePath();else s=fe();if(n==="paths"){E(i.paths(),r.json);return}if(n==="storage"){let a=t[1]??"status";if(a==="status"){let o=i.storageContract(),c=i.validateStorage();E({ok:c.ok,...o,validation:c,message:`${o.storage_type} artifact storage at ${o.artifact_store.uri_prefix}`},r.json);return}if(a==="validate"){let o=i.validateStorage();E({ok:o.ok,validation:o,message:o.ok?"Storage contract valid":`Storage contract invalid: ${o.errors.join("; ")}`},r.json);return}throw Error("Invalid storage action. Use 'status' or 'validate'.")}if(n==="db"){let a=t[1]??"init";if(a!=="init"&&a!=="stats")throw Error("Invalid db action. Use 'init' or 'stats'.");if(a==="init"){let c=i.initDb();E({ok:!0,...c,message:`Initialized ${c.path}`},r.json);return}let o=i.dbStats();E({ok:!0,path:i.workspace.knowledgeDbPath,...o,message:`knowledge.db schema v${o.schema_version}`},r.json);return}if(n==="wiki"){if((t[1]??"init")!=="init")throw Error("Invalid wiki action. 
Use 'init'.");let o=await i.initWiki();E({ok:!0,...o,message:`Initialized wiki layout in ${i.workspace.home}`},r.json);return}if(n==="safety"){let a=t[1]??"status",o=i.ensureWorkspace(),c=i.safetyPolicy();i.initDb();let u=A(o.knowledgeDbPath);try{if(a==="status"){E({ok:!0,mode:c.mode,workspace:o.home,allow_write_roots:c.allowWriteRoots,read_only_source_access:c.readOnlySourceAccess,network:c.network,redaction:c.redaction,approvals:c.approvals,message:`Safety policy: ${c.mode}`},r.json);return}if(a==="check"){let l=t[2]??"generated_write",d=t[3]??null,y;try{if(l==="web_search")ee(c),y={action:l,target_uri:d,approval_required:!1,approved:!0,decision:"allow"};else if(l==="s3_read"){if(!d)throw Error("safety check s3_read requires an s3:// target.");j(d,c),y={action:l,target_uri:d,approval_required:!1,approved:!0,decision:"allow"}}else y=$e(u,c,l,d);w(u,{event_type:"safety_check",action:l,target_uri:d,decision:y.decision==="allow"?"allow":"requires_approval",metadata:y}),E({ok:!0,...y,message:`Safety check ${y.decision}`},r.json);return}catch(m){throw w(u,{event_type:"safety_check",action:l,target_uri:d,decision:"deny",metadata:{error:m instanceof Error?m.message:String(m)}}),m}}if(a==="approve"){let l=t[2]??"generated_write",d=t[3]??null,y=We(u,{action:l,target_uri:d,reason:"local-cli approval",metadata:{scope:r.scope??"global"}});w(u,{event_type:"approval",action:l,target_uri:d,decision:"allow",metadata:{approval_id:y.id}}),E({ok:!0,...y,action:l,target_uri:d,message:`Approved ${l}`},r.json);return}if(a==="audit"){let l=u.query("SELECT id, event_type, action, target_uri, decision, metadata_json, created_at FROM audit_events ORDER BY created_at DESC LIMIT 50").all().map((d)=>({id:d.id,event_type:d.event_type,action:d.action,target_uri:d.target_uri,decision:d.decision,metadata:JSON.parse(d.metadata_json),created_at:d.created_at}));E({ok:!0,events:l,message:`${l.length} audit event(s)`},r.json);return}if(a==="redact"){let l=t.slice(2).join(" ");if(!l)throw Error("Usage: 
open-knowledge safety redact <text>");let d=te(l,c);if(d.findings.length>0)re(u,{source_uri:"safety://redact",findings:d.findings,metadata:{command:"safety redact"}});w(u,{event_type:"redaction",action:"safety_redact",target_uri:"safety://redact",decision:d.findings.length>0?"redacted":"allow",metadata:{findings:d.findings.length}}),E({ok:!0,text:d.text,findings:d.findings,message:`Redacted ${d.findings.length} finding(s)`},r.json);return}throw Error("Invalid safety action. Use 'status', 'check', 'approve', 'audit', or 'redact'.")}finally{u.close()}}if(n==="source"){if((t[1]??"")!=="resolve")throw Error("Invalid source action. Use 'resolve'.");let o=t[2];if(!o)throw Error("Usage: open-knowledge source resolve <source-ref>");let c=await i.resolveSource(o,{purpose:r.purpose,limit:r.limit});E({ok:!0,...c,message:c.resolved?`Resolved ${c.source_ref} (${c.content.chunks_returned}/${c.content.chunks_total} chunks)`:`Source not indexed: ${o}`},r.json);return}if(n==="ingest"){let a=t[1]??"";if(a==="manifest"){let o=t[2];if(!o)throw Error("Usage: open-knowledge ingest manifest <file|s3://bucket/key>");let c=await i.ingestManifest(o);E({ok:!0,...c,message:`Ingested ${c.items_seen} manifest item(s)`},r.json);return}if(a==="source"){let o=t[2];if(!o)throw Error("Usage: open-knowledge ingest source <source-ref>");let c=await i.ingestSource(o,r.purpose);E({ok:!0,...c,message:`Ingested source ${c.source_ref} (${c.chunks_inserted} chunks)`},r.json);return}throw Error("Invalid ingest action. Use 'manifest' or 'source'.")}if(n==="reindex"){if((t[1]??"")!=="outbox")throw Error("Invalid reindex action. 
Use 'outbox'.");let o=t[2];if(!o)throw Error("Usage: open-knowledge reindex outbox <file|s3://bucket/key>");let c=await i.consumeOutbox(o);E({ok:!0,...c,message:`Consumed ${c.events_seen} outbox event(s)`},r.json);return}if(n==="providers"){let a=t[1]??"status";if(a==="status"){let o=i.providerStatus(),c=o.providers.filter((u)=>u.configured).length;E({ok:!0,...o,message:`${c}/${o.providers.length} provider credential(s) configured`},r.json);return}if(a==="models"){let o=i.modelRegistry();E({ok:!0,models:o,message:`${o.length} model alias(es)`},r.json);return}if(a==="check"){let o=t[2]??"default",c=ke(o,i.config()),u=Te(c),l=tt(u.provider,i.config());E({ok:!0,target:o,model_ref:c,provider:u.provider,model:u.model,credential:l,message:`${u.provider} credentials configured`},r.json);return}throw Error("Invalid providers action. Use 'status', 'models', or 'check'.")}if(_e(s),n==="add"){let a=t[1],o=t[2];if(!a||!o)throw Error("Usage: open-knowledge add <title> <content>");v(s,()=>{let c=x(s),u={id:pe(),title:a,content:o,url:r.url??null,tags:r.tag?[r.tag]:[],created_at:new Date().toISOString(),updated_at:new Date().toISOString()};c.items.push(u),U(s,c),F("info","Item added",{id:u.id,title:u.title}),E({ok:!0,item:u,message:`Added ${u.id}`},r.json)});return}if(n==="list"){if(r.format!==void 0&&r.format!=="table"&&r.format!=="json")throw Error("Invalid --format value for list. 
Use 'table' or 'json'.");v(s,()=>{let a=x(s),o=Number.isFinite(r.page)&&r.page>0?r.page:1,c=Number.isFinite(r.limit)&&r.limit>0?r.limit:20,u=r.search?String(r.search).toLowerCase():"",l=r.tag?String(r.tag).toLowerCase():"",d=r.format==="table"||!r.json&&!r.format&&gn(r),y=r.json||r.format==="json",m=a.items;if(r.archived)m=m.filter((T)=>T.archived===!0);else if(!r.includeArchived)m=m.filter((T)=>!T.archived);if(u)m=m.filter((T)=>T.title.toLowerCase().includes(u)||T.content.toLowerCase().includes(u));if(l)m=m.filter((T)=>T.tags&&T.tags.map((ce)=>ce.toLowerCase()).includes(l));let{sorted:p,sort:I,direction:k}=hn(m,r),S=(o-1)*c,g=p.slice(S,S+c),O=Math.max(1,Math.ceil(p.length/c));if(y){E({ok:!0,page:o,limit:c,total:p.length,total_pages:O,sort:I,direction:k,items:g},!0);return}if(g.length===0){E(`No items found (search=${u||"none"}, tag=${l||"none"})`,!1);return}if(d){let T=(P)=>P,ce=`${T("ID")} ${T("TITLE")} ${T("CREATED")} ${T("URL")} ${T("TAGS")}`;console.log(ce);for(let P of g)console.log(`${P.id} ${T(P.title)} ${P.created_at} ${P.url?T(P.url):""} ${P.tags?.length?T(`[${P.tags.join(", ")}]`):""}`);console.log(`Page ${o}/${O} | showing ${g.length} of ${p.length} | sort=${I} ${k} | search=${u||"none"} | tag=${l||"none"}`)}else{for(let T of g)console.log(`${T.id} ${T.title} ${T.created_at}${T.url?` ${T.url}`:""}${T.tags?.length?` [${T.tags.join(", ")}]`:""}`);console.log(`Page ${o}/${O} | showing ${g.length} of ${p.length} | sort=${I} ${k} | search=${u||"none"} | tag=${l||"none"}`)}});return}if(n==="get"){Y(r),v(s,()=>{let o=x(s).items.find((c)=>c.id===r.id||c.short_id===r.id);if(!o)throw Error(`Item not found: ${r.id}`);E({ok:!0,item:o,message:`${o.id}: ${o.title}`},r.json)});return}if(n==="update"){Y(r),v(s,()=>{let a=x(s),o=a.items.findIndex((u)=>u.id===r.id||u.short_id===r.id);if(o===-1)throw Error(`Item not found: ${r.id}`);let c=a.items[o];if(r.title!==void 0)c.title=r.title;if(r.content!==void 0)c.content=r.content;if(r.url!==void 0)c.url=r.url;if(r.tag!==void 
0){if(c.tags=c.tags||[],!c.tags.map((u)=>u.toLowerCase()).includes(r.tag.toLowerCase()))c.tags.push(r.tag)}c.updated_at=new Date().toISOString(),a.items[o]=c,U(s,a),E({ok:!0,item:c,message:`Updated ${c.id}`},r.json)});return}if(n==="archive"||n==="restore"){Y(r),v(s,()=>{let a=x(s),o=a.items.findIndex((u)=>u.id===r.id||u.short_id===r.id);if(o===-1)throw Error(`Item not found: ${r.id}`);let c=a.items[o];c.archived=n==="archive",c.updated_at=new Date().toISOString(),a.items[o]=c,U(s,a),E({ok:!0,item:c,message:`${n==="archive"?"Archived":"Restored"} ${c.id}`},r.json)});return}if(n==="untag"){if(Y(r),!r.tag)throw Error("Missing required --tag. Example: open-knowledge untag --id <id> -t <tag>");v(s,()=>{let a=x(s),o=a.items.findIndex((l)=>l.id===r.id||l.short_id===r.id);if(o===-1)throw Error(`Item not found: ${r.id}`);let c=a.items[o],u=c.tags?.length??0;c.tags=(c.tags??[]).filter((l)=>l.toLowerCase()!==r.tag.toLowerCase()),c.updated_at=new Date().toISOString(),a.items[o]=c,U(s,a),E({ok:!0,item:c,removed:u-c.tags.length,message:`Removed tag from ${c.id}`},r.json)});return}if(n==="upsert"){let a=r.title??t[1],o=r.content??t[2];v(s,()=>{let c=x(s),u=r.id?c.items.findIndex((y)=>y.id===r.id||y.short_id===r.id):-1,l=new Date().toISOString();if(u===-1){if(!a||!o)throw Error("New item requires title and content. 
Example: open-knowledge upsert <title> <content> [--id <id>]");let y=r.id??pe(),m={id:y,short_id:Ae(y),title:a,content:o,url:r.url??null,tags:r.tag?[r.tag]:[],metadata:{},archived:!1,created_at:l,updated_at:l};c.items.push(m),U(s,c),E({ok:!0,created:!0,item:m,message:`Upserted ${m.id}`},r.json);return}let d=c.items[u];if(a!==void 0)d.title=a;if(o!==void 0)d.content=o;if(r.url!==void 0)d.url=r.url;if(r.tag!==void 0){if(d.tags=d.tags||[],!d.tags.map((y)=>y.toLowerCase()).includes(r.tag.toLowerCase()))d.tags.push(r.tag)}d.updated_at=l,c.items[u]=d,U(s,c),E({ok:!0,created:!1,item:d,message:`Upserted ${d.id}`},r.json)});return}if(n==="delete"){if(Y(r),!r.yes)throw Error("Refusing delete without --yes. Re-run with: open-knowledge delete --id <id> --yes");v(s,()=>{let a=x(s),o=a.items.length;a.items=a.items.filter((u)=>u.id!==r.id&&u.short_id!==r.id);let c=o!==a.items.length;if(U(s,a),!c)throw Error(`Item not found: ${r.id}`);F("info","Item deleted",{id:r.id}),E({ok:!0,deleted_id:r.id,message:`Deleted ${r.id}`},r.json)});return}if(n==="export"){let a=r.format??"json";if(a!=="json"&&a!=="jsonl")throw Error("Invalid --format. Use 'json' or 'jsonl'.");v(s,()=>{let o=x(s);if(a==="jsonl")for(let c of o.items)console.log(JSON.stringify(c));else E({ok:!0,items:o.items},r.json)});return}if(n==="prune"){if(!r.yes)throw Error("Refusing prune without --yes. Re-run with: open-knowledge prune --yes [--older-than <days>] [--empty]");v(s,()=>{let a=x(s),o=a.items.length;if(r.olderThan!==void 0){let u=new Date;u.setDate(u.getDate()-r.olderThan),a.items=a.items.filter((l)=>new Date(l.created_at)>=u)}if(r.empty)a.items=a.items.filter((u)=>u.content.trim().length>0);let c=o-a.items.length;U(s,a),F("info","Prune completed",{pruned:c,remaining:a.items.length}),E({ok:!0,pruned:c,remaining:a.items.length,message:`Pruned ${c} item(s)`},r.json)});return}if(n==="dedupe"){if(!r.yes)throw Error("Refusing dedupe without --yes. 
Re-run with: open-knowledge dedupe --yes [--json]");v(s,()=>{let a=x(s),o=new Set,c=a.items.length;a.items=a.items.filter((l)=>{let d=`${l.title}\x00${l.content}`;if(o.has(d))return!1;return o.add(d),!0});let u=c-a.items.length;U(s,a),F("info","Dedupe completed",{removed:u,remaining:a.items.length}),E({ok:!0,removed:u,remaining:a.items.length,message:`Dedupe removed ${u} duplicate(s)`},r.json)});return}if(n==="stats"){v(s,()=>{let a=x(s),o=a.items.filter((k)=>!k.archived),c=o.length,u=a.items.length-c,l=o.filter((k)=>k.url).length,d=o.filter((k)=>k.tags&&k.tags.length>0).length,y=c>0?o.map((k)=>k.created_at).sort()[0]:null,m=c>0?o.map((k)=>k.created_at).sort()[c-1]:null,p={};for(let k of o)for(let S of k.tags||[])p[S]=(p[S]||0)+1;let I=Object.entries(p).sort((k,S)=>S[1]-k[1]).slice(0,5).map(([k,S])=>({tag:k,count:S}));E({ok:!0,total:c,archived:u,with_url:l,with_tags:d,oldest:y,newest:m,top_tags:I,message:`${c} items | ${l} with URL | ${d} with tags`},r.json)});return}let f=fn(t[0]),_=f?` Did you mean '${f}'?`:"";throw F("warn","Unknown command",{input:t[0],suggestion:f}),Error(`Unknown command: ${t[0]}.${_} Run 'open-knowledge --help' for available commands.`)}if(import.meta.main)En(process.argv.slice(2)).catch((e)=>{let t=e instanceof Error?e.message:String(e);F("error","CLI error",{message:t,stack:e instanceof Error?e.stack:void 0}),console.error(`Error: ${t}`),process.exitCode=1});export{fn as suggestCommand,hn as sortItems,En as run,un as parseArgs};
|
|
409
|
+
--empty Remove items with empty content`)}function bn(e){if(e==="add"){console.log("Usage: open-knowledge add <title> <content> [--url <url>] [-t <tag>] [--json]");return}if(e==="list"||e==="ls"){console.log("Usage: open-knowledge list|ls [--format table|json] [-p <page>] [-l <limit>] [-s <search>] [-t <tag>] [--sort created|title] [--desc] [--json]");return}if(e==="get"){console.log("Usage: open-knowledge get --id <id> [--json]");return}if(e==="update"||e==="edit"){console.log("Usage: open-knowledge update|edit --id <id> [--title <title>] [--content <content>] [--url <url>] [-t <tag>] [--json]");return}if(e==="archive"){console.log("Usage: open-knowledge archive --id <id> [--json]");return}if(e==="restore"||e==="unarchive"){console.log("Usage: open-knowledge restore|unarchive --id <id> [--json]");return}if(e==="upsert"){console.log("Usage: open-knowledge upsert [title] [content] [--id <id>] [--title <title>] [--content <content>] [--url <url>] [-t <tag>] [--json]");return}if(e==="untag"){console.log("Usage: open-knowledge untag --id <id> -t <tag> [--json]");return}if(e==="delete"||e==="rm"){console.log("Usage: open-knowledge delete|rm --id <id> -y [--json]");return}if(e==="export"){console.log("Usage: open-knowledge export [--format jsonl] [--json]");return}if(e==="prune"){console.log("Usage: open-knowledge prune --yes [--older-than <days>] [--empty] [--json]");return}if(e==="dedupe"){console.log("Usage: open-knowledge dedupe --yes [--json]");return}if(e==="stats"){console.log("Usage: open-knowledge stats [--json]");return}if(e==="paths"){console.log("Usage: open-knowledge paths [--scope local|global|project] [--json]");return}if(e==="storage"){console.log("Usage: open-knowledge storage status|validate [--scope local|global|project] [--json]");return}if(e==="db"){console.log("Usage: open-knowledge db init|stats [--scope local|global|project] [--json]");return}if(e==="wiki"){console.log("Usage: open-knowledge wiki init [--scope local|global|project] 
[--json]");return}if(e==="source"){console.log("Usage: open-knowledge source resolve <source-ref> [--purpose knowledge_answer|knowledge_index] [--limit <n>] [--scope local|global|project] [--json]");return}if(e==="ingest"){console.log("Usage: open-knowledge ingest manifest <file|s3://bucket/key> | source <source-ref> [--purpose knowledge_index] [--scope local|global|project] [--json]");return}if(e==="reindex"){console.log("Usage: open-knowledge reindex outbox <file|s3://bucket/key> [--scope local|global|project] [--json]");return}if(e==="providers"){console.log("Usage: open-knowledge providers status|models|check [provider|model-alias] [--scope local|global|project] [--json]");return}if(e==="safety"){console.log("Usage: open-knowledge safety status|check|approve|audit|redact [args] [--scope local|global|project] [--json]");return}kn()}function wn(e){if(e.noColor||process.env.NO_COLOR)return!1;if(process.env.FORCE_COLOR)return!0;return process.stdout.isTTY===!0}function E(e,t,r){if(t){console.log(JSON.stringify(e,null,2));return}if(typeof e==="string"){console.log(e);return}console.log(e.message??JSON.stringify(e,null,2))}function Y(e){if(!e.id)throw Error("Missing required --id. Example: open-knowledge get --id <id>")}function Sn(e,t){let r=t.sort??"created";if(r!=="created"&&r!=="title")throw Error("Invalid --sort value. 
Use 'created' or 'title'.");let n=[...e].sort((i,s)=>{if(r==="title")return i.title.localeCompare(s.title);return i.created_at.localeCompare(s.created_at)});if(t.desc)n.reverse();return{sorted:n,sort:r,direction:t.desc?"desc":"asc"}}async function xn(e){let{positional:t,flags:r}=yn(e);if(M("debug","CLI invoked",{command:t[0],flags:{json:r.json,store:r.store}}),r.version){console.log(r.json?JSON.stringify({name:G.name,version:G.version},null,2):`${G.name} ${G.version}`);return}if(r.completions){let a=r.completions;if(a==="bash")console.log('_open_knowledge() { local cur; cur="${COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex providers safety help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --purpose --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge');else if(a==="zsh")console.log(`#compdef open-knowledge
|
|
410
|
+
_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex providers safety help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"{created,title}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--purpose)--purpose[purpose]:" "(--no-color)--no-color[disable color]" "(--scope)--scope"{local,global,project}:" }; _open_knowledge`);else if(a==="fish")console.log('complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex providers safety help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l 
completions; complete -c open-knowledge -l purpose; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"');else throw Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");return}let n=En(t[0]);if(!n||r.help||n==="help"){bn(t[1]);return}let i=pt({scope:r.scope}),s=r.store;if(!s)if(r.scope==="project"||r.scope==="local")s=i.jsonStorePath();else s=fe();if(n==="paths"){E(i.paths(),r.json);return}if(n==="storage"){let a=t[1]??"status";if(a==="status"){let o=i.storageContract(),c=i.validateStorage();E({ok:c.ok,...o,validation:c,message:`${o.storage_type} artifact storage at ${o.artifact_store.uri_prefix}`},r.json);return}if(a==="validate"){let o=i.validateStorage();E({ok:o.ok,validation:o,message:o.ok?"Storage contract valid":`Storage contract invalid: ${o.errors.join("; ")}`},r.json);return}throw Error("Invalid storage action. Use 'status' or 'validate'.")}if(n==="db"){let a=t[1]??"init";if(a!=="init"&&a!=="stats")throw Error("Invalid db action. Use 'init' or 'stats'.");if(a==="init"){let c=i.initDb();E({ok:!0,...c,message:`Initialized ${c.path}`},r.json);return}let o=i.dbStats();E({ok:!0,path:i.workspace.knowledgeDbPath,...o,message:`knowledge.db schema v${o.schema_version}`},r.json);return}if(n==="wiki"){if((t[1]??"init")!=="init")throw Error("Invalid wiki action. 
Use 'init'.");let o=await i.initWiki();E({ok:!0,...o,message:`Initialized wiki layout in ${i.workspace.home}`},r.json);return}if(n==="safety"){let a=t[1]??"status",o=i.ensureWorkspace(),c=i.safetyPolicy();i.initDb();let u=A(o.knowledgeDbPath);try{if(a==="status"){E({ok:!0,mode:c.mode,workspace:o.home,allow_write_roots:c.allowWriteRoots,read_only_source_access:c.readOnlySourceAccess,network:c.network,redaction:c.redaction,approvals:c.approvals,message:`Safety policy: ${c.mode}`},r.json);return}if(a==="check"){let d=t[2]??"generated_write",l=t[3]??null,m;try{if(d==="web_search")te(c),m={action:d,target_uri:l,approval_required:!1,approved:!0,decision:"allow"};else if(d==="s3_read"){if(!l)throw Error("safety check s3_read requires an s3:// target.");j(l,c),m={action:d,target_uri:l,approval_required:!1,approved:!0,decision:"allow"}}else m=ze(u,c,d,l);w(u,{event_type:"safety_check",action:d,target_uri:l,decision:m.decision==="allow"?"allow":"requires_approval",metadata:m}),E({ok:!0,...m,message:`Safety check ${m.decision}`},r.json);return}catch(T){throw w(u,{event_type:"safety_check",action:d,target_uri:l,decision:"deny",metadata:{error:T instanceof Error?T.message:String(T)}}),T}}if(a==="approve"){let d=t[2]??"generated_write",l=t[3]??null,m=Be(u,{action:d,target_uri:l,reason:"local-cli approval",metadata:{scope:r.scope??"global"}});w(u,{event_type:"approval",action:d,target_uri:l,decision:"allow",metadata:{approval_id:m.id}}),E({ok:!0,...m,action:d,target_uri:l,message:`Approved ${d}`},r.json);return}if(a==="audit"){let d=u.query("SELECT id, event_type, action, target_uri, decision, metadata_json, created_at FROM audit_events ORDER BY created_at DESC LIMIT 50").all().map((l)=>({id:l.id,event_type:l.event_type,action:l.action,target_uri:l.target_uri,decision:l.decision,metadata:JSON.parse(l.metadata_json),created_at:l.created_at}));E({ok:!0,events:d,message:`${d.length} audit event(s)`},r.json);return}if(a==="redact"){let d=t.slice(2).join(" ");if(!d)throw Error("Usage: 
open-knowledge safety redact <text>");let l=re(d,c);if(l.findings.length>0)ne(u,{source_uri:"safety://redact",findings:l.findings,metadata:{command:"safety redact"}});w(u,{event_type:"redaction",action:"safety_redact",target_uri:"safety://redact",decision:l.findings.length>0?"redacted":"allow",metadata:{findings:l.findings.length}}),E({ok:!0,text:l.text,findings:l.findings,message:`Redacted ${l.findings.length} finding(s)`},r.json);return}throw Error("Invalid safety action. Use 'status', 'check', 'approve', 'audit', or 'redact'.")}finally{u.close()}}if(n==="source"){if((t[1]??"")!=="resolve")throw Error("Invalid source action. Use 'resolve'.");let o=t[2];if(!o)throw Error("Usage: open-knowledge source resolve <source-ref>");let c=await i.resolveSource(o,{purpose:r.purpose,limit:r.limit});E({ok:!0,...c,message:c.resolved?`Resolved ${c.source_ref} (${c.content.chunks_returned}/${c.content.chunks_total} chunks)`:`Source not indexed: ${o}`},r.json);return}if(n==="ingest"){let a=t[1]??"";if(a==="manifest"){let o=t[2];if(!o)throw Error("Usage: open-knowledge ingest manifest <file|s3://bucket/key>");let c=await i.ingestManifest(o);E({ok:!0,...c,message:`Ingested ${c.items_seen} manifest item(s)`},r.json);return}if(a==="source"){let o=t[2];if(!o)throw Error("Usage: open-knowledge ingest source <source-ref>");let c=await i.ingestSource(o,r.purpose);E({ok:!0,...c,message:`Ingested source ${c.source_ref} (${c.chunks_inserted} chunks)`},r.json);return}throw Error("Invalid ingest action. Use 'manifest' or 'source'.")}if(n==="reindex"){if((t[1]??"")!=="outbox")throw Error("Invalid reindex action. 
Use 'outbox'.");let o=t[2];if(!o)throw Error("Usage: open-knowledge reindex outbox <file|s3://bucket/key>");let c=await i.consumeOutbox(o);E({ok:!0,...c,message:`Consumed ${c.events_seen} outbox event(s)`},r.json);return}if(n==="providers"){let a=t[1]??"status";if(a==="status"){let o=i.providerStatus(),c=o.providers.filter((u)=>u.configured).length;E({ok:!0,...o,message:`${c}/${o.providers.length} provider credential(s) configured`},r.json);return}if(a==="models"){let o=i.modelRegistry();E({ok:!0,models:o,message:`${o.length} model alias(es)`},r.json);return}if(a==="check"){let o=t[2]??"default",c=we(o,i.config()),u=be(c),d=it(u.provider,i.config());E({ok:!0,target:o,model_ref:c,provider:u.provider,model:u.model,credential:d,message:`${u.provider} credentials configured`},r.json);return}throw Error("Invalid providers action. Use 'status', 'models', or 'check'.")}if(ge(s),n==="add"){let a=t[1],o=t[2];if(!a||!o)throw Error("Usage: open-knowledge add <title> <content>");O(s,()=>{let c=v(s),u={id:pe(),title:a,content:o,url:r.url??null,tags:r.tag?[r.tag]:[],created_at:new Date().toISOString(),updated_at:new Date().toISOString()};c.items.push(u),U(s,c),M("info","Item added",{id:u.id,title:u.title}),E({ok:!0,item:u,message:`Added ${u.id}`},r.json)});return}if(n==="list"){if(r.format!==void 0&&r.format!=="table"&&r.format!=="json")throw Error("Invalid --format value for list. 
Use 'table' or 'json'.");O(s,()=>{let a=v(s),o=Number.isFinite(r.page)&&r.page>0?r.page:1,c=Number.isFinite(r.limit)&&r.limit>0?r.limit:20,u=r.search?String(r.search).toLowerCase():"",d=r.tag?String(r.tag).toLowerCase():"",l=r.format==="table"||!r.json&&!r.format&&wn(r),m=r.json||r.format==="json",T=a.items;if(r.archived)T=T.filter((h)=>h.archived===!0);else if(!r.includeArchived)T=T.filter((h)=>!h.archived);if(u)T=T.filter((h)=>h.title.toLowerCase().includes(u)||h.content.toLowerCase().includes(u));if(d)T=T.filter((h)=>h.tags&&h.tags.map((B)=>B.toLowerCase()).includes(d));let{sorted:p,sort:I,direction:k}=Sn(T,r),S=(o-1)*c,g=p.slice(S,S+c),R=Math.max(1,Math.ceil(p.length/c));if(m){E({ok:!0,page:o,limit:c,total:p.length,total_pages:R,sort:I,direction:k,items:g},!0);return}if(g.length===0){E(`No items found (search=${u||"none"}, tag=${d||"none"})`,!1);return}if(l){let h=(P)=>P,B=`${h("ID")} ${h("TITLE")} ${h("CREATED")} ${h("URL")} ${h("TAGS")}`;console.log(B);for(let P of g)console.log(`${P.id} ${h(P.title)} ${P.created_at} ${P.url?h(P.url):""} ${P.tags?.length?h(`[${P.tags.join(", ")}]`):""}`);console.log(`Page ${o}/${R} | showing ${g.length} of ${p.length} | sort=${I} ${k} | search=${u||"none"} | tag=${d||"none"}`)}else{for(let h of g)console.log(`${h.id} ${h.title} ${h.created_at}${h.url?` ${h.url}`:""}${h.tags?.length?` [${h.tags.join(", ")}]`:""}`);console.log(`Page ${o}/${R} | showing ${g.length} of ${p.length} | sort=${I} ${k} | search=${u||"none"} | tag=${d||"none"}`)}});return}if(n==="get"){Y(r),O(s,()=>{let o=v(s).items.find((c)=>c.id===r.id||c.short_id===r.id);if(!o)throw Error(`Item not found: ${r.id}`);E({ok:!0,item:o,message:`${o.id}: ${o.title}`},r.json)});return}if(n==="update"){Y(r),O(s,()=>{let a=v(s),o=a.items.findIndex((u)=>u.id===r.id||u.short_id===r.id);if(o===-1)throw Error(`Item not found: ${r.id}`);let c=a.items[o];if(r.title!==void 0)c.title=r.title;if(r.content!==void 0)c.content=r.content;if(r.url!==void 0)c.url=r.url;if(r.tag!==void 
0){if(c.tags=c.tags||[],!c.tags.map((u)=>u.toLowerCase()).includes(r.tag.toLowerCase()))c.tags.push(r.tag)}c.updated_at=new Date().toISOString(),a.items[o]=c,U(s,a),E({ok:!0,item:c,message:`Updated ${c.id}`},r.json)});return}if(n==="archive"||n==="restore"){Y(r),O(s,()=>{let a=v(s),o=a.items.findIndex((u)=>u.id===r.id||u.short_id===r.id);if(o===-1)throw Error(`Item not found: ${r.id}`);let c=a.items[o];c.archived=n==="archive",c.updated_at=new Date().toISOString(),a.items[o]=c,U(s,a),E({ok:!0,item:c,message:`${n==="archive"?"Archived":"Restored"} ${c.id}`},r.json)});return}if(n==="untag"){if(Y(r),!r.tag)throw Error("Missing required --tag. Example: open-knowledge untag --id <id> -t <tag>");O(s,()=>{let a=v(s),o=a.items.findIndex((d)=>d.id===r.id||d.short_id===r.id);if(o===-1)throw Error(`Item not found: ${r.id}`);let c=a.items[o],u=c.tags?.length??0;c.tags=(c.tags??[]).filter((d)=>d.toLowerCase()!==r.tag.toLowerCase()),c.updated_at=new Date().toISOString(),a.items[o]=c,U(s,a),E({ok:!0,item:c,removed:u-c.tags.length,message:`Removed tag from ${c.id}`},r.json)});return}if(n==="upsert"){let a=r.title??t[1],o=r.content??t[2];O(s,()=>{let c=v(s),u=r.id?c.items.findIndex((m)=>m.id===r.id||m.short_id===r.id):-1,d=new Date().toISOString();if(u===-1){if(!a||!o)throw Error("New item requires title and content. 
Example: open-knowledge upsert <title> <content> [--id <id>]");let m=r.id??pe(),T={id:m,short_id:Ie(m),title:a,content:o,url:r.url??null,tags:r.tag?[r.tag]:[],metadata:{},archived:!1,created_at:d,updated_at:d};c.items.push(T),U(s,c),E({ok:!0,created:!0,item:T,message:`Upserted ${T.id}`},r.json);return}let l=c.items[u];if(a!==void 0)l.title=a;if(o!==void 0)l.content=o;if(r.url!==void 0)l.url=r.url;if(r.tag!==void 0){if(l.tags=l.tags||[],!l.tags.map((m)=>m.toLowerCase()).includes(r.tag.toLowerCase()))l.tags.push(r.tag)}l.updated_at=d,c.items[u]=l,U(s,c),E({ok:!0,created:!1,item:l,message:`Upserted ${l.id}`},r.json)});return}if(n==="delete"){if(Y(r),!r.yes)throw Error("Refusing delete without --yes. Re-run with: open-knowledge delete --id <id> --yes");O(s,()=>{let a=v(s),o=a.items.length;a.items=a.items.filter((u)=>u.id!==r.id&&u.short_id!==r.id);let c=o!==a.items.length;if(U(s,a),!c)throw Error(`Item not found: ${r.id}`);M("info","Item deleted",{id:r.id}),E({ok:!0,deleted_id:r.id,message:`Deleted ${r.id}`},r.json)});return}if(n==="export"){let a=r.format??"json";if(a!=="json"&&a!=="jsonl")throw Error("Invalid --format. Use 'json' or 'jsonl'.");O(s,()=>{let o=v(s);if(a==="jsonl")for(let c of o.items)console.log(JSON.stringify(c));else E({ok:!0,items:o.items},r.json)});return}if(n==="prune"){if(!r.yes)throw Error("Refusing prune without --yes. Re-run with: open-knowledge prune --yes [--older-than <days>] [--empty]");O(s,()=>{let a=v(s),o=a.items.length;if(r.olderThan!==void 0){let u=new Date;u.setDate(u.getDate()-r.olderThan),a.items=a.items.filter((d)=>new Date(d.created_at)>=u)}if(r.empty)a.items=a.items.filter((u)=>u.content.trim().length>0);let c=o-a.items.length;U(s,a),M("info","Prune completed",{pruned:c,remaining:a.items.length}),E({ok:!0,pruned:c,remaining:a.items.length,message:`Pruned ${c} item(s)`},r.json)});return}if(n==="dedupe"){if(!r.yes)throw Error("Refusing dedupe without --yes. 
Re-run with: open-knowledge dedupe --yes [--json]");O(s,()=>{let a=v(s),o=new Set,c=a.items.length;a.items=a.items.filter((d)=>{let l=`${d.title}\x00${d.content}`;if(o.has(l))return!1;return o.add(l),!0});let u=c-a.items.length;U(s,a),M("info","Dedupe completed",{removed:u,remaining:a.items.length}),E({ok:!0,removed:u,remaining:a.items.length,message:`Dedupe removed ${u} duplicate(s)`},r.json)});return}if(n==="stats"){O(s,()=>{let a=v(s),o=a.items.filter((k)=>!k.archived),c=o.length,u=a.items.length-c,d=o.filter((k)=>k.url).length,l=o.filter((k)=>k.tags&&k.tags.length>0).length,m=c>0?o.map((k)=>k.created_at).sort()[0]:null,T=c>0?o.map((k)=>k.created_at).sort()[c-1]:null,p={};for(let k of o)for(let S of k.tags||[])p[S]=(p[S]||0)+1;let I=Object.entries(p).sort((k,S)=>S[1]-k[1]).slice(0,5).map(([k,S])=>({tag:k,count:S}));E({ok:!0,total:c,archived:u,with_url:d,with_tags:l,oldest:m,newest:T,top_tags:I,message:`${c} items | ${d} with URL | ${l} with tags`},r.json)});return}let _=Tn(t[0]),f=_?` Did you mean '${_}'?`:"";throw M("warn","Unknown command",{input:t[0],suggestion:_}),Error(`Unknown command: ${t[0]}.${f} Run 'open-knowledge --help' for available commands.`)}if(import.meta.main)xn(process.argv.slice(2)).catch((e)=>{let t=e instanceof Error?e.message:String(e);M("error","CLI error",{message:t,stack:e instanceof Error?e.stack:void 0}),console.error(`Error: ${t}`),process.exitCode=1});export{Tn as suggestCommand,Sn as sortItems,xn as run,yn as parseArgs};
|
|
@@ -89,6 +89,21 @@ file revisions, hashes, extraction state, permissions, and storage metadata.
|
|
|
89
89
|
Direct `s3://`, `file://`, and `https://` refs are useful for bootstrap and
|
|
90
90
|
interop, but should be normalized into source records when possible.
|
|
91
91
|
|
|
92
|
+
## Provenance Contract
|
|
93
|
+
|
|
94
|
+
Every durable search/wiki artifact should carry a provenance object in metadata:
|
|
95
|
+
`source_owner`, `source_ref`, `source_uri`, `source_kind`, `source_revision_id`,
|
|
96
|
+
`revision`, `hash`, optional `chunk_id`, chunk offsets (`start_offset`, `end_offset`), `status`, `read_only`,
|
|
97
|
+
`citation_required`, the resolver name (`resolver`), and the stale-source flag (`stale`). For generated artifacts
|
|
98
|
+
that are not source-backed yet, metadata still records that `open-files` owns
|
|
99
|
+
source bytes and that citations are required before durable facts are filed.
|
|
100
|
+
|
|
101
|
+
`wiki init` now catalogs the starter `wiki/README.md` and `indexes/root.md`
|
|
102
|
+
records with generated-artifact provenance. Source ingestion stores source
|
|
103
|
+
provenance on every chunk, and source resolution returns that provenance with
|
|
104
|
+
chunks and citations so semantic search can pass through trustworthy evidence
|
|
105
|
+
without reconstructing it later.
|
|
106
|
+
|
|
92
107
|
## Resolver Boundary
|
|
93
108
|
|
|
94
109
|
The local resolver is exposed through:
|
package/package.json
CHANGED
package/src/knowledge-db.ts
CHANGED
package/src/manifest-ingest.ts
CHANGED
|
@@ -4,6 +4,7 @@ import { basename } from 'node:path';
|
|
|
4
4
|
import type { Database } from 'bun:sqlite';
|
|
5
5
|
import { migrateKnowledgeDb, openKnowledgeDb } from './knowledge-db';
|
|
6
6
|
import { parseSourceRef, type SourceRef } from './source-ref';
|
|
7
|
+
import { sourceProvenance, withProvenance } from './provenance';
|
|
7
8
|
import type { KnowledgeConfig } from './workspace';
|
|
8
9
|
import {
|
|
9
10
|
assertS3ReadAllowed,
|
|
@@ -382,15 +383,31 @@ function insertChunks(db: Database, sourceRevisionId: string, item: NormalizedMa
|
|
|
382
383
|
const chunks = chunkText(redacted.text, maxChars, overlapChars);
|
|
383
384
|
for (const chunk of chunks) {
|
|
384
385
|
const chunkId = stableId('chk', `${sourceRevisionId}\u0000${chunk.ordinal}\u0000${chunk.text}`);
|
|
385
|
-
const
|
|
386
|
+
const provenance = sourceProvenance({
|
|
386
387
|
source_ref: item.sourceRef,
|
|
387
388
|
source_uri: item.sourceUri,
|
|
389
|
+
source_kind: item.kind,
|
|
390
|
+
source_revision_id: sourceRevisionId,
|
|
391
|
+
revision: item.revision,
|
|
392
|
+
hash: item.hash,
|
|
393
|
+
chunk_id: chunkId,
|
|
394
|
+
start_offset: chunk.startOffset,
|
|
395
|
+
end_offset: chunk.endOffset,
|
|
396
|
+
status: item.status,
|
|
397
|
+
resolver: 'open-files-read-only',
|
|
398
|
+
});
|
|
399
|
+
const metadata = withProvenance({
|
|
400
|
+
source_ref: item.sourceRef,
|
|
401
|
+
source_uri: item.sourceUri,
|
|
402
|
+
source_kind: item.kind,
|
|
403
|
+
source_revision_id: sourceRevisionId,
|
|
404
|
+
revision: item.revision,
|
|
388
405
|
hash: item.hash,
|
|
389
406
|
status: item.status,
|
|
390
407
|
path: asString(item.raw.path) ?? null,
|
|
391
408
|
mime: asString(item.raw.mime) ?? asString(item.raw.content_type) ?? null,
|
|
392
409
|
size: asNumber(item.raw.size) ?? null,
|
|
393
|
-
};
|
|
410
|
+
}, provenance);
|
|
394
411
|
db.run(
|
|
395
412
|
`INSERT INTO chunks (id, source_revision_id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json, created_at)
|
|
396
413
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
export interface KnowledgeProvenance {
|
|
2
|
+
source_owner: 'open-files';
|
|
3
|
+
source_ref: string | null;
|
|
4
|
+
source_uri: string | null;
|
|
5
|
+
source_kind: string | null;
|
|
6
|
+
source_revision_id: string | null;
|
|
7
|
+
revision: string | null;
|
|
8
|
+
hash: string | null;
|
|
9
|
+
chunk_id: string | null;
|
|
10
|
+
start_offset: number | null;
|
|
11
|
+
end_offset: number | null;
|
|
12
|
+
status: string | null;
|
|
13
|
+
read_only: true;
|
|
14
|
+
citation_required: boolean;
|
|
15
|
+
resolver: string | null;
|
|
16
|
+
stale: boolean;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
export interface GeneratedArtifactProvenance {
|
|
20
|
+
source_owner: 'open-files';
|
|
21
|
+
generated_from: string;
|
|
22
|
+
artifact_key: string;
|
|
23
|
+
source_refs: string[];
|
|
24
|
+
read_only_sources: true;
|
|
25
|
+
citation_required: boolean;
|
|
26
|
+
raw_source_bytes_stored_in_open_knowledge: false;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
export interface SourceProvenanceInput {
|
|
30
|
+
source_ref?: string | null;
|
|
31
|
+
source_uri?: string | null;
|
|
32
|
+
source_kind?: string | null;
|
|
33
|
+
source_revision_id?: string | null;
|
|
34
|
+
revision?: string | null;
|
|
35
|
+
hash?: string | null;
|
|
36
|
+
chunk_id?: string | null;
|
|
37
|
+
start_offset?: number | null;
|
|
38
|
+
end_offset?: number | null;
|
|
39
|
+
status?: string | null;
|
|
40
|
+
resolver?: string | null;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
export function isStaleStatus(status: string | null | undefined): boolean {
|
|
44
|
+
return ['deleted', 'stale', 'invalidated', 'reindex_required'].includes((status ?? '').toLowerCase());
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
export function sourceProvenance(input: SourceProvenanceInput): KnowledgeProvenance {
|
|
48
|
+
const status = input.status ?? null;
|
|
49
|
+
return {
|
|
50
|
+
source_owner: 'open-files',
|
|
51
|
+
source_ref: input.source_ref ?? null,
|
|
52
|
+
source_uri: input.source_uri ?? null,
|
|
53
|
+
source_kind: input.source_kind ?? null,
|
|
54
|
+
source_revision_id: input.source_revision_id ?? null,
|
|
55
|
+
revision: input.revision ?? null,
|
|
56
|
+
hash: input.hash ?? null,
|
|
57
|
+
chunk_id: input.chunk_id ?? null,
|
|
58
|
+
start_offset: input.start_offset ?? null,
|
|
59
|
+
end_offset: input.end_offset ?? null,
|
|
60
|
+
status,
|
|
61
|
+
read_only: true,
|
|
62
|
+
citation_required: true,
|
|
63
|
+
resolver: input.resolver ?? null,
|
|
64
|
+
stale: isStaleStatus(status),
|
|
65
|
+
};
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
export function generatedArtifactProvenance(input: {
|
|
69
|
+
generated_from: string;
|
|
70
|
+
artifact_key: string;
|
|
71
|
+
source_refs?: string[];
|
|
72
|
+
citation_required?: boolean;
|
|
73
|
+
}): GeneratedArtifactProvenance {
|
|
74
|
+
return {
|
|
75
|
+
source_owner: 'open-files',
|
|
76
|
+
generated_from: input.generated_from,
|
|
77
|
+
artifact_key: input.artifact_key,
|
|
78
|
+
source_refs: input.source_refs ?? [],
|
|
79
|
+
read_only_sources: true,
|
|
80
|
+
citation_required: input.citation_required ?? true,
|
|
81
|
+
raw_source_bytes_stored_in_open_knowledge: false,
|
|
82
|
+
};
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
export function withProvenance<T extends Record<string, unknown>>(
|
|
86
|
+
metadata: T,
|
|
87
|
+
provenance: KnowledgeProvenance | GeneratedArtifactProvenance,
|
|
88
|
+
): T & { provenance: KnowledgeProvenance | GeneratedArtifactProvenance } {
|
|
89
|
+
return {
|
|
90
|
+
...metadata,
|
|
91
|
+
provenance,
|
|
92
|
+
};
|
|
93
|
+
}
|
package/src/service.ts
CHANGED
|
@@ -13,7 +13,7 @@ import {
|
|
|
13
13
|
type StorageContract,
|
|
14
14
|
type StorageValidationResult,
|
|
15
15
|
} from './storage-contract';
|
|
16
|
-
import { initializeWikiLayout } from './wiki-layout';
|
|
16
|
+
import { initializeWikiLayout, recordWikiLayoutCatalog } from './wiki-layout';
|
|
17
17
|
import {
|
|
18
18
|
ensureKnowledgeWorkspace,
|
|
19
19
|
readKnowledgeConfig,
|
|
@@ -128,6 +128,7 @@ export class KnowledgeService {
|
|
|
128
128
|
const db = openKnowledgeDb(workspace.knowledgeDbPath);
|
|
129
129
|
try {
|
|
130
130
|
recordStorageObjects(db, result.artifacts);
|
|
131
|
+
recordWikiLayoutCatalog(db, result.artifacts);
|
|
131
132
|
} finally {
|
|
132
133
|
db.close();
|
|
133
134
|
}
|
package/src/source-resolver.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { Database } from 'bun:sqlite';
|
|
2
2
|
import { migrateKnowledgeDb, openKnowledgeDb } from './knowledge-db';
|
|
3
|
+
import { sourceProvenance, type KnowledgeProvenance } from './provenance';
|
|
3
4
|
import { catalogSourceUriForRef, parseSourceRef, revisionIdForSourceRef } from './source-ref';
|
|
4
5
|
import { assertWriteAllowed, recordAuditEvent, type SafetyPolicy } from './safety';
|
|
5
6
|
|
|
@@ -38,6 +39,7 @@ export interface ResolvedSourceChunk {
|
|
|
38
39
|
end_offset: number | null;
|
|
39
40
|
metadata: Record<string, unknown>;
|
|
40
41
|
evidence: SourceResolverEvidence;
|
|
42
|
+
provenance: KnowledgeProvenance;
|
|
41
43
|
}
|
|
42
44
|
|
|
43
45
|
export interface ResolvedSourceCitation {
|
|
@@ -48,6 +50,7 @@ export interface ResolvedSourceCitation {
|
|
|
48
50
|
start_offset: number | null;
|
|
49
51
|
end_offset: number | null;
|
|
50
52
|
evidence: SourceResolverEvidence;
|
|
53
|
+
provenance: KnowledgeProvenance;
|
|
51
54
|
}
|
|
52
55
|
|
|
53
56
|
export interface SourceResolveResult {
|
|
@@ -326,6 +329,19 @@ export async function resolveOpenFilesSource(options: SourceResolveOptions): Pro
|
|
|
326
329
|
end_offset: row.end_offset,
|
|
327
330
|
resolved_at: resolvedAt,
|
|
328
331
|
};
|
|
332
|
+
const provenance = sourceProvenance({
|
|
333
|
+
source_ref: evidence.source_ref,
|
|
334
|
+
source_uri: evidence.source_uri,
|
|
335
|
+
source_kind: source.kind,
|
|
336
|
+
source_revision_id: evidence.source_revision_id,
|
|
337
|
+
revision: evidence.revision,
|
|
338
|
+
hash: evidence.hash,
|
|
339
|
+
chunk_id: row.id,
|
|
340
|
+
start_offset: row.start_offset,
|
|
341
|
+
end_offset: row.end_offset,
|
|
342
|
+
status: metadataString(metadata, ['status']),
|
|
343
|
+
resolver: evidence.resolver,
|
|
344
|
+
});
|
|
329
345
|
return {
|
|
330
346
|
id: row.id,
|
|
331
347
|
kind: row.kind,
|
|
@@ -336,6 +352,7 @@ export async function resolveOpenFilesSource(options: SourceResolveOptions): Pro
|
|
|
336
352
|
end_offset: row.end_offset,
|
|
337
353
|
metadata,
|
|
338
354
|
evidence,
|
|
355
|
+
provenance,
|
|
339
356
|
};
|
|
340
357
|
});
|
|
341
358
|
|
|
@@ -347,6 +364,7 @@ export async function resolveOpenFilesSource(options: SourceResolveOptions): Pro
|
|
|
347
364
|
start_offset: chunk.start_offset,
|
|
348
365
|
end_offset: chunk.end_offset,
|
|
349
366
|
evidence: chunk.evidence,
|
|
367
|
+
provenance: chunk.provenance,
|
|
350
368
|
}));
|
|
351
369
|
|
|
352
370
|
recordAuditEvent(db, {
|
package/src/wiki-layout.ts
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
|
+
import { createHash } from 'node:crypto';
|
|
2
|
+
import type { Database } from 'bun:sqlite';
|
|
1
3
|
import type { ArtifactStore } from './artifact-store';
|
|
4
|
+
import { generatedArtifactProvenance, type GeneratedArtifactProvenance } from './provenance';
|
|
2
5
|
import {
|
|
3
6
|
artifactKindForKey,
|
|
4
7
|
hashArtifactBody,
|
|
@@ -14,6 +17,13 @@ export interface WikiLayoutInitResult {
|
|
|
14
17
|
written: string[];
|
|
15
18
|
}
|
|
16
19
|
|
|
20
|
+
interface CatalogArtifact {
|
|
21
|
+
key: string;
|
|
22
|
+
uri: string;
|
|
23
|
+
hash?: string;
|
|
24
|
+
metadata?: Record<string, unknown>;
|
|
25
|
+
}
|
|
26
|
+
|
|
17
27
|
function todayParts(now: Date): { year: string; month: string; day: string } {
|
|
18
28
|
const year = String(now.getUTCFullYear());
|
|
19
29
|
const month = String(now.getUTCMonth() + 1).padStart(2, '0');
|
|
@@ -21,6 +31,10 @@ function todayParts(now: Date): { year: string; month: string; day: string } {
|
|
|
21
31
|
return { year, month, day };
|
|
22
32
|
}
|
|
23
33
|
|
|
34
|
+
function stableId(prefix: string, value: string): string {
|
|
35
|
+
return `${prefix}_${createHash('sha256').update(value).digest('hex').slice(0, 20)}`;
|
|
36
|
+
}
|
|
37
|
+
|
|
24
38
|
export function agentSchemaTemplate(): string {
|
|
25
39
|
return `# Knowledge Agent Schema v1
|
|
26
40
|
|
|
@@ -106,6 +120,13 @@ export async function initializeWikiLayout(store: ArtifactStore, now = new Date(
|
|
|
106
120
|
uri: result.uri,
|
|
107
121
|
kind: artifactKindForKey(entry.key),
|
|
108
122
|
content_type: entry.content_type,
|
|
123
|
+
metadata: {
|
|
124
|
+
provenance: generatedArtifactProvenance({
|
|
125
|
+
generated_from: 'wiki_layout_init',
|
|
126
|
+
artifact_key: entry.key,
|
|
127
|
+
citation_required: entry.key.startsWith('wiki/') || entry.key.startsWith('indexes/'),
|
|
128
|
+
}),
|
|
129
|
+
},
|
|
109
130
|
...hashArtifactBody(entry.body),
|
|
110
131
|
};
|
|
111
132
|
}));
|
|
@@ -118,3 +139,73 @@ export async function initializeWikiLayout(store: ArtifactStore, now = new Date(
|
|
|
118
139
|
written: [schemaKey, rootIndexKey, wikiReadmeKey, logKey],
|
|
119
140
|
};
|
|
120
141
|
}
|
|
142
|
+
|
|
143
|
+
function provenanceFor(artifact: CatalogArtifact): GeneratedArtifactProvenance {
|
|
144
|
+
const existing = artifact.metadata?.provenance;
|
|
145
|
+
if (existing && typeof existing === 'object' && !Array.isArray(existing)) {
|
|
146
|
+
return existing as GeneratedArtifactProvenance;
|
|
147
|
+
}
|
|
148
|
+
return generatedArtifactProvenance({
|
|
149
|
+
generated_from: 'wiki_layout_init',
|
|
150
|
+
artifact_key: artifact.key,
|
|
151
|
+
});
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
export function recordWikiLayoutCatalog(db: Database, artifacts: CatalogArtifact[], now = new Date()): void {
|
|
155
|
+
const timestamp = now.toISOString();
|
|
156
|
+
const rootIndex = artifacts.find((artifact) => artifact.key.endsWith('indexes/root.md'));
|
|
157
|
+
const wikiReadme = artifacts.find((artifact) => artifact.key.endsWith('wiki/README.md'));
|
|
158
|
+
|
|
159
|
+
if (rootIndex) {
|
|
160
|
+
db.run(
|
|
161
|
+
`INSERT INTO knowledge_indexes (id, kind, name, artifact_uri, shard_key, metadata_json, created_at, updated_at)
|
|
162
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
163
|
+
ON CONFLICT(kind, name, shard_key) DO UPDATE SET
|
|
164
|
+
artifact_uri = excluded.artifact_uri,
|
|
165
|
+
metadata_json = excluded.metadata_json,
|
|
166
|
+
updated_at = excluded.updated_at`,
|
|
167
|
+
[
|
|
168
|
+
stableId('idx', 'root:indexes/root.md'),
|
|
169
|
+
'root',
|
|
170
|
+
'root',
|
|
171
|
+
rootIndex.uri,
|
|
172
|
+
'root',
|
|
173
|
+
JSON.stringify({
|
|
174
|
+
artifact_key: rootIndex.key,
|
|
175
|
+
content_hash: rootIndex.hash ?? null,
|
|
176
|
+
provenance: provenanceFor(rootIndex),
|
|
177
|
+
}),
|
|
178
|
+
timestamp,
|
|
179
|
+
timestamp,
|
|
180
|
+
],
|
|
181
|
+
);
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
if (wikiReadme) {
|
|
185
|
+
db.run(
|
|
186
|
+
`INSERT INTO wiki_pages (id, path, title, artifact_uri, content_hash, status, metadata_json, created_at, updated_at)
|
|
187
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
188
|
+
ON CONFLICT(path) DO UPDATE SET
|
|
189
|
+
title = excluded.title,
|
|
190
|
+
artifact_uri = excluded.artifact_uri,
|
|
191
|
+
content_hash = excluded.content_hash,
|
|
192
|
+
status = excluded.status,
|
|
193
|
+
metadata_json = excluded.metadata_json,
|
|
194
|
+
updated_at = excluded.updated_at`,
|
|
195
|
+
[
|
|
196
|
+
stableId('wiki', 'wiki/README.md'),
|
|
197
|
+
'wiki/README.md',
|
|
198
|
+
'Wiki',
|
|
199
|
+
wikiReadme.uri,
|
|
200
|
+
wikiReadme.hash ?? null,
|
|
201
|
+
'active',
|
|
202
|
+
JSON.stringify({
|
|
203
|
+
artifact_key: wikiReadme.key,
|
|
204
|
+
provenance: provenanceFor(wikiReadme),
|
|
205
|
+
}),
|
|
206
|
+
timestamp,
|
|
207
|
+
timestamp,
|
|
208
|
+
],
|
|
209
|
+
);
|
|
210
|
+
}
|
|
211
|
+
}
|