@hasna/knowledge 0.2.8 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -66,6 +66,9 @@ open-knowledge wiki init --scope project
66
66
  # Ingest an open-files source manifest into the project SQLite catalog
67
67
  open-knowledge ingest manifest ./open-files-manifest.jsonl --scope project --json
68
68
 
69
+ # Ingest one read-only source ref directly
70
+ open-knowledge ingest source file:///absolute/path/to/handbook.md --purpose knowledge_index --scope project --json
71
+
69
72
  # Consume open-files change events and invalidate stale source chunks
70
73
  open-knowledge reindex outbox ./open-files-outbox.jsonl --scope project --json
71
74
 
@@ -184,11 +187,20 @@ raw source retrieval remains owned by `open-files`.
184
187
  ### ingest
185
188
  ```bash
186
189
  open-knowledge ingest manifest <file|s3://bucket/key> [--scope project] [--json]
190
+ open-knowledge ingest source <source-ref> [--purpose knowledge_index] [--scope project] [--json]
187
191
  ```
188
192
  Import an open-files JSON or JSONL source manifest into `knowledge.db`. This
189
193
  upserts sources and source revisions, stores hash/MIME/status/permission
190
194
  metadata, and chunks embedded extracted text when the manifest includes it.
191
195
 
196
+ `ingest source` accepts `open-files://`, `file://`, `s3://`, and `https://`
197
+ refs. It reads source content through a read-only boundary, redacts known
198
+ secrets before storage, records hashes/revisions, and stores only derived chunks
199
+ and citation spans. Web and S3 reads remain opt-in through the safety policy.
200
+ For `open-files://` refs, the source must already be present in the local
201
+ knowledge catalog through a manifest or extracted-text ref until the open-files
202
+ resolver API lands.
203
+
192
204
  ### reindex
193
205
  ```bash
194
206
  open-knowledge reindex outbox <file|s3://bucket/key> [--scope project] [--json]
@@ -259,6 +271,11 @@ only the indexed, derived knowledge catalog. The resolver enforces read-only
259
271
  purpose labels from source permissions, returns chunk citation evidence, writes
260
272
  an audit event, and keeps bytes/storage credentials inside `open-files`.
261
273
 
274
+ `open-knowledge ingest source` can also build derived chunks from an allowed
275
+ source ref. It does not copy raw files into the knowledge workspace; local file,
276
+ S3, web, and open-files inputs are converted into redacted chunks with offsets,
277
+ hashes, revision metadata, and FTS rows.
278
+
262
279
  Generated knowledge artifacts can be stored locally under
263
280
  `.hasna/apps/knowledge/artifacts` or through the S3 artifact-store adapter.
264
281
 
@@ -13659,7 +13659,7 @@ import { existsSync as existsSync3, readFileSync as readFileSync3, writeFileSync
13659
13659
  // package.json
13660
13660
  var package_default = {
13661
13661
  name: "@hasna/knowledge",
13662
- version: "0.2.8",
13662
+ version: "0.2.9",
13663
13663
  description: "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
13664
13664
  type: "module",
13665
13665
  bin: {
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env bun
2
2
  // @bun
3
- var C=import.meta.require;import{readFileSync as Z,writeFileSync as V,existsSync as Q,renameSync as Ge,unlinkSync as ye}from"fs";import{randomUUID as he}from"crypto";import{existsSync as Be,mkdirSync as oe,readFileSync as We,writeFileSync as Pe}from"fs";import{homedir as pe}from"os";import{dirname as He,join as w,resolve as qe}from"path";var ze=w(".hasna","apps","knowledge");function ae(){return w(pe(),".open-knowledge","db.json")}function ue(){return w(pe(),".hasna","apps","knowledge")}function Ye(e=process.cwd()){return qe(e,ze)}function H(e){return{home:e,configPath:w(e,"config.json"),jsonStorePath:w(e,"db.json"),knowledgeDbPath:w(e,"knowledge.db"),artifactsDir:w(e,"artifacts"),cacheDir:w(e,"cache"),exportsDir:w(e,"exports"),indexesDir:w(e,"indexes"),logsDir:w(e,"logs"),runsDir:w(e,"runs"),schemasDir:w(e,"schemas"),wikiDir:w(e,"wiki")}}function Je(){return{version:1,mode:"local",storage:{type:"local",artifacts_root:"artifacts"},sources:{preferred_ref:"open-files",allowed_schemes:["open-files","s3","file","https","http"]},safety:{network:{web_search_enabled:!1,s3_reads_enabled:!1,allowed_s3_buckets:[]},redaction:{enabled:!0},approvals:{generated_writes_require_approval:!0}}}}function X(e){let n=H(e);oe(n.home,{recursive:!0});for(let t of[n.artifactsDir,n.cacheDir,n.exportsDir,n.indexesDir,n.logsDir,n.runsDir,n.schemasDir,n.wikiDir])oe(t,{recursive:!0});if(!Be(n.configPath))Pe(n.configPath,`${JSON.stringify(Je(),null,2)}
4
- `);return n}function ge(e,n=process.cwd()){if(e==="project"||e==="local")return H(Ye(n));return H(ue())}function G(e){oe(He(e),{recursive:!0})}function F(e){let n=We(e,"utf8");return JSON.parse(n)}function ce(){return H(ue()).jsonStorePath}function de(e){if(!Q(e))if(G(e),e===ce()&&Q(ae()))V(e,Z(ae(),"utf8"));else V(e,JSON.stringify({items:[]},null,2))}function Ve(e){return`${e}.lock`}function Qe(e,n){let i=Date.now();while(Date.now()-i<5000){try{if(!Q(e)){V(e,JSON.stringify({owner:n,ts:Date.now()}));return}let l=JSON.parse(Z(e,"utf8"));if(Date.now()-l.ts>1e4)ye(e)}catch{}let s=Date.now();while(Date.now()-s<50);}throw Error(`Could not acquire lock on ${e} after 5000ms`)}function Ze(e,n){try{if(Q(e)){if(JSON.parse(Z(e,"utf8")).owner===n)ye(e)}}catch{}}function L(e){de(e);let n=Z(e,"utf8"),t=JSON.parse(n);if(!t||!Array.isArray(t.items))return{items:[]};return t}function A(e,n){let t=`${e}.tmp.${he()}`;V(t,JSON.stringify(n,null,2)),Ge(t,e)}function k(e,n){let t=he(),r=Ve(e);Qe(r,t);try{return n()}finally{Ze(r,t)}}function _e(){return`k_${Date.now().toString(36)}_${Math.random().toString(36).slice(2,8)}`}function Re(e){return e.replace(/^k_/,"").slice(0,12)}import{Database as et}from"bun:sqlite";var tt=`
3
+ var A=import.meta.require;import{readFileSync as ee,writeFileSync as Q,existsSync as Z,renameSync as nt,unlinkSync as xe}from"fs";import{randomUUID as Ne}from"crypto";import{existsSync as Ye,mkdirSync as le,readFileSync as Je,writeFileSync as Ge}from"fs";import{homedir as be}from"os";import{dirname as Ve,join as N,resolve as Qe}from"path";var Ze=N(".hasna","apps","knowledge");function _e(){return N(be(),".open-knowledge","db.json")}function fe(){return N(be(),".hasna","apps","knowledge")}function et(e=process.cwd()){return Qe(e,Ze)}function H(e){return{home:e,configPath:N(e,"config.json"),jsonStorePath:N(e,"db.json"),knowledgeDbPath:N(e,"knowledge.db"),artifactsDir:N(e,"artifacts"),cacheDir:N(e,"cache"),exportsDir:N(e,"exports"),indexesDir:N(e,"indexes"),logsDir:N(e,"logs"),runsDir:N(e,"runs"),schemasDir:N(e,"schemas"),wikiDir:N(e,"wiki")}}function tt(){return{version:1,mode:"local",storage:{type:"local",artifacts_root:"artifacts"},sources:{preferred_ref:"open-files",allowed_schemes:["open-files","s3","file","https","http"]},safety:{network:{web_search_enabled:!1,s3_reads_enabled:!1,allowed_s3_buckets:[]},redaction:{enabled:!0},approvals:{generated_writes_require_approval:!0}}}}function X(e){let n=H(e);le(n.home,{recursive:!0});for(let t of[n.artifactsDir,n.cacheDir,n.exportsDir,n.indexesDir,n.logsDir,n.runsDir,n.schemasDir,n.wikiDir])le(t,{recursive:!0});if(!Ye(n.configPath))Ge(n.configPath,`${JSON.stringify(tt(),null,2)}
4
+ `);return n}function Se(e,n=process.cwd()){if(e==="project"||e==="local")return H(et(n));return H(fe())}function V(e){le(Ve(e),{recursive:!0})}function M(e){let n=Je(e,"utf8");return JSON.parse(n)}function Ee(){return H(fe()).jsonStorePath}function pe(e){if(!Z(e))if(V(e),e===Ee()&&Z(_e()))Q(e,ee(_e(),"utf8"));else Q(e,JSON.stringify({items:[]},null,2))}function rt(e){return`${e}.lock`}function it(e,n){let i=Date.now();while(Date.now()-i<5000){try{if(!Z(e)){Q(e,JSON.stringify({owner:n,ts:Date.now()}));return}let _=JSON.parse(ee(e,"utf8"));if(Date.now()-_.ts>1e4)xe(e)}catch{}let s=Date.now();while(Date.now()-s<50);}throw Error(`Could not acquire lock on ${e} after 5000ms`)}function st(e,n){try{if(Z(e)){if(JSON.parse(ee(e,"utf8")).owner===n)xe(e)}}catch{}}function O(e){pe(e);let n=ee(e,"utf8"),t=JSON.parse(n);if(!t||!Array.isArray(t.items))return{items:[]};return t}function U(e,n){let t=`${e}.tmp.${Ne()}`;Q(t,JSON.stringify(n,null,2)),nt(t,e)}function w(e,n){let t=Ne(),r=rt(e);it(r,t);try{return n()}finally{st(r,t)}}function Te(){return`k_${Date.now().toString(36)}_${Math.random().toString(36).slice(2,8)}`}function Oe(e){return e.replace(/^k_/,"").slice(0,12)}import{Database as ot}from"bun:sqlite";var at=`
5
5
  PRAGMA journal_mode = WAL;
6
6
  PRAGMA foreign_keys = ON;
7
7
 
@@ -168,7 +168,7 @@ CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
168
168
 
169
169
  INSERT OR IGNORE INTO schema_versions(version, applied_at)
170
170
  VALUES (1, datetime('now'));
171
- `,nt=`
171
+ `,ut=`
172
172
  DROP TABLE IF EXISTS chunks_fts;
173
173
 
174
174
  CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
@@ -181,7 +181,7 @@ CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
181
181
 
182
182
  INSERT OR IGNORE INTO schema_versions(version, applied_at)
183
183
  VALUES (2, datetime('now'));
184
- `,rt=`
184
+ `,ct=`
185
185
  CREATE TABLE IF NOT EXISTS audit_events (
186
186
  id TEXT PRIMARY KEY,
187
187
  event_type TEXT NOT NULL,
@@ -212,7 +212,7 @@ CREATE INDEX IF NOT EXISTS idx_approval_gates_status ON approval_gates(status);
212
212
 
213
213
  INSERT OR IGNORE INTO schema_versions(version, applied_at)
214
214
  VALUES (3, datetime('now'));
215
- `;function I(e){G(e);let n=new et(e);return n.exec("PRAGMA foreign_keys = ON;"),n}function D(e){let n=I(e);try{if(n.exec(tt),ee(n)<2)n.exec(nt);if(ee(n)<3)n.exec(rt);return{path:e,schema_version:ee(n)}}finally{n.close()}}function ee(e){return e.query("SELECT MAX(version) AS version FROM schema_versions").get()?.version??0}function v(e,n){return e.query(`SELECT COUNT(*) AS n FROM ${n}`).get()?.n??0}function Ne(e){let n=I(e);try{return{schema_version:ee(n),sources:v(n,"sources"),source_revisions:v(n,"source_revisions"),chunks:v(n,"chunks"),wiki_pages:v(n,"wiki_pages"),citations:v(n,"citations"),indexes:v(n,"knowledge_indexes"),runs:v(n,"runs"),run_events:v(n,"run_events"),redaction_findings:v(n,"redaction_findings"),audit_events:v(n,"audit_events"),approval_gates:v(n,"approval_gates")}}finally{n.close()}}import{existsSync as it,mkdirSync as be,readFileSync as st,writeFileSync as ot}from"fs";import{dirname as at,join as le,relative as ut,sep as ct}from"path";function q(e){let n=e.replace(/\\/g,"/").trim();if(!n||n.startsWith("/"))throw Error(`Invalid artifact key: ${e}`);let t=n.split("/").filter(Boolean);if(t.length===0||t.some((r)=>r==="."||r===".."))throw Error(`Invalid artifact key: ${e}`);return t.join("/")}function fe(e,n){let t=ut(e,n);if(t.startsWith("..")||t===".."||t.startsWith(`..${ct}`))throw Error(`Artifact path escapes root: ${n}`)}class Se{root;type="local";canRead=!0;canWrite=!0;constructor(e){this.root=e;be(e,{recursive:!0})}async put(e){let n=q(e.key),t=le(this.root,n);return fe(this.root,t),be(at(t),{recursive:!0}),ot(t,e.body),{key:n,uri:`file://${t}`}}async getText(e){let n=q(e),t=le(this.root,n);return fe(this.root,t),st(t,"utf8")}async exists(e){let n=q(e),t=le(this.root,n);return fe(this.root,t),it(t)}}class Oe{options;type="s3";canRead=!0;canWrite=!0;client;constructor(e){this.options=e;this.client=e.client}async getClient(){if(this.client)return this.client;let[{S3Client:e},{fromIni:n}]=await 
Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]);return this.client=new e({region:this.options.region,credentials:this.options.profile?n({profile:this.options.profile}):void 0,maxAttempts:this.options.max_attempts}),this.client}objectKey(e){let n=q(e),t=this.options.prefix?q(this.options.prefix):"";return t?`${t}/${n}`:n}async put(e){let[{PutObjectCommand:n},t]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(e.key);return await t.send(new n({Bucket:this.options.bucket,Key:r,Body:e.body,ContentType:e.content_type,Metadata:e.metadata,ServerSideEncryption:this.options.server_side_encryption,SSEKMSKeyId:this.options.kms_key_id})),{key:r,uri:`s3://${this.options.bucket}/${r}`}}async getText(e){let[{GetObjectCommand:n},t]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(e),i=await t.send(new n({Bucket:this.options.bucket,Key:r}));if(!i.Body)return"";return await i.Body.transformToString()}async exists(e){let[{HeadObjectCommand:n},t]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(e);try{return await t.send(new n({Bucket:this.options.bucket,Key:r})),!0}catch(i){let s=i instanceof Error?i.name:"";if(s==="NotFound"||s==="NoSuchKey"||s==="NotFoundError")return!1;throw i}}}function me(e,n){if(e.storage.type==="s3"){if(!e.storage.s3?.bucket)throw Error("S3 artifact storage requires storage.s3.bucket");return new Oe({bucket:e.storage.s3.bucket,prefix:e.storage.s3.prefix,region:e.storage.s3.region,profile:e.storage.s3.profile,max_attempts:e.storage.s3.max_attempts,server_side_encryption:e.storage.s3.server_side_encryption,kms_key_id:e.storage.s3.kms_key_id})}return new Se(n.artifactsDir)}function dt(e){let n=String(e.getUTCFullYear()),t=String(e.getUTCMonth()+1).padStart(2,"0"),r=String(e.getUTCDate()).padStart(2,"0");return{year:n,month:t,day:r}}function _t(){return`# Knowledge Agent Schema v1
215
+ `;function C(e){V(e);let n=new ot(e);return n.exec("PRAGMA foreign_keys = ON;"),n}function D(e){let n=C(e);try{if(n.exec(at),te(n)<2)n.exec(ut);if(te(n)<3)n.exec(ct);return{path:e,schema_version:te(n)}}finally{n.close()}}function te(e){return e.query("SELECT MAX(version) AS version FROM schema_versions").get()?.version??0}function I(e,n){return e.query(`SELECT COUNT(*) AS n FROM ${n}`).get()?.n??0}function we(e){let n=C(e);try{return{schema_version:te(n),sources:I(n,"sources"),source_revisions:I(n,"source_revisions"),chunks:I(n,"chunks"),wiki_pages:I(n,"wiki_pages"),citations:I(n,"citations"),indexes:I(n,"knowledge_indexes"),runs:I(n,"runs"),run_events:I(n,"run_events"),redaction_findings:I(n,"redaction_findings"),audit_events:I(n,"audit_events"),approval_gates:I(n,"approval_gates")}}finally{n.close()}}import{existsSync as dt,mkdirSync as ke,readFileSync as lt,writeFileSync as _t}from"fs";import{dirname as ft,join as ge,relative as Et,sep as pt}from"path";function q(e){let n=e.replace(/\\/g,"/").trim();if(!n||n.startsWith("/"))throw Error(`Invalid artifact key: ${e}`);let t=n.split("/").filter(Boolean);if(t.length===0||t.some((r)=>r==="."||r===".."))throw Error(`Invalid artifact key: ${e}`);return t.join("/")}function he(e,n){let t=Et(e,n);if(t.startsWith("..")||t===".."||t.startsWith(`..${pt}`))throw Error(`Artifact path escapes root: ${n}`)}class Le{root;type="local";canRead=!0;canWrite=!0;constructor(e){this.root=e;ke(e,{recursive:!0})}async put(e){let n=q(e.key),t=ge(this.root,n);return he(this.root,t),ke(ft(t),{recursive:!0}),_t(t,e.body),{key:n,uri:`file://${t}`}}async getText(e){let n=q(e),t=ge(this.root,n);return he(this.root,t),lt(t,"utf8")}async exists(e){let n=q(e),t=ge(this.root,n);return he(this.root,t),dt(t)}}class ve{options;type="s3";canRead=!0;canWrite=!0;client;constructor(e){this.options=e;this.client=e.client}async getClient(){if(this.client)return this.client;let[{S3Client:e},{fromIni:n}]=await 
Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]);return this.client=new e({region:this.options.region,credentials:this.options.profile?n({profile:this.options.profile}):void 0,maxAttempts:this.options.max_attempts}),this.client}objectKey(e){let n=q(e),t=this.options.prefix?q(this.options.prefix):"";return t?`${t}/${n}`:n}async put(e){let[{PutObjectCommand:n},t]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(e.key);return await t.send(new n({Bucket:this.options.bucket,Key:r,Body:e.body,ContentType:e.content_type,Metadata:e.metadata,ServerSideEncryption:this.options.server_side_encryption,SSEKMSKeyId:this.options.kms_key_id})),{key:r,uri:`s3://${this.options.bucket}/${r}`}}async getText(e){let[{GetObjectCommand:n},t]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(e),i=await t.send(new n({Bucket:this.options.bucket,Key:r}));if(!i.Body)return"";return await i.Body.transformToString()}async exists(e){let[{HeadObjectCommand:n},t]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(e);try{return await t.send(new n({Bucket:this.options.bucket,Key:r})),!0}catch(i){let s=i instanceof Error?i.name:"";if(s==="NotFound"||s==="NoSuchKey"||s==="NotFoundError")return!1;throw i}}}function Ae(e,n){if(e.storage.type==="s3"){if(!e.storage.s3?.bucket)throw Error("S3 artifact storage requires storage.s3.bucket");return new ve({bucket:e.storage.s3.bucket,prefix:e.storage.s3.prefix,region:e.storage.s3.region,profile:e.storage.s3.profile,max_attempts:e.storage.s3.max_attempts,server_side_encryption:e.storage.s3.server_side_encryption,kms_key_id:e.storage.s3.kms_key_id})}return new Le(n.artifactsDir)}function Tt(e){let n=String(e.getUTCFullYear()),t=String(e.getUTCMonth()+1).padStart(2,"0"),r=String(e.getUTCDate()).padStart(2,"0");return{year:n,month:t,day:r}}function gt(){return`# Knowledge Agent Schema v1
216
216
 
217
217
  ## Source Rules
218
218
 
@@ -237,7 +237,7 @@ VALUES (3, datetime('now'));
237
237
  ## Lint Rules
238
238
 
239
239
  - Flag stale pages, missing citations, contradictions, orphan pages, duplicate pages, and unresolved source refs.
240
- `}function lt(){return`# Knowledge Index
240
+ `}function ht(){return`# Knowledge Index
241
241
 
242
242
  This is a compact orientation index for agents. It is not the full search index.
243
243
 
@@ -252,61 +252,65 @@ This is a compact orientation index for agents. It is not the full search index.
252
252
 
253
253
  Raw source files are resolved through open-files. This app stores source refs,
254
254
  citations, chunks, generated wiki artifacts, indexes, and run records.
255
- `}function ft(){return`# Wiki
255
+ `}function yt(){return`# Wiki
256
256
 
257
257
  Generated durable knowledge pages live here.
258
258
 
259
259
  Pages should be concise, cited, and organized for both humans and agents.
260
- `}async function we(e,n=new Date){let{year:t,month:r,day:i}=dt(n),s="schemas/v1.md",l="indexes/root.md",T="wiki/README.md",o=`logs/${t}/${r}/${i}.jsonl`,a={ts:n.toISOString(),event:"wiki_layout_initialized",schema_key:"schemas/v1.md",root_index_key:"indexes/root.md",wiki_readme_key:"wiki/README.md"},c=[e.put({key:"schemas/v1.md",body:_t(),content_type:"text/markdown"}),e.put({key:"indexes/root.md",body:lt(),content_type:"text/markdown"}),e.put({key:"wiki/README.md",body:ft(),content_type:"text/markdown"}),e.put({key:o,body:`${JSON.stringify(a)}
261
- `,content_type:"application/x-ndjson"})];return await Promise.all(c),{schema_key:"schemas/v1.md",root_index_key:"indexes/root.md",wiki_readme_key:"wiki/README.md",log_key:o,written:["schemas/v1.md","indexes/root.md","wiki/README.md",o]}}import{createHash as mt}from"crypto";import{existsSync as wt,readFileSync as Lt}from"fs";import{basename as kt}from"path";function Le(e,n){if(!e)throw Error(n);return e}function Et(e){let t=e.slice(13).split("/").filter(Boolean),r=t[0];if(r!=="file"&&r!=="source")throw Error("Invalid open-files ref. Expected open-files://file/<id>, open-files://file/<id>/revision/<revision_id>, or open-files://source/<id>/path/<path>.");let i=Le(t[1],"Invalid open-files ref. Missing id.");if(r==="file"){if(t.length===2)return{kind:"open-files",uri:e,entity:r,id:i};if(t[2]==="revision"&&t[3]&&t.length===4)return{kind:"open-files",uri:e,entity:r,id:i,revision_id:decodeURIComponent(t[3])};throw Error("Invalid open-files file ref. Expected open-files://file/<id>/revision/<revision_id>.")}let s=t.indexOf("path"),l=s>=0?decodeURIComponent(t.slice(s+1).join("/")):void 0;return{kind:"open-files",uri:e,entity:r,id:i,path:l}}function Tt(e){let n=new URL(e),t=Le(n.hostname,"Invalid s3 ref. Missing bucket."),r=decodeURIComponent(n.pathname.replace(/^\/+/,""));if(!r)throw Error("Invalid s3 ref. 
Missing object key.");return{kind:"s3",uri:e,bucket:t,key:r}}function pt(e){let n=new URL(e);return{kind:"file",uri:e,path:decodeURIComponent(n.pathname)}}function gt(e){let n=new URL(e);return{kind:"web",uri:e,url:n.toString()}}function j(e){if(e.startsWith("open-files://"))return Et(e);if(e.startsWith("s3://"))return Tt(e);if(e.startsWith("file://"))return pt(e);if(e.startsWith("https://")||e.startsWith("http://"))return gt(e);throw Error(`Unsupported source ref scheme: ${e}`)}function ke(e,n=j(e)){if(n.kind==="open-files"&&n.entity==="file"&&n.revision_id)return e.replace(/\/revision\/[^/]+$/,"");return e}function xe(e){let n=j(e);return n.kind==="open-files"&&n.entity==="file"?n.revision_id??null:null}import{createHash as yt,randomUUID as Ee}from"crypto";import{relative as ht,resolve as Ae,sep as Rt}from"path";function ve(e){let n=process.env[e];return n==="1"||n==="true"||n==="yes"}function z(e,n){let t=e,r=new Set(t.safety?.network?.allowed_s3_buckets??[]);if(e.storage.type==="s3"&&e.storage.s3?.bucket)r.add(e.storage.s3.bucket);if(process.env.HASNA_KNOWLEDGE_ALLOWED_S3_BUCKETS)for(let i of process.env.HASNA_KNOWLEDGE_ALLOWED_S3_BUCKETS.split(",").map((s)=>s.trim()).filter(Boolean))r.add(i);return{mode:e.mode,allowWriteRoots:[n.home,n.artifactsDir,n.cacheDir,n.exportsDir,n.indexesDir,n.logsDir,n.runsDir,n.schemasDir,n.wikiDir].map((i)=>Ae(i)),readOnlySourceAccess:!0,network:{webSearchEnabled:t.safety?.network?.web_search_enabled??ve("HASNA_KNOWLEDGE_WEB_SEARCH"),s3ReadsEnabled:t.safety?.network?.s3_reads_enabled??ve("HASNA_KNOWLEDGE_ALLOW_S3_READS"),allowedS3Buckets:[...r].sort()},redaction:{enabled:t.safety?.redaction?.enabled??!0},approvals:{generatedWritesRequireApproval:t.safety?.approvals?.generated_writes_require_approval??!0}}}function Nt(e,n){let t=ht(e,n);return t===""||!t.startsWith("..")&&t!==".."&&!t.startsWith(`..${Rt}`)}function K(e,n){let t=Ae(e);if(!n.allowWriteRoots.some((r)=>Nt(r,t)))throw Error(`Safety policy denied write outside 
.hasna/apps/knowledge: ${e}`)}function $(e,n){let r=new URL(e).hostname;if(!n.network.s3ReadsEnabled)throw Error("Safety policy denied S3 read. Set safety.network.s3_reads_enabled=true or HASNA_KNOWLEDGE_ALLOW_S3_READS=1.");if(!n.network.allowedS3Buckets.includes(r))throw Error(`Safety policy denied S3 bucket "${r}". Add it to safety.network.allowed_s3_buckets or HASNA_KNOWLEDGE_ALLOWED_S3_BUCKETS.`)}function Ie(e){if(!e.network.webSearchEnabled)throw Error("Safety policy denied web search. Set safety.network.web_search_enabled=true or HASNA_KNOWLEDGE_WEB_SEARCH=1.")}var bt=[{type:"private_key_block",severity:"high",regex:/-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/g,replacement:"[REDACTED:private_key_block]"},{type:"secret_assignment",severity:"high",regex:/\b(?:api[_-]?key|secret|token|password)\s*[:=]\s*['"]?[^'"\s]{8,}/gi,replacement:"[REDACTED:secret_assignment]"},{type:"openai_api_key",severity:"high",regex:/\bsk-[A-Za-z0-9_-]{20,}\b/g,replacement:"[REDACTED:openai_api_key]"},{type:"anthropic_api_key",severity:"high",regex:/\bsk-ant-[A-Za-z0-9_-]{20,}\b/g,replacement:"[REDACTED:anthropic_api_key]"},{type:"aws_access_key_id",severity:"high",regex:/\bA(?:KIA|SIA)[A-Z0-9]{16}\b/g,replacement:"[REDACTED:aws_access_key_id]"}];function te(e,n){if(n&&!n.redaction.enabled)return{text:e,findings:[]};let t=e,r=[];for(let i of bt)t=t.replace(i.regex,(s,...l)=>{let T=typeof l.at(-2)==="number"?l.at(-2):t.indexOf(s);return r.push({type:i.type,severity:i.severity,start:Math.max(0,T),end:Math.max(0,T+s.length)}),i.replacement});return{text:t,findings:r}}function St(e){return`audit_${yt("sha256").update(`${e.event_type}\x00${e.action}\x00${e.target_uri??""}\x00${e.created_at??""}\x00${JSON.stringify(e.metadata??{})}\x00${Ee()}`).digest("hex").slice(0,24)}`}function S(e,n){let t=n.created_at??new Date().toISOString(),r=St({...n,created_at:t});return e.run(`INSERT INTO audit_events (id, event_type, action, target_uri, decision, metadata_json, 
created_at)
262
- VALUES (?, ?, ?, ?, ?, ?, ?)`,[r,n.event_type,n.action,n.target_uri??null,n.decision,JSON.stringify(n.metadata??{}),t]),r}function ne(e,n){let t=n.created_at??new Date().toISOString();for(let r of n.findings)e.run(`INSERT INTO redaction_findings (id, source_uri, run_id, severity, finding_type, metadata_json, created_at)
263
- VALUES (?, ?, ?, ?, ?, ?, ?)`,[`redact_${Ee()}`,n.source_uri??null,n.run_id??null,r.severity,r.type,JSON.stringify({...n.metadata??{},start:r.start,end:r.end}),t]);return n.findings.length}function De(e,n){let t=n.created_at??new Date().toISOString(),r=`approval_${Ee()}`;return e.run(`INSERT INTO approval_gates (id, action, target_uri, status, reason, approved_by, metadata_json, created_at, updated_at)
264
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,[r,n.action,n.target_uri??null,"approved",n.reason??null,n.approved_by??"local-cli",JSON.stringify(n.metadata??{}),t,t]),{id:r,status:"approved"}}function Ot(e,n,t){let r=e.query(`SELECT id FROM approval_gates
260
+ `}async function Ie(e,n=new Date){let{year:t,month:r,day:i}=Tt(n),s="schemas/v1.md",_="indexes/root.md",E="wiki/README.md",o=`logs/${t}/${r}/${i}.jsonl`,a={ts:n.toISOString(),event:"wiki_layout_initialized",schema_key:"schemas/v1.md",root_index_key:"indexes/root.md",wiki_readme_key:"wiki/README.md"},c=[e.put({key:"schemas/v1.md",body:gt(),content_type:"text/markdown"}),e.put({key:"indexes/root.md",body:ht(),content_type:"text/markdown"}),e.put({key:"wiki/README.md",body:yt(),content_type:"text/markdown"}),e.put({key:o,body:`${JSON.stringify(a)}
261
+ `,content_type:"application/x-ndjson"})];return await Promise.all(c),{schema_key:"schemas/v1.md",root_index_key:"indexes/root.md",wiki_readme_key:"wiki/README.md",log_key:o,written:["schemas/v1.md","indexes/root.md","wiki/README.md",o]}}import{createHash as At}from"crypto";import{existsSync as It,readFileSync as Dt}from"fs";import{basename as Ut}from"path";function De(e,n){if(!e)throw Error(n);return e}function mt(e){let t=e.slice(13).split("/").filter(Boolean),r=t[0];if(r!=="file"&&r!=="source")throw Error("Invalid open-files ref. Expected open-files://file/<id>, open-files://file/<id>/revision/<revision_id>, or open-files://source/<id>/path/<path>.");let i=De(t[1],"Invalid open-files ref. Missing id.");if(r==="file"){if(t.length===2)return{kind:"open-files",uri:e,entity:r,id:i};if(t[2]==="revision"&&t[3]&&t.length===4)return{kind:"open-files",uri:e,entity:r,id:i,revision_id:decodeURIComponent(t[3])};throw Error("Invalid open-files file ref. Expected open-files://file/<id>/revision/<revision_id>.")}let s=t.indexOf("path"),_=s>=0?decodeURIComponent(t.slice(s+1).join("/")):void 0;return{kind:"open-files",uri:e,entity:r,id:i,path:_}}function Rt(e){let n=new URL(e),t=De(n.hostname,"Invalid s3 ref. Missing bucket."),r=decodeURIComponent(n.pathname.replace(/^\/+/,""));if(!r)throw Error("Invalid s3 ref. 
Missing object key.");return{kind:"s3",uri:e,bucket:t,key:r}}function bt(e){let n=new URL(e);return{kind:"file",uri:e,path:decodeURIComponent(n.pathname)}}function St(e){let n=new URL(e);return{kind:"web",uri:e,url:n.toString()}}function L(e){if(e.startsWith("open-files://"))return mt(e);if(e.startsWith("s3://"))return Rt(e);if(e.startsWith("file://"))return bt(e);if(e.startsWith("https://")||e.startsWith("http://"))return St(e);throw Error(`Unsupported source ref scheme: ${e}`)}function Ue(e,n=L(e)){if(n.kind==="open-files"&&n.entity==="file"&&n.revision_id)return e.replace(/\/revision\/[^/]+$/,"");return e}function Ce(e){let n=L(e);return n.kind==="open-files"&&n.entity==="file"?n.revision_id??null:null}import{createHash as xt,randomUUID as ye}from"crypto";import{relative as Nt,resolve as Xe,sep as Ot}from"path";function je(e){let n=process.env[e];return n==="1"||n==="true"||n==="yes"}function Y(e,n){let t=e,r=new Set(t.safety?.network?.allowed_s3_buckets??[]);if(e.storage.type==="s3"&&e.storage.s3?.bucket)r.add(e.storage.s3.bucket);if(process.env.HASNA_KNOWLEDGE_ALLOWED_S3_BUCKETS)for(let i of process.env.HASNA_KNOWLEDGE_ALLOWED_S3_BUCKETS.split(",").map((s)=>s.trim()).filter(Boolean))r.add(i);return{mode:e.mode,allowWriteRoots:[n.home,n.artifactsDir,n.cacheDir,n.exportsDir,n.indexesDir,n.logsDir,n.runsDir,n.schemasDir,n.wikiDir].map((i)=>Xe(i)),readOnlySourceAccess:!0,network:{webSearchEnabled:t.safety?.network?.web_search_enabled??je("HASNA_KNOWLEDGE_WEB_SEARCH"),s3ReadsEnabled:t.safety?.network?.s3_reads_enabled??je("HASNA_KNOWLEDGE_ALLOW_S3_READS"),allowedS3Buckets:[...r].sort()},redaction:{enabled:t.safety?.redaction?.enabled??!0},approvals:{generatedWritesRequireApproval:t.safety?.approvals?.generated_writes_require_approval??!0}}}function wt(e,n){let t=Nt(e,n);return t===""||!t.startsWith("..")&&t!==".."&&!t.startsWith(`..${Ot}`)}function K(e,n){let t=Xe(e);if(!n.allowWriteRoots.some((r)=>wt(r,t)))throw Error(`Safety policy denied write outside 
.hasna/apps/knowledge: ${e}`)}function F(e,n){let r=new URL(e).hostname;if(!n.network.s3ReadsEnabled)throw Error("Safety policy denied S3 read. Set safety.network.s3_reads_enabled=true or HASNA_KNOWLEDGE_ALLOW_S3_READS=1.");if(!n.network.allowedS3Buckets.includes(r))throw Error(`Safety policy denied S3 bucket "${r}". Add it to safety.network.allowed_s3_buckets or HASNA_KNOWLEDGE_ALLOWED_S3_BUCKETS.`)}function ne(e){if(!e.network.webSearchEnabled)throw Error("Safety policy denied web search. Set safety.network.web_search_enabled=true or HASNA_KNOWLEDGE_WEB_SEARCH=1.")}var kt=[{type:"private_key_block",severity:"high",regex:/-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/g,replacement:"[REDACTED:private_key_block]"},{type:"secret_assignment",severity:"high",regex:/\b(?:api[_-]?key|secret|token|password)\s*[:=]\s*['"]?[^'"\s]{8,}/gi,replacement:"[REDACTED:secret_assignment]"},{type:"openai_api_key",severity:"high",regex:/\bsk-[A-Za-z0-9_-]{20,}\b/g,replacement:"[REDACTED:openai_api_key]"},{type:"anthropic_api_key",severity:"high",regex:/\bsk-ant-[A-Za-z0-9_-]{20,}\b/g,replacement:"[REDACTED:anthropic_api_key]"},{type:"aws_access_key_id",severity:"high",regex:/\bA(?:KIA|SIA)[A-Z0-9]{16}\b/g,replacement:"[REDACTED:aws_access_key_id]"}];function re(e,n){if(n&&!n.redaction.enabled)return{text:e,findings:[]};let t=e,r=[];for(let i of kt)t=t.replace(i.regex,(s,..._)=>{let E=typeof _.at(-2)==="number"?_.at(-2):t.indexOf(s);return r.push({type:i.type,severity:i.severity,start:Math.max(0,E),end:Math.max(0,E+s.length)}),i.replacement});return{text:t,findings:r}}function Lt(e){return`audit_${xt("sha256").update(`${e.event_type}\x00${e.action}\x00${e.target_uri??""}\x00${e.created_at??""}\x00${JSON.stringify(e.metadata??{})}\x00${ye()}`).digest("hex").slice(0,24)}`}function S(e,n){let t=n.created_at??new Date().toISOString(),r=Lt({...n,created_at:t});return e.run(`INSERT INTO audit_events (id, event_type, action, target_uri, decision, metadata_json, 
created_at)
262
+ VALUES (?, ?, ?, ?, ?, ?, ?)`,[r,n.event_type,n.action,n.target_uri??null,n.decision,JSON.stringify(n.metadata??{}),t]),r}function ie(e,n){let t=n.created_at??new Date().toISOString();for(let r of n.findings)e.run(`INSERT INTO redaction_findings (id, source_uri, run_id, severity, finding_type, metadata_json, created_at)
263
+ VALUES (?, ?, ?, ?, ?, ?, ?)`,[`redact_${ye()}`,n.source_uri??null,n.run_id??null,r.severity,r.type,JSON.stringify({...n.metadata??{},start:r.start,end:r.end}),t]);return n.findings.length}function Fe(e,n){let t=n.created_at??new Date().toISOString(),r=`approval_${ye()}`;return e.run(`INSERT INTO approval_gates (id, action, target_uri, status, reason, approved_by, metadata_json, created_at, updated_at)
264
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,[r,n.action,n.target_uri??null,"approved",n.reason??null,n.approved_by??"local-cli",JSON.stringify(n.metadata??{}),t,t]),{id:r,status:"approved"}}function vt(e,n,t){let r=e.query(`SELECT id FROM approval_gates
265
265
  WHERE action = ? AND status = 'approved' AND (target_uri IS NULL OR target_uri = ? OR ? IS NULL)
266
- ORDER BY updated_at DESC LIMIT 1`).get(n,t??null,t??null);return Boolean(r)}function Ue(e,n,t,r){let i=t==="generated_write"&&n.approvals.generatedWritesRequireApproval,s=!i||Ot(e,t,r);return{action:t,target_uri:r??null,approval_required:i,approved:s,decision:s?"allow":"requires_approval"}}function Te(e,n){return`${e}_${mt("sha256").update(n).digest("hex").slice(0,20)}`}function B(e){return e&&typeof e==="object"&&!Array.isArray(e)?e:void 0}function y(e){return typeof e==="string"&&e.length>0?e:void 0}function xt(e){return typeof e==="number"&&Number.isFinite(e)?e:void 0}function vt(e){let n=y(e.source_ref)??y(e.source_uri)??y(e.uri);if(n)return n;let t=y(e.file_id);if(t){let s=y(e.revision_id)??y(e.revision),l=`open-files://file/${encodeURIComponent(t)}`;return s?`${l}/revision/${encodeURIComponent(s)}`:l}let r=y(e.source_id),i=y(e.path);if(r&&i)return`open-files://source/${encodeURIComponent(r)}/path/${encodeURIComponent(i)}`;throw Error("Manifest item is missing source_ref, file_id, or source_id/path.")}function At(e,n){if(n.kind==="open-files"&&n.entity==="file"&&n.revision_id)return e.replace(/\/revision\/[^/]+$/,"");return e}function It(e){let n=y(e.extracted_text)??y(e.text)??y(e.content_text)??y(e.markdown);if(n!==void 0)return n;let t=e.content;return typeof t==="string"?t:null}function Dt(e){let n=y(e.extracted_text_ref)??y(e.extracted_text_uri)??y(e.text_ref);if(n)return n;let t=B(e.content);return y(t?.extracted_text_ref)??y(t?.extracted_text_uri)??null}function Ut(e){let n=y(e.path);return y(e.title)??y(e.name)??(n?kt(n):null)}function Ct(e){return y(e.hash)??y(e.checksum)??y(e.sha256)??null}function Xt(e,n,t){return y(e.revision_id)??y(e.revision)??y(e.version_id)??(n.kind==="open-files"?n.revision_id:void 0)??t??y(e.updated_at)??"current"}function jt(e,n){let t={};for(let[r,i]of Object.entries(e)){if(["text","content","content_text","extracted_text","markdown"].includes(r))continue;t[r]=i}return 
t.source_ref=n.sourceRef,t.source_uri=n.sourceUri,t.status=n.status,t}function Ft(e,n){let t=vt(e),r=j(t),i=At(t,r),s=Ct(e),l=y(e.status)??"active";return{raw:e,sourceRef:t,sourceUri:i,kind:r.kind,title:Ut(e),revision:Xt(e,r,s),hash:s,extractedTextUri:Dt(e),text:It(e),metadata:jt(e,{sourceRef:t,sourceUri:i,status:l}),acl:e.permissions??e.acl??{},status:l,updatedAt:y(e.updated_at)??n}}function Mt(e){let n=e.trim();if(!n)return[];if(n.startsWith("[")){let t=JSON.parse(n);if(!Array.isArray(t))throw Error("Manifest array parse failed.");return t.map((r)=>{let i=B(r);if(!i)throw Error("Manifest array entries must be objects.");return i})}if(n.startsWith("{"))try{let t=JSON.parse(n),r=B(t);if(!r)throw Error("Manifest object parse failed.");if(Array.isArray(r.items))return r.items.map((i)=>{let s=B(i);if(!s)throw Error("Manifest items entries must be objects.");return s});if("source_ref"in r||"source_uri"in r||"file_id"in r)return[r]}catch(t){let r=n.split(/\r?\n/).filter((i)=>i.trim().length>0);if(r.length<=1)throw t;return r.map((i)=>{let s=B(JSON.parse(i));if(!s)throw Error("Manifest JSONL entries must be objects.");return s})}return n.split(/\r?\n/).filter((t)=>t.trim().length>0).map((t)=>{let r=B(JSON.parse(t));if(!r)throw Error("Manifest JSONL entries must be objects.");return r})}async function Kt(e,n,t){let r=new URL(e),i=r.hostname,s=decodeURIComponent(r.pathname.replace(/^\/+/,""));if(!i||!s)throw Error(`Invalid S3 manifest URI: ${e}`);if(t)$(e,t);let[{S3Client:l,GetObjectCommand:T},{fromIni:o}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]),a=n?.storage.type==="s3"&&n.storage.s3?.bucket===i?n.storage.s3:void 0,u=await new l({region:a?.region,credentials:a?.profile?o({profile:a.profile}):void 0,maxAttempts:a?.max_attempts}).send(new T({Bucket:i,Key:s}));if(!u.Body)return"";return await u.Body.transformToString()}async function $t(e,n,t){if(e.startsWith("s3://"))return Kt(e,n,t);if(!wt(e))throw Error(`Manifest not found: 
${e}`);return Lt(e,"utf8")}function Bt(e,n,t){let r=e.replace(/\r\n/g,`
267
- `);if(!r.trim())return[];let i=[],s=0;while(s<r.length){let l=Math.min(r.length,s+n),T=l;if(l<r.length){let a=r.lastIndexOf(`
266
+ ORDER BY updated_at DESC LIMIT 1`).get(n,t??null,t??null);return Boolean(r)}function Me(e,n,t,r){let i=t==="generated_write"&&n.approvals.generatedWritesRequireApproval,s=!i||vt(e,t,r);return{action:t,target_uri:r??null,approval_required:i,approved:s,decision:s?"allow":"requires_approval"}}function me(e,n){return`${e}_${At("sha256").update(n).digest("hex").slice(0,20)}`}function $(e){return e&&typeof e==="object"&&!Array.isArray(e)?e:void 0}function h(e){return typeof e==="string"&&e.length>0?e:void 0}function Ct(e){return typeof e==="number"&&Number.isFinite(e)?e:void 0}function jt(e){let n=h(e.source_ref)??h(e.source_uri)??h(e.uri);if(n)return n;let t=h(e.file_id);if(t){let s=h(e.revision_id)??h(e.revision),_=`open-files://file/${encodeURIComponent(t)}`;return s?`${_}/revision/${encodeURIComponent(s)}`:_}let r=h(e.source_id),i=h(e.path);if(r&&i)return`open-files://source/${encodeURIComponent(r)}/path/${encodeURIComponent(i)}`;throw Error("Manifest item is missing source_ref, file_id, or source_id/path.")}function Xt(e,n){if(n.kind==="open-files"&&n.entity==="file"&&n.revision_id)return e.replace(/\/revision\/[^/]+$/,"");return e}function Ft(e){let n=h(e.extracted_text)??h(e.text)??h(e.content_text)??h(e.markdown);if(n!==void 0)return n;let t=e.content;return typeof t==="string"?t:null}function Mt(e){let n=h(e.extracted_text_ref)??h(e.extracted_text_uri)??h(e.text_ref);if(n)return n;let t=$(e.content);return h(t?.extracted_text_ref)??h(t?.extracted_text_uri)??null}function Kt(e){let n=h(e.path);return h(e.title)??h(e.name)??(n?Ut(n):null)}function Pt(e){return h(e.hash)??h(e.checksum)??h(e.sha256)??null}function $t(e,n,t){return h(e.revision_id)??h(e.revision)??h(e.version_id)??(n.kind==="open-files"?n.revision_id:void 0)??t??h(e.updated_at)??"current"}function Bt(e,n){let t={};for(let[r,i]of Object.entries(e)){if(["text","content","content_text","extracted_text","markdown"].includes(r))continue;t[r]=i}return 
t.source_ref=n.sourceRef,t.source_uri=n.sourceUri,t.status=n.status,t}function Wt(e,n){let t=jt(e),r=L(t),i=Xt(t,r),s=Pt(e),_=h(e.status)??"active";return{raw:e,sourceRef:t,sourceUri:i,kind:r.kind,title:Kt(e),revision:$t(e,r,s),hash:s,extractedTextUri:Mt(e),text:Ft(e),metadata:Bt(e,{sourceRef:t,sourceUri:i,status:_}),acl:e.permissions??e.acl??{},status:_,updatedAt:h(e.updated_at)??n}}function zt(e){let n=e.trim();if(!n)return[];if(n.startsWith("[")){let t=JSON.parse(n);if(!Array.isArray(t))throw Error("Manifest array parse failed.");return t.map((r)=>{let i=$(r);if(!i)throw Error("Manifest array entries must be objects.");return i})}if(n.startsWith("{"))try{let t=JSON.parse(n),r=$(t);if(!r)throw Error("Manifest object parse failed.");if(Array.isArray(r.items))return r.items.map((i)=>{let s=$(i);if(!s)throw Error("Manifest items entries must be objects.");return s});if("source_ref"in r||"source_uri"in r||"file_id"in r)return[r]}catch(t){let r=n.split(/\r?\n/).filter((i)=>i.trim().length>0);if(r.length<=1)throw t;return r.map((i)=>{let s=$(JSON.parse(i));if(!s)throw Error("Manifest JSONL entries must be objects.");return s})}return n.split(/\r?\n/).filter((t)=>t.trim().length>0).map((t)=>{let r=$(JSON.parse(t));if(!r)throw Error("Manifest JSONL entries must be objects.");return r})}async function Ht(e,n,t){let r=new URL(e),i=r.hostname,s=decodeURIComponent(r.pathname.replace(/^\/+/,""));if(!i||!s)throw Error(`Invalid S3 manifest URI: ${e}`);if(t)F(e,t);let[{S3Client:_,GetObjectCommand:E},{fromIni:o}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]),a=n?.storage.type==="s3"&&n.storage.s3?.bucket===i?n.storage.s3:void 0,u=await new _({region:a?.region,credentials:a?.profile?o({profile:a.profile}):void 0,maxAttempts:a?.max_attempts}).send(new E({Bucket:i,Key:s}));if(!u.Body)return"";return await u.Body.transformToString()}async function qt(e,n,t){if(e.startsWith("s3://"))return Ht(e,n,t);if(!It(e))throw Error(`Manifest not found: 
${e}`);return Dt(e,"utf8")}function Yt(e,n,t){let r=e.replace(/\r\n/g,`
267
+ `);if(!r.trim())return[];let i=[],s=0;while(s<r.length){let _=Math.min(r.length,s+n),E=_;if(_<r.length){let a=r.lastIndexOf(`
268
268
 
269
- `,l),c=r.lastIndexOf(". ",l),u=Math.max(a,c);if(u>s+Math.floor(n*0.5))T=u+(u===a?2:1)}let o=r.slice(s,T).trim();if(o)i.push({ordinal:i.length,text:o,startOffset:s,endOffset:T});if(T>=r.length)break;s=Math.max(0,T-t)}return i}function Wt(e){let n=e.trim().split(/\s+/).filter(Boolean).length;return Math.max(1,Math.ceil(n*1.25))}function Pt(e,n){let t=e.query("SELECT id FROM chunks WHERE source_revision_id = ?").all(n);for(let r of t)e.run("DELETE FROM chunks_fts WHERE chunk_id = ?",[r.id]);return e.run("DELETE FROM chunks WHERE source_revision_id = ?",[n]),t.length}function Ht(e,n,t){let r=Te("src",n.sourceUri);e.run(`INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
269
+ `,_),c=r.lastIndexOf(". ",_),u=Math.max(a,c);if(u>s+Math.floor(n*0.5))E=u+(u===a?2:1)}let o=r.slice(s,E).trim();if(o)i.push({ordinal:i.length,text:o,startOffset:s,endOffset:E});if(E>=r.length)break;s=Math.max(0,E-t)}return i}function Jt(e){let n=e.trim().split(/\s+/).filter(Boolean).length;return Math.max(1,Math.ceil(n*1.25))}function Gt(e,n){let t=e.query("SELECT id FROM chunks WHERE source_revision_id = ?").all(n);for(let r of t)e.run("DELETE FROM chunks_fts WHERE chunk_id = ?",[r.id]);return e.run("DELETE FROM chunks WHERE source_revision_id = ?",[n]),t.length}function Vt(e,n,t){let r=me("src",n.sourceUri);e.run(`INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
270
270
  VALUES (?, ?, ?, ?, ?, ?, ?, ?)
271
271
  ON CONFLICT(uri) DO UPDATE SET
272
272
  kind = excluded.kind,
273
273
  title = excluded.title,
274
274
  metadata_json = excluded.metadata_json,
275
275
  acl_json = excluded.acl_json,
276
- updated_at = excluded.updated_at`,[r,n.sourceUri,n.kind,n.title,JSON.stringify(n.metadata),JSON.stringify(n.acl??{}),t,n.updatedAt]);let i=e.query("SELECT id FROM sources WHERE uri = ?").get(n.sourceUri);if(!i)throw Error(`Failed to upsert source: ${n.sourceUri}`);return i.id}function qt(e,n,t,r){let i=Te("rev",`${n}\x00${t.revision}`);e.run(`INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)
276
+ updated_at = excluded.updated_at`,[r,n.sourceUri,n.kind,n.title,JSON.stringify(n.metadata),JSON.stringify(n.acl??{}),t,n.updatedAt]);let i=e.query("SELECT id FROM sources WHERE uri = ?").get(n.sourceUri);if(!i)throw Error(`Failed to upsert source: ${n.sourceUri}`);return i.id}function Qt(e,n,t,r){let i=me("rev",`${n}\x00${t.revision}`);e.run(`INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)
277
277
  VALUES (?, ?, ?, ?, ?, ?, ?)
278
278
  ON CONFLICT(source_id, revision) DO UPDATE SET
279
279
  hash = excluded.hash,
280
280
  extracted_text_uri = excluded.extracted_text_uri,
281
- metadata_json = excluded.metadata_json`,[i,n,t.revision,t.hash,t.extractedTextUri,JSON.stringify(t.metadata),r]);let s=e.query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?").get(n,t.revision);if(!s)throw Error(`Failed to upsert source revision: ${t.sourceRef}`);return s.id}function zt(e,n,t,r,i,s,l){if(!t.text||t.status.toLowerCase()==="deleted")return{chunksInserted:0,redactions:0};let T=te(t.text,l);if(T.findings.length>0)ne(e,{source_uri:t.sourceUri,findings:T.findings,metadata:{source_ref:t.sourceRef,revision:t.revision},created_at:r}),S(e,{event_type:"redaction",action:"source_text_redact",target_uri:t.sourceUri,decision:"redacted",metadata:{findings:T.findings.length,source_ref:t.sourceRef,revision:t.revision},created_at:r});let o=Bt(T.text,i,s);for(let a of o){let c=Te("chk",`${n}\x00${a.ordinal}\x00${a.text}`),u={source_ref:t.sourceRef,source_uri:t.sourceUri,hash:t.hash,status:t.status,path:y(t.raw.path)??null,mime:y(t.raw.mime)??y(t.raw.content_type)??null,size:xt(t.raw.size)??null};e.run(`INSERT INTO chunks (id, source_revision_id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json, created_at)
282
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,[c,n,"source",a.ordinal,a.text,Wt(a.text),a.startOffset,a.endOffset,JSON.stringify(u),r]),e.run("INSERT INTO chunks_fts (chunk_id, text, title, source_uri) VALUES (?, ?, ?, ?)",[c,a.text,t.title??"",t.sourceUri])}return{chunksInserted:o.length,redactions:T.findings.length}}async function Ce(e){let n=(e.now??new Date).toISOString(),t=e.maxChunkChars??4000,r=e.chunkOverlapChars??200;if(t<500)throw Error("maxChunkChars must be at least 500.");if(r<0||r>=t)throw Error("chunkOverlapChars must be less than maxChunkChars.");if(e.safetyPolicy)K(e.dbPath,e.safetyPolicy);D(e.dbPath);let i=await $t(e.input,e.config,e.safetyPolicy),s=Mt(i),l=I(e.dbPath);try{return l.transaction(()=>{let o=new Set,a=new Set,c=0,u=0,_=0,d=0;S(l,{event_type:"source_read",action:e.input.startsWith("s3://")?"s3_manifest_read":"local_manifest_read",target_uri:e.input,decision:"allow",metadata:{items:s.length,read_only:!0},created_at:n});for(let f of s){let p=Ft(f,n),E=Ht(l,p,n),O=qt(l,E,p,n);if(o.add(E),a.add(O),p.text||p.status.toLowerCase()==="deleted")u+=Pt(l,O);let h=zt(l,O,p,n,t,r,e.safetyPolicy);c+=h.chunksInserted,_+=h.redactions}return S(l,{event_type:"write",action:"knowledge_manifest_ingest",target_uri:e.dbPath,decision:"allow",metadata:{items:s.length,sources:o.size,revisions:a.size,chunks_inserted:c,redactions:_},created_at:n}),{path:e.input,db_path:e.dbPath,items_seen:s.length,sources_upserted:o.size,revisions_upserted:a.size,chunks_inserted:c,chunks_deleted:u,redactions:_,skipped:d}})()}finally{l.close()}}import{createHash as Yt,randomUUID as Jt}from"crypto";import{existsSync as Gt,readFileSync as Vt}from"fs";import{basename as Qt}from"path";function re(e,n){return`${e}_${Yt("sha256").update(n).digest("hex").slice(0,20)}`}function W(e){return e&&typeof e==="object"&&!Array.isArray(e)?e:void 0}function b(e){return typeof e==="string"&&e.length>0?e:void 0}function Zt(e){let n=b(e.source_ref)??b(e.source_uri)??b(e.uri);if(n)return n;let 
t=b(e.file_id);if(t){let s=b(e.revision_id)??b(e.revision),l=`open-files://file/${encodeURIComponent(t)}`;return s?`${l}/revision/${encodeURIComponent(s)}`:l}let r=b(e.source_id),i=b(e.path);if(r&&i)return`open-files://source/${encodeURIComponent(r)}/path/${encodeURIComponent(i)}`;throw Error("Outbox event is missing source_ref, file_id, or source_id/path.")}function en(e,n){if(n.kind==="open-files"&&n.entity==="file"&&n.revision_id)return e.replace(/\/revision\/[^/]+$/,"");return e}function tn(e){return b(e.hash)??b(e.checksum)??b(e.sha256)??null}function nn(e,n,t){return b(e.revision_id)??b(e.revision)??b(e.version_id)??(n.kind==="open-files"?n.revision_id:void 0)??t??null}function rn(e){return(b(e.event)??b(e.type)??b(e.action)??b(e.change_type)??"changed").toLowerCase()}function sn(e){let n=b(e.path);return b(e.title)??b(e.name)??(n?Qt(n):null)}function on(e,n){let t=Zt(e),r=j(t),i=tn(e);return{raw:e,eventType:rn(e),sourceRef:t,sourceUri:en(t,r),kind:r.kind,title:sn(e),revision:nn(e,r,i),hash:i,status:b(e.status)?.toLowerCase()??null,updatedAt:b(e.updated_at)??n,acl:e.permissions??e.acl??void 0}}function an(e){let n=e.trim();if(!n)return[];if(n.startsWith("[")){let t=JSON.parse(n);if(!Array.isArray(t))throw Error("Outbox array parse failed.");return t.map((r)=>{let i=W(r);if(!i)throw Error("Outbox array entries must be objects.");return i})}if(n.startsWith("{"))try{let t=JSON.parse(n),r=W(t);if(!r)throw Error("Outbox object parse failed.");if(Array.isArray(r.events))return r.events.map((i)=>{let s=W(i);if(!s)throw Error("Outbox events entries must be objects.");return s});if("source_ref"in r||"source_uri"in r||"file_id"in r)return[r]}catch(t){let r=n.split(/\r?\n/).filter((i)=>i.trim().length>0);if(r.length<=1)throw t;return r.map((i)=>{let s=W(JSON.parse(i));if(!s)throw Error("Outbox JSONL entries must be objects.");return s})}return n.split(/\r?\n/).filter((t)=>t.trim().length>0).map((t)=>{let r=W(JSON.parse(t));if(!r)throw Error("Outbox JSONL entries must be 
objects.");return r})}async function un(e,n,t){let r=new URL(e),i=r.hostname,s=decodeURIComponent(r.pathname.replace(/^\/+/,""));if(!i||!s)throw Error(`Invalid S3 outbox URI: ${e}`);if(t)$(e,t);let[{S3Client:l,GetObjectCommand:T},{fromIni:o}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]),a=n?.storage.type==="s3"&&n.storage.s3?.bucket===i?n.storage.s3:void 0,u=await new l({region:a?.region,credentials:a?.profile?o({profile:a.profile}):void 0,maxAttempts:a?.max_attempts}).send(new T({Bucket:i,Key:s}));if(!u.Body)return"";return await u.Body.transformToString()}async function cn(e,n,t){if(e.startsWith("s3://"))return un(e,n,t);if(!Gt(e))throw Error(`Outbox not found: ${e}`);return Vt(e,"utf8")}function Xe(e,n){let t={};if(e)try{t=W(JSON.parse(e))??{}}catch{t={}}return JSON.stringify({...t,...n})}function dn(e,n,t){let r=re("src",n.sourceUri);e.run(`INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
283
- VALUES (?, ?, ?, ?, ?, ?, ?, ?)
284
- ON CONFLICT(uri) DO UPDATE SET
285
- kind = excluded.kind,
286
- title = COALESCE(excluded.title, sources.title),
287
- updated_at = excluded.updated_at`,[r,n.sourceUri,n.kind,n.title,JSON.stringify({source_ref:n.sourceRef,source_uri:n.sourceUri,status:n.status,last_outbox_event:n.eventType}),JSON.stringify(n.acl??{}),t,n.updatedAt]);let i=e.query("SELECT id, metadata_json, acl_json FROM sources WHERE uri = ?").get(n.sourceUri);if(!i)throw Error(`Failed to upsert source for outbox event: ${n.sourceUri}`);let s={source_ref:n.sourceRef,source_uri:n.sourceUri,last_outbox_event:n.eventType,last_outbox_at:n.updatedAt};if(n.status)s.status=n.status;if(b(n.raw.path))s.path=n.raw.path;return e.run("UPDATE sources SET metadata_json = ?, acl_json = CASE WHEN ? IS NULL THEN acl_json ELSE ? END, updated_at = ? WHERE id = ?",[Xe(i.metadata_json,s),n.acl===void 0?null:JSON.stringify(n.acl),n.acl===void 0?null:JSON.stringify(n.acl),n.updatedAt,i.id]),i.id}function _n(e,n,t,r){if(!t.revision)return null;let i=re("rev",`${n}\x00${t.revision}`),s={source_ref:t.sourceRef,source_uri:t.sourceUri,status:t.status,last_outbox_event:t.eventType,reindex_required:!0};return e.run(`INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)
288
- VALUES (?, ?, ?, ?, ?, ?, ?)
289
- ON CONFLICT(source_id, revision) DO UPDATE SET
290
- hash = COALESCE(excluded.hash, source_revisions.hash),
291
- metadata_json = excluded.metadata_json`,[i,n,t.revision,t.hash,b(t.raw.extracted_text_ref)??null,JSON.stringify(s),r]),e.query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?").get(n,t.revision)?.id??null}function ln(e,n,t){if(t.revision)return e.query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?").all(n,t.revision).map((r)=>r.id);if(t.hash)return e.query("SELECT id FROM source_revisions WHERE source_id = ? AND hash = ?").all(n,t.hash).map((r)=>r.id);return e.query("SELECT id FROM source_revisions WHERE source_id = ?").all(n).map((r)=>r.id)}function fn(e,n){let t=e.query("SELECT id FROM chunks WHERE source_revision_id = ?").all(n),r=0;for(let s of t){let l=e.query("SELECT COUNT(*) AS n FROM chunk_embeddings WHERE chunk_id = ?").get(s.id);r+=l?.n??0,e.run("DELETE FROM chunk_embeddings WHERE chunk_id = ?",[s.id]),e.run("DELETE FROM chunks_fts WHERE chunk_id = ?",[s.id])}e.run("DELETE FROM chunks WHERE source_revision_id = ?",[n]);let i=e.query("SELECT metadata_json FROM source_revisions WHERE id = ?").get(n);return e.run("UPDATE source_revisions SET metadata_json = ? WHERE id = ?",[Xe(i?.metadata_json,{reindex_required:!0,invalidated_at:new Date().toISOString()}),n]),{chunksDeleted:t.length,embeddingsDeleted:r}}function En(e,n){return n==="deleted"||["delete","deleted","remove","removed"].includes(e)}function Tn(e){return["move","moved","rename","renamed","path_changed"].includes(e)}function pn(e){return["permission","permissions","permission_changed","acl_changed"].includes(e)}async function je(e){let n=(e.now??new Date).toISOString();if(e.safetyPolicy)K(e.dbPath,e.safetyPolicy);D(e.dbPath);let t=await cn(e.input,e.config,e.safetyPolicy),r=an(t),i=I(e.dbPath),s=`run_${Jt()}`;try{return i.transaction(()=>{i.run(`INSERT INTO runs (id, type, prompt, status, provider, model, metadata_json, created_at, updated_at)
292
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,[s,"open-files-outbox",e.input,"completed","local","open-files-outbox",JSON.stringify({path:e.input,events:r.length}),n,n]);let l=new Set,T=new Set,o=0,a=0,c=0,u=0,_=0,d=0;return S(i,{event_type:"source_read",action:e.input.startsWith("s3://")?"s3_outbox_read":"local_outbox_read",target_uri:e.input,decision:"allow",metadata:{events:r.length,read_only:!0},created_at:n}),r.forEach((f,p)=>{let E=on(f,n),O=dn(i,E,n);l.add(O);let h=_n(i,O,E,n);if(h)T.add(h);let m=ln(i,O,E);for(let g of m){T.add(g);let x=fn(i,g);o+=x.chunksDeleted,a+=x.embeddingsDeleted,c+=1}if(En(E.eventType,E.status))u+=1;if(Tn(E.eventType))_+=1;if(pn(E.eventType)||E.acl!==void 0)d+=1;i.run(`INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
293
- VALUES (?, ?, ?, ?, ?, ?)`,[re("evt",`${s}\x00${p}\x00${E.sourceRef}\x00${E.eventType}`),s,"info",E.eventType,JSON.stringify({source_ref:E.sourceRef,source_uri:E.sourceUri,revision:E.revision,hash:E.hash,status:E.status,affected_revisions:m.length}),E.updatedAt])}),i.run(`INSERT INTO provider_usage (id, run_id, provider, model, input_tokens, output_tokens, cost_usd, metadata_json, created_at)
294
- VALUES (?, ?, ?, ?, 0, 0, 0, ?, ?)`,[re("usage",s),s,"local","open-files-outbox",JSON.stringify({note:"No model provider used for outbox invalidation."}),n]),S(i,{event_type:"write",action:"knowledge_outbox_invalidation",target_uri:e.dbPath,decision:"allow",metadata:{run_id:s,events:r.length,sources:l.size,revisions:T.size,chunks_deleted:o,embeddings_deleted:a},created_at:n}),{path:e.input,db_path:e.dbPath,run_id:s,events_seen:r.length,sources_touched:l.size,revisions_touched:T.size,chunks_deleted:o,embeddings_deleted:a,stale_revisions:c,deleted_sources:u,moved_sources:_,permission_updates:d}})()}finally{i.close()}}function ie(e){if(!e)return{};try{let n=JSON.parse(e);return n&&typeof n==="object"&&!Array.isArray(n)?n:{}}catch{return{}}}function P(e,n){for(let t of n){let r=e[t];if(typeof r==="string"&&r.length>0)return r}return null}function Fe(e,n){for(let t of n){let r=e[t];if(typeof r==="number"&&Number.isFinite(r))return r}return null}function gn(e,n){let t=e.mode;if(typeof t==="string"&&t!=="read_only")throw Error(`Source resolver denied ${n}. Permission mode is ${t}, expected read_only.`);let r=e.denied_purposes;if(Array.isArray(r)&&r.includes(n))throw Error(`Source resolver denied ${n}. Purpose is explicitly denied.`);let i=e.allowed_purposes;if(Array.isArray(i)&&i.length>0&&!i.includes(n))throw Error(`Source resolver denied ${n}. Allowed purposes: ${i.join(", ")}`)}function yn(e,n,t){if(!n)return t;try{let r=j(e);if(r.kind==="open-files"&&r.entity==="file")return`${e}/revision/${encodeURIComponent(n.revision)}`}catch{return t}return t}function hn(e,n,t){return e.query(`SELECT id, uri, kind, title, metadata_json, acl_json, updated_at
281
+ metadata_json = excluded.metadata_json`,[i,n,t.revision,t.hash,t.extractedTextUri,JSON.stringify(t.metadata),r]);let s=e.query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?").get(n,t.revision);if(!s)throw Error(`Failed to upsert source revision: ${t.sourceRef}`);return s.id}function Zt(e,n,t,r,i,s,_){if(!t.text||t.status.toLowerCase()==="deleted")return{chunksInserted:0,redactions:0};let E=re(t.text,_);if(E.findings.length>0)ie(e,{source_uri:t.sourceUri,findings:E.findings,metadata:{source_ref:t.sourceRef,revision:t.revision},created_at:r}),S(e,{event_type:"redaction",action:"source_text_redact",target_uri:t.sourceUri,decision:"redacted",metadata:{findings:E.findings.length,source_ref:t.sourceRef,revision:t.revision},created_at:r});let o=Yt(E.text,i,s);for(let a of o){let c=me("chk",`${n}\x00${a.ordinal}\x00${a.text}`),u={source_ref:t.sourceRef,source_uri:t.sourceUri,hash:t.hash,status:t.status,path:h(t.raw.path)??null,mime:h(t.raw.mime)??h(t.raw.content_type)??null,size:Ct(t.raw.size)??null};e.run(`INSERT INTO chunks (id, source_revision_id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json, created_at)
282
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,[c,n,"source",a.ordinal,a.text,Jt(a.text),a.startOffset,a.endOffset,JSON.stringify(u),r]),e.run("INSERT INTO chunks_fts (chunk_id, text, title, source_uri) VALUES (?, ?, ?, ?)",[c,a.text,t.title??"",t.sourceUri])}return{chunksInserted:o.length,redactions:E.findings.length}}async function Ke(e){let n=e.now??new Date;if(e.safetyPolicy)K(e.dbPath,e.safetyPolicy);D(e.dbPath);let t=await qt(e.input,e.config,e.safetyPolicy),r=zt(t);return Re({dbPath:e.dbPath,items:r,sourceLabel:e.input,safetyPolicy:e.safetyPolicy,now:n,maxChunkChars:e.maxChunkChars,chunkOverlapChars:e.chunkOverlapChars})}async function Re(e){let n=(e.now??new Date).toISOString(),t=e.maxChunkChars??4000,r=e.chunkOverlapChars??200;if(t<500)throw Error("maxChunkChars must be at least 500.");if(r<0||r>=t)throw Error("chunkOverlapChars must be less than maxChunkChars.");if(e.safetyPolicy)K(e.dbPath,e.safetyPolicy);D(e.dbPath);let i=C(e.dbPath);try{return i.transaction(()=>{let _=new Set,E=new Set,o=0,a=0,c=0,u=0;S(i,{event_type:"source_read",action:e.readAction??(e.sourceLabel.startsWith("s3://")?"s3_manifest_read":"local_manifest_read"),target_uri:e.sourceLabel,decision:"allow",metadata:{items:e.items.length,read_only:!0},created_at:n});for(let l of e.items){let d=Wt(l,n),f=Vt(i,d,n),T=Qt(i,f,d,n);if(_.add(f),E.add(T),d.text||d.status.toLowerCase()==="deleted")a+=Gt(i,T);let p=Zt(i,T,d,n,t,r,e.safetyPolicy);o+=p.chunksInserted,c+=p.redactions}return S(i,{event_type:"write",action:"knowledge_manifest_ingest",target_uri:e.dbPath,decision:"allow",metadata:{items:e.items.length,sources:_.size,revisions:E.size,chunks_inserted:o,redactions:c},created_at:n}),{path:e.sourceLabel,db_path:e.dbPath,items_seen:e.items.length,sources_upserted:_.size,revisions_upserted:E.size,chunks_inserted:o,chunks_deleted:a,redactions:c,skipped:u}})()}finally{i.close()}}import{createHash as an}from"crypto";import{existsSync as un,readFileSync as cn}from"fs";import{basename as 
ae}from"path";function se(e){if(!e)return{};try{let n=JSON.parse(e);return n&&typeof n==="object"&&!Array.isArray(n)?n:{}}catch{return{}}}function B(e,n){for(let t of n){let r=e[t];if(typeof r==="string"&&r.length>0)return r}return null}function Pe(e,n){for(let t of n){let r=e[t];if(typeof r==="number"&&Number.isFinite(r))return r}return null}function en(e,n){let t=e.mode;if(typeof t==="string"&&t!=="read_only")throw Error(`Source resolver denied ${n}. Permission mode is ${t}, expected read_only.`);let r=e.denied_purposes;if(Array.isArray(r)&&r.includes(n))throw Error(`Source resolver denied ${n}. Purpose is explicitly denied.`);let i=e.allowed_purposes;if(Array.isArray(i)&&i.length>0&&!i.includes(n))throw Error(`Source resolver denied ${n}. Allowed purposes: ${i.join(", ")}`)}function tn(e,n,t){if(!n)return t;try{let r=L(e);if(r.kind==="open-files"&&r.entity==="file")return`${e}/revision/${encodeURIComponent(n.revision)}`}catch{return t}return t}function nn(e,n,t){return e.query(`SELECT id, uri, kind, title, metadata_json, acl_json, updated_at
295
283
  FROM sources
296
284
  WHERE uri = ? OR uri = ?
297
285
  ORDER BY CASE WHEN uri = ? THEN 0 ELSE 1 END
298
- LIMIT 1`).get(n,t,n)??null}function Rn(e,n,t){if(t)return e.query(`SELECT id, revision, hash, extracted_text_uri, metadata_json, created_at
286
+ LIMIT 1`).get(n,t,n)??null}function rn(e,n,t){if(t)return e.query(`SELECT id, revision, hash, extracted_text_uri, metadata_json, created_at
299
287
  FROM source_revisions
300
288
  WHERE source_id = ? AND revision = ?
301
289
  LIMIT 1`).get(n,t)??null;return e.query(`SELECT id, revision, hash, extracted_text_uri, metadata_json, created_at
302
290
  FROM source_revisions
303
291
  WHERE source_id = ?
304
292
  ORDER BY created_at DESC, revision DESC
305
- LIMIT 1`).get(n)??null}function Nn(e,n){if(!n)return 0;return e.query("SELECT COUNT(*) AS n FROM chunks WHERE source_revision_id = ?").get(n)?.n??0}function bn(e,n,t){if(!n||t<=0)return[];return e.query(`SELECT id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json
293
+ LIMIT 1`).get(n)??null}function sn(e,n){if(!n)return 0;return e.query("SELECT COUNT(*) AS n FROM chunks WHERE source_revision_id = ?").get(n)?.n??0}function on(e,n,t){if(!n||t<=0)return[];return e.query(`SELECT id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json
306
294
  FROM chunks
307
295
  WHERE source_revision_id = ?
308
296
  ORDER BY ordinal ASC
309
- LIMIT ?`).all(n,t)}async function Me(e){let n=e.purpose??"knowledge_answer",t=Math.max(0,Math.min(e.limit??10,100)),r=(e.now??new Date).toISOString(),i=j(e.sourceRef),s=ke(e.sourceRef,i),l=xe(e.sourceRef);if(e.safetyPolicy){if(!e.safetyPolicy.readOnlySourceAccess)throw Error("Safety policy denied source resolution.");K(e.dbPath,e.safetyPolicy)}D(e.dbPath);let T=I(e.dbPath);try{return T.transaction(()=>{let o=hn(T,s,e.sourceRef);if(!o)return S(T,{event_type:"source_read",action:"open_files_resolve_missing",target_uri:e.sourceRef,decision:"allow",metadata:{purpose:n,read_only:!0,source_uri:s},created_at:r}),{source_ref:e.sourceRef,source_uri:s,purpose:n,read_only:!0,resolved:!1,resolver:{name:"open-files-read-only",mode:"local_catalog",contract:"open-files-knowledge-source-v1"},source:null,revision:null,content:{mime:null,size:null,hash:null,text_available:!1,chunks_total:0,chunks_returned:0,char_count_returned:0,extracted_text_ref:null,bytes_available:!1,bytes_exposed:!1},chunks:[],citations:[]};let a=ie(o.metadata_json),c=ie(o.acl_json);try{gn(c,n)}catch(g){throw S(T,{event_type:"source_read",action:"open_files_resolve",target_uri:e.sourceRef,decision:"deny",metadata:{purpose:n,read_only:!0,source_uri:o.uri,error:g instanceof Error?g.message:String(g)},created_at:r}),g}let u=Rn(T,o.id,l),_=ie(u?.metadata_json),d=Nn(T,u?.id??null),f=bn(T,u?.id??null,t),p=yn(o.uri,u,e.sourceRef),E=f.map((g)=>{let 
x=ie(g.metadata_json),R={resolver:"open-files-read-only",mode:"local_catalog",purpose:n,read_only:!0,source_ref:P(x,["source_ref"])??p,source_uri:o.uri,source_revision_id:u?.id??null,revision:u?.revision??null,hash:u?.hash??P(x,["hash"]),chunk_id:g.id,start_offset:g.start_offset,end_offset:g.end_offset,resolved_at:r};return{id:g.id,kind:g.kind,ordinal:g.ordinal,text:g.text,token_count:g.token_count,start_offset:g.start_offset,end_offset:g.end_offset,metadata:x,evidence:R}}),O=E.map((g)=>({source_ref:g.evidence.source_ref,source_uri:o.uri,chunk_id:g.id,quote:g.text.slice(0,500),start_offset:g.start_offset,end_offset:g.end_offset,evidence:g.evidence}));S(T,{event_type:"source_read",action:"open_files_resolve",target_uri:e.sourceRef,decision:"allow",metadata:{purpose:n,read_only:!0,source_uri:o.uri,revision:u?.revision??null,chunks_returned:E.length,chunks_total:d},created_at:r});let h=P(a,["mime","content_type"])??P(_,["mime","content_type"]),m=Fe(a,["size","size_bytes"])??Fe(_,["size","size_bytes"]);return{source_ref:p,source_uri:o.uri,purpose:n,read_only:!0,resolved:!0,resolver:{name:"open-files-read-only",mode:"local_catalog",contract:"open-files-knowledge-source-v1"},source:{id:o.id,uri:o.uri,kind:o.kind,title:o.title,metadata:a,permissions:c,updated_at:o.updated_at},revision:u?{id:u.id,revision:u.revision,hash:u.hash,extracted_text_uri:u.extracted_text_uri,metadata:_,created_at:u.created_at,reindex_required:_.reindex_required===!0}:null,content:{mime:h,size:m,hash:u?.hash??P(a,["hash","checksum","sha256"]),text_available:d>0,chunks_total:d,chunks_returned:E.length,char_count_returned:E.reduce((g,x)=>g+x.text.length,0),extracted_text_ref:u?.extracted_text_uri??P(_,["extracted_text_ref","extracted_text_uri"]),bytes_available:!1,bytes_exposed:!1},chunks:E,citations:O}})()}finally{T.close()}}var Y={name:"@hasna/knowledge",version:"0.2.8",description:"Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive 
actions",type:"module",bin:{"open-knowledge":"bin/open-knowledge.js","open-knowledge-mcp":"bin/open-knowledge-mcp.js"},files:["bin","src","docs","LICENSE","README.md"],scripts:{test:"bun test","test:cli":"bun test tests/cli.test.ts",build:"bun build --target=bun --outfile=bin/open-knowledge.js --minify --external @aws-sdk/client-s3 --external @aws-sdk/credential-providers src/cli.ts && bun build --target=bun --outfile=bin/open-knowledge-mcp.js --external @modelcontextprotocol/sdk src/mcp.js",prepublishOnly:"bun run build",postinstall:"bun run build"},keywords:["knowledge","cli","agents","json","notes","local","store"],license:"Apache-2.0",publishConfig:{registry:"https://registry.npmjs.org",access:"public"},repository:{type:"git",url:"git+https://github.com/hasna/knowledge.git"},bugs:{url:"https://github.com/hasna/knowledge/issues"},author:"Hasna Inc. <hasna@example.com>",engines:{bun:">=1.0",node:">=18"},dependencies:{"@aws-sdk/client-s3":"^3.1063.0","@aws-sdk/credential-providers":"^3.1063.0","@modelcontextprotocol/sdk":"^1.29.0",zod:"^4.3.6"},devDependencies:{"@types/bun":"^1.3.14"}};var Ke={debug:0,info:1,warn:2,error:3},On=()=>{if(process.env.DEBUG)return"debug";if(process.env.LOG_LEVEL==="debug")return"debug";if(process.env.LOG_LEVEL==="warn")return"warn";if(process.env.LOG_LEVEL==="error")return"error";return"info"};function M(e,n,t){if(Ke[e]<Ke[On()])return;let r={debug:"[DEBUG]",info:"[INFO]",warn:"[WARN]",error:"[ERROR]"}[e],i=t?`${r} ${n} ${JSON.stringify(t)}`:`${r} ${n}`;if(e==="error")console.error(i);else console.error(i)}var mn=["add","list","get","delete","update","archive","restore","upsert","untag","export","prune","dedupe","stats","paths","db","wiki","source","ingest","reindex","safety","help"],$e={ls:"list",rm:"delete",edit:"update",unarchive:"restore"};function wn(e){let n=[],t={};for(let r=0;r<e.length;r+=1){let 
i=e[r];if(!i.startsWith("-")){n.push(i);continue}switch(i){case"--json":t.json=!0;break;case"--yes":case"-y":t.yes=!0;break;case"--help":case"-h":t.help=!0;break;case"--version":case"-v":t.version=!0;break;case"--desc":t.desc=!0;break;case"--page":case"-p":t.page=Number(e[r+1]),r+=1;break;case"--limit":case"-l":t.limit=Number(e[r+1]),r+=1;break;case"--search":case"-s":t.search=e[r+1],r+=1;break;case"--sort":t.sort=e[r+1],r+=1;break;case"--id":t.id=e[r+1],r+=1;break;case"--store":t.store=e[r+1],r+=1;break;case"--title":t.title=e[r+1],r+=1;break;case"--content":t.content=e[r+1],r+=1;break;case"--url":t.url=e[r+1],r+=1;break;case"--tag":case"-t":t.tag=e[r+1],r+=1;break;case"--format":t.format=e[r+1],r+=1;break;case"--completions":t.completions=e[r+1],r+=1;break;case"--purpose":t.purpose=e[r+1],r+=1;break;case"--no-color":t.noColor=!0;break;case"--scope":t.scope=e[r+1],r+=1;break;case"--older-than":t.olderThan=Number(e[r+1]),r+=1;break;case"--empty":t.empty=!0;break;case"--archived":t.archived=!0;break;case"--include-archived":t.includeArchived=!0;break;default:throw Error(`Unknown flag: ${i}. Run 'open-knowledge --help' for valid options.`)}}return{positional:n,flags:t}}function Ln(e){if(!e)return"";return $e[e]??e}function kn(e,n){let t=Array.from({length:e.length+1},()=>Array(n.length+1).fill(0));for(let r=0;r<=e.length;r+=1)t[r][0]=r;for(let r=0;r<=n.length;r+=1)t[0][r]=r;for(let r=1;r<=e.length;r+=1)for(let i=1;i<=n.length;i+=1){let s=e[r-1]===n[i-1]?0:1;t[r][i]=Math.min(t[r-1][i]+1,t[r][i-1]+1,t[r-1][i-1]+s)}return t[e.length][n.length]}function xn(e){if(!e)return"";let n=[...mn,...Object.keys($e)],t="",r=Number.POSITIVE_INFINITY;for(let i of n){let s=kn(e,i);if(s<r)r=s,t=i}return r<=3?t:""}function vn(){console.log(`open-knowledge - local agent knowledge store
297
+ LIMIT ?`).all(n,t)}async function oe(e){let n=e.purpose??"knowledge_answer",t=Math.max(0,Math.min(e.limit??10,100)),r=(e.now??new Date).toISOString(),i=L(e.sourceRef),s=Ue(e.sourceRef,i),_=Ce(e.sourceRef);if(e.safetyPolicy){if(!e.safetyPolicy.readOnlySourceAccess)throw Error("Safety policy denied source resolution.");K(e.dbPath,e.safetyPolicy)}D(e.dbPath);let E=C(e.dbPath);try{return E.transaction(()=>{let o=nn(E,s,e.sourceRef);if(!o)return S(E,{event_type:"source_read",action:"open_files_resolve_missing",target_uri:e.sourceRef,decision:"allow",metadata:{purpose:n,read_only:!0,source_uri:s},created_at:r}),{source_ref:e.sourceRef,source_uri:s,purpose:n,read_only:!0,resolved:!1,resolver:{name:"open-files-read-only",mode:"local_catalog",contract:"open-files-knowledge-source-v1"},source:null,revision:null,content:{mime:null,size:null,hash:null,text_available:!1,chunks_total:0,chunks_returned:0,char_count_returned:0,extracted_text_ref:null,bytes_available:!1,bytes_exposed:!1},chunks:[],citations:[]};let a=se(o.metadata_json),c=se(o.acl_json);try{en(c,n)}catch(g){throw S(E,{event_type:"source_read",action:"open_files_resolve",target_uri:e.sourceRef,decision:"deny",metadata:{purpose:n,read_only:!0,source_uri:o.uri,error:g instanceof Error?g.message:String(g)},created_at:r}),g}let u=rn(E,o.id,_),l=se(u?.metadata_json),d=sn(E,u?.id??null),f=on(E,u?.id??null,t),T=tn(o.uri,u,e.sourceRef),p=f.map((g)=>{let 
k=se(g.metadata_json),y={resolver:"open-files-read-only",mode:"local_catalog",purpose:n,read_only:!0,source_ref:B(k,["source_ref"])??T,source_uri:o.uri,source_revision_id:u?.id??null,revision:u?.revision??null,hash:u?.hash??B(k,["hash"]),chunk_id:g.id,start_offset:g.start_offset,end_offset:g.end_offset,resolved_at:r};return{id:g.id,kind:g.kind,ordinal:g.ordinal,text:g.text,token_count:g.token_count,start_offset:g.start_offset,end_offset:g.end_offset,metadata:k,evidence:y}}),v=p.map((g)=>({source_ref:g.evidence.source_ref,source_uri:o.uri,chunk_id:g.id,quote:g.text.slice(0,500),start_offset:g.start_offset,end_offset:g.end_offset,evidence:g.evidence}));S(E,{event_type:"source_read",action:"open_files_resolve",target_uri:e.sourceRef,decision:"allow",metadata:{purpose:n,read_only:!0,source_uri:o.uri,revision:u?.revision??null,chunks_returned:p.length,chunks_total:d},created_at:r});let R=B(a,["mime","content_type"])??B(l,["mime","content_type"]),x=Pe(a,["size","size_bytes"])??Pe(l,["size","size_bytes"]);return{source_ref:T,source_uri:o.uri,purpose:n,read_only:!0,resolved:!0,resolver:{name:"open-files-read-only",mode:"local_catalog",contract:"open-files-knowledge-source-v1"},source:{id:o.id,uri:o.uri,kind:o.kind,title:o.title,metadata:a,permissions:c,updated_at:o.updated_at},revision:u?{id:u.id,revision:u.revision,hash:u.hash,extracted_text_uri:u.extracted_text_uri,metadata:l,created_at:u.created_at,reindex_required:l.reindex_required===!0}:null,content:{mime:R,size:x,hash:u?.hash??B(a,["hash","checksum","sha256"]),text_available:d>0,chunks_total:d,chunks_returned:p.length,char_count_returned:p.reduce((g,k)=>g+k.text.length,0),extracted_text_ref:u?.extracted_text_uri??B(l,["extracted_text_ref","extracted_text_uri"]),bytes_available:!1,bytes_exposed:!1},chunks:p,citations:v}})()}finally{E.close()}}function W(e){return`sha256:${an("sha256").update(e).digest("hex")}`}function dn(e){return e.replace(/<script[\s\S]*?<\/script>/gi," ").replace(/<style[\s\S]*?<\/style>/gi," 
").replace(/<[^>]+>/g," ").replace(/&nbsp;/g," ").replace(/&amp;/g,"&").replace(/&lt;/g,"<").replace(/&gt;/g,">").replace(/\s+\n/g,`
298
+ `).replace(/\n\s+/g,`
299
+ `).replace(/[ \t]{2,}/g," ").trim()}async function ln(e,n,t){let r=new URL(e),i=r.hostname,s=decodeURIComponent(r.pathname.replace(/^\/+/,""));if(!i||!s)throw Error(`Invalid S3 source URI: ${e}`);if(t)F(e,t);let[{S3Client:_,GetObjectCommand:E},{fromIni:o}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]),a=n?.storage.type==="s3"&&n.storage.s3?.bucket===i?n.storage.s3:void 0,u=await new _({region:a?.region,credentials:a?.profile?o({profile:a.profile}):void 0,maxAttempts:a?.max_attempts}).send(new E({Bucket:i,Key:s}));if(!u.Body)return"";return await u.Body.transformToString()}async function _n(e,n){if(n)ne(n);let t=await fetch(e,{headers:{accept:"text/markdown,text/plain,text/html,application/json;q=0.8,*/*;q=0.5","user-agent":"@hasna/knowledge source-ingest"}});if(!t.ok)throw Error(`Web source read failed ${t.status}: ${e}`);let r=t.headers.get("content-type"),i=await t.text();return{text:r?.includes("html")?dn(i):i,mime:r}}function ue(e){if(e.kind==="file")return ae(e.path);if(e.kind==="s3")return ae(e.key);if(e.kind==="web")return ae(new URL(e.url).pathname)||e.url;return e.path?ae(e.path):e.id}async function $e(e,n,t){if(e.kind==="file"){if(!un(e.path))throw Error(`Source file not found: ${e.path}`);let r=cn(e.path,"utf8");return{text:r,contentSource:"file",title:ue(e),mime:"text/plain",size:r.length,hash:W(r),revision:null,extractedTextRef:null,metadata:{path:e.path},permissions:{mode:"read_only"}}}if(e.kind==="s3"){let r=await ln(e.uri,n,t);return{text:r,contentSource:"s3",title:ue(e),mime:"text/plain",size:r.length,hash:W(r),revision:null,extractedTextRef:null,metadata:{bucket:e.bucket,key:e.key},permissions:{mode:"read_only"}}}if(e.kind==="web"){let r=await _n(e.url,t);return{text:r.text,contentSource:"web",title:ue(e),mime:r.mime,size:r.text.length,hash:W(r.text),revision:null,extractedTextRef:null,metadata:{url:e.url},permissions:{mode:"read_only"}}}throw Error(`Direct source reading is not available for 
${e.uri}`)}async function fn(e,n,t){if(e.startsWith("open-files://"))throw Error("Open-files extracted text refs require an open-files resolver API. Ingest an open-files manifest with extracted_text or an extracted_text_ref using file://, s3://, or https://.");let r=L(e);return{text:(await $e(r,n,t)).text,contentSource:"extracted_text_ref"}}async function En(e){let n=await oe({dbPath:e.dbPath,sourceRef:e.sourceRef,purpose:e.purpose??"knowledge_index",limit:100,safetyPolicy:e.safetyPolicy,now:e.now});if(!n.resolved)throw Error("Open-files source is not in the local knowledge catalog. Ingest an open-files manifest first or use the open-files resolver API.");if(n.revision?.extracted_text_uri&&!n.content.text_available){let r=await fn(n.revision.extracted_text_uri,e.config,e.safetyPolicy);return{text:r.text,contentSource:r.contentSource,title:n.source?.title??null,mime:n.content.mime,size:r.text.length,hash:n.revision.hash??W(r.text),revision:n.revision.revision,extractedTextRef:n.revision.extracted_text_uri,metadata:n.source?.metadata??{},permissions:n.source?.permissions??{mode:"read_only"}}}if(n.chunks.length===0)throw Error("Open-files source has no extracted text chunks yet. Ingest an open-files manifest with extracted_text or extracted_text_ref first.");let t=n.chunks.map((r)=>r.text).join(`
300
+
301
+ `);return{text:t,contentSource:"catalog_chunks",title:n.source?.title??null,mime:n.content.mime,size:t.length,hash:n.revision?.hash??W(t),revision:n.revision?.revision??null,extractedTextRef:n.revision?.extracted_text_uri??null,metadata:n.source?.metadata??{},permissions:n.source?.permissions??{mode:"read_only"}}}function pn(e,n,t,r){let i=t.hash??W(t.text),s={...t.metadata,source_ref:e,content_source:t.contentSource,read_only:!0},_={source_ref:e,name:t.title??ue(n),mime:t.mime??"text/plain",size:t.size??t.text.length,hash:i,revision:t.revision??i,status:"active",updated_at:new Date().toISOString(),permissions:{mode:"read_only",allowed_purposes:[r],...t.permissions},metadata:s,extracted_text_ref:t.extractedTextRef,extracted_text:t.text};if(n.kind==="open-files"){if(n.entity==="file")_.file_id=n.id;if(n.entity==="source")_.source_id=n.id,_.path=n.path}if(n.kind==="file")_.path=n.path;if(n.kind==="s3")_.path=n.key;if(n.kind==="web")_.url=n.url;return _}async function Be(e){let n=e.purpose??"knowledge_index",t=L(e.sourceRef),r=t.kind==="open-files"?await En(e):await $e(t,e.config,e.safetyPolicy),i=pn(e.sourceRef,t,r,n);return{...await Re({dbPath:e.dbPath,items:[i],sourceLabel:e.sourceRef,readAction:"source_ref_ingest_read",safetyPolicy:e.safetyPolicy,now:e.now}),source_ref:e.sourceRef,content_source:r.contentSource,read_only:!0,hash:String(i.hash)}}import{createHash as Tn,randomUUID as gn}from"crypto";import{existsSync as hn,readFileSync as yn}from"fs";import{basename as mn}from"path";function ce(e,n){return`${e}_${Tn("sha256").update(n).digest("hex").slice(0,20)}`}function z(e){return e&&typeof e==="object"&&!Array.isArray(e)?e:void 0}function b(e){return typeof e==="string"&&e.length>0?e:void 0}function Rn(e){let n=b(e.source_ref)??b(e.source_uri)??b(e.uri);if(n)return n;let t=b(e.file_id);if(t){let s=b(e.revision_id)??b(e.revision),_=`open-files://file/${encodeURIComponent(t)}`;return s?`${_}/revision/${encodeURIComponent(s)}`:_}let 
r=b(e.source_id),i=b(e.path);if(r&&i)return`open-files://source/${encodeURIComponent(r)}/path/${encodeURIComponent(i)}`;throw Error("Outbox event is missing source_ref, file_id, or source_id/path.")}function bn(e,n){if(n.kind==="open-files"&&n.entity==="file"&&n.revision_id)return e.replace(/\/revision\/[^/]+$/,"");return e}function Sn(e){return b(e.hash)??b(e.checksum)??b(e.sha256)??null}function xn(e,n,t){return b(e.revision_id)??b(e.revision)??b(e.version_id)??(n.kind==="open-files"?n.revision_id:void 0)??t??null}function Nn(e){return(b(e.event)??b(e.type)??b(e.action)??b(e.change_type)??"changed").toLowerCase()}function On(e){let n=b(e.path);return b(e.title)??b(e.name)??(n?mn(n):null)}function wn(e,n){let t=Rn(e),r=L(t),i=Sn(e);return{raw:e,eventType:Nn(e),sourceRef:t,sourceUri:bn(t,r),kind:r.kind,title:On(e),revision:xn(e,r,i),hash:i,status:b(e.status)?.toLowerCase()??null,updatedAt:b(e.updated_at)??n,acl:e.permissions??e.acl??void 0}}function kn(e){let n=e.trim();if(!n)return[];if(n.startsWith("[")){let t=JSON.parse(n);if(!Array.isArray(t))throw Error("Outbox array parse failed.");return t.map((r)=>{let i=z(r);if(!i)throw Error("Outbox array entries must be objects.");return i})}if(n.startsWith("{"))try{let t=JSON.parse(n),r=z(t);if(!r)throw Error("Outbox object parse failed.");if(Array.isArray(r.events))return r.events.map((i)=>{let s=z(i);if(!s)throw Error("Outbox events entries must be objects.");return s});if("source_ref"in r||"source_uri"in r||"file_id"in r)return[r]}catch(t){let r=n.split(/\r?\n/).filter((i)=>i.trim().length>0);if(r.length<=1)throw t;return r.map((i)=>{let s=z(JSON.parse(i));if(!s)throw Error("Outbox JSONL entries must be objects.");return s})}return n.split(/\r?\n/).filter((t)=>t.trim().length>0).map((t)=>{let r=z(JSON.parse(t));if(!r)throw Error("Outbox JSONL entries must be objects.");return r})}async function Ln(e,n,t){let r=new URL(e),i=r.hostname,s=decodeURIComponent(r.pathname.replace(/^\/+/,""));if(!i||!s)throw Error(`Invalid 
S3 outbox URI: ${e}`);if(t)F(e,t);let[{S3Client:_,GetObjectCommand:E},{fromIni:o}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]),a=n?.storage.type==="s3"&&n.storage.s3?.bucket===i?n.storage.s3:void 0,u=await new _({region:a?.region,credentials:a?.profile?o({profile:a.profile}):void 0,maxAttempts:a?.max_attempts}).send(new E({Bucket:i,Key:s}));if(!u.Body)return"";return await u.Body.transformToString()}async function vn(e,n,t){if(e.startsWith("s3://"))return Ln(e,n,t);if(!hn(e))throw Error(`Outbox not found: ${e}`);return yn(e,"utf8")}function We(e,n){let t={};if(e)try{t=z(JSON.parse(e))??{}}catch{t={}}return JSON.stringify({...t,...n})}function An(e,n,t){let r=ce("src",n.sourceUri);e.run(`INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
302
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)
303
+ ON CONFLICT(uri) DO UPDATE SET
304
+ kind = excluded.kind,
305
+ title = COALESCE(excluded.title, sources.title),
306
+ updated_at = excluded.updated_at`,[r,n.sourceUri,n.kind,n.title,JSON.stringify({source_ref:n.sourceRef,source_uri:n.sourceUri,status:n.status,last_outbox_event:n.eventType}),JSON.stringify(n.acl??{}),t,n.updatedAt]);let i=e.query("SELECT id, metadata_json, acl_json FROM sources WHERE uri = ?").get(n.sourceUri);if(!i)throw Error(`Failed to upsert source for outbox event: ${n.sourceUri}`);let s={source_ref:n.sourceRef,source_uri:n.sourceUri,last_outbox_event:n.eventType,last_outbox_at:n.updatedAt};if(n.status)s.status=n.status;if(b(n.raw.path))s.path=n.raw.path;return e.run("UPDATE sources SET metadata_json = ?, acl_json = CASE WHEN ? IS NULL THEN acl_json ELSE ? END, updated_at = ? WHERE id = ?",[We(i.metadata_json,s),n.acl===void 0?null:JSON.stringify(n.acl),n.acl===void 0?null:JSON.stringify(n.acl),n.updatedAt,i.id]),i.id}function In(e,n,t,r){if(!t.revision)return null;let i=ce("rev",`${n}\x00${t.revision}`),s={source_ref:t.sourceRef,source_uri:t.sourceUri,status:t.status,last_outbox_event:t.eventType,reindex_required:!0};return e.run(`INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)
307
+ VALUES (?, ?, ?, ?, ?, ?, ?)
308
+ ON CONFLICT(source_id, revision) DO UPDATE SET
309
+ hash = COALESCE(excluded.hash, source_revisions.hash),
310
+ metadata_json = excluded.metadata_json`,[i,n,t.revision,t.hash,b(t.raw.extracted_text_ref)??null,JSON.stringify(s),r]),e.query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?").get(n,t.revision)?.id??null}function Dn(e,n,t){if(t.revision)return e.query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?").all(n,t.revision).map((r)=>r.id);if(t.hash)return e.query("SELECT id FROM source_revisions WHERE source_id = ? AND hash = ?").all(n,t.hash).map((r)=>r.id);return e.query("SELECT id FROM source_revisions WHERE source_id = ?").all(n).map((r)=>r.id)}function Un(e,n){let t=e.query("SELECT id FROM chunks WHERE source_revision_id = ?").all(n),r=0;for(let s of t){let _=e.query("SELECT COUNT(*) AS n FROM chunk_embeddings WHERE chunk_id = ?").get(s.id);r+=_?.n??0,e.run("DELETE FROM chunk_embeddings WHERE chunk_id = ?",[s.id]),e.run("DELETE FROM chunks_fts WHERE chunk_id = ?",[s.id])}e.run("DELETE FROM chunks WHERE source_revision_id = ?",[n]);let i=e.query("SELECT metadata_json FROM source_revisions WHERE id = ?").get(n);return e.run("UPDATE source_revisions SET metadata_json = ? WHERE id = ?",[We(i?.metadata_json,{reindex_required:!0,invalidated_at:new Date().toISOString()}),n]),{chunksDeleted:t.length,embeddingsDeleted:r}}function Cn(e,n){return n==="deleted"||["delete","deleted","remove","removed"].includes(e)}function jn(e){return["move","moved","rename","renamed","path_changed"].includes(e)}function Xn(e){return["permission","permissions","permission_changed","acl_changed"].includes(e)}async function ze(e){let n=(e.now??new Date).toISOString();if(e.safetyPolicy)K(e.dbPath,e.safetyPolicy);D(e.dbPath);let t=await vn(e.input,e.config,e.safetyPolicy),r=kn(t),i=C(e.dbPath),s=`run_${gn()}`;try{return i.transaction(()=>{i.run(`INSERT INTO runs (id, type, prompt, status, provider, model, metadata_json, created_at, updated_at)
311
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,[s,"open-files-outbox",e.input,"completed","local","open-files-outbox",JSON.stringify({path:e.input,events:r.length}),n,n]);let _=new Set,E=new Set,o=0,a=0,c=0,u=0,l=0,d=0;return S(i,{event_type:"source_read",action:e.input.startsWith("s3://")?"s3_outbox_read":"local_outbox_read",target_uri:e.input,decision:"allow",metadata:{events:r.length,read_only:!0},created_at:n}),r.forEach((f,T)=>{let p=wn(f,n),v=An(i,p,n);_.add(v);let R=In(i,v,p,n);if(R)E.add(R);let x=Dn(i,v,p);for(let g of x){E.add(g);let k=Un(i,g);o+=k.chunksDeleted,a+=k.embeddingsDeleted,c+=1}if(Cn(p.eventType,p.status))u+=1;if(jn(p.eventType))l+=1;if(Xn(p.eventType)||p.acl!==void 0)d+=1;i.run(`INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
312
+ VALUES (?, ?, ?, ?, ?, ?)`,[ce("evt",`${s}\x00${T}\x00${p.sourceRef}\x00${p.eventType}`),s,"info",p.eventType,JSON.stringify({source_ref:p.sourceRef,source_uri:p.sourceUri,revision:p.revision,hash:p.hash,status:p.status,affected_revisions:x.length}),p.updatedAt])}),i.run(`INSERT INTO provider_usage (id, run_id, provider, model, input_tokens, output_tokens, cost_usd, metadata_json, created_at)
313
+ VALUES (?, ?, ?, ?, 0, 0, 0, ?, ?)`,[ce("usage",s),s,"local","open-files-outbox",JSON.stringify({note:"No model provider used for outbox invalidation."}),n]),S(i,{event_type:"write",action:"knowledge_outbox_invalidation",target_uri:e.dbPath,decision:"allow",metadata:{run_id:s,events:r.length,sources:_.size,revisions:E.size,chunks_deleted:o,embeddings_deleted:a},created_at:n}),{path:e.input,db_path:e.dbPath,run_id:s,events_seen:r.length,sources_touched:_.size,revisions_touched:E.size,chunks_deleted:o,embeddings_deleted:a,stale_revisions:c,deleted_sources:u,moved_sources:l,permission_updates:d}})()}finally{i.close()}}var J={name:"@hasna/knowledge",version:"0.2.9",description:"Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",type:"module",bin:{"open-knowledge":"bin/open-knowledge.js","open-knowledge-mcp":"bin/open-knowledge-mcp.js"},files:["bin","src","docs","LICENSE","README.md"],scripts:{test:"bun test","test:cli":"bun test tests/cli.test.ts",build:"bun build --target=bun --outfile=bin/open-knowledge.js --minify --external @aws-sdk/client-s3 --external @aws-sdk/credential-providers src/cli.ts && bun build --target=bun --outfile=bin/open-knowledge-mcp.js --external @modelcontextprotocol/sdk src/mcp.js",prepublishOnly:"bun run build",postinstall:"bun run build"},keywords:["knowledge","cli","agents","json","notes","local","store"],license:"Apache-2.0",publishConfig:{registry:"https://registry.npmjs.org",access:"public"},repository:{type:"git",url:"git+https://github.com/hasna/knowledge.git"},bugs:{url:"https://github.com/hasna/knowledge/issues"},author:"Hasna Inc. 
<hasna@example.com>",engines:{bun:">=1.0",node:">=18"},dependencies:{"@aws-sdk/client-s3":"^3.1063.0","@aws-sdk/credential-providers":"^3.1063.0","@modelcontextprotocol/sdk":"^1.29.0",zod:"^4.3.6"},devDependencies:{"@types/bun":"^1.3.14"}};var He={debug:0,info:1,warn:2,error:3},Mn=()=>{if(process.env.DEBUG)return"debug";if(process.env.LOG_LEVEL==="debug")return"debug";if(process.env.LOG_LEVEL==="warn")return"warn";if(process.env.LOG_LEVEL==="error")return"error";return"info"};function P(e,n,t){if(He[e]<He[Mn()])return;let r={debug:"[DEBUG]",info:"[INFO]",warn:"[WARN]",error:"[ERROR]"}[e],i=t?`${r} ${n} ${JSON.stringify(t)}`:`${r} ${n}`;if(e==="error")console.error(i);else console.error(i)}var Kn=["add","list","get","delete","update","archive","restore","upsert","untag","export","prune","dedupe","stats","paths","db","wiki","source","ingest","reindex","safety","help"],qe={ls:"list",rm:"delete",edit:"update",unarchive:"restore"};function Pn(e){let n=[],t={};for(let r=0;r<e.length;r+=1){let 
i=e[r];if(!i.startsWith("-")){n.push(i);continue}switch(i){case"--json":t.json=!0;break;case"--yes":case"-y":t.yes=!0;break;case"--help":case"-h":t.help=!0;break;case"--version":case"-v":t.version=!0;break;case"--desc":t.desc=!0;break;case"--page":case"-p":t.page=Number(e[r+1]),r+=1;break;case"--limit":case"-l":t.limit=Number(e[r+1]),r+=1;break;case"--search":case"-s":t.search=e[r+1],r+=1;break;case"--sort":t.sort=e[r+1],r+=1;break;case"--id":t.id=e[r+1],r+=1;break;case"--store":t.store=e[r+1],r+=1;break;case"--title":t.title=e[r+1],r+=1;break;case"--content":t.content=e[r+1],r+=1;break;case"--url":t.url=e[r+1],r+=1;break;case"--tag":case"-t":t.tag=e[r+1],r+=1;break;case"--format":t.format=e[r+1],r+=1;break;case"--completions":t.completions=e[r+1],r+=1;break;case"--purpose":t.purpose=e[r+1],r+=1;break;case"--no-color":t.noColor=!0;break;case"--scope":t.scope=e[r+1],r+=1;break;case"--older-than":t.olderThan=Number(e[r+1]),r+=1;break;case"--empty":t.empty=!0;break;case"--archived":t.archived=!0;break;case"--include-archived":t.includeArchived=!0;break;default:throw Error(`Unknown flag: ${i}. Run 'open-knowledge --help' for valid options.`)}}return{positional:n,flags:t}}function $n(e){if(!e)return"";return qe[e]??e}function Bn(e,n){let t=Array.from({length:e.length+1},()=>Array(n.length+1).fill(0));for(let r=0;r<=e.length;r+=1)t[r][0]=r;for(let r=0;r<=n.length;r+=1)t[0][r]=r;for(let r=1;r<=e.length;r+=1)for(let i=1;i<=n.length;i+=1){let s=e[r-1]===n[i-1]?0:1;t[r][i]=Math.min(t[r-1][i]+1,t[r][i-1]+1,t[r-1][i-1]+s)}return t[e.length][n.length]}function Wn(e){if(!e)return"";let n=[...Kn,...Object.keys(qe)],t="",r=Number.POSITIVE_INFINITY;for(let i of n){let s=Bn(e,i);if(s<r)r=s,t=i}return r<=3?t:""}function zn(){console.log(`open-knowledge - local agent knowledge store
310
314
 
311
315
  Usage:
312
316
  open-knowledge <command> [options]
@@ -330,6 +334,7 @@ Commands:
330
334
  wiki init Initialize scalable wiki/schema/index/log artifacts
331
335
  source resolve <source-ref> Resolve read-only source content and citation evidence
332
336
  ingest manifest <file|s3://> Ingest an open-files manifest into knowledge.db
337
+ ingest source <source-ref> Ingest a read-only source ref into knowledge.db
333
338
  reindex outbox <file|s3://> Consume open-files change events and invalidate chunks
334
339
  safety status|check|approve|audit|redact
335
340
  help [command] Show help
@@ -374,5 +379,5 @@ Export Options:
374
379
 
375
380
  Prune Options:
376
381
  --older-than <days> Remove items older than N days
377
- --empty Remove items with empty content`)}function An(e){if(e==="add"){console.log("Usage: open-knowledge add <title> <content> [--url <url>] [-t <tag>] [--json]");return}if(e==="list"||e==="ls"){console.log("Usage: open-knowledge list|ls [--format table|json] [-p <page>] [-l <limit>] [-s <search>] [-t <tag>] [--sort created|title] [--desc] [--json]");return}if(e==="get"){console.log("Usage: open-knowledge get --id <id> [--json]");return}if(e==="update"||e==="edit"){console.log("Usage: open-knowledge update|edit --id <id> [--title <title>] [--content <content>] [--url <url>] [-t <tag>] [--json]");return}if(e==="archive"){console.log("Usage: open-knowledge archive --id <id> [--json]");return}if(e==="restore"||e==="unarchive"){console.log("Usage: open-knowledge restore|unarchive --id <id> [--json]");return}if(e==="upsert"){console.log("Usage: open-knowledge upsert [title] [content] [--id <id>] [--title <title>] [--content <content>] [--url <url>] [-t <tag>] [--json]");return}if(e==="untag"){console.log("Usage: open-knowledge untag --id <id> -t <tag> [--json]");return}if(e==="delete"||e==="rm"){console.log("Usage: open-knowledge delete|rm --id <id> -y [--json]");return}if(e==="export"){console.log("Usage: open-knowledge export [--format jsonl] [--json]");return}if(e==="prune"){console.log("Usage: open-knowledge prune --yes [--older-than <days>] [--empty] [--json]");return}if(e==="dedupe"){console.log("Usage: open-knowledge dedupe --yes [--json]");return}if(e==="stats"){console.log("Usage: open-knowledge stats [--json]");return}if(e==="paths"){console.log("Usage: open-knowledge paths [--scope local|global|project] [--json]");return}if(e==="db"){console.log("Usage: open-knowledge db init|stats [--scope local|global|project] [--json]");return}if(e==="wiki"){console.log("Usage: open-knowledge wiki init [--scope local|global|project] [--json]");return}if(e==="source"){console.log("Usage: open-knowledge source resolve <source-ref> [--purpose 
knowledge_answer|knowledge_index] [--limit <n>] [--scope local|global|project] [--json]");return}if(e==="ingest"){console.log("Usage: open-knowledge ingest manifest <file|s3://bucket/key> [--scope local|global|project] [--json]");return}if(e==="reindex"){console.log("Usage: open-knowledge reindex outbox <file|s3://bucket/key> [--scope local|global|project] [--json]");return}if(e==="safety"){console.log("Usage: open-knowledge safety status|check|approve|audit|redact [args] [--scope local|global|project] [--json]");return}vn()}function In(e){if(e.noColor||process.env.NO_COLOR)return!1;if(process.env.FORCE_COLOR)return!0;return process.stdout.isTTY===!0}function N(e,n,t){if(n){console.log(JSON.stringify(e,null,2));return}if(typeof e==="string"){console.log(e);return}console.log(e.message??JSON.stringify(e,null,2))}function J(e){if(!e.id)throw Error("Missing required --id. Example: open-knowledge get --id <id>")}function Dn(e,n){let t=n.sort??"created";if(t!=="created"&&t!=="title")throw Error("Invalid --sort value. 
Use 'created' or 'title'.");let r=[...e].sort((i,s)=>{if(t==="title")return i.title.localeCompare(s.title);return i.created_at.localeCompare(s.created_at)});if(n.desc)r.reverse();return{sorted:r,sort:t,direction:n.desc?"desc":"asc"}}async function Un(e){let{positional:n,flags:t}=wn(e);if(M("debug","CLI invoked",{command:n[0],flags:{json:t.json,store:t.store}}),t.version){console.log(t.json?JSON.stringify({name:Y.name,version:Y.version},null,2):`${Y.name} ${Y.version}`);return}if(t.completions){let o=t.completions;if(o==="bash")console.log('_open_knowledge() { local cur; cur="${COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki source ingest reindex safety help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --purpose --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge');else if(o==="zsh")console.log(`#compdef open-knowledge
378
- _open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki source ingest reindex safety help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"{created,title}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--purpose)--purpose[purpose]:" "(--no-color)--no-color[disable color]" "(--scope)--scope"{local,global,project}:" }; _open_knowledge`);else if(o==="fish")console.log('complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki source ingest reindex safety help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l 
purpose; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"');else throw Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");return}let r=Ln(n[0]);if(!r||t.help||r==="help"){An(n[1]);return}let i=ge(t.scope),s=t.store;if(!s)if(t.scope==="project"||t.scope==="local")s=X(i.home).jsonStorePath;else s=ce();if(r==="paths"){let o=X(i.home);N({ok:!0,scope:t.scope??"global",home:o.home,config_path:o.configPath,json_store_path:o.jsonStorePath,knowledge_db_path:o.knowledgeDbPath,artifacts_dir:o.artifactsDir,indexes_dir:o.indexesDir,logs_dir:o.logsDir,runs_dir:o.runsDir,schemas_dir:o.schemasDir,wiki_dir:o.wikiDir,config:F(o.configPath),message:o.home},t.json);return}if(r==="db"){let o=n[1]??"init",a=X(i.home);if(o!=="init"&&o!=="stats")throw Error("Invalid db action. Use 'init' or 'stats'.");if(o==="init"){let u=D(a.knowledgeDbPath);N({ok:!0,...u,message:`Initialized ${u.path}`},t.json);return}D(a.knowledgeDbPath);let c=Ne(a.knowledgeDbPath);N({ok:!0,path:a.knowledgeDbPath,...c,message:`knowledge.db schema v${c.schema_version}`},t.json);return}if(r==="wiki"){if((n[1]??"init")!=="init")throw Error("Invalid wiki action. 
Use 'init'.");let a=X(i.home),c=F(a.configPath),u=me(c,a),_=await we(u);N({ok:!0,..._,message:`Initialized wiki layout in ${a.home}`},t.json);return}if(r==="safety"){let o=n[1]??"status",a=X(i.home),c=F(a.configPath),u=z(c,a);D(a.knowledgeDbPath);let _=I(a.knowledgeDbPath);try{if(o==="status"){N({ok:!0,mode:u.mode,workspace:a.home,allow_write_roots:u.allowWriteRoots,read_only_source_access:u.readOnlySourceAccess,network:u.network,redaction:u.redaction,approvals:u.approvals,message:`Safety policy: ${u.mode}`},t.json);return}if(o==="check"){let d=n[2]??"generated_write",f=n[3]??null,p;try{if(d==="web_search")Ie(u),p={action:d,target_uri:f,approval_required:!1,approved:!0,decision:"allow"};else if(d==="s3_read"){if(!f)throw Error("safety check s3_read requires an s3:// target.");$(f,u),p={action:d,target_uri:f,approval_required:!1,approved:!0,decision:"allow"}}else p=Ue(_,u,d,f);S(_,{event_type:"safety_check",action:d,target_uri:f,decision:p.decision==="allow"?"allow":"requires_approval",metadata:p}),N({ok:!0,...p,message:`Safety check ${p.decision}`},t.json);return}catch(E){throw S(_,{event_type:"safety_check",action:d,target_uri:f,decision:"deny",metadata:{error:E instanceof Error?E.message:String(E)}}),E}}if(o==="approve"){let d=n[2]??"generated_write",f=n[3]??null,p=De(_,{action:d,target_uri:f,reason:"local-cli approval",metadata:{scope:t.scope??"global"}});S(_,{event_type:"approval",action:d,target_uri:f,decision:"allow",metadata:{approval_id:p.id}}),N({ok:!0,...p,action:d,target_uri:f,message:`Approved ${d}`},t.json);return}if(o==="audit"){let d=_.query("SELECT id, event_type, action, target_uri, decision, metadata_json, created_at FROM audit_events ORDER BY created_at DESC LIMIT 50").all().map((f)=>({id:f.id,event_type:f.event_type,action:f.action,target_uri:f.target_uri,decision:f.decision,metadata:JSON.parse(f.metadata_json),created_at:f.created_at}));N({ok:!0,events:d,message:`${d.length} audit event(s)`},t.json);return}if(o==="redact"){let 
d=n.slice(2).join(" ");if(!d)throw Error("Usage: open-knowledge safety redact <text>");let f=te(d,u);if(f.findings.length>0)ne(_,{source_uri:"safety://redact",findings:f.findings,metadata:{command:"safety redact"}});S(_,{event_type:"redaction",action:"safety_redact",target_uri:"safety://redact",decision:f.findings.length>0?"redacted":"allow",metadata:{findings:f.findings.length}}),N({ok:!0,text:f.text,findings:f.findings,message:`Redacted ${f.findings.length} finding(s)`},t.json);return}throw Error("Invalid safety action. Use 'status', 'check', 'approve', 'audit', or 'redact'.")}finally{_.close()}}if(r==="source"){if((n[1]??"")!=="resolve")throw Error("Invalid source action. Use 'resolve'.");let a=n[2];if(!a)throw Error("Usage: open-knowledge source resolve <source-ref>");let c=X(i.home),u=F(c.configPath),_=z(u,c),d=await Me({dbPath:c.knowledgeDbPath,sourceRef:a,purpose:t.purpose,limit:t.limit,safetyPolicy:_});N({ok:!0,...d,message:d.resolved?`Resolved ${d.source_ref} (${d.content.chunks_returned}/${d.content.chunks_total} chunks)`:`Source not indexed: ${a}`},t.json);return}if(r==="ingest"){if((n[1]??"")!=="manifest")throw Error("Invalid ingest action. Use 'manifest'.");let a=n[2];if(!a)throw Error("Usage: open-knowledge ingest manifest <file|s3://bucket/key>");let c=X(i.home),u=F(c.configPath),_=z(u,c),d=await Ce({dbPath:c.knowledgeDbPath,input:a,config:u,safetyPolicy:_});N({ok:!0,...d,message:`Ingested ${d.items_seen} manifest item(s)`},t.json);return}if(r==="reindex"){if((n[1]??"")!=="outbox")throw Error("Invalid reindex action. 
Use 'outbox'.");let a=n[2];if(!a)throw Error("Usage: open-knowledge reindex outbox <file|s3://bucket/key>");let c=X(i.home),u=F(c.configPath),_=z(u,c),d=await je({dbPath:c.knowledgeDbPath,input:a,config:u,safetyPolicy:_});N({ok:!0,...d,message:`Consumed ${d.events_seen} outbox event(s)`},t.json);return}if(de(s),r==="add"){let o=n[1],a=n[2];if(!o||!a)throw Error("Usage: open-knowledge add <title> <content>");k(s,()=>{let c=L(s),u={id:_e(),title:o,content:a,url:t.url??null,tags:t.tag?[t.tag]:[],created_at:new Date().toISOString(),updated_at:new Date().toISOString()};c.items.push(u),A(s,c),M("info","Item added",{id:u.id,title:u.title}),N({ok:!0,item:u,message:`Added ${u.id}`},t.json)});return}if(r==="list"){if(t.format!==void 0&&t.format!=="table"&&t.format!=="json")throw Error("Invalid --format value for list. Use 'table' or 'json'.");k(s,()=>{let o=L(s),a=Number.isFinite(t.page)&&t.page>0?t.page:1,c=Number.isFinite(t.limit)&&t.limit>0?t.limit:20,u=t.search?String(t.search).toLowerCase():"",_=t.tag?String(t.tag).toLowerCase():"",d=t.format==="table"||!t.json&&!t.format&&In(t),f=t.json||t.format==="json",p=o.items;if(t.archived)p=p.filter((R)=>R.archived===!0);else if(!t.includeArchived)p=p.filter((R)=>!R.archived);if(u)p=p.filter((R)=>R.title.toLowerCase().includes(u)||R.content.toLowerCase().includes(u));if(_)p=p.filter((R)=>R.tags&&R.tags.map((se)=>se.toLowerCase()).includes(_));let{sorted:E,sort:O,direction:h}=Dn(p,t),m=(a-1)*c,g=E.slice(m,m+c),x=Math.max(1,Math.ceil(E.length/c));if(f){N({ok:!0,page:a,limit:c,total:E.length,total_pages:x,sort:O,direction:h,items:g},!0);return}if(g.length===0){N(`No items found (search=${u||"none"}, tag=${_||"none"})`,!1);return}if(d){let R=(U)=>U,se=`${R("ID")} ${R("TITLE")} ${R("CREATED")} ${R("URL")} ${R("TAGS")}`;console.log(se);for(let U of g)console.log(`${U.id} ${R(U.title)} ${U.created_at} ${U.url?R(U.url):""} ${U.tags?.length?R(`[${U.tags.join(", ")}]`):""}`);console.log(`Page ${a}/${x} | showing ${g.length} of ${E.length} 
| sort=${O} ${h} | search=${u||"none"} | tag=${_||"none"}`)}else{for(let R of g)console.log(`${R.id} ${R.title} ${R.created_at}${R.url?` ${R.url}`:""}${R.tags?.length?` [${R.tags.join(", ")}]`:""}`);console.log(`Page ${a}/${x} | showing ${g.length} of ${E.length} | sort=${O} ${h} | search=${u||"none"} | tag=${_||"none"}`)}});return}if(r==="get"){J(t),k(s,()=>{let a=L(s).items.find((c)=>c.id===t.id||c.short_id===t.id);if(!a)throw Error(`Item not found: ${t.id}`);N({ok:!0,item:a,message:`${a.id}: ${a.title}`},t.json)});return}if(r==="update"){J(t),k(s,()=>{let o=L(s),a=o.items.findIndex((u)=>u.id===t.id||u.short_id===t.id);if(a===-1)throw Error(`Item not found: ${t.id}`);let c=o.items[a];if(t.title!==void 0)c.title=t.title;if(t.content!==void 0)c.content=t.content;if(t.url!==void 0)c.url=t.url;if(t.tag!==void 0){if(c.tags=c.tags||[],!c.tags.map((u)=>u.toLowerCase()).includes(t.tag.toLowerCase()))c.tags.push(t.tag)}c.updated_at=new Date().toISOString(),o.items[a]=c,A(s,o),N({ok:!0,item:c,message:`Updated ${c.id}`},t.json)});return}if(r==="archive"||r==="restore"){J(t),k(s,()=>{let o=L(s),a=o.items.findIndex((u)=>u.id===t.id||u.short_id===t.id);if(a===-1)throw Error(`Item not found: ${t.id}`);let c=o.items[a];c.archived=r==="archive",c.updated_at=new Date().toISOString(),o.items[a]=c,A(s,o),N({ok:!0,item:c,message:`${r==="archive"?"Archived":"Restored"} ${c.id}`},t.json)});return}if(r==="untag"){if(J(t),!t.tag)throw Error("Missing required --tag. 
Example: open-knowledge untag --id <id> -t <tag>");k(s,()=>{let o=L(s),a=o.items.findIndex((_)=>_.id===t.id||_.short_id===t.id);if(a===-1)throw Error(`Item not found: ${t.id}`);let c=o.items[a],u=c.tags?.length??0;c.tags=(c.tags??[]).filter((_)=>_.toLowerCase()!==t.tag.toLowerCase()),c.updated_at=new Date().toISOString(),o.items[a]=c,A(s,o),N({ok:!0,item:c,removed:u-c.tags.length,message:`Removed tag from ${c.id}`},t.json)});return}if(r==="upsert"){let o=t.title??n[1],a=t.content??n[2];k(s,()=>{let c=L(s),u=t.id?c.items.findIndex((f)=>f.id===t.id||f.short_id===t.id):-1,_=new Date().toISOString();if(u===-1){if(!o||!a)throw Error("New item requires title and content. Example: open-knowledge upsert <title> <content> [--id <id>]");let f=t.id??_e(),p={id:f,short_id:Re(f),title:o,content:a,url:t.url??null,tags:t.tag?[t.tag]:[],metadata:{},archived:!1,created_at:_,updated_at:_};c.items.push(p),A(s,c),N({ok:!0,created:!0,item:p,message:`Upserted ${p.id}`},t.json);return}let d=c.items[u];if(o!==void 0)d.title=o;if(a!==void 0)d.content=a;if(t.url!==void 0)d.url=t.url;if(t.tag!==void 0){if(d.tags=d.tags||[],!d.tags.map((f)=>f.toLowerCase()).includes(t.tag.toLowerCase()))d.tags.push(t.tag)}d.updated_at=_,c.items[u]=d,A(s,c),N({ok:!0,created:!1,item:d,message:`Upserted ${d.id}`},t.json)});return}if(r==="delete"){if(J(t),!t.yes)throw Error("Refusing delete without --yes. Re-run with: open-knowledge delete --id <id> --yes");k(s,()=>{let o=L(s),a=o.items.length;o.items=o.items.filter((u)=>u.id!==t.id&&u.short_id!==t.id);let c=a!==o.items.length;if(A(s,o),!c)throw Error(`Item not found: ${t.id}`);M("info","Item deleted",{id:t.id}),N({ok:!0,deleted_id:t.id,message:`Deleted ${t.id}`},t.json)});return}if(r==="export"){let o=t.format??"json";if(o!=="json"&&o!=="jsonl")throw Error("Invalid --format. 
Use 'json' or 'jsonl'.");k(s,()=>{let a=L(s);if(o==="jsonl")for(let c of a.items)console.log(JSON.stringify(c));else N({ok:!0,items:a.items},t.json)});return}if(r==="prune"){if(!t.yes)throw Error("Refusing prune without --yes. Re-run with: open-knowledge prune --yes [--older-than <days>] [--empty]");k(s,()=>{let o=L(s),a=o.items.length;if(t.olderThan!==void 0){let u=new Date;u.setDate(u.getDate()-t.olderThan),o.items=o.items.filter((_)=>new Date(_.created_at)>=u)}if(t.empty)o.items=o.items.filter((u)=>u.content.trim().length>0);let c=a-o.items.length;A(s,o),M("info","Prune completed",{pruned:c,remaining:o.items.length}),N({ok:!0,pruned:c,remaining:o.items.length,message:`Pruned ${c} item(s)`},t.json)});return}if(r==="dedupe"){if(!t.yes)throw Error("Refusing dedupe without --yes. Re-run with: open-knowledge dedupe --yes [--json]");k(s,()=>{let o=L(s),a=new Set,c=o.items.length;o.items=o.items.filter((_)=>{let d=`${_.title}\x00${_.content}`;if(a.has(d))return!1;return a.add(d),!0});let u=c-o.items.length;A(s,o),M("info","Dedupe completed",{removed:u,remaining:o.items.length}),N({ok:!0,removed:u,remaining:o.items.length,message:`Dedupe removed ${u} duplicate(s)`},t.json)});return}if(r==="stats"){k(s,()=>{let o=L(s),a=o.items.filter((h)=>!h.archived),c=a.length,u=o.items.length-c,_=a.filter((h)=>h.url).length,d=a.filter((h)=>h.tags&&h.tags.length>0).length,f=c>0?a.map((h)=>h.created_at).sort()[0]:null,p=c>0?a.map((h)=>h.created_at).sort()[c-1]:null,E={};for(let h of a)for(let m of h.tags||[])E[m]=(E[m]||0)+1;let O=Object.entries(E).sort((h,m)=>m[1]-h[1]).slice(0,5).map(([h,m])=>({tag:h,count:m}));N({ok:!0,total:c,archived:u,with_url:_,with_tags:d,oldest:f,newest:p,top_tags:O,message:`${c} items | ${_} with URL | ${d} with tags`},t.json)});return}let l=xn(n[0]),T=l?` Did you mean '${l}'?`:"";throw M("warn","Unknown command",{input:n[0],suggestion:l}),Error(`Unknown command: ${n[0]}.${T} Run 'open-knowledge --help' for available 
commands.`)}if(import.meta.main)Un(process.argv.slice(2)).catch((e)=>{let n=e instanceof Error?e.message:String(e);M("error","CLI error",{message:n,stack:e instanceof Error?e.stack:void 0}),console.error(`Error: ${n}`),process.exitCode=1});export{xn as suggestCommand,Dn as sortItems,Un as run,wn as parseArgs};
382
+ --empty Remove items with empty content`)}function Hn(e){if(e==="add"){console.log("Usage: open-knowledge add <title> <content> [--url <url>] [-t <tag>] [--json]");return}if(e==="list"||e==="ls"){console.log("Usage: open-knowledge list|ls [--format table|json] [-p <page>] [-l <limit>] [-s <search>] [-t <tag>] [--sort created|title] [--desc] [--json]");return}if(e==="get"){console.log("Usage: open-knowledge get --id <id> [--json]");return}if(e==="update"||e==="edit"){console.log("Usage: open-knowledge update|edit --id <id> [--title <title>] [--content <content>] [--url <url>] [-t <tag>] [--json]");return}if(e==="archive"){console.log("Usage: open-knowledge archive --id <id> [--json]");return}if(e==="restore"||e==="unarchive"){console.log("Usage: open-knowledge restore|unarchive --id <id> [--json]");return}if(e==="upsert"){console.log("Usage: open-knowledge upsert [title] [content] [--id <id>] [--title <title>] [--content <content>] [--url <url>] [-t <tag>] [--json]");return}if(e==="untag"){console.log("Usage: open-knowledge untag --id <id> -t <tag> [--json]");return}if(e==="delete"||e==="rm"){console.log("Usage: open-knowledge delete|rm --id <id> -y [--json]");return}if(e==="export"){console.log("Usage: open-knowledge export [--format jsonl] [--json]");return}if(e==="prune"){console.log("Usage: open-knowledge prune --yes [--older-than <days>] [--empty] [--json]");return}if(e==="dedupe"){console.log("Usage: open-knowledge dedupe --yes [--json]");return}if(e==="stats"){console.log("Usage: open-knowledge stats [--json]");return}if(e==="paths"){console.log("Usage: open-knowledge paths [--scope local|global|project] [--json]");return}if(e==="db"){console.log("Usage: open-knowledge db init|stats [--scope local|global|project] [--json]");return}if(e==="wiki"){console.log("Usage: open-knowledge wiki init [--scope local|global|project] [--json]");return}if(e==="source"){console.log("Usage: open-knowledge source resolve <source-ref> [--purpose 
knowledge_answer|knowledge_index] [--limit <n>] [--scope local|global|project] [--json]");return}if(e==="ingest"){console.log("Usage: open-knowledge ingest manifest <file|s3://bucket/key> | source <source-ref> [--purpose knowledge_index] [--scope local|global|project] [--json]");return}if(e==="reindex"){console.log("Usage: open-knowledge reindex outbox <file|s3://bucket/key> [--scope local|global|project] [--json]");return}if(e==="safety"){console.log("Usage: open-knowledge safety status|check|approve|audit|redact [args] [--scope local|global|project] [--json]");return}zn()}function qn(e){if(e.noColor||process.env.NO_COLOR)return!1;if(process.env.FORCE_COLOR)return!0;return process.stdout.isTTY===!0}function m(e,n,t){if(n){console.log(JSON.stringify(e,null,2));return}if(typeof e==="string"){console.log(e);return}console.log(e.message??JSON.stringify(e,null,2))}function G(e){if(!e.id)throw Error("Missing required --id. Example: open-knowledge get --id <id>")}function Yn(e,n){let t=n.sort??"created";if(t!=="created"&&t!=="title")throw Error("Invalid --sort value. 
Use 'created' or 'title'.");let r=[...e].sort((i,s)=>{if(t==="title")return i.title.localeCompare(s.title);return i.created_at.localeCompare(s.created_at)});if(n.desc)r.reverse();return{sorted:r,sort:t,direction:n.desc?"desc":"asc"}}async function Jn(e){let{positional:n,flags:t}=Pn(e);if(P("debug","CLI invoked",{command:n[0],flags:{json:t.json,store:t.store}}),t.version){console.log(t.json?JSON.stringify({name:J.name,version:J.version},null,2):`${J.name} ${J.version}`);return}if(t.completions){let o=t.completions;if(o==="bash")console.log('_open_knowledge() { local cur; cur="${COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki source ingest reindex safety help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --purpose --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge');else if(o==="zsh")console.log(`#compdef open-knowledge
383
+ _open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki source ingest reindex safety help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"{created,title}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--purpose)--purpose[purpose]:" "(--no-color)--no-color[disable color]" "(--scope)--scope"{local,global,project}:" }; _open_knowledge`);else if(o==="fish")console.log('complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki source ingest reindex safety help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l 
purpose; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"');else throw Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");return}let r=$n(n[0]);if(!r||t.help||r==="help"){Hn(n[1]);return}let i=Se(t.scope),s=t.store;if(!s)if(t.scope==="project"||t.scope==="local")s=X(i.home).jsonStorePath;else s=Ee();if(r==="paths"){let o=X(i.home);m({ok:!0,scope:t.scope??"global",home:o.home,config_path:o.configPath,json_store_path:o.jsonStorePath,knowledge_db_path:o.knowledgeDbPath,artifacts_dir:o.artifactsDir,indexes_dir:o.indexesDir,logs_dir:o.logsDir,runs_dir:o.runsDir,schemas_dir:o.schemasDir,wiki_dir:o.wikiDir,config:M(o.configPath),message:o.home},t.json);return}if(r==="db"){let o=n[1]??"init",a=X(i.home);if(o!=="init"&&o!=="stats")throw Error("Invalid db action. Use 'init' or 'stats'.");if(o==="init"){let u=D(a.knowledgeDbPath);m({ok:!0,...u,message:`Initialized ${u.path}`},t.json);return}D(a.knowledgeDbPath);let c=we(a.knowledgeDbPath);m({ok:!0,path:a.knowledgeDbPath,...c,message:`knowledge.db schema v${c.schema_version}`},t.json);return}if(r==="wiki"){if((n[1]??"init")!=="init")throw Error("Invalid wiki action. 
Use 'init'.");let a=X(i.home),c=M(a.configPath),u=Ae(c,a),l=await Ie(u);m({ok:!0,...l,message:`Initialized wiki layout in ${a.home}`},t.json);return}if(r==="safety"){let o=n[1]??"status",a=X(i.home),c=M(a.configPath),u=Y(c,a);D(a.knowledgeDbPath);let l=C(a.knowledgeDbPath);try{if(o==="status"){m({ok:!0,mode:u.mode,workspace:a.home,allow_write_roots:u.allowWriteRoots,read_only_source_access:u.readOnlySourceAccess,network:u.network,redaction:u.redaction,approvals:u.approvals,message:`Safety policy: ${u.mode}`},t.json);return}if(o==="check"){let d=n[2]??"generated_write",f=n[3]??null,T;try{if(d==="web_search")ne(u),T={action:d,target_uri:f,approval_required:!1,approved:!0,decision:"allow"};else if(d==="s3_read"){if(!f)throw Error("safety check s3_read requires an s3:// target.");F(f,u),T={action:d,target_uri:f,approval_required:!1,approved:!0,decision:"allow"}}else T=Me(l,u,d,f);S(l,{event_type:"safety_check",action:d,target_uri:f,decision:T.decision==="allow"?"allow":"requires_approval",metadata:T}),m({ok:!0,...T,message:`Safety check ${T.decision}`},t.json);return}catch(p){throw S(l,{event_type:"safety_check",action:d,target_uri:f,decision:"deny",metadata:{error:p instanceof Error?p.message:String(p)}}),p}}if(o==="approve"){let d=n[2]??"generated_write",f=n[3]??null,T=Fe(l,{action:d,target_uri:f,reason:"local-cli approval",metadata:{scope:t.scope??"global"}});S(l,{event_type:"approval",action:d,target_uri:f,decision:"allow",metadata:{approval_id:T.id}}),m({ok:!0,...T,action:d,target_uri:f,message:`Approved ${d}`},t.json);return}if(o==="audit"){let d=l.query("SELECT id, event_type, action, target_uri, decision, metadata_json, created_at FROM audit_events ORDER BY created_at DESC LIMIT 50").all().map((f)=>({id:f.id,event_type:f.event_type,action:f.action,target_uri:f.target_uri,decision:f.decision,metadata:JSON.parse(f.metadata_json),created_at:f.created_at}));m({ok:!0,events:d,message:`${d.length} audit event(s)`},t.json);return}if(o==="redact"){let 
d=n.slice(2).join(" ");if(!d)throw Error("Usage: open-knowledge safety redact <text>");let f=re(d,u);if(f.findings.length>0)ie(l,{source_uri:"safety://redact",findings:f.findings,metadata:{command:"safety redact"}});S(l,{event_type:"redaction",action:"safety_redact",target_uri:"safety://redact",decision:f.findings.length>0?"redacted":"allow",metadata:{findings:f.findings.length}}),m({ok:!0,text:f.text,findings:f.findings,message:`Redacted ${f.findings.length} finding(s)`},t.json);return}throw Error("Invalid safety action. Use 'status', 'check', 'approve', 'audit', or 'redact'.")}finally{l.close()}}if(r==="source"){if((n[1]??"")!=="resolve")throw Error("Invalid source action. Use 'resolve'.");let a=n[2];if(!a)throw Error("Usage: open-knowledge source resolve <source-ref>");let c=X(i.home),u=M(c.configPath),l=Y(u,c),d=await oe({dbPath:c.knowledgeDbPath,sourceRef:a,purpose:t.purpose,limit:t.limit,safetyPolicy:l});m({ok:!0,...d,message:d.resolved?`Resolved ${d.source_ref} (${d.content.chunks_returned}/${d.content.chunks_total} chunks)`:`Source not indexed: ${a}`},t.json);return}if(r==="ingest"){let o=n[1]??"",a=X(i.home),c=M(a.configPath),u=Y(c,a);if(o==="manifest"){let l=n[2];if(!l)throw Error("Usage: open-knowledge ingest manifest <file|s3://bucket/key>");let d=await Ke({dbPath:a.knowledgeDbPath,input:l,config:c,safetyPolicy:u});m({ok:!0,...d,message:`Ingested ${d.items_seen} manifest item(s)`},t.json);return}if(o==="source"){let l=n[2];if(!l)throw Error("Usage: open-knowledge ingest source <source-ref>");let d=await Be({dbPath:a.knowledgeDbPath,sourceRef:l,purpose:t.purpose,config:c,safetyPolicy:u});m({ok:!0,...d,message:`Ingested source ${d.source_ref} (${d.chunks_inserted} chunks)`},t.json);return}throw Error("Invalid ingest action. Use 'manifest' or 'source'.")}if(r==="reindex"){if((n[1]??"")!=="outbox")throw Error("Invalid reindex action. 
Use 'outbox'.");let a=n[2];if(!a)throw Error("Usage: open-knowledge reindex outbox <file|s3://bucket/key>");let c=X(i.home),u=M(c.configPath),l=Y(u,c),d=await ze({dbPath:c.knowledgeDbPath,input:a,config:u,safetyPolicy:l});m({ok:!0,...d,message:`Consumed ${d.events_seen} outbox event(s)`},t.json);return}if(pe(s),r==="add"){let o=n[1],a=n[2];if(!o||!a)throw Error("Usage: open-knowledge add <title> <content>");w(s,()=>{let c=O(s),u={id:Te(),title:o,content:a,url:t.url??null,tags:t.tag?[t.tag]:[],created_at:new Date().toISOString(),updated_at:new Date().toISOString()};c.items.push(u),U(s,c),P("info","Item added",{id:u.id,title:u.title}),m({ok:!0,item:u,message:`Added ${u.id}`},t.json)});return}if(r==="list"){if(t.format!==void 0&&t.format!=="table"&&t.format!=="json")throw Error("Invalid --format value for list. Use 'table' or 'json'.");w(s,()=>{let o=O(s),a=Number.isFinite(t.page)&&t.page>0?t.page:1,c=Number.isFinite(t.limit)&&t.limit>0?t.limit:20,u=t.search?String(t.search).toLowerCase():"",l=t.tag?String(t.tag).toLowerCase():"",d=t.format==="table"||!t.json&&!t.format&&qn(t),f=t.json||t.format==="json",T=o.items;if(t.archived)T=T.filter((y)=>y.archived===!0);else if(!t.includeArchived)T=T.filter((y)=>!y.archived);if(u)T=T.filter((y)=>y.title.toLowerCase().includes(u)||y.content.toLowerCase().includes(u));if(l)T=T.filter((y)=>y.tags&&y.tags.map((de)=>de.toLowerCase()).includes(l));let{sorted:p,sort:v,direction:R}=Yn(T,t),x=(a-1)*c,g=p.slice(x,x+c),k=Math.max(1,Math.ceil(p.length/c));if(f){m({ok:!0,page:a,limit:c,total:p.length,total_pages:k,sort:v,direction:R,items:g},!0);return}if(g.length===0){m(`No items found (search=${u||"none"}, tag=${l||"none"})`,!1);return}if(d){let y=(j)=>j,de=`${y("ID")} ${y("TITLE")} ${y("CREATED")} ${y("URL")} ${y("TAGS")}`;console.log(de);for(let j of g)console.log(`${j.id} ${y(j.title)} ${j.created_at} ${j.url?y(j.url):""} ${j.tags?.length?y(`[${j.tags.join(", ")}]`):""}`);console.log(`Page ${a}/${k} | showing ${g.length} of ${p.length} 
| sort=${v} ${R} | search=${u||"none"} | tag=${l||"none"}`)}else{for(let y of g)console.log(`${y.id} ${y.title} ${y.created_at}${y.url?` ${y.url}`:""}${y.tags?.length?` [${y.tags.join(", ")}]`:""}`);console.log(`Page ${a}/${k} | showing ${g.length} of ${p.length} | sort=${v} ${R} | search=${u||"none"} | tag=${l||"none"}`)}});return}if(r==="get"){G(t),w(s,()=>{let a=O(s).items.find((c)=>c.id===t.id||c.short_id===t.id);if(!a)throw Error(`Item not found: ${t.id}`);m({ok:!0,item:a,message:`${a.id}: ${a.title}`},t.json)});return}if(r==="update"){G(t),w(s,()=>{let o=O(s),a=o.items.findIndex((u)=>u.id===t.id||u.short_id===t.id);if(a===-1)throw Error(`Item not found: ${t.id}`);let c=o.items[a];if(t.title!==void 0)c.title=t.title;if(t.content!==void 0)c.content=t.content;if(t.url!==void 0)c.url=t.url;if(t.tag!==void 0){if(c.tags=c.tags||[],!c.tags.map((u)=>u.toLowerCase()).includes(t.tag.toLowerCase()))c.tags.push(t.tag)}c.updated_at=new Date().toISOString(),o.items[a]=c,U(s,o),m({ok:!0,item:c,message:`Updated ${c.id}`},t.json)});return}if(r==="archive"||r==="restore"){G(t),w(s,()=>{let o=O(s),a=o.items.findIndex((u)=>u.id===t.id||u.short_id===t.id);if(a===-1)throw Error(`Item not found: ${t.id}`);let c=o.items[a];c.archived=r==="archive",c.updated_at=new Date().toISOString(),o.items[a]=c,U(s,o),m({ok:!0,item:c,message:`${r==="archive"?"Archived":"Restored"} ${c.id}`},t.json)});return}if(r==="untag"){if(G(t),!t.tag)throw Error("Missing required --tag. 
Example: open-knowledge untag --id <id> -t <tag>");w(s,()=>{let o=O(s),a=o.items.findIndex((l)=>l.id===t.id||l.short_id===t.id);if(a===-1)throw Error(`Item not found: ${t.id}`);let c=o.items[a],u=c.tags?.length??0;c.tags=(c.tags??[]).filter((l)=>l.toLowerCase()!==t.tag.toLowerCase()),c.updated_at=new Date().toISOString(),o.items[a]=c,U(s,o),m({ok:!0,item:c,removed:u-c.tags.length,message:`Removed tag from ${c.id}`},t.json)});return}if(r==="upsert"){let o=t.title??n[1],a=t.content??n[2];w(s,()=>{let c=O(s),u=t.id?c.items.findIndex((f)=>f.id===t.id||f.short_id===t.id):-1,l=new Date().toISOString();if(u===-1){if(!o||!a)throw Error("New item requires title and content. Example: open-knowledge upsert <title> <content> [--id <id>]");let f=t.id??Te(),T={id:f,short_id:Oe(f),title:o,content:a,url:t.url??null,tags:t.tag?[t.tag]:[],metadata:{},archived:!1,created_at:l,updated_at:l};c.items.push(T),U(s,c),m({ok:!0,created:!0,item:T,message:`Upserted ${T.id}`},t.json);return}let d=c.items[u];if(o!==void 0)d.title=o;if(a!==void 0)d.content=a;if(t.url!==void 0)d.url=t.url;if(t.tag!==void 0){if(d.tags=d.tags||[],!d.tags.map((f)=>f.toLowerCase()).includes(t.tag.toLowerCase()))d.tags.push(t.tag)}d.updated_at=l,c.items[u]=d,U(s,c),m({ok:!0,created:!1,item:d,message:`Upserted ${d.id}`},t.json)});return}if(r==="delete"){if(G(t),!t.yes)throw Error("Refusing delete without --yes. Re-run with: open-knowledge delete --id <id> --yes");w(s,()=>{let o=O(s),a=o.items.length;o.items=o.items.filter((u)=>u.id!==t.id&&u.short_id!==t.id);let c=a!==o.items.length;if(U(s,o),!c)throw Error(`Item not found: ${t.id}`);P("info","Item deleted",{id:t.id}),m({ok:!0,deleted_id:t.id,message:`Deleted ${t.id}`},t.json)});return}if(r==="export"){let o=t.format??"json";if(o!=="json"&&o!=="jsonl")throw Error("Invalid --format. 
Use 'json' or 'jsonl'.");w(s,()=>{let a=O(s);if(o==="jsonl")for(let c of a.items)console.log(JSON.stringify(c));else m({ok:!0,items:a.items},t.json)});return}if(r==="prune"){if(!t.yes)throw Error("Refusing prune without --yes. Re-run with: open-knowledge prune --yes [--older-than <days>] [--empty]");w(s,()=>{let o=O(s),a=o.items.length;if(t.olderThan!==void 0){let u=new Date;u.setDate(u.getDate()-t.olderThan),o.items=o.items.filter((l)=>new Date(l.created_at)>=u)}if(t.empty)o.items=o.items.filter((u)=>u.content.trim().length>0);let c=a-o.items.length;U(s,o),P("info","Prune completed",{pruned:c,remaining:o.items.length}),m({ok:!0,pruned:c,remaining:o.items.length,message:`Pruned ${c} item(s)`},t.json)});return}if(r==="dedupe"){if(!t.yes)throw Error("Refusing dedupe without --yes. Re-run with: open-knowledge dedupe --yes [--json]");w(s,()=>{let o=O(s),a=new Set,c=o.items.length;o.items=o.items.filter((l)=>{let d=`${l.title}\x00${l.content}`;if(a.has(d))return!1;return a.add(d),!0});let u=c-o.items.length;U(s,o),P("info","Dedupe completed",{removed:u,remaining:o.items.length}),m({ok:!0,removed:u,remaining:o.items.length,message:`Dedupe removed ${u} duplicate(s)`},t.json)});return}if(r==="stats"){w(s,()=>{let o=O(s),a=o.items.filter((R)=>!R.archived),c=a.length,u=o.items.length-c,l=a.filter((R)=>R.url).length,d=a.filter((R)=>R.tags&&R.tags.length>0).length,f=c>0?a.map((R)=>R.created_at).sort()[0]:null,T=c>0?a.map((R)=>R.created_at).sort()[c-1]:null,p={};for(let R of a)for(let x of R.tags||[])p[x]=(p[x]||0)+1;let v=Object.entries(p).sort((R,x)=>x[1]-R[1]).slice(0,5).map(([R,x])=>({tag:R,count:x}));m({ok:!0,total:c,archived:u,with_url:l,with_tags:d,oldest:f,newest:T,top_tags:v,message:`${c} items | ${l} with URL | ${d} with tags`},t.json)});return}let _=Wn(n[0]),E=_?` Did you mean '${_}'?`:"";throw P("warn","Unknown command",{input:n[0],suggestion:_}),Error(`Unknown command: ${n[0]}.${E} Run 'open-knowledge --help' for available 
commands.`)}if(import.meta.main)Jn(process.argv.slice(2)).catch((e)=>{let n=e instanceof Error?e.message:String(e);P("error","CLI error",{message:n,stack:e instanceof Error?e.stack:void 0}),console.error(`Error: ${n}`),process.exitCode=1});export{Wn as suggestCommand,Yn as sortItems,Jn as run,Pn as parseArgs};
@@ -92,6 +92,7 @@ The local resolver is exposed through:
92
92
 
93
93
  ```bash
94
94
  open-knowledge source resolve <source-ref> --purpose knowledge_answer --json
95
+ open-knowledge ingest source <source-ref> --purpose knowledge_index --json
95
96
  ```
96
97
 
97
98
  and the MCP tool `ok_resolve_source`. It reads the knowledge catalog only,
@@ -99,6 +100,11 @@ enforces the read-only purpose labels imported from `open-files`, returns source
99
100
  metadata, selected revision metadata, derived chunks, and citation evidence, and
100
101
  records an audit event. It never returns raw bytes or storage credentials.
101
102
 
103
+ `ingest source` uses the same boundary for indexing. It accepts `open-files://`,
104
+ `file://`, `s3://`, and `https://` refs, applies S3/web safety gates, converts
105
+ allowed extracted text into redacted chunks with offsets, records hashes and
106
+ revisions, and stores only derived knowledge records.
107
+
102
108
  In future hosted mode, the same result shape can be backed by a remote
103
109
  open-files resolver API. The local OSS package should keep using the shared
104
110
  service boundary so CLI, MCP, and SaaS wrappers do not grow separate permission
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hasna/knowledge",
3
- "version": "0.2.8",
3
+ "version": "0.2.9",
4
4
  "description": "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli.ts CHANGED
@@ -10,6 +10,7 @@ import { getKnowledgeDbStats, migrateKnowledgeDb, openKnowledgeDb } from './know
10
10
  import { createArtifactStore } from './artifact-store';
11
11
  import { initializeWikiLayout } from './wiki-layout';
12
12
  import { ingestOpenFilesManifest } from './manifest-ingest';
13
+ import { ingestSourceRef } from './source-ingest';
13
14
  import { consumeOpenFilesOutbox } from './outbox-consume';
14
15
  import { resolveOpenFilesSource } from './source-resolver';
15
16
  import { approvalStatus, assertS3ReadAllowed, assertWebSearchAllowed, createApprovalGate, recordAuditEvent, recordRedactionFindings, redactSecrets, resolveSafetyPolicy } from './safety';
@@ -170,6 +171,7 @@ Commands:
170
171
  wiki init Initialize scalable wiki/schema/index/log artifacts
171
172
  source resolve <source-ref> Resolve read-only source content and citation evidence
172
173
  ingest manifest <file|s3://> Ingest an open-files manifest into knowledge.db
174
+ ingest source <source-ref> Ingest a read-only source ref into knowledge.db
173
175
  reindex outbox <file|s3://> Consume open-files change events and invalidate chunks
174
176
  safety status|check|approve|audit|redact
175
177
  help [command] Show help
@@ -235,7 +237,7 @@ function printCommandHelp(command: string): void {
235
237
  if (command === 'db') { console.log('Usage: open-knowledge db init|stats [--scope local|global|project] [--json]'); return; }
236
238
  if (command === 'wiki') { console.log('Usage: open-knowledge wiki init [--scope local|global|project] [--json]'); return; }
237
239
  if (command === 'source') { console.log('Usage: open-knowledge source resolve <source-ref> [--purpose knowledge_answer|knowledge_index] [--limit <n>] [--scope local|global|project] [--json]'); return; }
238
- if (command === 'ingest') { console.log('Usage: open-knowledge ingest manifest <file|s3://bucket/key> [--scope local|global|project] [--json]'); return; }
240
+ if (command === 'ingest') { console.log('Usage: open-knowledge ingest manifest <file|s3://bucket/key> | source <source-ref> [--purpose knowledge_index] [--scope local|global|project] [--json]'); return; }
239
241
  if (command === 'reindex') { console.log('Usage: open-knowledge reindex outbox <file|s3://bucket/key> [--scope local|global|project] [--json]'); return; }
240
242
  if (command === 'safety') { console.log('Usage: open-knowledge safety status|check|approve|audit|redact [args] [--scope local|global|project] [--json]'); return; }
241
243
  printGlobalHelp();
@@ -509,20 +511,35 @@ async function run(argv: string[]): Promise<void> {
509
511
 
510
512
  if (command === 'ingest') {
511
513
  const action = positional[1] ?? '';
512
- if (action !== 'manifest') throw new Error("Invalid ingest action. Use 'manifest'.");
513
- const input = positional[2];
514
- if (!input) throw new Error('Usage: open-knowledge ingest manifest <file|s3://bucket/key>');
515
514
  const resolvedWorkspace = ensureKnowledgeWorkspace(workspace.home);
516
515
  const config = readKnowledgeConfig(resolvedWorkspace.configPath);
517
516
  const safetyPolicy = resolveSafetyPolicy(config, resolvedWorkspace);
518
- const result = await ingestOpenFilesManifest({
519
- dbPath: resolvedWorkspace.knowledgeDbPath,
520
- input,
521
- config,
522
- safetyPolicy,
523
- });
524
- output({ ok: true, ...result, message: `Ingested ${result.items_seen} manifest item(s)` }, flags.json);
525
- return;
517
+ if (action === 'manifest') {
518
+ const input = positional[2];
519
+ if (!input) throw new Error('Usage: open-knowledge ingest manifest <file|s3://bucket/key>');
520
+ const result = await ingestOpenFilesManifest({
521
+ dbPath: resolvedWorkspace.knowledgeDbPath,
522
+ input,
523
+ config,
524
+ safetyPolicy,
525
+ });
526
+ output({ ok: true, ...result, message: `Ingested ${result.items_seen} manifest item(s)` }, flags.json);
527
+ return;
528
+ }
529
+ if (action === 'source') {
530
+ const sourceRef = positional[2];
531
+ if (!sourceRef) throw new Error('Usage: open-knowledge ingest source <source-ref>');
532
+ const result = await ingestSourceRef({
533
+ dbPath: resolvedWorkspace.knowledgeDbPath,
534
+ sourceRef,
535
+ purpose: flags.purpose,
536
+ config,
537
+ safetyPolicy,
538
+ });
539
+ output({ ok: true, ...result, message: `Ingested source ${result.source_ref} (${result.chunks_inserted} chunks)` }, flags.json);
540
+ return;
541
+ }
542
+ throw new Error("Invalid ingest action. Use 'manifest' or 'source'.");
526
543
  }
527
544
 
528
545
  if (command === 'reindex') {
@@ -24,6 +24,17 @@ export interface ManifestIngestOptions {
24
24
  chunkOverlapChars?: number;
25
25
  }
26
26
 
27
/**
 * Options for ingesting manifest items that have already been parsed.
 */
export interface ManifestItemsIngestOptions {
  /** Path of the knowledge database that is migrated, opened, and written to. */
  dbPath: string;
  /** Manifest items to ingest; each one is normalized and upserted in turn. */
  items: ManifestObject[];
  /** Label recorded as the audit `target_uri` of the read event and returned as the result `path`. */
  sourceLabel: string;
  /**
   * Audit action name for the read event. When omitted, `s3_manifest_read` is used
   * if `sourceLabel` starts with `s3://`, otherwise `local_manifest_read`.
   */
  readAction?: string;
  /** When provided, the write to `dbPath` is checked against this policy before ingesting. */
  safetyPolicy?: SafetyPolicy;
  /** Timestamp source for this run; defaults to the current time. */
  now?: Date;
  /** Maximum chunk size in characters; defaults to 4000. */
  maxChunkChars?: number;
  /** Overlap between chunks in characters; defaults to 200. */
  chunkOverlapChars?: number;
}
37
+
27
38
  export interface ManifestIngestResult {
28
39
  path: string;
29
40
  db_path: string;
@@ -36,7 +47,7 @@ export interface ManifestIngestResult {
36
47
  skipped: number;
37
48
  }
38
49
 
39
- type ManifestObject = Record<string, unknown>;
50
+ export type ManifestObject = Record<string, unknown>;
40
51
 
41
52
  interface NormalizedManifestItem {
42
53
  raw: ManifestObject;
@@ -405,6 +416,23 @@ function insertChunks(db: Database, sourceRevisionId: string, item: NormalizedMa
405
416
  }
406
417
 
407
418
  export async function ingestOpenFilesManifest(options: ManifestIngestOptions): Promise<ManifestIngestResult> {
419
+ const now = options.now ?? new Date();
420
+ if (options.safetyPolicy) assertWriteAllowed(options.dbPath, options.safetyPolicy);
421
+ migrateKnowledgeDb(options.dbPath);
422
+ const text = await readManifestInput(options.input, options.config, options.safetyPolicy);
423
+ const items = parseManifestText(text);
424
+ return ingestOpenFilesManifestItems({
425
+ dbPath: options.dbPath,
426
+ items,
427
+ sourceLabel: options.input,
428
+ safetyPolicy: options.safetyPolicy,
429
+ now,
430
+ maxChunkChars: options.maxChunkChars,
431
+ chunkOverlapChars: options.chunkOverlapChars,
432
+ });
433
+ }
434
+
435
+ export async function ingestOpenFilesManifestItems(options: ManifestItemsIngestOptions): Promise<ManifestIngestResult> {
408
436
  const now = (options.now ?? new Date()).toISOString();
409
437
  const maxChunkChars = options.maxChunkChars ?? 4000;
410
438
  const chunkOverlapChars = options.chunkOverlapChars ?? 200;
@@ -413,8 +441,6 @@ export async function ingestOpenFilesManifest(options: ManifestIngestOptions): P
413
441
 
414
442
  if (options.safetyPolicy) assertWriteAllowed(options.dbPath, options.safetyPolicy);
415
443
  migrateKnowledgeDb(options.dbPath);
416
- const text = await readManifestInput(options.input, options.config, options.safetyPolicy);
417
- const items = parseManifestText(text);
418
444
  const db = openKnowledgeDb(options.dbPath);
419
445
  try {
420
446
  const result = db.transaction(() => {
@@ -426,13 +452,13 @@ export async function ingestOpenFilesManifest(options: ManifestIngestOptions): P
426
452
  let skipped = 0;
427
453
  recordAuditEvent(db, {
428
454
  event_type: 'source_read',
429
- action: options.input.startsWith('s3://') ? 's3_manifest_read' : 'local_manifest_read',
430
- target_uri: options.input,
455
+ action: options.readAction ?? (options.sourceLabel.startsWith('s3://') ? 's3_manifest_read' : 'local_manifest_read'),
456
+ target_uri: options.sourceLabel,
431
457
  decision: 'allow',
432
- metadata: { items: items.length, read_only: true },
458
+ metadata: { items: options.items.length, read_only: true },
433
459
  created_at: now,
434
460
  });
435
- for (const raw of items) {
461
+ for (const raw of options.items) {
436
462
  const item = normalizeManifestItem(raw, now);
437
463
  const sourceId = upsertSource(db, item, now);
438
464
  const revisionId = upsertRevision(db, sourceId, item, now);
@@ -450,13 +476,13 @@ export async function ingestOpenFilesManifest(options: ManifestIngestOptions): P
450
476
  action: 'knowledge_manifest_ingest',
451
477
  target_uri: options.dbPath,
452
478
  decision: 'allow',
453
- metadata: { items: items.length, sources: seenSources.size, revisions: seenRevisions.size, chunks_inserted: chunksInserted, redactions },
479
+ metadata: { items: options.items.length, sources: seenSources.size, revisions: seenRevisions.size, chunks_inserted: chunksInserted, redactions },
454
480
  created_at: now,
455
481
  });
456
482
  return {
457
- path: options.input,
483
+ path: options.sourceLabel,
458
484
  db_path: options.dbPath,
459
- items_seen: items.length,
485
+ items_seen: options.items.length,
460
486
  sources_upserted: seenSources.size,
461
487
  revisions_upserted: seenRevisions.size,
462
488
  chunks_inserted: chunksInserted,
@@ -0,0 +1,268 @@
1
+ import { createHash } from 'node:crypto';
2
+ import { existsSync, readFileSync } from 'node:fs';
3
+ import { basename } from 'node:path';
4
+ import { ingestOpenFilesManifestItems, type ManifestIngestResult, type ManifestObject } from './manifest-ingest';
5
+ import { parseSourceRef, type SourceRef } from './source-ref';
6
+ import { resolveOpenFilesSource } from './source-resolver';
7
+ import type { KnowledgeConfig } from './workspace';
8
+ import { assertS3ReadAllowed, assertWebSearchAllowed, type SafetyPolicy } from './safety';
9
+
10
/**
 * Options accepted by `ingestSourceRef`.
 */
export interface SourceIngestOptions {
  /** Path of the knowledge database used for catalog lookups and for storing the ingested item. */
  dbPath: string;
  /** Source reference to ingest; it is parsed with `parseSourceRef`. */
  sourceRef: string;
  /** Purpose label for the read; defaults to `knowledge_index`. */
  purpose?: string;
  /** Workspace configuration; forwarded to the S3 reader to pick client settings. */
  config?: KnowledgeConfig;
  /** When provided, S3 reads, web reads, and the database write are checked against it. */
  safetyPolicy?: SafetyPolicy;
  /** Timestamp source forwarded to the resolver and to the ingest step. */
  now?: Date;
}
18
+
19
/**
 * Result of `ingestSourceRef`: the manifest ingest summary plus details about
 * where the ingested text came from.
 */
export interface SourceIngestResult extends ManifestIngestResult {
  /** The source reference exactly as it was passed in. */
  source_ref: string;
  /**
   * Origin of the ingested text: chunks already in the catalog, a revision's
   * extracted-text reference, or a direct file / S3 / web read.
   */
  content_source: 'catalog_chunks' | 'extracted_text_ref' | 'file' | 's3' | 'web';
  /** Always `true`. */
  read_only: true;
  /** Hash recorded on the ingested manifest item, as a string. */
  hash: string;
}
25
+
26
/**
 * Text and descriptive fields gathered for one source before it is turned
 * into a manifest item.
 */
interface ResolvedText {
  /** The text that will be ingested. */
  text: string;
  /** Where `text` came from. */
  contentSource: SourceIngestResult['content_source'];
  /** Display title, if one is known. */
  title: string | null;
  /** MIME type, if one is known. */
  mime: string | null;
  /** Size of the content; the readers in this file set it to `text.length`. */
  size: number | null;
  /** Content hash; when null the manifest item falls back to a SHA-256 of `text`. */
  hash: string | null;
  /** Revision identifier; when null the manifest item falls back to the hash. */
  revision: string | null;
  /** URI of the extracted-text reference, if the source has one. */
  extractedTextRef: string | null;
  /** Source metadata merged into the manifest item's metadata. */
  metadata: Record<string, unknown>;
  /** Permission fields merged into the manifest item's permissions. */
  permissions: Record<string, unknown>;
}
38
+
39
/**
 * Hashes `text` with SHA-256.
 *
 * @returns The hex digest prefixed with `sha256:`.
 */
function sha256Text(text: string): string {
  const hasher = createHash('sha256');
  hasher.update(text);
  const hex = hasher.digest('hex');
  return 'sha256:' + hex;
}
42
+
43
/** Replacement text for the small set of HTML entities that `stripHtml` decodes. */
const HTML_ENTITY_TEXT: Record<string, string> = {
  nbsp: ' ',
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'",
  '#39': "'",
};

/**
 * Reduces an HTML document to plain text using regular expressions.
 *
 * Script and style elements are dropped together with their contents, every
 * remaining tag is replaced by a space, a handful of common entities are
 * decoded, and whitespace is tidied. This is a best-effort text extraction,
 * not an HTML parser.
 *
 * @param html Raw HTML markup.
 * @returns The trimmed plain-text rendering.
 */
function stripHtml(html: string): string {
  return html
    // `\s*` lets closing tags such as `</script >` match as well.
    .replace(/<script[\s\S]*?<\/script\s*>/gi, ' ')
    .replace(/<style[\s\S]*?<\/style\s*>/gi, ' ')
    .replace(/<[^>]+>/g, ' ')
    // Decode every entity in ONE pass. Decoding `&amp;` in a separate earlier
    // step would turn `&amp;lt;` into `&lt;` and then into `<`.
    .replace(/&(nbsp|amp|lt|gt|quot|apos|#39);/g, (entity: string, name: string) => HTML_ENTITY_TEXT[name] ?? entity)
    .replace(/\s+\n/g, '\n')
    .replace(/\n\s+/g, '\n')
    .replace(/[ \t]{2,}/g, ' ')
    .trim();
}
57
+
58
/**
 * Downloads an S3 object and returns its body as a string.
 *
 * The URI is validated first, then the safety policy (if any) is consulted,
 * and only then are the AWS SDK modules loaded. Region, profile, and retry
 * settings are taken from `config` only when its S3 storage targets the same
 * bucket as the URI.
 *
 * @param uri `s3://bucket/key` URI of the object.
 * @param config Optional workspace configuration.
 * @param safetyPolicy Optional policy passed to `assertS3ReadAllowed`.
 * @returns The object body, or an empty string when the response has no body.
 * @throws Error when the URI has no bucket or no key.
 */
async function readS3Text(uri: string, config?: KnowledgeConfig, safetyPolicy?: SafetyPolicy): Promise<string> {
  const { hostname: bucket, pathname } = new URL(uri);
  const key = decodeURIComponent(pathname.replace(/^\/+/, ''));
  if (!bucket || !key) {
    throw new Error(`Invalid S3 source URI: ${uri}`);
  }
  if (safetyPolicy) {
    assertS3ReadAllowed(uri, safetyPolicy);
  }

  // The SDK is loaded lazily, after validation and the safety check.
  const [s3Module, credentialsModule] = await Promise.all([
    import('@aws-sdk/client-s3'),
    import('@aws-sdk/credential-providers'),
  ]);
  const { S3Client, GetObjectCommand } = s3Module;
  const { fromIni } = credentialsModule;

  const s3Config = config?.storage.type === 's3' && config.storage.s3?.bucket === bucket ? config.storage.s3 : undefined;
  const credentials = s3Config?.profile ? fromIni({ profile: s3Config.profile }) : undefined;
  const client = new S3Client({
    region: s3Config?.region,
    credentials,
    maxAttempts: s3Config?.max_attempts,
  });

  const command = new GetObjectCommand({ Bucket: bucket, Key: key });
  const response = await client.send(command);
  if (!response.Body) {
    return '';
  }
  return await response.Body.transformToString();
}
78
+
79
/**
 * Fetches a web resource and returns its text together with the reported
 * content type.
 *
 * When the content type contains `html`, the body is passed through
 * `stripHtml`; otherwise it is returned unchanged.
 *
 * @param uri URL to fetch.
 * @param safetyPolicy Optional policy passed to `assertWebSearchAllowed` before fetching.
 * @returns The text and the raw `content-type` header value (or null).
 * @throws Error when the response status is not OK.
 */
async function readWebText(uri: string, safetyPolicy?: SafetyPolicy): Promise<{ text: string; mime: string | null }> {
  if (safetyPolicy) {
    assertWebSearchAllowed(safetyPolicy);
  }

  const requestHeaders = {
    accept: 'text/markdown,text/plain,text/html,application/json;q=0.8,*/*;q=0.5',
    'user-agent': '@hasna/knowledge source-ingest',
  };
  const response = await fetch(uri, { headers: requestHeaders });
  if (!response.ok) {
    throw new Error(`Web source read failed ${response.status}: ${uri}`);
  }

  const mime = response.headers.get('content-type');
  const body = await response.text();
  if (mime?.includes('html')) {
    return { text: stripHtml(body), mime };
  }
  return { text: body, mime };
}
92
+
93
/**
 * Derives a display title from a parsed source reference.
 *
 * File and S3 refs use the last segment of their path or key. Web refs use
 * the last segment of the URL path, falling back to the full URL when that
 * segment is empty. Any other ref uses the last segment of its path when it
 * has one, otherwise its id.
 */
function titleForRef(parsed: SourceRef): string | null {
  switch (parsed.kind) {
    case 'file':
      return basename(parsed.path);
    case 's3':
      return basename(parsed.key);
    case 'web': {
      const lastSegment = basename(new URL(parsed.url).pathname);
      return lastSegment || parsed.url;
    }
    default:
      return parsed.path ? basename(parsed.path) : parsed.id;
  }
}
99
+
100
/**
 * Reads the text of a file, S3, or web source reference directly.
 *
 * @param parsed Parsed source reference.
 * @param config Optional workspace configuration, forwarded to the S3 reader.
 * @param safetyPolicy Optional policy, forwarded to the S3 and web readers.
 * @returns The text plus descriptive fields; `revision` and `extractedTextRef` are always null.
 * @throws Error when a file ref points at a missing file, or when the ref kind
 *   is not file, S3, or web.
 */
async function readDirectSourceText(parsed: SourceRef, config?: KnowledgeConfig, safetyPolicy?: SafetyPolicy): Promise<ResolvedText> {
  // Shared shape of every direct read; only the text, origin, MIME type, and
  // metadata differ between the branches below.
  // NOTE(review): `size` is the string length, not a byte count — confirm this
  // is what downstream consumers of the manifest `size` field expect.
  const describe = (
    text: string,
    contentSource: ResolvedText['contentSource'],
    mime: string | null,
    metadata: Record<string, unknown>,
  ): ResolvedText => ({
    text,
    contentSource,
    title: titleForRef(parsed),
    mime,
    size: text.length,
    hash: sha256Text(text),
    revision: null,
    extractedTextRef: null,
    metadata,
    permissions: { mode: 'read_only' },
  });

  switch (parsed.kind) {
    case 'file': {
      if (!existsSync(parsed.path)) {
        throw new Error(`Source file not found: ${parsed.path}`);
      }
      const fileText = readFileSync(parsed.path, 'utf8');
      return describe(fileText, 'file', 'text/plain', { path: parsed.path });
    }
    case 's3': {
      const objectText = await readS3Text(parsed.uri, config, safetyPolicy);
      return describe(objectText, 's3', 'text/plain', { bucket: parsed.bucket, key: parsed.key });
    }
    case 'web': {
      const page = await readWebText(parsed.url, safetyPolicy);
      return describe(page.text, 'web', page.mime, { url: parsed.url });
    }
    default:
      throw new Error(`Direct source reading is not available for ${parsed.uri}`);
  }
}
152
+
153
/**
 * Reads the text behind an extracted-text reference.
 *
 * `open-files://` references are rejected; any other reference is parsed and
 * read through `readDirectSourceText`.
 *
 * @param uri Extracted-text reference URI.
 * @param config Optional workspace configuration, forwarded to the reader.
 * @param safetyPolicy Optional policy, forwarded to the reader.
 * @returns The text, always tagged with the `extracted_text_ref` content source.
 * @throws Error when `uri` starts with `open-files://`.
 */
async function readTextRef(uri: string, config?: KnowledgeConfig, safetyPolicy?: SafetyPolicy): Promise<{ text: string; contentSource: SourceIngestResult['content_source'] }> {
  const isOpenFilesRef = uri.startsWith('open-files://');
  if (isOpenFilesRef) {
    throw new Error('Open-files extracted text refs require an open-files resolver API. Ingest an open-files manifest with extracted_text or an extracted_text_ref using file://, s3://, or https://.');
  }

  const { text } = await readDirectSourceText(parseSourceRef(uri), config, safetyPolicy);
  return { text, contentSource: 'extracted_text_ref' };
}
161
+
162
/**
 * Obtains the text of an `open-files://` source through the local catalog.
 *
 * The source is resolved with `resolveOpenFilesSource`. If the selected
 * revision has an extracted-text URI and the resolver reports no text
 * available, the text is read from that URI. Otherwise the text is rebuilt by
 * joining the returned chunks with blank lines.
 *
 * @param options Ingest options; `purpose` defaults to `knowledge_index`.
 * @returns The text plus title, MIME type, hash, revision, metadata, and permissions.
 * @throws Error when the source is not resolved, or when it has neither a
 *   usable extracted-text URI nor any chunks.
 */
async function readOpenFilesSourceText(options: SourceIngestOptions): Promise<ResolvedText> {
  // NOTE(review): at most 100 chunks are requested; if a source can have more,
  // the rebuilt text below would be incomplete — confirm against the resolver.
  const resolved = await resolveOpenFilesSource({
    dbPath: options.dbPath,
    sourceRef: options.sourceRef,
    purpose: options.purpose ?? 'knowledge_index',
    limit: 100,
    safetyPolicy: options.safetyPolicy,
    now: options.now,
  });
  if (!resolved.resolved) {
    throw new Error('Open-files source is not in the local knowledge catalog. Ingest an open-files manifest first or use the open-files resolver API.');
  }

  const title = resolved.source?.title ?? null;
  const metadata = resolved.source?.metadata ?? {};
  const permissions = resolved.source?.permissions ?? { mode: 'read_only' };

  const needsTextRef = Boolean(resolved.revision?.extracted_text_uri) && !resolved.content.text_available;
  if (resolved.revision?.extracted_text_uri && needsTextRef) {
    const extractedTextUri = resolved.revision.extracted_text_uri;
    const fetched = await readTextRef(extractedTextUri, options.config, options.safetyPolicy);
    return {
      text: fetched.text,
      contentSource: fetched.contentSource,
      title,
      mime: resolved.content.mime,
      size: fetched.text.length,
      hash: resolved.revision.hash ?? sha256Text(fetched.text),
      revision: resolved.revision.revision,
      extractedTextRef: extractedTextUri,
      metadata,
      permissions,
    };
  }

  if (resolved.chunks.length === 0) {
    throw new Error('Open-files source has no extracted text chunks yet. Ingest an open-files manifest with extracted_text or extracted_text_ref first.');
  }

  const pieces: string[] = [];
  for (const chunk of resolved.chunks) {
    pieces.push(chunk.text);
  }
  const joined = pieces.join('\n\n');
  return {
    text: joined,
    contentSource: 'catalog_chunks',
    title,
    mime: resolved.content.mime,
    size: joined.length,
    hash: resolved.revision?.hash ?? sha256Text(joined),
    revision: resolved.revision?.revision ?? null,
    extractedTextRef: resolved.revision?.extracted_text_uri ?? null,
    metadata,
    permissions,
  };
}
206
+
207
/**
 * Builds the single manifest item that represents an ingested source.
 *
 * @param sourceRef The source reference as given by the caller.
 * @param parsed The parsed form of `sourceRef`.
 * @param resolved Text and descriptive fields gathered for the source.
 * @param purpose Purpose label placed in the default `allowed_purposes`.
 * @returns A manifest item carrying the text inline as `extracted_text`.
 */
function manifestItemForSource(sourceRef: string, parsed: SourceRef, resolved: ResolvedText, purpose: string): ManifestObject {
  const contentHash = resolved.hash ?? sha256Text(resolved.text);

  // Keys from `resolved.permissions` are spread last, so they take precedence
  // over the default mode and allowed purposes.
  const permissions = {
    mode: 'read_only',
    allowed_purposes: [purpose],
    ...resolved.permissions,
  };

  const item: ManifestObject = {
    source_ref: sourceRef,
    name: resolved.title ?? titleForRef(parsed),
    mime: resolved.mime ?? 'text/plain',
    size: resolved.size ?? resolved.text.length,
    hash: contentHash,
    // Without an explicit revision, the content hash doubles as the revision.
    revision: resolved.revision ?? contentHash,
    status: 'active',
    updated_at: new Date().toISOString(),
    permissions,
    metadata: {
      ...resolved.metadata,
      source_ref: sourceRef,
      content_source: resolved.contentSource,
      read_only: true,
    },
    extracted_text_ref: resolved.extractedTextRef,
    extracted_text: resolved.text,
  };

  // Attach the locator fields that match the kind of reference.
  switch (parsed.kind) {
    case 'open-files':
      if (parsed.entity === 'file') {
        item.file_id = parsed.id;
      }
      if (parsed.entity === 'source') {
        item.source_id = parsed.id;
        item.path = parsed.path;
      }
      break;
    case 'file':
      item.path = parsed.path;
      break;
    case 's3':
      item.path = parsed.key;
      break;
    case 'web':
      item.url = parsed.url;
      break;
  }
  return item;
}
245
+
246
/**
 * Ingests one source reference into the knowledge database.
 *
 * `open-files://` refs are read through the local catalog; every other ref is
 * read directly (file, S3, or web). The text is wrapped in a single manifest
 * item and stored with `ingestOpenFilesManifestItems`, using the audit read
 * action `source_ref_ingest_read`.
 *
 * @param options Source reference, database path, and optional purpose,
 *   configuration, safety policy, and timestamp source.
 * @returns The manifest ingest summary plus the source ref, the origin of the
 *   text, and the hash recorded on the item.
 * @throws Error when the source cannot be read; see the individual readers.
 */
export async function ingestSourceRef(options: SourceIngestOptions): Promise<SourceIngestResult> {
  const purpose = options.purpose ?? 'knowledge_index';
  const parsed = parseSourceRef(options.sourceRef);
  const resolved = parsed.kind === 'open-files'
    ? await readOpenFilesSourceText(options)
    : await readDirectSourceText(parsed, options.config, options.safetyPolicy);
  const item = manifestItemForSource(options.sourceRef, parsed, resolved, purpose);
  // `options.now` already drives the resolver and the ingest step; stamp the
  // item with it too so a caller-supplied clock yields a consistent record.
  // Without `options.now` the item keeps the wall-clock value it was built with.
  if (options.now) {
    item.updated_at = options.now.toISOString();
  }
  const result = await ingestOpenFilesManifestItems({
    dbPath: options.dbPath,
    items: [item],
    sourceLabel: options.sourceRef,
    readAction: 'source_ref_ingest_read',
    safetyPolicy: options.safetyPolicy,
    now: options.now,
  });
  return {
    ...result,
    source_ref: options.sourceRef,
    content_source: resolved.contentSource,
    read_only: true,
    hash: String(item.hash),
  };
}