@hasna/knowledge 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -62,6 +62,9 @@ open-knowledge db init --scope project
62
62
 
63
63
  # Initialize scalable wiki/schema/index/log artifacts
64
64
  open-knowledge wiki init --scope project
65
+
66
+ # Ingest an open-files source manifest into the project SQLite catalog
67
+ open-knowledge ingest manifest ./open-files-manifest.jsonl --scope project --json
65
68
  ```
66
69
 
67
70
  ## Commands
@@ -160,6 +163,14 @@ Create starter generated-knowledge artifacts through the artifact store:
160
163
  `schemas/v1.md`, `indexes/root.md`, `wiki/README.md`, and a dated JSONL log
161
164
  partition.
162
165
 
166
+ ### ingest
167
+ ```bash
168
+ open-knowledge ingest manifest <file|s3://bucket/key> [--scope project] [--json]
169
+ ```
170
+ Import an open-files JSON or JSONL source manifest into `knowledge.db`. This
171
+ upserts sources and source revisions, stores hash/MIME/status/permission
172
+ metadata, and chunks embedded extracted text when the manifest includes it.
173
+
163
174
  ### help
164
175
  ```bash
165
176
  open-knowledge help [command]
@@ -13659,12 +13659,12 @@ import { existsSync as existsSync3, readFileSync as readFileSync3, writeFileSync
13659
13659
  // package.json
13660
13660
  var package_default = {
13661
13661
  name: "@hasna/knowledge",
13662
- version: "0.2.4",
13662
+ version: "0.2.5",
13663
13663
  description: "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
13664
13664
  type: "module",
13665
13665
  bin: {
13666
- "open-knowledge": "./bin/open-knowledge.js",
13667
- "open-knowledge-mcp": "./bin/open-knowledge-mcp.js"
13666
+ "open-knowledge": "bin/open-knowledge.js",
13667
+ "open-knowledge-mcp": "bin/open-knowledge-mcp.js"
13668
13668
  },
13669
13669
  files: [
13670
13670
  "bin",
@@ -13696,7 +13696,7 @@ var package_default = {
13696
13696
  },
13697
13697
  repository: {
13698
13698
  type: "git",
13699
- url: "https://github.com/hasna/knowledge"
13699
+ url: "git+https://github.com/hasna/knowledge.git"
13700
13700
  },
13701
13701
  bugs: {
13702
13702
  url: "https://github.com/hasna/knowledge/issues"
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env bun
2
2
  // @bun
3
- var F=import.meta.require;import{readFileSync as $,writeFileSync as B,existsSync as G,renameSync as de,unlinkSync as ee}from"fs";import{randomUUID as ne}from"crypto";import{existsSync as _e,mkdirSync as H,readFileSync as ye,writeFileSync as pe}from"fs";import{homedir as m}from"os";import{dirname as Oe,join as O,resolve as Re}from"path";var Ae=O(".hasna","apps","knowledge");function J(){return O(m(),".open-knowledge","db.json")}function V(){return O(m(),".hasna","apps","knowledge")}function Xe(n=process.cwd()){return Re(n,Ae)}function h(n){return{home:n,configPath:O(n,"config.json"),jsonStorePath:O(n,"db.json"),knowledgeDbPath:O(n,"knowledge.db"),artifactsDir:O(n,"artifacts"),cacheDir:O(n,"cache"),exportsDir:O(n,"exports"),indexesDir:O(n,"indexes"),logsDir:O(n,"logs"),runsDir:O(n,"runs"),schemasDir:O(n,"schemas"),wikiDir:O(n,"wiki")}}function Ue(){return{version:1,mode:"local",storage:{type:"local",artifacts_root:"artifacts"},sources:{preferred_ref:"open-files",allowed_schemes:["open-files","s3","file","https","http"]}}}function j(n){let i=h(n);H(i.home,{recursive:!0});for(let e of[i.artifactsDir,i.cacheDir,i.exportsDir,i.indexesDir,i.logsDir,i.runsDir,i.schemasDir,i.wikiDir])H(e,{recursive:!0});if(!_e(i.configPath))pe(i.configPath,`${JSON.stringify(Ue(),null,2)}
4
- `);return i}function l(n,i=process.cwd()){if(n==="project"||n==="local")return h(Xe(i));return h(V())}function M(n){H(Oe(n),{recursive:!0})}function W(n){let i=ye(n,"utf8");return JSON.parse(i)}function q(){return h(V()).jsonStorePath}function Z(n){if(!G(n))if(M(n),n===q()&&G(J()))B(n,$(J(),"utf8"));else B(n,JSON.stringify({items:[]},null,2))}function ke(n){return`${n}.lock`}function De(n,i){let N=Date.now();while(Date.now()-N<5000){try{if(!G(n)){B(n,JSON.stringify({owner:i,ts:Date.now()}));return}let S=JSON.parse($(n,"utf8"));if(Date.now()-S.ts>1e4)ee(n)}catch{}let s=Date.now();while(Date.now()-s<50);}throw Error(`Could not acquire lock on ${n} after 5000ms`)}function we(n,i){try{if(G(n)){if(JSON.parse($(n,"utf8")).owner===i)ee(n)}}catch{}}function R(n){Z(n);let i=$(n,"utf8"),e=JSON.parse(i);if(!e||!Array.isArray(e.items))return{items:[]};return e}function U(n,i){let e=`${n}.tmp.${ne()}`;B(e,JSON.stringify(i,null,2)),de(e,n)}function A(n,i){let e=ne(),r=ke(n);De(r,e);try{return i()}finally{we(r,e)}}function v(){return`k_${Date.now().toString(36)}_${Math.random().toString(36).slice(2,8)}`}function ie(n){return n.replace(/^k_/,"").slice(0,12)}import{Database as Ie}from"bun:sqlite";var Se=`
3
+ var I=import.meta.require;import{readFileSync as Y,writeFileSync as B,existsSync as $,renameSync as Ae,unlinkSync as se}from"fs";import{randomUUID as oe}from"crypto";import{existsSync as ye,mkdirSync as J,readFileSync as Re,writeFileSync as Oe}from"fs";import{homedir as re}from"os";import{dirname as ke,join as R,resolve as le}from"path";var we=R(".hasna","apps","knowledge");function H(){return R(re(),".open-knowledge","db.json")}function Q(){return R(re(),".hasna","apps","knowledge")}function Ue(e=process.cwd()){return le(e,we)}function D(e){return{home:e,configPath:R(e,"config.json"),jsonStorePath:R(e,"db.json"),knowledgeDbPath:R(e,"knowledge.db"),artifactsDir:R(e,"artifacts"),cacheDir:R(e,"cache"),exportsDir:R(e,"exports"),indexesDir:R(e,"indexes"),logsDir:R(e,"logs"),runsDir:R(e,"runs"),schemasDir:R(e,"schemas"),wikiDir:R(e,"wiki")}}function Se(){return{version:1,mode:"local",storage:{type:"local",artifacts_root:"artifacts"},sources:{preferred_ref:"open-files",allowed_schemes:["open-files","s3","file","https","http"]}}}function b(e){let t=D(e);J(t.home,{recursive:!0});for(let n of[t.artifactsDir,t.cacheDir,t.exportsDir,t.indexesDir,t.logsDir,t.runsDir,t.schemasDir,t.wikiDir])J(n,{recursive:!0});if(!ye(t.configPath))Oe(t.configPath,`${JSON.stringify(Se(),null,2)}
4
+ `);return t}function ie(e,t=process.cwd()){if(e==="project"||e==="local")return D(Ue(t));return D(Q())}function K(e){J(ke(e),{recursive:!0})}function v(e){let t=Re(e,"utf8");return JSON.parse(t)}function V(){return D(Q()).jsonStorePath}function q(e){if(!$(e))if(K(e),e===V()&&$(H()))B(e,Y(H(),"utf8"));else B(e,JSON.stringify({items:[]},null,2))}function Ie(e){return`${e}.lock`}function xe(e,t){let c=Date.now();while(Date.now()-c<5000){try{if(!$(e)){B(e,JSON.stringify({owner:t,ts:Date.now()}));return}let d=JSON.parse(Y(e,"utf8"));if(Date.now()-d.ts>1e4)se(e)}catch{}let s=Date.now();while(Date.now()-s<50);}throw Error(`Could not acquire lock on ${e} after 5000ms`)}function Xe(e,t){try{if($(e)){if(JSON.parse(Y(e,"utf8")).owner===t)se(e)}}catch{}}function k(e){q(e);let t=Y(e,"utf8"),n=JSON.parse(t);if(!n||!Array.isArray(n.items))return{items:[]};return n}function w(e,t){let n=`${e}.tmp.${oe()}`;B(n,JSON.stringify(t,null,2)),Ae(n,e)}function l(e,t){let n=oe(),r=Ie(e);xe(r,n);try{return t()}finally{Xe(r,n)}}function P(){return`k_${Date.now().toString(36)}_${Math.random().toString(36).slice(2,8)}`}function ce(e){return e.replace(/^k_/,"").slice(0,12)}import{Database as be}from"bun:sqlite";var ge=`
5
5
  PRAGMA journal_mode = WAL;
6
6
  PRAGMA foreign_keys = ON;
7
7
 
@@ -168,7 +168,20 @@ CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
168
168
 
169
169
  INSERT OR IGNORE INTO schema_versions(version, applied_at)
170
170
  VALUES (1, datetime('now'));
171
- `;function re(n){M(n);let i=new Ie(n);return i.exec("PRAGMA foreign_keys = ON;"),i}function P(n){let i=re(n);try{return i.exec(Se),{path:n,schema_version:te(i)}}finally{i.close()}}function te(n){return n.query("SELECT MAX(version) AS version FROM schema_versions").get()?.version??0}function w(n,i){return n.query(`SELECT COUNT(*) AS n FROM ${i}`).get()?.n??0}function Te(n){let i=re(n);try{return{schema_version:te(i),sources:w(i,"sources"),source_revisions:w(i,"source_revisions"),chunks:w(i,"chunks"),wiki_pages:w(i,"wiki_pages"),citations:w(i,"citations"),indexes:w(i,"knowledge_indexes"),runs:w(i,"runs"),run_events:w(i,"run_events")}}finally{i.close()}}import{existsSync as Ke,mkdirSync as Ee,readFileSync as Ce,writeFileSync as Fe}from"fs";import{dirname as he,join as a,relative as je,sep as Ye}from"path";function Y(n){let i=n.replace(/\\/g,"/").trim();if(!i||i.startsWith("/"))throw Error(`Invalid artifact key: ${n}`);let e=i.split("/").filter(Boolean);if(e.length===0||e.some((r)=>r==="."||r===".."))throw Error(`Invalid artifact key: ${n}`);return e.join("/")}function f(n,i){let e=je(n,i);if(e.startsWith("..")||e===".."||e.startsWith(`..${Ye}`))throw Error(`Artifact path escapes root: ${i}`)}class se{root;type="local";canRead=!0;canWrite=!0;constructor(n){this.root=n;Ee(n,{recursive:!0})}async put(n){let i=Y(n.key),e=a(this.root,i);return f(this.root,e),Ee(he(e),{recursive:!0}),Fe(e,n.body),{key:i,uri:`file://${e}`}}async getText(n){let i=Y(n),e=a(this.root,i);return f(this.root,e),Ce(e,"utf8")}async exists(n){let i=Y(n),e=a(this.root,i);return f(this.root,e),Ke(e)}}class oe{options;type="s3";canRead=!0;canWrite=!0;client;constructor(n){this.options=n;this.client=n.client}async getClient(){if(this.client)return this.client;let[{S3Client:n},{fromIni:i}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]);return this.client=new n({region:this.options.region,credentials:this.options.profile?i({profile:this.options.profile}):void 
0,maxAttempts:this.options.max_attempts}),this.client}objectKey(n){let i=Y(n),e=this.options.prefix?Y(this.options.prefix):"";return e?`${e}/${i}`:i}async put(n){let[{PutObjectCommand:i},e]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(n.key);return await e.send(new i({Bucket:this.options.bucket,Key:r,Body:n.body,ContentType:n.content_type,Metadata:n.metadata,ServerSideEncryption:this.options.server_side_encryption,SSEKMSKeyId:this.options.kms_key_id})),{key:r,uri:`s3://${this.options.bucket}/${r}`}}async getText(n){let[{GetObjectCommand:i},e]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(n),N=await e.send(new i({Bucket:this.options.bucket,Key:r}));if(!N.Body)return"";return await N.Body.transformToString()}async exists(n){let[{HeadObjectCommand:i},e]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(n);try{return await e.send(new i({Bucket:this.options.bucket,Key:r})),!0}catch(N){let s=N instanceof Error?N.name:"";if(s==="NotFound"||s==="NoSuchKey"||s==="NotFoundError")return!1;throw N}}}function Ne(n,i){if(n.storage.type==="s3"){if(!n.storage.s3?.bucket)throw Error("S3 artifact storage requires storage.s3.bucket");return new oe({bucket:n.storage.s3.bucket,prefix:n.storage.s3.prefix,region:n.storage.s3.region,profile:n.storage.s3.profile,max_attempts:n.storage.s3.max_attempts,server_side_encryption:n.storage.s3.server_side_encryption,kms_key_id:n.storage.s3.kms_key_id})}return new se(i.artifactsDir)}function be(n){let i=String(n.getUTCFullYear()),e=String(n.getUTCMonth()+1).padStart(2,"0"),r=String(n.getUTCDate()).padStart(2,"0");return{year:i,month:e,day:r}}function xe(){return`# Knowledge Agent Schema v1
171
+ `,Ce=`
172
+ DROP TABLE IF EXISTS chunks_fts;
173
+
174
+ CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
175
+ chunk_id UNINDEXED,
176
+ text,
177
+ title,
178
+ source_uri,
179
+ tokenize='porter unicode61'
180
+ );
181
+
182
+ INSERT OR IGNORE INTO schema_versions(version, applied_at)
183
+ VALUES (2, datetime('now'));
184
+ `;function G(e){K(e);let t=new be(e);return t.exec("PRAGMA foreign_keys = ON;"),t}function m(e){let t=G(e);try{if(t.exec(ge),Z(t)<2)t.exec(Ce);return{path:e,schema_version:Z(t)}}finally{t.close()}}function Z(e){return e.query("SELECT MAX(version) AS version FROM schema_versions").get()?.version??0}function A(e,t){return e.query(`SELECT COUNT(*) AS n FROM ${t}`).get()?.n??0}function ue(e){let t=G(e);try{return{schema_version:Z(t),sources:A(t,"sources"),source_revisions:A(t,"source_revisions"),chunks:A(t,"chunks"),wiki_pages:A(t,"wiki_pages"),citations:A(t,"citations"),indexes:A(t,"knowledge_indexes"),runs:A(t,"runs"),run_events:A(t,"run_events")}}finally{t.close()}}import{existsSync as De,mkdirSync as Te,readFileSync as me,writeFileSync as Fe}from"fs";import{dirname as Me,join as ee,relative as je,sep as Ke}from"path";function F(e){let t=e.replace(/\\/g,"/").trim();if(!t||t.startsWith("/"))throw Error(`Invalid artifact key: ${e}`);let n=t.split("/").filter(Boolean);if(n.length===0||n.some((r)=>r==="."||r===".."))throw Error(`Invalid artifact key: ${e}`);return n.join("/")}function ne(e,t){let n=je(e,t);if(n.startsWith("..")||n===".."||n.startsWith(`..${Ke}`))throw Error(`Artifact path escapes root: ${t}`)}class Ee{root;type="local";canRead=!0;canWrite=!0;constructor(e){this.root=e;Te(e,{recursive:!0})}async put(e){let t=F(e.key),n=ee(this.root,t);return ne(this.root,n),Te(Me(n),{recursive:!0}),Fe(n,e.body),{key:t,uri:`file://${n}`}}async getText(e){let t=F(e),n=ee(this.root,t);return ne(this.root,n),me(n,"utf8")}async exists(e){let t=F(e),n=ee(this.root,t);return ne(this.root,n),De(n)}}class de{options;type="s3";canRead=!0;canWrite=!0;client;constructor(e){this.options=e;this.client=e.client}async getClient(){if(this.client)return this.client;let[{S3Client:e},{fromIni:t}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]);return this.client=new 
e({region:this.options.region,credentials:this.options.profile?t({profile:this.options.profile}):void 0,maxAttempts:this.options.max_attempts}),this.client}objectKey(e){let t=F(e),n=this.options.prefix?F(this.options.prefix):"";return n?`${n}/${t}`:t}async put(e){let[{PutObjectCommand:t},n]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(e.key);return await n.send(new t({Bucket:this.options.bucket,Key:r,Body:e.body,ContentType:e.content_type,Metadata:e.metadata,ServerSideEncryption:this.options.server_side_encryption,SSEKMSKeyId:this.options.kms_key_id})),{key:r,uri:`s3://${this.options.bucket}/${r}`}}async getText(e){let[{GetObjectCommand:t},n]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(e),c=await n.send(new t({Bucket:this.options.bucket,Key:r}));if(!c.Body)return"";return await c.Body.transformToString()}async exists(e){let[{HeadObjectCommand:t},n]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(e);try{return await n.send(new t({Bucket:this.options.bucket,Key:r})),!0}catch(c){let s=c instanceof Error?c.name:"";if(s==="NotFound"||s==="NoSuchKey"||s==="NotFoundError")return!1;throw c}}}function ae(e,t){if(e.storage.type==="s3"){if(!e.storage.s3?.bucket)throw Error("S3 artifact storage requires storage.s3.bucket");return new de({bucket:e.storage.s3.bucket,prefix:e.storage.s3.prefix,region:e.storage.s3.region,profile:e.storage.s3.profile,max_attempts:e.storage.s3.max_attempts,server_side_encryption:e.storage.s3.server_side_encryption,kms_key_id:e.storage.s3.kms_key_id})}return new Ee(t.artifactsDir)}function ve(e){let t=String(e.getUTCFullYear()),n=String(e.getUTCMonth()+1).padStart(2,"0"),r=String(e.getUTCDate()).padStart(2,"0");return{year:t,month:n,day:r}}function Be(){return`# Knowledge Agent Schema v1
172
185
 
173
186
  ## Source Rules
174
187
 
@@ -193,7 +206,7 @@ VALUES (1, datetime('now'));
193
206
  ## Lint Rules
194
207
 
195
208
  - Flag stale pages, missing citations, contradictions, orphan pages, duplicate pages, and unresolved source refs.
196
- `}function Me(){return`# Knowledge Index
209
+ `}function $e(){return`# Knowledge Index
197
210
 
198
211
  This is a compact orientation index for agents. It is not the full search index.
199
212
 
@@ -208,13 +221,29 @@ This is a compact orientation index for agents. It is not the full search index.
208
221
 
209
222
  Raw source files are resolved through open-files. This app stores source refs,
210
223
  citations, chunks, generated wiki artifacts, indexes, and run records.
211
- `}function Be(){return`# Wiki
224
+ `}function Ye(){return`# Wiki
212
225
 
213
226
  Generated durable knowledge pages live here.
214
227
 
215
228
  Pages should be concise, cited, and organized for both humans and agents.
216
- `}async function ue(n,i=new Date){let{year:e,month:r,day:N}=be(i),s="schemas/v1.md",S="indexes/root.md",g="wiki/README.md",t=`logs/${e}/${r}/${N}.jsonl`,E={ts:i.toISOString(),event:"wiki_layout_initialized",schema_key:"schemas/v1.md",root_index_key:"indexes/root.md",wiki_readme_key:"wiki/README.md"},T=[n.put({key:"schemas/v1.md",body:xe(),content_type:"text/markdown"}),n.put({key:"indexes/root.md",body:Me(),content_type:"text/markdown"}),n.put({key:"wiki/README.md",body:Be(),content_type:"text/markdown"}),n.put({key:t,body:`${JSON.stringify(E)}
217
- `,content_type:"application/x-ndjson"})];return await Promise.all(T),{schema_key:"schemas/v1.md",root_index_key:"indexes/root.md",wiki_readme_key:"wiki/README.md",log_key:t,written:["schemas/v1.md","indexes/root.md","wiki/README.md",t]}}var b={name:"@hasna/knowledge",version:"0.2.4",description:"Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",type:"module",bin:{"open-knowledge":"./bin/open-knowledge.js","open-knowledge-mcp":"./bin/open-knowledge-mcp.js"},files:["bin","src","docs","LICENSE","README.md"],scripts:{test:"bun test","test:cli":"bun test tests/cli.test.ts",build:"bun build --target=bun --outfile=bin/open-knowledge.js --minify --external @aws-sdk/client-s3 --external @aws-sdk/credential-providers src/cli.ts && bun build --target=bun --outfile=bin/open-knowledge-mcp.js --external @modelcontextprotocol/sdk src/mcp.js",prepublishOnly:"bun run build",postinstall:"bun run build"},keywords:["knowledge","cli","agents","json","notes","local","store"],license:"Apache-2.0",publishConfig:{registry:"https://registry.npmjs.org",access:"public"},repository:{type:"git",url:"https://github.com/hasna/knowledge"},bugs:{url:"https://github.com/hasna/knowledge/issues"},author:"Hasna Inc. 
<hasna@example.com>",engines:{bun:">=1.0",node:">=18"},dependencies:{"@aws-sdk/client-s3":"^3.1063.0","@aws-sdk/credential-providers":"^3.1063.0","@modelcontextprotocol/sdk":"^1.29.0",zod:"^4.3.6"},devDependencies:{"@types/bun":"^1.3.14"}};var ce={debug:0,info:1,warn:2,error:3},$e=()=>{if(process.env.DEBUG)return"debug";if(process.env.LOG_LEVEL==="debug")return"debug";if(process.env.LOG_LEVEL==="warn")return"warn";if(process.env.LOG_LEVEL==="error")return"error";return"info"};function I(n,i,e){if(ce[n]<ce[$e()])return;let r={debug:"[DEBUG]",info:"[INFO]",warn:"[WARN]",error:"[ERROR]"}[n],N=e?`${r} ${i} ${JSON.stringify(e)}`:`${r} ${i}`;if(n==="error")console.error(N);else console.error(N)}var ze=["add","list","get","delete","update","archive","restore","upsert","untag","export","prune","dedupe","stats","paths","db","wiki","help"],Le={ls:"list",rm:"delete",edit:"update",unarchive:"restore"};function Qe(n){let i=[],e={};for(let r=0;r<n.length;r+=1){let N=n[r];if(!N.startsWith("-")){i.push(N);continue}switch(N){case"--json":e.json=!0;break;case"--yes":case"-y":e.yes=!0;break;case"--help":case"-h":e.help=!0;break;case"--version":case"-v":e.version=!0;break;case"--desc":e.desc=!0;break;case"--page":case"-p":e.page=Number(n[r+1]),r+=1;break;case"--limit":case"-l":e.limit=Number(n[r+1]),r+=1;break;case"--search":case"-s":e.search=n[r+1],r+=1;break;case"--sort":e.sort=n[r+1],r+=1;break;case"--id":e.id=n[r+1],r+=1;break;case"--store":e.store=n[r+1],r+=1;break;case"--title":e.title=n[r+1],r+=1;break;case"--content":e.content=n[r+1],r+=1;break;case"--url":e.url=n[r+1],r+=1;break;case"--tag":case"-t":e.tag=n[r+1],r+=1;break;case"--format":e.format=n[r+1],r+=1;break;case"--completions":e.completions=n[r+1],r+=1;break;case"--no-color":e.noColor=!0;break;case"--scope":e.scope=n[r+1],r+=1;break;case"--older-than":e.olderThan=Number(n[r+1]),r+=1;break;case"--empty":e.empty=!0;break;case"--archived":e.archived=!0;break;case"--include-archived":e.includeArchived=!0;break;default:throw
 Error(`Unknown flag: ${N}. Run 'open-knowledge --help' for valid options.`)}}return{positional:i,flags:e}}function He(n){if(!n)return"";return Le[n]??n}function Je(n,i){let e=Array.from({length:n.length+1},()=>Array(i.length+1).fill(0));for(let r=0;r<=n.length;r+=1)e[r][0]=r;for(let r=0;r<=i.length;r+=1)e[0][r]=r;for(let r=1;r<=n.length;r+=1)for(let N=1;N<=i.length;N+=1){let s=n[r-1]===i[N-1]?0:1;e[r][N]=Math.min(e[r-1][N]+1,e[r][N-1]+1,e[r-1][N-1]+s)}return e[n.length][i.length]}function Ve(n){if(!n)return"";let i=[...ze,...Object.keys(Le)],e="",r=Number.POSITIVE_INFINITY;for(let N of i){let s=Je(n,N);if(s<r)r=s,e=N}return r<=3?e:""}function We(){console.log(`open-knowledge - local agent knowledge store
229
+ `}async function pe(e,t=new Date){let{year:n,month:r,day:c}=ve(t),s="schemas/v1.md",d="indexes/root.md",_="wiki/README.md",i=`logs/${n}/${r}/${c}.jsonl`,u={ts:t.toISOString(),event:"wiki_layout_initialized",schema_key:"schemas/v1.md",root_index_key:"indexes/root.md",wiki_readme_key:"wiki/README.md"},o=[e.put({key:"schemas/v1.md",body:Be(),content_type:"text/markdown"}),e.put({key:"indexes/root.md",body:$e(),content_type:"text/markdown"}),e.put({key:"wiki/README.md",body:Ye(),content_type:"text/markdown"}),e.put({key:i,body:`${JSON.stringify(u)}
230
+ `,content_type:"application/x-ndjson"})];return await Promise.all(o),{schema_key:"schemas/v1.md",root_index_key:"indexes/root.md",wiki_readme_key:"wiki/README.md",log_key:i,written:["schemas/v1.md","indexes/root.md","wiki/README.md",i]}}import{createHash as He}from"crypto";import{existsSync as Qe,readFileSync as Ve}from"fs";import{basename as qe}from"path";function fe(e,t){if(!e)throw Error(t);return e}function Ge(e){let n=e.slice(13).split("/").filter(Boolean),r=n[0];if(r!=="file"&&r!=="source")throw Error("Invalid open-files ref. Expected open-files://file/<id>, open-files://file/<id>/revision/<revision_id>, or open-files://source/<id>/path/<path>.");let c=fe(n[1],"Invalid open-files ref. Missing id.");if(r==="file"){if(n.length===2)return{kind:"open-files",uri:e,entity:r,id:c};if(n[2]==="revision"&&n[3]&&n.length===4)return{kind:"open-files",uri:e,entity:r,id:c,revision_id:decodeURIComponent(n[3])};throw Error("Invalid open-files file ref. Expected open-files://file/<id>/revision/<revision_id>.")}let s=n.indexOf("path"),d=s>=0?decodeURIComponent(n.slice(s+1).join("/")):void 0;return{kind:"open-files",uri:e,entity:r,id:c,path:d}}function ze(e){let t=new URL(e),n=fe(t.hostname,"Invalid s3 ref. Missing bucket."),r=decodeURIComponent(t.pathname.replace(/^\/+/,""));if(!r)throw Error("Invalid s3 ref. 
Missing object key.");return{kind:"s3",uri:e,bucket:n,key:r}}function We(e){let t=new URL(e);return{kind:"file",uri:e,path:decodeURIComponent(t.pathname)}}function Je(e){let t=new URL(e);return{kind:"web",uri:e,url:t.toString()}}function _e(e){if(e.startsWith("open-files://"))return Ge(e);if(e.startsWith("s3://"))return ze(e);if(e.startsWith("file://"))return We(e);if(e.startsWith("https://")||e.startsWith("http://"))return Je(e);throw Error(`Unsupported source ref scheme: ${e}`)}function te(e,t){return`${e}_${He("sha256").update(t).digest("hex").slice(0,20)}`}function g(e){return e&&typeof e==="object"&&!Array.isArray(e)?e:void 0}function E(e){return typeof e==="string"&&e.length>0?e:void 0}function Pe(e){return typeof e==="number"&&Number.isFinite(e)?e:void 0}function Ze(e){let t=E(e.source_ref)??E(e.source_uri)??E(e.uri);if(t)return t;let n=E(e.file_id);if(n){let s=E(e.revision_id)??E(e.revision),d=`open-files://file/${encodeURIComponent(n)}`;return s?`${d}/revision/${encodeURIComponent(s)}`:d}let r=E(e.source_id),c=E(e.path);if(r&&c)return`open-files://source/${encodeURIComponent(r)}/path/${encodeURIComponent(c)}`;throw Error("Manifest item is missing source_ref, file_id, or source_id/path.")}function en(e,t){if(t.kind==="open-files"&&t.entity==="file"&&t.revision_id)return e.replace(/\/revision\/[^/]+$/,"");return e}function nn(e){let t=E(e.extracted_text)??E(e.text)??E(e.content_text)??E(e.markdown);if(t!==void 0)return t;let n=e.content;return typeof n==="string"?n:null}function tn(e){let t=E(e.extracted_text_ref)??E(e.extracted_text_uri)??E(e.text_ref);if(t)return t;let n=g(e.content);return E(n?.extracted_text_ref)??E(n?.extracted_text_uri)??null}function rn(e){let t=E(e.path);return E(e.title)??E(e.name)??(t?qe(t):null)}function sn(e){return E(e.hash)??E(e.checksum)??E(e.sha256)??null}function on(e,t,n){return E(e.revision_id)??E(e.revision)??E(e.version_id)??(t.kind==="open-files"?t.revision_id:void 0)??n??E(e.updated_at)??"current"}function cn(e,t){let 
n={};for(let[r,c]of Object.entries(e)){if(["text","content","content_text","extracted_text","markdown"].includes(r))continue;n[r]=c}return n.source_ref=t.sourceRef,n.source_uri=t.sourceUri,n.status=t.status,n}function un(e,t){let n=Ze(e),r=_e(n),c=en(n,r),s=sn(e),d=E(e.status)??"active";return{raw:e,sourceRef:n,sourceUri:c,kind:r.kind,title:rn(e),revision:on(e,r,s),hash:s,extractedTextUri:tn(e),text:nn(e),metadata:cn(e,{sourceRef:n,sourceUri:c,status:d}),acl:e.permissions??e.acl??{},status:d,updatedAt:E(e.updated_at)??t}}function Tn(e){let t=e.trim();if(!t)return[];if(t.startsWith("[")){let n=JSON.parse(t);if(!Array.isArray(n))throw Error("Manifest array parse failed.");return n.map((r)=>{let c=g(r);if(!c)throw Error("Manifest array entries must be objects.");return c})}if(t.startsWith("{"))try{let n=JSON.parse(t),r=g(n);if(!r)throw Error("Manifest object parse failed.");if(Array.isArray(r.items))return r.items.map((c)=>{let s=g(c);if(!s)throw Error("Manifest items entries must be objects.");return s});if("source_ref"in r||"source_uri"in r||"file_id"in r)return[r]}catch(n){let r=t.split(/\r?\n/).filter((c)=>c.trim().length>0);if(r.length<=1)throw n;return r.map((c)=>{let s=g(JSON.parse(c));if(!s)throw Error("Manifest JSONL entries must be objects.");return s})}return t.split(/\r?\n/).filter((n)=>n.trim().length>0).map((n)=>{let r=g(JSON.parse(n));if(!r)throw Error("Manifest JSONL entries must be objects.");return r})}async function En(e,t){let n=new URL(e),r=n.hostname,c=decodeURIComponent(n.pathname.replace(/^\/+/,""));if(!r||!c)throw Error(`Invalid S3 manifest URI: ${e}`);let[{S3Client:s,GetObjectCommand:d},{fromIni:_}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]),i=t?.storage.type==="s3"&&t.storage.s3?.bucket===r?t.storage.s3:void 0,o=await new s({region:i?.region,credentials:i?.profile?_({profile:i.profile}):void 0,maxAttempts:i?.max_attempts}).send(new d({Bucket:r,Key:c}));if(!o.Body)return"";return await 
o.Body.transformToString()}async function dn(e,t){if(e.startsWith("s3://"))return En(e,t);if(!Qe(e))throw Error(`Manifest not found: ${e}`);return Ve(e,"utf8")}function an(e,t,n){let r=e.replace(/\r\n/g,`
231
+ `);if(!r.trim())return[];let c=[],s=0;while(s<r.length){let d=Math.min(r.length,s+t),_=d;if(d<r.length){let u=r.lastIndexOf(`
232
+
233
+ `,d),o=r.lastIndexOf(". ",d),T=Math.max(u,o);if(T>s+Math.floor(t*0.5))_=T+(T===u?2:1)}let i=r.slice(s,_).trim();if(i)c.push({ordinal:c.length,text:i,startOffset:s,endOffset:_});if(_>=r.length)break;s=Math.max(0,_-n)}return c}function pn(e){let t=e.trim().split(/\s+/).filter(Boolean).length;return Math.max(1,Math.ceil(t*1.25))}function fn(e,t){let n=e.query("SELECT id FROM chunks WHERE source_revision_id = ?").all(t);for(let r of n)e.run("DELETE FROM chunks_fts WHERE chunk_id = ?",[r.id]);return e.run("DELETE FROM chunks WHERE source_revision_id = ?",[t]),n.length}function _n(e,t,n){let r=te("src",t.sourceUri);e.run(`INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
234
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)
235
+ ON CONFLICT(uri) DO UPDATE SET
236
+ kind = excluded.kind,
237
+ title = excluded.title,
238
+ metadata_json = excluded.metadata_json,
239
+ acl_json = excluded.acl_json,
240
+ updated_at = excluded.updated_at`,[r,t.sourceUri,t.kind,t.title,JSON.stringify(t.metadata),JSON.stringify(t.acl??{}),n,t.updatedAt]);let c=e.query("SELECT id FROM sources WHERE uri = ?").get(t.sourceUri);if(!c)throw Error(`Failed to upsert source: ${t.sourceUri}`);return c.id}function Nn(e,t,n,r){let c=te("rev",`${t}\x00${n.revision}`);e.run(`INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)
241
+ VALUES (?, ?, ?, ?, ?, ?, ?)
242
+ ON CONFLICT(source_id, revision) DO UPDATE SET
243
+ hash = excluded.hash,
244
+ extracted_text_uri = excluded.extracted_text_uri,
245
+ metadata_json = excluded.metadata_json`,[c,t,n.revision,n.hash,n.extractedTextUri,JSON.stringify(n.metadata),r]);let s=e.query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?").get(t,n.revision);if(!s)throw Error(`Failed to upsert source revision: ${n.sourceRef}`);return s.id}function Ln(e,t,n,r,c,s){if(!n.text||n.status.toLowerCase()==="deleted")return 0;let d=an(n.text,c,s);for(let _ of d){let i=te("chk",`${t}\x00${_.ordinal}\x00${_.text}`),u={source_ref:n.sourceRef,source_uri:n.sourceUri,hash:n.hash,status:n.status,path:E(n.raw.path)??null,mime:E(n.raw.mime)??E(n.raw.content_type)??null,size:Pe(n.raw.size)??null};e.run(`INSERT INTO chunks (id, source_revision_id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json, created_at)
246
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,[i,t,"source",_.ordinal,_.text,pn(_.text),_.startOffset,_.endOffset,JSON.stringify(u),r]),e.run("INSERT INTO chunks_fts (chunk_id, text, title, source_uri) VALUES (?, ?, ?, ?)",[i,_.text,n.title??"",n.sourceUri])}return d.length}async function Ne(e){let t=(e.now??new Date).toISOString(),n=e.maxChunkChars??4000,r=e.chunkOverlapChars??200;if(n<500)throw Error("maxChunkChars must be at least 500.");if(r<0||r>=n)throw Error("chunkOverlapChars must be less than maxChunkChars.");m(e.dbPath);let c=await dn(e.input,e.config),s=Tn(c),d=G(e.dbPath);try{return d.transaction(()=>{let i=new Set,u=new Set,o=0,T=0,a=0;for(let f of s){let y=un(f,t),h=_n(d,y,t),O=Nn(d,h,y,t);if(i.add(h),u.add(O),y.text||y.status.toLowerCase()==="deleted")T+=fn(d,O);o+=Ln(d,O,y,t,n,r)}return{path:e.input,db_path:e.dbPath,items_seen:s.length,sources_upserted:i.size,revisions_upserted:u.size,chunks_inserted:o,chunks_deleted:T,skipped:a}})()}finally{d.close()}}var M={name:"@hasna/knowledge",version:"0.2.5",description:"Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",type:"module",bin:{"open-knowledge":"bin/open-knowledge.js","open-knowledge-mcp":"bin/open-knowledge-mcp.js"},files:["bin","src","docs","LICENSE","README.md"],scripts:{test:"bun test","test:cli":"bun test tests/cli.test.ts",build:"bun build --target=bun --outfile=bin/open-knowledge.js --minify --external @aws-sdk/client-s3 --external @aws-sdk/credential-providers src/cli.ts && bun build --target=bun --outfile=bin/open-knowledge-mcp.js --external @modelcontextprotocol/sdk src/mcp.js",prepublishOnly:"bun run build",postinstall:"bun run build"},keywords:["knowledge","cli","agents","json","notes","local","store"],license:"Apache-2.0",publishConfig:{registry:"https://registry.npmjs.org",access:"public"},repository:{type:"git",url:"git+https://github.com/hasna/knowledge.git"},bugs:{url:"https://github.com/hasna/knowledge/issues"},author:"Hasna Inc. 
<hasna@example.com>",engines:{bun:">=1.0",node:">=18"},dependencies:{"@aws-sdk/client-s3":"^3.1063.0","@aws-sdk/credential-providers":"^3.1063.0","@modelcontextprotocol/sdk":"^1.29.0",zod:"^4.3.6"},devDependencies:{"@types/bun":"^1.3.14"}};var Le={debug:0,info:1,warn:2,error:3},yn=()=>{if(process.env.DEBUG)return"debug";if(process.env.LOG_LEVEL==="debug")return"debug";if(process.env.LOG_LEVEL==="warn")return"warn";if(process.env.LOG_LEVEL==="error")return"error";return"info"};function x(e,t,n){if(Le[e]<Le[yn()])return;let r={debug:"[DEBUG]",info:"[INFO]",warn:"[WARN]",error:"[ERROR]"}[e],c=n?`${r} ${t} ${JSON.stringify(n)}`:`${r} ${t}`;if(e==="error")console.error(c);else console.error(c)}var Rn=["add","list","get","delete","update","archive","restore","upsert","untag","export","prune","dedupe","stats","paths","db","wiki","ingest","help"],he={ls:"list",rm:"delete",edit:"update",unarchive:"restore"};function On(e){let t=[],n={};for(let r=0;r<e.length;r+=1){let c=e[r];if(!c.startsWith("-")){t.push(c);continue}switch(c){case"--json":n.json=!0;break;case"--yes":case"-y":n.yes=!0;break;case"--help":case"-h":n.help=!0;break;case"--version":case"-v":n.version=!0;break;case"--desc":n.desc=!0;break;case"--page":case"-p":n.page=Number(e[r+1]),r+=1;break;case"--limit":case"-l":n.limit=Number(e[r+1]),r+=1;break;case"--search":case"-s":n.search=e[r+1],r+=1;break;case"--sort":n.sort=e[r+1],r+=1;break;case"--id":n.id=e[r+1],r+=1;break;case"--store":n.store=e[r+1],r+=1;break;case"--title":n.title=e[r+1],r+=1;break;case"--content":n.content=e[r+1],r+=1;break;case"--url":n.url=e[r+1],r+=1;break;case"--tag":case"-t":n.tag=e[r+1],r+=1;break;case"--format":n.format=e[r+1],r+=1;break;case"--completions":n.completions=e[r+1],r+=1;break;case"--no-color":n.noColor=!0;break;case"--scope":n.scope=e[r+1],r+=1;break;case"--older-than":n.olderThan=Number(e[r+1]),r+=1;break;case"--empty":n.empty=!0;break;case"--archived":n.archived=!0;break;case"--include-archived":n.includeArchived=!0;break;defa
ult:throw Error(`Unknown flag: ${c}. Run 'open-knowledge --help' for valid options.`)}}return{positional:t,flags:n}}function kn(e){if(!e)return"";return he[e]??e}function ln(e,t){let n=Array.from({length:e.length+1},()=>Array(t.length+1).fill(0));for(let r=0;r<=e.length;r+=1)n[r][0]=r;for(let r=0;r<=t.length;r+=1)n[0][r]=r;for(let r=1;r<=e.length;r+=1)for(let c=1;c<=t.length;c+=1){let s=e[r-1]===t[c-1]?0:1;n[r][c]=Math.min(n[r-1][c]+1,n[r][c-1]+1,n[r-1][c-1]+s)}return n[e.length][t.length]}function wn(e){if(!e)return"";let t=[...Rn,...Object.keys(he)],n="",r=Number.POSITIVE_INFINITY;for(let c of t){let s=ln(e,c);if(s<r)r=s,n=c}return r<=3?n:""}function Un(){console.log(`open-knowledge - local agent knowledge store
218
247
 
219
248
  Usage:
220
249
  open-knowledge <command> [options]
@@ -236,6 +265,7 @@ Commands:
236
265
  paths Show resolved workspace/store paths
237
266
  db init|stats Initialize or inspect local knowledge.db
238
267
  wiki init Initialize scalable wiki/schema/index/log artifacts
268
+ ingest manifest <file|s3://> Ingest an open-files manifest into knowledge.db
239
269
  help [command] Show help
240
270
 
241
271
  Global Options:
@@ -277,5 +307,5 @@ Export Options:
277
307
 
278
308
  Prune Options:
279
309
  --older-than <days> Remove items older than N days
280
- --empty Remove items with empty content`)}function qe(n){if(n==="add"){console.log("Usage: open-knowledge add <title> <content> [--url <url>] [-t <tag>] [--json]");return}if(n==="list"||n==="ls"){console.log("Usage: open-knowledge list|ls [--format table|json] [-p <page>] [-l <limit>] [-s <search>] [-t <tag>] [--sort created|title] [--desc] [--json]");return}if(n==="get"){console.log("Usage: open-knowledge get --id <id> [--json]");return}if(n==="update"||n==="edit"){console.log("Usage: open-knowledge update|edit --id <id> [--title <title>] [--content <content>] [--url <url>] [-t <tag>] [--json]");return}if(n==="archive"){console.log("Usage: open-knowledge archive --id <id> [--json]");return}if(n==="restore"||n==="unarchive"){console.log("Usage: open-knowledge restore|unarchive --id <id> [--json]");return}if(n==="upsert"){console.log("Usage: open-knowledge upsert [title] [content] [--id <id>] [--title <title>] [--content <content>] [--url <url>] [-t <tag>] [--json]");return}if(n==="untag"){console.log("Usage: open-knowledge untag --id <id> -t <tag> [--json]");return}if(n==="delete"||n==="rm"){console.log("Usage: open-knowledge delete|rm --id <id> -y [--json]");return}if(n==="export"){console.log("Usage: open-knowledge export [--format jsonl] [--json]");return}if(n==="prune"){console.log("Usage: open-knowledge prune --yes [--older-than <days>] [--empty] [--json]");return}if(n==="dedupe"){console.log("Usage: open-knowledge dedupe --yes [--json]");return}if(n==="stats"){console.log("Usage: open-knowledge stats [--json]");return}if(n==="paths"){console.log("Usage: open-knowledge paths [--scope local|global|project] [--json]");return}if(n==="db"){console.log("Usage: open-knowledge db init|stats [--scope local|global|project] [--json]");return}if(n==="wiki"){console.log("Usage: open-knowledge wiki init [--scope local|global|project] [--json]");return}We()}function Ze(n){if(n.noColor||process.env.NO_COLOR)return!1;if(process.env.FORCE_COLOR)return!0;return 
process.stdout.isTTY===!0}function y(n,i,e){if(i){console.log(JSON.stringify(n,null,2));return}if(typeof n==="string"){console.log(n);return}console.log(n.message??JSON.stringify(n,null,2))}function x(n){if(!n.id)throw Error("Missing required --id. Example: open-knowledge get --id <id>")}function ve(n,i){let e=i.sort??"created";if(e!=="created"&&e!=="title")throw Error("Invalid --sort value. Use 'created' or 'title'.");let r=[...n].sort((N,s)=>{if(e==="title")return N.title.localeCompare(s.title);return N.created_at.localeCompare(s.created_at)});if(i.desc)r.reverse();return{sorted:r,sort:e,direction:i.desc?"desc":"asc"}}async function Pe(n){let{positional:i,flags:e}=Qe(n);if(I("debug","CLI invoked",{command:i[0],flags:{json:e.json,store:e.store}}),e.version){console.log(e.json?JSON.stringify({name:b.name,version:b.version},null,2):`${b.name} ${b.version}`);return}if(e.completions){let t=e.completions;if(t==="bash")console.log('_open_knowledge() { local cur; cur="${COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge');else if(t==="zsh")console.log(`#compdef open-knowledge
281
- _open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"{created,title}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--no-color)--no-color[disable color]" "(--scope)--scope"{local,global,project}:" }; _open_knowledge`);else if(t==="fish")console.log('complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"');else throw 
Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");return}let r=He(i[0]);if(!r||e.help||r==="help"){qe(i[1]);return}let N=l(e.scope),s=e.store;if(!s)if(e.scope==="project"||e.scope==="local")s=j(N.home).jsonStorePath;else s=q();if(r==="paths"){let t=j(N.home);y({ok:!0,scope:e.scope??"global",home:t.home,config_path:t.configPath,json_store_path:t.jsonStorePath,knowledge_db_path:t.knowledgeDbPath,artifacts_dir:t.artifactsDir,indexes_dir:t.indexesDir,logs_dir:t.logsDir,runs_dir:t.runsDir,schemas_dir:t.schemasDir,wiki_dir:t.wikiDir,config:W(t.configPath),message:t.home},e.json);return}if(r==="db"){let t=i[1]??"init",E=j(N.home);if(t!=="init"&&t!=="stats")throw Error("Invalid db action. Use 'init' or 'stats'.");if(t==="init"){let o=P(E.knowledgeDbPath);y({ok:!0,...o,message:`Initialized ${o.path}`},e.json);return}P(E.knowledgeDbPath);let T=Te(E.knowledgeDbPath);y({ok:!0,path:E.knowledgeDbPath,...T,message:`knowledge.db schema v${T.schema_version}`},e.json);return}if(r==="wiki"){if((i[1]??"init")!=="init")throw Error("Invalid wiki action. Use 'init'.");let E=j(N.home),T=W(E.configPath),o=Ne(T,E),c=await ue(o);y({ok:!0,...c,message:`Initialized wiki layout in ${E.home}`},e.json);return}if(Z(s),r==="add"){let t=i[1],E=i[2];if(!t||!E)throw Error("Usage: open-knowledge add <title> <content>");A(s,()=>{let T=R(s),o={id:v(),title:t,content:E,url:e.url??null,tags:e.tag?[e.tag]:[],created_at:new Date().toISOString(),updated_at:new Date().toISOString()};T.items.push(o),U(s,T),I("info","Item added",{id:o.id,title:o.title}),y({ok:!0,item:o,message:`Added ${o.id}`},e.json)});return}if(r==="list"){if(e.format!==void 0&&e.format!=="table"&&e.format!=="json")throw Error("Invalid --format value for list. 
Use 'table' or 'json'.");A(s,()=>{let t=R(s),E=Number.isFinite(e.page)&&e.page>0?e.page:1,T=Number.isFinite(e.limit)&&e.limit>0?e.limit:20,o=e.search?String(e.search).toLowerCase():"",c=e.tag?String(e.tag).toLowerCase():"",_=e.format==="table"||!e.json&&!e.format&&Ze(e),X=e.json||e.format==="json",p=t.items;if(e.archived)p=p.filter((u)=>u.archived===!0);else if(!e.includeArchived)p=p.filter((u)=>!u.archived);if(o)p=p.filter((u)=>u.title.toLowerCase().includes(o)||u.content.toLowerCase().includes(o));if(c)p=p.filter((u)=>u.tags&&u.tags.map((Q)=>Q.toLowerCase()).includes(c));let{sorted:d,sort:C,direction:L}=ve(p,e),k=(E-1)*T,K=d.slice(k,k+T),z=Math.max(1,Math.ceil(d.length/T));if(X){y({ok:!0,page:E,limit:T,total:d.length,total_pages:z,sort:C,direction:L,items:K},!0);return}if(K.length===0){y(`No items found (search=${o||"none"}, tag=${c||"none"})`,!1);return}if(_){let u=(D)=>D,Q=`${u("ID")} ${u("TITLE")} ${u("CREATED")} ${u("URL")} ${u("TAGS")}`;console.log(Q);for(let D of K)console.log(`${D.id} ${u(D.title)} ${D.created_at} ${D.url?u(D.url):""} ${D.tags?.length?u(`[${D.tags.join(", ")}]`):""}`);console.log(`Page ${E}/${z} | showing ${K.length} of ${d.length} | sort=${C} ${L} | search=${o||"none"} | tag=${c||"none"}`)}else{for(let u of K)console.log(`${u.id} ${u.title} ${u.created_at}${u.url?` ${u.url}`:""}${u.tags?.length?` [${u.tags.join(", ")}]`:""}`);console.log(`Page ${E}/${z} | showing ${K.length} of ${d.length} | sort=${C} ${L} | search=${o||"none"} | tag=${c||"none"}`)}});return}if(r==="get"){x(e),A(s,()=>{let E=R(s).items.find((T)=>T.id===e.id||T.short_id===e.id);if(!E)throw Error(`Item not found: ${e.id}`);y({ok:!0,item:E,message:`${E.id}: ${E.title}`},e.json)});return}if(r==="update"){x(e),A(s,()=>{let t=R(s),E=t.items.findIndex((o)=>o.id===e.id||o.short_id===e.id);if(E===-1)throw Error(`Item not found: ${e.id}`);let T=t.items[E];if(e.title!==void 0)T.title=e.title;if(e.content!==void 0)T.content=e.content;if(e.url!==void 0)T.url=e.url;if(e.tag!==void 
0){if(T.tags=T.tags||[],!T.tags.map((o)=>o.toLowerCase()).includes(e.tag.toLowerCase()))T.tags.push(e.tag)}T.updated_at=new Date().toISOString(),t.items[E]=T,U(s,t),y({ok:!0,item:T,message:`Updated ${T.id}`},e.json)});return}if(r==="archive"||r==="restore"){x(e),A(s,()=>{let t=R(s),E=t.items.findIndex((o)=>o.id===e.id||o.short_id===e.id);if(E===-1)throw Error(`Item not found: ${e.id}`);let T=t.items[E];T.archived=r==="archive",T.updated_at=new Date().toISOString(),t.items[E]=T,U(s,t),y({ok:!0,item:T,message:`${r==="archive"?"Archived":"Restored"} ${T.id}`},e.json)});return}if(r==="untag"){if(x(e),!e.tag)throw Error("Missing required --tag. Example: open-knowledge untag --id <id> -t <tag>");A(s,()=>{let t=R(s),E=t.items.findIndex((c)=>c.id===e.id||c.short_id===e.id);if(E===-1)throw Error(`Item not found: ${e.id}`);let T=t.items[E],o=T.tags?.length??0;T.tags=(T.tags??[]).filter((c)=>c.toLowerCase()!==e.tag.toLowerCase()),T.updated_at=new Date().toISOString(),t.items[E]=T,U(s,t),y({ok:!0,item:T,removed:o-T.tags.length,message:`Removed tag from ${T.id}`},e.json)});return}if(r==="upsert"){let t=e.title??i[1],E=e.content??i[2];A(s,()=>{let T=R(s),o=e.id?T.items.findIndex((X)=>X.id===e.id||X.short_id===e.id):-1,c=new Date().toISOString();if(o===-1){if(!t||!E)throw Error("New item requires title and content. 
Example: open-knowledge upsert <title> <content> [--id <id>]");let X=e.id??v(),p={id:X,short_id:ie(X),title:t,content:E,url:e.url??null,tags:e.tag?[e.tag]:[],metadata:{},archived:!1,created_at:c,updated_at:c};T.items.push(p),U(s,T),y({ok:!0,created:!0,item:p,message:`Upserted ${p.id}`},e.json);return}let _=T.items[o];if(t!==void 0)_.title=t;if(E!==void 0)_.content=E;if(e.url!==void 0)_.url=e.url;if(e.tag!==void 0){if(_.tags=_.tags||[],!_.tags.map((X)=>X.toLowerCase()).includes(e.tag.toLowerCase()))_.tags.push(e.tag)}_.updated_at=c,T.items[o]=_,U(s,T),y({ok:!0,created:!1,item:_,message:`Upserted ${_.id}`},e.json)});return}if(r==="delete"){if(x(e),!e.yes)throw Error("Refusing delete without --yes. Re-run with: open-knowledge delete --id <id> --yes");A(s,()=>{let t=R(s),E=t.items.length;t.items=t.items.filter((o)=>o.id!==e.id&&o.short_id!==e.id);let T=E!==t.items.length;if(U(s,t),!T)throw Error(`Item not found: ${e.id}`);I("info","Item deleted",{id:e.id}),y({ok:!0,deleted_id:e.id,message:`Deleted ${e.id}`},e.json)});return}if(r==="export"){let t=e.format??"json";if(t!=="json"&&t!=="jsonl")throw Error("Invalid --format. Use 'json' or 'jsonl'.");A(s,()=>{let E=R(s);if(t==="jsonl")for(let T of E.items)console.log(JSON.stringify(T));else y({ok:!0,items:E.items},e.json)});return}if(r==="prune"){if(!e.yes)throw Error("Refusing prune without --yes. Re-run with: open-knowledge prune --yes [--older-than <days>] [--empty]");A(s,()=>{let t=R(s),E=t.items.length;if(e.olderThan!==void 0){let o=new Date;o.setDate(o.getDate()-e.olderThan),t.items=t.items.filter((c)=>new Date(c.created_at)>=o)}if(e.empty)t.items=t.items.filter((o)=>o.content.trim().length>0);let T=E-t.items.length;U(s,t),I("info","Prune completed",{pruned:T,remaining:t.items.length}),y({ok:!0,pruned:T,remaining:t.items.length,message:`Pruned ${T} item(s)`},e.json)});return}if(r==="dedupe"){if(!e.yes)throw Error("Refusing dedupe without --yes. 
Re-run with: open-knowledge dedupe --yes [--json]");A(s,()=>{let t=R(s),E=new Set,T=t.items.length;t.items=t.items.filter((c)=>{let _=`${c.title}\x00${c.content}`;if(E.has(_))return!1;return E.add(_),!0});let o=T-t.items.length;U(s,t),I("info","Dedupe completed",{removed:o,remaining:t.items.length}),y({ok:!0,removed:o,remaining:t.items.length,message:`Dedupe removed ${o} duplicate(s)`},e.json)});return}if(r==="stats"){A(s,()=>{let t=R(s),E=t.items.filter((L)=>!L.archived),T=E.length,o=t.items.length-T,c=E.filter((L)=>L.url).length,_=E.filter((L)=>L.tags&&L.tags.length>0).length,X=T>0?E.map((L)=>L.created_at).sort()[0]:null,p=T>0?E.map((L)=>L.created_at).sort()[T-1]:null,d={};for(let L of E)for(let k of L.tags||[])d[k]=(d[k]||0)+1;let C=Object.entries(d).sort((L,k)=>k[1]-L[1]).slice(0,5).map(([L,k])=>({tag:L,count:k}));y({ok:!0,total:T,archived:o,with_url:c,with_tags:_,oldest:X,newest:p,top_tags:C,message:`${T} items | ${c} with URL | ${_} with tags`},e.json)});return}let S=Ve(i[0]),g=S?` Did you mean '${S}'?`:"";throw I("warn","Unknown command",{input:i[0],suggestion:S}),Error(`Unknown command: ${i[0]}.${g} Run 'open-knowledge --help' for available commands.`)}if(import.meta.main)Pe(process.argv.slice(2)).catch((n)=>{let i=n instanceof Error?n.message:String(n);I("error","CLI error",{message:i,stack:n instanceof Error?n.stack:void 0}),console.error(`Error: ${i}`),process.exitCode=1});export{Ve as suggestCommand,ve as sortItems,Pe as run,Qe as parseArgs};
310
+ --empty Remove items with empty content`)}function Sn(e){if(e==="add"){console.log("Usage: open-knowledge add <title> <content> [--url <url>] [-t <tag>] [--json]");return}if(e==="list"||e==="ls"){console.log("Usage: open-knowledge list|ls [--format table|json] [-p <page>] [-l <limit>] [-s <search>] [-t <tag>] [--sort created|title] [--desc] [--json]");return}if(e==="get"){console.log("Usage: open-knowledge get --id <id> [--json]");return}if(e==="update"||e==="edit"){console.log("Usage: open-knowledge update|edit --id <id> [--title <title>] [--content <content>] [--url <url>] [-t <tag>] [--json]");return}if(e==="archive"){console.log("Usage: open-knowledge archive --id <id> [--json]");return}if(e==="restore"||e==="unarchive"){console.log("Usage: open-knowledge restore|unarchive --id <id> [--json]");return}if(e==="upsert"){console.log("Usage: open-knowledge upsert [title] [content] [--id <id>] [--title <title>] [--content <content>] [--url <url>] [-t <tag>] [--json]");return}if(e==="untag"){console.log("Usage: open-knowledge untag --id <id> -t <tag> [--json]");return}if(e==="delete"||e==="rm"){console.log("Usage: open-knowledge delete|rm --id <id> -y [--json]");return}if(e==="export"){console.log("Usage: open-knowledge export [--format jsonl] [--json]");return}if(e==="prune"){console.log("Usage: open-knowledge prune --yes [--older-than <days>] [--empty] [--json]");return}if(e==="dedupe"){console.log("Usage: open-knowledge dedupe --yes [--json]");return}if(e==="stats"){console.log("Usage: open-knowledge stats [--json]");return}if(e==="paths"){console.log("Usage: open-knowledge paths [--scope local|global|project] [--json]");return}if(e==="db"){console.log("Usage: open-knowledge db init|stats [--scope local|global|project] [--json]");return}if(e==="wiki"){console.log("Usage: open-knowledge wiki init [--scope local|global|project] [--json]");return}if(e==="ingest"){console.log("Usage: open-knowledge ingest manifest <file|s3://bucket/key> [--scope local|global|project] 
[--json]");return}Un()}function An(e){if(e.noColor||process.env.NO_COLOR)return!1;if(process.env.FORCE_COLOR)return!0;return process.stdout.isTTY===!0}function L(e,t,n){if(t){console.log(JSON.stringify(e,null,2));return}if(typeof e==="string"){console.log(e);return}console.log(e.message??JSON.stringify(e,null,2))}function j(e){if(!e.id)throw Error("Missing required --id. Example: open-knowledge get --id <id>")}function In(e,t){let n=t.sort??"created";if(n!=="created"&&n!=="title")throw Error("Invalid --sort value. Use 'created' or 'title'.");let r=[...e].sort((c,s)=>{if(n==="title")return c.title.localeCompare(s.title);return c.created_at.localeCompare(s.created_at)});if(t.desc)r.reverse();return{sorted:r,sort:n,direction:t.desc?"desc":"asc"}}async function xn(e){let{positional:t,flags:n}=On(e);if(x("debug","CLI invoked",{command:t[0],flags:{json:n.json,store:n.store}}),n.version){console.log(n.json?JSON.stringify({name:M.name,version:M.version},null,2):`${M.name} ${M.version}`);return}if(n.completions){let i=n.completions;if(i==="bash")console.log('_open_knowledge() { local cur; cur="${COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge');else if(i==="zsh")console.log(`#compdef open-knowledge
311
+ _open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"{created,title}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--no-color)--no-color[disable color]" "(--scope)--scope"{local,global,project}:" }; _open_knowledge`);else if(i==="fish")console.log('complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"');else 
throw Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");return}let r=kn(t[0]);if(!r||n.help||r==="help"){Sn(t[1]);return}let c=ie(n.scope),s=n.store;if(!s)if(n.scope==="project"||n.scope==="local")s=b(c.home).jsonStorePath;else s=V();if(r==="paths"){let i=b(c.home);L({ok:!0,scope:n.scope??"global",home:i.home,config_path:i.configPath,json_store_path:i.jsonStorePath,knowledge_db_path:i.knowledgeDbPath,artifacts_dir:i.artifactsDir,indexes_dir:i.indexesDir,logs_dir:i.logsDir,runs_dir:i.runsDir,schemas_dir:i.schemasDir,wiki_dir:i.wikiDir,config:v(i.configPath),message:i.home},n.json);return}if(r==="db"){let i=t[1]??"init",u=b(c.home);if(i!=="init"&&i!=="stats")throw Error("Invalid db action. Use 'init' or 'stats'.");if(i==="init"){let T=m(u.knowledgeDbPath);L({ok:!0,...T,message:`Initialized ${T.path}`},n.json);return}m(u.knowledgeDbPath);let o=ue(u.knowledgeDbPath);L({ok:!0,path:u.knowledgeDbPath,...o,message:`knowledge.db schema v${o.schema_version}`},n.json);return}if(r==="wiki"){if((t[1]??"init")!=="init")throw Error("Invalid wiki action. Use 'init'.");let u=b(c.home),o=v(u.configPath),T=ae(o,u),a=await pe(T);L({ok:!0,...a,message:`Initialized wiki layout in ${u.home}`},n.json);return}if(r==="ingest"){if((t[1]??"")!=="manifest")throw Error("Invalid ingest action. 
Use 'manifest'.");let u=t[2];if(!u)throw Error("Usage: open-knowledge ingest manifest <file|s3://bucket/key>");let o=b(c.home),T=v(o.configPath),a=await Ne({dbPath:o.knowledgeDbPath,input:u,config:T});L({ok:!0,...a,message:`Ingested ${a.items_seen} manifest item(s)`},n.json);return}if(q(s),r==="add"){let i=t[1],u=t[2];if(!i||!u)throw Error("Usage: open-knowledge add <title> <content>");l(s,()=>{let o=k(s),T={id:P(),title:i,content:u,url:n.url??null,tags:n.tag?[n.tag]:[],created_at:new Date().toISOString(),updated_at:new Date().toISOString()};o.items.push(T),w(s,o),x("info","Item added",{id:T.id,title:T.title}),L({ok:!0,item:T,message:`Added ${T.id}`},n.json)});return}if(r==="list"){if(n.format!==void 0&&n.format!=="table"&&n.format!=="json")throw Error("Invalid --format value for list. Use 'table' or 'json'.");l(s,()=>{let i=k(s),u=Number.isFinite(n.page)&&n.page>0?n.page:1,o=Number.isFinite(n.limit)&&n.limit>0?n.limit:20,T=n.search?String(n.search).toLowerCase():"",a=n.tag?String(n.tag).toLowerCase():"",f=n.format==="table"||!n.json&&!n.format&&An(n),y=n.json||n.format==="json",h=i.items;if(n.archived)h=h.filter((p)=>p.archived===!0);else if(!n.includeArchived)h=h.filter((p)=>!p.archived);if(T)h=h.filter((p)=>p.title.toLowerCase().includes(T)||p.content.toLowerCase().includes(T));if(a)h=h.filter((p)=>p.tags&&p.tags.map((W)=>W.toLowerCase()).includes(a));let{sorted:O,sort:C,direction:N}=In(h,n),U=(u-1)*o,X=O.slice(U,U+o),z=Math.max(1,Math.ceil(O.length/o));if(y){L({ok:!0,page:u,limit:o,total:O.length,total_pages:z,sort:C,direction:N,items:X},!0);return}if(X.length===0){L(`No items found (search=${T||"none"}, tag=${a||"none"})`,!1);return}if(f){let p=(S)=>S,W=`${p("ID")} ${p("TITLE")} ${p("CREATED")} ${p("URL")} ${p("TAGS")}`;console.log(W);for(let S of X)console.log(`${S.id} ${p(S.title)} ${S.created_at} ${S.url?p(S.url):""} ${S.tags?.length?p(`[${S.tags.join(", ")}]`):""}`);console.log(`Page ${u}/${z} | showing ${X.length} of ${O.length} | sort=${C} ${N} | 
search=${T||"none"} | tag=${a||"none"}`)}else{for(let p of X)console.log(`${p.id} ${p.title} ${p.created_at}${p.url?` ${p.url}`:""}${p.tags?.length?` [${p.tags.join(", ")}]`:""}`);console.log(`Page ${u}/${z} | showing ${X.length} of ${O.length} | sort=${C} ${N} | search=${T||"none"} | tag=${a||"none"}`)}});return}if(r==="get"){j(n),l(s,()=>{let u=k(s).items.find((o)=>o.id===n.id||o.short_id===n.id);if(!u)throw Error(`Item not found: ${n.id}`);L({ok:!0,item:u,message:`${u.id}: ${u.title}`},n.json)});return}if(r==="update"){j(n),l(s,()=>{let i=k(s),u=i.items.findIndex((T)=>T.id===n.id||T.short_id===n.id);if(u===-1)throw Error(`Item not found: ${n.id}`);let o=i.items[u];if(n.title!==void 0)o.title=n.title;if(n.content!==void 0)o.content=n.content;if(n.url!==void 0)o.url=n.url;if(n.tag!==void 0){if(o.tags=o.tags||[],!o.tags.map((T)=>T.toLowerCase()).includes(n.tag.toLowerCase()))o.tags.push(n.tag)}o.updated_at=new Date().toISOString(),i.items[u]=o,w(s,i),L({ok:!0,item:o,message:`Updated ${o.id}`},n.json)});return}if(r==="archive"||r==="restore"){j(n),l(s,()=>{let i=k(s),u=i.items.findIndex((T)=>T.id===n.id||T.short_id===n.id);if(u===-1)throw Error(`Item not found: ${n.id}`);let o=i.items[u];o.archived=r==="archive",o.updated_at=new Date().toISOString(),i.items[u]=o,w(s,i),L({ok:!0,item:o,message:`${r==="archive"?"Archived":"Restored"} ${o.id}`},n.json)});return}if(r==="untag"){if(j(n),!n.tag)throw Error("Missing required --tag. 
Example: open-knowledge untag --id <id> -t <tag>");l(s,()=>{let i=k(s),u=i.items.findIndex((a)=>a.id===n.id||a.short_id===n.id);if(u===-1)throw Error(`Item not found: ${n.id}`);let o=i.items[u],T=o.tags?.length??0;o.tags=(o.tags??[]).filter((a)=>a.toLowerCase()!==n.tag.toLowerCase()),o.updated_at=new Date().toISOString(),i.items[u]=o,w(s,i),L({ok:!0,item:o,removed:T-o.tags.length,message:`Removed tag from ${o.id}`},n.json)});return}if(r==="upsert"){let i=n.title??t[1],u=n.content??t[2];l(s,()=>{let o=k(s),T=n.id?o.items.findIndex((y)=>y.id===n.id||y.short_id===n.id):-1,a=new Date().toISOString();if(T===-1){if(!i||!u)throw Error("New item requires title and content. Example: open-knowledge upsert <title> <content> [--id <id>]");let y=n.id??P(),h={id:y,short_id:ce(y),title:i,content:u,url:n.url??null,tags:n.tag?[n.tag]:[],metadata:{},archived:!1,created_at:a,updated_at:a};o.items.push(h),w(s,o),L({ok:!0,created:!0,item:h,message:`Upserted ${h.id}`},n.json);return}let f=o.items[T];if(i!==void 0)f.title=i;if(u!==void 0)f.content=u;if(n.url!==void 0)f.url=n.url;if(n.tag!==void 0){if(f.tags=f.tags||[],!f.tags.map((y)=>y.toLowerCase()).includes(n.tag.toLowerCase()))f.tags.push(n.tag)}f.updated_at=a,o.items[T]=f,w(s,o),L({ok:!0,created:!1,item:f,message:`Upserted ${f.id}`},n.json)});return}if(r==="delete"){if(j(n),!n.yes)throw Error("Refusing delete without --yes. Re-run with: open-knowledge delete --id <id> --yes");l(s,()=>{let i=k(s),u=i.items.length;i.items=i.items.filter((T)=>T.id!==n.id&&T.short_id!==n.id);let o=u!==i.items.length;if(w(s,i),!o)throw Error(`Item not found: ${n.id}`);x("info","Item deleted",{id:n.id}),L({ok:!0,deleted_id:n.id,message:`Deleted ${n.id}`},n.json)});return}if(r==="export"){let i=n.format??"json";if(i!=="json"&&i!=="jsonl")throw Error("Invalid --format. 
Use 'json' or 'jsonl'.");l(s,()=>{let u=k(s);if(i==="jsonl")for(let o of u.items)console.log(JSON.stringify(o));else L({ok:!0,items:u.items},n.json)});return}if(r==="prune"){if(!n.yes)throw Error("Refusing prune without --yes. Re-run with: open-knowledge prune --yes [--older-than <days>] [--empty]");l(s,()=>{let i=k(s),u=i.items.length;if(n.olderThan!==void 0){let T=new Date;T.setDate(T.getDate()-n.olderThan),i.items=i.items.filter((a)=>new Date(a.created_at)>=T)}if(n.empty)i.items=i.items.filter((T)=>T.content.trim().length>0);let o=u-i.items.length;w(s,i),x("info","Prune completed",{pruned:o,remaining:i.items.length}),L({ok:!0,pruned:o,remaining:i.items.length,message:`Pruned ${o} item(s)`},n.json)});return}if(r==="dedupe"){if(!n.yes)throw Error("Refusing dedupe without --yes. Re-run with: open-knowledge dedupe --yes [--json]");l(s,()=>{let i=k(s),u=new Set,o=i.items.length;i.items=i.items.filter((a)=>{let f=`${a.title}\x00${a.content}`;if(u.has(f))return!1;return u.add(f),!0});let T=o-i.items.length;w(s,i),x("info","Dedupe completed",{removed:T,remaining:i.items.length}),L({ok:!0,removed:T,remaining:i.items.length,message:`Dedupe removed ${T} duplicate(s)`},n.json)});return}if(r==="stats"){l(s,()=>{let i=k(s),u=i.items.filter((N)=>!N.archived),o=u.length,T=i.items.length-o,a=u.filter((N)=>N.url).length,f=u.filter((N)=>N.tags&&N.tags.length>0).length,y=o>0?u.map((N)=>N.created_at).sort()[0]:null,h=o>0?u.map((N)=>N.created_at).sort()[o-1]:null,O={};for(let N of u)for(let U of N.tags||[])O[U]=(O[U]||0)+1;let C=Object.entries(O).sort((N,U)=>U[1]-N[1]).slice(0,5).map(([N,U])=>({tag:N,count:U}));L({ok:!0,total:o,archived:T,with_url:a,with_tags:f,oldest:y,newest:h,top_tags:C,message:`${o} items | ${a} with URL | ${f} with tags`},n.json)});return}let d=wn(t[0]),_=d?` Did you mean '${d}'?`:"";throw x("warn","Unknown command",{input:t[0],suggestion:d}),Error(`Unknown command: ${t[0]}.${_} Run 'open-knowledge --help' for available 
commands.`)}if(import.meta.main)xn(process.argv.slice(2)).catch((e)=>{let t=e instanceof Error?e.message:String(e);x("error","CLI error",{message:t,stack:e instanceof Error?e.stack:void 0}),console.error(`Error: ${t}`),process.exitCode=1});export{wn as suggestCommand,In as sortItems,xn as run,On as parseArgs};
package/package.json CHANGED
@@ -1,11 +1,11 @@
1
1
  {
2
2
  "name": "@hasna/knowledge",
3
- "version": "0.2.4",
3
+ "version": "0.2.5",
4
4
  "description": "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
5
5
  "type": "module",
6
6
  "bin": {
7
- "open-knowledge": "./bin/open-knowledge.js",
8
- "open-knowledge-mcp": "./bin/open-knowledge-mcp.js"
7
+ "open-knowledge": "bin/open-knowledge.js",
8
+ "open-knowledge-mcp": "bin/open-knowledge-mcp.js"
9
9
  },
10
10
  "files": [
11
11
  "bin",
@@ -37,7 +37,7 @@
37
37
  },
38
38
  "repository": {
39
39
  "type": "git",
40
- "url": "https://github.com/hasna/knowledge"
40
+ "url": "git+https://github.com/hasna/knowledge.git"
41
41
  },
42
42
  "bugs": {
43
43
  "url": "https://github.com/hasna/knowledge/issues"
package/src/cli.ts CHANGED
@@ -9,6 +9,7 @@ import { ensureKnowledgeWorkspace, readKnowledgeConfig, resolveScopedWorkspace }
9
9
  import { getKnowledgeDbStats, migrateKnowledgeDb } from './knowledge-db';
10
10
  import { createArtifactStore } from './artifact-store';
11
11
  import { initializeWikiLayout } from './wiki-layout';
12
+ import { ingestOpenFilesManifest } from './manifest-ingest';
12
13
  import pkg from '../package.json' with { type: 'json' };
13
14
 
14
15
  type LogLevel = 'debug' | 'info' | 'warn' | 'error';
@@ -59,7 +60,7 @@ interface ParseResult {
59
60
  flags: Flags;
60
61
  }
61
62
 
62
- const COMMANDS = ['add', 'list', 'get', 'delete', 'update', 'archive', 'restore', 'upsert', 'untag', 'export', 'prune', 'dedupe', 'stats', 'paths', 'db', 'wiki', 'help'];
63
+ const COMMANDS = ['add', 'list', 'get', 'delete', 'update', 'archive', 'restore', 'upsert', 'untag', 'export', 'prune', 'dedupe', 'stats', 'paths', 'db', 'wiki', 'ingest', 'help'];
63
64
  const COMMAND_ALIASES: Record<string, string> = {
64
65
  ls: 'list',
65
66
  rm: 'delete',
@@ -162,6 +163,7 @@ Commands:
162
163
  paths Show resolved workspace/store paths
163
164
  db init|stats Initialize or inspect local knowledge.db
164
165
  wiki init Initialize scalable wiki/schema/index/log artifacts
166
+ ingest manifest <file|s3://> Ingest an open-files manifest into knowledge.db
165
167
  help [command] Show help
166
168
 
167
169
  Global Options:
@@ -223,6 +225,7 @@ function printCommandHelp(command: string): void {
223
225
  if (command === 'paths') { console.log('Usage: open-knowledge paths [--scope local|global|project] [--json]'); return; }
224
226
  if (command === 'db') { console.log('Usage: open-knowledge db init|stats [--scope local|global|project] [--json]'); return; }
225
227
  if (command === 'wiki') { console.log('Usage: open-knowledge wiki init [--scope local|global|project] [--json]'); return; }
228
+ if (command === 'ingest') { console.log('Usage: open-knowledge ingest manifest <file|s3://bucket/key> [--scope local|global|project] [--json]'); return; }
226
229
  printGlobalHelp();
227
230
  }
228
231
 
@@ -267,11 +270,11 @@ async function run(argv: string[]): Promise<void> {
267
270
  if (flags.completions) {
268
271
  const shell = flags.completions;
269
272
  if (shell === 'bash') {
270
- console.log(`_open_knowledge() { local cur; cur="${"$"}{COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge`);
273
+ console.log(`_open_knowledge() { local cur; cur="${"$"}{COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge`);
271
274
  } else if (shell === 'zsh') {
272
- console.log(`#compdef open-knowledge\n_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"\{created,title\}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--no-color)--no-color[disable color]" "(--scope)--scope"\{local,global,project\}:" }; _open_knowledge`);
275
+ console.log(`#compdef open-knowledge\n_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"\{created,title\}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--no-color)--no-color[disable color]" "(--scope)--scope"\{local,global,project\}:" }; _open_knowledge`);
273
276
  } else if (shell === 'fish') {
274
- console.log(`complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"`);
277
+ console.log(`complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"`);
275
278
  } else {
276
279
  throw new Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");
277
280
  }
@@ -340,6 +343,23 @@ async function run(argv: string[]): Promise<void> {
340
343
  output({ ok: true, ...result, message: `Initialized wiki layout in ${resolvedWorkspace.home}` }, flags.json);
341
344
  return;
342
345
  }
346
+
347
+ if (command === 'ingest') {
348
+ const action = positional[1] ?? '';
349
+ if (action !== 'manifest') throw new Error("Invalid ingest action. Use 'manifest'.");
350
+ const input = positional[2];
351
+ if (!input) throw new Error('Usage: open-knowledge ingest manifest <file|s3://bucket/key>');
352
+ const resolvedWorkspace = ensureKnowledgeWorkspace(workspace.home);
353
+ const config = readKnowledgeConfig(resolvedWorkspace.configPath);
354
+ const result = await ingestOpenFilesManifest({
355
+ dbPath: resolvedWorkspace.knowledgeDbPath,
356
+ input,
357
+ config,
358
+ });
359
+ output({ ok: true, ...result, message: `Ingested ${result.items_seen} manifest item(s)` }, flags.json);
360
+ return;
361
+ }
362
+
343
363
  ensureStore(storePath);
344
364
 
345
365
  if (command === 'add') {
@@ -1,7 +1,7 @@
1
1
  import { Database } from 'bun:sqlite';
2
2
  import { ensureParentDir } from './workspace';
3
3
 
4
- export const CURRENT_SCHEMA_VERSION = 1;
4
+ export const CURRENT_SCHEMA_VERSION = 2;
5
5
 
6
6
  export interface KnowledgeDbStats {
7
7
  schema_version: number;
@@ -184,6 +184,21 @@ INSERT OR IGNORE INTO schema_versions(version, applied_at)
184
184
  VALUES (1, datetime('now'));
185
185
  `;
186
186
 
187
+ const MIGRATION_2 = `
188
+ DROP TABLE IF EXISTS chunks_fts;
189
+
190
+ CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
191
+ chunk_id UNINDEXED,
192
+ text,
193
+ title,
194
+ source_uri,
195
+ tokenize='porter unicode61'
196
+ );
197
+
198
+ INSERT OR IGNORE INTO schema_versions(version, applied_at)
199
+ VALUES (2, datetime('now'));
200
+ `;
201
+
187
202
  export function openKnowledgeDb(path: string): Database {
188
203
  ensureParentDir(path);
189
204
  const db = new Database(path);
@@ -195,6 +210,7 @@ export function migrateKnowledgeDb(path: string): { path: string; schema_version
195
210
  const db = openKnowledgeDb(path);
196
211
  try {
197
212
  db.exec(MIGRATION_1);
213
+ if (getSchemaVersion(db) < 2) db.exec(MIGRATION_2);
198
214
  return { path, schema_version: getSchemaVersion(db) };
199
215
  } finally {
200
216
  db.close();
@@ -0,0 +1,423 @@
1
+ import { createHash } from 'node:crypto';
2
+ import { existsSync, readFileSync } from 'node:fs';
3
+ import { basename } from 'node:path';
4
+ import type { Database } from 'bun:sqlite';
5
+ import { migrateKnowledgeDb, openKnowledgeDb } from './knowledge-db';
6
+ import { parseSourceRef, type SourceRef } from './source-ref';
7
+ import type { KnowledgeConfig } from './workspace';
8
+
9
/** Options accepted by `ingestOpenFilesManifest`. */
export interface ManifestIngestOptions {
  /** Path of the SQLite knowledge database that is migrated and written to. */
  dbPath: string;
  /** Manifest location: a local file path, or a URI starting with `s3://`. */
  input: string;
  /** Workspace configuration; forwarded to the S3 reader when `input` is an S3 URI. */
  config?: KnowledgeConfig;
  /** Clock override for generated timestamps; the current time is used when omitted. */
  now?: Date;
  /** Maximum characters per chunk; defaults to 4000 and must be at least 500. */
  maxChunkChars?: number;
  /** Characters repeated between consecutive chunks; defaults to 200 and must be below `maxChunkChars`. */
  chunkOverlapChars?: number;
}
17
+
18
/** Summary returned by `ingestOpenFilesManifest` after one ingest run. */
export interface ManifestIngestResult {
  /** The manifest input exactly as it was passed in. */
  path: string;
  /** Path of the database that was written to. */
  db_path: string;
  /** Number of manifest items parsed from the input. */
  items_seen: number;
  /** Number of distinct source rows touched. */
  sources_upserted: number;
  /** Number of distinct source-revision rows touched. */
  revisions_upserted: number;
  /** Number of chunk rows inserted. */
  chunks_inserted: number;
  /** Number of pre-existing chunk rows removed before re-chunking. */
  chunks_deleted: number;
  /** Number of skipped items; the visible ingest code always reports 0. */
  skipped: number;
}
28
+
29
/** A JSON object taken from a manifest: string keys with values that still need narrowing. */
type ManifestObject = Record<string, unknown>;
30
+
31
/** A manifest item after normalization, ready to be written to the database. */
interface NormalizedManifestItem {
  /** The manifest object the item was derived from, unmodified. */
  raw: ManifestObject;
  /** Full source reference, possibly including a revision segment. */
  sourceRef: string;
  /** Source reference with any trailing `/revision/<id>` removed for open-files file refs. */
  sourceUri: string;
  /** Kind reported by the parsed source reference. */
  kind: SourceRef['kind'];
  /** Display title, or null when the item has no title, name, or path. */
  title: string | null;
  /** Revision label; `'current'` when nothing more specific is available. */
  revision: string;
  /** Content hash taken from the item, if any. */
  hash: string | null;
  /** Reference to externally stored extracted text, if any. */
  extractedTextUri: string | null;
  /** Text embedded in the manifest item, if any. */
  text: string | null;
  /** Item fields minus the embedded text fields, plus the normalized reference and status. */
  metadata: ManifestObject;
  /** The item's `permissions`, else its `acl`, else an empty object. */
  acl: unknown;
  /** Item status; `'active'` when the item does not provide one. */
  status: string;
  /** The item's `updated_at`, or the ingest timestamp when absent. */
  updatedAt: string;
}
46
+
47
/**
 * Builds a deterministic identifier: the prefix, an underscore, and the first
 * 20 hex characters of the SHA-256 digest of `value`.
 */
function stableId(prefix: string, value: string): string {
  const fullDigest = createHash('sha256').update(value).digest('hex');
  const shortDigest = fullDigest.slice(0, 20);
  return `${prefix}_${shortDigest}`;
}
50
+
51
/** Returns `value` when it is a non-null, non-array object; otherwise `undefined`. */
function asObject(value: unknown): ManifestObject | undefined {
  if (!value || typeof value !== 'object' || Array.isArray(value)) return undefined;
  return value as ManifestObject;
}
54
+
55
/** Returns `value` when it is a non-empty string; otherwise `undefined`. */
function asString(value: unknown): string | undefined {
  if (typeof value !== 'string') return undefined;
  return value.length > 0 ? value : undefined;
}
58
+
59
/** Returns `value` when it is a finite number; otherwise `undefined`. */
function asNumber(value: unknown): number | undefined {
  if (typeof value !== 'number') return undefined;
  return Number.isFinite(value) ? value : undefined;
}
62
+
63
/**
 * Determines the source reference for a manifest item.
 *
 * Preference order: an explicit `source_ref`, `source_uri`, or `uri`; then an
 * `open-files://file/...` reference built from `file_id` (with a revision
 * segment when `revision_id` or `revision` is present); then an
 * `open-files://source/.../path/...` reference built from `source_id` and `path`.
 *
 * @throws Error when none of those fields are available.
 */
function buildSourceRefFromItem(item: ManifestObject): string {
  for (const candidate of [item.source_ref, item.source_uri, item.uri]) {
    const explicitRef = asString(candidate);
    if (explicitRef) return explicitRef;
  }

  const fileId = asString(item.file_id);
  if (fileId) {
    const revisionId = asString(item.revision_id) ?? asString(item.revision);
    const fileRef = `open-files://file/${encodeURIComponent(fileId)}`;
    if (!revisionId) return fileRef;
    return `${fileRef}/revision/${encodeURIComponent(revisionId)}`;
  }

  const sourceId = asString(item.source_id);
  const itemPath = asString(item.path);
  if (!sourceId || !itemPath) {
    throw new Error('Manifest item is missing source_ref, file_id, or source_id/path.');
  }
  return `open-files://source/${encodeURIComponent(sourceId)}/path/${encodeURIComponent(itemPath)}`;
}
82
+
83
/**
 * Returns the revision-independent URI for a source reference. Only
 * open-files file references that carry a revision id are altered: their
 * trailing `/revision/<id>` segment is removed.
 */
function baseSourceUri(sourceRef: string, parsed: SourceRef): string {
  const pointsAtFileRevision =
    parsed.kind === 'open-files' && parsed.entity === 'file' && !!parsed.revision_id;
  return pointsAtFileRevision ? sourceRef.replace(/\/revision\/[^/]+$/, '') : sourceRef;
}
89
+
90
/**
 * Picks the embedded text of a manifest item. The first non-empty string
 * among `extracted_text`, `text`, `content_text`, and `markdown` wins;
 * otherwise a string-valued `content` (even an empty one) is used; otherwise null.
 */
function textFromItem(item: ManifestObject): string | null {
  for (const key of ['extracted_text', 'text', 'content_text', 'markdown']) {
    const candidate = asString(item[key]);
    if (candidate !== undefined) return candidate;
  }
  return typeof item.content === 'string' ? item.content : null;
}
100
+
101
/**
 * Finds a reference to externally stored extracted text. Top-level
 * `extracted_text_ref`, `extracted_text_uri`, and `text_ref` are checked
 * first, then the same two `extracted_text_*` keys inside an object-valued `content`.
 */
function extractedTextUriFromItem(item: ManifestObject): string | null {
  for (const candidate of [item.extracted_text_ref, item.extracted_text_uri, item.text_ref]) {
    const topLevelUri = asString(candidate);
    if (topLevelUri) return topLevelUri;
  }
  const nested = asObject(item.content);
  const nestedUri = asString(nested?.extracted_text_ref) ?? asString(nested?.extracted_text_uri);
  return nestedUri ?? null;
}
107
+
108
/** Chooses a title: `title`, then `name`, then the base name of `path`, else null. */
function titleFromItem(item: ManifestObject): string | null {
  const explicitTitle = asString(item.title) ?? asString(item.name);
  if (explicitTitle !== undefined) return explicitTitle;
  const itemPath = asString(item.path);
  return itemPath === undefined ? null : basename(itemPath);
}
112
+
113
/** Returns the first non-empty string among `hash`, `checksum`, and `sha256`, else null. */
function hashFromItem(item: ManifestObject): string | null {
  for (const key of ['hash', 'checksum', 'sha256']) {
    const digest = asString(item[key]);
    if (digest !== undefined) return digest;
  }
  return null;
}
116
+
117
/**
 * Chooses the revision label for an item. Precedence: `revision_id`,
 * `revision`, `version_id`, the revision id of an open-files source
 * reference, the content hash, `updated_at`, and finally `'current'`.
 */
function revisionFromItem(item: ManifestObject, parsed: SourceRef, hash: string | null): string {
  const explicit = asString(item.revision_id) ?? asString(item.revision) ?? asString(item.version_id);
  if (explicit !== undefined) return explicit;
  const fromRef = parsed.kind === 'open-files' ? parsed.revision_id : undefined;
  return fromRef ?? hash ?? asString(item.updated_at) ?? 'current';
}
127
+
128
/**
 * Builds the metadata object stored alongside a source or revision: every
 * field of the item except the embedded-text fields, with `source_ref`,
 * `source_uri`, and `status` overwritten by their normalized values.
 */
function metadataFromItem(item: ManifestObject, normalized: {
  sourceRef: string;
  sourceUri: string;
  status: string;
}): ManifestObject {
  // Embedded text is chunked separately, so it is kept out of metadata.
  const omittedKeys = new Set(['text', 'content', 'content_text', 'extracted_text', 'markdown']);
  const metadata: ManifestObject = {};
  Object.keys(item).forEach((key) => {
    if (!omittedKeys.has(key)) metadata[key] = item[key];
  });
  metadata.source_ref = normalized.sourceRef;
  metadata.source_uri = normalized.sourceUri;
  metadata.status = normalized.status;
  return metadata;
}
143
+
144
/**
 * Converts a raw manifest object into the normalized shape used for database
 * writes.
 *
 * @param item Raw manifest object.
 * @param now ISO timestamp used as `updatedAt` when the item has no `updated_at`.
 * @throws Error when no source reference can be derived from the item.
 */
function normalizeManifestItem(item: ManifestObject, now: string): NormalizedManifestItem {
  const sourceRef = buildSourceRefFromItem(item);
  const parsedRef = parseSourceRef(sourceRef);
  const sourceUri = baseSourceUri(sourceRef, parsedRef);
  const hash = hashFromItem(item);
  const status = asString(item.status) ?? 'active';

  const normalized: NormalizedManifestItem = {
    raw: item,
    sourceRef,
    sourceUri,
    kind: parsedRef.kind,
    title: titleFromItem(item),
    revision: revisionFromItem(item, parsedRef, hash),
    hash,
    extractedTextUri: extractedTextUriFromItem(item),
    text: textFromItem(item),
    metadata: metadataFromItem(item, { sourceRef, sourceUri, status }),
    acl: item.permissions ?? item.acl ?? {},
    status,
    updatedAt: asString(item.updated_at) ?? now,
  };
  return normalized;
}
166
+
167
/** Narrows a parsed JSON value to a manifest object, throwing `message` otherwise. */
function requireManifestObject(value: unknown, message: string): ManifestObject {
  if (value === null || typeof value !== 'object' || Array.isArray(value)) {
    throw new Error(message);
  }
  return value as ManifestObject;
}

/**
 * Parses newline-delimited JSON, ignoring blank lines. Line numbers in error
 * messages count from the first line of `text` as given.
 */
function parseManifestLines(text: string): ManifestObject[] {
  const items: ManifestObject[] = [];
  text.split(/\r?\n/).forEach((line, index) => {
    if (line.trim().length === 0) return;
    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new SyntaxError(`Manifest JSONL line ${index + 1} is not valid JSON: ${reason}`);
    }
    items.push(requireManifestObject(parsed, 'Manifest JSONL entries must be objects.'));
  });
  return items;
}

/**
 * Parses manifest text into a list of manifest objects.
 *
 * Accepted shapes:
 * - a JSON array of objects;
 * - a JSON object with an `items` array of objects;
 * - a single JSON object, treated as one item;
 * - JSONL, one JSON object per non-blank line.
 *
 * Empty or whitespace-only input yields an empty list.
 *
 * @throws SyntaxError when the text (or a JSONL line) is not valid JSON.
 * @throws Error when an entry is not a JSON object.
 */
function parseManifestText(text: string): ManifestObject[] {
  const trimmed = text.trim();
  if (!trimmed) return [];

  if (trimmed.startsWith('[')) {
    const parsed: unknown = JSON.parse(trimmed);
    if (!Array.isArray(parsed)) throw new Error('Manifest array parse failed.');
    return parsed.map((entry) => requireManifestObject(entry, 'Manifest array entries must be objects.'));
  }

  if (trimmed.startsWith('{')) {
    let parsed: unknown;
    try {
      // Only the parse itself is guarded, so validation errors raised below
      // are never mistaken for "this must be JSONL".
      parsed = JSON.parse(trimmed);
    } catch (error) {
      const lineCount = trimmed.split(/\r?\n/).filter((line) => line.trim().length > 0).length;
      if (lineCount <= 1) throw error;
      return parseManifestLines(trimmed);
    }

    const document = requireManifestObject(parsed, 'Manifest object parse failed.');
    if (Array.isArray(document.items)) {
      return document.items.map((entry) => requireManifestObject(entry, 'Manifest items entries must be objects.'));
    }
    // Any other well-formed object is a single item, however it identifies
    // its source; a missing reference is reported during normalization.
    return [document];
  }

  return parseManifestLines(trimmed);
}
211
+
212
/**
 * Downloads an `s3://bucket/key` object and returns its body as text.
 *
 * The AWS SDK modules are imported on demand. Region, profile, and retry
 * settings are taken from `config` only when it describes S3 storage for the
 * same bucket; otherwise the client is created with those settings undefined.
 *
 * @throws Error when the URI has no bucket or no key.
 */
async function readS3Text(uri: string, config?: KnowledgeConfig): Promise<string> {
  const location = new URL(uri);
  const bucket = location.hostname;
  const key = decodeURIComponent(location.pathname.replace(/^\/+/, ''));
  if (!bucket || !key) throw new Error(`Invalid S3 manifest URI: ${uri}`);

  const [s3Module, credentialsModule] = await Promise.all([
    import('@aws-sdk/client-s3'),
    import('@aws-sdk/credential-providers'),
  ]);
  const { S3Client, GetObjectCommand } = s3Module;
  const { fromIni } = credentialsModule;

  const bucketSettings =
    config?.storage.type === 's3' && config.storage.s3?.bucket === bucket ? config.storage.s3 : undefined;
  const profile = bucketSettings?.profile;
  const client = new S3Client({
    region: bucketSettings?.region,
    credentials: profile ? fromIni({ profile }) : undefined,
    maxAttempts: bucketSettings?.max_attempts,
  });

  const response = await client.send(new GetObjectCommand({ Bucket: bucket, Key: key }));
  const body = response.Body;
  return body ? await body.transformToString() : '';
}
231
+
232
/**
 * Reads the manifest text from an `s3://` URI or from a local file (UTF-8).
 *
 * @throws Error when a local path does not exist.
 */
async function readManifestInput(input: string, config?: KnowledgeConfig): Promise<string> {
  const isS3Uri = input.startsWith('s3://');
  if (isS3Uri) return readS3Text(input, config);
  if (existsSync(input)) return readFileSync(input, 'utf8');
  throw new Error(`Manifest not found: ${input}`);
}
237
+
238
/** One slice of a text produced by `chunkText`. */
interface TextChunk {
  /** Zero-based position of the chunk within its text. */
  ordinal: number;
  /** Chunk content with surrounding whitespace trimmed. */
  text: string;
  /** Start of the untrimmed slice within the newline-normalized text. */
  startOffset: number;
  /** End (exclusive) of the untrimmed slice within the newline-normalized text. */
  endOffset: number;
}
244
+
245
/**
 * Splits text into overlapping chunks of at most `maxChars` characters.
 *
 * `\r\n` is normalized to `\n` first, and offsets refer to that normalized
 * text. When a chunk would end mid-text, it is cut after the last paragraph
 * break (`\n\n`) or sentence end (`. `) found in the second half of the
 * window, if there is one. Each following chunk starts `overlapChars` before
 * the previous end, unless that would not move forward, in which case it
 * starts at the previous end.
 *
 * @param text Text to split.
 * @param maxChars Maximum length of the untrimmed slice; must be at least 1.
 * @param overlapChars Characters shared with the previous chunk; values that
 *   are not positive are treated as 0.
 * @returns Chunks in order; empty when the text is blank.
 * @throws RangeError when `maxChars` is NaN or below 1.
 */
function chunkText(text: string, maxChars: number, overlapChars: number): TextChunk[] {
  // The negated comparison also rejects NaN, which would otherwise yield no chunks.
  if (!(maxChars >= 1)) throw new RangeError('maxChars must be at least 1.');
  const overlap = overlapChars > 0 ? overlapChars : 0;

  const normalized = text.replace(/\r\n/g, '\n');
  if (!normalized.trim()) return [];

  const chunks: TextChunk[] = [];
  let start = 0;
  while (start < normalized.length) {
    const hardEnd = Math.min(normalized.length, start + maxChars);
    let end = hardEnd;
    if (hardEnd < normalized.length) {
      // Searching from before hardEnd keeps the break characters that are
      // included in the chunk inside the window, so end never exceeds hardEnd.
      const paragraphBreak = normalized.lastIndexOf('\n\n', hardEnd - 2);
      const sentenceBreak = normalized.lastIndexOf('. ', hardEnd - 1);
      const candidate = Math.max(paragraphBreak, sentenceBreak);
      if (candidate > start + Math.floor(maxChars * 0.5)) {
        // Keep both newlines of a paragraph break, or just the period of a sentence end.
        end = candidate + (candidate === paragraphBreak ? 2 : 1);
      }
    }

    const chunk = normalized.slice(start, end).trim();
    if (chunk) {
      chunks.push({
        ordinal: chunks.length,
        text: chunk,
        startOffset: start,
        endOffset: end,
      });
    }
    if (end >= normalized.length) break;

    // Guarantee forward progress: an overlap as large as the chunk just
    // emitted would otherwise restart at or before the same position forever.
    const overlappedStart = end - overlap;
    start = overlappedStart > start ? overlappedStart : end;
  }
  return chunks;
}
273
+
274
/**
 * Estimates a token count as 1.25 tokens per whitespace-separated word,
 * rounded up, and never less than 1.
 */
function estimateTokenCount(text: string): number {
  const wordCount = text.match(/\S+/g)?.length ?? 0;
  const estimate = Math.ceil(wordCount * 1.25);
  return Math.max(1, estimate);
}
278
+
279
/**
 * Removes every chunk stored for a source revision, deleting the matching
 * `chunks_fts` rows first and then the `chunks` rows.
 *
 * @returns The number of chunk rows that existed for the revision.
 */
function deleteChunksForRevision(db: Database, sourceRevisionId: string): number {
  const selectChunkIds = db.query<{ id: string }, [string]>('SELECT id FROM chunks WHERE source_revision_id = ?');
  const existing = selectChunkIds.all(sourceRevisionId);
  existing.forEach(({ id }) => {
    db.run('DELETE FROM chunks_fts WHERE chunk_id = ?', [id]);
  });
  db.run('DELETE FROM chunks WHERE source_revision_id = ?', [sourceRevisionId]);
  return existing.length;
}
287
+
288
/**
 * Inserts the source row for an item, or refreshes kind, title, metadata,
 * ACL, and `updated_at` when a row with the same URI already exists.
 *
 * @param now ISO timestamp written to `created_at` on insert.
 * @returns The id of the stored row, read back by URI after the write.
 * @throws Error when the row cannot be read back.
 */
function upsertSource(db: Database, item: NormalizedManifestItem, now: string): string {
  const upsertSql = `INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?)
     ON CONFLICT(uri) DO UPDATE SET
       kind = excluded.kind,
       title = excluded.title,
       metadata_json = excluded.metadata_json,
       acl_json = excluded.acl_json,
       updated_at = excluded.updated_at`;
  const params = [
    stableId('src', item.sourceUri),
    item.sourceUri,
    item.kind,
    item.title,
    JSON.stringify(item.metadata),
    JSON.stringify(item.acl ?? {}),
    now,
    item.updatedAt,
  ];
  db.run(upsertSql, params);

  const stored = db.query<{ id: string }, [string]>('SELECT id FROM sources WHERE uri = ?').get(item.sourceUri);
  if (stored) return stored.id;
  throw new Error(`Failed to upsert source: ${item.sourceUri}`);
}
314
+
315
/**
 * Inserts the revision row for an item, or refreshes hash, extracted-text
 * URI, and metadata when the (source, revision) pair already exists.
 *
 * @param now ISO timestamp written to `created_at` on insert.
 * @returns The id of the stored row, read back after the write.
 * @throws Error when the row cannot be read back.
 */
function upsertRevision(db: Database, sourceId: string, item: NormalizedManifestItem, now: string): string {
  const upsertSql = `INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)
     VALUES (?, ?, ?, ?, ?, ?, ?)
     ON CONFLICT(source_id, revision) DO UPDATE SET
       hash = excluded.hash,
       extracted_text_uri = excluded.extracted_text_uri,
       metadata_json = excluded.metadata_json`;
  const params = [
    stableId('rev', `${sourceId}\u0000${item.revision}`),
    sourceId,
    item.revision,
    item.hash,
    item.extractedTextUri,
    JSON.stringify(item.metadata),
    now,
  ];
  db.run(upsertSql, params);

  const lookup = db.query<{ id: string }, [string, string]>(
    'SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?',
  );
  const stored = lookup.get(sourceId, item.revision);
  if (stored) return stored.id;
  throw new Error(`Failed to upsert source revision: ${item.sourceRef}`);
}
340
+
341
/**
 * Chunks an item's embedded text and writes each chunk to `chunks` and
 * `chunks_fts`. Nothing is written when the item has no text or its status
 * is `deleted` (case-insensitive).
 *
 * @returns The number of chunks written.
 */
function insertChunks(db: Database, sourceRevisionId: string, item: NormalizedManifestItem, now: string, maxChars: number, overlapChars: number): number {
  if (!item.text) return 0;
  if (item.status.toLowerCase() === 'deleted') return 0;

  // Identical for every chunk of the item, so it is serialized once.
  const metadataJson = JSON.stringify({
    source_ref: item.sourceRef,
    source_uri: item.sourceUri,
    hash: item.hash,
    status: item.status,
    path: asString(item.raw.path) ?? null,
    mime: asString(item.raw.mime) ?? asString(item.raw.content_type) ?? null,
    size: asNumber(item.raw.size) ?? null,
  });
  const ftsTitle = item.title ?? '';

  const pieces = chunkText(item.text, maxChars, overlapChars);
  pieces.forEach((piece) => {
    const chunkId = stableId('chk', `${sourceRevisionId}\u0000${piece.ordinal}\u0000${piece.text}`);
    db.run(
      `INSERT INTO chunks (id, source_revision_id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json, created_at)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
      [
        chunkId,
        sourceRevisionId,
        'source',
        piece.ordinal,
        piece.text,
        estimateTokenCount(piece.text),
        piece.startOffset,
        piece.endOffset,
        metadataJson,
        now,
      ],
    );
    db.run(
      'INSERT INTO chunks_fts (chunk_id, text, title, source_uri) VALUES (?, ?, ?, ?)',
      [chunkId, piece.text, ftsTitle, item.sourceUri],
    );
  });
  return pieces.length;
}
378
+
379
/**
 * Ingests an open-files manifest into the knowledge database.
 *
 * The database is migrated first, then the manifest is read and parsed, and
 * all items are written inside a single transaction: each item upserts its
 * source and revision, and when the item carries text or is marked deleted,
 * the revision's existing chunks are replaced.
 *
 * @param options Database path, manifest input, and optional chunking settings.
 * @returns Counts describing what was written.
 * @throws Error when `maxChunkChars` is not a number of at least 500, or when
 *   `chunkOverlapChars` is not a number in `[0, maxChunkChars)`. Errors from
 *   reading, parsing, or normalizing the manifest propagate unchanged.
 */
export async function ingestOpenFilesManifest(options: ManifestIngestOptions): Promise<ManifestIngestResult> {
  const now = (options.now ?? new Date()).toISOString();
  const maxChunkChars = options.maxChunkChars ?? 4000;
  const chunkOverlapChars = options.chunkOverlapChars ?? 200;
  // Negated comparisons so that NaN is rejected instead of slipping through
  // and silently producing no chunks.
  if (!(maxChunkChars >= 500)) throw new Error('maxChunkChars must be at least 500.');
  if (!(chunkOverlapChars >= 0 && chunkOverlapChars < maxChunkChars)) {
    throw new Error('chunkOverlapChars must be less than maxChunkChars.');
  }

  migrateKnowledgeDb(options.dbPath);
  const text = await readManifestInput(options.input, options.config);
  const items = parseManifestText(text);
  const db = openKnowledgeDb(options.dbPath);
  try {
    const ingestAll = db.transaction((): ManifestIngestResult => {
      const seenSources = new Set<string>();
      const seenRevisions = new Set<string>();
      let chunksInserted = 0;
      let chunksDeleted = 0;
      for (const raw of items) {
        const item = normalizeManifestItem(raw, now);
        const sourceId = upsertSource(db, item, now);
        const revisionId = upsertRevision(db, sourceId, item, now);
        seenSources.add(sourceId);
        seenRevisions.add(revisionId);
        // Items without text keep whatever chunks the revision already has.
        if (item.text || item.status.toLowerCase() === 'deleted') {
          chunksDeleted += deleteChunksForRevision(db, revisionId);
        }
        chunksInserted += insertChunks(db, revisionId, item, now, maxChunkChars, chunkOverlapChars);
      }
      return {
        path: options.input,
        db_path: options.dbPath,
        items_seen: items.length,
        sources_upserted: seenSources.size,
        revisions_upserted: seenRevisions.size,
        chunks_inserted: chunksInserted,
        chunks_deleted: chunksDeleted,
        // No item is skipped: a malformed item throws and rolls back the transaction.
        skipped: 0,
      };
    });
    return ingestAll();
  } finally {
    db.close();
  }
}