@hasna/knowledge 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -0
- package/bin/open-knowledge-mcp.js +1 -1
- package/bin/open-knowledge.js +28 -15
- package/package.json +1 -1
- package/src/cli.ts +23 -4
- package/src/outbox-consume.ts +425 -0
package/README.md
CHANGED
|
@@ -65,6 +65,9 @@ open-knowledge wiki init --scope project
|
|
|
65
65
|
|
|
66
66
|
# Ingest an open-files source manifest into the project SQLite catalog
|
|
67
67
|
open-knowledge ingest manifest ./open-files-manifest.jsonl --scope project --json
|
|
68
|
+
|
|
69
|
+
# Consume open-files change events and invalidate stale source chunks
|
|
70
|
+
open-knowledge reindex outbox ./open-files-outbox.jsonl --scope project --json
|
|
68
71
|
```
|
|
69
72
|
|
|
70
73
|
## Commands
|
|
@@ -171,6 +174,14 @@ Import an open-files JSON or JSONL source manifest into `knowledge.db`. This
|
|
|
171
174
|
upserts sources and source revisions, stores hash/MIME/status/permission
|
|
172
175
|
metadata, and chunks embedded extracted text when the manifest includes it.
|
|
173
176
|
|
|
177
|
+
### reindex
|
|
178
|
+
```bash
|
|
179
|
+
open-knowledge reindex outbox <file|s3://bucket/key> [--scope project] [--json]
|
|
180
|
+
```
|
|
181
|
+
Consume open-files JSON or JSONL change events. This invalidates matching
|
|
182
|
+
source chunks and embeddings by source ref, revision, or hash, updates
|
|
183
|
+
permission/path/delete metadata, and records a local run ledger.
|
|
184
|
+
|
|
174
185
|
### help
|
|
175
186
|
```bash
|
|
176
187
|
open-knowledge help [command]
|
|
@@ -13659,7 +13659,7 @@ import { existsSync as existsSync3, readFileSync as readFileSync3, writeFileSync
|
|
|
13659
13659
|
// package.json
|
|
13660
13660
|
var package_default = {
|
|
13661
13661
|
name: "@hasna/knowledge",
|
|
13662
|
-
version: "0.2.
|
|
13662
|
+
version: "0.2.6",
|
|
13663
13663
|
description: "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
|
|
13664
13664
|
type: "module",
|
|
13665
13665
|
bin: {
|
package/bin/open-knowledge.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env bun
|
|
2
2
|
// @bun
|
|
3
|
-
var
|
|
4
|
-
`);return
|
|
3
|
+
var U=import.meta.require;import{readFileSync as H,writeFileSync as J,existsSync as z,renameSync as Ie,unlinkSync as ae}from"fs";import{randomUUID as de}from"crypto";import{existsSync as be,mkdirSync as V,readFileSync as Se,writeFileSync as we}from"fs";import{homedir as ue}from"os";import{dirname as ke,join as g,resolve as me}from"path";var xe=g(".hasna","apps","knowledge");function Q(){return g(ue(),".open-knowledge","db.json")}function Z(){return g(ue(),".hasna","apps","knowledge")}function Ue(e=process.cwd()){return me(e,xe)}function v(e){return{home:e,configPath:g(e,"config.json"),jsonStorePath:g(e,"db.json"),knowledgeDbPath:g(e,"knowledge.db"),artifactsDir:g(e,"artifacts"),cacheDir:g(e,"cache"),exportsDir:g(e,"exports"),indexesDir:g(e,"indexes"),logsDir:g(e,"logs"),runsDir:g(e,"runs"),schemasDir:g(e,"schemas"),wikiDir:g(e,"wiki")}}function Ae(){return{version:1,mode:"local",storage:{type:"local",artifacts_root:"artifacts"},sources:{preferred_ref:"open-files",allowed_schemes:["open-files","s3","file","https","http"]}}}function I(e){let n=v(e);V(n.home,{recursive:!0});for(let t of[n.artifactsDir,n.cacheDir,n.exportsDir,n.indexesDir,n.logsDir,n.runsDir,n.schemasDir,n.wikiDir])V(t,{recursive:!0});if(!be(n.configPath))we(n.configPath,`${JSON.stringify(Ae(),null,2)}
|
|
4
|
+
`);return n}function ce(e,n=process.cwd()){if(e==="project"||e==="local")return v(Ue(n));return v(Z())}function Y(e){V(ke(e),{recursive:!0})}function K(e){let n=Se(e,"utf8");return JSON.parse(n)}function ee(){return v(Z()).jsonStorePath}function te(e){if(!z(e))if(Y(e),e===ee()&&z(Q()))J(e,H(Q(),"utf8"));else J(e,JSON.stringify({items:[]},null,2))}function Ce(e){return`${e}.lock`}function De(e,n){let i=Date.now();while(Date.now()-i<5000){try{if(!z(e)){J(e,JSON.stringify({owner:n,ts:Date.now()}));return}let d=JSON.parse(H(e,"utf8"));if(Date.now()-d.ts>1e4)ae(e)}catch{}let s=Date.now();while(Date.now()-s<50);}throw Error(`Could not acquire lock on ${e} after 5000ms`)}function je(e,n){try{if(z(e)){if(JSON.parse(H(e,"utf8")).owner===n)ae(e)}}catch{}}function L(e){te(e);let n=H(e,"utf8"),t=JSON.parse(n);if(!t||!Array.isArray(t.items))return{items:[]};return t}function w(e,n){let t=`${e}.tmp.${de()}`;J(t,JSON.stringify(n,null,2)),Ie(t,e)}function b(e,n){let t=de(),r=Ce(e);De(r,t);try{return n()}finally{je(r,t)}}function ne(){return`k_${Date.now().toString(36)}_${Math.random().toString(36).slice(2,8)}`}function Ee(e){return e.replace(/^k_/,"").slice(0,12)}import{Database as Xe}from"bun:sqlite";var Fe=`
|
|
5
5
|
PRAGMA journal_mode = WAL;
|
|
6
6
|
PRAGMA foreign_keys = ON;
|
|
7
7
|
|
|
@@ -168,7 +168,7 @@ CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
|
|
|
168
168
|
|
|
169
169
|
INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
170
170
|
VALUES (1, datetime('now'));
|
|
171
|
-
`,
|
|
171
|
+
`,Me=`
|
|
172
172
|
DROP TABLE IF EXISTS chunks_fts;
|
|
173
173
|
|
|
174
174
|
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
|
|
@@ -181,7 +181,7 @@ CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
|
|
|
181
181
|
|
|
182
182
|
INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
183
183
|
VALUES (2, datetime('now'));
|
|
184
|
-
`;function
|
|
184
|
+
`;function X(e){Y(e);let n=new Xe(e);return n.exec("PRAGMA foreign_keys = ON;"),n}function C(e){let n=X(e);try{if(n.exec(Fe),re(n)<2)n.exec(Me);return{path:e,schema_version:re(n)}}finally{n.close()}}function re(e){return e.query("SELECT MAX(version) AS version FROM schema_versions").get()?.version??0}function A(e,n){return e.query(`SELECT COUNT(*) AS n FROM ${n}`).get()?.n??0}function Te(e){let n=X(e);try{return{schema_version:re(n),sources:A(n,"sources"),source_revisions:A(n,"source_revisions"),chunks:A(n,"chunks"),wiki_pages:A(n,"wiki_pages"),citations:A(n,"citations"),indexes:A(n,"knowledge_indexes"),runs:A(n,"runs"),run_events:A(n,"run_events")}}finally{n.close()}}import{existsSync as ve,mkdirSync as fe,readFileSync as Ke,writeFileSync as $e}from"fs";import{dirname as Be,join as ie,relative as We,sep as Ye}from"path";function $(e){let n=e.replace(/\\/g,"/").trim();if(!n||n.startsWith("/"))throw Error(`Invalid artifact key: ${e}`);let t=n.split("/").filter(Boolean);if(t.length===0||t.some((r)=>r==="."||r===".."))throw Error(`Invalid artifact key: ${e}`);return t.join("/")}function se(e,n){let t=We(e,n);if(t.startsWith("..")||t===".."||t.startsWith(`..${Ye}`))throw Error(`Artifact path escapes root: ${n}`)}class _e{root;type="local";canRead=!0;canWrite=!0;constructor(e){this.root=e;fe(e,{recursive:!0})}async put(e){let n=$(e.key),t=ie(this.root,n);return se(this.root,t),fe(Be(t),{recursive:!0}),$e(t,e.body),{key:n,uri:`file://${t}`}}async getText(e){let n=$(e),t=ie(this.root,n);return se(this.root,t),Ke(t,"utf8")}async exists(e){let n=$(e),t=ie(this.root,n);return se(this.root,t),ve(t)}}class pe{options;type="s3";canRead=!0;canWrite=!0;client;constructor(e){this.options=e;this.client=e.client}async getClient(){if(this.client)return this.client;let[{S3Client:e},{fromIni:n}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]);return this.client=new 
e({region:this.options.region,credentials:this.options.profile?n({profile:this.options.profile}):void 0,maxAttempts:this.options.max_attempts}),this.client}objectKey(e){let n=$(e),t=this.options.prefix?$(this.options.prefix):"";return t?`${t}/${n}`:n}async put(e){let[{PutObjectCommand:n},t]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(e.key);return await t.send(new n({Bucket:this.options.bucket,Key:r,Body:e.body,ContentType:e.content_type,Metadata:e.metadata,ServerSideEncryption:this.options.server_side_encryption,SSEKMSKeyId:this.options.kms_key_id})),{key:r,uri:`s3://${this.options.bucket}/${r}`}}async getText(e){let[{GetObjectCommand:n},t]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(e),i=await t.send(new n({Bucket:this.options.bucket,Key:r}));if(!i.Body)return"";return await i.Body.transformToString()}async exists(e){let[{HeadObjectCommand:n},t]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(e);try{return await t.send(new n({Bucket:this.options.bucket,Key:r})),!0}catch(i){let s=i instanceof Error?i.name:"";if(s==="NotFound"||s==="NoSuchKey"||s==="NotFoundError")return!1;throw i}}}function le(e,n){if(e.storage.type==="s3"){if(!e.storage.s3?.bucket)throw Error("S3 artifact storage requires storage.s3.bucket");return new pe({bucket:e.storage.s3.bucket,prefix:e.storage.s3.prefix,region:e.storage.s3.region,profile:e.storage.s3.profile,max_attempts:e.storage.s3.max_attempts,server_side_encryption:e.storage.s3.server_side_encryption,kms_key_id:e.storage.s3.kms_key_id})}return new _e(n.artifactsDir)}function Je(e){let n=String(e.getUTCFullYear()),t=String(e.getUTCMonth()+1).padStart(2,"0"),r=String(e.getUTCDate()).padStart(2,"0");return{year:n,month:t,day:r}}function ze(){return`# Knowledge Agent Schema v1
|
|
185
185
|
|
|
186
186
|
## Source Rules
|
|
187
187
|
|
|
@@ -206,7 +206,7 @@ VALUES (2, datetime('now'));
|
|
|
206
206
|
## Lint Rules
|
|
207
207
|
|
|
208
208
|
- Flag stale pages, missing citations, contradictions, orphan pages, duplicate pages, and unresolved source refs.
|
|
209
|
-
`}function
|
|
209
|
+
`}function He(){return`# Knowledge Index
|
|
210
210
|
|
|
211
211
|
This is a compact orientation index for agents. It is not the full search index.
|
|
212
212
|
|
|
@@ -221,29 +221,41 @@ This is a compact orientation index for agents. It is not the full search index.
|
|
|
221
221
|
|
|
222
222
|
Raw source files are resolved through open-files. This app stores source refs,
|
|
223
223
|
citations, chunks, generated wiki artifacts, indexes, and run records.
|
|
224
|
-
`}function
|
|
224
|
+
`}function Ge(){return`# Wiki
|
|
225
225
|
|
|
226
226
|
Generated durable knowledge pages live here.
|
|
227
227
|
|
|
228
228
|
Pages should be concise, cited, and organized for both humans and agents.
|
|
229
|
-
`}async function
|
|
230
|
-
`,content_type:"application/x-ndjson"})];return await Promise.all(
|
|
231
|
-
`);if(!r.trim())return[];let
|
|
229
|
+
`}async function he(e,n=new Date){let{year:t,month:r,day:i}=Je(n),s="schemas/v1.md",d="indexes/root.md",_="wiki/README.md",o=`logs/${t}/${r}/${i}.jsonl`,c={ts:n.toISOString(),event:"wiki_layout_initialized",schema_key:"schemas/v1.md",root_index_key:"indexes/root.md",wiki_readme_key:"wiki/README.md"},u=[e.put({key:"schemas/v1.md",body:ze(),content_type:"text/markdown"}),e.put({key:"indexes/root.md",body:He(),content_type:"text/markdown"}),e.put({key:"wiki/README.md",body:Ge(),content_type:"text/markdown"}),e.put({key:o,body:`${JSON.stringify(c)}
|
|
230
|
+
`,content_type:"application/x-ndjson"})];return await Promise.all(u),{schema_key:"schemas/v1.md",root_index_key:"indexes/root.md",wiki_readme_key:"wiki/README.md",log_key:o,written:["schemas/v1.md","indexes/root.md","wiki/README.md",o]}}import{createHash as Ze}from"crypto";import{existsSync as et,readFileSync as tt}from"fs";import{basename as nt}from"path";function Ne(e,n){if(!e)throw Error(n);return e}function qe(e){let t=e.slice(13).split("/").filter(Boolean),r=t[0];if(r!=="file"&&r!=="source")throw Error("Invalid open-files ref. Expected open-files://file/<id>, open-files://file/<id>/revision/<revision_id>, or open-files://source/<id>/path/<path>.");let i=Ne(t[1],"Invalid open-files ref. Missing id.");if(r==="file"){if(t.length===2)return{kind:"open-files",uri:e,entity:r,id:i};if(t[2]==="revision"&&t[3]&&t.length===4)return{kind:"open-files",uri:e,entity:r,id:i,revision_id:decodeURIComponent(t[3])};throw Error("Invalid open-files file ref. Expected open-files://file/<id>/revision/<revision_id>.")}let s=t.indexOf("path"),d=s>=0?decodeURIComponent(t.slice(s+1).join("/")):void 0;return{kind:"open-files",uri:e,entity:r,id:i,path:d}}function Pe(e){let n=new URL(e),t=Ne(n.hostname,"Invalid s3 ref. Missing bucket."),r=decodeURIComponent(n.pathname.replace(/^\/+/,""));if(!r)throw Error("Invalid s3 ref. 
Missing object key.");return{kind:"s3",uri:e,bucket:t,key:r}}function Ve(e){let n=new URL(e);return{kind:"file",uri:e,path:decodeURIComponent(n.pathname)}}function Qe(e){let n=new URL(e);return{kind:"web",uri:e,url:n.toString()}}function G(e){if(e.startsWith("open-files://"))return qe(e);if(e.startsWith("s3://"))return Pe(e);if(e.startsWith("file://"))return Ve(e);if(e.startsWith("https://")||e.startsWith("http://"))return Qe(e);throw Error(`Unsupported source ref scheme: ${e}`)}function oe(e,n){return`${e}_${Ze("sha256").update(n).digest("hex").slice(0,20)}`}function F(e){return e&&typeof e==="object"&&!Array.isArray(e)?e:void 0}function T(e){return typeof e==="string"&&e.length>0?e:void 0}function rt(e){return typeof e==="number"&&Number.isFinite(e)?e:void 0}function it(e){let n=T(e.source_ref)??T(e.source_uri)??T(e.uri);if(n)return n;let t=T(e.file_id);if(t){let s=T(e.revision_id)??T(e.revision),d=`open-files://file/${encodeURIComponent(t)}`;return s?`${d}/revision/${encodeURIComponent(s)}`:d}let r=T(e.source_id),i=T(e.path);if(r&&i)return`open-files://source/${encodeURIComponent(r)}/path/${encodeURIComponent(i)}`;throw Error("Manifest item is missing source_ref, file_id, or source_id/path.")}function st(e,n){if(n.kind==="open-files"&&n.entity==="file"&&n.revision_id)return e.replace(/\/revision\/[^/]+$/,"");return e}function ot(e){let n=T(e.extracted_text)??T(e.text)??T(e.content_text)??T(e.markdown);if(n!==void 0)return n;let t=e.content;return typeof t==="string"?t:null}function ut(e){let n=T(e.extracted_text_ref)??T(e.extracted_text_uri)??T(e.text_ref);if(n)return n;let t=F(e.content);return T(t?.extracted_text_ref)??T(t?.extracted_text_uri)??null}function ct(e){let n=T(e.path);return T(e.title)??T(e.name)??(n?nt(n):null)}function at(e){return T(e.hash)??T(e.checksum)??T(e.sha256)??null}function dt(e,n,t){return T(e.revision_id)??T(e.revision)??T(e.version_id)??(n.kind==="open-files"?n.revision_id:void 0)??t??T(e.updated_at)??"current"}function Et(e,n){let 
t={};for(let[r,i]of Object.entries(e)){if(["text","content","content_text","extracted_text","markdown"].includes(r))continue;t[r]=i}return t.source_ref=n.sourceRef,t.source_uri=n.sourceUri,t.status=n.status,t}function Tt(e,n){let t=it(e),r=G(t),i=st(t,r),s=at(e),d=T(e.status)??"active";return{raw:e,sourceRef:t,sourceUri:i,kind:r.kind,title:ct(e),revision:dt(e,r,s),hash:s,extractedTextUri:ut(e),text:ot(e),metadata:Et(e,{sourceRef:t,sourceUri:i,status:d}),acl:e.permissions??e.acl??{},status:d,updatedAt:T(e.updated_at)??n}}function ft(e){let n=e.trim();if(!n)return[];if(n.startsWith("[")){let t=JSON.parse(n);if(!Array.isArray(t))throw Error("Manifest array parse failed.");return t.map((r)=>{let i=F(r);if(!i)throw Error("Manifest array entries must be objects.");return i})}if(n.startsWith("{"))try{let t=JSON.parse(n),r=F(t);if(!r)throw Error("Manifest object parse failed.");if(Array.isArray(r.items))return r.items.map((i)=>{let s=F(i);if(!s)throw Error("Manifest items entries must be objects.");return s});if("source_ref"in r||"source_uri"in r||"file_id"in r)return[r]}catch(t){let r=n.split(/\r?\n/).filter((i)=>i.trim().length>0);if(r.length<=1)throw t;return r.map((i)=>{let s=F(JSON.parse(i));if(!s)throw Error("Manifest JSONL entries must be objects.");return s})}return n.split(/\r?\n/).filter((t)=>t.trim().length>0).map((t)=>{let r=F(JSON.parse(t));if(!r)throw Error("Manifest JSONL entries must be objects.");return r})}async function _t(e,n){let t=new URL(e),r=t.hostname,i=decodeURIComponent(t.pathname.replace(/^\/+/,""));if(!r||!i)throw Error(`Invalid S3 manifest URI: ${e}`);let[{S3Client:s,GetObjectCommand:d},{fromIni:_}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]),o=n?.storage.type==="s3"&&n.storage.s3?.bucket===r?n.storage.s3:void 0,u=await new s({region:o?.region,credentials:o?.profile?_({profile:o.profile}):void 0,maxAttempts:o?.max_attempts}).send(new d({Bucket:r,Key:i}));if(!u.Body)return"";return await 
u.Body.transformToString()}async function pt(e,n){if(e.startsWith("s3://"))return _t(e,n);if(!et(e))throw Error(`Manifest not found: ${e}`);return tt(e,"utf8")}function lt(e,n,t){let r=e.replace(/\r\n/g,`
|
|
231
|
+
`);if(!r.trim())return[];let i=[],s=0;while(s<r.length){let d=Math.min(r.length,s+n),_=d;if(d<r.length){let c=r.lastIndexOf(`
|
|
232
232
|
|
|
233
|
-
`,d),
|
|
233
|
+
`,d),u=r.lastIndexOf(". ",d),a=Math.max(c,u);if(a>s+Math.floor(n*0.5))_=a+(a===c?2:1)}let o=r.slice(s,_).trim();if(o)i.push({ordinal:i.length,text:o,startOffset:s,endOffset:_});if(_>=r.length)break;s=Math.max(0,_-t)}return i}function ht(e){let n=e.trim().split(/\s+/).filter(Boolean).length;return Math.max(1,Math.ceil(n*1.25))}function Nt(e,n){let t=e.query("SELECT id FROM chunks WHERE source_revision_id = ?").all(n);for(let r of t)e.run("DELETE FROM chunks_fts WHERE chunk_id = ?",[r.id]);return e.run("DELETE FROM chunks WHERE source_revision_id = ?",[n]),t.length}function Ot(e,n,t){let r=oe("src",n.sourceUri);e.run(`INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
|
|
234
234
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
235
235
|
ON CONFLICT(uri) DO UPDATE SET
|
|
236
236
|
kind = excluded.kind,
|
|
237
237
|
title = excluded.title,
|
|
238
238
|
metadata_json = excluded.metadata_json,
|
|
239
239
|
acl_json = excluded.acl_json,
|
|
240
|
-
updated_at = excluded.updated_at`,[r,
|
|
240
|
+
updated_at = excluded.updated_at`,[r,n.sourceUri,n.kind,n.title,JSON.stringify(n.metadata),JSON.stringify(n.acl??{}),t,n.updatedAt]);let i=e.query("SELECT id FROM sources WHERE uri = ?").get(n.sourceUri);if(!i)throw Error(`Failed to upsert source: ${n.sourceUri}`);return i.id}function yt(e,n,t,r){let i=oe("rev",`${n}\x00${t.revision}`);e.run(`INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)
|
|
241
241
|
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
242
242
|
ON CONFLICT(source_id, revision) DO UPDATE SET
|
|
243
243
|
hash = excluded.hash,
|
|
244
244
|
extracted_text_uri = excluded.extracted_text_uri,
|
|
245
|
-
metadata_json = excluded.metadata_json`,[
|
|
246
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,[
|
|
245
|
+
metadata_json = excluded.metadata_json`,[i,n,t.revision,t.hash,t.extractedTextUri,JSON.stringify(t.metadata),r]);let s=e.query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?").get(n,t.revision);if(!s)throw Error(`Failed to upsert source revision: ${t.sourceRef}`);return s.id}function Rt(e,n,t,r,i,s){if(!t.text||t.status.toLowerCase()==="deleted")return 0;let d=lt(t.text,i,s);for(let _ of d){let o=oe("chk",`${n}\x00${_.ordinal}\x00${_.text}`),c={source_ref:t.sourceRef,source_uri:t.sourceUri,hash:t.hash,status:t.status,path:T(t.raw.path)??null,mime:T(t.raw.mime)??T(t.raw.content_type)??null,size:rt(t.raw.size)??null};e.run(`INSERT INTO chunks (id, source_revision_id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json, created_at)
|
|
246
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,[o,n,"source",_.ordinal,_.text,ht(_.text),_.startOffset,_.endOffset,JSON.stringify(c),r]),e.run("INSERT INTO chunks_fts (chunk_id, text, title, source_uri) VALUES (?, ?, ?, ?)",[o,_.text,t.title??"",t.sourceUri])}return d.length}async function Oe(e){let n=(e.now??new Date).toISOString(),t=e.maxChunkChars??4000,r=e.chunkOverlapChars??200;if(t<500)throw Error("maxChunkChars must be at least 500.");if(r<0||r>=t)throw Error("chunkOverlapChars must be less than maxChunkChars.");C(e.dbPath);let i=await pt(e.input,e.config),s=ft(i),d=X(e.dbPath);try{return d.transaction(()=>{let o=new Set,c=new Set,u=0,a=0,E=0;for(let h of s){let R=Tt(h,n),O=Ot(d,R,n),f=yt(d,O,R,n);if(o.add(O),c.add(f),R.text||R.status.toLowerCase()==="deleted")a+=Nt(d,f);u+=Rt(d,f,R,n,t,r)}return{path:e.input,db_path:e.dbPath,items_seen:s.length,sources_upserted:o.size,revisions_upserted:c.size,chunks_inserted:u,chunks_deleted:a,skipped:E}})()}finally{d.close()}}import{createHash as gt,randomUUID as Lt}from"crypto";import{existsSync as bt,readFileSync as St}from"fs";import{basename as wt}from"path";function q(e,n){return`${e}_${gt("sha256").update(n).digest("hex").slice(0,20)}`}function M(e){return e&&typeof e==="object"&&!Array.isArray(e)?e:void 0}function l(e){return typeof e==="string"&&e.length>0?e:void 0}function kt(e){let n=l(e.source_ref)??l(e.source_uri)??l(e.uri);if(n)return n;let t=l(e.file_id);if(t){let s=l(e.revision_id)??l(e.revision),d=`open-files://file/${encodeURIComponent(t)}`;return s?`${d}/revision/${encodeURIComponent(s)}`:d}let r=l(e.source_id),i=l(e.path);if(r&&i)return`open-files://source/${encodeURIComponent(r)}/path/${encodeURIComponent(i)}`;throw Error("Outbox event is missing source_ref, file_id, or source_id/path.")}function mt(e,n){if(n.kind==="open-files"&&n.entity==="file"&&n.revision_id)return e.replace(/\/revision\/[^/]+$/,"");return e}function xt(e){return l(e.hash)??l(e.checksum)??l(e.sha256)??null}function Ut(e,n,t){return 
l(e.revision_id)??l(e.revision)??l(e.version_id)??(n.kind==="open-files"?n.revision_id:void 0)??t??null}function At(e){return(l(e.event)??l(e.type)??l(e.action)??l(e.change_type)??"changed").toLowerCase()}function It(e){let n=l(e.path);return l(e.title)??l(e.name)??(n?wt(n):null)}function Ct(e,n){let t=kt(e),r=G(t),i=xt(e);return{raw:e,eventType:At(e),sourceRef:t,sourceUri:mt(t,r),kind:r.kind,title:It(e),revision:Ut(e,r,i),hash:i,status:l(e.status)?.toLowerCase()??null,updatedAt:l(e.updated_at)??n,acl:e.permissions??e.acl??void 0}}function Dt(e){let n=e.trim();if(!n)return[];if(n.startsWith("[")){let t=JSON.parse(n);if(!Array.isArray(t))throw Error("Outbox array parse failed.");return t.map((r)=>{let i=M(r);if(!i)throw Error("Outbox array entries must be objects.");return i})}if(n.startsWith("{"))try{let t=JSON.parse(n),r=M(t);if(!r)throw Error("Outbox object parse failed.");if(Array.isArray(r.events))return r.events.map((i)=>{let s=M(i);if(!s)throw Error("Outbox events entries must be objects.");return s});if("source_ref"in r||"source_uri"in r||"file_id"in r)return[r]}catch(t){let r=n.split(/\r?\n/).filter((i)=>i.trim().length>0);if(r.length<=1)throw t;return r.map((i)=>{let s=M(JSON.parse(i));if(!s)throw Error("Outbox JSONL entries must be objects.");return s})}return n.split(/\r?\n/).filter((t)=>t.trim().length>0).map((t)=>{let r=M(JSON.parse(t));if(!r)throw Error("Outbox JSONL entries must be objects.");return r})}async function jt(e,n){let t=new URL(e),r=t.hostname,i=decodeURIComponent(t.pathname.replace(/^\/+/,""));if(!r||!i)throw Error(`Invalid S3 outbox URI: ${e}`);let[{S3Client:s,GetObjectCommand:d},{fromIni:_}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]),o=n?.storage.type==="s3"&&n.storage.s3?.bucket===r?n.storage.s3:void 0,u=await new s({region:o?.region,credentials:o?.profile?_({profile:o.profile}):void 0,maxAttempts:o?.max_attempts}).send(new d({Bucket:r,Key:i}));if(!u.Body)return"";return await 
u.Body.transformToString()}async function Xt(e,n){if(e.startsWith("s3://"))return jt(e,n);if(!bt(e))throw Error(`Outbox not found: ${e}`);return St(e,"utf8")}function ye(e,n){let t={};if(e)try{t=M(JSON.parse(e))??{}}catch{t={}}return JSON.stringify({...t,...n})}function Ft(e,n,t){let r=q("src",n.sourceUri);e.run(`INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
|
|
247
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
248
|
+
ON CONFLICT(uri) DO UPDATE SET
|
|
249
|
+
kind = excluded.kind,
|
|
250
|
+
title = COALESCE(excluded.title, sources.title),
|
|
251
|
+
updated_at = excluded.updated_at`,[r,n.sourceUri,n.kind,n.title,JSON.stringify({source_ref:n.sourceRef,source_uri:n.sourceUri,status:n.status,last_outbox_event:n.eventType}),JSON.stringify(n.acl??{}),t,n.updatedAt]);let i=e.query("SELECT id, metadata_json, acl_json FROM sources WHERE uri = ?").get(n.sourceUri);if(!i)throw Error(`Failed to upsert source for outbox event: ${n.sourceUri}`);let s={source_ref:n.sourceRef,source_uri:n.sourceUri,last_outbox_event:n.eventType,last_outbox_at:n.updatedAt};if(n.status)s.status=n.status;if(l(n.raw.path))s.path=n.raw.path;return e.run("UPDATE sources SET metadata_json = ?, acl_json = CASE WHEN ? IS NULL THEN acl_json ELSE ? END, updated_at = ? WHERE id = ?",[ye(i.metadata_json,s),n.acl===void 0?null:JSON.stringify(n.acl),n.acl===void 0?null:JSON.stringify(n.acl),n.updatedAt,i.id]),i.id}function Mt(e,n,t,r){if(!t.revision)return null;let i=q("rev",`${n}\x00${t.revision}`),s={source_ref:t.sourceRef,source_uri:t.sourceUri,status:t.status,last_outbox_event:t.eventType,reindex_required:!0};return e.run(`INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)
|
|
252
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
253
|
+
ON CONFLICT(source_id, revision) DO UPDATE SET
|
|
254
|
+
hash = COALESCE(excluded.hash, source_revisions.hash),
|
|
255
|
+
metadata_json = excluded.metadata_json`,[i,n,t.revision,t.hash,l(t.raw.extracted_text_ref)??null,JSON.stringify(s),r]),e.query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?").get(n,t.revision)?.id??null}function vt(e,n,t){if(t.revision)return e.query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?").all(n,t.revision).map((r)=>r.id);if(t.hash)return e.query("SELECT id FROM source_revisions WHERE source_id = ? AND hash = ?").all(n,t.hash).map((r)=>r.id);return e.query("SELECT id FROM source_revisions WHERE source_id = ?").all(n).map((r)=>r.id)}function Kt(e,n){let t=e.query("SELECT id FROM chunks WHERE source_revision_id = ?").all(n),r=0;for(let s of t){let d=e.query("SELECT COUNT(*) AS n FROM chunk_embeddings WHERE chunk_id = ?").get(s.id);r+=d?.n??0,e.run("DELETE FROM chunk_embeddings WHERE chunk_id = ?",[s.id]),e.run("DELETE FROM chunks_fts WHERE chunk_id = ?",[s.id])}e.run("DELETE FROM chunks WHERE source_revision_id = ?",[n]);let i=e.query("SELECT metadata_json FROM source_revisions WHERE id = ?").get(n);return e.run("UPDATE source_revisions SET metadata_json = ? WHERE id = ?",[ye(i?.metadata_json,{reindex_required:!0,invalidated_at:new Date().toISOString()}),n]),{chunksDeleted:t.length,embeddingsDeleted:r}}function $t(e,n){return n==="deleted"||["delete","deleted","remove","removed"].includes(e)}function Bt(e){return["move","moved","rename","renamed","path_changed"].includes(e)}function Wt(e){return["permission","permissions","permission_changed","acl_changed"].includes(e)}async function Re(e){let n=(e.now??new Date).toISOString();C(e.dbPath);let t=await Xt(e.input,e.config),r=Dt(t),i=X(e.dbPath),s=`run_${Lt()}`;try{return i.transaction(()=>{i.run(`INSERT INTO runs (id, type, prompt, status, provider, model, metadata_json, created_at, updated_at)
|
|
256
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,[s,"open-files-outbox",e.input,"completed","local","open-files-outbox",JSON.stringify({path:e.input,events:r.length}),n,n]);let d=new Set,_=new Set,o=0,c=0,u=0,a=0,E=0,h=0;return r.forEach((R,O)=>{let f=Ct(R,n),k=Ft(i,f,n);d.add(k);let N=Mt(i,k,f,n);if(N)_.add(N);let S=vt(i,k,f);for(let m of S){_.add(m);let j=Kt(i,m);o+=j.chunksDeleted,c+=j.embeddingsDeleted,u+=1}if($t(f.eventType,f.status))a+=1;if(Bt(f.eventType))E+=1;if(Wt(f.eventType)||f.acl!==void 0)h+=1;i.run(`INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
|
|
257
|
+
VALUES (?, ?, ?, ?, ?, ?)`,[q("evt",`${s}\x00${O}\x00${f.sourceRef}\x00${f.eventType}`),s,"info",f.eventType,JSON.stringify({source_ref:f.sourceRef,source_uri:f.sourceUri,revision:f.revision,hash:f.hash,status:f.status,affected_revisions:S.length}),f.updatedAt])}),i.run(`INSERT INTO provider_usage (id, run_id, provider, model, input_tokens, output_tokens, cost_usd, metadata_json, created_at)
|
|
258
|
+
VALUES (?, ?, ?, ?, 0, 0, 0, ?, ?)`,[q("usage",s),s,"local","open-files-outbox",JSON.stringify({note:"No model provider used for outbox invalidation."}),n]),{path:e.input,db_path:e.dbPath,run_id:s,events_seen:r.length,sources_touched:d.size,revisions_touched:_.size,chunks_deleted:o,embeddings_deleted:c,stale_revisions:u,deleted_sources:a,moved_sources:E,permission_updates:h}})()}finally{i.close()}}var B={name:"@hasna/knowledge",version:"0.2.6",description:"Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",type:"module",bin:{"open-knowledge":"bin/open-knowledge.js","open-knowledge-mcp":"bin/open-knowledge-mcp.js"},files:["bin","src","docs","LICENSE","README.md"],scripts:{test:"bun test","test:cli":"bun test tests/cli.test.ts",build:"bun build --target=bun --outfile=bin/open-knowledge.js --minify --external @aws-sdk/client-s3 --external @aws-sdk/credential-providers src/cli.ts && bun build --target=bun --outfile=bin/open-knowledge-mcp.js --external @modelcontextprotocol/sdk src/mcp.js",prepublishOnly:"bun run build",postinstall:"bun run build"},keywords:["knowledge","cli","agents","json","notes","local","store"],license:"Apache-2.0",publishConfig:{registry:"https://registry.npmjs.org",access:"public"},repository:{type:"git",url:"git+https://github.com/hasna/knowledge.git"},bugs:{url:"https://github.com/hasna/knowledge/issues"},author:"Hasna Inc. 
<hasna@example.com>",engines:{bun:">=1.0",node:">=18"},dependencies:{"@aws-sdk/client-s3":"^3.1063.0","@aws-sdk/credential-providers":"^3.1063.0","@modelcontextprotocol/sdk":"^1.29.0",zod:"^4.3.6"},devDependencies:{"@types/bun":"^1.3.14"}};var ge={debug:0,info:1,warn:2,error:3},Jt=()=>{if(process.env.DEBUG)return"debug";if(process.env.LOG_LEVEL==="debug")return"debug";if(process.env.LOG_LEVEL==="warn")return"warn";if(process.env.LOG_LEVEL==="error")return"error";return"info"};function D(e,n,t){if(ge[e]<ge[Jt()])return;let r={debug:"[DEBUG]",info:"[INFO]",warn:"[WARN]",error:"[ERROR]"}[e],i=t?`${r} ${n} ${JSON.stringify(t)}`:`${r} ${n}`;if(e==="error")console.error(i);else console.error(i)}var zt=["add","list","get","delete","update","archive","restore","upsert","untag","export","prune","dedupe","stats","paths","db","wiki","ingest","reindex","help"],Le={ls:"list",rm:"delete",edit:"update",unarchive:"restore"};function Ht(e){let n=[],t={};for(let r=0;r<e.length;r+=1){let i=e[r];if(!i.startsWith("-")){n.push(i);continue}switch(i){case"--json":t.json=!0;break;case"--yes":case"-y":t.yes=!0;break;case"--help":case"-h":t.help=!0;break;case"--version":case"-v":t.version=!0;break;case"--desc":t.desc=!0;break;case"--page":case"-p":t.page=Number(e[r+1]),r+=1;break;case"--limit":case"-l":t.limit=Number(e[r+1]),r+=1;break;case"--search":case"-s":t.search=e[r+1],r+=1;break;case"--sort":t.sort=e[r+1],r+=1;break;case"--id":t.id=e[r+1],r+=1;break;case"--store":t.store=e[r+1],r+=1;break;case"--title":t.title=e[r+1],r+=1;break;case"--content":t.content=e[r+1],r+=1;break;case"--url":t.url=e[r+1],r+=1;break;case"--tag":case"-t":t.tag=e[r+1],r+=1;break;case"--format":t.format=e[r+1],r+=1;break;case"--completions":t.completions=e[r+1],r+=1;break;case"--no-color":t.noColor=!0;break;case"--scope":t.scope=e[r+1],r+=1;break;case"--older-than":t.olderThan=Number(e[r+1]),r+=1;break;case"--empty":t.empty=!0;break;case"--archived":t.archived=!0;break;case"--include-archived":t.includeArchived=!0;
break;default:throw Error(`Unknown flag: ${i}. Run 'open-knowledge --help' for valid options.`)}}return{positional:n,flags:t}}function Gt(e){if(!e)return"";return Le[e]??e}function qt(e,n){let t=Array.from({length:e.length+1},()=>Array(n.length+1).fill(0));for(let r=0;r<=e.length;r+=1)t[r][0]=r;for(let r=0;r<=n.length;r+=1)t[0][r]=r;for(let r=1;r<=e.length;r+=1)for(let i=1;i<=n.length;i+=1){let s=e[r-1]===n[i-1]?0:1;t[r][i]=Math.min(t[r-1][i]+1,t[r][i-1]+1,t[r-1][i-1]+s)}return t[e.length][n.length]}function Pt(e){if(!e)return"";let n=[...zt,...Object.keys(Le)],t="",r=Number.POSITIVE_INFINITY;for(let i of n){let s=qt(e,i);if(s<r)r=s,t=i}return r<=3?t:""}function Vt(){console.log(`open-knowledge - local agent knowledge store
|
|
247
259
|
|
|
248
260
|
Usage:
|
|
249
261
|
open-knowledge <command> [options]
|
|
@@ -266,6 +278,7 @@ Commands:
|
|
|
266
278
|
db init|stats Initialize or inspect local knowledge.db
|
|
267
279
|
wiki init Initialize scalable wiki/schema/index/log artifacts
|
|
268
280
|
ingest manifest <file|s3://> Ingest an open-files manifest into knowledge.db
|
|
281
|
+
reindex outbox <file|s3://> Consume open-files change events and invalidate chunks
|
|
269
282
|
help [command] Show help
|
|
270
283
|
|
|
271
284
|
Global Options:
|
|
@@ -307,5 +320,5 @@ Export Options:
|
|
|
307
320
|
|
|
308
321
|
Prune Options:
|
|
309
322
|
--older-than <days> Remove items older than N days
|
|
310
|
-
--empty Remove items with empty content`)}function
|
|
311
|
-
_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"{created,title}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--no-color)--no-color[disable color]" "(--scope)--scope"{local,global,project}:" }; _open_knowledge`);else if(i==="fish")console.log('complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"');else 
throw Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");return}let r=kn(t[0]);if(!r||n.help||r==="help"){Sn(t[1]);return}let c=ie(n.scope),s=n.store;if(!s)if(n.scope==="project"||n.scope==="local")s=b(c.home).jsonStorePath;else s=V();if(r==="paths"){let i=b(c.home);L({ok:!0,scope:n.scope??"global",home:i.home,config_path:i.configPath,json_store_path:i.jsonStorePath,knowledge_db_path:i.knowledgeDbPath,artifacts_dir:i.artifactsDir,indexes_dir:i.indexesDir,logs_dir:i.logsDir,runs_dir:i.runsDir,schemas_dir:i.schemasDir,wiki_dir:i.wikiDir,config:v(i.configPath),message:i.home},n.json);return}if(r==="db"){let i=t[1]??"init",u=b(c.home);if(i!=="init"&&i!=="stats")throw Error("Invalid db action. Use 'init' or 'stats'.");if(i==="init"){let T=m(u.knowledgeDbPath);L({ok:!0,...T,message:`Initialized ${T.path}`},n.json);return}m(u.knowledgeDbPath);let o=ue(u.knowledgeDbPath);L({ok:!0,path:u.knowledgeDbPath,...o,message:`knowledge.db schema v${o.schema_version}`},n.json);return}if(r==="wiki"){if((t[1]??"init")!=="init")throw Error("Invalid wiki action. Use 'init'.");let u=b(c.home),o=v(u.configPath),T=ae(o,u),a=await pe(T);L({ok:!0,...a,message:`Initialized wiki layout in ${u.home}`},n.json);return}if(r==="ingest"){if((t[1]??"")!=="manifest")throw Error("Invalid ingest action. 
Use 'manifest'.");let u=t[2];if(!u)throw Error("Usage: open-knowledge ingest manifest <file|s3://bucket/key>");let o=b(c.home),T=v(o.configPath),a=await Ne({dbPath:o.knowledgeDbPath,input:u,config:T});L({ok:!0,...a,message:`Ingested ${a.items_seen} manifest item(s)`},n.json);return}if(q(s),r==="add"){let i=t[1],u=t[2];if(!i||!u)throw Error("Usage: open-knowledge add <title> <content>");l(s,()=>{let o=k(s),T={id:P(),title:i,content:u,url:n.url??null,tags:n.tag?[n.tag]:[],created_at:new Date().toISOString(),updated_at:new Date().toISOString()};o.items.push(T),w(s,o),x("info","Item added",{id:T.id,title:T.title}),L({ok:!0,item:T,message:`Added ${T.id}`},n.json)});return}if(r==="list"){if(n.format!==void 0&&n.format!=="table"&&n.format!=="json")throw Error("Invalid --format value for list. Use 'table' or 'json'.");l(s,()=>{let i=k(s),u=Number.isFinite(n.page)&&n.page>0?n.page:1,o=Number.isFinite(n.limit)&&n.limit>0?n.limit:20,T=n.search?String(n.search).toLowerCase():"",a=n.tag?String(n.tag).toLowerCase():"",f=n.format==="table"||!n.json&&!n.format&&An(n),y=n.json||n.format==="json",h=i.items;if(n.archived)h=h.filter((p)=>p.archived===!0);else if(!n.includeArchived)h=h.filter((p)=>!p.archived);if(T)h=h.filter((p)=>p.title.toLowerCase().includes(T)||p.content.toLowerCase().includes(T));if(a)h=h.filter((p)=>p.tags&&p.tags.map((W)=>W.toLowerCase()).includes(a));let{sorted:O,sort:C,direction:N}=In(h,n),U=(u-1)*o,X=O.slice(U,U+o),z=Math.max(1,Math.ceil(O.length/o));if(y){L({ok:!0,page:u,limit:o,total:O.length,total_pages:z,sort:C,direction:N,items:X},!0);return}if(X.length===0){L(`No items found (search=${T||"none"}, tag=${a||"none"})`,!1);return}if(f){let p=(S)=>S,W=`${p("ID")} ${p("TITLE")} ${p("CREATED")} ${p("URL")} ${p("TAGS")}`;console.log(W);for(let S of X)console.log(`${S.id} ${p(S.title)} ${S.created_at} ${S.url?p(S.url):""} ${S.tags?.length?p(`[${S.tags.join(", ")}]`):""}`);console.log(`Page ${u}/${z} | showing ${X.length} of ${O.length} | sort=${C} ${N} | 
search=${T||"none"} | tag=${a||"none"}`)}else{for(let p of X)console.log(`${p.id} ${p.title} ${p.created_at}${p.url?` ${p.url}`:""}${p.tags?.length?` [${p.tags.join(", ")}]`:""}`);console.log(`Page ${u}/${z} | showing ${X.length} of ${O.length} | sort=${C} ${N} | search=${T||"none"} | tag=${a||"none"}`)}});return}if(r==="get"){j(n),l(s,()=>{let u=k(s).items.find((o)=>o.id===n.id||o.short_id===n.id);if(!u)throw Error(`Item not found: ${n.id}`);L({ok:!0,item:u,message:`${u.id}: ${u.title}`},n.json)});return}if(r==="update"){j(n),l(s,()=>{let i=k(s),u=i.items.findIndex((T)=>T.id===n.id||T.short_id===n.id);if(u===-1)throw Error(`Item not found: ${n.id}`);let o=i.items[u];if(n.title!==void 0)o.title=n.title;if(n.content!==void 0)o.content=n.content;if(n.url!==void 0)o.url=n.url;if(n.tag!==void 0){if(o.tags=o.tags||[],!o.tags.map((T)=>T.toLowerCase()).includes(n.tag.toLowerCase()))o.tags.push(n.tag)}o.updated_at=new Date().toISOString(),i.items[u]=o,w(s,i),L({ok:!0,item:o,message:`Updated ${o.id}`},n.json)});return}if(r==="archive"||r==="restore"){j(n),l(s,()=>{let i=k(s),u=i.items.findIndex((T)=>T.id===n.id||T.short_id===n.id);if(u===-1)throw Error(`Item not found: ${n.id}`);let o=i.items[u];o.archived=r==="archive",o.updated_at=new Date().toISOString(),i.items[u]=o,w(s,i),L({ok:!0,item:o,message:`${r==="archive"?"Archived":"Restored"} ${o.id}`},n.json)});return}if(r==="untag"){if(j(n),!n.tag)throw Error("Missing required --tag. 
Example: open-knowledge untag --id <id> -t <tag>");l(s,()=>{let i=k(s),u=i.items.findIndex((a)=>a.id===n.id||a.short_id===n.id);if(u===-1)throw Error(`Item not found: ${n.id}`);let o=i.items[u],T=o.tags?.length??0;o.tags=(o.tags??[]).filter((a)=>a.toLowerCase()!==n.tag.toLowerCase()),o.updated_at=new Date().toISOString(),i.items[u]=o,w(s,i),L({ok:!0,item:o,removed:T-o.tags.length,message:`Removed tag from ${o.id}`},n.json)});return}if(r==="upsert"){let i=n.title??t[1],u=n.content??t[2];l(s,()=>{let o=k(s),T=n.id?o.items.findIndex((y)=>y.id===n.id||y.short_id===n.id):-1,a=new Date().toISOString();if(T===-1){if(!i||!u)throw Error("New item requires title and content. Example: open-knowledge upsert <title> <content> [--id <id>]");let y=n.id??P(),h={id:y,short_id:ce(y),title:i,content:u,url:n.url??null,tags:n.tag?[n.tag]:[],metadata:{},archived:!1,created_at:a,updated_at:a};o.items.push(h),w(s,o),L({ok:!0,created:!0,item:h,message:`Upserted ${h.id}`},n.json);return}let f=o.items[T];if(i!==void 0)f.title=i;if(u!==void 0)f.content=u;if(n.url!==void 0)f.url=n.url;if(n.tag!==void 0){if(f.tags=f.tags||[],!f.tags.map((y)=>y.toLowerCase()).includes(n.tag.toLowerCase()))f.tags.push(n.tag)}f.updated_at=a,o.items[T]=f,w(s,o),L({ok:!0,created:!1,item:f,message:`Upserted ${f.id}`},n.json)});return}if(r==="delete"){if(j(n),!n.yes)throw Error("Refusing delete without --yes. Re-run with: open-knowledge delete --id <id> --yes");l(s,()=>{let i=k(s),u=i.items.length;i.items=i.items.filter((T)=>T.id!==n.id&&T.short_id!==n.id);let o=u!==i.items.length;if(w(s,i),!o)throw Error(`Item not found: ${n.id}`);x("info","Item deleted",{id:n.id}),L({ok:!0,deleted_id:n.id,message:`Deleted ${n.id}`},n.json)});return}if(r==="export"){let i=n.format??"json";if(i!=="json"&&i!=="jsonl")throw Error("Invalid --format. 
Use 'json' or 'jsonl'.");l(s,()=>{let u=k(s);if(i==="jsonl")for(let o of u.items)console.log(JSON.stringify(o));else L({ok:!0,items:u.items},n.json)});return}if(r==="prune"){if(!n.yes)throw Error("Refusing prune without --yes. Re-run with: open-knowledge prune --yes [--older-than <days>] [--empty]");l(s,()=>{let i=k(s),u=i.items.length;if(n.olderThan!==void 0){let T=new Date;T.setDate(T.getDate()-n.olderThan),i.items=i.items.filter((a)=>new Date(a.created_at)>=T)}if(n.empty)i.items=i.items.filter((T)=>T.content.trim().length>0);let o=u-i.items.length;w(s,i),x("info","Prune completed",{pruned:o,remaining:i.items.length}),L({ok:!0,pruned:o,remaining:i.items.length,message:`Pruned ${o} item(s)`},n.json)});return}if(r==="dedupe"){if(!n.yes)throw Error("Refusing dedupe without --yes. Re-run with: open-knowledge dedupe --yes [--json]");l(s,()=>{let i=k(s),u=new Set,o=i.items.length;i.items=i.items.filter((a)=>{let f=`${a.title}\x00${a.content}`;if(u.has(f))return!1;return u.add(f),!0});let T=o-i.items.length;w(s,i),x("info","Dedupe completed",{removed:T,remaining:i.items.length}),L({ok:!0,removed:T,remaining:i.items.length,message:`Dedupe removed ${T} duplicate(s)`},n.json)});return}if(r==="stats"){l(s,()=>{let i=k(s),u=i.items.filter((N)=>!N.archived),o=u.length,T=i.items.length-o,a=u.filter((N)=>N.url).length,f=u.filter((N)=>N.tags&&N.tags.length>0).length,y=o>0?u.map((N)=>N.created_at).sort()[0]:null,h=o>0?u.map((N)=>N.created_at).sort()[o-1]:null,O={};for(let N of u)for(let U of N.tags||[])O[U]=(O[U]||0)+1;let C=Object.entries(O).sort((N,U)=>U[1]-N[1]).slice(0,5).map(([N,U])=>({tag:N,count:U}));L({ok:!0,total:o,archived:T,with_url:a,with_tags:f,oldest:y,newest:h,top_tags:C,message:`${o} items | ${a} with URL | ${f} with tags`},n.json)});return}let d=wn(t[0]),_=d?` Did you mean '${d}'?`:"";throw x("warn","Unknown command",{input:t[0],suggestion:d}),Error(`Unknown command: ${t[0]}.${_} Run 'open-knowledge --help' for available 
commands.`)}if(import.meta.main)xn(process.argv.slice(2)).catch((e)=>{let t=e instanceof Error?e.message:String(e);x("error","CLI error",{message:t,stack:e instanceof Error?e.stack:void 0}),console.error(`Error: ${t}`),process.exitCode=1});export{wn as suggestCommand,In as sortItems,xn as run,On as parseArgs};
|
|
323
|
+
--empty Remove items with empty content`)}function Qt(e){if(e==="add"){console.log("Usage: open-knowledge add <title> <content> [--url <url>] [-t <tag>] [--json]");return}if(e==="list"||e==="ls"){console.log("Usage: open-knowledge list|ls [--format table|json] [-p <page>] [-l <limit>] [-s <search>] [-t <tag>] [--sort created|title] [--desc] [--json]");return}if(e==="get"){console.log("Usage: open-knowledge get --id <id> [--json]");return}if(e==="update"||e==="edit"){console.log("Usage: open-knowledge update|edit --id <id> [--title <title>] [--content <content>] [--url <url>] [-t <tag>] [--json]");return}if(e==="archive"){console.log("Usage: open-knowledge archive --id <id> [--json]");return}if(e==="restore"||e==="unarchive"){console.log("Usage: open-knowledge restore|unarchive --id <id> [--json]");return}if(e==="upsert"){console.log("Usage: open-knowledge upsert [title] [content] [--id <id>] [--title <title>] [--content <content>] [--url <url>] [-t <tag>] [--json]");return}if(e==="untag"){console.log("Usage: open-knowledge untag --id <id> -t <tag> [--json]");return}if(e==="delete"||e==="rm"){console.log("Usage: open-knowledge delete|rm --id <id> -y [--json]");return}if(e==="export"){console.log("Usage: open-knowledge export [--format jsonl] [--json]");return}if(e==="prune"){console.log("Usage: open-knowledge prune --yes [--older-than <days>] [--empty] [--json]");return}if(e==="dedupe"){console.log("Usage: open-knowledge dedupe --yes [--json]");return}if(e==="stats"){console.log("Usage: open-knowledge stats [--json]");return}if(e==="paths"){console.log("Usage: open-knowledge paths [--scope local|global|project] [--json]");return}if(e==="db"){console.log("Usage: open-knowledge db init|stats [--scope local|global|project] [--json]");return}if(e==="wiki"){console.log("Usage: open-knowledge wiki init [--scope local|global|project] [--json]");return}if(e==="ingest"){console.log("Usage: open-knowledge ingest manifest <file|s3://bucket/key> [--scope local|global|project] 
[--json]");return}if(e==="reindex"){console.log("Usage: open-knowledge reindex outbox <file|s3://bucket/key> [--scope local|global|project] [--json]");return}Vt()}function Zt(e){if(e.noColor||process.env.NO_COLOR)return!1;if(process.env.FORCE_COLOR)return!0;return process.stdout.isTTY===!0}function y(e,n,t){if(n){console.log(JSON.stringify(e,null,2));return}if(typeof e==="string"){console.log(e);return}console.log(e.message??JSON.stringify(e,null,2))}function W(e){if(!e.id)throw Error("Missing required --id. Example: open-knowledge get --id <id>")}function en(e,n){let t=n.sort??"created";if(t!=="created"&&t!=="title")throw Error("Invalid --sort value. Use 'created' or 'title'.");let r=[...e].sort((i,s)=>{if(t==="title")return i.title.localeCompare(s.title);return i.created_at.localeCompare(s.created_at)});if(n.desc)r.reverse();return{sorted:r,sort:t,direction:n.desc?"desc":"asc"}}async function tn(e){let{positional:n,flags:t}=Ht(e);if(D("debug","CLI invoked",{command:n[0],flags:{json:t.json,store:t.store}}),t.version){console.log(t.json?JSON.stringify({name:B.name,version:B.version},null,2):`${B.name} ${B.version}`);return}if(t.completions){let o=t.completions;if(o==="bash")console.log('_open_knowledge() { local cur; cur="${COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest reindex help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge');else if(o==="zsh")console.log(`#compdef open-knowledge
|
|
324
|
+
_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest reindex help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"{created,title}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--no-color)--no-color[disable color]" "(--scope)--scope"{local,global,project}:" }; _open_knowledge`);else if(o==="fish")console.log('complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest reindex help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global 
project"');else throw Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");return}let r=Gt(n[0]);if(!r||t.help||r==="help"){Qt(n[1]);return}let i=ce(t.scope),s=t.store;if(!s)if(t.scope==="project"||t.scope==="local")s=I(i.home).jsonStorePath;else s=ee();if(r==="paths"){let o=I(i.home);y({ok:!0,scope:t.scope??"global",home:o.home,config_path:o.configPath,json_store_path:o.jsonStorePath,knowledge_db_path:o.knowledgeDbPath,artifacts_dir:o.artifactsDir,indexes_dir:o.indexesDir,logs_dir:o.logsDir,runs_dir:o.runsDir,schemas_dir:o.schemasDir,wiki_dir:o.wikiDir,config:K(o.configPath),message:o.home},t.json);return}if(r==="db"){let o=n[1]??"init",c=I(i.home);if(o!=="init"&&o!=="stats")throw Error("Invalid db action. Use 'init' or 'stats'.");if(o==="init"){let a=C(c.knowledgeDbPath);y({ok:!0,...a,message:`Initialized ${a.path}`},t.json);return}C(c.knowledgeDbPath);let u=Te(c.knowledgeDbPath);y({ok:!0,path:c.knowledgeDbPath,...u,message:`knowledge.db schema v${u.schema_version}`},t.json);return}if(r==="wiki"){if((n[1]??"init")!=="init")throw Error("Invalid wiki action. Use 'init'.");let c=I(i.home),u=K(c.configPath),a=le(u,c),E=await he(a);y({ok:!0,...E,message:`Initialized wiki layout in ${c.home}`},t.json);return}if(r==="ingest"){if((n[1]??"")!=="manifest")throw Error("Invalid ingest action. Use 'manifest'.");let c=n[2];if(!c)throw Error("Usage: open-knowledge ingest manifest <file|s3://bucket/key>");let u=I(i.home),a=K(u.configPath),E=await Oe({dbPath:u.knowledgeDbPath,input:c,config:a});y({ok:!0,...E,message:`Ingested ${E.items_seen} manifest item(s)`},t.json);return}if(r==="reindex"){if((n[1]??"")!=="outbox")throw Error("Invalid reindex action. 
Use 'outbox'.");let c=n[2];if(!c)throw Error("Usage: open-knowledge reindex outbox <file|s3://bucket/key>");let u=I(i.home),a=K(u.configPath),E=await Re({dbPath:u.knowledgeDbPath,input:c,config:a});y({ok:!0,...E,message:`Consumed ${E.events_seen} outbox event(s)`},t.json);return}if(te(s),r==="add"){let o=n[1],c=n[2];if(!o||!c)throw Error("Usage: open-knowledge add <title> <content>");b(s,()=>{let u=L(s),a={id:ne(),title:o,content:c,url:t.url??null,tags:t.tag?[t.tag]:[],created_at:new Date().toISOString(),updated_at:new Date().toISOString()};u.items.push(a),w(s,u),D("info","Item added",{id:a.id,title:a.title}),y({ok:!0,item:a,message:`Added ${a.id}`},t.json)});return}if(r==="list"){if(t.format!==void 0&&t.format!=="table"&&t.format!=="json")throw Error("Invalid --format value for list. Use 'table' or 'json'.");b(s,()=>{let o=L(s),c=Number.isFinite(t.page)&&t.page>0?t.page:1,u=Number.isFinite(t.limit)&&t.limit>0?t.limit:20,a=t.search?String(t.search).toLowerCase():"",E=t.tag?String(t.tag).toLowerCase():"",h=t.format==="table"||!t.json&&!t.format&&Zt(t),R=t.json||t.format==="json",O=o.items;if(t.archived)O=O.filter((p)=>p.archived===!0);else if(!t.includeArchived)O=O.filter((p)=>!p.archived);if(a)O=O.filter((p)=>p.title.toLowerCase().includes(a)||p.content.toLowerCase().includes(a));if(E)O=O.filter((p)=>p.tags&&p.tags.map((P)=>P.toLowerCase()).includes(E));let{sorted:f,sort:k,direction:N}=en(O,t),S=(c-1)*u,m=f.slice(S,S+u),j=Math.max(1,Math.ceil(f.length/u));if(R){y({ok:!0,page:c,limit:u,total:f.length,total_pages:j,sort:k,direction:N,items:m},!0);return}if(m.length===0){y(`No items found (search=${a||"none"}, tag=${E||"none"})`,!1);return}if(h){let p=(x)=>x,P=`${p("ID")} ${p("TITLE")} ${p("CREATED")} ${p("URL")} ${p("TAGS")}`;console.log(P);for(let x of m)console.log(`${x.id} ${p(x.title)} ${x.created_at} ${x.url?p(x.url):""} ${x.tags?.length?p(`[${x.tags.join(", ")}]`):""}`);console.log(`Page ${c}/${j} | showing ${m.length} of ${f.length} | sort=${k} ${N} | 
search=${a||"none"} | tag=${E||"none"}`)}else{for(let p of m)console.log(`${p.id} ${p.title} ${p.created_at}${p.url?` ${p.url}`:""}${p.tags?.length?` [${p.tags.join(", ")}]`:""}`);console.log(`Page ${c}/${j} | showing ${m.length} of ${f.length} | sort=${k} ${N} | search=${a||"none"} | tag=${E||"none"}`)}});return}if(r==="get"){W(t),b(s,()=>{let c=L(s).items.find((u)=>u.id===t.id||u.short_id===t.id);if(!c)throw Error(`Item not found: ${t.id}`);y({ok:!0,item:c,message:`${c.id}: ${c.title}`},t.json)});return}if(r==="update"){W(t),b(s,()=>{let o=L(s),c=o.items.findIndex((a)=>a.id===t.id||a.short_id===t.id);if(c===-1)throw Error(`Item not found: ${t.id}`);let u=o.items[c];if(t.title!==void 0)u.title=t.title;if(t.content!==void 0)u.content=t.content;if(t.url!==void 0)u.url=t.url;if(t.tag!==void 0){if(u.tags=u.tags||[],!u.tags.map((a)=>a.toLowerCase()).includes(t.tag.toLowerCase()))u.tags.push(t.tag)}u.updated_at=new Date().toISOString(),o.items[c]=u,w(s,o),y({ok:!0,item:u,message:`Updated ${u.id}`},t.json)});return}if(r==="archive"||r==="restore"){W(t),b(s,()=>{let o=L(s),c=o.items.findIndex((a)=>a.id===t.id||a.short_id===t.id);if(c===-1)throw Error(`Item not found: ${t.id}`);let u=o.items[c];u.archived=r==="archive",u.updated_at=new Date().toISOString(),o.items[c]=u,w(s,o),y({ok:!0,item:u,message:`${r==="archive"?"Archived":"Restored"} ${u.id}`},t.json)});return}if(r==="untag"){if(W(t),!t.tag)throw Error("Missing required --tag. 
Example: open-knowledge untag --id <id> -t <tag>");b(s,()=>{let o=L(s),c=o.items.findIndex((E)=>E.id===t.id||E.short_id===t.id);if(c===-1)throw Error(`Item not found: ${t.id}`);let u=o.items[c],a=u.tags?.length??0;u.tags=(u.tags??[]).filter((E)=>E.toLowerCase()!==t.tag.toLowerCase()),u.updated_at=new Date().toISOString(),o.items[c]=u,w(s,o),y({ok:!0,item:u,removed:a-u.tags.length,message:`Removed tag from ${u.id}`},t.json)});return}if(r==="upsert"){let o=t.title??n[1],c=t.content??n[2];b(s,()=>{let u=L(s),a=t.id?u.items.findIndex((R)=>R.id===t.id||R.short_id===t.id):-1,E=new Date().toISOString();if(a===-1){if(!o||!c)throw Error("New item requires title and content. Example: open-knowledge upsert <title> <content> [--id <id>]");let R=t.id??ne(),O={id:R,short_id:Ee(R),title:o,content:c,url:t.url??null,tags:t.tag?[t.tag]:[],metadata:{},archived:!1,created_at:E,updated_at:E};u.items.push(O),w(s,u),y({ok:!0,created:!0,item:O,message:`Upserted ${O.id}`},t.json);return}let h=u.items[a];if(o!==void 0)h.title=o;if(c!==void 0)h.content=c;if(t.url!==void 0)h.url=t.url;if(t.tag!==void 0){if(h.tags=h.tags||[],!h.tags.map((R)=>R.toLowerCase()).includes(t.tag.toLowerCase()))h.tags.push(t.tag)}h.updated_at=E,u.items[a]=h,w(s,u),y({ok:!0,created:!1,item:h,message:`Upserted ${h.id}`},t.json)});return}if(r==="delete"){if(W(t),!t.yes)throw Error("Refusing delete without --yes. Re-run with: open-knowledge delete --id <id> --yes");b(s,()=>{let o=L(s),c=o.items.length;o.items=o.items.filter((a)=>a.id!==t.id&&a.short_id!==t.id);let u=c!==o.items.length;if(w(s,o),!u)throw Error(`Item not found: ${t.id}`);D("info","Item deleted",{id:t.id}),y({ok:!0,deleted_id:t.id,message:`Deleted ${t.id}`},t.json)});return}if(r==="export"){let o=t.format??"json";if(o!=="json"&&o!=="jsonl")throw Error("Invalid --format. 
Use 'json' or 'jsonl'.");b(s,()=>{let c=L(s);if(o==="jsonl")for(let u of c.items)console.log(JSON.stringify(u));else y({ok:!0,items:c.items},t.json)});return}if(r==="prune"){if(!t.yes)throw Error("Refusing prune without --yes. Re-run with: open-knowledge prune --yes [--older-than <days>] [--empty]");b(s,()=>{let o=L(s),c=o.items.length;if(t.olderThan!==void 0){let a=new Date;a.setDate(a.getDate()-t.olderThan),o.items=o.items.filter((E)=>new Date(E.created_at)>=a)}if(t.empty)o.items=o.items.filter((a)=>a.content.trim().length>0);let u=c-o.items.length;w(s,o),D("info","Prune completed",{pruned:u,remaining:o.items.length}),y({ok:!0,pruned:u,remaining:o.items.length,message:`Pruned ${u} item(s)`},t.json)});return}if(r==="dedupe"){if(!t.yes)throw Error("Refusing dedupe without --yes. Re-run with: open-knowledge dedupe --yes [--json]");b(s,()=>{let o=L(s),c=new Set,u=o.items.length;o.items=o.items.filter((E)=>{let h=`${E.title}\x00${E.content}`;if(c.has(h))return!1;return c.add(h),!0});let a=u-o.items.length;w(s,o),D("info","Dedupe completed",{removed:a,remaining:o.items.length}),y({ok:!0,removed:a,remaining:o.items.length,message:`Dedupe removed ${a} duplicate(s)`},t.json)});return}if(r==="stats"){b(s,()=>{let o=L(s),c=o.items.filter((N)=>!N.archived),u=c.length,a=o.items.length-u,E=c.filter((N)=>N.url).length,h=c.filter((N)=>N.tags&&N.tags.length>0).length,R=u>0?c.map((N)=>N.created_at).sort()[0]:null,O=u>0?c.map((N)=>N.created_at).sort()[u-1]:null,f={};for(let N of c)for(let S of N.tags||[])f[S]=(f[S]||0)+1;let k=Object.entries(f).sort((N,S)=>S[1]-N[1]).slice(0,5).map(([N,S])=>({tag:N,count:S}));y({ok:!0,total:u,archived:a,with_url:E,with_tags:h,oldest:R,newest:O,top_tags:k,message:`${u} items | ${E} with URL | ${h} with tags`},t.json)});return}let d=Pt(n[0]),_=d?` Did you mean '${d}'?`:"";throw D("warn","Unknown command",{input:n[0],suggestion:d}),Error(`Unknown command: ${n[0]}.${_} Run 'open-knowledge --help' for available 
commands.`)}if(import.meta.main)tn(process.argv.slice(2)).catch((e)=>{let n=e instanceof Error?e.message:String(e);D("error","CLI error",{message:n,stack:e instanceof Error?e.stack:void 0}),console.error(`Error: ${n}`),process.exitCode=1});export{Pt as suggestCommand,en as sortItems,tn as run,Ht as parseArgs};
|
package/package.json
CHANGED
package/src/cli.ts
CHANGED
|
@@ -10,6 +10,7 @@ import { getKnowledgeDbStats, migrateKnowledgeDb } from './knowledge-db';
|
|
|
10
10
|
import { createArtifactStore } from './artifact-store';
|
|
11
11
|
import { initializeWikiLayout } from './wiki-layout';
|
|
12
12
|
import { ingestOpenFilesManifest } from './manifest-ingest';
|
|
13
|
+
import { consumeOpenFilesOutbox } from './outbox-consume';
|
|
13
14
|
import pkg from '../package.json' with { type: 'json' };
|
|
14
15
|
|
|
15
16
|
type LogLevel = 'debug' | 'info' | 'warn' | 'error';
|
|
@@ -60,7 +61,7 @@ interface ParseResult {
|
|
|
60
61
|
flags: Flags;
|
|
61
62
|
}
|
|
62
63
|
|
|
63
|
-
const COMMANDS = ['add', 'list', 'get', 'delete', 'update', 'archive', 'restore', 'upsert', 'untag', 'export', 'prune', 'dedupe', 'stats', 'paths', 'db', 'wiki', 'ingest', 'help'];
|
|
64
|
+
const COMMANDS = ['add', 'list', 'get', 'delete', 'update', 'archive', 'restore', 'upsert', 'untag', 'export', 'prune', 'dedupe', 'stats', 'paths', 'db', 'wiki', 'ingest', 'reindex', 'help'];
|
|
64
65
|
const COMMAND_ALIASES: Record<string, string> = {
|
|
65
66
|
ls: 'list',
|
|
66
67
|
rm: 'delete',
|
|
@@ -164,6 +165,7 @@ Commands:
|
|
|
164
165
|
db init|stats Initialize or inspect local knowledge.db
|
|
165
166
|
wiki init Initialize scalable wiki/schema/index/log artifacts
|
|
166
167
|
ingest manifest <file|s3://> Ingest an open-files manifest into knowledge.db
|
|
168
|
+
reindex outbox <file|s3://> Consume open-files change events and invalidate chunks
|
|
167
169
|
help [command] Show help
|
|
168
170
|
|
|
169
171
|
Global Options:
|
|
@@ -226,6 +228,7 @@ function printCommandHelp(command: string): void {
|
|
|
226
228
|
if (command === 'db') { console.log('Usage: open-knowledge db init|stats [--scope local|global|project] [--json]'); return; }
|
|
227
229
|
if (command === 'wiki') { console.log('Usage: open-knowledge wiki init [--scope local|global|project] [--json]'); return; }
|
|
228
230
|
if (command === 'ingest') { console.log('Usage: open-knowledge ingest manifest <file|s3://bucket/key> [--scope local|global|project] [--json]'); return; }
|
|
231
|
+
if (command === 'reindex') { console.log('Usage: open-knowledge reindex outbox <file|s3://bucket/key> [--scope local|global|project] [--json]'); return; }
|
|
229
232
|
printGlobalHelp();
|
|
230
233
|
}
|
|
231
234
|
|
|
@@ -270,11 +273,11 @@ async function run(argv: string[]): Promise<void> {
|
|
|
270
273
|
if (flags.completions) {
|
|
271
274
|
const shell = flags.completions;
|
|
272
275
|
if (shell === 'bash') {
|
|
273
|
-
console.log(`_open_knowledge() { local cur; cur="${"$"}{COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge`);
|
|
276
|
+
console.log(`_open_knowledge() { local cur; cur="${"$"}{COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest reindex help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge`);
|
|
274
277
|
} else if (shell === 'zsh') {
|
|
275
|
-
console.log(`#compdef open-knowledge\n_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"\{created,title\}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--no-color)--no-color[disable color]" "(--scope)--scope"\{local,global,project\}:" }; _open_knowledge`);
|
|
278
|
+
console.log(`#compdef open-knowledge\n_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest reindex help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"\{created,title\}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--no-color)--no-color[disable color]" "(--scope)--scope"\{local,global,project\}:" }; _open_knowledge`);
|
|
276
279
|
} else if (shell === 'fish') {
|
|
277
|
-
console.log(`complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"`);
|
|
280
|
+
console.log(`complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest reindex help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"`);
|
|
278
281
|
} else {
|
|
279
282
|
throw new Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");
|
|
280
283
|
}
|
|
@@ -360,6 +363,22 @@ async function run(argv: string[]): Promise<void> {
|
|
|
360
363
|
return;
|
|
361
364
|
}
|
|
362
365
|
|
|
366
|
+
if (command === 'reindex') {
|
|
367
|
+
const action = positional[1] ?? '';
|
|
368
|
+
if (action !== 'outbox') throw new Error("Invalid reindex action. Use 'outbox'.");
|
|
369
|
+
const input = positional[2];
|
|
370
|
+
if (!input) throw new Error('Usage: open-knowledge reindex outbox <file|s3://bucket/key>');
|
|
371
|
+
const resolvedWorkspace = ensureKnowledgeWorkspace(workspace.home);
|
|
372
|
+
const config = readKnowledgeConfig(resolvedWorkspace.configPath);
|
|
373
|
+
const result = await consumeOpenFilesOutbox({
|
|
374
|
+
dbPath: resolvedWorkspace.knowledgeDbPath,
|
|
375
|
+
input,
|
|
376
|
+
config,
|
|
377
|
+
});
|
|
378
|
+
output({ ok: true, ...result, message: `Consumed ${result.events_seen} outbox event(s)` }, flags.json);
|
|
379
|
+
return;
|
|
380
|
+
}
|
|
381
|
+
|
|
363
382
|
ensureStore(storePath);
|
|
364
383
|
|
|
365
384
|
if (command === 'add') {
|
|
@@ -0,0 +1,425 @@
|
|
|
1
|
+
import { createHash, randomUUID } from 'node:crypto';
|
|
2
|
+
import { existsSync, readFileSync } from 'node:fs';
|
|
3
|
+
import { basename } from 'node:path';
|
|
4
|
+
import type { Database } from 'bun:sqlite';
|
|
5
|
+
import { migrateKnowledgeDb, openKnowledgeDb } from './knowledge-db';
|
|
6
|
+
import { parseSourceRef, type SourceRef } from './source-ref';
|
|
7
|
+
import type { KnowledgeConfig } from './workspace';
|
|
8
|
+
|
|
9
|
+
/** Loosely typed JSON object; used for raw outbox events and for decoded metadata blobs. */
type OutboxObject = Record<string, unknown>;
|
|
10
|
+
|
|
11
|
+
/** Inputs accepted by `consumeOpenFilesOutbox`. */
export interface OutboxConsumeOptions {
  /** Path of the knowledge database; it is migrated and then opened for the run. */
  dbPath: string;
  /** Outbox location: a local file path, or an `s3://bucket/key` URI. */
  input: string;
  /** Optional workspace config; only consulted for S3 client settings when reading `s3://` input. */
  config?: KnowledgeConfig;
  /** Optional clock override; its ISO string is used for run and row timestamps. */
  now?: Date;
}
|
|
17
|
+
|
|
18
|
+
/** Summary returned by `consumeOpenFilesOutbox` after the transaction commits. */
export interface OutboxConsumeResult {
  /** The `input` value that was consumed (file path or S3 URI). */
  path: string;
  /** The database path that was updated. */
  db_path: string;
  /** Identifier of the ledger row written to `runs` (`run_<uuid>`). */
  run_id: string;
  /** Number of events parsed from the outbox. */
  events_seen: number;
  /** Number of distinct source rows upserted. */
  sources_touched: number;
  /** Number of distinct revision rows upserted or invalidated. */
  revisions_touched: number;
  /** Total chunk rows removed. */
  chunks_deleted: number;
  /** Total embedding rows removed. */
  embeddings_deleted: number;
  /** Number of revision invalidations performed (counted once per event per revision). */
  stale_revisions: number;
  /** Number of events classified as deletions. */
  deleted_sources: number;
  /** Number of events classified as moves/renames. */
  moved_sources: number;
  /** Number of events classified as permission changes or carrying ACL data. */
  permission_updates: number;
}
|
|
32
|
+
|
|
33
|
+
/** Canonical view of one outbox event, as produced by `normalizeEvent`. */
interface NormalizedOutboxEvent {
  /** The event object exactly as parsed from the outbox. */
  raw: OutboxObject;
  /** Lower-cased event label; `'changed'` when the event names none. */
  eventType: string;
  /** Full source reference (may include a `/revision/<id>` suffix). */
  sourceRef: string;
  /** Source reference with any trailing file revision segment removed. */
  sourceUri: string;
  /** Source kind reported by `parseSourceRef`. */
  kind: SourceRef['kind'];
  /** Display title, or `null` when the event provides no title, name, or path. */
  title: string | null;
  /** Revision identifier; falls back to the content hash, then `null`. */
  revision: string | null;
  /** Content hash taken from `hash`, `checksum`, or `sha256`; `null` when absent. */
  hash: string | null;
  /** Lower-cased `status` field, or `null` when absent. */
  status: string | null;
  /** The event's `updated_at`, or the run timestamp when absent. */
  updatedAt: string;
  /** `permissions` or `acl` payload from the event; `undefined` when neither is present. */
  acl: unknown;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Builds a deterministic identifier of the form `<prefix>_<digest>`, where the
 * digest is the first 20 hex characters of the SHA-256 hash of `value`.
 */
function stableId(prefix: string, value: string): string {
  const fullDigest = createHash('sha256').update(value).digest('hex');
  const shortDigest = fullDigest.slice(0, 20);
  return `${prefix}_${shortDigest}`;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Narrows an unknown value to a plain JSON-style object.
 * Returns `undefined` for primitives, `null`, and arrays.
 */
function asObject(value: unknown): OutboxObject | undefined {
  if (value === null || typeof value !== 'object') return undefined;
  if (Array.isArray(value)) return undefined;
  return value as OutboxObject;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Returns `value` when it is a non-empty string; otherwise `undefined`.
 * Empty strings are treated the same as missing values.
 */
function asString(value: unknown): string | undefined {
  if (typeof value !== 'string') return undefined;
  return value === '' ? undefined : value;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Determines the source reference string for a raw outbox event.
 *
 * Resolution order:
 *   1. an explicit `source_ref`, `source_uri`, or `uri` field;
 *   2. `file_id` (plus optional `revision_id`/`revision`) rendered as
 *      `open-files://file/<id>[/revision/<rev>]`;
 *   3. `source_id` + `path` rendered as `open-files://source/<id>/path/<path>`.
 *
 * @throws Error when none of the identifying fields are present.
 */
function buildSourceRef(event: OutboxObject): string {
  for (const key of ['source_ref', 'source_uri', 'uri']) {
    const explicit = asString(event[key]);
    if (explicit) return explicit;
  }

  const fileId = asString(event.file_id);
  if (fileId) {
    const fileRef = `open-files://file/${encodeURIComponent(fileId)}`;
    const revision = asString(event.revision_id) ?? asString(event.revision);
    if (!revision) return fileRef;
    return `${fileRef}/revision/${encodeURIComponent(revision)}`;
  }

  const sourceId = asString(event.source_id);
  const path = asString(event.path);
  if (!sourceId || !path) {
    throw new Error('Outbox event is missing source_ref, file_id, or source_id/path.');
  }
  return `open-files://source/${encodeURIComponent(sourceId)}/path/${encodeURIComponent(path)}`;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Returns the revision-independent URI for a source reference.
 * For an open-files file reference that carries a revision id, the trailing
 * `/revision/<id>` segment is removed; every other reference is returned as is.
 */
function baseSourceUri(sourceRef: string, parsed: SourceRef): string {
  const isFileRevision =
    parsed.kind === 'open-files' && parsed.entity === 'file' && Boolean(parsed.revision_id);
  return isFileRevision ? sourceRef.replace(/\/revision\/[^/]+$/, '') : sourceRef;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Extracts the content hash from an event, checking `hash`, `checksum`, and
 * `sha256` in that order. Returns `null` when none holds a non-empty string.
 */
function hashFromEvent(event: OutboxObject): string | null {
  for (const key of ['hash', 'checksum', 'sha256']) {
    const digest = asString(event[key]);
    if (digest !== undefined) return digest;
  }
  return null;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Picks the revision identifier for an event.
 *
 * Preference order: the event's `revision_id`, `revision`, or `version_id`;
 * then the revision id embedded in an open-files source reference; then the
 * content hash; finally `null`.
 */
function revisionFromEvent(event: OutboxObject, parsed: SourceRef, hash: string | null): string | null {
  const explicit =
    asString(event.revision_id) ?? asString(event.revision) ?? asString(event.version_id);
  if (explicit !== undefined) return explicit;

  const fromRef = parsed.kind === 'open-files' ? parsed.revision_id : undefined;
  return fromRef ?? hash ?? null;
}
|
|
97
|
+
|
|
98
|
+
/**
 * Reads the event label from `event`, `type`, `action`, or `change_type`
 * (first non-empty string wins) and lower-cases it. Defaults to `'changed'`.
 */
function eventType(event: OutboxObject): string {
  let label = 'changed';
  for (const key of ['event', 'type', 'action', 'change_type']) {
    const candidate = asString(event[key]);
    if (candidate !== undefined) {
      label = candidate;
      break;
    }
  }
  return label.toLowerCase();
}
|
|
101
|
+
|
|
102
|
+
/**
 * Chooses a display title for an event: `title`, then `name`, then the final
 * segment of `path`. Returns `null` when none of them is available.
 */
function titleFromEvent(event: OutboxObject): string | null {
  const explicit = asString(event.title) ?? asString(event.name);
  if (explicit !== undefined) return explicit;

  const path = asString(event.path);
  return path ? basename(path) : null;
}
|
|
106
|
+
|
|
107
|
+
/**
 * Converts a raw outbox event into its canonical `NormalizedOutboxEvent` form.
 *
 * @param event Raw event object parsed from the outbox.
 * @param now   ISO timestamp used as `updatedAt` when the event has no `updated_at`.
 * @throws Whatever `buildSourceRef` or `parseSourceRef` throw for an
 *         unidentifiable or unparsable source reference.
 */
function normalizeEvent(event: OutboxObject, now: string): NormalizedOutboxEvent {
  const sourceRef = buildSourceRef(event);
  const parsed = parseSourceRef(sourceRef);
  const hash = hashFromEvent(event);
  const rawStatus = asString(event.status);

  const normalized: NormalizedOutboxEvent = {
    raw: event,
    eventType: eventType(event),
    sourceRef,
    sourceUri: baseSourceUri(sourceRef, parsed),
    kind: parsed.kind,
    title: titleFromEvent(event),
    revision: revisionFromEvent(event, parsed, hash),
    hash,
    status: rawStatus === undefined ? null : rawStatus.toLowerCase(),
    updatedAt: asString(event.updated_at) ?? now,
    // `permissions` takes precedence over `acl`; null/absent in both means "no ACL supplied".
    acl: event.permissions ?? event.acl ?? undefined,
  };
  return normalized;
}
|
|
125
|
+
|
|
126
|
+
/**
 * Parses outbox text into a list of raw event objects.
 *
 * Accepted shapes:
 *   - a JSON array of event objects;
 *   - a JSON object with an `events` array;
 *   - a single JSON event object (single-line or pretty-printed);
 *   - JSONL: one JSON object per non-blank line.
 *
 * Blank input yields an empty list.
 *
 * Compared with the previous implementation, a document that parses as a
 * single JSON object is always returned as one event, whichever identifying
 * keys it uses (`uri` and `source_id`/`path` included). Entry-validation
 * errors are also raised directly instead of being routed through the JSONL
 * fallback, which used to replace them with an unrelated syntax error for
 * multi-line documents.
 *
 * @throws SyntaxError when the text (or a JSONL line) is not valid JSON.
 * @throws Error when an array, `events`, or JSONL entry is not an object.
 */
function parseOutboxText(text: string): OutboxObject[] {
  const trimmed = text.trim();
  if (!trimmed) return [];

  const requireObject = (entry: unknown, message: string): OutboxObject => {
    const event = asObject(entry);
    if (!event) throw new Error(message);
    return event;
  };
  const nonBlankLines = (): string[] =>
    trimmed.split(/\r?\n/).filter((line) => line.trim().length > 0);
  const parseJsonLines = (): OutboxObject[] =>
    nonBlankLines().map((line) => requireObject(JSON.parse(line), 'Outbox JSONL entries must be objects.'));

  if (trimmed.startsWith('[')) {
    const parsed: unknown = JSON.parse(trimmed);
    if (!Array.isArray(parsed)) throw new Error('Outbox array parse failed.');
    return parsed.map((entry) => requireObject(entry, 'Outbox array entries must be objects.'));
  }

  if (trimmed.startsWith('{')) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(trimmed);
    } catch (error) {
      // Only a JSON syntax failure may fall back to JSONL, and only when there
      // is more than one line to try; otherwise surface the original error.
      if (nonBlankLines().length <= 1) throw error;
      return parseJsonLines();
    }
    const document = requireObject(parsed, 'Outbox object parse failed.');
    if (Array.isArray(document.events)) {
      return document.events.map((entry) => requireObject(entry, 'Outbox events entries must be objects.'));
    }
    // A lone object is a single event; later normalization rejects it if it
    // carries no usable source identification.
    return [document];
  }

  return parseJsonLines();
}
|
|
167
|
+
|
|
168
|
+
/**
 * Downloads an `s3://bucket/key` object and returns its body as text.
 *
 * The AWS SDK modules are loaded lazily. Region, credentials profile, and
 * retry settings are taken from `config` only when its storage type is `s3`
 * and its configured bucket equals the bucket in `uri`; otherwise the client
 * is built with those settings left undefined.
 *
 * @throws Error when the URI has no bucket or no key.
 * @returns The object body, or an empty string when the response has no body.
 */
async function readS3Text(uri: string, config?: KnowledgeConfig): Promise<string> {
  const { hostname: bucket, pathname } = new URL(uri);
  const key = decodeURIComponent(pathname.replace(/^\/+/, ''));
  if (!bucket || !key) throw new Error(`Invalid S3 outbox URI: ${uri}`);

  const [s3Module, credentialModule] = await Promise.all([
    import('@aws-sdk/client-s3'),
    import('@aws-sdk/credential-providers'),
  ]);
  const { S3Client, GetObjectCommand } = s3Module;
  const { fromIni } = credentialModule;

  const matchesConfiguredBucket = config?.storage.type === 's3' && config.storage.s3?.bucket === bucket;
  const s3Config = matchesConfiguredBucket ? config?.storage.s3 : undefined;

  const client = new S3Client({
    region: s3Config?.region,
    credentials: s3Config?.profile ? fromIni({ profile: s3Config.profile }) : undefined,
    maxAttempts: s3Config?.max_attempts,
  });
  const { Body: body } = await client.send(new GetObjectCommand({ Bucket: bucket, Key: key }));
  return body ? await body.transformToString() : '';
}
|
|
187
|
+
|
|
188
|
+
/**
 * Loads the outbox text from `input`.
 * `s3://` inputs are fetched through `readS3Text`; anything else is read as a
 * local UTF-8 file.
 *
 * @throws Error `Outbox not found: <input>` when the local path does not exist.
 */
async function readOutboxInput(input: string, config?: KnowledgeConfig): Promise<string> {
  const isRemote = input.startsWith('s3://');
  if (isRemote) return readS3Text(input, config);

  if (existsSync(input)) return readFileSync(input, 'utf8');
  throw new Error(`Outbox not found: ${input}`);
}
|
|
193
|
+
|
|
194
|
+
/**
 * Shallow-merges `patch` over a stored JSON object and returns the serialized
 * result. A missing, unparsable, or non-object `existing` value is treated as
 * an empty object, so the patch is never lost to bad stored data.
 */
function mergeJson(existing: string | null | undefined, patch: OutboxObject): string {
  const decodeBase = (): OutboxObject => {
    if (!existing) return {};
    try {
      return asObject(JSON.parse(existing)) ?? {};
    } catch {
      return {};
    }
  };
  return JSON.stringify({ ...decodeBase(), ...patch });
}
|
|
205
|
+
|
|
206
|
+
/**
 * Upserts the `sources` row for an event and returns the row's id.
 *
 * Step 1 inserts the source keyed by its URI; on a URI conflict only `kind`,
 * `title` (when the event supplies one), and `updated_at` are refreshed.
 * Step 2 re-reads the row and merges the outbox bookkeeping fields into its
 * existing metadata; `acl_json` is replaced only when the event carries ACL data.
 *
 * @param now ISO timestamp stored as `created_at` for newly inserted rows.
 * @throws Error when the row cannot be read back after the upsert.
 */
function ensureSource(db: Database, event: NormalizedOutboxEvent, now: string): string {
  const { sourceRef, sourceUri, status, updatedAt, acl } = event;
  const lastEvent = event.eventType;

  const initialMetadata = JSON.stringify({
    source_ref: sourceRef,
    source_uri: sourceUri,
    status,
    last_outbox_event: lastEvent,
  });
  db.run(
    `INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?)
     ON CONFLICT(uri) DO UPDATE SET
       kind = excluded.kind,
       title = COALESCE(excluded.title, sources.title),
       updated_at = excluded.updated_at`,
    [
      stableId('src', sourceUri),
      sourceUri,
      event.kind,
      event.title,
      initialMetadata,
      JSON.stringify(acl ?? {}),
      now,
      updatedAt,
    ],
  );

  // The stored id can differ from the freshly derived one when the URI already existed.
  const stored = db
    .query<{ id: string; metadata_json: string; acl_json: string }, [string]>(
      'SELECT id, metadata_json, acl_json FROM sources WHERE uri = ?',
    )
    .get(sourceUri);
  if (!stored) throw new Error(`Failed to upsert source for outbox event: ${sourceUri}`);

  const patch: OutboxObject = {
    source_ref: sourceRef,
    source_uri: sourceUri,
    last_outbox_event: lastEvent,
    last_outbox_at: updatedAt,
  };
  if (status) patch.status = status;
  if (asString(event.raw.path)) patch.path = event.raw.path;

  // A NULL parameter tells the CASE expression to keep the current acl_json.
  const aclJson = acl === undefined ? null : JSON.stringify(acl);
  db.run(
    'UPDATE sources SET metadata_json = ?, acl_json = CASE WHEN ? IS NULL THEN acl_json ELSE ? END, updated_at = ? WHERE id = ?',
    [mergeJson(stored.metadata_json, patch), aclJson, aclJson, updatedAt, stored.id],
  );
  return stored.id;
}
|
|
248
|
+
|
|
249
|
+
/**
 * Upserts the `source_revisions` row for an event's revision and flags it as
 * needing a reindex.
 *
 * Unlike the previous implementation, an existing row's metadata is merged
 * with the outbox bookkeeping fields instead of being replaced. Keys already
 * stored for the revision survive the event, which matches how `ensureSource`
 * treats source metadata. A stored `status` is overwritten only when the
 * event actually carries one.
 *
 * @param now ISO timestamp stored as `created_at` for newly inserted rows.
 * @returns The revision row id, or `null` when the event has no revision.
 */
function ensureRevision(db: Database, sourceId: string, event: NormalizedOutboxEvent, now: string): string | null {
  if (!event.revision) return null;

  const existing = db.query<{ id: string; metadata_json: string | null }, [string, string]>(
    'SELECT id, metadata_json FROM source_revisions WHERE source_id = ? AND revision = ?',
  ).get(sourceId, event.revision);

  const patch: OutboxObject = {
    source_ref: event.sourceRef,
    source_uri: event.sourceUri,
    last_outbox_event: event.eventType,
    reindex_required: true,
  };
  // New rows always record the status (possibly null), as before; existing
  // rows keep their stored status unless the event supplies a fresh one.
  if (event.status || !existing) patch.status = event.status;

  const id = stableId('rev', `${sourceId}\u0000${event.revision}`);
  db.run(
    `INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)
     VALUES (?, ?, ?, ?, ?, ?, ?)
     ON CONFLICT(source_id, revision) DO UPDATE SET
       hash = COALESCE(excluded.hash, source_revisions.hash),
       metadata_json = excluded.metadata_json`,
    [
      id,
      sourceId,
      event.revision,
      event.hash,
      asString(event.raw.extracted_text_ref) ?? null,
      mergeJson(existing?.metadata_json, patch),
      now,
    ],
  );

  // Re-read so the id of a pre-existing row (which may differ from `id`) is returned.
  const row = db.query<{ id: string }, [string, string]>(
    'SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?',
  ).get(sourceId, event.revision);
  return row?.id ?? null;
}
|
|
272
|
+
|
|
273
|
+
/**
 * Lists the ids of the `source_revisions` rows an event applies to.
 *
 * Matching rules:
 *   - revision equal to the hash (which is what normalization produces for
 *     events that name no revision of their own): rows whose `revision` OR
 *     `hash` column equals that value;
 *   - any other revision: rows with that exact revision;
 *   - hash only: rows with that hash;
 *   - neither: every revision of the source.
 *
 * The first rule is new. Normalization always fills `revision` from the hash,
 * so the hash-only branch could never be reached and hash-identified events
 * never matched existing revisions by their `hash` column.
 */
function revisionIdsForEvent(db: Database, sourceId: string, event: NormalizedOutboxEvent): string[] {
  const { revision, hash } = event;

  if (revision && hash && revision === hash) {
    return db.query<{ id: string }, [string, string, string]>(
      'SELECT id FROM source_revisions WHERE source_id = ? AND (revision = ? OR hash = ?)',
    ).all(sourceId, revision, hash).map((row) => row.id);
  }
  if (revision) {
    return db.query<{ id: string }, [string, string]>(
      'SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?',
    ).all(sourceId, revision).map((row) => row.id);
  }
  if (hash) {
    return db.query<{ id: string }, [string, string]>(
      'SELECT id FROM source_revisions WHERE source_id = ? AND hash = ?',
    ).all(sourceId, hash).map((row) => row.id);
  }
  return db.query<{ id: string }, [string]>(
    'SELECT id FROM source_revisions WHERE source_id = ?',
  ).all(sourceId).map((row) => row.id);
}
|
|
288
|
+
|
|
289
|
+
/**
 * Removes all indexed data derived from one revision and marks the revision
 * as requiring a reindex.
 *
 * For every chunk of the revision, its embeddings and full-text rows are
 * deleted; then the chunk rows themselves are deleted, and
 * `reindex_required` / `invalidated_at` are merged into the revision metadata.
 *
 * @returns How many chunks and embedding rows were removed.
 */
function invalidateRevision(db: Database, revisionId: string): { chunksDeleted: number; embeddingsDeleted: number } {
  const chunkRows = db
    .query<{ id: string }, [string]>('SELECT id FROM chunks WHERE source_revision_id = ?')
    .all(revisionId);

  let embeddingsDeleted = 0;
  for (const { id: chunkId } of chunkRows) {
    // Count before deleting so the result reports what was actually removed.
    const counted = db
      .query<{ n: number }, [string]>('SELECT COUNT(*) AS n FROM chunk_embeddings WHERE chunk_id = ?')
      .get(chunkId);
    embeddingsDeleted += counted?.n ?? 0;
    db.run('DELETE FROM chunk_embeddings WHERE chunk_id = ?', [chunkId]);
    db.run('DELETE FROM chunks_fts WHERE chunk_id = ?', [chunkId]);
  }
  db.run('DELETE FROM chunks WHERE source_revision_id = ?', [revisionId]);

  const current = db
    .query<{ metadata_json: string }, [string]>('SELECT metadata_json FROM source_revisions WHERE id = ?')
    .get(revisionId);
  const nextMetadata = mergeJson(current?.metadata_json, {
    reindex_required: true,
    invalidated_at: new Date().toISOString(),
  });
  db.run('UPDATE source_revisions SET metadata_json = ? WHERE id = ?', [nextMetadata, revisionId]);

  return { chunksDeleted: chunkRows.length, embeddingsDeleted };
}
|
|
306
|
+
|
|
307
|
+
/**
 * Reports whether an event represents a deletion: either its status is
 * `'deleted'` or its type is one of the delete/remove labels.
 */
function isDeleteEvent(eventType: string, status: string | null): boolean {
  if (status === 'deleted') return true;
  switch (eventType) {
    case 'delete':
    case 'deleted':
    case 'remove':
    case 'removed':
      return true;
    default:
      return false;
  }
}
|
|
310
|
+
|
|
311
|
+
/** Reports whether an event type is one of the move/rename/path-change labels. */
function isMoveEvent(eventType: string): boolean {
  switch (eventType) {
    case 'move':
    case 'moved':
    case 'rename':
    case 'renamed':
    case 'path_changed':
      return true;
    default:
      return false;
  }
}
|
|
314
|
+
|
|
315
|
+
/** Reports whether an event type is one of the permission/ACL change labels. */
function isPermissionEvent(eventType: string): boolean {
  switch (eventType) {
    case 'permission':
    case 'permissions':
    case 'permission_changed':
    case 'acl_changed':
      return true;
    default:
      return false;
  }
}
|
|
318
|
+
|
|
319
|
+
/**
 * Consumes an open-files outbox and applies its events to the knowledge database.
 *
 * The database is migrated, the outbox is read and parsed, and then, inside a
 * single transaction, a `runs` ledger row is written and each event is
 * processed in order:
 *   1. its source row is upserted;
 *   2. its revision row is upserted (when the event has a revision);
 *   3. every matching revision has its chunks and embeddings invalidated;
 *   4. a `run_events` row describing the event is written.
 * A zero-cost `provider_usage` row closes the ledger. The database handle is
 * closed whether or not the transaction succeeds.
 *
 * @param options Database path, outbox location, and optional config/clock override.
 * @returns Counters describing what the run touched.
 * @throws Errors from reading or parsing the outbox, or from normalizing an event.
 */
export async function consumeOpenFilesOutbox(options: OutboxConsumeOptions): Promise<OutboxConsumeResult> {
  const { dbPath, input, config } = options;
  const now = (options.now ?? new Date()).toISOString();

  migrateKnowledgeDb(dbPath);
  const events = parseOutboxText(await readOutboxInput(input, config));
  const db = openKnowledgeDb(dbPath);
  const runId = `run_${randomUUID()}`;

  try {
    const applyEvents = db.transaction((): OutboxConsumeResult => {
      db.run(
        `INSERT INTO runs (id, type, prompt, status, provider, model, metadata_json, created_at, updated_at)
         VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
        [
          runId,
          'open-files-outbox',
          input,
          'completed',
          'local',
          'open-files-outbox',
          JSON.stringify({ path: input, events: events.length }),
          now,
          now,
        ],
      );

      const sourceIds = new Set<string>();
      const revisionIds = new Set<string>();
      const tally = {
        chunksDeleted: 0,
        embeddingsDeleted: 0,
        staleRevisions: 0,
        deletedSources: 0,
        movedSources: 0,
        permissionUpdates: 0,
      };

      for (const [index, raw] of events.entries()) {
        const event = normalizeEvent(raw, now);

        const sourceId = ensureSource(db, event, now);
        sourceIds.add(sourceId);

        const upsertedRevisionId = ensureRevision(db, sourceId, event, now);
        if (upsertedRevisionId) revisionIds.add(upsertedRevisionId);

        const affected = revisionIdsForEvent(db, sourceId, event);
        for (const revisionId of affected) {
          revisionIds.add(revisionId);
          const removed = invalidateRevision(db, revisionId);
          tally.chunksDeleted += removed.chunksDeleted;
          tally.embeddingsDeleted += removed.embeddingsDeleted;
          tally.staleRevisions += 1;
        }

        // These counters are per event, not per distinct source.
        if (isDeleteEvent(event.eventType, event.status)) tally.deletedSources += 1;
        if (isMoveEvent(event.eventType)) tally.movedSources += 1;
        if (isPermissionEvent(event.eventType) || event.acl !== undefined) tally.permissionUpdates += 1;

        const eventRowId = stableId(
          'evt',
          `${runId}\u0000${index}\u0000${event.sourceRef}\u0000${event.eventType}`,
        );
        const eventMetadata = JSON.stringify({
          source_ref: event.sourceRef,
          source_uri: event.sourceUri,
          revision: event.revision,
          hash: event.hash,
          status: event.status,
          affected_revisions: affected.length,
        });
        db.run(
          `INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
           VALUES (?, ?, ?, ?, ?, ?)`,
          [eventRowId, runId, 'info', event.eventType, eventMetadata, event.updatedAt],
        );
      }

      db.run(
        `INSERT INTO provider_usage (id, run_id, provider, model, input_tokens, output_tokens, cost_usd, metadata_json, created_at)
         VALUES (?, ?, ?, ?, 0, 0, 0, ?, ?)`,
        [
          stableId('usage', runId),
          runId,
          'local',
          'open-files-outbox',
          JSON.stringify({ note: 'No model provider used for outbox invalidation.' }),
          now,
        ],
      );

      return {
        path: input,
        db_path: dbPath,
        run_id: runId,
        events_seen: events.length,
        sources_touched: sourceIds.size,
        revisions_touched: revisionIds.size,
        chunks_deleted: tally.chunksDeleted,
        embeddings_deleted: tally.embeddingsDeleted,
        stale_revisions: tally.staleRevisions,
        deleted_sources: tally.deletedSources,
        moved_sources: tally.movedSources,
        permission_updates: tally.permissionUpdates,
      };
    });
    return applyEvents();
  } finally {
    db.close();
  }
}
|