@hasna/knowledge 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -0
- package/bin/open-knowledge-mcp.js +4 -4
- package/bin/open-knowledge.js +39 -9
- package/package.json +4 -4
- package/src/cli.ts +24 -4
- package/src/knowledge-db.ts +17 -1
- package/src/manifest-ingest.ts +423 -0
package/README.md
CHANGED
|
@@ -62,6 +62,9 @@ open-knowledge db init --scope project
|
|
|
62
62
|
|
|
63
63
|
# Initialize scalable wiki/schema/index/log artifacts
|
|
64
64
|
open-knowledge wiki init --scope project
|
|
65
|
+
|
|
66
|
+
# Ingest an open-files source manifest into the project SQLite catalog
|
|
67
|
+
open-knowledge ingest manifest ./open-files-manifest.jsonl --scope project --json
|
|
65
68
|
```
|
|
66
69
|
|
|
67
70
|
## Commands
|
|
@@ -160,6 +163,14 @@ Create starter generated-knowledge artifacts through the artifact store:
|
|
|
160
163
|
`schemas/v1.md`, `indexes/root.md`, `wiki/README.md`, and a dated JSONL log
|
|
161
164
|
partition.
|
|
162
165
|
|
|
166
|
+
### ingest
|
|
167
|
+
```bash
|
|
168
|
+
open-knowledge ingest manifest <file|s3://bucket/key> [--scope project] [--json]
|
|
169
|
+
```
|
|
170
|
+
Import an open-files JSON or JSONL source manifest into `knowledge.db`. This
|
|
171
|
+
upserts sources and source revisions, stores hash/MIME/status/permission
|
|
172
|
+
metadata, and chunks embedded extracted text when the manifest includes it.
|
|
173
|
+
|
|
163
174
|
### help
|
|
164
175
|
```bash
|
|
165
176
|
open-knowledge help [command]
|
|
@@ -13659,12 +13659,12 @@ import { existsSync as existsSync3, readFileSync as readFileSync3, writeFileSync
|
|
|
13659
13659
|
// package.json
|
|
13660
13660
|
var package_default = {
|
|
13661
13661
|
name: "@hasna/knowledge",
|
|
13662
|
-
version: "0.2.
|
|
13662
|
+
version: "0.2.5",
|
|
13663
13663
|
description: "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
|
|
13664
13664
|
type: "module",
|
|
13665
13665
|
bin: {
|
|
13666
|
-
"open-knowledge": "
|
|
13667
|
-
"open-knowledge-mcp": "
|
|
13666
|
+
"open-knowledge": "bin/open-knowledge.js",
|
|
13667
|
+
"open-knowledge-mcp": "bin/open-knowledge-mcp.js"
|
|
13668
13668
|
},
|
|
13669
13669
|
files: [
|
|
13670
13670
|
"bin",
|
|
@@ -13696,7 +13696,7 @@ var package_default = {
|
|
|
13696
13696
|
},
|
|
13697
13697
|
repository: {
|
|
13698
13698
|
type: "git",
|
|
13699
|
-
url: "https://github.com/hasna/knowledge"
|
|
13699
|
+
url: "git+https://github.com/hasna/knowledge.git"
|
|
13700
13700
|
},
|
|
13701
13701
|
bugs: {
|
|
13702
13702
|
url: "https://github.com/hasna/knowledge/issues"
|
package/bin/open-knowledge.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env bun
|
|
2
2
|
// @bun
|
|
3
|
-
var
|
|
4
|
-
`);return
|
|
3
|
+
var I=import.meta.require;import{readFileSync as Y,writeFileSync as B,existsSync as $,renameSync as Ae,unlinkSync as se}from"fs";import{randomUUID as oe}from"crypto";import{existsSync as ye,mkdirSync as J,readFileSync as Re,writeFileSync as Oe}from"fs";import{homedir as re}from"os";import{dirname as ke,join as R,resolve as le}from"path";var we=R(".hasna","apps","knowledge");function H(){return R(re(),".open-knowledge","db.json")}function Q(){return R(re(),".hasna","apps","knowledge")}function Ue(e=process.cwd()){return le(e,we)}function D(e){return{home:e,configPath:R(e,"config.json"),jsonStorePath:R(e,"db.json"),knowledgeDbPath:R(e,"knowledge.db"),artifactsDir:R(e,"artifacts"),cacheDir:R(e,"cache"),exportsDir:R(e,"exports"),indexesDir:R(e,"indexes"),logsDir:R(e,"logs"),runsDir:R(e,"runs"),schemasDir:R(e,"schemas"),wikiDir:R(e,"wiki")}}function Se(){return{version:1,mode:"local",storage:{type:"local",artifacts_root:"artifacts"},sources:{preferred_ref:"open-files",allowed_schemes:["open-files","s3","file","https","http"]}}}function b(e){let t=D(e);J(t.home,{recursive:!0});for(let n of[t.artifactsDir,t.cacheDir,t.exportsDir,t.indexesDir,t.logsDir,t.runsDir,t.schemasDir,t.wikiDir])J(n,{recursive:!0});if(!ye(t.configPath))Oe(t.configPath,`${JSON.stringify(Se(),null,2)}
|
|
4
|
+
`);return t}function ie(e,t=process.cwd()){if(e==="project"||e==="local")return D(Ue(t));return D(Q())}function K(e){J(ke(e),{recursive:!0})}function v(e){let t=Re(e,"utf8");return JSON.parse(t)}function V(){return D(Q()).jsonStorePath}function q(e){if(!$(e))if(K(e),e===V()&&$(H()))B(e,Y(H(),"utf8"));else B(e,JSON.stringify({items:[]},null,2))}function Ie(e){return`${e}.lock`}function xe(e,t){let c=Date.now();while(Date.now()-c<5000){try{if(!$(e)){B(e,JSON.stringify({owner:t,ts:Date.now()}));return}let d=JSON.parse(Y(e,"utf8"));if(Date.now()-d.ts>1e4)se(e)}catch{}let s=Date.now();while(Date.now()-s<50);}throw Error(`Could not acquire lock on ${e} after 5000ms`)}function Xe(e,t){try{if($(e)){if(JSON.parse(Y(e,"utf8")).owner===t)se(e)}}catch{}}function k(e){q(e);let t=Y(e,"utf8"),n=JSON.parse(t);if(!n||!Array.isArray(n.items))return{items:[]};return n}function w(e,t){let n=`${e}.tmp.${oe()}`;B(n,JSON.stringify(t,null,2)),Ae(n,e)}function l(e,t){let n=oe(),r=Ie(e);xe(r,n);try{return t()}finally{Xe(r,n)}}function P(){return`k_${Date.now().toString(36)}_${Math.random().toString(36).slice(2,8)}`}function ce(e){return e.replace(/^k_/,"").slice(0,12)}import{Database as be}from"bun:sqlite";var ge=`
|
|
5
5
|
PRAGMA journal_mode = WAL;
|
|
6
6
|
PRAGMA foreign_keys = ON;
|
|
7
7
|
|
|
@@ -168,7 +168,20 @@ CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
|
|
|
168
168
|
|
|
169
169
|
INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
170
170
|
VALUES (1, datetime('now'));
|
|
171
|
-
|
|
171
|
+
`,Ce=`
|
|
172
|
+
DROP TABLE IF EXISTS chunks_fts;
|
|
173
|
+
|
|
174
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
|
|
175
|
+
chunk_id UNINDEXED,
|
|
176
|
+
text,
|
|
177
|
+
title,
|
|
178
|
+
source_uri,
|
|
179
|
+
tokenize='porter unicode61'
|
|
180
|
+
);
|
|
181
|
+
|
|
182
|
+
INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
183
|
+
VALUES (2, datetime('now'));
|
|
184
|
+
`;function G(e){K(e);let t=new be(e);return t.exec("PRAGMA foreign_keys = ON;"),t}function m(e){let t=G(e);try{if(t.exec(ge),Z(t)<2)t.exec(Ce);return{path:e,schema_version:Z(t)}}finally{t.close()}}function Z(e){return e.query("SELECT MAX(version) AS version FROM schema_versions").get()?.version??0}function A(e,t){return e.query(`SELECT COUNT(*) AS n FROM ${t}`).get()?.n??0}function ue(e){let t=G(e);try{return{schema_version:Z(t),sources:A(t,"sources"),source_revisions:A(t,"source_revisions"),chunks:A(t,"chunks"),wiki_pages:A(t,"wiki_pages"),citations:A(t,"citations"),indexes:A(t,"knowledge_indexes"),runs:A(t,"runs"),run_events:A(t,"run_events")}}finally{t.close()}}import{existsSync as De,mkdirSync as Te,readFileSync as me,writeFileSync as Fe}from"fs";import{dirname as Me,join as ee,relative as je,sep as Ke}from"path";function F(e){let t=e.replace(/\\/g,"/").trim();if(!t||t.startsWith("/"))throw Error(`Invalid artifact key: ${e}`);let n=t.split("/").filter(Boolean);if(n.length===0||n.some((r)=>r==="."||r===".."))throw Error(`Invalid artifact key: ${e}`);return n.join("/")}function ne(e,t){let n=je(e,t);if(n.startsWith("..")||n===".."||n.startsWith(`..${Ke}`))throw Error(`Artifact path escapes root: ${t}`)}class Ee{root;type="local";canRead=!0;canWrite=!0;constructor(e){this.root=e;Te(e,{recursive:!0})}async put(e){let t=F(e.key),n=ee(this.root,t);return ne(this.root,n),Te(Me(n),{recursive:!0}),Fe(n,e.body),{key:t,uri:`file://${n}`}}async getText(e){let t=F(e),n=ee(this.root,t);return ne(this.root,n),me(n,"utf8")}async exists(e){let t=F(e),n=ee(this.root,t);return ne(this.root,n),De(n)}}class de{options;type="s3";canRead=!0;canWrite=!0;client;constructor(e){this.options=e;this.client=e.client}async getClient(){if(this.client)return this.client;let[{S3Client:e},{fromIni:t}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]);return this.client=new 
e({region:this.options.region,credentials:this.options.profile?t({profile:this.options.profile}):void 0,maxAttempts:this.options.max_attempts}),this.client}objectKey(e){let t=F(e),n=this.options.prefix?F(this.options.prefix):"";return n?`${n}/${t}`:t}async put(e){let[{PutObjectCommand:t},n]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(e.key);return await n.send(new t({Bucket:this.options.bucket,Key:r,Body:e.body,ContentType:e.content_type,Metadata:e.metadata,ServerSideEncryption:this.options.server_side_encryption,SSEKMSKeyId:this.options.kms_key_id})),{key:r,uri:`s3://${this.options.bucket}/${r}`}}async getText(e){let[{GetObjectCommand:t},n]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(e),c=await n.send(new t({Bucket:this.options.bucket,Key:r}));if(!c.Body)return"";return await c.Body.transformToString()}async exists(e){let[{HeadObjectCommand:t},n]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(e);try{return await n.send(new t({Bucket:this.options.bucket,Key:r})),!0}catch(c){let s=c instanceof Error?c.name:"";if(s==="NotFound"||s==="NoSuchKey"||s==="NotFoundError")return!1;throw c}}}function ae(e,t){if(e.storage.type==="s3"){if(!e.storage.s3?.bucket)throw Error("S3 artifact storage requires storage.s3.bucket");return new de({bucket:e.storage.s3.bucket,prefix:e.storage.s3.prefix,region:e.storage.s3.region,profile:e.storage.s3.profile,max_attempts:e.storage.s3.max_attempts,server_side_encryption:e.storage.s3.server_side_encryption,kms_key_id:e.storage.s3.kms_key_id})}return new Ee(t.artifactsDir)}function ve(e){let t=String(e.getUTCFullYear()),n=String(e.getUTCMonth()+1).padStart(2,"0"),r=String(e.getUTCDate()).padStart(2,"0");return{year:t,month:n,day:r}}function Be(){return`# Knowledge Agent Schema v1
|
|
172
185
|
|
|
173
186
|
## Source Rules
|
|
174
187
|
|
|
@@ -193,7 +206,7 @@ VALUES (1, datetime('now'));
|
|
|
193
206
|
## Lint Rules
|
|
194
207
|
|
|
195
208
|
- Flag stale pages, missing citations, contradictions, orphan pages, duplicate pages, and unresolved source refs.
|
|
196
|
-
`}function
|
|
209
|
+
`}function $e(){return`# Knowledge Index
|
|
197
210
|
|
|
198
211
|
This is a compact orientation index for agents. It is not the full search index.
|
|
199
212
|
|
|
@@ -208,13 +221,29 @@ This is a compact orientation index for agents. It is not the full search index.
|
|
|
208
221
|
|
|
209
222
|
Raw source files are resolved through open-files. This app stores source refs,
|
|
210
223
|
citations, chunks, generated wiki artifacts, indexes, and run records.
|
|
211
|
-
`}function
|
|
224
|
+
`}function Ye(){return`# Wiki
|
|
212
225
|
|
|
213
226
|
Generated durable knowledge pages live here.
|
|
214
227
|
|
|
215
228
|
Pages should be concise, cited, and organized for both humans and agents.
|
|
216
|
-
`}async function
|
|
217
|
-
`,content_type:"application/x-ndjson"})];return await Promise.all(
|
|
229
|
+
`}async function pe(e,t=new Date){let{year:n,month:r,day:c}=ve(t),s="schemas/v1.md",d="indexes/root.md",_="wiki/README.md",i=`logs/${n}/${r}/${c}.jsonl`,u={ts:t.toISOString(),event:"wiki_layout_initialized",schema_key:"schemas/v1.md",root_index_key:"indexes/root.md",wiki_readme_key:"wiki/README.md"},o=[e.put({key:"schemas/v1.md",body:Be(),content_type:"text/markdown"}),e.put({key:"indexes/root.md",body:$e(),content_type:"text/markdown"}),e.put({key:"wiki/README.md",body:Ye(),content_type:"text/markdown"}),e.put({key:i,body:`${JSON.stringify(u)}
|
|
230
|
+
`,content_type:"application/x-ndjson"})];return await Promise.all(o),{schema_key:"schemas/v1.md",root_index_key:"indexes/root.md",wiki_readme_key:"wiki/README.md",log_key:i,written:["schemas/v1.md","indexes/root.md","wiki/README.md",i]}}import{createHash as He}from"crypto";import{existsSync as Qe,readFileSync as Ve}from"fs";import{basename as qe}from"path";function fe(e,t){if(!e)throw Error(t);return e}function Ge(e){let n=e.slice(13).split("/").filter(Boolean),r=n[0];if(r!=="file"&&r!=="source")throw Error("Invalid open-files ref. Expected open-files://file/<id>, open-files://file/<id>/revision/<revision_id>, or open-files://source/<id>/path/<path>.");let c=fe(n[1],"Invalid open-files ref. Missing id.");if(r==="file"){if(n.length===2)return{kind:"open-files",uri:e,entity:r,id:c};if(n[2]==="revision"&&n[3]&&n.length===4)return{kind:"open-files",uri:e,entity:r,id:c,revision_id:decodeURIComponent(n[3])};throw Error("Invalid open-files file ref. Expected open-files://file/<id>/revision/<revision_id>.")}let s=n.indexOf("path"),d=s>=0?decodeURIComponent(n.slice(s+1).join("/")):void 0;return{kind:"open-files",uri:e,entity:r,id:c,path:d}}function ze(e){let t=new URL(e),n=fe(t.hostname,"Invalid s3 ref. Missing bucket."),r=decodeURIComponent(t.pathname.replace(/^\/+/,""));if(!r)throw Error("Invalid s3 ref. 
Missing object key.");return{kind:"s3",uri:e,bucket:n,key:r}}function We(e){let t=new URL(e);return{kind:"file",uri:e,path:decodeURIComponent(t.pathname)}}function Je(e){let t=new URL(e);return{kind:"web",uri:e,url:t.toString()}}function _e(e){if(e.startsWith("open-files://"))return Ge(e);if(e.startsWith("s3://"))return ze(e);if(e.startsWith("file://"))return We(e);if(e.startsWith("https://")||e.startsWith("http://"))return Je(e);throw Error(`Unsupported source ref scheme: ${e}`)}function te(e,t){return`${e}_${He("sha256").update(t).digest("hex").slice(0,20)}`}function g(e){return e&&typeof e==="object"&&!Array.isArray(e)?e:void 0}function E(e){return typeof e==="string"&&e.length>0?e:void 0}function Pe(e){return typeof e==="number"&&Number.isFinite(e)?e:void 0}function Ze(e){let t=E(e.source_ref)??E(e.source_uri)??E(e.uri);if(t)return t;let n=E(e.file_id);if(n){let s=E(e.revision_id)??E(e.revision),d=`open-files://file/${encodeURIComponent(n)}`;return s?`${d}/revision/${encodeURIComponent(s)}`:d}let r=E(e.source_id),c=E(e.path);if(r&&c)return`open-files://source/${encodeURIComponent(r)}/path/${encodeURIComponent(c)}`;throw Error("Manifest item is missing source_ref, file_id, or source_id/path.")}function en(e,t){if(t.kind==="open-files"&&t.entity==="file"&&t.revision_id)return e.replace(/\/revision\/[^/]+$/,"");return e}function nn(e){let t=E(e.extracted_text)??E(e.text)??E(e.content_text)??E(e.markdown);if(t!==void 0)return t;let n=e.content;return typeof n==="string"?n:null}function tn(e){let t=E(e.extracted_text_ref)??E(e.extracted_text_uri)??E(e.text_ref);if(t)return t;let n=g(e.content);return E(n?.extracted_text_ref)??E(n?.extracted_text_uri)??null}function rn(e){let t=E(e.path);return E(e.title)??E(e.name)??(t?qe(t):null)}function sn(e){return E(e.hash)??E(e.checksum)??E(e.sha256)??null}function on(e,t,n){return E(e.revision_id)??E(e.revision)??E(e.version_id)??(t.kind==="open-files"?t.revision_id:void 0)??n??E(e.updated_at)??"current"}function cn(e,t){let 
n={};for(let[r,c]of Object.entries(e)){if(["text","content","content_text","extracted_text","markdown"].includes(r))continue;n[r]=c}return n.source_ref=t.sourceRef,n.source_uri=t.sourceUri,n.status=t.status,n}function un(e,t){let n=Ze(e),r=_e(n),c=en(n,r),s=sn(e),d=E(e.status)??"active";return{raw:e,sourceRef:n,sourceUri:c,kind:r.kind,title:rn(e),revision:on(e,r,s),hash:s,extractedTextUri:tn(e),text:nn(e),metadata:cn(e,{sourceRef:n,sourceUri:c,status:d}),acl:e.permissions??e.acl??{},status:d,updatedAt:E(e.updated_at)??t}}function Tn(e){let t=e.trim();if(!t)return[];if(t.startsWith("[")){let n=JSON.parse(t);if(!Array.isArray(n))throw Error("Manifest array parse failed.");return n.map((r)=>{let c=g(r);if(!c)throw Error("Manifest array entries must be objects.");return c})}if(t.startsWith("{"))try{let n=JSON.parse(t),r=g(n);if(!r)throw Error("Manifest object parse failed.");if(Array.isArray(r.items))return r.items.map((c)=>{let s=g(c);if(!s)throw Error("Manifest items entries must be objects.");return s});if("source_ref"in r||"source_uri"in r||"file_id"in r)return[r]}catch(n){let r=t.split(/\r?\n/).filter((c)=>c.trim().length>0);if(r.length<=1)throw n;return r.map((c)=>{let s=g(JSON.parse(c));if(!s)throw Error("Manifest JSONL entries must be objects.");return s})}return t.split(/\r?\n/).filter((n)=>n.trim().length>0).map((n)=>{let r=g(JSON.parse(n));if(!r)throw Error("Manifest JSONL entries must be objects.");return r})}async function En(e,t){let n=new URL(e),r=n.hostname,c=decodeURIComponent(n.pathname.replace(/^\/+/,""));if(!r||!c)throw Error(`Invalid S3 manifest URI: ${e}`);let[{S3Client:s,GetObjectCommand:d},{fromIni:_}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]),i=t?.storage.type==="s3"&&t.storage.s3?.bucket===r?t.storage.s3:void 0,o=await new s({region:i?.region,credentials:i?.profile?_({profile:i.profile}):void 0,maxAttempts:i?.max_attempts}).send(new d({Bucket:r,Key:c}));if(!o.Body)return"";return await 
o.Body.transformToString()}async function dn(e,t){if(e.startsWith("s3://"))return En(e,t);if(!Qe(e))throw Error(`Manifest not found: ${e}`);return Ve(e,"utf8")}function an(e,t,n){let r=e.replace(/\r\n/g,`
|
|
231
|
+
`);if(!r.trim())return[];let c=[],s=0;while(s<r.length){let d=Math.min(r.length,s+t),_=d;if(d<r.length){let u=r.lastIndexOf(`
|
|
232
|
+
|
|
233
|
+
`,d),o=r.lastIndexOf(". ",d),T=Math.max(u,o);if(T>s+Math.floor(t*0.5))_=T+(T===u?2:1)}let i=r.slice(s,_).trim();if(i)c.push({ordinal:c.length,text:i,startOffset:s,endOffset:_});if(_>=r.length)break;s=Math.max(0,_-n)}return c}function pn(e){let t=e.trim().split(/\s+/).filter(Boolean).length;return Math.max(1,Math.ceil(t*1.25))}function fn(e,t){let n=e.query("SELECT id FROM chunks WHERE source_revision_id = ?").all(t);for(let r of n)e.run("DELETE FROM chunks_fts WHERE chunk_id = ?",[r.id]);return e.run("DELETE FROM chunks WHERE source_revision_id = ?",[t]),n.length}function _n(e,t,n){let r=te("src",t.sourceUri);e.run(`INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
|
|
234
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
235
|
+
ON CONFLICT(uri) DO UPDATE SET
|
|
236
|
+
kind = excluded.kind,
|
|
237
|
+
title = excluded.title,
|
|
238
|
+
metadata_json = excluded.metadata_json,
|
|
239
|
+
acl_json = excluded.acl_json,
|
|
240
|
+
updated_at = excluded.updated_at`,[r,t.sourceUri,t.kind,t.title,JSON.stringify(t.metadata),JSON.stringify(t.acl??{}),n,t.updatedAt]);let c=e.query("SELECT id FROM sources WHERE uri = ?").get(t.sourceUri);if(!c)throw Error(`Failed to upsert source: ${t.sourceUri}`);return c.id}function Nn(e,t,n,r){let c=te("rev",`${t}\x00${n.revision}`);e.run(`INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)
|
|
241
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
242
|
+
ON CONFLICT(source_id, revision) DO UPDATE SET
|
|
243
|
+
hash = excluded.hash,
|
|
244
|
+
extracted_text_uri = excluded.extracted_text_uri,
|
|
245
|
+
metadata_json = excluded.metadata_json`,[c,t,n.revision,n.hash,n.extractedTextUri,JSON.stringify(n.metadata),r]);let s=e.query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?").get(t,n.revision);if(!s)throw Error(`Failed to upsert source revision: ${n.sourceRef}`);return s.id}function Ln(e,t,n,r,c,s){if(!n.text||n.status.toLowerCase()==="deleted")return 0;let d=an(n.text,c,s);for(let _ of d){let i=te("chk",`${t}\x00${_.ordinal}\x00${_.text}`),u={source_ref:n.sourceRef,source_uri:n.sourceUri,hash:n.hash,status:n.status,path:E(n.raw.path)??null,mime:E(n.raw.mime)??E(n.raw.content_type)??null,size:Pe(n.raw.size)??null};e.run(`INSERT INTO chunks (id, source_revision_id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json, created_at)
|
|
246
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,[i,t,"source",_.ordinal,_.text,pn(_.text),_.startOffset,_.endOffset,JSON.stringify(u),r]),e.run("INSERT INTO chunks_fts (chunk_id, text, title, source_uri) VALUES (?, ?, ?, ?)",[i,_.text,n.title??"",n.sourceUri])}return d.length}async function Ne(e){let t=(e.now??new Date).toISOString(),n=e.maxChunkChars??4000,r=e.chunkOverlapChars??200;if(n<500)throw Error("maxChunkChars must be at least 500.");if(r<0||r>=n)throw Error("chunkOverlapChars must be less than maxChunkChars.");m(e.dbPath);let c=await dn(e.input,e.config),s=Tn(c),d=G(e.dbPath);try{return d.transaction(()=>{let i=new Set,u=new Set,o=0,T=0,a=0;for(let f of s){let y=un(f,t),h=_n(d,y,t),O=Nn(d,h,y,t);if(i.add(h),u.add(O),y.text||y.status.toLowerCase()==="deleted")T+=fn(d,O);o+=Ln(d,O,y,t,n,r)}return{path:e.input,db_path:e.dbPath,items_seen:s.length,sources_upserted:i.size,revisions_upserted:u.size,chunks_inserted:o,chunks_deleted:T,skipped:a}})()}finally{d.close()}}var M={name:"@hasna/knowledge",version:"0.2.5",description:"Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",type:"module",bin:{"open-knowledge":"bin/open-knowledge.js","open-knowledge-mcp":"bin/open-knowledge-mcp.js"},files:["bin","src","docs","LICENSE","README.md"],scripts:{test:"bun test","test:cli":"bun test tests/cli.test.ts",build:"bun build --target=bun --outfile=bin/open-knowledge.js --minify --external @aws-sdk/client-s3 --external @aws-sdk/credential-providers src/cli.ts && bun build --target=bun --outfile=bin/open-knowledge-mcp.js --external @modelcontextprotocol/sdk src/mcp.js",prepublishOnly:"bun run build",postinstall:"bun run build"},keywords:["knowledge","cli","agents","json","notes","local","store"],license:"Apache-2.0",publishConfig:{registry:"https://registry.npmjs.org",access:"public"},repository:{type:"git",url:"git+https://github.com/hasna/knowledge.git"},bugs:{url:"https://github.com/hasna/knowledge/issues"},author:"Hasna Inc. 
<hasna@example.com>",engines:{bun:">=1.0",node:">=18"},dependencies:{"@aws-sdk/client-s3":"^3.1063.0","@aws-sdk/credential-providers":"^3.1063.0","@modelcontextprotocol/sdk":"^1.29.0",zod:"^4.3.6"},devDependencies:{"@types/bun":"^1.3.14"}};var Le={debug:0,info:1,warn:2,error:3},yn=()=>{if(process.env.DEBUG)return"debug";if(process.env.LOG_LEVEL==="debug")return"debug";if(process.env.LOG_LEVEL==="warn")return"warn";if(process.env.LOG_LEVEL==="error")return"error";return"info"};function x(e,t,n){if(Le[e]<Le[yn()])return;let r={debug:"[DEBUG]",info:"[INFO]",warn:"[WARN]",error:"[ERROR]"}[e],c=n?`${r} ${t} ${JSON.stringify(n)}`:`${r} ${t}`;if(e==="error")console.error(c);else console.error(c)}var Rn=["add","list","get","delete","update","archive","restore","upsert","untag","export","prune","dedupe","stats","paths","db","wiki","ingest","help"],he={ls:"list",rm:"delete",edit:"update",unarchive:"restore"};function On(e){let t=[],n={};for(let r=0;r<e.length;r+=1){let c=e[r];if(!c.startsWith("-")){t.push(c);continue}switch(c){case"--json":n.json=!0;break;case"--yes":case"-y":n.yes=!0;break;case"--help":case"-h":n.help=!0;break;case"--version":case"-v":n.version=!0;break;case"--desc":n.desc=!0;break;case"--page":case"-p":n.page=Number(e[r+1]),r+=1;break;case"--limit":case"-l":n.limit=Number(e[r+1]),r+=1;break;case"--search":case"-s":n.search=e[r+1],r+=1;break;case"--sort":n.sort=e[r+1],r+=1;break;case"--id":n.id=e[r+1],r+=1;break;case"--store":n.store=e[r+1],r+=1;break;case"--title":n.title=e[r+1],r+=1;break;case"--content":n.content=e[r+1],r+=1;break;case"--url":n.url=e[r+1],r+=1;break;case"--tag":case"-t":n.tag=e[r+1],r+=1;break;case"--format":n.format=e[r+1],r+=1;break;case"--completions":n.completions=e[r+1],r+=1;break;case"--no-color":n.noColor=!0;break;case"--scope":n.scope=e[r+1],r+=1;break;case"--older-than":n.olderThan=Number(e[r+1]),r+=1;break;case"--empty":n.empty=!0;break;case"--archived":n.archived=!0;break;case"--include-archived":n.includeArchived=!0;break;defa
ult:throw Error(`Unknown flag: ${c}. Run 'open-knowledge --help' for valid options.`)}}return{positional:t,flags:n}}function kn(e){if(!e)return"";return he[e]??e}function ln(e,t){let n=Array.from({length:e.length+1},()=>Array(t.length+1).fill(0));for(let r=0;r<=e.length;r+=1)n[r][0]=r;for(let r=0;r<=t.length;r+=1)n[0][r]=r;for(let r=1;r<=e.length;r+=1)for(let c=1;c<=t.length;c+=1){let s=e[r-1]===t[c-1]?0:1;n[r][c]=Math.min(n[r-1][c]+1,n[r][c-1]+1,n[r-1][c-1]+s)}return n[e.length][t.length]}function wn(e){if(!e)return"";let t=[...Rn,...Object.keys(he)],n="",r=Number.POSITIVE_INFINITY;for(let c of t){let s=ln(e,c);if(s<r)r=s,n=c}return r<=3?n:""}function Un(){console.log(`open-knowledge - local agent knowledge store
|
|
218
247
|
|
|
219
248
|
Usage:
|
|
220
249
|
open-knowledge <command> [options]
|
|
@@ -236,6 +265,7 @@ Commands:
|
|
|
236
265
|
paths Show resolved workspace/store paths
|
|
237
266
|
db init|stats Initialize or inspect local knowledge.db
|
|
238
267
|
wiki init Initialize scalable wiki/schema/index/log artifacts
|
|
268
|
+
ingest manifest <file|s3://> Ingest an open-files manifest into knowledge.db
|
|
239
269
|
help [command] Show help
|
|
240
270
|
|
|
241
271
|
Global Options:
|
|
@@ -277,5 +307,5 @@ Export Options:
|
|
|
277
307
|
|
|
278
308
|
Prune Options:
|
|
279
309
|
--older-than <days> Remove items older than N days
|
|
280
|
-
--empty Remove items with empty content`)}function
|
|
281
|
-
_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"{created,title}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--no-color)--no-color[disable color]" "(--scope)--scope"{local,global,project}:" }; _open_knowledge`);else if(t==="fish")console.log('complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"');else throw 
Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");return}let r=He(i[0]);if(!r||e.help||r==="help"){qe(i[1]);return}let N=l(e.scope),s=e.store;if(!s)if(e.scope==="project"||e.scope==="local")s=j(N.home).jsonStorePath;else s=q();if(r==="paths"){let t=j(N.home);y({ok:!0,scope:e.scope??"global",home:t.home,config_path:t.configPath,json_store_path:t.jsonStorePath,knowledge_db_path:t.knowledgeDbPath,artifacts_dir:t.artifactsDir,indexes_dir:t.indexesDir,logs_dir:t.logsDir,runs_dir:t.runsDir,schemas_dir:t.schemasDir,wiki_dir:t.wikiDir,config:W(t.configPath),message:t.home},e.json);return}if(r==="db"){let t=i[1]??"init",E=j(N.home);if(t!=="init"&&t!=="stats")throw Error("Invalid db action. Use 'init' or 'stats'.");if(t==="init"){let o=P(E.knowledgeDbPath);y({ok:!0,...o,message:`Initialized ${o.path}`},e.json);return}P(E.knowledgeDbPath);let T=Te(E.knowledgeDbPath);y({ok:!0,path:E.knowledgeDbPath,...T,message:`knowledge.db schema v${T.schema_version}`},e.json);return}if(r==="wiki"){if((i[1]??"init")!=="init")throw Error("Invalid wiki action. Use 'init'.");let E=j(N.home),T=W(E.configPath),o=Ne(T,E),c=await ue(o);y({ok:!0,...c,message:`Initialized wiki layout in ${E.home}`},e.json);return}if(Z(s),r==="add"){let t=i[1],E=i[2];if(!t||!E)throw Error("Usage: open-knowledge add <title> <content>");A(s,()=>{let T=R(s),o={id:v(),title:t,content:E,url:e.url??null,tags:e.tag?[e.tag]:[],created_at:new Date().toISOString(),updated_at:new Date().toISOString()};T.items.push(o),U(s,T),I("info","Item added",{id:o.id,title:o.title}),y({ok:!0,item:o,message:`Added ${o.id}`},e.json)});return}if(r==="list"){if(e.format!==void 0&&e.format!=="table"&&e.format!=="json")throw Error("Invalid --format value for list. 
Use 'table' or 'json'.");A(s,()=>{let t=R(s),E=Number.isFinite(e.page)&&e.page>0?e.page:1,T=Number.isFinite(e.limit)&&e.limit>0?e.limit:20,o=e.search?String(e.search).toLowerCase():"",c=e.tag?String(e.tag).toLowerCase():"",_=e.format==="table"||!e.json&&!e.format&&Ze(e),X=e.json||e.format==="json",p=t.items;if(e.archived)p=p.filter((u)=>u.archived===!0);else if(!e.includeArchived)p=p.filter((u)=>!u.archived);if(o)p=p.filter((u)=>u.title.toLowerCase().includes(o)||u.content.toLowerCase().includes(o));if(c)p=p.filter((u)=>u.tags&&u.tags.map((Q)=>Q.toLowerCase()).includes(c));let{sorted:d,sort:C,direction:L}=ve(p,e),k=(E-1)*T,K=d.slice(k,k+T),z=Math.max(1,Math.ceil(d.length/T));if(X){y({ok:!0,page:E,limit:T,total:d.length,total_pages:z,sort:C,direction:L,items:K},!0);return}if(K.length===0){y(`No items found (search=${o||"none"}, tag=${c||"none"})`,!1);return}if(_){let u=(D)=>D,Q=`${u("ID")} ${u("TITLE")} ${u("CREATED")} ${u("URL")} ${u("TAGS")}`;console.log(Q);for(let D of K)console.log(`${D.id} ${u(D.title)} ${D.created_at} ${D.url?u(D.url):""} ${D.tags?.length?u(`[${D.tags.join(", ")}]`):""}`);console.log(`Page ${E}/${z} | showing ${K.length} of ${d.length} | sort=${C} ${L} | search=${o||"none"} | tag=${c||"none"}`)}else{for(let u of K)console.log(`${u.id} ${u.title} ${u.created_at}${u.url?` ${u.url}`:""}${u.tags?.length?` [${u.tags.join(", ")}]`:""}`);console.log(`Page ${E}/${z} | showing ${K.length} of ${d.length} | sort=${C} ${L} | search=${o||"none"} | tag=${c||"none"}`)}});return}if(r==="get"){x(e),A(s,()=>{let E=R(s).items.find((T)=>T.id===e.id||T.short_id===e.id);if(!E)throw Error(`Item not found: ${e.id}`);y({ok:!0,item:E,message:`${E.id}: ${E.title}`},e.json)});return}if(r==="update"){x(e),A(s,()=>{let t=R(s),E=t.items.findIndex((o)=>o.id===e.id||o.short_id===e.id);if(E===-1)throw Error(`Item not found: ${e.id}`);let T=t.items[E];if(e.title!==void 0)T.title=e.title;if(e.content!==void 0)T.content=e.content;if(e.url!==void 0)T.url=e.url;if(e.tag!==void 
0){if(T.tags=T.tags||[],!T.tags.map((o)=>o.toLowerCase()).includes(e.tag.toLowerCase()))T.tags.push(e.tag)}T.updated_at=new Date().toISOString(),t.items[E]=T,U(s,t),y({ok:!0,item:T,message:`Updated ${T.id}`},e.json)});return}if(r==="archive"||r==="restore"){x(e),A(s,()=>{let t=R(s),E=t.items.findIndex((o)=>o.id===e.id||o.short_id===e.id);if(E===-1)throw Error(`Item not found: ${e.id}`);let T=t.items[E];T.archived=r==="archive",T.updated_at=new Date().toISOString(),t.items[E]=T,U(s,t),y({ok:!0,item:T,message:`${r==="archive"?"Archived":"Restored"} ${T.id}`},e.json)});return}if(r==="untag"){if(x(e),!e.tag)throw Error("Missing required --tag. Example: open-knowledge untag --id <id> -t <tag>");A(s,()=>{let t=R(s),E=t.items.findIndex((c)=>c.id===e.id||c.short_id===e.id);if(E===-1)throw Error(`Item not found: ${e.id}`);let T=t.items[E],o=T.tags?.length??0;T.tags=(T.tags??[]).filter((c)=>c.toLowerCase()!==e.tag.toLowerCase()),T.updated_at=new Date().toISOString(),t.items[E]=T,U(s,t),y({ok:!0,item:T,removed:o-T.tags.length,message:`Removed tag from ${T.id}`},e.json)});return}if(r==="upsert"){let t=e.title??i[1],E=e.content??i[2];A(s,()=>{let T=R(s),o=e.id?T.items.findIndex((X)=>X.id===e.id||X.short_id===e.id):-1,c=new Date().toISOString();if(o===-1){if(!t||!E)throw Error("New item requires title and content. 
Example: open-knowledge upsert <title> <content> [--id <id>]");let X=e.id??v(),p={id:X,short_id:ie(X),title:t,content:E,url:e.url??null,tags:e.tag?[e.tag]:[],metadata:{},archived:!1,created_at:c,updated_at:c};T.items.push(p),U(s,T),y({ok:!0,created:!0,item:p,message:`Upserted ${p.id}`},e.json);return}let _=T.items[o];if(t!==void 0)_.title=t;if(E!==void 0)_.content=E;if(e.url!==void 0)_.url=e.url;if(e.tag!==void 0){if(_.tags=_.tags||[],!_.tags.map((X)=>X.toLowerCase()).includes(e.tag.toLowerCase()))_.tags.push(e.tag)}_.updated_at=c,T.items[o]=_,U(s,T),y({ok:!0,created:!1,item:_,message:`Upserted ${_.id}`},e.json)});return}if(r==="delete"){if(x(e),!e.yes)throw Error("Refusing delete without --yes. Re-run with: open-knowledge delete --id <id> --yes");A(s,()=>{let t=R(s),E=t.items.length;t.items=t.items.filter((o)=>o.id!==e.id&&o.short_id!==e.id);let T=E!==t.items.length;if(U(s,t),!T)throw Error(`Item not found: ${e.id}`);I("info","Item deleted",{id:e.id}),y({ok:!0,deleted_id:e.id,message:`Deleted ${e.id}`},e.json)});return}if(r==="export"){let t=e.format??"json";if(t!=="json"&&t!=="jsonl")throw Error("Invalid --format. Use 'json' or 'jsonl'.");A(s,()=>{let E=R(s);if(t==="jsonl")for(let T of E.items)console.log(JSON.stringify(T));else y({ok:!0,items:E.items},e.json)});return}if(r==="prune"){if(!e.yes)throw Error("Refusing prune without --yes. Re-run with: open-knowledge prune --yes [--older-than <days>] [--empty]");A(s,()=>{let t=R(s),E=t.items.length;if(e.olderThan!==void 0){let o=new Date;o.setDate(o.getDate()-e.olderThan),t.items=t.items.filter((c)=>new Date(c.created_at)>=o)}if(e.empty)t.items=t.items.filter((o)=>o.content.trim().length>0);let T=E-t.items.length;U(s,t),I("info","Prune completed",{pruned:T,remaining:t.items.length}),y({ok:!0,pruned:T,remaining:t.items.length,message:`Pruned ${T} item(s)`},e.json)});return}if(r==="dedupe"){if(!e.yes)throw Error("Refusing dedupe without --yes. 
Re-run with: open-knowledge dedupe --yes [--json]");A(s,()=>{let t=R(s),E=new Set,T=t.items.length;t.items=t.items.filter((c)=>{let _=`${c.title}\x00${c.content}`;if(E.has(_))return!1;return E.add(_),!0});let o=T-t.items.length;U(s,t),I("info","Dedupe completed",{removed:o,remaining:t.items.length}),y({ok:!0,removed:o,remaining:t.items.length,message:`Dedupe removed ${o} duplicate(s)`},e.json)});return}if(r==="stats"){A(s,()=>{let t=R(s),E=t.items.filter((L)=>!L.archived),T=E.length,o=t.items.length-T,c=E.filter((L)=>L.url).length,_=E.filter((L)=>L.tags&&L.tags.length>0).length,X=T>0?E.map((L)=>L.created_at).sort()[0]:null,p=T>0?E.map((L)=>L.created_at).sort()[T-1]:null,d={};for(let L of E)for(let k of L.tags||[])d[k]=(d[k]||0)+1;let C=Object.entries(d).sort((L,k)=>k[1]-L[1]).slice(0,5).map(([L,k])=>({tag:L,count:k}));y({ok:!0,total:T,archived:o,with_url:c,with_tags:_,oldest:X,newest:p,top_tags:C,message:`${T} items | ${c} with URL | ${_} with tags`},e.json)});return}let S=Ve(i[0]),g=S?` Did you mean '${S}'?`:"";throw I("warn","Unknown command",{input:i[0],suggestion:S}),Error(`Unknown command: ${i[0]}.${g} Run 'open-knowledge --help' for available commands.`)}if(import.meta.main)Pe(process.argv.slice(2)).catch((n)=>{let i=n instanceof Error?n.message:String(n);I("error","CLI error",{message:i,stack:n instanceof Error?n.stack:void 0}),console.error(`Error: ${i}`),process.exitCode=1});export{Ve as suggestCommand,ve as sortItems,Pe as run,Qe as parseArgs};
|
|
310
|
+
--empty Remove items with empty content`)}function Sn(e){if(e==="add"){console.log("Usage: open-knowledge add <title> <content> [--url <url>] [-t <tag>] [--json]");return}if(e==="list"||e==="ls"){console.log("Usage: open-knowledge list|ls [--format table|json] [-p <page>] [-l <limit>] [-s <search>] [-t <tag>] [--sort created|title] [--desc] [--json]");return}if(e==="get"){console.log("Usage: open-knowledge get --id <id> [--json]");return}if(e==="update"||e==="edit"){console.log("Usage: open-knowledge update|edit --id <id> [--title <title>] [--content <content>] [--url <url>] [-t <tag>] [--json]");return}if(e==="archive"){console.log("Usage: open-knowledge archive --id <id> [--json]");return}if(e==="restore"||e==="unarchive"){console.log("Usage: open-knowledge restore|unarchive --id <id> [--json]");return}if(e==="upsert"){console.log("Usage: open-knowledge upsert [title] [content] [--id <id>] [--title <title>] [--content <content>] [--url <url>] [-t <tag>] [--json]");return}if(e==="untag"){console.log("Usage: open-knowledge untag --id <id> -t <tag> [--json]");return}if(e==="delete"||e==="rm"){console.log("Usage: open-knowledge delete|rm --id <id> -y [--json]");return}if(e==="export"){console.log("Usage: open-knowledge export [--format jsonl] [--json]");return}if(e==="prune"){console.log("Usage: open-knowledge prune --yes [--older-than <days>] [--empty] [--json]");return}if(e==="dedupe"){console.log("Usage: open-knowledge dedupe --yes [--json]");return}if(e==="stats"){console.log("Usage: open-knowledge stats [--json]");return}if(e==="paths"){console.log("Usage: open-knowledge paths [--scope local|global|project] [--json]");return}if(e==="db"){console.log("Usage: open-knowledge db init|stats [--scope local|global|project] [--json]");return}if(e==="wiki"){console.log("Usage: open-knowledge wiki init [--scope local|global|project] [--json]");return}if(e==="ingest"){console.log("Usage: open-knowledge ingest manifest <file|s3://bucket/key> [--scope local|global|project] 
[--json]");return}Un()}function An(e){if(e.noColor||process.env.NO_COLOR)return!1;if(process.env.FORCE_COLOR)return!0;return process.stdout.isTTY===!0}function L(e,t,n){if(t){console.log(JSON.stringify(e,null,2));return}if(typeof e==="string"){console.log(e);return}console.log(e.message??JSON.stringify(e,null,2))}function j(e){if(!e.id)throw Error("Missing required --id. Example: open-knowledge get --id <id>")}function In(e,t){let n=t.sort??"created";if(n!=="created"&&n!=="title")throw Error("Invalid --sort value. Use 'created' or 'title'.");let r=[...e].sort((c,s)=>{if(n==="title")return c.title.localeCompare(s.title);return c.created_at.localeCompare(s.created_at)});if(t.desc)r.reverse();return{sorted:r,sort:n,direction:t.desc?"desc":"asc"}}async function xn(e){let{positional:t,flags:n}=On(e);if(x("debug","CLI invoked",{command:t[0],flags:{json:n.json,store:n.store}}),n.version){console.log(n.json?JSON.stringify({name:M.name,version:M.version},null,2):`${M.name} ${M.version}`);return}if(n.completions){let i=n.completions;if(i==="bash")console.log('_open_knowledge() { local cur; cur="${COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge');else if(i==="zsh")console.log(`#compdef open-knowledge
|
|
311
|
+
_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"{created,title}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--no-color)--no-color[disable color]" "(--scope)--scope"{local,global,project}:" }; _open_knowledge`);else if(i==="fish")console.log('complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"');else 
throw Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");return}let r=kn(t[0]);if(!r||n.help||r==="help"){Sn(t[1]);return}let c=ie(n.scope),s=n.store;if(!s)if(n.scope==="project"||n.scope==="local")s=b(c.home).jsonStorePath;else s=V();if(r==="paths"){let i=b(c.home);L({ok:!0,scope:n.scope??"global",home:i.home,config_path:i.configPath,json_store_path:i.jsonStorePath,knowledge_db_path:i.knowledgeDbPath,artifacts_dir:i.artifactsDir,indexes_dir:i.indexesDir,logs_dir:i.logsDir,runs_dir:i.runsDir,schemas_dir:i.schemasDir,wiki_dir:i.wikiDir,config:v(i.configPath),message:i.home},n.json);return}if(r==="db"){let i=t[1]??"init",u=b(c.home);if(i!=="init"&&i!=="stats")throw Error("Invalid db action. Use 'init' or 'stats'.");if(i==="init"){let T=m(u.knowledgeDbPath);L({ok:!0,...T,message:`Initialized ${T.path}`},n.json);return}m(u.knowledgeDbPath);let o=ue(u.knowledgeDbPath);L({ok:!0,path:u.knowledgeDbPath,...o,message:`knowledge.db schema v${o.schema_version}`},n.json);return}if(r==="wiki"){if((t[1]??"init")!=="init")throw Error("Invalid wiki action. Use 'init'.");let u=b(c.home),o=v(u.configPath),T=ae(o,u),a=await pe(T);L({ok:!0,...a,message:`Initialized wiki layout in ${u.home}`},n.json);return}if(r==="ingest"){if((t[1]??"")!=="manifest")throw Error("Invalid ingest action. 
Use 'manifest'.");let u=t[2];if(!u)throw Error("Usage: open-knowledge ingest manifest <file|s3://bucket/key>");let o=b(c.home),T=v(o.configPath),a=await Ne({dbPath:o.knowledgeDbPath,input:u,config:T});L({ok:!0,...a,message:`Ingested ${a.items_seen} manifest item(s)`},n.json);return}if(q(s),r==="add"){let i=t[1],u=t[2];if(!i||!u)throw Error("Usage: open-knowledge add <title> <content>");l(s,()=>{let o=k(s),T={id:P(),title:i,content:u,url:n.url??null,tags:n.tag?[n.tag]:[],created_at:new Date().toISOString(),updated_at:new Date().toISOString()};o.items.push(T),w(s,o),x("info","Item added",{id:T.id,title:T.title}),L({ok:!0,item:T,message:`Added ${T.id}`},n.json)});return}if(r==="list"){if(n.format!==void 0&&n.format!=="table"&&n.format!=="json")throw Error("Invalid --format value for list. Use 'table' or 'json'.");l(s,()=>{let i=k(s),u=Number.isFinite(n.page)&&n.page>0?n.page:1,o=Number.isFinite(n.limit)&&n.limit>0?n.limit:20,T=n.search?String(n.search).toLowerCase():"",a=n.tag?String(n.tag).toLowerCase():"",f=n.format==="table"||!n.json&&!n.format&&An(n),y=n.json||n.format==="json",h=i.items;if(n.archived)h=h.filter((p)=>p.archived===!0);else if(!n.includeArchived)h=h.filter((p)=>!p.archived);if(T)h=h.filter((p)=>p.title.toLowerCase().includes(T)||p.content.toLowerCase().includes(T));if(a)h=h.filter((p)=>p.tags&&p.tags.map((W)=>W.toLowerCase()).includes(a));let{sorted:O,sort:C,direction:N}=In(h,n),U=(u-1)*o,X=O.slice(U,U+o),z=Math.max(1,Math.ceil(O.length/o));if(y){L({ok:!0,page:u,limit:o,total:O.length,total_pages:z,sort:C,direction:N,items:X},!0);return}if(X.length===0){L(`No items found (search=${T||"none"}, tag=${a||"none"})`,!1);return}if(f){let p=(S)=>S,W=`${p("ID")} ${p("TITLE")} ${p("CREATED")} ${p("URL")} ${p("TAGS")}`;console.log(W);for(let S of X)console.log(`${S.id} ${p(S.title)} ${S.created_at} ${S.url?p(S.url):""} ${S.tags?.length?p(`[${S.tags.join(", ")}]`):""}`);console.log(`Page ${u}/${z} | showing ${X.length} of ${O.length} | sort=${C} ${N} | 
search=${T||"none"} | tag=${a||"none"}`)}else{for(let p of X)console.log(`${p.id} ${p.title} ${p.created_at}${p.url?` ${p.url}`:""}${p.tags?.length?` [${p.tags.join(", ")}]`:""}`);console.log(`Page ${u}/${z} | showing ${X.length} of ${O.length} | sort=${C} ${N} | search=${T||"none"} | tag=${a||"none"}`)}});return}if(r==="get"){j(n),l(s,()=>{let u=k(s).items.find((o)=>o.id===n.id||o.short_id===n.id);if(!u)throw Error(`Item not found: ${n.id}`);L({ok:!0,item:u,message:`${u.id}: ${u.title}`},n.json)});return}if(r==="update"){j(n),l(s,()=>{let i=k(s),u=i.items.findIndex((T)=>T.id===n.id||T.short_id===n.id);if(u===-1)throw Error(`Item not found: ${n.id}`);let o=i.items[u];if(n.title!==void 0)o.title=n.title;if(n.content!==void 0)o.content=n.content;if(n.url!==void 0)o.url=n.url;if(n.tag!==void 0){if(o.tags=o.tags||[],!o.tags.map((T)=>T.toLowerCase()).includes(n.tag.toLowerCase()))o.tags.push(n.tag)}o.updated_at=new Date().toISOString(),i.items[u]=o,w(s,i),L({ok:!0,item:o,message:`Updated ${o.id}`},n.json)});return}if(r==="archive"||r==="restore"){j(n),l(s,()=>{let i=k(s),u=i.items.findIndex((T)=>T.id===n.id||T.short_id===n.id);if(u===-1)throw Error(`Item not found: ${n.id}`);let o=i.items[u];o.archived=r==="archive",o.updated_at=new Date().toISOString(),i.items[u]=o,w(s,i),L({ok:!0,item:o,message:`${r==="archive"?"Archived":"Restored"} ${o.id}`},n.json)});return}if(r==="untag"){if(j(n),!n.tag)throw Error("Missing required --tag. 
Example: open-knowledge untag --id <id> -t <tag>");l(s,()=>{let i=k(s),u=i.items.findIndex((a)=>a.id===n.id||a.short_id===n.id);if(u===-1)throw Error(`Item not found: ${n.id}`);let o=i.items[u],T=o.tags?.length??0;o.tags=(o.tags??[]).filter((a)=>a.toLowerCase()!==n.tag.toLowerCase()),o.updated_at=new Date().toISOString(),i.items[u]=o,w(s,i),L({ok:!0,item:o,removed:T-o.tags.length,message:`Removed tag from ${o.id}`},n.json)});return}if(r==="upsert"){let i=n.title??t[1],u=n.content??t[2];l(s,()=>{let o=k(s),T=n.id?o.items.findIndex((y)=>y.id===n.id||y.short_id===n.id):-1,a=new Date().toISOString();if(T===-1){if(!i||!u)throw Error("New item requires title and content. Example: open-knowledge upsert <title> <content> [--id <id>]");let y=n.id??P(),h={id:y,short_id:ce(y),title:i,content:u,url:n.url??null,tags:n.tag?[n.tag]:[],metadata:{},archived:!1,created_at:a,updated_at:a};o.items.push(h),w(s,o),L({ok:!0,created:!0,item:h,message:`Upserted ${h.id}`},n.json);return}let f=o.items[T];if(i!==void 0)f.title=i;if(u!==void 0)f.content=u;if(n.url!==void 0)f.url=n.url;if(n.tag!==void 0){if(f.tags=f.tags||[],!f.tags.map((y)=>y.toLowerCase()).includes(n.tag.toLowerCase()))f.tags.push(n.tag)}f.updated_at=a,o.items[T]=f,w(s,o),L({ok:!0,created:!1,item:f,message:`Upserted ${f.id}`},n.json)});return}if(r==="delete"){if(j(n),!n.yes)throw Error("Refusing delete without --yes. Re-run with: open-knowledge delete --id <id> --yes");l(s,()=>{let i=k(s),u=i.items.length;i.items=i.items.filter((T)=>T.id!==n.id&&T.short_id!==n.id);let o=u!==i.items.length;if(w(s,i),!o)throw Error(`Item not found: ${n.id}`);x("info","Item deleted",{id:n.id}),L({ok:!0,deleted_id:n.id,message:`Deleted ${n.id}`},n.json)});return}if(r==="export"){let i=n.format??"json";if(i!=="json"&&i!=="jsonl")throw Error("Invalid --format. 
Use 'json' or 'jsonl'.");l(s,()=>{let u=k(s);if(i==="jsonl")for(let o of u.items)console.log(JSON.stringify(o));else L({ok:!0,items:u.items},n.json)});return}if(r==="prune"){if(!n.yes)throw Error("Refusing prune without --yes. Re-run with: open-knowledge prune --yes [--older-than <days>] [--empty]");l(s,()=>{let i=k(s),u=i.items.length;if(n.olderThan!==void 0){let T=new Date;T.setDate(T.getDate()-n.olderThan),i.items=i.items.filter((a)=>new Date(a.created_at)>=T)}if(n.empty)i.items=i.items.filter((T)=>T.content.trim().length>0);let o=u-i.items.length;w(s,i),x("info","Prune completed",{pruned:o,remaining:i.items.length}),L({ok:!0,pruned:o,remaining:i.items.length,message:`Pruned ${o} item(s)`},n.json)});return}if(r==="dedupe"){if(!n.yes)throw Error("Refusing dedupe without --yes. Re-run with: open-knowledge dedupe --yes [--json]");l(s,()=>{let i=k(s),u=new Set,o=i.items.length;i.items=i.items.filter((a)=>{let f=`${a.title}\x00${a.content}`;if(u.has(f))return!1;return u.add(f),!0});let T=o-i.items.length;w(s,i),x("info","Dedupe completed",{removed:T,remaining:i.items.length}),L({ok:!0,removed:T,remaining:i.items.length,message:`Dedupe removed ${T} duplicate(s)`},n.json)});return}if(r==="stats"){l(s,()=>{let i=k(s),u=i.items.filter((N)=>!N.archived),o=u.length,T=i.items.length-o,a=u.filter((N)=>N.url).length,f=u.filter((N)=>N.tags&&N.tags.length>0).length,y=o>0?u.map((N)=>N.created_at).sort()[0]:null,h=o>0?u.map((N)=>N.created_at).sort()[o-1]:null,O={};for(let N of u)for(let U of N.tags||[])O[U]=(O[U]||0)+1;let C=Object.entries(O).sort((N,U)=>U[1]-N[1]).slice(0,5).map(([N,U])=>({tag:N,count:U}));L({ok:!0,total:o,archived:T,with_url:a,with_tags:f,oldest:y,newest:h,top_tags:C,message:`${o} items | ${a} with URL | ${f} with tags`},n.json)});return}let d=wn(t[0]),_=d?` Did you mean '${d}'?`:"";throw x("warn","Unknown command",{input:t[0],suggestion:d}),Error(`Unknown command: ${t[0]}.${_} Run 'open-knowledge --help' for available 
commands.`)}if(import.meta.main)xn(process.argv.slice(2)).catch((e)=>{let t=e instanceof Error?e.message:String(e);x("error","CLI error",{message:t,stack:e instanceof Error?e.stack:void 0}),console.error(`Error: ${t}`),process.exitCode=1});export{wn as suggestCommand,In as sortItems,xn as run,On as parseArgs};
|
package/package.json
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hasna/knowledge",
|
|
3
|
-
"version": "0.2.4",
|
|
3
|
+
"version": "0.2.5",
|
|
4
4
|
"description": "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
7
|
-
"open-knowledge": "
|
|
8
|
-
"open-knowledge-mcp": "
|
|
7
|
+
"open-knowledge": "bin/open-knowledge.js",
|
|
8
|
+
"open-knowledge-mcp": "bin/open-knowledge-mcp.js"
|
|
9
9
|
},
|
|
10
10
|
"files": [
|
|
11
11
|
"bin",
|
|
@@ -37,7 +37,7 @@
|
|
|
37
37
|
},
|
|
38
38
|
"repository": {
|
|
39
39
|
"type": "git",
|
|
40
|
-
"url": "https://github.com/hasna/knowledge"
|
|
40
|
+
"url": "git+https://github.com/hasna/knowledge.git"
|
|
41
41
|
},
|
|
42
42
|
"bugs": {
|
|
43
43
|
"url": "https://github.com/hasna/knowledge/issues"
|
package/src/cli.ts
CHANGED
|
@@ -9,6 +9,7 @@ import { ensureKnowledgeWorkspace, readKnowledgeConfig, resolveScopedWorkspace }
|
|
|
9
9
|
import { getKnowledgeDbStats, migrateKnowledgeDb } from './knowledge-db';
|
|
10
10
|
import { createArtifactStore } from './artifact-store';
|
|
11
11
|
import { initializeWikiLayout } from './wiki-layout';
|
|
12
|
+
import { ingestOpenFilesManifest } from './manifest-ingest';
|
|
12
13
|
import pkg from '../package.json' with { type: 'json' };
|
|
13
14
|
|
|
14
15
|
type LogLevel = 'debug' | 'info' | 'warn' | 'error';
|
|
@@ -59,7 +60,7 @@ interface ParseResult {
|
|
|
59
60
|
flags: Flags;
|
|
60
61
|
}
|
|
61
62
|
|
|
62
|
-
const COMMANDS = ['add', 'list', 'get', 'delete', 'update', 'archive', 'restore', 'upsert', 'untag', 'export', 'prune', 'dedupe', 'stats', 'paths', 'db', 'wiki', 'help'];
|
|
63
|
+
const COMMANDS = ['add', 'list', 'get', 'delete', 'update', 'archive', 'restore', 'upsert', 'untag', 'export', 'prune', 'dedupe', 'stats', 'paths', 'db', 'wiki', 'ingest', 'help'];
|
|
63
64
|
const COMMAND_ALIASES: Record<string, string> = {
|
|
64
65
|
ls: 'list',
|
|
65
66
|
rm: 'delete',
|
|
@@ -162,6 +163,7 @@ Commands:
|
|
|
162
163
|
paths Show resolved workspace/store paths
|
|
163
164
|
db init|stats Initialize or inspect local knowledge.db
|
|
164
165
|
wiki init Initialize scalable wiki/schema/index/log artifacts
|
|
166
|
+
ingest manifest <file|s3://> Ingest an open-files manifest into knowledge.db
|
|
165
167
|
help [command] Show help
|
|
166
168
|
|
|
167
169
|
Global Options:
|
|
@@ -223,6 +225,7 @@ function printCommandHelp(command: string): void {
|
|
|
223
225
|
if (command === 'paths') { console.log('Usage: open-knowledge paths [--scope local|global|project] [--json]'); return; }
|
|
224
226
|
if (command === 'db') { console.log('Usage: open-knowledge db init|stats [--scope local|global|project] [--json]'); return; }
|
|
225
227
|
if (command === 'wiki') { console.log('Usage: open-knowledge wiki init [--scope local|global|project] [--json]'); return; }
|
|
228
|
+
if (command === 'ingest') { console.log('Usage: open-knowledge ingest manifest <file|s3://bucket/key> [--scope local|global|project] [--json]'); return; }
|
|
226
229
|
printGlobalHelp();
|
|
227
230
|
}
|
|
228
231
|
|
|
@@ -267,11 +270,11 @@ async function run(argv: string[]): Promise<void> {
|
|
|
267
270
|
if (flags.completions) {
|
|
268
271
|
const shell = flags.completions;
|
|
269
272
|
if (shell === 'bash') {
|
|
270
|
-
console.log(`_open_knowledge() { local cur; cur="${"$"}{COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge`);
|
|
273
|
+
console.log(`_open_knowledge() { local cur; cur="${"$"}{COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge`);
|
|
271
274
|
} else if (shell === 'zsh') {
|
|
272
|
-
console.log(`#compdef open-knowledge\n_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"\{created,title\}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--no-color)--no-color[disable color]" "(--scope)--scope"\{local,global,project\}:" }; _open_knowledge`);
|
|
275
|
+
console.log(`#compdef open-knowledge\n_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"\{created,title\}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--no-color)--no-color[disable color]" "(--scope)--scope"\{local,global,project\}:" }; _open_knowledge`);
|
|
273
276
|
} else if (shell === 'fish') {
|
|
274
|
-
console.log(`complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"`);
|
|
277
|
+
console.log(`complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"`);
|
|
275
278
|
} else {
|
|
276
279
|
throw new Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");
|
|
277
280
|
}
|
|
@@ -340,6 +343,23 @@ async function run(argv: string[]): Promise<void> {
|
|
|
340
343
|
output({ ok: true, ...result, message: `Initialized wiki layout in ${resolvedWorkspace.home}` }, flags.json);
|
|
341
344
|
return;
|
|
342
345
|
}
|
|
346
|
+
|
|
347
|
+
if (command === 'ingest') {
|
|
348
|
+
const action = positional[1] ?? '';
|
|
349
|
+
if (action !== 'manifest') throw new Error("Invalid ingest action. Use 'manifest'.");
|
|
350
|
+
const input = positional[2];
|
|
351
|
+
if (!input) throw new Error('Usage: open-knowledge ingest manifest <file|s3://bucket/key>');
|
|
352
|
+
const resolvedWorkspace = ensureKnowledgeWorkspace(workspace.home);
|
|
353
|
+
const config = readKnowledgeConfig(resolvedWorkspace.configPath);
|
|
354
|
+
const result = await ingestOpenFilesManifest({
|
|
355
|
+
dbPath: resolvedWorkspace.knowledgeDbPath,
|
|
356
|
+
input,
|
|
357
|
+
config,
|
|
358
|
+
});
|
|
359
|
+
output({ ok: true, ...result, message: `Ingested ${result.items_seen} manifest item(s)` }, flags.json);
|
|
360
|
+
return;
|
|
361
|
+
}
|
|
362
|
+
|
|
343
363
|
ensureStore(storePath);
|
|
344
364
|
|
|
345
365
|
if (command === 'add') {
|
package/src/knowledge-db.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { Database } from 'bun:sqlite';
|
|
2
2
|
import { ensureParentDir } from './workspace';
|
|
3
3
|
|
|
4
|
-
export const CURRENT_SCHEMA_VERSION = 1;
|
|
4
|
+
export const CURRENT_SCHEMA_VERSION = 2;
|
|
5
5
|
|
|
6
6
|
export interface KnowledgeDbStats {
|
|
7
7
|
schema_version: number;
|
|
@@ -184,6 +184,21 @@ INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
|
184
184
|
VALUES (1, datetime('now'));
|
|
185
185
|
`;
|
|
186
186
|
|
|
187
|
+
const MIGRATION_2 = `
|
|
188
|
+
DROP TABLE IF EXISTS chunks_fts;
|
|
189
|
+
|
|
190
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
|
|
191
|
+
chunk_id UNINDEXED,
|
|
192
|
+
text,
|
|
193
|
+
title,
|
|
194
|
+
source_uri,
|
|
195
|
+
tokenize='porter unicode61'
|
|
196
|
+
);
|
|
197
|
+
|
|
198
|
+
INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
199
|
+
VALUES (2, datetime('now'));
|
|
200
|
+
`;
|
|
201
|
+
|
|
187
202
|
export function openKnowledgeDb(path: string): Database {
|
|
188
203
|
ensureParentDir(path);
|
|
189
204
|
const db = new Database(path);
|
|
@@ -195,6 +210,7 @@ export function migrateKnowledgeDb(path: string): { path: string; schema_version
|
|
|
195
210
|
const db = openKnowledgeDb(path);
|
|
196
211
|
try {
|
|
197
212
|
db.exec(MIGRATION_1);
|
|
213
|
+
if (getSchemaVersion(db) < 2) db.exec(MIGRATION_2);
|
|
198
214
|
return { path, schema_version: getSchemaVersion(db) };
|
|
199
215
|
} finally {
|
|
200
216
|
db.close();
|
|
package/src/manifest-ingest.ts
ADDED
|
@@ -0,0 +1,423 @@
|
|
|
1
|
+
import { createHash } from 'node:crypto';
|
|
2
|
+
import { existsSync, readFileSync } from 'node:fs';
|
|
3
|
+
import { basename } from 'node:path';
|
|
4
|
+
import type { Database } from 'bun:sqlite';
|
|
5
|
+
import { migrateKnowledgeDb, openKnowledgeDb } from './knowledge-db';
|
|
6
|
+
import { parseSourceRef, type SourceRef } from './source-ref';
|
|
7
|
+
import type { KnowledgeConfig } from './workspace';
|
|
8
|
+
|
|
9
|
+
export interface ManifestIngestOptions {
|
|
10
|
+
dbPath: string;
|
|
11
|
+
input: string;
|
|
12
|
+
config?: KnowledgeConfig;
|
|
13
|
+
now?: Date;
|
|
14
|
+
maxChunkChars?: number;
|
|
15
|
+
chunkOverlapChars?: number;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
export interface ManifestIngestResult {
|
|
19
|
+
path: string;
|
|
20
|
+
db_path: string;
|
|
21
|
+
items_seen: number;
|
|
22
|
+
sources_upserted: number;
|
|
23
|
+
revisions_upserted: number;
|
|
24
|
+
chunks_inserted: number;
|
|
25
|
+
chunks_deleted: number;
|
|
26
|
+
skipped: number;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
type ManifestObject = Record<string, unknown>;
|
|
30
|
+
|
|
31
|
+
interface NormalizedManifestItem {
|
|
32
|
+
raw: ManifestObject;
|
|
33
|
+
sourceRef: string;
|
|
34
|
+
sourceUri: string;
|
|
35
|
+
kind: SourceRef['kind'];
|
|
36
|
+
title: string | null;
|
|
37
|
+
revision: string;
|
|
38
|
+
hash: string | null;
|
|
39
|
+
extractedTextUri: string | null;
|
|
40
|
+
text: string | null;
|
|
41
|
+
metadata: ManifestObject;
|
|
42
|
+
acl: unknown;
|
|
43
|
+
status: string;
|
|
44
|
+
updatedAt: string;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
function stableId(prefix: string, value: string): string {
|
|
48
|
+
return `${prefix}_${createHash('sha256').update(value).digest('hex').slice(0, 20)}`;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
function asObject(value: unknown): ManifestObject | undefined {
|
|
52
|
+
return value && typeof value === 'object' && !Array.isArray(value) ? value as ManifestObject : undefined;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
function asString(value: unknown): string | undefined {
|
|
56
|
+
return typeof value === 'string' && value.length > 0 ? value : undefined;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
function asNumber(value: unknown): number | undefined {
|
|
60
|
+
return typeof value === 'number' && Number.isFinite(value) ? value : undefined;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
/**
 * Work out the source reference for a manifest entry.
 *
 * Resolution order:
 * 1. an explicit `source_ref`, `source_uri` or `uri` string;
 * 2. `file_id`, optionally with `revision_id`/`revision`, rendered as an
 *    `open-files://file/...` reference;
 * 3. `source_id` together with `path`, rendered as an
 *    `open-files://source/.../path/...` reference.
 *
 * @param item - Raw manifest entry.
 * @returns The source reference string.
 * @throws Error when none of the identifying fields is present.
 */
function buildSourceRefFromItem(item: ManifestObject): string {
  // An explicit reference always wins; the keys are tried in priority order.
  for (const key of ['source_ref', 'source_uri', 'uri']) {
    const explicit = asString(item[key]);
    if (explicit) return explicit;
  }

  const fileId = asString(item.file_id);
  if (fileId) {
    const revision = asString(item.revision_id) ?? asString(item.revision);
    const fileRef = `open-files://file/${encodeURIComponent(fileId)}`;
    if (!revision) return fileRef;
    return `${fileRef}/revision/${encodeURIComponent(revision)}`;
  }

  const sourceId = asString(item.source_id);
  const path = asString(item.path);
  if (!sourceId || !path) {
    throw new Error('Manifest item is missing source_ref, file_id, or source_id/path.');
  }
  return `open-files://source/${encodeURIComponent(sourceId)}/path/${encodeURIComponent(path)}`;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Reduce a source reference to the URI that identifies the source itself.
 *
 * For an open-files file reference that carries a revision id, the trailing
 * `/revision/<id>` segment is removed; every other reference is returned
 * unchanged.
 *
 * @param sourceRef - Reference string for the manifest entry.
 * @param parsed - The same reference as parsed by `parseSourceRef`.
 * @returns The revision-independent source URI.
 */
function baseSourceUri(sourceRef: string, parsed: SourceRef): string {
  const isFileRevision =
    parsed.kind === 'open-files' && parsed.entity === 'file' && Boolean(parsed.revision_id);
  return isFileRevision ? sourceRef.replace(/\/revision\/[^/]+$/, '') : sourceRef;
}
|
|
89
|
+
|
|
90
|
+
/**
 * Pick the text embedded in a manifest entry, if any.
 *
 * The first non-empty string among `extracted_text`, `text`, `content_text`
 * and `markdown` is used. Failing that, `content` is returned when it is a
 * string (an empty string included).
 *
 * @param item - Raw manifest entry.
 * @returns The embedded text, or `null` when the entry has none.
 */
function textFromItem(item: ManifestObject): string | null {
  for (const key of ['extracted_text', 'text', 'content_text', 'markdown']) {
    const candidate = asString(item[key]);
    if (candidate !== undefined) return candidate;
  }
  const fallback = item.content;
  if (typeof fallback === 'string') return fallback;
  return null;
}
|
|
100
|
+
|
|
101
|
+
/**
 * Find a reference to extracted text that is stored outside the manifest.
 *
 * Top-level `extracted_text_ref`, `extracted_text_uri` and `text_ref` are
 * tried first; otherwise the same two `extracted_text_*` keys are looked up
 * inside `content` when that is an object.
 *
 * @param item - Raw manifest entry.
 * @returns The reference string, or `null` when none is present.
 */
function extractedTextUriFromItem(item: ManifestObject): string | null {
  for (const key of ['extracted_text_ref', 'extracted_text_uri', 'text_ref']) {
    const topLevel = asString(item[key]);
    if (topLevel) return topLevel;
  }
  const nested = asObject(item.content);
  const nestedRef = asString(nested?.extracted_text_ref) ?? asString(nested?.extracted_text_uri);
  return nestedRef ?? null;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Choose a display title for a manifest entry.
 *
 * @param item - Raw manifest entry.
 * @returns `title`, else `name`, else the base name of `path`, else `null`.
 */
function titleFromItem(item: ManifestObject): string | null {
  const explicit = asString(item.title) ?? asString(item.name);
  if (explicit !== undefined) return explicit;
  const path = asString(item.path);
  return path === undefined ? null : basename(path);
}
|
|
112
|
+
|
|
113
|
+
/**
 * Read the content hash recorded on a manifest entry.
 *
 * @param item - Raw manifest entry.
 * @returns The first non-empty string among `hash`, `checksum` and `sha256`,
 *   or `null` when none is set.
 */
function hashFromItem(item: ManifestObject): string | null {
  for (const key of ['hash', 'checksum', 'sha256']) {
    const candidate = asString(item[key]);
    if (candidate !== undefined) return candidate;
  }
  return null;
}
|
|
116
|
+
|
|
117
|
+
/**
 * Decide which revision label a manifest entry belongs to.
 *
 * Preference order: `revision_id`, `revision`, `version_id`, the revision id
 * carried by an open-files reference, the content hash, `updated_at`, and
 * finally the literal `'current'`.
 *
 * @param item - Raw manifest entry.
 * @param parsed - The entry's source reference as parsed by `parseSourceRef`.
 * @param hash - Content hash of the entry, or `null`.
 * @returns The revision label.
 */
function revisionFromItem(item: ManifestObject, parsed: SourceRef, hash: string | null): string {
  const explicit = asString(item.revision_id) ?? asString(item.revision) ?? asString(item.version_id);
  if (explicit !== undefined) return explicit;

  const fromRef = parsed.kind === 'open-files' ? parsed.revision_id : undefined;
  return fromRef ?? hash ?? asString(item.updated_at) ?? 'current';
}
|
|
127
|
+
|
|
128
|
+
/**
 * Build the metadata object stored alongside a source and its revision.
 *
 * Every property of the entry is copied except the embedded-text fields
 * (`text`, `content`, `content_text`, `extracted_text`, `markdown`). The
 * resolved `source_ref`, `source_uri` and `status` are then written on top,
 * replacing any values the entry carried under those keys.
 *
 * @param item - Raw manifest entry.
 * @param normalized - Resolved reference, URI and status for the entry.
 * @returns A new metadata object; `item` is not modified.
 */
function metadataFromItem(item: ManifestObject, normalized: {
  sourceRef: string;
  sourceUri: string;
  status: string;
}): ManifestObject {
  const textKeys = new Set(['text', 'content', 'content_text', 'extracted_text', 'markdown']);
  const metadata: ManifestObject = {};
  Object.entries(item).forEach(([key, value]) => {
    if (!textKeys.has(key)) metadata[key] = value;
  });

  const { sourceRef, sourceUri, status } = normalized;
  metadata.source_ref = sourceRef;
  metadata.source_uri = sourceUri;
  metadata.status = status;
  return metadata;
}
|
|
143
|
+
|
|
144
|
+
/**
 * Convert a raw manifest entry into the normalized shape used for ingestion.
 *
 * @param item - Raw manifest entry.
 * @param now - ISO timestamp used as `updatedAt` when the entry has no
 *   `updated_at` string.
 * @returns The normalized entry.
 * @throws Error when the entry cannot be identified (see
 *   `buildSourceRefFromItem`); any error raised by `parseSourceRef` also
 *   propagates.
 */
function normalizeManifestItem(item: ManifestObject, now: string): NormalizedManifestItem {
  // Identity first: everything else is derived from the resolved reference.
  const sourceRef = buildSourceRefFromItem(item);
  const parsed = parseSourceRef(sourceRef);
  const sourceUri = baseSourceUri(sourceRef, parsed);

  const status = asString(item.status) ?? 'active';
  const hash = hashFromItem(item);
  const revision = revisionFromItem(item, parsed, hash);
  const metadata = metadataFromItem(item, { sourceRef, sourceUri, status });

  const normalized: NormalizedManifestItem = {
    raw: item,
    sourceRef,
    sourceUri,
    kind: parsed.kind,
    title: titleFromItem(item),
    revision,
    hash,
    extractedTextUri: extractedTextUriFromItem(item),
    text: textFromItem(item),
    metadata,
    acl: item.permissions ?? item.acl ?? {},
    status,
    updatedAt: asString(item.updated_at) ?? now,
  };
  return normalized;
}
|
|
166
|
+
|
|
167
|
+
/**
 * Parse manifest text into a list of raw manifest entries.
 *
 * Accepted layouts:
 * - a JSON array of objects;
 * - a JSON object with an `items` array of objects;
 * - a single JSON object, which is treated as one entry;
 * - JSONL, one JSON object per non-blank line.
 *
 * Only a failure of `JSON.parse` on the whole document triggers the JSONL
 * fallback. Validation errors on a document that parsed correctly are
 * reported as they are, and a pretty-printed single entry is accepted
 * whichever identifying keys it uses.
 *
 * @param text - Raw manifest contents.
 * @returns The manifest entries; an empty array for blank input.
 * @throws Error when an entry is not an object.
 * @throws SyntaxError when the text is neither valid JSON nor valid JSONL.
 */
function parseManifestText(text: string): ManifestObject[] {
  const trimmed = text.trim();
  if (!trimmed) return [];

  // Checks that every entry is a plain object, failing with the given message.
  const requireObjects = (entries: unknown[], message: string): ManifestObject[] =>
    entries.map((entry) => {
      const item = asObject(entry);
      if (!item) throw new Error(message);
      return item;
    });

  const nonBlankLines = (): string[] => trimmed.split(/\r?\n/).filter((line) => line.trim().length > 0);

  // Each line is parsed and validated in turn so the first bad line decides the error.
  const parseJsonl = (lines: string[]): ManifestObject[] =>
    lines.map((line) => {
      const item = asObject(JSON.parse(line));
      if (!item) throw new Error('Manifest JSONL entries must be objects.');
      return item;
    });

  if (trimmed.startsWith('[')) {
    const parsed = JSON.parse(trimmed);
    if (!Array.isArray(parsed)) throw new Error('Manifest array parse failed.');
    return requireObjects(parsed, 'Manifest array entries must be objects.');
  }

  if (trimmed.startsWith('{')) {
    let document: unknown;
    try {
      document = JSON.parse(trimmed);
    } catch (error) {
      // Not one JSON document. Several lines means JSONL; a single line is
      // simply malformed, so the original parse error is the useful one.
      const lines = nonBlankLines();
      if (lines.length <= 1) throw error;
      return parseJsonl(lines);
    }

    const object = asObject(document);
    if (!object) throw new Error('Manifest object parse failed.');
    if (Array.isArray(object.items)) {
      return requireObjects(object.items, 'Manifest items entries must be objects.');
    }
    // A lone object is one entry; identifying fields are checked during normalization.
    return [object];
  }

  return parseJsonl(nonBlankLines());
}
|
|
211
|
+
|
|
212
|
+
/**
 * Download an S3 object and return its body as text.
 *
 * The AWS SDK modules are imported lazily, so they are only loaded when an
 * `s3://` manifest is actually requested. Region, profile credentials and
 * retry limit come from `config.storage.s3`, but only when the configured
 * storage type is `s3` and its bucket matches the bucket in the URI;
 * otherwise the client is created with those options left undefined.
 *
 * @param uri - Object location in the form `s3://bucket/key`.
 * @param config - Optional knowledge configuration.
 * @returns The object body as a string, or `''` when the response has no body.
 * @throws Error when the URI has no bucket or no key.
 */
async function readS3Text(uri: string, config?: KnowledgeConfig): Promise<string> {
  const { hostname: bucket, pathname } = new URL(uri);
  const key = decodeURIComponent(pathname.replace(/^\/+/, ''));
  if (!bucket || !key) throw new Error(`Invalid S3 manifest URI: ${uri}`);

  const [s3Module, credentialModule] = await Promise.all([
    import('@aws-sdk/client-s3'),
    import('@aws-sdk/credential-providers'),
  ]);
  const { S3Client, GetObjectCommand } = s3Module;
  const { fromIni } = credentialModule;

  // Reuse the configured S3 settings only for the bucket they were written for.
  const s3Config = config?.storage.type === 's3' && config.storage.s3?.bucket === bucket
    ? config.storage.s3
    : undefined;
  const credentials = s3Config?.profile ? fromIni({ profile: s3Config.profile }) : undefined;
  const client = new S3Client({
    region: s3Config?.region,
    credentials,
    maxAttempts: s3Config?.max_attempts,
  });

  const request = new GetObjectCommand({ Bucket: bucket, Key: key });
  const { Body: body } = await client.send(request);
  if (!body) return '';
  return await body.transformToString();
}
|
|
231
|
+
|
|
232
|
+
/**
 * Load manifest text from an `s3://` URI or from a local file.
 *
 * @param input - Either an `s3://bucket/key` URI or a filesystem path.
 * @param config - Optional knowledge configuration, forwarded to the S3 reader.
 * @returns The manifest contents decoded as UTF-8 (local file) or as returned
 *   by `readS3Text`.
 * @throws Error when a local path does not exist.
 */
async function readManifestInput(input: string, config?: KnowledgeConfig): Promise<string> {
  const isRemote = input.startsWith('s3://');
  if (isRemote) {
    return readS3Text(input, config);
  }
  if (existsSync(input)) {
    return readFileSync(input, 'utf8');
  }
  throw new Error(`Manifest not found: ${input}`);
}
|
|
237
|
+
|
|
238
|
+
/** One piece of text produced by `chunkText`. */
interface TextChunk {
  /** Zero-based position of the chunk in the output list. */
  ordinal: number;
  /** Chunk contents with leading and trailing whitespace trimmed. */
  text: string;
  /** Start index of the untrimmed slice, measured in the text after `\r\n` has been replaced by `\n`. */
  startOffset: number;
  /** End index (exclusive) of the untrimmed slice, in the same normalized text. */
  endOffset: number;
}
|
|
244
|
+
|
|
245
|
+
/**
 * Split text into overlapping chunks of roughly `maxChars` characters.
 *
 * Line endings are normalized (`\r\n` becomes `\n`) before splitting. Each
 * chunk ends at the hard limit unless a paragraph break (`\n\n`) or a
 * sentence break (`. `) lies in the second half of the window, in which case
 * the chunk ends just after that break. The next chunk starts `overlapChars`
 * before the end of the previous one.
 *
 * Forward progress is guaranteed: if applying the overlap would not move the
 * start position forward (possible when a break shortens a chunk to less than
 * `overlapChars`), the next chunk starts at the end of the current one
 * instead, so the loop always terminates.
 *
 * @param text - Text to split.
 * @param maxChars - Window size in characters; must be greater than zero.
 * @param overlapChars - Characters shared between consecutive chunks; the
 *   caller is expected to pass a non-negative value.
 * @returns Chunks in order; empty when the text is blank. Offsets refer to
 *   the normalized text and describe the slice before trimming.
 * @throws Error when `maxChars` is not a positive number and the text is not blank.
 */
function chunkText(text: string, maxChars: number, overlapChars: number): TextChunk[] {
  const normalized = text.replace(/\r\n/g, '\n');
  if (!normalized.trim()) return [];
  // A zero, negative or NaN window can never consume the text.
  if (!(maxChars > 0)) throw new Error('maxChars must be a positive number.');

  const chunks: TextChunk[] = [];
  let start = 0;
  while (start < normalized.length) {
    const hardEnd = Math.min(normalized.length, start + maxChars);
    let end = hardEnd;
    if (hardEnd < normalized.length) {
      const paragraphBreak = normalized.lastIndexOf('\n\n', hardEnd);
      const sentenceBreak = normalized.lastIndexOf('. ', hardEnd);
      const candidate = Math.max(paragraphBreak, sentenceBreak);
      // Only break early when at least half of the window is kept.
      if (candidate > start + Math.floor(maxChars * 0.5)) {
        end = candidate + (candidate === paragraphBreak ? 2 : 1);
      }
    }

    const chunk = normalized.slice(start, end).trim();
    if (chunk) {
      chunks.push({
        ordinal: chunks.length,
        text: chunk,
        startOffset: start,
        endOffset: end,
      });
    }
    if (end >= normalized.length) break;

    // Step back by the overlap, but never to or before the current start.
    const overlappedStart = end - overlapChars;
    start = overlappedStart > start ? overlappedStart : end;
  }
  return chunks;
}
|
|
273
|
+
|
|
274
|
+
/**
 * Give a rough token count for a piece of text.
 *
 * The estimate is 1.25 tokens per whitespace-separated word, rounded up,
 * and never less than one.
 *
 * @param text - Text to measure.
 * @returns The estimated token count (at least 1).
 */
function estimateTokenCount(text: string): number {
  const words = text.match(/\S+/g) ?? [];
  const estimate = Math.ceil(words.length * 1.25);
  return estimate < 1 ? 1 : estimate;
}
|
|
278
|
+
|
|
279
|
+
/**
 * Remove every chunk stored for a source revision, together with the
 * matching rows in the `chunks_fts` table.
 *
 * @param db - Open knowledge database.
 * @param sourceRevisionId - Id of the revision whose chunks are removed.
 * @returns The number of chunk rows that existed for the revision.
 */
function deleteChunksForRevision(db: Database, sourceRevisionId: string): number {
  const selectIds = db.query<{ id: string }, [string]>('SELECT id FROM chunks WHERE source_revision_id = ?');
  const chunkIds = selectIds.all(sourceRevisionId).map((row) => row.id);

  // The FTS rows are keyed by chunk id, so they are removed one chunk at a time.
  chunkIds.forEach((chunkId) => {
    db.run('DELETE FROM chunks_fts WHERE chunk_id = ?', [chunkId]);
  });
  db.run('DELETE FROM chunks WHERE source_revision_id = ?', [sourceRevisionId]);

  return chunkIds.length;
}
|
|
287
|
+
|
|
288
|
+
/**
 * Insert the source row for a manifest entry, or refresh it when a row with
 * the same URI already exists.
 *
 * On conflict the kind, title, metadata, ACL and `updated_at` columns are
 * overwritten; `id` and `created_at` of the existing row are left alone.
 *
 * @param db - Open knowledge database.
 * @param item - Normalized manifest entry.
 * @param now - ISO timestamp written to `created_at` for new rows.
 * @returns The id of the row stored for `item.sourceUri`.
 * @throws Error when the row cannot be read back after the upsert.
 */
function upsertSource(db: Database, item: NormalizedManifestItem, now: string): string {
  const { sourceUri } = item;
  const upsertSql = `INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?)
     ON CONFLICT(uri) DO UPDATE SET
       kind = excluded.kind,
       title = excluded.title,
       metadata_json = excluded.metadata_json,
       acl_json = excluded.acl_json,
       updated_at = excluded.updated_at`;
  const params = [
    stableId('src', sourceUri),
    sourceUri,
    item.kind,
    item.title,
    JSON.stringify(item.metadata),
    JSON.stringify(item.acl ?? {}),
    now,
    item.updatedAt,
  ];
  db.run(upsertSql, params);

  // Read the id back: an existing row keeps the id it was created with.
  const stored = db.query<{ id: string }, [string]>('SELECT id FROM sources WHERE uri = ?').get(sourceUri);
  if (stored) return stored.id;
  throw new Error(`Failed to upsert source: ${sourceUri}`);
}
|
|
314
|
+
|
|
315
|
+
/**
 * Insert the revision row for a manifest entry, or refresh it when the
 * source already has a row with the same revision label.
 *
 * On conflict the hash, extracted-text URI and metadata are overwritten;
 * `id` and `created_at` of the existing row are left alone.
 *
 * @param db - Open knowledge database.
 * @param sourceId - Id of the owning source row.
 * @param item - Normalized manifest entry.
 * @param now - ISO timestamp written to `created_at` for new rows.
 * @returns The id of the row stored for this source and revision.
 * @throws Error when the row cannot be read back after the upsert.
 */
function upsertRevision(db: Database, sourceId: string, item: NormalizedManifestItem, now: string): string {
  const { revision } = item;
  const upsertSql = `INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)
     VALUES (?, ?, ?, ?, ?, ?, ?)
     ON CONFLICT(source_id, revision) DO UPDATE SET
       hash = excluded.hash,
       extracted_text_uri = excluded.extracted_text_uri,
       metadata_json = excluded.metadata_json`;
  const params = [
    // A NUL separator keeps (sourceId, revision) pairs from colliding when concatenated.
    stableId('rev', `${sourceId}\u0000${revision}`),
    sourceId,
    revision,
    item.hash,
    item.extractedTextUri,
    JSON.stringify(item.metadata),
    now,
  ];
  db.run(upsertSql, params);

  const lookup = db.query<{ id: string }, [string, string]>(
    'SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?',
  );
  const stored = lookup.get(sourceId, revision);
  if (stored) return stored.id;
  throw new Error(`Failed to upsert source revision: ${item.sourceRef}`);
}
|
|
340
|
+
|
|
341
|
+
/**
 * Chunk the text embedded in a manifest entry and store the chunks for a
 * source revision, including one `chunks_fts` row per chunk.
 *
 * Nothing is written when the entry has no text or its status is `deleted`
 * (compared case-insensitively). Existing chunks are not removed here.
 *
 * @param db - Open knowledge database.
 * @param sourceRevisionId - Id of the revision the chunks belong to.
 * @param item - Normalized manifest entry.
 * @param now - ISO timestamp written to `created_at`.
 * @param maxChars - Chunk window size passed to `chunkText`.
 * @param overlapChars - Chunk overlap passed to `chunkText`.
 * @returns The number of chunks inserted.
 */
function insertChunks(db: Database, sourceRevisionId: string, item: NormalizedManifestItem, now: string, maxChars: number, overlapChars: number): number {
  if (!item.text) return 0;
  if (item.status.toLowerCase() === 'deleted') return 0;

  // The per-chunk metadata only depends on the entry, so it is serialized once.
  const metadataJson = JSON.stringify({
    source_ref: item.sourceRef,
    source_uri: item.sourceUri,
    hash: item.hash,
    status: item.status,
    path: asString(item.raw.path) ?? null,
    mime: asString(item.raw.mime) ?? asString(item.raw.content_type) ?? null,
    size: asNumber(item.raw.size) ?? null,
  });
  const insertChunkSql = `INSERT INTO chunks (id, source_revision_id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json, created_at)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`;
  const insertFtsSql = 'INSERT INTO chunks_fts (chunk_id, text, title, source_uri) VALUES (?, ?, ?, ?)';
  const ftsTitle = item.title ?? '';

  const pieces = chunkText(item.text, maxChars, overlapChars);
  pieces.forEach((piece) => {
    // The id covers revision, position and content.
    const chunkId = stableId('chk', `${sourceRevisionId}\u0000${piece.ordinal}\u0000${piece.text}`);
    db.run(insertChunkSql, [
      chunkId,
      sourceRevisionId,
      'source',
      piece.ordinal,
      piece.text,
      estimateTokenCount(piece.text),
      piece.startOffset,
      piece.endOffset,
      metadataJson,
      now,
    ]);
    db.run(insertFtsSql, [chunkId, piece.text, ftsTitle, item.sourceUri]);
  });
  return pieces.length;
}
|
|
378
|
+
|
|
379
|
+
/**
 * Ingest an open-files source manifest into the knowledge database.
 *
 * Steps: validate the chunking options, migrate the database, read and parse
 * the manifest, then, inside a single `db.transaction(...)` callback, upsert
 * a source and a revision for every entry and rebuild that revision's chunks.
 * Chunks of a revision are deleted when the entry embeds text or is marked
 * `deleted`; new chunks are inserted only for entries that embed text and
 * are not deleted.
 *
 * @param options - Ingest options: `input` (file path or `s3://` URI),
 *   `dbPath`, and optional `config`, `now`, `maxChunkChars` (default 4000)
 *   and `chunkOverlapChars` (default 200).
 * @returns Counters describing what was seen and written. `skipped` is
 *   always 0 because no entry is skipped; an invalid entry throws instead.
 * @throws Error when `maxChunkChars` is not a number of at least 500, when
 *   `chunkOverlapChars` is not a number in `[0, maxChunkChars)`, when the
 *   manifest cannot be read or parsed, or when an entry cannot be normalized.
 */
export async function ingestOpenFilesManifest(options: ManifestIngestOptions): Promise<ManifestIngestResult> {
  const now = (options.now ?? new Date()).toISOString();
  const maxChunkChars = options.maxChunkChars ?? 4000;
  const chunkOverlapChars = options.chunkOverlapChars ?? 200;
  // The comparisons are negated so that NaN is rejected as well: `NaN < 500`
  // is false and would otherwise slip through and yield zero chunks.
  if (!(maxChunkChars >= 500)) throw new Error('maxChunkChars must be at least 500.');
  if (!(chunkOverlapChars >= 0 && chunkOverlapChars < maxChunkChars)) {
    throw new Error('chunkOverlapChars must be less than maxChunkChars.');
  }

  migrateKnowledgeDb(options.dbPath);
  const text = await readManifestInput(options.input, options.config);
  const items = parseManifestText(text);
  const db = openKnowledgeDb(options.dbPath);
  try {
    // NOTE(review): presumably the transaction wrapper rolls back when the
    // callback throws, making the ingest all-or-nothing — confirm against the
    // database driver's documentation.
    const result = db.transaction(() => {
      const seenSources = new Set<string>();
      const seenRevisions = new Set<string>();
      let chunksInserted = 0;
      let chunksDeleted = 0;
      // Never incremented: there is currently no code path that skips an entry.
      const skipped = 0;
      for (const raw of items) {
        const item = normalizeManifestItem(raw, now);
        const sourceId = upsertSource(db, item, now);
        const revisionId = upsertRevision(db, sourceId, item, now);
        seenSources.add(sourceId);
        seenRevisions.add(revisionId);
        // Replace chunks when new text arrives; drop them when the entry is deleted.
        if (item.text || item.status.toLowerCase() === 'deleted') {
          chunksDeleted += deleteChunksForRevision(db, revisionId);
        }
        chunksInserted += insertChunks(db, revisionId, item, now, maxChunkChars, chunkOverlapChars);
      }
      return {
        path: options.input,
        db_path: options.dbPath,
        items_seen: items.length,
        sources_upserted: seenSources.size,
        revisions_upserted: seenRevisions.size,
        chunks_inserted: chunksInserted,
        chunks_deleted: chunksDeleted,
        skipped,
      };
    })();
    return result;
  } finally {
    db.close();
  }
}
|