@hasna/knowledge 0.2.14 → 0.2.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -2
- package/bin/open-knowledge-mcp.js +432 -2
- package/bin/open-knowledge.js +80 -40
- package/docs/architecture/ai-native-knowledge-base.md +7 -5
- package/docs/architecture/hybrid-semantic-search.md +17 -11
- package/package.json +1 -1
- package/src/cli.ts +24 -4
- package/src/mcp.js +17 -0
- package/src/search.ts +510 -0
- package/src/service.ts +10 -0
- package/src/wiki-layout.ts +41 -1
package/bin/open-knowledge.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env bun
|
|
2
2
|
// @bun
|
|
3
|
-
var I=import.meta.require;import{readFileSync as
|
|
4
|
-
`);return t}function
|
|
3
|
+
var I=import.meta.require;import{readFileSync as se,writeFileSync as re,existsSync as ie,renameSync as Qt,unlinkSync as qe}from"fs";import{randomUUID as ze}from"crypto";import{existsSync as Ht,mkdirSync as ye,readFileSync as qt,writeFileSync as zt}from"fs";import{homedir as Xe}from"os";import{dirname as Gt,join as O,resolve as Jt}from"path";var Y=O(".hasna","apps","knowledge");function be(){return O(Xe(),".open-knowledge","db.json")}function Te(){return O(Xe(),".hasna","apps","knowledge")}function Yt(e=process.cwd()){return Jt(e,Y)}function J(e){return{home:e,configPath:O(e,"config.json"),jsonStorePath:O(e,"db.json"),knowledgeDbPath:O(e,"knowledge.db"),artifactsDir:O(e,"artifacts"),cacheDir:O(e,"cache"),exportsDir:O(e,"exports"),indexesDir:O(e,"indexes"),logsDir:O(e,"logs"),runsDir:O(e,"runs"),schemasDir:O(e,"schemas"),wikiDir:O(e,"wiki")}}function Vt(){return{version:1,mode:"local",storage:{type:"local",artifacts_root:"artifacts"},sources:{preferred_ref:"open-files",allowed_schemes:["open-files","s3","file","https","http"]},providers:{default_model:"openai:gpt-5.2",aliases:{fast:"openai:gpt-5-mini",reasoning:"anthropic:claude-opus-4-6",sonnet:"anthropic:claude-sonnet-4-6",deepseek:"deepseek:deepseek-chat","deepseek-reasoning":"deepseek:deepseek-reasoner"},openai:{api_key_env:"OPENAI_API_KEY",default_model:"gpt-5.2"},anthropic:{api_key_env:"ANTHROPIC_API_KEY",default_model:"claude-sonnet-4-6"},deepseek:{api_key_env:"DEEPSEEK_API_KEY",default_model:"deepseek-chat"}},embeddings:{default_model:"openai:text-embedding-3-small",dimensions:1536,batch_size:64,max_parallel_calls:4},safety:{network:{web_search_enabled:!1,s3_reads_enabled:!1,allowed_s3_buckets:[]},redaction:{enabled:!0},approvals:{generated_writes_require_approval:!0}}}}function $e(e){let t=J(e);ye(t.home,{recursive:!0});for(let n 
of[t.artifactsDir,t.cacheDir,t.exportsDir,t.indexesDir,t.logsDir,t.runsDir,t.schemasDir,t.wikiDir])ye(n,{recursive:!0});if(!Ht(t.configPath))zt(t.configPath,`${JSON.stringify(Vt(),null,2)}
|
|
4
|
+
`);return t}function Be(e,t=process.cwd()){if(e==="project"||e==="local")return J(Yt(t));return J(Te())}function ne(e){ye(Gt(e),{recursive:!0})}function He(e){let t=qt(e,"utf8");return JSON.parse(t)}function ve(){return J(Te()).jsonStorePath}function xe(e){if(!ie(e))if(ne(e),e===ve()&&ie(be()))re(e,se(be(),"utf8"));else re(e,JSON.stringify({items:[]},null,2))}function Zt(e){return`${e}.lock`}function en(e,t){let i=Date.now();while(Date.now()-i<5000){try{if(!ie(e)){re(e,JSON.stringify({owner:t,ts:Date.now()}));return}let d=JSON.parse(se(e,"utf8"));if(Date.now()-d.ts>1e4)qe(e)}catch{}let s=Date.now();while(Date.now()-s<50);}throw Error(`Could not acquire lock on ${e} after 5000ms`)}function tn(e,t){try{if(ie(e)){if(JSON.parse(se(e,"utf8")).owner===t)qe(e)}}catch{}}function L(e){xe(e);let t=se(e,"utf8"),n=JSON.parse(t);if(!n||!Array.isArray(n.items))return{items:[]};return n}function j(e,t){let n=`${e}.tmp.${ze()}`;re(n,JSON.stringify(t,null,2)),Qt(n,e)}function C(e,t){let n=ze(),r=Zt(e);en(r,n);try{return t()}finally{tn(r,n)}}function Se(){return`k_${Date.now().toString(36)}_${Math.random().toString(36).slice(2,8)}`}function Ge(e){return e.replace(/^k_/,"").slice(0,12)}import{Database as nn}from"bun:sqlite";var rn=`
|
|
5
5
|
PRAGMA journal_mode = WAL;
|
|
6
6
|
PRAGMA foreign_keys = ON;
|
|
7
7
|
|
|
@@ -168,7 +168,7 @@ CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
|
|
|
168
168
|
|
|
169
169
|
INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
170
170
|
VALUES (1, datetime('now'));
|
|
171
|
-
`,
|
|
171
|
+
`,sn=`
|
|
172
172
|
DROP TABLE IF EXISTS chunks_fts;
|
|
173
173
|
|
|
174
174
|
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
|
|
@@ -181,7 +181,7 @@ CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
|
|
|
181
181
|
|
|
182
182
|
INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
183
183
|
VALUES (2, datetime('now'));
|
|
184
|
-
`,
|
|
184
|
+
`,on=`
|
|
185
185
|
CREATE TABLE IF NOT EXISTS audit_events (
|
|
186
186
|
id TEXT PRIMARY KEY,
|
|
187
187
|
event_type TEXT NOT NULL,
|
|
@@ -212,7 +212,7 @@ CREATE INDEX IF NOT EXISTS idx_approval_gates_status ON approval_gates(status);
|
|
|
212
212
|
|
|
213
213
|
INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
214
214
|
VALUES (3, datetime('now'));
|
|
215
|
-
`,
|
|
215
|
+
`,an=`
|
|
216
216
|
CREATE TABLE IF NOT EXISTS vector_index_entries (
|
|
217
217
|
id TEXT PRIMARY KEY,
|
|
218
218
|
chunk_id TEXT NOT NULL REFERENCES chunks(id) ON DELETE CASCADE,
|
|
@@ -243,7 +243,7 @@ CREATE INDEX IF NOT EXISTS idx_vector_index_status ON vector_index_entries(statu
|
|
|
243
243
|
|
|
244
244
|
INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
245
245
|
VALUES (4, datetime('now'));
|
|
246
|
-
`;function
|
|
246
|
+
`;function S(e){ne(e);let t=new nn(e);return t.exec("PRAGMA foreign_keys = ON;"),t.exec("PRAGMA busy_timeout = 5000;"),t}function w(e){let t=S(e);try{if(t.exec(rn),V(t)<2)t.exec(sn);if(V(t)<3)t.exec(on);if(V(t)<4)t.exec(an);return{path:e,schema_version:V(t)}}finally{t.close()}}function V(e){return e.query("SELECT MAX(version) AS version FROM schema_versions").get()?.version??0}function N(e,t){return e.query(`SELECT COUNT(*) AS n FROM ${t}`).get()?.n??0}function Je(e){let t=S(e);try{return{schema_version:V(t),sources:N(t,"sources"),source_revisions:N(t,"source_revisions"),chunks:N(t,"chunks"),wiki_pages:N(t,"wiki_pages"),citations:N(t,"citations"),indexes:N(t,"knowledge_indexes"),runs:N(t,"runs"),run_events:N(t,"run_events"),redaction_findings:N(t,"redaction_findings"),audit_events:N(t,"audit_events"),approval_gates:N(t,"approval_gates"),storage_objects:N(t,"storage_objects"),embeddings:N(t,"chunk_embeddings"),vector_entries:N(t,"vector_index_entries")}}finally{t.close()}}import{existsSync as cn,mkdirSync as Ye,readFileSync as un,writeFileSync as dn}from"fs";import{dirname as ln,join as we,relative as _n,sep as fn}from"path";function Q(e){let t=e.replace(/\\/g,"/").trim();if(!t||t.startsWith("/"))throw Error(`Invalid artifact key: ${e}`);let n=t.split("/").filter(Boolean);if(n.length===0||n.some((r)=>r==="."||r===".."))throw Error(`Invalid artifact key: ${e}`);return n.join("/")}function Re(e,t){let n=_n(e,t);if(n.startsWith("..")||n===".."||n.startsWith(`..${fn}`))throw Error(`Artifact path escapes root: ${t}`)}class Ve{root;type="local";canRead=!0;canWrite=!0;constructor(e){this.root=e;Ye(e,{recursive:!0})}async put(e){let t=Q(e.key),n=we(this.root,t);return Re(this.root,n),Ye(ln(n),{recursive:!0}),dn(n,e.body),{key:t,uri:`file://${n}`}}async getText(e){let t=Q(e),n=we(this.root,t);return Re(this.root,n),un(n,"utf8")}async exists(e){let t=Q(e),n=we(this.root,t);return Re(this.root,n),cn(n)}}class 
Qe{options;type="s3";canRead=!0;canWrite=!0;client;constructor(e){this.options=e;this.client=e.client}async getClient(){if(this.client)return this.client;let[{S3Client:e},{fromIni:t}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]);return this.client=new e({region:this.options.region,credentials:this.options.profile?t({profile:this.options.profile}):void 0,maxAttempts:this.options.max_attempts}),this.client}objectKey(e){let t=Q(e),n=this.options.prefix?Q(this.options.prefix):"";return n?`${n}/${t}`:t}async put(e){let[{PutObjectCommand:t},n]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(e.key);return await n.send(new t({Bucket:this.options.bucket,Key:r,Body:e.body,ContentType:e.content_type,Metadata:e.metadata,ServerSideEncryption:this.options.server_side_encryption,SSEKMSKeyId:this.options.kms_key_id})),{key:r,uri:`s3://${this.options.bucket}/${r}`}}async getText(e){let[{GetObjectCommand:t},n]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(e),i=await n.send(new t({Bucket:this.options.bucket,Key:r}));if(!i.Body)return"";return await i.Body.transformToString()}async exists(e){let[{HeadObjectCommand:t},n]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(e);try{return await n.send(new t({Bucket:this.options.bucket,Key:r})),!0}catch(i){let s=i instanceof Error?i.name:"";if(s==="NotFound"||s==="NoSuchKey"||s==="NotFoundError")return!1;throw i}}}function Ze(e,t){if(e.storage.type==="s3"){if(!e.storage.s3?.bucket)throw Error("S3 artifact storage requires storage.s3.bucket");return new Qe({bucket:e.storage.s3.bucket,prefix:e.storage.s3.prefix,region:e.storage.s3.region,profile:e.storage.s3.profile,max_attempts:e.storage.s3.max_attempts,server_side_encryption:e.storage.s3.server_side_encryption,kms_key_id:e.storage.s3.kms_key_id})}return new Ve(t.artifactsDir)}import{createHash as ct}from"crypto";var 
et={openai:{api_key_env:"OPENAI_API_KEY",default_model:"gpt-5.2"},anthropic:{api_key_env:"ANTHROPIC_API_KEY",default_model:"claude-sonnet-4-6"},deepseek:{api_key_env:"DEEPSEEK_API_KEY",default_model:"deepseek-chat"}},gn={openai:{text_generation:!0,structured_output:!0,tool_usage:!0,tool_streaming:!0,image_input:!0,native_web_search:!0,reasoning:!0,embeddings:!0},anthropic:{text_generation:!0,structured_output:!0,tool_usage:!0,tool_streaming:!0,image_input:!0,native_web_search:!1,reasoning:!0,embeddings:!1},deepseek:{text_generation:!0,structured_output:!0,tool_usage:!0,tool_streaming:!0,image_input:!1,native_web_search:!1,reasoning:!0,embeddings:!1}},pn={default:"openai:gpt-5.2",fast:"openai:gpt-5-mini",reasoning:"anthropic:claude-opus-4-6",sonnet:"anthropic:claude-sonnet-4-6",deepseek:"deepseek:deepseek-chat","deepseek-reasoning":"deepseek:deepseek-reasoner"};function tt(e){return e.providers??{}}function Oe(e,t){let n=tt(e)[t]??{};return{...et[t],...n}}function nt(e){let t=tt(e);return{...pn,...t.default_model?{default:t.default_model}:{},...t.aliases??{}}}function W(e){let[t,...n]=e.split(":"),r=n.join(":");if(t!=="openai"&&t!=="anthropic"&&t!=="deepseek")throw Error(`Unsupported AI provider: ${t}`);if(!r)throw Error(`Invalid model ref: ${e}. 
Expected provider:model.`);return{provider:t,model:r}}function Ne(e,t){return nt(t)[e]??e}function Ae(e){let t=nt(e);return Object.entries(t).map(([n,r])=>{let i=W(r);return{alias:n,model_ref:r,provider:i.provider,model:i.model,default:n==="default",capabilities:gn[i.provider]}})}function rt(e,t=process.env){return Object.keys(et).map((n)=>{let r=Oe(e,n),i=Boolean(t[r.api_key_env]);return{provider:n,api_key_env:r.api_key_env,configured:i,source:i?"env":"missing",base_url:r.base_url??null,default_model:r.default_model}})}function it(e,t=process.env){return{default_model:Ne("default",e),providers:rt(e,t),models:Ae(e)}}function oe(e,t,n=process.env){let r=rt(t,n).find((i)=>i.provider===e);if(!r)throw Error(`Unsupported AI provider: ${e}`);if(!r.configured)throw Error(`Missing ${r.api_key_env} for ${e}. Set the env var to use this provider.`);return r}function hn(e){return["deleted","stale","invalidated","reindex_required"].includes((e??"").toLowerCase())}function K(e){let t=e.status??null;return{source_owner:"open-files",source_ref:e.source_ref??null,source_uri:e.source_uri??null,source_kind:e.source_kind??null,source_revision_id:e.source_revision_id??null,revision:e.revision??null,hash:e.hash??null,chunk_id:e.chunk_id??null,start_offset:e.start_offset??null,end_offset:e.end_offset??null,status:t,read_only:!0,citation_required:!0,resolver:e.resolver??null,stale:hn(t)}}function Ie(e){return{source_owner:"open-files",generated_from:e.generated_from,artifact_key:e.artifact_key,source_refs:e.source_refs??[],read_only_sources:!0,citation_required:e.citation_required??!0,raw_source_bytes_stored_in_open_knowledge:!1}}function st(e,t){return{...e,provenance:t}}var mn="openai:text-embedding-3-small",ut=1536;function ae(e){return e?.embeddings??{}}function ot(e,t){return`${e}_${ct("sha256").update(t).digest("hex").slice(0,20)}`}function Ce(e){if(!e)return{};try{let t=JSON.parse(e);return t&&typeof t==="object"&&!Array.isArray(t)?t:{}}catch{return{}}}function U(e,t){for(let n of 
t){let r=e[n];if(typeof r==="string"&&r.length>0)return r}return null}function at(e,t){for(let n of t){let r=e[n];if(typeof r==="number"&&Number.isFinite(r))return r}return null}function Le(e){return Math.sqrt(e.reduce((t,n)=>t+n*n,0))}function En(e,t,n=Le(t)){let r=Le(e);if(r===0||n===0)return 0;let i=Math.min(e.length,t.length),s=0;for(let d=0;d<i;d+=1)s+=e[d]*t[d];return s/(r*n)}function kn(e,t){let n=ct("sha256").update(e).digest();return Array.from({length:t},(r,i)=>{let s=n[i%n.length]/255;return Number((s*2-1).toFixed(6))})}async function yn(e,t,n=process.env){oe("openai",t,n);let r=Oe(t,"openai"),{createOpenAI:i}=await import("@ai-sdk/openai"),s=i({apiKey:n[r.api_key_env],baseURL:r.base_url});if(s.embeddingModel)return s.embeddingModel(e);if(s.textEmbedding)return s.textEmbedding(e);if(s.textEmbeddingModel)return s.textEmbeddingModel(e);throw Error("OpenAI provider does not expose an embedding model factory.")}function De(e,t){if(!e||e==="default"||e==="embedding")return ae(t).default_model??mn;return e}async function dt(e,t={}){let n=De(t.modelRef,t.config),r=W(n);if(r.provider!=="openai")throw Error(`Embedding provider ${r.provider} is not supported yet. Use openai:text-embedding-3-small.`);let i=t.dimensions??ae(t.config).dimensions??ut;if(t.fake)return{provider:r.provider,model:r.model,dimensions:i,vectors:e.map((o)=>kn(o,i)),usage:{input_tokens:e.reduce((o,c)=>o+Math.max(1,Math.ceil(c.split(/\s+/).filter(Boolean).length*1.25)),0)}};let{embedMany:s}=await import("ai"),d=await yn(r.model,t.config,t.env),l=await s({model:d,values:e,maxParallelCalls:t.maxParallelCalls??ae(t.config).max_parallel_calls,providerOptions:{openai:{dimensions:i}}}),a=l.embeddings;return{provider:r.provider,model:r.model,dimensions:a[0]?.length??i,vectors:a,usage:{input_tokens:l.usage?.tokens??0}}}function bn(e,t){if(t.sourceRevisionId)return e.query(`SELECT
|
|
247
247
|
c.id,
|
|
248
248
|
c.text,
|
|
249
249
|
c.token_count,
|
|
@@ -281,14 +281,14 @@ VALUES (4, datetime('now'));
|
|
|
281
281
|
ON v.chunk_id = c.id AND v.provider = ? AND v.model = ?
|
|
282
282
|
WHERE v.id IS NULL
|
|
283
283
|
ORDER BY c.created_at ASC, c.ordinal ASC
|
|
284
|
-
LIMIT ?`).all(t.provider,t.model,t.limit)}function
|
|
284
|
+
LIMIT ?`).all(t.provider,t.model,t.limit)}function Tn(e){let t=Ce(e.metadata_json),n=t.provenance;if(n&&typeof n==="object"&&!Array.isArray(n))return n;return K({source_ref:U(t,["source_ref"]),source_uri:e.source_uri??U(t,["source_uri"]),source_kind:e.source_kind??U(t,["source_kind"]),source_revision_id:e.source_revision_id,revision:e.revision??U(t,["revision"]),hash:e.hash??U(t,["hash"]),chunk_id:e.id,start_offset:e.start_offset??at(t,["start_offset"]),end_offset:e.end_offset??at(t,["end_offset"]),status:U(t,["status"]),resolver:"open-files-read-only"})}function vn(e,t,n,r){let i=e.prepare(`
|
|
285
285
|
INSERT INTO chunk_embeddings (id, chunk_id, provider, model, dimensions, vector_json, created_at)
|
|
286
286
|
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
287
287
|
ON CONFLICT(chunk_id, provider, model) DO UPDATE SET
|
|
288
288
|
dimensions = excluded.dimensions,
|
|
289
289
|
vector_json = excluded.vector_json,
|
|
290
290
|
created_at = excluded.created_at
|
|
291
|
-
`),
|
|
291
|
+
`),s=e.prepare(`
|
|
292
292
|
INSERT INTO vector_index_entries (
|
|
293
293
|
id, chunk_id, source_revision_id, provider, model, dimensions, vector_json, vector_norm,
|
|
294
294
|
source_uri, source_ref, revision, hash, start_offset, end_offset, token_count, status,
|
|
@@ -310,10 +310,10 @@ VALUES (4, datetime('now'));
|
|
|
310
310
|
status = excluded.status,
|
|
311
311
|
metadata_json = excluded.metadata_json,
|
|
312
312
|
updated_at = excluded.updated_at
|
|
313
|
-
`);return e.transaction(()=>{for(let
|
|
313
|
+
`);return e.transaction(()=>{for(let l=0;l<t.length;l+=1){let a=t[l],o=n.vectors[l];if(!o)continue;let c=Ce(a.metadata_json),u=Tn(a),_=u.source_ref??U(c,["source_ref"]),f=u.source_uri??a.source_uri??U(c,["source_uri"]),m=u.revision??a.revision??U(c,["revision"]),y=u.hash??a.hash??U(c,["hash"]),k=u.status??U(c,["status"])??"active",g=JSON.stringify(o);i.run(ot("emb",`${a.id}\x00${n.provider}\x00${n.model}`),a.id,n.provider,n.model,n.dimensions,g,r),s.run(ot("vec",`${a.id}\x00${n.provider}\x00${n.model}`),a.id,a.source_revision_id,n.provider,n.model,n.dimensions,g,Le(o),f,_,m,y,u.start_offset,u.end_offset,a.token_count,k,JSON.stringify({...c,provenance:u,embedded_at:r}),r,r)}})(),t.length}async function lt(e){let t=De(e.modelRef,e.config),n=W(t);if(n.provider!=="openai")throw Error(`Embedding provider ${n.provider} is not supported yet.`);let r=(e.now??new Date).toISOString(),i=Math.max(1,Math.min(e.limit??100,1000));w(e.dbPath);let s=S(e.dbPath),d;try{d=bn(s,{provider:n.provider,model:n.model,limit:i,sourceRevisionId:e.sourceRevisionId})}finally{s.close()}if(d.length===0)return{provider:n.provider,model:n.model,dimensions:e.dimensions??ae(e.config).dimensions??ut,chunks_seen:0,chunks_embedded:0,embeddings_upserted:0,vector_entries_upserted:0,usage:{input_tokens:0}};let l=await dt(d.map((o)=>o.text),e),a=S(e.dbPath);try{let o=vn(a,d,l,r);return{provider:l.provider,model:l.model,dimensions:l.dimensions,chunks_seen:d.length,chunks_embedded:d.length,embeddings_upserted:o,vector_entries_upserted:o,usage:l.usage}}finally{a.close()}}function _t(e){w(e);let t=S(e);try{let n=t.query("SELECT COUNT(*) AS n FROM chunk_embeddings").get()?.n??0,r=t.query("SELECT COUNT(*) AS n FROM vector_index_entries").get()?.n??0,i=t.query(`SELECT provider, model, dimensions, COUNT(*) AS entries, MAX(updated_at) AS updated_at
|
|
314
314
|
FROM vector_index_entries
|
|
315
315
|
GROUP BY provider, model, dimensions
|
|
316
|
-
ORDER BY provider, model`).all();return{total_embeddings:
|
|
316
|
+
ORDER BY provider, model`).all();return{total_embeddings:n,total_vector_entries:r,indexes:i}}finally{t.close()}}async function ce(e){let t=De(e.modelRef,e.config),n=W(t),r=Math.max(1,Math.min(e.limit??10,100)),i=await dt([e.query],e),s=i.vectors[0]??[];w(e.dbPath);let d=S(e.dbPath);try{let a=d.query(`SELECT
|
|
317
317
|
v.chunk_id,
|
|
318
318
|
c.text,
|
|
319
319
|
v.vector_json,
|
|
@@ -325,59 +325,96 @@ VALUES (4, datetime('now'));
|
|
|
325
325
|
v.metadata_json
|
|
326
326
|
FROM vector_index_entries v
|
|
327
327
|
JOIN chunks c ON c.id = v.chunk_id
|
|
328
|
-
WHERE v.provider = ? AND v.model = ? AND v.status = 'active'`).all(
|
|
329
|
-
VALUES (?, ?, ?, ?, ?, ?, ?)`,[
|
|
330
|
-
VALUES (?, ?, ?, ?, ?, ?, ?)`,[`redact_${
|
|
331
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,[
|
|
328
|
+
WHERE v.provider = ? AND v.model = ? AND v.status = 'active'`).all(n.provider,n.model).map((o)=>{let c=JSON.parse(o.vector_json),u=Ce(o.metadata_json),_=u.provenance&&typeof u.provenance==="object"&&!Array.isArray(u.provenance)?u.provenance:null;return{chunk_id:o.chunk_id,score:En(s,c,o.vector_norm),text:o.text,source_uri:o.source_uri,source_ref:o.source_ref,revision:o.revision,hash:o.hash,provenance:_}}).sort((o,c)=>c.score-o.score).slice(0,r);return{provider:n.provider,model:n.model,dimensions:i.dimensions,query:e.query,results:a}}finally{d.close()}}import{createHash as Pn,randomUUID as Un}from"crypto";import{existsSync as jn,readFileSync as Mn}from"fs";import{basename as Kn}from"path";function ft(e,t){if(!e)throw Error(t);return e}function xn(e){let n=e.slice(13).split("/").filter(Boolean),r=n[0];if(r!=="file"&&r!=="source")throw Error("Invalid open-files ref. Expected open-files://file/<id>, open-files://file/<id>/revision/<revision_id>, or open-files://source/<id>/path/<path>.");let i=ft(n[1],"Invalid open-files ref. Missing id.");if(r==="file"){if(n.length===2)return{kind:"open-files",uri:e,entity:r,id:i};if(n[2]==="revision"&&n[3]&&n.length===4)return{kind:"open-files",uri:e,entity:r,id:i,revision_id:decodeURIComponent(n[3])};throw Error("Invalid open-files file ref. Expected open-files://file/<id>/revision/<revision_id>.")}let s=n.indexOf("path"),d=s>=0?decodeURIComponent(n.slice(s+1).join("/")):void 0;return{kind:"open-files",uri:e,entity:r,id:i,path:d}}function Sn(e){let t=new URL(e),n=ft(t.hostname,"Invalid s3 ref. Missing bucket."),r=decodeURIComponent(t.pathname.replace(/^\/+/,""));if(!r)throw Error("Invalid s3 ref. 
Missing object key.");return{kind:"s3",uri:e,bucket:n,key:r}}function wn(e){let t=new URL(e);return{kind:"file",uri:e,path:decodeURIComponent(t.pathname)}}function Rn(e){let t=new URL(e);return{kind:"web",uri:e,url:t.toString()}}function P(e){if(e.startsWith("open-files://"))return xn(e);if(e.startsWith("s3://"))return Sn(e);if(e.startsWith("file://"))return wn(e);if(e.startsWith("https://")||e.startsWith("http://"))return Rn(e);throw Error(`Unsupported source ref scheme: ${e}`)}function gt(e,t=P(e)){if(t.kind==="open-files"&&t.entity==="file"&&t.revision_id)return e.replace(/\/revision\/[^/]+$/,"");return e}function pt(e){let t=P(e);return t.kind==="open-files"&&t.entity==="file"?t.revision_id??null:null}import{createHash as On,randomUUID as Pe}from"crypto";import{relative as Nn,resolve as mt,sep as An}from"path";function ht(e){let t=process.env[e];return t==="1"||t==="true"||t==="yes"}function Et(e,t){let n=e,r=new Set(n.safety?.network?.allowed_s3_buckets??[]);if(e.storage.type==="s3"&&e.storage.s3?.bucket)r.add(e.storage.s3.bucket);if(process.env.HASNA_KNOWLEDGE_ALLOWED_S3_BUCKETS)for(let i of process.env.HASNA_KNOWLEDGE_ALLOWED_S3_BUCKETS.split(",").map((s)=>s.trim()).filter(Boolean))r.add(i);return{mode:e.mode,allowWriteRoots:[t.home,t.artifactsDir,t.cacheDir,t.exportsDir,t.indexesDir,t.logsDir,t.runsDir,t.schemasDir,t.wikiDir].map((i)=>mt(i)),readOnlySourceAccess:!0,network:{webSearchEnabled:n.safety?.network?.web_search_enabled??ht("HASNA_KNOWLEDGE_WEB_SEARCH"),s3ReadsEnabled:n.safety?.network?.s3_reads_enabled??ht("HASNA_KNOWLEDGE_ALLOW_S3_READS"),allowedS3Buckets:[...r].sort()},redaction:{enabled:n.safety?.redaction?.enabled??!0},approvals:{generatedWritesRequireApproval:n.safety?.approvals?.generated_writes_require_approval??!0}}}function In(e,t){let n=Nn(e,t);return n===""||!n.startsWith("..")&&n!==".."&&!n.startsWith(`..${An}`)}function X(e,t){let n=mt(e);if(!t.allowWriteRoots.some((r)=>In(r,n)))throw Error(`Safety policy denied write outside 
.hasna/apps/knowledge: ${e}`)}function F(e,t){let r=new URL(e).hostname;if(!t.network.s3ReadsEnabled)throw Error("Safety policy denied S3 read. Set safety.network.s3_reads_enabled=true or HASNA_KNOWLEDGE_ALLOW_S3_READS=1.");if(!t.network.allowedS3Buckets.includes(r))throw Error(`Safety policy denied S3 bucket "${r}". Add it to safety.network.allowed_s3_buckets or HASNA_KNOWLEDGE_ALLOWED_S3_BUCKETS.`)}function ue(e){if(!e.network.webSearchEnabled)throw Error("Safety policy denied web search. Set safety.network.web_search_enabled=true or HASNA_KNOWLEDGE_WEB_SEARCH=1.")}var Ln=[{type:"private_key_block",severity:"high",regex:/-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/g,replacement:"[REDACTED:private_key_block]"},{type:"secret_assignment",severity:"high",regex:/\b(?:api[_-]?key|secret|token|password)\s*[:=]\s*['"]?[^'"\s]{8,}/gi,replacement:"[REDACTED:secret_assignment]"},{type:"openai_api_key",severity:"high",regex:/\bsk-[A-Za-z0-9_-]{20,}\b/g,replacement:"[REDACTED:openai_api_key]"},{type:"anthropic_api_key",severity:"high",regex:/\bsk-ant-[A-Za-z0-9_-]{20,}\b/g,replacement:"[REDACTED:anthropic_api_key]"},{type:"aws_access_key_id",severity:"high",regex:/\bA(?:KIA|SIA)[A-Z0-9]{16}\b/g,replacement:"[REDACTED:aws_access_key_id]"}];function de(e,t){if(t&&!t.redaction.enabled)return{text:e,findings:[]};let n=e,r=[];for(let i of Ln)n=n.replace(i.regex,(s,...d)=>{let l=typeof d.at(-2)==="number"?d.at(-2):n.indexOf(s);return r.push({type:i.type,severity:i.severity,start:Math.max(0,l),end:Math.max(0,l+s.length)}),i.replacement});return{text:n,findings:r}}function Cn(e){return`audit_${On("sha256").update(`${e.event_type}\x00${e.action}\x00${e.target_uri??""}\x00${e.created_at??""}\x00${JSON.stringify(e.metadata??{})}\x00${Pe()}`).digest("hex").slice(0,24)}`}function R(e,t){let n=t.created_at??new Date().toISOString(),r=Cn({...t,created_at:n});return e.run(`INSERT INTO audit_events (id, event_type, action, target_uri, decision, metadata_json, 
created_at)
|
|
329
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)`,[r,t.event_type,t.action,t.target_uri??null,t.decision,JSON.stringify(t.metadata??{}),n]),r}function le(e,t){let n=t.created_at??new Date().toISOString();for(let r of t.findings)e.run(`INSERT INTO redaction_findings (id, source_uri, run_id, severity, finding_type, metadata_json, created_at)
|
|
330
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)`,[`redact_${Pe()}`,t.source_uri??null,t.run_id??null,r.severity,r.type,JSON.stringify({...t.metadata??{},start:r.start,end:r.end}),n]);return t.findings.length}function kt(e,t){let n=t.created_at??new Date().toISOString(),r=`approval_${Pe()}`;return e.run(`INSERT INTO approval_gates (id, action, target_uri, status, reason, approved_by, metadata_json, created_at, updated_at)
|
|
331
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,[r,t.action,t.target_uri??null,"approved",t.reason??null,t.approved_by??"local-cli",JSON.stringify(t.metadata??{}),n,n]),{id:r,status:"approved"}}function Dn(e,t,n){let r=e.query(`SELECT id FROM approval_gates
|
|
332
332
|
WHERE action = ? AND status = 'approved' AND (target_uri IS NULL OR target_uri = ? OR ? IS NULL)
|
|
333
|
-
ORDER BY updated_at DESC LIMIT 1`).get(t,
|
|
333
|
+
ORDER BY updated_at DESC LIMIT 1`).get(t,n??null,n??null);return Boolean(r)}function yt(e,t,n,r){let i=n==="generated_write"&&t.approvals.generatedWritesRequireApproval,s=!i||Dn(e,n,r);return{action:n,target_uri:r??null,approval_required:i,approved:s,decision:s?"allow":"requires_approval"}}function _e(e,t){return`${e}_${Pn("sha256").update(t).digest("hex").slice(0,20)}`}function H(e){return e&&typeof e==="object"&&!Array.isArray(e)?e:void 0}function v(e){return typeof e==="string"&&e.length>0?e:void 0}function Fn(e){let t=v(e.source_ref)??v(e.source_uri)??v(e.uri);if(t)return t;let n=v(e.file_id);if(n){let s=v(e.revision_id)??v(e.revision),d=`open-files://file/${encodeURIComponent(n)}`;return s?`${d}/revision/${encodeURIComponent(s)}`:d}let r=v(e.source_id),i=v(e.path);if(r&&i)return`open-files://source/${encodeURIComponent(r)}/path/${encodeURIComponent(i)}`;throw Error("Outbox event is missing source_ref, file_id, or source_id/path.")}function Wn(e,t){if(t.kind==="open-files"&&t.entity==="file"&&t.revision_id)return e.replace(/\/revision\/[^/]+$/,"");return e}function Xn(e){return v(e.hash)??v(e.checksum)??v(e.sha256)??null}function $n(e,t,n){return v(e.revision_id)??v(e.revision)??v(e.version_id)??(t.kind==="open-files"?t.revision_id:void 0)??n??null}function Bn(e){return(v(e.event)??v(e.type)??v(e.action)??v(e.change_type)??"changed").toLowerCase()}function Hn(e){let t=v(e.path);return v(e.title)??v(e.name)??(t?Kn(t):null)}function qn(e,t){let n=Fn(e),r=P(n),i=Xn(e);return{raw:e,eventType:Bn(e),sourceRef:n,sourceUri:Wn(n,r),kind:r.kind,title:Hn(e),revision:$n(e,r,i),hash:i,status:v(e.status)?.toLowerCase()??null,updatedAt:v(e.updated_at)??t,acl:e.permissions??e.acl??void 0}}function zn(e){let t=e.trim();if(!t)return[];if(t.startsWith("[")){let n=JSON.parse(t);if(!Array.isArray(n))throw Error("Outbox array parse failed.");return n.map((r)=>{let i=H(r);if(!i)throw Error("Outbox array entries must be objects.");return i})}if(t.startsWith("{"))try{let 
n=JSON.parse(t),r=H(n);if(!r)throw Error("Outbox object parse failed.");if(Array.isArray(r.events))return r.events.map((i)=>{let s=H(i);if(!s)throw Error("Outbox events entries must be objects.");return s});if("source_ref"in r||"source_uri"in r||"file_id"in r)return[r]}catch(n){let r=t.split(/\r?\n/).filter((i)=>i.trim().length>0);if(r.length<=1)throw n;return r.map((i)=>{let s=H(JSON.parse(i));if(!s)throw Error("Outbox JSONL entries must be objects.");return s})}return t.split(/\r?\n/).filter((n)=>n.trim().length>0).map((n)=>{let r=H(JSON.parse(n));if(!r)throw Error("Outbox JSONL entries must be objects.");return r})}async function Gn(e,t,n){let r=new URL(e),i=r.hostname,s=decodeURIComponent(r.pathname.replace(/^\/+/,""));if(!i||!s)throw Error(`Invalid S3 outbox URI: ${e}`);if(n)F(e,n);let[{S3Client:d,GetObjectCommand:l},{fromIni:a}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]),o=t?.storage.type==="s3"&&t.storage.s3?.bucket===i?t.storage.s3:void 0,u=await new d({region:o?.region,credentials:o?.profile?a({profile:o.profile}):void 0,maxAttempts:o?.max_attempts}).send(new l({Bucket:i,Key:s}));if(!u.Body)return"";return await u.Body.transformToString()}async function Jn(e,t,n){if(e.startsWith("s3://"))return Gn(e,t,n);if(!jn(e))throw Error(`Outbox not found: ${e}`);return Mn(e,"utf8")}function bt(e,t){let n={};if(e)try{n=H(JSON.parse(e))??{}}catch{n={}}return JSON.stringify({...n,...t})}function Yn(e,t,n){let r=_e("src",t.sourceUri);e.run(`INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
|
|
334
334
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
335
335
|
ON CONFLICT(uri) DO UPDATE SET
|
|
336
336
|
kind = excluded.kind,
|
|
337
337
|
title = COALESCE(excluded.title, sources.title),
|
|
338
|
-
updated_at = excluded.updated_at`,[
|
|
338
|
+
updated_at = excluded.updated_at`,[r,t.sourceUri,t.kind,t.title,JSON.stringify({source_ref:t.sourceRef,source_uri:t.sourceUri,status:t.status,last_outbox_event:t.eventType}),JSON.stringify(t.acl??{}),n,t.updatedAt]);let i=e.query("SELECT id, metadata_json, acl_json FROM sources WHERE uri = ?").get(t.sourceUri);if(!i)throw Error(`Failed to upsert source for outbox event: ${t.sourceUri}`);let s={source_ref:t.sourceRef,source_uri:t.sourceUri,last_outbox_event:t.eventType,last_outbox_at:t.updatedAt};if(t.status)s.status=t.status;if(v(t.raw.path))s.path=t.raw.path;return e.run("UPDATE sources SET metadata_json = ?, acl_json = CASE WHEN ? IS NULL THEN acl_json ELSE ? END, updated_at = ? WHERE id = ?",[bt(i.metadata_json,s),t.acl===void 0?null:JSON.stringify(t.acl),t.acl===void 0?null:JSON.stringify(t.acl),t.updatedAt,i.id]),i.id}function Vn(e,t,n,r){if(!n.revision)return null;let i=_e("rev",`${t}\x00${n.revision}`),s={source_ref:n.sourceRef,source_uri:n.sourceUri,status:n.status,last_outbox_event:n.eventType,reindex_required:!0};return e.run(`INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)
|
|
339
339
|
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
340
340
|
ON CONFLICT(source_id, revision) DO UPDATE SET
|
|
341
341
|
hash = COALESCE(excluded.hash, source_revisions.hash),
|
|
342
|
-
metadata_json = excluded.metadata_json`,[i,t,
|
|
343
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,[
|
|
344
|
-
VALUES (?, ?, ?, ?, ?, ?)`,[
|
|
345
|
-
VALUES (?, ?, ?, ?, 0, 0, 0, ?, ?)`,[
|
|
346
|
-
`);if(!
|
|
342
|
+
metadata_json = excluded.metadata_json`,[i,t,n.revision,n.hash,v(n.raw.extracted_text_ref)??null,JSON.stringify(s),r]),e.query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?").get(t,n.revision)?.id??null}function Qn(e,t,n){if(n.revision)return e.query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?").all(t,n.revision).map((r)=>r.id);if(n.hash)return e.query("SELECT id FROM source_revisions WHERE source_id = ? AND hash = ?").all(t,n.hash).map((r)=>r.id);return e.query("SELECT id FROM source_revisions WHERE source_id = ?").all(t).map((r)=>r.id)}function Zn(e,t){let n=e.query("SELECT id FROM chunks WHERE source_revision_id = ?").all(t),r=0,i=0;for(let d of n){let l=e.query("SELECT COUNT(*) AS n FROM chunk_embeddings WHERE chunk_id = ?").get(d.id);r+=l?.n??0;let a=e.query("SELECT COUNT(*) AS n FROM vector_index_entries WHERE chunk_id = ?").get(d.id);i+=a?.n??0,e.run("DELETE FROM vector_index_entries WHERE chunk_id = ?",[d.id]),e.run("DELETE FROM chunk_embeddings WHERE chunk_id = ?",[d.id]),e.run("DELETE FROM chunks_fts WHERE chunk_id = ?",[d.id])}e.run("DELETE FROM chunks WHERE source_revision_id = ?",[t]);let s=e.query("SELECT metadata_json FROM source_revisions WHERE id = ?").get(t);return e.run("UPDATE source_revisions SET metadata_json = ? 
WHERE id = ?",[bt(s?.metadata_json,{reindex_required:!0,invalidated_at:new Date().toISOString()}),t]),{chunksDeleted:n.length,embeddingsDeleted:r,vectorEntriesDeleted:i}}function er(e,t){return t==="deleted"||["delete","deleted","remove","removed"].includes(e)}function tr(e){return["move","moved","rename","renamed","path_changed"].includes(e)}function nr(e){return["permission","permissions","permission_changed","acl_changed"].includes(e)}async function Tt(e){let t=(e.now??new Date).toISOString();if(e.safetyPolicy)X(e.dbPath,e.safetyPolicy);w(e.dbPath);let n=await Jn(e.input,e.config,e.safetyPolicy),r=zn(n),i=S(e.dbPath),s=`run_${Un()}`;try{return i.transaction(()=>{i.run(`INSERT INTO runs (id, type, prompt, status, provider, model, metadata_json, created_at, updated_at)
|
|
343
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,[s,"open-files-outbox",e.input,"completed","local","open-files-outbox",JSON.stringify({path:e.input,events:r.length}),t,t]);let d=new Set,l=new Set,a=0,o=0,c=0,u=0,_=0,f=0,m=0;return R(i,{event_type:"source_read",action:e.input.startsWith("s3://")?"s3_outbox_read":"local_outbox_read",target_uri:e.input,decision:"allow",metadata:{events:r.length,read_only:!0},created_at:t}),r.forEach((y,k)=>{let g=qn(y,t),b=Yn(i,g,t);d.add(b);let x=Vn(i,b,g,t);if(x)l.add(x);let p=Qn(i,b,g);for(let A of p){l.add(A);let h=Zn(i,A);a+=h.chunksDeleted,o+=h.embeddingsDeleted,c+=h.vectorEntriesDeleted,u+=1}if(er(g.eventType,g.status))_+=1;if(tr(g.eventType))f+=1;if(nr(g.eventType)||g.acl!==void 0)m+=1;i.run(`INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
|
|
344
|
+
VALUES (?, ?, ?, ?, ?, ?)`,[_e("evt",`${s}\x00${k}\x00${g.sourceRef}\x00${g.eventType}`),s,"info",g.eventType,JSON.stringify({source_ref:g.sourceRef,source_uri:g.sourceUri,revision:g.revision,hash:g.hash,status:g.status,affected_revisions:p.length}),g.updatedAt])}),i.run(`INSERT INTO provider_usage (id, run_id, provider, model, input_tokens, output_tokens, cost_usd, metadata_json, created_at)
|
|
345
|
+
VALUES (?, ?, ?, ?, 0, 0, 0, ?, ?)`,[_e("usage",s),s,"local","open-files-outbox",JSON.stringify({note:"No model provider used for outbox invalidation."}),t]),R(i,{event_type:"write",action:"knowledge_outbox_invalidation",target_uri:e.dbPath,decision:"allow",metadata:{run_id:s,events:r.length,sources:d.size,revisions:l.size,chunks_deleted:a,embeddings_deleted:o,vector_entries_deleted:c},created_at:t}),{path:e.input,db_path:e.dbPath,run_id:s,events_seen:r.length,sources_touched:d.size,revisions_touched:l.size,chunks_deleted:a,embeddings_deleted:o,vector_entries_deleted:c,stale_revisions:u,deleted_sources:_,moved_sources:f,permission_updates:m}})()}finally{i.close()}}import{createHash as rr}from"crypto";import{existsSync as ir,readFileSync as sr}from"fs";import{basename as or}from"path";function Ue(e,t){return`${e}_${rr("sha256").update(t).digest("hex").slice(0,20)}`}function q(e){return e&&typeof e==="object"&&!Array.isArray(e)?e:void 0}function T(e){return typeof e==="string"&&e.length>0?e:void 0}function ar(e){return typeof e==="number"&&Number.isFinite(e)?e:void 0}function cr(e){let t=T(e.source_ref)??T(e.source_uri)??T(e.uri);if(t)return t;let n=T(e.file_id);if(n){let s=T(e.revision_id)??T(e.revision),d=`open-files://file/${encodeURIComponent(n)}`;return s?`${d}/revision/${encodeURIComponent(s)}`:d}let r=T(e.source_id),i=T(e.path);if(r&&i)return`open-files://source/${encodeURIComponent(r)}/path/${encodeURIComponent(i)}`;throw Error("Manifest item is missing source_ref, file_id, or source_id/path.")}function ur(e,t){if(t.kind==="open-files"&&t.entity==="file"&&t.revision_id)return e.replace(/\/revision\/[^/]+$/,"");return e}function dr(e){let t=T(e.extracted_text)??T(e.text)??T(e.content_text)??T(e.markdown);if(t!==void 0)return t;let n=e.content;return typeof n==="string"?n:null}function lr(e){let t=T(e.extracted_text_ref)??T(e.extracted_text_uri)??T(e.text_ref);if(t)return t;let n=q(e.content);return 
T(n?.extracted_text_ref)??T(n?.extracted_text_uri)??null}function _r(e){let t=T(e.path);return T(e.title)??T(e.name)??(t?or(t):null)}function fr(e){return T(e.hash)??T(e.checksum)??T(e.sha256)??null}function gr(e,t,n){return T(e.revision_id)??T(e.revision)??T(e.version_id)??(t.kind==="open-files"?t.revision_id:void 0)??n??T(e.updated_at)??"current"}function pr(e,t){let n={};for(let[r,i]of Object.entries(e)){if(["text","content","content_text","extracted_text","markdown"].includes(r))continue;n[r]=i}return n.source_ref=t.sourceRef,n.source_uri=t.sourceUri,n.status=t.status,n}function hr(e,t){let n=cr(e),r=P(n),i=ur(n,r),s=fr(e),d=T(e.status)??"active";return{raw:e,sourceRef:n,sourceUri:i,kind:r.kind,title:_r(e),revision:gr(e,r,s),hash:s,extractedTextUri:lr(e),text:dr(e),metadata:pr(e,{sourceRef:n,sourceUri:i,status:d}),acl:e.permissions??e.acl??{},status:d,updatedAt:T(e.updated_at)??t}}function mr(e){let t=e.trim();if(!t)return[];if(t.startsWith("[")){let n=JSON.parse(t);if(!Array.isArray(n))throw Error("Manifest array parse failed.");return n.map((r)=>{let i=q(r);if(!i)throw Error("Manifest array entries must be objects.");return i})}if(t.startsWith("{"))try{let n=JSON.parse(t),r=q(n);if(!r)throw Error("Manifest object parse failed.");if(Array.isArray(r.items))return r.items.map((i)=>{let s=q(i);if(!s)throw Error("Manifest items entries must be objects.");return s});if("source_ref"in r||"source_uri"in r||"file_id"in r)return[r]}catch(n){let r=t.split(/\r?\n/).filter((i)=>i.trim().length>0);if(r.length<=1)throw n;return r.map((i)=>{let s=q(JSON.parse(i));if(!s)throw Error("Manifest JSONL entries must be objects.");return s})}return t.split(/\r?\n/).filter((n)=>n.trim().length>0).map((n)=>{let r=q(JSON.parse(n));if(!r)throw Error("Manifest JSONL entries must be objects.");return r})}async function Er(e,t,n){let r=new URL(e),i=r.hostname,s=decodeURIComponent(r.pathname.replace(/^\/+/,""));if(!i||!s)throw Error(`Invalid S3 manifest URI: 
${e}`);if(n)F(e,n);let[{S3Client:d,GetObjectCommand:l},{fromIni:a}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]),o=t?.storage.type==="s3"&&t.storage.s3?.bucket===i?t.storage.s3:void 0,u=await new d({region:o?.region,credentials:o?.profile?a({profile:o.profile}):void 0,maxAttempts:o?.max_attempts}).send(new l({Bucket:i,Key:s}));if(!u.Body)return"";return await u.Body.transformToString()}async function kr(e,t,n){if(e.startsWith("s3://"))return Er(e,t,n);if(!ir(e))throw Error(`Manifest not found: ${e}`);return sr(e,"utf8")}function yr(e,t,n){let r=e.replace(/\r\n/g,`
|
|
346
|
+
`);if(!r.trim())return[];let i=[],s=0;while(s<r.length){let d=Math.min(r.length,s+t),l=d;if(d<r.length){let o=r.lastIndexOf(`
|
|
347
347
|
|
|
348
|
-
`,
|
|
348
|
+
`,d),c=r.lastIndexOf(". ",d),u=Math.max(o,c);if(u>s+Math.floor(t*0.5))l=u+(u===o?2:1)}let a=r.slice(s,l).trim();if(a)i.push({ordinal:i.length,text:a,startOffset:s,endOffset:l});if(l>=r.length)break;s=Math.max(0,l-n)}return i}function br(e){let t=e.trim().split(/\s+/).filter(Boolean).length;return Math.max(1,Math.ceil(t*1.25))}function Tr(e,t){let n=e.query("SELECT id FROM chunks WHERE source_revision_id = ?").all(t);for(let r of n)e.run("DELETE FROM chunks_fts WHERE chunk_id = ?",[r.id]);return e.run("DELETE FROM chunks WHERE source_revision_id = ?",[t]),n.length}function vr(e,t,n){let r=Ue("src",t.sourceUri);e.run(`INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
|
|
349
349
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
350
350
|
ON CONFLICT(uri) DO UPDATE SET
|
|
351
351
|
kind = excluded.kind,
|
|
352
352
|
title = excluded.title,
|
|
353
353
|
metadata_json = excluded.metadata_json,
|
|
354
354
|
acl_json = excluded.acl_json,
|
|
355
|
-
updated_at = excluded.updated_at`,[
|
|
355
|
+
updated_at = excluded.updated_at`,[r,t.sourceUri,t.kind,t.title,JSON.stringify(t.metadata),JSON.stringify(t.acl??{}),n,t.updatedAt]);let i=e.query("SELECT id FROM sources WHERE uri = ?").get(t.sourceUri);if(!i)throw Error(`Failed to upsert source: ${t.sourceUri}`);return i.id}function xr(e,t,n,r){let i=Ue("rev",`${t}\x00${n.revision}`);e.run(`INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)
|
|
356
356
|
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
357
357
|
ON CONFLICT(source_id, revision) DO UPDATE SET
|
|
358
358
|
hash = excluded.hash,
|
|
359
359
|
extracted_text_uri = excluded.extracted_text_uri,
|
|
360
|
-
metadata_json = excluded.metadata_json`,[i,t,
|
|
361
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,[
|
|
360
|
+
metadata_json = excluded.metadata_json`,[i,t,n.revision,n.hash,n.extractedTextUri,JSON.stringify(n.metadata),r]);let s=e.query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?").get(t,n.revision);if(!s)throw Error(`Failed to upsert source revision: ${n.sourceRef}`);return s.id}function Sr(e,t,n,r,i,s,d){if(!n.text||n.status.toLowerCase()==="deleted")return{chunksInserted:0,redactions:0};let l=de(n.text,d);if(l.findings.length>0)le(e,{source_uri:n.sourceUri,findings:l.findings,metadata:{source_ref:n.sourceRef,revision:n.revision},created_at:r}),R(e,{event_type:"redaction",action:"source_text_redact",target_uri:n.sourceUri,decision:"redacted",metadata:{findings:l.findings.length,source_ref:n.sourceRef,revision:n.revision},created_at:r});let a=yr(l.text,i,s);for(let o of a){let c=Ue("chk",`${t}\x00${o.ordinal}\x00${o.text}`),u=K({source_ref:n.sourceRef,source_uri:n.sourceUri,source_kind:n.kind,source_revision_id:t,revision:n.revision,hash:n.hash,chunk_id:c,start_offset:o.startOffset,end_offset:o.endOffset,status:n.status,resolver:"open-files-read-only"}),_=st({source_ref:n.sourceRef,source_uri:n.sourceUri,source_kind:n.kind,source_revision_id:t,revision:n.revision,hash:n.hash,status:n.status,path:T(n.raw.path)??null,mime:T(n.raw.mime)??T(n.raw.content_type)??null,size:ar(n.raw.size)??null},u);e.run(`INSERT INTO chunks (id, source_revision_id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json, created_at)
|
|
361
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,[c,t,"source",o.ordinal,o.text,br(o.text),o.startOffset,o.endOffset,JSON.stringify(_),r]),e.run("INSERT INTO chunks_fts (chunk_id, text, title, source_uri) VALUES (?, ?, ?, ?)",[c,o.text,n.title??"",n.sourceUri])}return{chunksInserted:a.length,redactions:l.findings.length}}async function vt(e){let t=e.now??new Date;if(e.safetyPolicy)X(e.dbPath,e.safetyPolicy);w(e.dbPath);let n=await kr(e.input,e.config,e.safetyPolicy),r=mr(n);return je({dbPath:e.dbPath,items:r,sourceLabel:e.input,safetyPolicy:e.safetyPolicy,now:t,maxChunkChars:e.maxChunkChars,chunkOverlapChars:e.chunkOverlapChars})}async function je(e){let t=(e.now??new Date).toISOString(),n=e.maxChunkChars??4000,r=e.chunkOverlapChars??200;if(n<500)throw Error("maxChunkChars must be at least 500.");if(r<0||r>=n)throw Error("chunkOverlapChars must be less than maxChunkChars.");if(e.safetyPolicy)X(e.dbPath,e.safetyPolicy);w(e.dbPath);let i=S(e.dbPath);try{return i.transaction(()=>{let d=new Set,l=new Set,a=0,o=0,c=0,u=0;R(i,{event_type:"source_read",action:e.readAction??(e.sourceLabel.startsWith("s3://")?"s3_manifest_read":"local_manifest_read"),target_uri:e.sourceLabel,decision:"allow",metadata:{items:e.items.length,read_only:!0},created_at:t});for(let _ of e.items){let f=hr(_,t),m=vr(i,f,t),y=xr(i,m,f,t);if(d.add(m),l.add(y),f.text||f.status.toLowerCase()==="deleted")o+=Tr(i,y);let k=Sr(i,y,f,t,n,r,e.safetyPolicy);a+=k.chunksInserted,c+=k.redactions}return R(i,{event_type:"write",action:"knowledge_manifest_ingest",target_uri:e.dbPath,decision:"allow",metadata:{items:e.items.length,sources:d.size,revisions:l.size,chunks_inserted:a,redactions:c},created_at:t}),{path:e.sourceLabel,db_path:e.dbPath,items_seen:e.items.length,sources_upserted:d.size,revisions_upserted:l.size,chunks_inserted:a,chunks_deleted:o,redactions:c,skipped:u}})()}finally{i.close()}}import{createHash as Lr}from"crypto";import{existsSync as Cr,readFileSync as Dr}from"fs";import{basename as 
pe}from"path";function fe(e){if(!e)return{};try{let t=JSON.parse(e);return t&&typeof t==="object"&&!Array.isArray(t)?t:{}}catch{return{}}}function $(e,t){for(let n of t){let r=e[n];if(typeof r==="string"&&r.length>0)return r}return null}function xt(e,t){for(let n of t){let r=e[n];if(typeof r==="number"&&Number.isFinite(r))return r}return null}function wr(e,t){let n=e.mode;if(typeof n==="string"&&n!=="read_only")throw Error(`Source resolver denied ${t}. Permission mode is ${n}, expected read_only.`);let r=e.denied_purposes;if(Array.isArray(r)&&r.includes(t))throw Error(`Source resolver denied ${t}. Purpose is explicitly denied.`);let i=e.allowed_purposes;if(Array.isArray(i)&&i.length>0&&!i.includes(t))throw Error(`Source resolver denied ${t}. Allowed purposes: ${i.join(", ")}`)}function Rr(e,t,n){if(!t)return n;try{let r=P(e);if(r.kind==="open-files"&&r.entity==="file")return`${e}/revision/${encodeURIComponent(t.revision)}`}catch{return n}return n}function Or(e,t,n){return e.query(`SELECT id, uri, kind, title, metadata_json, acl_json, updated_at
|
|
362
362
|
FROM sources
|
|
363
363
|
WHERE uri = ? OR uri = ?
|
|
364
364
|
ORDER BY CASE WHEN uri = ? THEN 0 ELSE 1 END
|
|
365
|
-
LIMIT 1`).get(t,
|
|
365
|
+
LIMIT 1`).get(t,n,t)??null}function Nr(e,t,n){if(n)return e.query(`SELECT id, revision, hash, extracted_text_uri, metadata_json, created_at
|
|
366
366
|
FROM source_revisions
|
|
367
367
|
WHERE source_id = ? AND revision = ?
|
|
368
|
-
LIMIT 1`).get(t,
|
|
368
|
+
LIMIT 1`).get(t,n)??null;return e.query(`SELECT id, revision, hash, extracted_text_uri, metadata_json, created_at
|
|
369
369
|
FROM source_revisions
|
|
370
370
|
WHERE source_id = ?
|
|
371
371
|
ORDER BY created_at DESC, revision DESC
|
|
372
|
-
LIMIT 1`).get(t)??null}function
|
|
372
|
+
LIMIT 1`).get(t)??null}function Ar(e,t){if(!t)return 0;return e.query("SELECT COUNT(*) AS n FROM chunks WHERE source_revision_id = ?").get(t)?.n??0}function Ir(e,t,n){if(!t||n<=0)return[];return e.query(`SELECT id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json
|
|
373
373
|
FROM chunks
|
|
374
374
|
WHERE source_revision_id = ?
|
|
375
375
|
ORDER BY ordinal ASC
|
|
376
|
-
LIMIT ?`).all(t,
|
|
376
|
+
LIMIT ?`).all(t,n)}async function ge(e){let t=e.purpose??"knowledge_answer",n=Math.max(0,Math.min(e.limit??10,100)),r=(e.now??new Date).toISOString(),i=P(e.sourceRef),s=gt(e.sourceRef,i),d=pt(e.sourceRef);if(e.safetyPolicy){if(!e.safetyPolicy.readOnlySourceAccess)throw Error("Safety policy denied source resolution.");X(e.dbPath,e.safetyPolicy)}w(e.dbPath);let l=S(e.dbPath);try{return l.transaction(()=>{let a=Or(l,s,e.sourceRef);if(!a)return R(l,{event_type:"source_read",action:"open_files_resolve_missing",target_uri:e.sourceRef,decision:"allow",metadata:{purpose:t,read_only:!0,source_uri:s},created_at:r}),{source_ref:e.sourceRef,source_uri:s,purpose:t,read_only:!0,resolved:!1,resolver:{name:"open-files-read-only",mode:"local_catalog",contract:"open-files-knowledge-source-v1"},source:null,revision:null,content:{mime:null,size:null,hash:null,text_available:!1,chunks_total:0,chunks_returned:0,char_count_returned:0,extracted_text_ref:null,bytes_available:!1,bytes_exposed:!1},chunks:[],citations:[]};let o=fe(a.metadata_json),c=fe(a.acl_json);try{wr(c,t)}catch(p){throw R(l,{event_type:"source_read",action:"open_files_resolve",target_uri:e.sourceRef,decision:"deny",metadata:{purpose:t,read_only:!0,source_uri:a.uri,error:p instanceof Error?p.message:String(p)},created_at:r}),p}let u=Nr(l,a.id,d),_=fe(u?.metadata_json),f=Ar(l,u?.id??null),m=Ir(l,u?.id??null,n),y=Rr(a.uri,u,e.sourceRef),k=m.map((p)=>{let 
A=fe(p.metadata_json),h={resolver:"open-files-read-only",mode:"local_catalog",purpose:t,read_only:!0,source_ref:$(A,["source_ref"])??y,source_uri:a.uri,source_revision_id:u?.id??null,revision:u?.revision??null,hash:u?.hash??$(A,["hash"]),chunk_id:p.id,start_offset:p.start_offset,end_offset:p.end_offset,resolved_at:r},G=K({source_ref:h.source_ref,source_uri:h.source_uri,source_kind:a.kind,source_revision_id:h.source_revision_id,revision:h.revision,hash:h.hash,chunk_id:p.id,start_offset:p.start_offset,end_offset:p.end_offset,status:$(A,["status"]),resolver:h.resolver});return{id:p.id,kind:p.kind,ordinal:p.ordinal,text:p.text,token_count:p.token_count,start_offset:p.start_offset,end_offset:p.end_offset,metadata:A,evidence:h,provenance:G}}),g=k.map((p)=>({source_ref:p.evidence.source_ref,source_uri:a.uri,chunk_id:p.id,quote:p.text.slice(0,500),start_offset:p.start_offset,end_offset:p.end_offset,evidence:p.evidence,provenance:p.provenance}));R(l,{event_type:"source_read",action:"open_files_resolve",target_uri:e.sourceRef,decision:"allow",metadata:{purpose:t,read_only:!0,source_uri:a.uri,revision:u?.revision??null,chunks_returned:k.length,chunks_total:f},created_at:r});let 
b=$(o,["mime","content_type"])??$(_,["mime","content_type"]),x=xt(o,["size","size_bytes"])??xt(_,["size","size_bytes"]);return{source_ref:y,source_uri:a.uri,purpose:t,read_only:!0,resolved:!0,resolver:{name:"open-files-read-only",mode:"local_catalog",contract:"open-files-knowledge-source-v1"},source:{id:a.id,uri:a.uri,kind:a.kind,title:a.title,metadata:o,permissions:c,updated_at:a.updated_at},revision:u?{id:u.id,revision:u.revision,hash:u.hash,extracted_text_uri:u.extracted_text_uri,metadata:_,created_at:u.created_at,reindex_required:_.reindex_required===!0}:null,content:{mime:b,size:x,hash:u?.hash??$(o,["hash","checksum","sha256"]),text_available:f>0,chunks_total:f,chunks_returned:k.length,char_count_returned:k.reduce((p,A)=>p+A.text.length,0),extracted_text_ref:u?.extracted_text_uri??$(_,["extracted_text_ref","extracted_text_uri"]),bytes_available:!1,bytes_exposed:!1},chunks:k,citations:g}})()}finally{l.close()}}function z(e){return`sha256:${Lr("sha256").update(e).digest("hex")}`}function Pr(e){return e.replace(/<script[\s\S]*?<\/script>/gi," ").replace(/<style[\s\S]*?<\/style>/gi," ").replace(/<[^>]+>/g," ").replace(/ /g," ").replace(/&/g,"&").replace(/</g,"<").replace(/>/g,">").replace(/\s+\n/g,`
|
|
377
377
|
`).replace(/\n\s+/g,`
|
|
378
|
-
`).replace(/[ \t]{2,}/g," ").trim()}async function
|
|
378
|
+
`).replace(/[ \t]{2,}/g," ").trim()}async function Ur(e,t,n){let r=new URL(e),i=r.hostname,s=decodeURIComponent(r.pathname.replace(/^\/+/,""));if(!i||!s)throw Error(`Invalid S3 source URI: ${e}`);if(n)F(e,n);let[{S3Client:d,GetObjectCommand:l},{fromIni:a}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]),o=t?.storage.type==="s3"&&t.storage.s3?.bucket===i?t.storage.s3:void 0,u=await new d({region:o?.region,credentials:o?.profile?a({profile:o.profile}):void 0,maxAttempts:o?.max_attempts}).send(new l({Bucket:i,Key:s}));if(!u.Body)return"";return await u.Body.transformToString()}async function jr(e,t){if(t)ue(t);let n=await fetch(e,{headers:{accept:"text/markdown,text/plain,text/html,application/json;q=0.8,*/*;q=0.5","user-agent":"@hasna/knowledge source-ingest"}});if(!n.ok)throw Error(`Web source read failed ${n.status}: ${e}`);let r=n.headers.get("content-type"),i=await n.text();return{text:r?.includes("html")?Pr(i):i,mime:r}}function he(e){if(e.kind==="file")return pe(e.path);if(e.kind==="s3")return pe(e.key);if(e.kind==="web")return pe(new URL(e.url).pathname)||e.url;return e.path?pe(e.path):e.id}async function St(e,t,n){if(e.kind==="file"){if(!Cr(e.path))throw Error(`Source file not found: ${e.path}`);let r=Dr(e.path,"utf8");return{text:r,contentSource:"file",title:he(e),mime:"text/plain",size:r.length,hash:z(r),revision:null,extractedTextRef:null,metadata:{path:e.path},permissions:{mode:"read_only"}}}if(e.kind==="s3"){let r=await Ur(e.uri,t,n);return{text:r,contentSource:"s3",title:he(e),mime:"text/plain",size:r.length,hash:z(r),revision:null,extractedTextRef:null,metadata:{bucket:e.bucket,key:e.key},permissions:{mode:"read_only"}}}if(e.kind==="web"){let r=await jr(e.url,n);return{text:r.text,contentSource:"web",title:he(e),mime:r.mime,size:r.text.length,hash:z(r.text),revision:null,extractedTextRef:null,metadata:{url:e.url},permissions:{mode:"read_only"}}}throw Error(`Direct source reading is not available for 
${e.uri}`)}async function Mr(e,t,n){if(e.startsWith("open-files://"))throw Error("Open-files extracted text refs require an open-files resolver API. Ingest an open-files manifest with extracted_text or an extracted_text_ref using file://, s3://, or https://.");let r=P(e);return{text:(await St(r,t,n)).text,contentSource:"extracted_text_ref"}}async function Kr(e){let t=await ge({dbPath:e.dbPath,sourceRef:e.sourceRef,purpose:e.purpose??"knowledge_index",limit:100,safetyPolicy:e.safetyPolicy,now:e.now});if(!t.resolved)throw Error("Open-files source is not in the local knowledge catalog. Ingest an open-files manifest first or use the open-files resolver API.");if(t.revision?.extracted_text_uri&&!t.content.text_available){let r=await Mr(t.revision.extracted_text_uri,e.config,e.safetyPolicy);return{text:r.text,contentSource:r.contentSource,title:t.source?.title??null,mime:t.content.mime,size:r.text.length,hash:t.revision.hash??z(r.text),revision:t.revision.revision,extractedTextRef:t.revision.extracted_text_uri,metadata:t.source?.metadata??{},permissions:t.source?.permissions??{mode:"read_only"}}}if(t.chunks.length===0)throw Error("Open-files source has no extracted text chunks yet. Ingest an open-files manifest with extracted_text or extracted_text_ref first.");let n=t.chunks.map((r)=>r.text).join(`
|
|
379
379
|
|
|
380
|
-
`);return{text:
|
|
380
|
+
`);return{text:n,contentSource:"catalog_chunks",title:t.source?.title??null,mime:t.content.mime,size:n.length,hash:t.revision?.hash??z(n),revision:t.revision?.revision??null,extractedTextRef:t.revision?.extracted_text_uri??null,metadata:t.source?.metadata??{},permissions:t.source?.permissions??{mode:"read_only"}}}function Fr(e,t,n,r){let i=n.hash??z(n.text),s={...n.metadata,source_ref:e,content_source:n.contentSource,read_only:!0},d={source_ref:e,name:n.title??he(t),mime:n.mime??"text/plain",size:n.size??n.text.length,hash:i,revision:n.revision??i,status:"active",updated_at:new Date().toISOString(),permissions:{mode:"read_only",allowed_purposes:[r],...n.permissions},metadata:s,extracted_text_ref:n.extractedTextRef,extracted_text:n.text};if(t.kind==="open-files"){if(t.entity==="file")d.file_id=t.id;if(t.entity==="source")d.source_id=t.id,d.path=t.path}if(t.kind==="file")d.path=t.path;if(t.kind==="s3")d.path=t.key;if(t.kind==="web")d.url=t.url;return d}async function wt(e){let t=e.purpose??"knowledge_index",n=P(e.sourceRef),r=n.kind==="open-files"?await Kr(e):await St(n,e.config,e.safetyPolicy),i=Fr(e.sourceRef,n,r,t);return{...await je({dbPath:e.dbPath,items:[i],sourceLabel:e.sourceRef,readAction:"source_ref_ingest_read",safetyPolicy:e.safetyPolicy,now:e.now}),source_ref:e.sourceRef,content_source:r.contentSource,read_only:!0,hash:String(i.hash)}}function Ee(e){if(!e)return{};try{let t=JSON.parse(e);return t&&typeof t==="object"&&!Array.isArray(t)?t:{}}catch{return{}}}function D(e,t){for(let n of t){let r=e[n];if(typeof r==="string"&&r.length>0)return r}return null}function Rt(e,t){for(let n of t){let r=e[n];if(typeof r==="number"&&Number.isFinite(r))return r}return null}function Ot(e){return Array.from(new Set(e))}function Wr(e){let t=e.normalize("NFKC").toLowerCase().match(/[\p{L}\p{N}_]+/gu)??[];return Ot(t.filter((n)=>n.length>0)).slice(0,16)}function Xr(e){if(e.length===0)return null;return e.map((t)=>`${t}*`).join(" OR ")}function $r(e){return 
e.replace(/[\\%_]/g,(t)=>`\\${t}`)}function Nt(e,t){return e.flatMap((n)=>Array.from({length:t},()=>`%${$r(n)}%`))}function Br(e,t){let n=Number.isFinite(e)?1/(1+Math.abs(e)):0,r=1/(1+t);return ke(Math.max(n,r))}function At(e,t){if(t.length===0)return 0;let n=t.filter((r)=>e.includes(r)).length;if(n===0)return 0;return ke(Math.min(0.85,0.35+n/t.length*0.5))}function Hr(e){return ke(Math.max(0,Math.min(1,(e+1)/2)))}function ke(e){return Number(e.toFixed(6))}function Z(e,t){let n=e.keyword??0,r=e.semantic??0,i=e.catalog??0,s=t?.chunk_id?0.05:0;return ke(Math.min(1,n*0.55+r*0.4+i*0.35+s))}function Me(e){let t=e.provenance;return t&&typeof t==="object"&&!Array.isArray(t)?t:null}function qr(e){let t=Ee(e.chunk_metadata_json),n=Me(t);if(n)return n;if(!e.source_revision_id&&!e.source_uri)return null;return K({source_ref:D(t,["source_ref"]),source_uri:e.source_uri??D(t,["source_uri"]),source_kind:e.source_kind??D(t,["source_kind"]),source_revision_id:e.source_revision_id,revision:e.revision??D(t,["revision"]),hash:e.hash??D(t,["hash"]),chunk_id:e.chunk_id,start_offset:e.start_offset??Rt(t,["start_offset"]),end_offset:e.end_offset??Rt(t,["end_offset"]),status:D(t,["status"]),resolver:"open-files-read-only"})}function zr(e,t,n){if(!t)return[];return e.query(`SELECT
|
|
381
|
+
chunks_fts.chunk_id,
|
|
382
|
+
c.kind AS chunk_kind,
|
|
383
|
+
c.wiki_page_id,
|
|
384
|
+
c.text,
|
|
385
|
+
c.token_count,
|
|
386
|
+
c.start_offset,
|
|
387
|
+
c.end_offset,
|
|
388
|
+
c.metadata_json AS chunk_metadata_json,
|
|
389
|
+
c.source_revision_id,
|
|
390
|
+
sr.revision,
|
|
391
|
+
sr.hash,
|
|
392
|
+
s.uri AS source_uri,
|
|
393
|
+
s.kind AS source_kind,
|
|
394
|
+
s.title AS source_title,
|
|
395
|
+
wp.path AS wiki_path,
|
|
396
|
+
wp.title AS wiki_title,
|
|
397
|
+
wp.artifact_uri AS wiki_artifact_uri,
|
|
398
|
+
wp.content_hash AS wiki_content_hash,
|
|
399
|
+
wp.status AS wiki_status,
|
|
400
|
+
wp.metadata_json AS wiki_metadata_json,
|
|
401
|
+
bm25(chunks_fts) AS rank
|
|
402
|
+
FROM chunks_fts
|
|
403
|
+
JOIN chunks c ON c.id = chunks_fts.chunk_id
|
|
404
|
+
LEFT JOIN source_revisions sr ON sr.id = c.source_revision_id
|
|
405
|
+
LEFT JOIN sources s ON s.id = sr.source_id
|
|
406
|
+
LEFT JOIN wiki_pages wp ON wp.id = c.wiki_page_id
|
|
407
|
+
WHERE chunks_fts MATCH ?
|
|
408
|
+
ORDER BY rank ASC
|
|
409
|
+
LIMIT ?`).all(t,n)}function It(e,t){if(t.length===0)return"1 = 0";return t.map(()=>`(${e.map((r)=>`lower(COALESCE(${r}, '')) LIKE ? ESCAPE '\\'`).join(" OR ")})`).join(" OR ")}function Gr(e,t,n){let r=["path","title","artifact_uri","metadata_json"];return e.query(`SELECT id, path, title, artifact_uri, content_hash, status, metadata_json
|
|
410
|
+
FROM wiki_pages
|
|
411
|
+
WHERE status = 'active' AND (${It(r,t)})
|
|
412
|
+
ORDER BY updated_at DESC
|
|
413
|
+
LIMIT ?`).all(...Nt(t,r.length),n)}function Jr(e,t,n){let r=["kind","name","shard_key","artifact_uri","metadata_json"];return e.query(`SELECT id, kind, name, artifact_uri, shard_key, metadata_json
|
|
414
|
+
FROM knowledge_indexes
|
|
415
|
+
WHERE ${It(r,t)}
|
|
416
|
+
ORDER BY updated_at DESC
|
|
417
|
+
LIMIT ?`).all(...Nt(t,r.length),n)}function Yr(e,t){let n=Ee(e.chunk_metadata_json),r=qr(e),i=D(n,["source_ref"]),s=e.source_uri??D(n,["source_uri"]),d=Boolean(e.wiki_page_id),l={kind:d?"wiki_chunk":"source_chunk",id:e.chunk_id,title:d?e.wiki_title:e.source_title,text:e.text,score:0,scores:{keyword:t},source:s||i?{uri:s,ref:i,kind:e.source_kind??D(n,["source_kind"]),revision:e.revision??D(n,["revision"]),hash:e.hash??D(n,["hash"])}:null,citation:{chunk_id:e.chunk_id,start_offset:e.start_offset,end_offset:e.end_offset},artifact:d?{uri:e.wiki_artifact_uri,path:e.wiki_path,hash:e.wiki_content_hash,shard_key:e.wiki_path}:null,provenance:r,reasons:["keyword_match"]};return l.score=Z(l.scores,l.citation),l}function Vr(e,t){let n=Ee(e.metadata_json),r=At(`${e.path} ${e.title} ${e.artifact_uri??""} ${e.metadata_json}`.toLowerCase(),t),i={kind:"wiki_page",id:e.id,title:e.title,text:null,score:0,scores:{catalog:r},source:null,citation:null,artifact:{uri:e.artifact_uri,path:e.path,hash:e.content_hash,shard_key:e.path},provenance:Me(n),reasons:["wiki_catalog_match"]};return i.score=Z(i.scores,i.citation),i}function Qr(e,t){let n=Ee(e.metadata_json),r=At(`${e.kind} ${e.name} ${e.shard_key??""} ${e.artifact_uri??""} ${e.metadata_json}`.toLowerCase(),t),i={kind:"knowledge_index",id:e.id,title:e.name,text:null,score:0,scores:{catalog:r},source:null,citation:null,artifact:{uri:e.artifact_uri,path:D(n,["artifact_key"]),hash:D(n,["content_hash"]),shard_key:e.shard_key},provenance:Me(n),reasons:["index_catalog_match"]};return i.score=Z(i.scores,i.citation),i}function me(e,t){let n=`${t.kind}:${t.id}`,r=e.get(n);if(!r){e.set(n,t);return}r.scores={keyword:Math.max(r.scores.keyword??0,t.scores.keyword??0)||void 0,semantic:Math.max(r.scores.semantic??0,t.scores.semantic??0)||void 0,catalog:Math.max(r.scores.catalog??0,t.scores.catalog??0)||void 
0},r.reasons=Ot([...r.reasons,...t.reasons]),r.text=r.text??t.text,r.title=r.title??t.title,r.source=r.source??t.source,r.citation=r.citation??t.citation,r.artifact=r.artifact??t.artifact,r.provenance=r.provenance??t.provenance,r.score=Z(r.scores,r.citation)}function Zr(e){let t={source_chunk:0,wiki_chunk:1,wiki_page:2,knowledge_index:3};return e.sort((n,r)=>{if(r.score!==n.score)return r.score-n.score;return t[n.kind]-t[r.kind]||n.id.localeCompare(r.id)})}async function Lt(e){let t=e.query.trim();if(!t)throw Error("Search query is required.");let n=Math.max(1,Math.min(e.limit??10,100)),r=Wr(t),i=Xr(r),s=e.semantic===!0||e.fake===!0||Boolean(e.modelRef),d=[],l=null,a=null,o=null,c=0,u=0,_=0,f=new Map;w(e.dbPath);let m=S(e.dbPath);try{let k=zr(m,i,Math.max(n*3,20));c=k.length,k.forEach((x,p)=>me(f,Yr(x,Br(x.rank,p))));let g=Gr(m,r,Math.max(n,10)),b=Jr(m,r,Math.max(n,10));u=g.length+b.length,g.forEach((x)=>me(f,Vr(x,r))),b.forEach((x)=>me(f,Qr(x,r)))}finally{m.close()}if(s)try{let k=await ce({dbPath:e.dbPath,query:t,limit:Math.max(n*3,20),config:e.config,env:e.env,modelRef:e.modelRef,dimensions:e.dimensions,fake:e.fake,batchSize:e.batchSize,maxParallelCalls:e.maxParallelCalls});l=k.provider,a=k.model,o=k.dimensions,_=k.results.length;for(let g of k.results){let b={kind:"source_chunk",id:g.chunk_id,title:null,text:g.text,score:0,scores:{semantic:Hr(g.score)},source:{uri:g.source_uri,ref:g.source_ref,kind:g.provenance?.source_kind??null,revision:g.revision,hash:g.hash},citation:{chunk_id:g.chunk_id,start_offset:g.provenance?.start_offset??null,end_offset:g.provenance?.end_offset??null},artifact:null,provenance:g.provenance,reasons:["semantic_match"]};b.score=Z(b.scores,b.citation),me(f,b)}}catch(k){d.push(`semantic_search_failed: ${k instanceof Error?k.message:String(k)}`)}let 
y=Zr(Array.from(f.values())).slice(0,n);return{query:t,limit:n,mode:{keyword:!0,catalog:!0,semantic:s},semantic_provider:l,semantic_model:a,semantic_dimensions:o,counts:{keyword_results:c,catalog_results:u,semantic_results:_,merged_results:y.length},warnings:d,results:y}}import{createHash as ei,randomUUID as ti}from"crypto";var Ct=[{kind:"schema",prefix:"schemas/",description:"Machine-readable agent schemas and source rules."},{kind:"index",prefix:"indexes/",description:"Small orientation indexes and future shard manifests."},{kind:"log",prefix:"logs/",description:"Append-only JSONL run and wiki-maintenance log partitions."},{kind:"run",prefix:"runs/",description:"Prompt/tool/cost ledgers and generated output records."},{kind:"wiki_page",prefix:"wiki/",description:"Generated cited Markdown pages, not raw source files."},{kind:"export",prefix:"exports/",description:"Portable exports and snapshots of derived knowledge state."}];function Dt(e){let t=typeof e==="string"?Buffer.from(e):Buffer.from(e);return{hash:`sha256:${ei("sha256").update(t).digest("hex")}`,size_bytes:t.byteLength}}function Pt(e){return Ct.find((n)=>e.startsWith(n.prefix))?.kind??"artifact"}function Ut(e,t,n="global"){let 
r=Ke(e,t),i=e.storage.s3??null,s=i?.prefix?.replace(/^\/+|\/+$/g,"")??"",d=i?`s3://${i.bucket}/${s?`${s}/`:""}`:"";return{scope:n,mode:e.mode,storage_type:e.storage.type,workspace_home:t.home,local_layout:{app_path:Y,config_path:t.configPath,json_store_path:t.jsonStorePath,knowledge_db_path:t.knowledgeDbPath,directories:{artifacts:t.artifactsDir,cache:t.cacheDir,exports:t.exportsDir,indexes:t.indexesDir,logs:t.logsDir,runs:t.runsDir,schemas:t.schemasDir,wiki:t.wikiDir}},artifact_store:{type:e.storage.type,artifacts_root:e.storage.artifacts_root,uri_prefix:e.storage.type==="s3"?d:`file://${t.artifactsDir}/`,s3:i?{bucket:i.bucket,prefix:s,region:i.region??null,profile:i.profile??null,server_side_encryption:i.server_side_encryption??null,kms_key_configured:Boolean(i.kms_key_id)}:null},source_ownership:{owner:"open-files",preferred_ref:e.sources.preferred_ref,allowed_schemes:e.sources.allowed_schemes,raw_source_bytes_stored_in_open_knowledge:!1,stores:["source refs","source revisions and hashes","citation spans","redacted extracted chunks","embeddings","generated wiki artifacts","indexes","run ledgers"],does_not_store:["raw open-files bytes","S3 object credentials","connector secrets","hosted tenant ownership state"]},generated_artifacts:Ct,scalability:{catalog:"knowledge.db tracks sources, revisions, chunks, citations, indexes, runs, and storage_objects.",indexes:"Indexes are cataloged DB rows plus sharded artifacts, not one giant index.md.",logs:"Logs use dated JSONL partitions under logs/yyyy/mm/dd.jsonl.",markdown:"Markdown pages are the readable wiki layer over DB/object-store state."},warnings:r.warnings}}function Ke(e,t){let n=[],r=[];if(!t.home.endsWith(Y))r.push(`Workspace home does not end with ${Y}: ${t.home}`);if(e.storage.type==="s3"){if(!e.storage.s3?.bucket)n.push("storage.s3.bucket is required when storage.type is s3.");if(!e.storage.s3?.prefix)r.push("storage.s3.prefix is empty; generated knowledge artifacts will be written at the bucket 
root.");if(e.mode==="local")r.push("storage.type is s3 while mode is local; this is valid for BYO S3, but hosted wrappers should set mode to hosted.")}if(e.storage.type==="local"&&e.storage.s3)r.push("storage.s3 is configured but ignored while storage.type is local.");if(e.sources.preferred_ref!=="open-files")r.push("sources.preferred_ref should stay open-files for durable company knowledge.");if(!e.sources.allowed_schemes.includes("open-files"))n.push("sources.allowed_schemes must include open-files.");return{ok:n.length===0,errors:n,warnings:r}}function jt(e,t,n=new Date){let r=n.toISOString(),i=e.prepare(`
|
|
381
418
|
INSERT INTO storage_objects (
|
|
382
419
|
id, artifact_uri, kind, content_type, hash, size_bytes, metadata_json, created_at, updated_at
|
|
383
420
|
)
|
|
@@ -389,7 +426,7 @@ VALUES (4, datetime('now'));
|
|
|
389
426
|
size_bytes = excluded.size_bytes,
|
|
390
427
|
metadata_json = excluded.metadata_json,
|
|
391
428
|
updated_at = excluded.updated_at
|
|
392
|
-
`);e.transaction((
|
|
429
|
+
`);e.transaction((d)=>{for(let l of d)i.run(ti(),l.uri,l.kind,l.content_type??null,l.hash??null,l.size_bytes??null,JSON.stringify({key:l.key,...l.metadata??{}}),r,r)})(t)}import{createHash as ni}from"crypto";function ri(e){let t=String(e.getUTCFullYear()),n=String(e.getUTCMonth()+1).padStart(2,"0"),r=String(e.getUTCDate()).padStart(2,"0");return{year:t,month:n,day:r}}function Fe(e,t){return`${e}_${ni("sha256").update(t).digest("hex").slice(0,20)}`}function ii(e){let t=e.trim().split(/\s+/).filter(Boolean).length;return Math.max(1,Math.ceil(t*1.25))}function si(){return`# Knowledge Agent Schema v1
|
|
393
430
|
|
|
394
431
|
## Source Rules
|
|
395
432
|
|
|
@@ -414,7 +451,7 @@ VALUES (4, datetime('now'));
|
|
|
414
451
|
## Lint Rules
|
|
415
452
|
|
|
416
453
|
- Flag stale pages, missing citations, contradictions, orphan pages, duplicate pages, and unresolved source refs.
|
|
417
|
-
`}function
|
|
454
|
+
`}function oi(){return`# Knowledge Index
|
|
418
455
|
|
|
419
456
|
This is a compact orientation index for agents. It is not the full search index.
|
|
420
457
|
|
|
@@ -429,18 +466,19 @@ This is a compact orientation index for agents. It is not the full search index.
|
|
|
429
466
|
|
|
430
467
|
Raw source files are resolved through open-files. This app stores source refs,
|
|
431
468
|
citations, chunks, generated wiki artifacts, indexes, and run records.
|
|
432
|
-
`}function
|
|
469
|
+
`}function Mt(){return`# Wiki
|
|
433
470
|
|
|
434
471
|
Generated durable knowledge pages live here.
|
|
435
472
|
|
|
436
473
|
Pages should be concise, cited, and organized for both humans and agents.
|
|
437
|
-
`}async function
|
|
438
|
-
`,content_type:"application/x-ndjson"}],
|
|
474
|
+
`}async function Kt(e,t=new Date){let{year:n,month:r,day:i}=ri(t),s="schemas/v1.md",d="indexes/root.md",l="wiki/README.md",a=`logs/${n}/${r}/${i}.jsonl`,o={ts:t.toISOString(),event:"wiki_layout_initialized",schema_key:"schemas/v1.md",root_index_key:"indexes/root.md",wiki_readme_key:"wiki/README.md"},c=[{key:"schemas/v1.md",body:si(),content_type:"text/markdown"},{key:"indexes/root.md",body:oi(),content_type:"text/markdown"},{key:"wiki/README.md",body:Mt(),content_type:"text/markdown"},{key:a,body:`${JSON.stringify(o)}
|
|
475
|
+
`,content_type:"application/x-ndjson"}],u=await Promise.all(c.map(async(_)=>{let f=await e.put(_);return{key:f.key,uri:f.uri,kind:Pt(_.key),content_type:_.content_type,metadata:{provenance:Ie({generated_from:"wiki_layout_init",artifact_key:_.key,citation_required:_.key.startsWith("wiki/")||_.key.startsWith("indexes/")})},...Dt(_.body)}}));return{schema_key:"schemas/v1.md",root_index_key:"indexes/root.md",wiki_readme_key:"wiki/README.md",log_key:a,artifacts:u,written:["schemas/v1.md","indexes/root.md","wiki/README.md",a]}}function We(e){let t=e.metadata?.provenance;if(t&&typeof t==="object"&&!Array.isArray(t))return t;return Ie({generated_from:"wiki_layout_init",artifact_key:e.key})}function ai(e,t,n,r,i,s){let d=We(r),l=Fe("chk",`${t}\x00${r.hash??r.uri}`),a=e.query("SELECT id FROM chunks WHERE wiki_page_id = ?").all(t);for(let o of a)e.run("DELETE FROM chunks_fts WHERE chunk_id = ?",[o.id]);e.run("DELETE FROM chunks WHERE wiki_page_id = ?",[t]),e.run(`INSERT INTO chunks (id, wiki_page_id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json, created_at)
|
|
476
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,[l,t,"wiki",0,i,ii(i),0,i.length,JSON.stringify({artifact_key:r.key,artifact_uri:r.uri,content_hash:r.hash??null,provenance:d}),s]),e.run("INSERT INTO chunks_fts (chunk_id, text, title, source_uri) VALUES (?, ?, ?, ?)",[l,i,n,r.uri])}function Ft(e,t,n=new Date){let r=n.toISOString(),i=t.find((d)=>d.key.endsWith("indexes/root.md")),s=t.find((d)=>d.key.endsWith("wiki/README.md"));if(i)e.run(`INSERT INTO knowledge_indexes (id, kind, name, artifact_uri, shard_key, metadata_json, created_at, updated_at)
|
|
439
477
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
440
478
|
ON CONFLICT(kind, name, shard_key) DO UPDATE SET
|
|
441
479
|
artifact_uri = excluded.artifact_uri,
|
|
442
480
|
metadata_json = excluded.metadata_json,
|
|
443
|
-
updated_at = excluded.updated_at`,[
|
|
481
|
+
updated_at = excluded.updated_at`,[Fe("idx","root:indexes/root.md"),"root","root",i.uri,"root",JSON.stringify({artifact_key:i.key,content_hash:i.hash??null,provenance:We(i)}),r,r]);if(s){let d=Fe("wiki","wiki/README.md");e.run(`INSERT INTO wiki_pages (id, path, title, artifact_uri, content_hash, status, metadata_json, created_at, updated_at)
|
|
444
482
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
445
483
|
ON CONFLICT(path) DO UPDATE SET
|
|
446
484
|
title = excluded.title,
|
|
@@ -448,7 +486,7 @@ Pages should be concise, cited, and organized for both humans and agents.
|
|
|
448
486
|
content_hash = excluded.content_hash,
|
|
449
487
|
status = excluded.status,
|
|
450
488
|
metadata_json = excluded.metadata_json,
|
|
451
|
-
updated_at = excluded.updated_at`,[
|
|
489
|
+
updated_at = excluded.updated_at`,[d,"wiki/README.md","Wiki",s.uri,s.hash??null,"active",JSON.stringify({artifact_key:s.key,provenance:We(s)}),r,r]),ai(e,d,"Wiki",s,Mt(),r)}}class Wt{options;ensuredWorkspace;cachedConfig;constructor(e={}){this.options=e}get scope(){return this.options.scope??"global"}get workspace(){return this.ensuredWorkspace??Be(this.options.scope,this.options.cwd)}ensureWorkspace(){if(!this.ensuredWorkspace)this.ensuredWorkspace=$e(this.workspace.home);return this.ensuredWorkspace}jsonStorePath(){return this.ensureWorkspace().jsonStorePath}config(){if(!this.cachedConfig){let e=this.ensureWorkspace();this.cachedConfig=He(e.configPath)}return this.cachedConfig}safetyPolicy(){return Et(this.config(),this.ensureWorkspace())}artifactStore(){return Ze(this.config(),this.ensureWorkspace())}storageContract(){return Ut(this.config(),this.ensureWorkspace(),this.scope)}validateStorage(){return Ke(this.config(),this.ensureWorkspace())}paths(){let e=this.ensureWorkspace();return{ok:!0,scope:this.scope,home:e.home,config_path:e.configPath,json_store_path:e.jsonStorePath,knowledge_db_path:e.knowledgeDbPath,artifacts_dir:e.artifactsDir,indexes_dir:e.indexesDir,logs_dir:e.logsDir,runs_dir:e.runsDir,schemas_dir:e.schemasDir,wiki_dir:e.wikiDir,config:this.config(),message:e.home}}initDb(){return w(this.ensureWorkspace().knowledgeDbPath)}dbStats(){let e=this.ensureWorkspace();return w(e.knowledgeDbPath),Je(e.knowledgeDbPath)}async initWiki(){let e=this.ensureWorkspace();w(e.knowledgeDbPath);let t=await Kt(this.artifactStore()),n=S(e.knowledgeDbPath);try{jt(n,t.artifacts),Ft(n,t.artifacts)}finally{n.close()}return t}async ingestManifest(e){let t=this.ensureWorkspace();return vt({dbPath:t.knowledgeDbPath,input:e,config:this.config(),safetyPolicy:this.safetyPolicy()})}async ingestSource(e,t){let n=this.ensureWorkspace();return wt({dbPath:n.knowledgeDbPath,sourceRef:e,purpose:t,config:this.config(),safetyPolicy:this.safetyPolicy()})}async resolveSource(e,t={}){let 
n=this.ensureWorkspace();return ge({dbPath:n.knowledgeDbPath,sourceRef:e,purpose:t.purpose,limit:t.limit,safetyPolicy:this.safetyPolicy()})}async consumeOutbox(e){let t=this.ensureWorkspace();return Tt({dbPath:t.knowledgeDbPath,input:e,config:this.config(),safetyPolicy:this.safetyPolicy()})}providerStatus(e=process.env){return it(this.config(),e)}modelRegistry(){return Ae(this.config())}embeddingStatus(){let e=this.ensureWorkspace();return _t(e.knowledgeDbPath)}async indexEmbeddings(e={}){let t=this.ensureWorkspace();return lt({...e,dbPath:t.knowledgeDbPath,config:this.config()})}async semanticSearch(e){let t=this.ensureWorkspace();return ce({...e,dbPath:t.knowledgeDbPath,config:this.config()})}async search(e){let t=this.ensureWorkspace();return Lt({...e,dbPath:t.knowledgeDbPath,config:this.config()})}}function Xt(e={}){return new Wt(e)}var ee={name:"@hasna/knowledge",version:"0.2.15",description:"Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",type:"module",bin:{"open-knowledge":"bin/open-knowledge.js","open-knowledge-mcp":"bin/open-knowledge-mcp.js"},files:["bin","src","docs","LICENSE","README.md"],scripts:{test:"bun test","test:cli":"bun test tests/cli.test.ts",build:"bun build --target=bun --outfile=bin/open-knowledge.js --minify --external @aws-sdk/client-s3 --external @aws-sdk/credential-providers --external ai --external @ai-sdk/openai --external @ai-sdk/anthropic --external @ai-sdk/deepseek src/cli.ts && bun build --target=bun --outfile=bin/open-knowledge-mcp.js --external @modelcontextprotocol/sdk --external @aws-sdk/client-s3 --external @aws-sdk/credential-providers --external ai --external @ai-sdk/openai --external @ai-sdk/anthropic --external @ai-sdk/deepseek src/mcp.js",prepublishOnly:"bun run build",postinstall:"bun run 
build"},keywords:["knowledge","cli","agents","json","notes","local","store"],license:"Apache-2.0",publishConfig:{registry:"https://registry.npmjs.org",access:"public"},repository:{type:"git",url:"git+https://github.com/hasna/knowledge.git"},bugs:{url:"https://github.com/hasna/knowledge/issues"},author:"Hasna Inc. <hasna@example.com>",engines:{bun:">=1.0",node:">=18"},dependencies:{"@aws-sdk/client-s3":"^3.1063.0","@aws-sdk/credential-providers":"^3.1063.0","@ai-sdk/anthropic":"^3.0.81","@ai-sdk/deepseek":"^2.0.35","@ai-sdk/openai":"^3.0.68","@modelcontextprotocol/sdk":"^1.29.0",ai:"^6.0.197",zod:"^4.3.6"},devDependencies:{"@types/bun":"^1.3.14"}};var $t={debug:0,info:1,warn:2,error:3},ui=()=>{if(process.env.DEBUG)return"debug";if(process.env.LOG_LEVEL==="debug")return"debug";if(process.env.LOG_LEVEL==="warn")return"warn";if(process.env.LOG_LEVEL==="error")return"error";return"info"};function B(e,t,n){if($t[e]<$t[ui()])return;let r={debug:"[DEBUG]",info:"[INFO]",warn:"[WARN]",error:"[ERROR]"}[e],i=n?`${r} ${t} ${JSON.stringify(n)}`:`${r} ${t}`;if(e==="error")console.error(i);else console.error(i)}var di=["add","list","get","delete","update","archive","restore","upsert","untag","export","prune","dedupe","stats","paths","storage","db","wiki","source","ingest","reindex","search","embeddings","providers","safety","help"],Bt={ls:"list",rm:"delete",edit:"update",unarchive:"restore"};function li(e){let t=[],n={};for(let r=0;r<e.length;r+=1){let 
i=e[r];if(!i.startsWith("-")){t.push(i);continue}switch(i){case"--json":n.json=!0;break;case"--yes":case"-y":n.yes=!0;break;case"--help":case"-h":n.help=!0;break;case"--version":case"-v":n.version=!0;break;case"--desc":n.desc=!0;break;case"--page":case"-p":n.page=Number(e[r+1]),r+=1;break;case"--limit":case"-l":n.limit=Number(e[r+1]),r+=1;break;case"--search":case"-s":n.search=e[r+1],r+=1;break;case"--sort":n.sort=e[r+1],r+=1;break;case"--id":n.id=e[r+1],r+=1;break;case"--store":n.store=e[r+1],r+=1;break;case"--title":n.title=e[r+1],r+=1;break;case"--content":n.content=e[r+1],r+=1;break;case"--url":n.url=e[r+1],r+=1;break;case"--tag":case"-t":n.tag=e[r+1],r+=1;break;case"--format":n.format=e[r+1],r+=1;break;case"--completions":n.completions=e[r+1],r+=1;break;case"--purpose":n.purpose=e[r+1],r+=1;break;case"--model":n.model=e[r+1],r+=1;break;case"--dimensions":n.dimensions=Number(e[r+1]),r+=1;break;case"--semantic":n.semantic=!0;break;case"--fake":n.fake=!0;break;case"--no-color":n.noColor=!0;break;case"--scope":n.scope=e[r+1],r+=1;break;case"--older-than":n.olderThan=Number(e[r+1]),r+=1;break;case"--empty":n.empty=!0;break;case"--archived":n.archived=!0;break;case"--include-archived":n.includeArchived=!0;break;default:throw Error(`Unknown flag: ${i}. Run 'open-knowledge --help' for valid options.`)}}return{positional:t,flags:n}}function _i(e){if(!e)return"";return Bt[e]??e}function fi(e,t){let n=Array.from({length:e.length+1},()=>Array(t.length+1).fill(0));for(let r=0;r<=e.length;r+=1)n[r][0]=r;for(let r=0;r<=t.length;r+=1)n[0][r]=r;for(let r=1;r<=e.length;r+=1)for(let i=1;i<=t.length;i+=1){let s=e[r-1]===t[i-1]?0:1;n[r][i]=Math.min(n[r-1][i]+1,n[r][i-1]+1,n[r-1][i-1]+s)}return n[e.length][t.length]}function gi(e){if(!e)return"";let t=[...di,...Object.keys(Bt)],n="",r=Number.POSITIVE_INFINITY;for(let i of t){let s=fi(e,i);if(s<r)r=s,n=i}return r<=3?n:""}function pi(){console.log(`open-knowledge - local agent knowledge store
|
|
452
490
|
|
|
453
491
|
Usage:
|
|
454
492
|
open-knowledge <command> [options]
|
|
@@ -475,6 +513,7 @@ Commands:
|
|
|
475
513
|
ingest manifest <file|s3://> Ingest an open-files manifest into knowledge.db
|
|
476
514
|
ingest source <source-ref> Ingest a read-only source ref into knowledge.db
|
|
477
515
|
reindex outbox <file|s3://> Consume open-files change events and invalidate chunks
|
|
516
|
+
search <query> Hybrid search sources, wiki pages, and indexes
|
|
478
517
|
embeddings status|index|search Build/query local vector embeddings
|
|
479
518
|
providers status|models|check Inspect AI SDK provider config and credentials
|
|
480
519
|
safety status|check|approve|audit|redact
|
|
@@ -486,6 +525,7 @@ Global Options:
|
|
|
486
525
|
--purpose <name> Read-only source purpose (default: knowledge_answer)
|
|
487
526
|
--model <provider:model> AI/embedding model ref
|
|
488
527
|
--dimensions <n> Embedding dimensions for local/fake providers
|
|
528
|
+
--semantic Include vector semantic results in search
|
|
489
529
|
--fake Use deterministic fake embeddings for local tests
|
|
490
530
|
--scope local|global|project Store scope (default: global ~/.hasna/apps/knowledge/)
|
|
491
531
|
--no-color Disable color output
|
|
@@ -523,5 +563,5 @@ Export Options:
|
|
|
523
563
|
|
|
524
564
|
Prune Options:
|
|
525
565
|
--older-than <days> Remove items older than N days
|
|
526
|
-
--empty Remove items with empty content`)}function
|
|
527
|
-
_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex embeddings providers safety help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(--fake)--fake" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"{created,title}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--purpose)--purpose[purpose]:" "(--model)--model[model ref]:" "(--dimensions)--dimensions[embedding dimensions]:number:" "(--no-color)--no-color[disable color]" "(--scope)--scope"{local,global,project}:" }; _open_knowledge`);else if(a==="fish")console.log('complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex embeddings providers safety help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -l fake; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge 
-l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l purpose; complete -c open-knowledge -l model; complete -c open-knowledge -l dimensions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"');else throw Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");return}let n=Fn(t[0]);if(!n||r.help||n==="help"){$n(t[1]);return}let i=Nt({scope:r.scope}),o=r.store;if(!o)if(r.scope==="project"||r.scope==="local")o=i.jsonStorePath();else o=he();if(n==="paths"){m(i.paths(),r.json);return}if(n==="storage"){let a=t[1]??"status";if(a==="status"){let s=i.storageContract(),d=i.validateStorage();m({ok:d.ok,...s,validation:d,message:`${s.storage_type} artifact storage at ${s.artifact_store.uri_prefix}`},r.json);return}if(a==="validate"){let s=i.validateStorage();m({ok:s.ok,validation:s,message:s.ok?"Storage contract valid":`Storage contract invalid: ${s.errors.join("; ")}`},r.json);return}throw Error("Invalid storage action. Use 'status' or 'validate'.")}if(n==="db"){let a=t[1]??"init";if(a!=="init"&&a!=="stats")throw Error("Invalid db action. Use 'init' or 'stats'.");if(a==="init"){let d=i.initDb();m({ok:!0,...d,message:`Initialized ${d.path}`},r.json);return}let s=i.dbStats();m({ok:!0,path:i.workspace.knowledgeDbPath,...s,message:`knowledge.db schema v${s.schema_version}`},r.json);return}if(n==="wiki"){if((t[1]??"init")!=="init")throw Error("Invalid wiki action. 
Use 'init'.");let s=await i.initWiki();m({ok:!0,...s,message:`Initialized wiki layout in ${i.workspace.home}`},r.json);return}if(n==="safety"){let a=t[1]??"status",s=i.ensureWorkspace(),d=i.safetyPolicy();i.initDb();let c=x(s.knowledgeDbPath);try{if(a==="status"){m({ok:!0,mode:d.mode,workspace:s.home,allow_write_roots:d.allowWriteRoots,read_only_source_access:d.readOnlySourceAccess,network:d.network,redaction:d.redaction,approvals:d.approvals,message:`Safety policy: ${d.mode}`},r.json);return}if(a==="check"){let l=t[2]??"generated_write",f=t[3]??null,h;try{if(l==="web_search")oe(d),h={action:l,target_uri:f,approval_required:!1,approved:!0,decision:"allow"};else if(l==="s3_read"){if(!f)throw Error("safety check s3_read requires an s3:// target.");K(f,d),h={action:l,target_uri:f,approval_required:!1,approved:!0,decision:"allow"}}else h=_t(c,d,l,f);w(c,{event_type:"safety_check",action:l,target_uri:f,decision:h.decision==="allow"?"allow":"requires_approval",metadata:h}),m({ok:!0,...h,message:`Safety check ${h.decision}`},r.json);return}catch(y){throw w(c,{event_type:"safety_check",action:l,target_uri:f,decision:"deny",metadata:{error:y instanceof Error?y.message:String(y)}}),y}}if(a==="approve"){let l=t[2]??"generated_write",f=t[3]??null,h=lt(c,{action:l,target_uri:f,reason:"local-cli approval",metadata:{scope:r.scope??"global"}});w(c,{event_type:"approval",action:l,target_uri:f,decision:"allow",metadata:{approval_id:h.id}}),m({ok:!0,...h,action:l,target_uri:f,message:`Approved ${l}`},r.json);return}if(a==="audit"){let l=c.query("SELECT id, event_type, action, target_uri, decision, metadata_json, created_at FROM audit_events ORDER BY created_at DESC LIMIT 50").all().map((f)=>({id:f.id,event_type:f.event_type,action:f.action,target_uri:f.target_uri,decision:f.decision,metadata:JSON.parse(f.metadata_json),created_at:f.created_at}));m({ok:!0,events:l,message:`${l.length} audit event(s)`},r.json);return}if(a==="redact"){let l=t.slice(2).join(" ");if(!l)throw Error("Usage: 
open-knowledge safety redact <text>");let f=ae(l,d);if(f.findings.length>0)de(c,{source_uri:"safety://redact",findings:f.findings,metadata:{command:"safety redact"}});w(c,{event_type:"redaction",action:"safety_redact",target_uri:"safety://redact",decision:f.findings.length>0?"redacted":"allow",metadata:{findings:f.findings.length}}),m({ok:!0,text:f.text,findings:f.findings,message:`Redacted ${f.findings.length} finding(s)`},r.json);return}throw Error("Invalid safety action. Use 'status', 'check', 'approve', 'audit', or 'redact'.")}finally{c.close()}}if(n==="source"){if((t[1]??"")!=="resolve")throw Error("Invalid source action. Use 'resolve'.");let s=t[2];if(!s)throw Error("Usage: open-knowledge source resolve <source-ref>");let d=await i.resolveSource(s,{purpose:r.purpose,limit:r.limit});m({ok:!0,...d,message:d.resolved?`Resolved ${d.source_ref} (${d.content.chunks_returned}/${d.content.chunks_total} chunks)`:`Source not indexed: ${s}`},r.json);return}if(n==="ingest"){let a=t[1]??"";if(a==="manifest"){let s=t[2];if(!s)throw Error("Usage: open-knowledge ingest manifest <file|s3://bucket/key>");let d=await i.ingestManifest(s);m({ok:!0,...d,message:`Ingested ${d.items_seen} manifest item(s)`},r.json);return}if(a==="source"){let s=t[2];if(!s)throw Error("Usage: open-knowledge ingest source <source-ref>");let d=await i.ingestSource(s,r.purpose);m({ok:!0,...d,message:`Ingested source ${d.source_ref} (${d.chunks_inserted} chunks)`},r.json);return}throw Error("Invalid ingest action. Use 'manifest' or 'source'.")}if(n==="reindex"){if((t[1]??"")!=="outbox")throw Error("Invalid reindex action. 
Use 'outbox'.");let s=t[2];if(!s)throw Error("Usage: open-knowledge reindex outbox <file|s3://bucket/key>");let d=await i.consumeOutbox(s);m({ok:!0,...d,message:`Consumed ${d.events_seen} outbox event(s)`},r.json);return}if(n==="embeddings"){let a=t[1]??"status";if(a==="status"){let s=i.embeddingStatus();m({ok:!0,...s,message:`${s.total_vector_entries} vector index entries`},r.json);return}if(a==="index"){let s=await i.indexEmbeddings({limit:r.limit,modelRef:r.model,dimensions:r.dimensions,fake:r.fake});m({ok:!0,...s,message:`Embedded ${s.chunks_embedded} chunk(s)`},r.json);return}if(a==="search"){let s=t.slice(2).join(" ");if(!s)throw Error("Usage: open-knowledge embeddings search <query>");let d=await i.semanticSearch({query:s,limit:r.limit,modelRef:r.model,dimensions:r.dimensions,fake:r.fake});m({ok:!0,...d,message:`${d.results.length} semantic result(s)`},r.json);return}throw Error("Invalid embeddings action. Use 'status', 'index', or 'search'.")}if(n==="providers"){let a=t[1]??"status";if(a==="status"){let s=i.providerStatus(),d=s.providers.filter((c)=>c.configured).length;m({ok:!0,...s,message:`${d}/${s.providers.length} provider credential(s) configured`},r.json);return}if(a==="models"){let s=i.modelRegistry();m({ok:!0,models:s,message:`${s.length} model alias(es)`},r.json);return}if(a==="check"){let s=t[2]??"default",d=ve(s,i.config()),c=F(d),l=ie(c.provider,i.config());m({ok:!0,target:s,model_ref:d,provider:c.provider,model:c.model,credential:l,message:`${c.provider} credentials configured`},r.json);return}throw Error("Invalid providers action. 
Use 'status', 'models', or 'check'.")}if(Ee(o),n==="add"){let a=t[1],s=t[2];if(!a||!s)throw Error("Usage: open-knowledge add <title> <content>");C(o,()=>{let d=L(o),c={id:ye(),title:a,content:s,url:r.url??null,tags:r.tag?[r.tag]:[],created_at:new Date().toISOString(),updated_at:new Date().toISOString()};d.items.push(c),U(o,d),W("info","Item added",{id:c.id,title:c.title}),m({ok:!0,item:c,message:`Added ${c.id}`},r.json)});return}if(n==="list"){if(r.format!==void 0&&r.format!=="table"&&r.format!=="json")throw Error("Invalid --format value for list. Use 'table' or 'json'.");C(o,()=>{let a=L(o),s=Number.isFinite(r.page)&&r.page>0?r.page:1,d=Number.isFinite(r.limit)&&r.limit>0?r.limit:20,c=r.search?String(r.search).toLowerCase():"",l=r.tag?String(r.tag).toLowerCase():"",f=r.format==="table"||!r.json&&!r.format&&Bn(r),h=r.json||r.format==="json",y=a.items;if(r.archived)y=y.filter((p)=>p.archived===!0);else if(!r.includeArchived)y=y.filter((p)=>!p.archived);if(c)y=y.filter((p)=>p.title.toLowerCase().includes(c)||p.content.toLowerCase().includes(c));if(l)y=y.filter((p)=>p.tags&&p.tags.map((G)=>G.toLowerCase()).includes(l));let{sorted:v,sort:T,direction:b}=qn(y,r),R=(s-1)*d,g=v.slice(R,R+d),A=Math.max(1,Math.ceil(v.length/d));if(h){m({ok:!0,page:s,limit:d,total:v.length,total_pages:A,sort:T,direction:b,items:g},!0);return}if(g.length===0){m(`No items found (search=${c||"none"}, tag=${l||"none"})`,!1);return}if(f){let p=(j)=>j,G=`${p("ID")} ${p("TITLE")} ${p("CREATED")} ${p("URL")} ${p("TAGS")}`;console.log(G);for(let j of g)console.log(`${j.id} ${p(j.title)} ${j.created_at} ${j.url?p(j.url):""} ${j.tags?.length?p(`[${j.tags.join(", ")}]`):""}`);console.log(`Page ${s}/${A} | showing ${g.length} of ${v.length} | sort=${T} ${b} | search=${c||"none"} | tag=${l||"none"}`)}else{for(let p of g)console.log(`${p.id} ${p.title} ${p.created_at}${p.url?` ${p.url}`:""}${p.tags?.length?` [${p.tags.join(", ")}]`:""}`);console.log(`Page ${s}/${A} | showing ${g.length} of ${v.length} | 
sort=${T} ${b} | search=${c||"none"} | tag=${l||"none"}`)}});return}if(n==="get"){Z(r),C(o,()=>{let s=L(o).items.find((d)=>d.id===r.id||d.short_id===r.id);if(!s)throw Error(`Item not found: ${r.id}`);m({ok:!0,item:s,message:`${s.id}: ${s.title}`},r.json)});return}if(n==="update"){Z(r),C(o,()=>{let a=L(o),s=a.items.findIndex((c)=>c.id===r.id||c.short_id===r.id);if(s===-1)throw Error(`Item not found: ${r.id}`);let d=a.items[s];if(r.title!==void 0)d.title=r.title;if(r.content!==void 0)d.content=r.content;if(r.url!==void 0)d.url=r.url;if(r.tag!==void 0){if(d.tags=d.tags||[],!d.tags.map((c)=>c.toLowerCase()).includes(r.tag.toLowerCase()))d.tags.push(r.tag)}d.updated_at=new Date().toISOString(),a.items[s]=d,U(o,a),m({ok:!0,item:d,message:`Updated ${d.id}`},r.json)});return}if(n==="archive"||n==="restore"){Z(r),C(o,()=>{let a=L(o),s=a.items.findIndex((c)=>c.id===r.id||c.short_id===r.id);if(s===-1)throw Error(`Item not found: ${r.id}`);let d=a.items[s];d.archived=n==="archive",d.updated_at=new Date().toISOString(),a.items[s]=d,U(o,a),m({ok:!0,item:d,message:`${n==="archive"?"Archived":"Restored"} ${d.id}`},r.json)});return}if(n==="untag"){if(Z(r),!r.tag)throw Error("Missing required --tag. Example: open-knowledge untag --id <id> -t <tag>");C(o,()=>{let a=L(o),s=a.items.findIndex((l)=>l.id===r.id||l.short_id===r.id);if(s===-1)throw Error(`Item not found: ${r.id}`);let d=a.items[s],c=d.tags?.length??0;d.tags=(d.tags??[]).filter((l)=>l.toLowerCase()!==r.tag.toLowerCase()),d.updated_at=new Date().toISOString(),a.items[s]=d,U(o,a),m({ok:!0,item:d,removed:c-d.tags.length,message:`Removed tag from ${d.id}`},r.json)});return}if(n==="upsert"){let a=r.title??t[1],s=r.content??t[2];C(o,()=>{let d=L(o),c=r.id?d.items.findIndex((h)=>h.id===r.id||h.short_id===r.id):-1,l=new Date().toISOString();if(c===-1){if(!a||!s)throw Error("New item requires title and content. 
Example: open-knowledge upsert <title> <content> [--id <id>]");let h=r.id??ye(),y={id:h,short_id:Fe(h),title:a,content:s,url:r.url??null,tags:r.tag?[r.tag]:[],metadata:{},archived:!1,created_at:l,updated_at:l};d.items.push(y),U(o,d),m({ok:!0,created:!0,item:y,message:`Upserted ${y.id}`},r.json);return}let f=d.items[c];if(a!==void 0)f.title=a;if(s!==void 0)f.content=s;if(r.url!==void 0)f.url=r.url;if(r.tag!==void 0){if(f.tags=f.tags||[],!f.tags.map((h)=>h.toLowerCase()).includes(r.tag.toLowerCase()))f.tags.push(r.tag)}f.updated_at=l,d.items[c]=f,U(o,d),m({ok:!0,created:!1,item:f,message:`Upserted ${f.id}`},r.json)});return}if(n==="delete"){if(Z(r),!r.yes)throw Error("Refusing delete without --yes. Re-run with: open-knowledge delete --id <id> --yes");C(o,()=>{let a=L(o),s=a.items.length;a.items=a.items.filter((c)=>c.id!==r.id&&c.short_id!==r.id);let d=s!==a.items.length;if(U(o,a),!d)throw Error(`Item not found: ${r.id}`);W("info","Item deleted",{id:r.id}),m({ok:!0,deleted_id:r.id,message:`Deleted ${r.id}`},r.json)});return}if(n==="export"){let a=r.format??"json";if(a!=="json"&&a!=="jsonl")throw Error("Invalid --format. Use 'json' or 'jsonl'.");C(o,()=>{let s=L(o);if(a==="jsonl")for(let d of s.items)console.log(JSON.stringify(d));else m({ok:!0,items:s.items},r.json)});return}if(n==="prune"){if(!r.yes)throw Error("Refusing prune without --yes. Re-run with: open-knowledge prune --yes [--older-than <days>] [--empty]");C(o,()=>{let a=L(o),s=a.items.length;if(r.olderThan!==void 0){let c=new Date;c.setDate(c.getDate()-r.olderThan),a.items=a.items.filter((l)=>new Date(l.created_at)>=c)}if(r.empty)a.items=a.items.filter((c)=>c.content.trim().length>0);let d=s-a.items.length;U(o,a),W("info","Prune completed",{pruned:d,remaining:a.items.length}),m({ok:!0,pruned:d,remaining:a.items.length,message:`Pruned ${d} item(s)`},r.json)});return}if(n==="dedupe"){if(!r.yes)throw Error("Refusing dedupe without --yes. 
Re-run with: open-knowledge dedupe --yes [--json]");C(o,()=>{let a=L(o),s=new Set,d=a.items.length;a.items=a.items.filter((l)=>{let f=`${l.title}\x00${l.content}`;if(s.has(f))return!1;return s.add(f),!0});let c=d-a.items.length;U(o,a),W("info","Dedupe completed",{removed:c,remaining:a.items.length}),m({ok:!0,removed:c,remaining:a.items.length,message:`Dedupe removed ${c} duplicate(s)`},r.json)});return}if(n==="stats"){C(o,()=>{let a=L(o),s=a.items.filter((b)=>!b.archived),d=s.length,c=a.items.length-d,l=s.filter((b)=>b.url).length,f=s.filter((b)=>b.tags&&b.tags.length>0).length,h=d>0?s.map((b)=>b.created_at).sort()[0]:null,y=d>0?s.map((b)=>b.created_at).sort()[d-1]:null,v={};for(let b of s)for(let R of b.tags||[])v[R]=(v[R]||0)+1;let T=Object.entries(v).sort((b,R)=>R[1]-b[1]).slice(0,5).map(([b,R])=>({tag:b,count:R}));m({ok:!0,total:d,archived:c,with_url:l,with_tags:f,oldest:h,newest:y,top_tags:T,message:`${d} items | ${l} with URL | ${f} with tags`},r.json)});return}let u=Xn(t[0]),_=u?` Did you mean '${u}'?`:"";throw W("warn","Unknown command",{input:t[0],suggestion:u}),Error(`Unknown command: ${t[0]}.${_} Run 'open-knowledge --help' for available commands.`)}if(import.meta.main)zn(process.argv.slice(2)).catch((e)=>{let t=e instanceof Error?e.message:String(e);W("error","CLI error",{message:t,stack:e instanceof Error?e.stack:void 0}),console.error(`Error: ${t}`),process.exitCode=1});export{Xn as suggestCommand,qn as sortItems,zn as run,Kn as parseArgs};
|
|
566
|
+
--empty Remove items with empty content`)}/** Logs the one-line usage string for command e (the ls, edit, unarchive and rm aliases share their primary command text); any other value falls through to pi(). */function hi(e){if(e==="add"){console.log("Usage: open-knowledge add <title> <content> [--url <url>] [-t <tag>] [--json]");return}if(e==="list"||e==="ls"){console.log("Usage: open-knowledge list|ls [--format table|json] [-p <page>] [-l <limit>] [-s <search>] [-t <tag>] [--sort created|title] [--desc] [--json]");return}if(e==="get"){console.log("Usage: open-knowledge get --id <id> [--json]");return}if(e==="update"||e==="edit"){console.log("Usage: open-knowledge update|edit --id <id> [--title <title>] [--content <content>] [--url <url>] [-t <tag>] [--json]");return}if(e==="archive"){console.log("Usage: open-knowledge archive --id <id> [--json]");return}if(e==="restore"||e==="unarchive"){console.log("Usage: open-knowledge restore|unarchive --id <id> [--json]");return}if(e==="upsert"){console.log("Usage: open-knowledge upsert [title] [content] [--id <id>] [--title <title>] [--content <content>] [--url <url>] [-t <tag>] [--json]");return}if(e==="untag"){console.log("Usage: open-knowledge untag --id <id> -t <tag> [--json]");return}if(e==="delete"||e==="rm"){console.log("Usage: open-knowledge delete|rm --id <id> -y [--json]");return}if(e==="export"){console.log("Usage: open-knowledge export [--format jsonl] [--json]");return}if(e==="prune"){console.log("Usage: open-knowledge prune --yes [--older-than <days>] [--empty] [--json]");return}if(e==="dedupe"){console.log("Usage: open-knowledge dedupe --yes [--json]");return}if(e==="stats"){console.log("Usage: open-knowledge stats [--json]");return}if(e==="paths"){console.log("Usage: open-knowledge paths [--scope local|global|project] [--json]");return}if(e==="storage"){console.log("Usage: open-knowledge storage status|validate [--scope local|global|project] [--json]");return}if(e==="db"){console.log("Usage: open-knowledge db init|stats [--scope local|global|project] [--json]");return}if(e==="wiki"){console.log("Usage: open-knowledge wiki init [--scope local|global|project] 
[--json]");return}if(e==="source"){console.log("Usage: open-knowledge source resolve <source-ref> [--purpose knowledge_answer|knowledge_index] [--limit <n>] [--scope local|global|project] [--json]");return}if(e==="ingest"){console.log("Usage: open-knowledge ingest manifest <file|s3://bucket/key> | source <source-ref> [--purpose knowledge_index] [--scope local|global|project] [--json]");return}if(e==="reindex"){console.log("Usage: open-knowledge reindex outbox <file|s3://bucket/key> [--scope local|global|project] [--json]");return}if(e==="search"){console.log("Usage: open-knowledge search <query> [--semantic] [--model openai:text-embedding-3-small] [--limit <n>] [--dimensions <n>] [--fake] [--scope local|global|project] [--json]");return}if(e==="embeddings"){console.log("Usage: open-knowledge embeddings status|index|search [query] [--model openai:text-embedding-3-small] [--limit <n>] [--dimensions <n>] [--fake] [--scope local|global|project] [--json]");return}if(e==="providers"){console.log("Usage: open-knowledge providers status|models|check [provider|model-alias] [--scope local|global|project] [--json]");return}if(e==="safety"){console.log("Usage: open-knowledge safety status|check|approve|audit|redact [args] [--scope local|global|project] [--json]");return}pi()}/** Returns false when e.noColor or the NO_COLOR env var is truthy, true when FORCE_COLOR is truthy, else whether process.stdout.isTTY is exactly true. The list branch of ki uses it to pick the table layout. */function mi(e){if(e.noColor||process.env.NO_COLOR)return!1;if(process.env.FORCE_COLOR)return!0;return process.stdout.isTTY===!0}/** Prints e: as indented JSON when t is truthy; verbatim when e is a string; otherwise e.message, or indented JSON when message is nullish. The third parameter n is never read. */function E(e,t,n){if(t){console.log(JSON.stringify(e,null,2));return}if(typeof e==="string"){console.log(e);return}console.log(e.message??JSON.stringify(e,null,2))}/** Throws the missing --id error when e.id is falsy. */function te(e){if(!e.id)throw Error("Missing required --id. Example: open-knowledge get --id <id>")}/** Sorts a copy of e by t.sort (created, the default, compares created_at; title compares title; both via localeCompare), reverses it when t.desc is truthy, and returns {sorted, sort, direction}. Any other t.sort value throws. */function Ei(e,t){let n=t.sort??"created";if(n!=="created"&&n!=="title")throw Error("Invalid --sort value. 
Use 'created' or 'title'.");let r=[...e].sort((i,s)=>{if(n==="title")return i.title.localeCompare(s.title);return i.created_at.localeCompare(s.created_at)});if(t.desc)r.reverse();return{sorted:r,sort:n,direction:t.desc?"desc":"asc"}}/** CLI entry: parses argv e with li, logs a debug line through B, then handles --version and --completions before any command dispatch. */async function ki(e){let{positional:t,flags:n}=li(e);if(B("debug","CLI invoked",{command:t[0],flags:{json:n.json,store:n.store}}),n.version){console.log(n.json?JSON.stringify({name:ee.name,version:ee.version},null,2):`${ee.name} ${ee.version}`);return}/* NOTE(review): the bash word list below has no --older-than or --empty, although the usage text for prune advertises both - confirm whether that is intended. */if(n.completions){let a=n.completions;if(a==="bash")console.log('_open_knowledge() { local cur; cur="${COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search embeddings providers safety help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --purpose --model --dimensions --semantic --fake --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge');else if(a==="zsh")console.log(`#compdef open-knowledge
|
|
567
|
+
_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search embeddings providers safety help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(--semantic)--semantic" "(--fake)--fake" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"{created,title}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--purpose)--purpose[purpose]:" "(--model)--model[model ref]:" "(--dimensions)--dimensions[embedding dimensions]:number:" "(--no-color)--no-color[disable color]" "(--scope)--scope"{local,global,project}:" }; _open_knowledge`);else if(a==="fish")console.log('complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search embeddings providers safety help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -l semantic; complete -c open-knowledge -l fake; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c 
open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l purpose; complete -c open-knowledge -l model; complete -c open-knowledge -l dimensions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"');else throw Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");return}let r=_i(t[0]);if(!r||n.help||r==="help"){hi(t[1]);return}let i=Xt({scope:n.scope}),s=n.store;if(!s)if(n.scope==="project"||n.scope==="local")s=i.jsonStorePath();else s=ve();if(r==="paths"){E(i.paths(),n.json);return}if(r==="storage"){let a=t[1]??"status";if(a==="status"){let o=i.storageContract(),c=i.validateStorage();E({ok:c.ok,...o,validation:c,message:`${o.storage_type} artifact storage at ${o.artifact_store.uri_prefix}`},n.json);return}if(a==="validate"){let o=i.validateStorage();E({ok:o.ok,validation:o,message:o.ok?"Storage contract valid":`Storage contract invalid: ${o.errors.join("; ")}`},n.json);return}throw Error("Invalid storage action. Use 'status' or 'validate'.")}if(r==="db"){let a=t[1]??"init";if(a!=="init"&&a!=="stats")throw Error("Invalid db action. Use 'init' or 'stats'.");if(a==="init"){let c=i.initDb();E({ok:!0,...c,message:`Initialized ${c.path}`},n.json);return}let o=i.dbStats();E({ok:!0,path:i.workspace.knowledgeDbPath,...o,message:`knowledge.db schema v${o.schema_version}`},n.json);return}if(r==="wiki"){if((t[1]??"init")!=="init")throw Error("Invalid wiki action. 
Use 'init'.");let o=await i.initWiki();E({ok:!0,...o,message:`Initialized wiki layout in ${i.workspace.home}`},n.json);return}if(r==="safety"){let a=t[1]??"status",o=i.ensureWorkspace(),c=i.safetyPolicy();i.initDb();let u=S(o.knowledgeDbPath);try{if(a==="status"){E({ok:!0,mode:c.mode,workspace:o.home,allow_write_roots:c.allowWriteRoots,read_only_source_access:c.readOnlySourceAccess,network:c.network,redaction:c.redaction,approvals:c.approvals,message:`Safety policy: ${c.mode}`},n.json);return}if(a==="check"){let _=t[2]??"generated_write",f=t[3]??null,m;try{if(_==="web_search")ue(c),m={action:_,target_uri:f,approval_required:!1,approved:!0,decision:"allow"};else if(_==="s3_read"){if(!f)throw Error("safety check s3_read requires an s3:// target.");F(f,c),m={action:_,target_uri:f,approval_required:!1,approved:!0,decision:"allow"}}else m=yt(u,c,_,f);R(u,{event_type:"safety_check",action:_,target_uri:f,decision:m.decision==="allow"?"allow":"requires_approval",metadata:m}),E({ok:!0,...m,message:`Safety check ${m.decision}`},n.json);return}catch(y){throw R(u,{event_type:"safety_check",action:_,target_uri:f,decision:"deny",metadata:{error:y instanceof Error?y.message:String(y)}}),y}}if(a==="approve"){let _=t[2]??"generated_write",f=t[3]??null,m=kt(u,{action:_,target_uri:f,reason:"local-cli approval",metadata:{scope:n.scope??"global"}});R(u,{event_type:"approval",action:_,target_uri:f,decision:"allow",metadata:{approval_id:m.id}}),E({ok:!0,...m,action:_,target_uri:f,message:`Approved ${_}`},n.json);return}if(a==="audit"){let _=u.query("SELECT id, event_type, action, target_uri, decision, metadata_json, created_at FROM audit_events ORDER BY created_at DESC LIMIT 50").all().map((f)=>({id:f.id,event_type:f.event_type,action:f.action,target_uri:f.target_uri,decision:f.decision,metadata:JSON.parse(f.metadata_json),created_at:f.created_at}));E({ok:!0,events:_,message:`${_.length} audit event(s)`},n.json);return}if(a==="redact"){let _=t.slice(2).join(" ");if(!_)throw Error("Usage: 
open-knowledge safety redact <text>");let f=de(_,c);if(f.findings.length>0)le(u,{source_uri:"safety://redact",findings:f.findings,metadata:{command:"safety redact"}});R(u,{event_type:"redaction",action:"safety_redact",target_uri:"safety://redact",decision:f.findings.length>0?"redacted":"allow",metadata:{findings:f.findings.length}}),E({ok:!0,text:f.text,findings:f.findings,message:`Redacted ${f.findings.length} finding(s)`},n.json);return}throw Error("Invalid safety action. Use 'status', 'check', 'approve', 'audit', or 'redact'.")}finally{u.close()}}if(r==="source"){if((t[1]??"")!=="resolve")throw Error("Invalid source action. Use 'resolve'.");let o=t[2];if(!o)throw Error("Usage: open-knowledge source resolve <source-ref>");let c=await i.resolveSource(o,{purpose:n.purpose,limit:n.limit});E({ok:!0,...c,message:c.resolved?`Resolved ${c.source_ref} (${c.content.chunks_returned}/${c.content.chunks_total} chunks)`:`Source not indexed: ${o}`},n.json);return}if(r==="ingest"){let a=t[1]??"";if(a==="manifest"){let o=t[2];if(!o)throw Error("Usage: open-knowledge ingest manifest <file|s3://bucket/key>");let c=await i.ingestManifest(o);E({ok:!0,...c,message:`Ingested ${c.items_seen} manifest item(s)`},n.json);return}if(a==="source"){let o=t[2];if(!o)throw Error("Usage: open-knowledge ingest source <source-ref>");let c=await i.ingestSource(o,n.purpose);E({ok:!0,...c,message:`Ingested source ${c.source_ref} (${c.chunks_inserted} chunks)`},n.json);return}throw Error("Invalid ingest action. Use 'manifest' or 'source'.")}if(r==="reindex"){if((t[1]??"")!=="outbox")throw Error("Invalid reindex action. 
Use 'outbox'.");let o=t[2];if(!o)throw Error("Usage: open-knowledge reindex outbox <file|s3://bucket/key>");let c=await i.consumeOutbox(o);E({ok:!0,...c,message:`Consumed ${c.events_seen} outbox event(s)`},n.json);return}if(r==="embeddings"){let a=t[1]??"status";if(a==="status"){let o=i.embeddingStatus();E({ok:!0,...o,message:`${o.total_vector_entries} vector index entries`},n.json);return}if(a==="index"){let o=await i.indexEmbeddings({limit:n.limit,modelRef:n.model,dimensions:n.dimensions,fake:n.fake});E({ok:!0,...o,message:`Embedded ${o.chunks_embedded} chunk(s)`},n.json);return}if(a==="search"){let o=t.slice(2).join(" ");if(!o)throw Error("Usage: open-knowledge embeddings search <query>");let c=await i.semanticSearch({query:o,limit:n.limit,modelRef:n.model,dimensions:n.dimensions,fake:n.fake});E({ok:!0,...c,message:`${c.results.length} semantic result(s)`},n.json);return}throw Error("Invalid embeddings action. Use 'status', 'index', or 'search'.")}if(r==="search"){let a=t.slice(1).join(" ");if(!a)throw Error("Usage: open-knowledge search <query>");let o=await i.search({query:a,limit:n.limit,semantic:n.semantic,modelRef:n.model,dimensions:n.dimensions,fake:n.fake});E({ok:!0,...o,message:`${o.results.length} search result(s)`},n.json);return}if(r==="providers"){let a=t[1]??"status";if(a==="status"){let o=i.providerStatus(),c=o.providers.filter((u)=>u.configured).length;E({ok:!0,...o,message:`${c}/${o.providers.length} provider credential(s) configured`},n.json);return}if(a==="models"){let o=i.modelRegistry();E({ok:!0,models:o,message:`${o.length} model alias(es)`},n.json);return}if(a==="check"){let o=t[2]??"default",c=Ne(o,i.config()),u=W(c),_=oe(u.provider,i.config());E({ok:!0,target:o,model_ref:c,provider:u.provider,model:u.model,credential:_,message:`${u.provider} credentials configured`},n.json);return}throw Error("Invalid providers action. 
Use 'status', 'models', or 'check'.")}if(xe(s),r==="add"){let a=t[1],o=t[2];if(!a||!o)throw Error("Usage: open-knowledge add <title> <content>");C(s,()=>{let c=L(s),u={id:Se(),title:a,content:o,url:n.url??null,tags:n.tag?[n.tag]:[],created_at:new Date().toISOString(),updated_at:new Date().toISOString()};c.items.push(u),j(s,c),B("info","Item added",{id:u.id,title:u.title}),E({ok:!0,item:u,message:`Added ${u.id}`},n.json)});return}if(r==="list"){if(n.format!==void 0&&n.format!=="table"&&n.format!=="json")throw Error("Invalid --format value for list. Use 'table' or 'json'.");C(s,()=>{let a=L(s),o=Number.isFinite(n.page)&&n.page>0?n.page:1,c=Number.isFinite(n.limit)&&n.limit>0?n.limit:20,u=n.search?String(n.search).toLowerCase():"",_=n.tag?String(n.tag).toLowerCase():"",f=n.format==="table"||!n.json&&!n.format&&mi(n),m=n.json||n.format==="json",y=a.items;if(n.archived)y=y.filter((h)=>h.archived===!0);else if(!n.includeArchived)y=y.filter((h)=>!h.archived);if(u)y=y.filter((h)=>h.title.toLowerCase().includes(u)||h.content.toLowerCase().includes(u));if(_)y=y.filter((h)=>h.tags&&h.tags.map((G)=>G.toLowerCase()).includes(_));let{sorted:k,sort:g,direction:b}=Ei(y,n),x=(o-1)*c,p=k.slice(x,x+c),A=Math.max(1,Math.ceil(k.length/c));if(m){E({ok:!0,page:o,limit:c,total:k.length,total_pages:A,sort:g,direction:b,items:p},!0);return}if(p.length===0){E(`No items found (search=${u||"none"}, tag=${_||"none"})`,!1);return}if(f){let h=(M)=>M,G=`${h("ID")} ${h("TITLE")} ${h("CREATED")} ${h("URL")} ${h("TAGS")}`;console.log(G);for(let M of p)console.log(`${M.id} ${h(M.title)} ${M.created_at} ${M.url?h(M.url):""} ${M.tags?.length?h(`[${M.tags.join(", ")}]`):""}`);console.log(`Page ${o}/${A} | showing ${p.length} of ${k.length} | sort=${g} ${b} | search=${u||"none"} | tag=${_||"none"}`)}else{for(let h of p)console.log(`${h.id} ${h.title} ${h.created_at}${h.url?` ${h.url}`:""}${h.tags?.length?` [${h.tags.join(", ")}]`:""}`);console.log(`Page ${o}/${A} | showing ${p.length} of ${k.length} | 
sort=${g} ${b} | search=${u||"none"} | tag=${_||"none"}`)}});return}if(r==="get"){te(n),C(s,()=>{let o=L(s).items.find((c)=>c.id===n.id||c.short_id===n.id);if(!o)throw Error(`Item not found: ${n.id}`);E({ok:!0,item:o,message:`${o.id}: ${o.title}`},n.json)});return}if(r==="update"){te(n),C(s,()=>{let a=L(s),o=a.items.findIndex((u)=>u.id===n.id||u.short_id===n.id);if(o===-1)throw Error(`Item not found: ${n.id}`);let c=a.items[o];if(n.title!==void 0)c.title=n.title;if(n.content!==void 0)c.content=n.content;if(n.url!==void 0)c.url=n.url;if(n.tag!==void 0){if(c.tags=c.tags||[],!c.tags.map((u)=>u.toLowerCase()).includes(n.tag.toLowerCase()))c.tags.push(n.tag)}c.updated_at=new Date().toISOString(),a.items[o]=c,j(s,a),E({ok:!0,item:c,message:`Updated ${c.id}`},n.json)});return}if(r==="archive"||r==="restore"){te(n),C(s,()=>{let a=L(s),o=a.items.findIndex((u)=>u.id===n.id||u.short_id===n.id);if(o===-1)throw Error(`Item not found: ${n.id}`);let c=a.items[o];c.archived=r==="archive",c.updated_at=new Date().toISOString(),a.items[o]=c,j(s,a),E({ok:!0,item:c,message:`${r==="archive"?"Archived":"Restored"} ${c.id}`},n.json)});return}if(r==="untag"){if(te(n),!n.tag)throw Error("Missing required --tag. Example: open-knowledge untag --id <id> -t <tag>");C(s,()=>{let a=L(s),o=a.items.findIndex((_)=>_.id===n.id||_.short_id===n.id);if(o===-1)throw Error(`Item not found: ${n.id}`);let c=a.items[o],u=c.tags?.length??0;c.tags=(c.tags??[]).filter((_)=>_.toLowerCase()!==n.tag.toLowerCase()),c.updated_at=new Date().toISOString(),a.items[o]=c,j(s,a),E({ok:!0,item:c,removed:u-c.tags.length,message:`Removed tag from ${c.id}`},n.json)});return}if(r==="upsert"){let a=n.title??t[1],o=n.content??t[2];C(s,()=>{let c=L(s),u=n.id?c.items.findIndex((m)=>m.id===n.id||m.short_id===n.id):-1,_=new Date().toISOString();if(u===-1){if(!a||!o)throw Error("New item requires title and content. 
Example: open-knowledge upsert <title> <content> [--id <id>]");let m=n.id??Se(),y={id:m,short_id:Ge(m),title:a,content:o,url:n.url??null,tags:n.tag?[n.tag]:[],metadata:{},archived:!1,created_at:_,updated_at:_};c.items.push(y),j(s,c),E({ok:!0,created:!0,item:y,message:`Upserted ${y.id}`},n.json);return}let f=c.items[u];if(a!==void 0)f.title=a;if(o!==void 0)f.content=o;if(n.url!==void 0)f.url=n.url;if(n.tag!==void 0){if(f.tags=f.tags||[],!f.tags.map((m)=>m.toLowerCase()).includes(n.tag.toLowerCase()))f.tags.push(n.tag)}f.updated_at=_,c.items[u]=f,j(s,c),E({ok:!0,created:!1,item:f,message:`Upserted ${f.id}`},n.json)});return}if(r==="delete"){if(te(n),!n.yes)throw Error("Refusing delete without --yes. Re-run with: open-knowledge delete --id <id> --yes");C(s,()=>{let a=L(s),o=a.items.length;a.items=a.items.filter((u)=>u.id!==n.id&&u.short_id!==n.id);let c=o!==a.items.length;if(j(s,a),!c)throw Error(`Item not found: ${n.id}`);B("info","Item deleted",{id:n.id}),E({ok:!0,deleted_id:n.id,message:`Deleted ${n.id}`},n.json)});return}if(r==="export"){let a=n.format??"json";if(a!=="json"&&a!=="jsonl")throw Error("Invalid --format. Use 'json' or 'jsonl'.");C(s,()=>{let o=L(s);if(a==="jsonl")for(let c of o.items)console.log(JSON.stringify(c));else E({ok:!0,items:o.items},n.json)});return}if(r==="prune"){if(!n.yes)throw Error("Refusing prune without --yes. Re-run with: open-knowledge prune --yes [--older-than <days>] [--empty]");C(s,()=>{let a=L(s),o=a.items.length;if(n.olderThan!==void 0){let u=new Date;u.setDate(u.getDate()-n.olderThan),a.items=a.items.filter((_)=>new Date(_.created_at)>=u)}if(n.empty)a.items=a.items.filter((u)=>u.content.trim().length>0);let c=o-a.items.length;j(s,a),B("info","Prune completed",{pruned:c,remaining:a.items.length}),E({ok:!0,pruned:c,remaining:a.items.length,message:`Pruned ${c} item(s)`},n.json)});return}if(r==="dedupe"){if(!n.yes)throw Error("Refusing dedupe without --yes. 
Re-run with: open-knowledge dedupe --yes [--json]");C(s,()=>{let a=L(s),o=new Set,c=a.items.length;a.items=a.items.filter((_)=>{let f=`${_.title}\x00${_.content}`;if(o.has(f))return!1;return o.add(f),!0});let u=c-a.items.length;j(s,a),B("info","Dedupe completed",{removed:u,remaining:a.items.length}),E({ok:!0,removed:u,remaining:a.items.length,message:`Dedupe removed ${u} duplicate(s)`},n.json)});return}if(r==="stats"){C(s,()=>{let a=L(s),o=a.items.filter((b)=>!b.archived),c=o.length,u=a.items.length-c,_=o.filter((b)=>b.url).length,f=o.filter((b)=>b.tags&&b.tags.length>0).length,m=c>0?o.map((b)=>b.created_at).sort()[0]:null,y=c>0?o.map((b)=>b.created_at).sort()[c-1]:null,k={};for(let b of o)for(let x of b.tags||[])k[x]=(k[x]||0)+1;let g=Object.entries(k).sort((b,x)=>x[1]-b[1]).slice(0,5).map(([b,x])=>({tag:b,count:x}));E({ok:!0,total:c,archived:u,with_url:_,with_tags:f,oldest:m,newest:y,top_tags:g,message:`${c} items | ${_} with URL | ${f} with tags`},n.json)});return}let d=gi(t[0]),l=d?` Did you mean '${d}'?`:"";throw B("warn","Unknown command",{input:t[0],suggestion:d}),Error(`Unknown command: ${t[0]}.${l} Run 'open-knowledge --help' for available commands.`)}if(import.meta.main)ki(process.argv.slice(2)).catch((e)=>{let t=e instanceof Error?e.message:String(e);B("error","CLI error",{message:t,stack:e instanceof Error?e.stack:void 0}),console.error(`Error: ${t}`),process.exitCode=1});export{gi as suggestCommand,Ei as sortItems,ki as run,li as parseArgs};
|