@hasna/knowledge 0.2.13 → 0.2.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -3
- package/bin/open-knowledge-mcp.js +1106 -227
- package/bin/open-knowledge.js +192 -35
- package/docs/architecture/ai-native-knowledge-base.md +11 -0
- package/docs/architecture/hybrid-semantic-search.md +31 -8
- package/package.json +1 -1
- package/src/cli.ts +68 -4
- package/src/embeddings.ts +516 -0
- package/src/knowledge-db.ts +39 -1
- package/src/mcp.js +55 -0
- package/src/outbox-consume.ts +11 -2
- package/src/search.ts +510 -0
- package/src/service.ts +40 -0
- package/src/wiki-layout.ts +41 -1
- package/src/workspace.ts +12 -0
package/bin/open-knowledge.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env bun
|
|
2
2
|
// @bun
|
|
3
|
-
var
|
|
4
|
-
`);return t}function
|
|
3
|
+
var I=import.meta.require;import{readFileSync as se,writeFileSync as re,existsSync as ie,renameSync as Qt,unlinkSync as qe}from"fs";import{randomUUID as ze}from"crypto";import{existsSync as Ht,mkdirSync as ye,readFileSync as qt,writeFileSync as zt}from"fs";import{homedir as Xe}from"os";import{dirname as Gt,join as O,resolve as Jt}from"path";var Y=O(".hasna","apps","knowledge");function be(){return O(Xe(),".open-knowledge","db.json")}function Te(){return O(Xe(),".hasna","apps","knowledge")}function Yt(e=process.cwd()){return Jt(e,Y)}function J(e){return{home:e,configPath:O(e,"config.json"),jsonStorePath:O(e,"db.json"),knowledgeDbPath:O(e,"knowledge.db"),artifactsDir:O(e,"artifacts"),cacheDir:O(e,"cache"),exportsDir:O(e,"exports"),indexesDir:O(e,"indexes"),logsDir:O(e,"logs"),runsDir:O(e,"runs"),schemasDir:O(e,"schemas"),wikiDir:O(e,"wiki")}}function Vt(){return{version:1,mode:"local",storage:{type:"local",artifacts_root:"artifacts"},sources:{preferred_ref:"open-files",allowed_schemes:["open-files","s3","file","https","http"]},providers:{default_model:"openai:gpt-5.2",aliases:{fast:"openai:gpt-5-mini",reasoning:"anthropic:claude-opus-4-6",sonnet:"anthropic:claude-sonnet-4-6",deepseek:"deepseek:deepseek-chat","deepseek-reasoning":"deepseek:deepseek-reasoner"},openai:{api_key_env:"OPENAI_API_KEY",default_model:"gpt-5.2"},anthropic:{api_key_env:"ANTHROPIC_API_KEY",default_model:"claude-sonnet-4-6"},deepseek:{api_key_env:"DEEPSEEK_API_KEY",default_model:"deepseek-chat"}},embeddings:{default_model:"openai:text-embedding-3-small",dimensions:1536,batch_size:64,max_parallel_calls:4},safety:{network:{web_search_enabled:!1,s3_reads_enabled:!1,allowed_s3_buckets:[]},redaction:{enabled:!0},approvals:{generated_writes_require_approval:!0}}}}function $e(e){let t=J(e);ye(t.home,{recursive:!0});for(let n 
of[t.artifactsDir,t.cacheDir,t.exportsDir,t.indexesDir,t.logsDir,t.runsDir,t.schemasDir,t.wikiDir])ye(n,{recursive:!0});if(!Ht(t.configPath))zt(t.configPath,`${JSON.stringify(Vt(),null,2)}
|
|
4
|
+
`);return t}function Be(e,t=process.cwd()){if(e==="project"||e==="local")return J(Yt(t));return J(Te())}function ne(e){ye(Gt(e),{recursive:!0})}function He(e){let t=qt(e,"utf8");return JSON.parse(t)}function ve(){return J(Te()).jsonStorePath}function xe(e){if(!ie(e))if(ne(e),e===ve()&&ie(be()))re(e,se(be(),"utf8"));else re(e,JSON.stringify({items:[]},null,2))}function Zt(e){return`${e}.lock`}function en(e,t){let i=Date.now();while(Date.now()-i<5000){try{if(!ie(e)){re(e,JSON.stringify({owner:t,ts:Date.now()}));return}let d=JSON.parse(se(e,"utf8"));if(Date.now()-d.ts>1e4)qe(e)}catch{}let s=Date.now();while(Date.now()-s<50);}throw Error(`Could not acquire lock on ${e} after 5000ms`)}function tn(e,t){try{if(ie(e)){if(JSON.parse(se(e,"utf8")).owner===t)qe(e)}}catch{}}function L(e){xe(e);let t=se(e,"utf8"),n=JSON.parse(t);if(!n||!Array.isArray(n.items))return{items:[]};return n}function j(e,t){let n=`${e}.tmp.${ze()}`;re(n,JSON.stringify(t,null,2)),Qt(n,e)}function C(e,t){let n=ze(),r=Zt(e);en(r,n);try{return t()}finally{tn(r,n)}}function Se(){return`k_${Date.now().toString(36)}_${Math.random().toString(36).slice(2,8)}`}function Ge(e){return e.replace(/^k_/,"").slice(0,12)}import{Database as nn}from"bun:sqlite";var rn=`
|
|
5
5
|
PRAGMA journal_mode = WAL;
|
|
6
6
|
PRAGMA foreign_keys = ON;
|
|
7
7
|
|
|
@@ -168,7 +168,7 @@ CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
|
|
|
168
168
|
|
|
169
169
|
INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
170
170
|
VALUES (1, datetime('now'));
|
|
171
|
-
`,
|
|
171
|
+
`,sn=`
|
|
172
172
|
DROP TABLE IF EXISTS chunks_fts;
|
|
173
173
|
|
|
174
174
|
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
|
|
@@ -181,7 +181,7 @@ CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
|
|
|
181
181
|
|
|
182
182
|
INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
183
183
|
VALUES (2, datetime('now'));
|
|
184
|
-
`,
|
|
184
|
+
`,on=`
|
|
185
185
|
CREATE TABLE IF NOT EXISTS audit_events (
|
|
186
186
|
id TEXT PRIMARY KEY,
|
|
187
187
|
event_type TEXT NOT NULL,
|
|
@@ -212,59 +212,209 @@ CREATE INDEX IF NOT EXISTS idx_approval_gates_status ON approval_gates(status);
|
|
|
212
212
|
|
|
213
213
|
INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
214
214
|
VALUES (3, datetime('now'));
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
215
|
+
`,an=`
|
|
216
|
+
CREATE TABLE IF NOT EXISTS vector_index_entries (
|
|
217
|
+
id TEXT PRIMARY KEY,
|
|
218
|
+
chunk_id TEXT NOT NULL REFERENCES chunks(id) ON DELETE CASCADE,
|
|
219
|
+
source_revision_id TEXT REFERENCES source_revisions(id) ON DELETE CASCADE,
|
|
220
|
+
provider TEXT NOT NULL,
|
|
221
|
+
model TEXT NOT NULL,
|
|
222
|
+
dimensions INTEGER NOT NULL,
|
|
223
|
+
vector_json TEXT NOT NULL,
|
|
224
|
+
vector_norm REAL NOT NULL,
|
|
225
|
+
source_uri TEXT,
|
|
226
|
+
source_ref TEXT,
|
|
227
|
+
revision TEXT,
|
|
228
|
+
hash TEXT,
|
|
229
|
+
start_offset INTEGER,
|
|
230
|
+
end_offset INTEGER,
|
|
231
|
+
token_count INTEGER,
|
|
232
|
+
status TEXT NOT NULL DEFAULT 'active',
|
|
233
|
+
metadata_json TEXT NOT NULL DEFAULT '{}',
|
|
234
|
+
created_at TEXT NOT NULL,
|
|
235
|
+
updated_at TEXT NOT NULL,
|
|
236
|
+
UNIQUE(chunk_id, provider, model)
|
|
237
|
+
);
|
|
238
|
+
|
|
239
|
+
CREATE INDEX IF NOT EXISTS idx_vector_index_provider_model ON vector_index_entries(provider, model);
|
|
240
|
+
CREATE INDEX IF NOT EXISTS idx_vector_index_source_revision ON vector_index_entries(source_revision_id);
|
|
241
|
+
CREATE INDEX IF NOT EXISTS idx_vector_index_source_uri ON vector_index_entries(source_uri);
|
|
242
|
+
CREATE INDEX IF NOT EXISTS idx_vector_index_status ON vector_index_entries(status);
|
|
243
|
+
|
|
244
|
+
INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
245
|
+
VALUES (4, datetime('now'));
|
|
246
|
+
`;function S(e){ne(e);let t=new nn(e);return t.exec("PRAGMA foreign_keys = ON;"),t.exec("PRAGMA busy_timeout = 5000;"),t}function w(e){let t=S(e);try{if(t.exec(rn),V(t)<2)t.exec(sn);if(V(t)<3)t.exec(on);if(V(t)<4)t.exec(an);return{path:e,schema_version:V(t)}}finally{t.close()}}function V(e){return e.query("SELECT MAX(version) AS version FROM schema_versions").get()?.version??0}function N(e,t){return e.query(`SELECT COUNT(*) AS n FROM ${t}`).get()?.n??0}function Je(e){let t=S(e);try{return{schema_version:V(t),sources:N(t,"sources"),source_revisions:N(t,"source_revisions"),chunks:N(t,"chunks"),wiki_pages:N(t,"wiki_pages"),citations:N(t,"citations"),indexes:N(t,"knowledge_indexes"),runs:N(t,"runs"),run_events:N(t,"run_events"),redaction_findings:N(t,"redaction_findings"),audit_events:N(t,"audit_events"),approval_gates:N(t,"approval_gates"),storage_objects:N(t,"storage_objects"),embeddings:N(t,"chunk_embeddings"),vector_entries:N(t,"vector_index_entries")}}finally{t.close()}}import{existsSync as cn,mkdirSync as Ye,readFileSync as un,writeFileSync as dn}from"fs";import{dirname as ln,join as we,relative as _n,sep as fn}from"path";function Q(e){let t=e.replace(/\\/g,"/").trim();if(!t||t.startsWith("/"))throw Error(`Invalid artifact key: ${e}`);let n=t.split("/").filter(Boolean);if(n.length===0||n.some((r)=>r==="."||r===".."))throw Error(`Invalid artifact key: ${e}`);return n.join("/")}function Re(e,t){let n=_n(e,t);if(n.startsWith("..")||n===".."||n.startsWith(`..${fn}`))throw Error(`Artifact path escapes root: ${t}`)}class Ve{root;type="local";canRead=!0;canWrite=!0;constructor(e){this.root=e;Ye(e,{recursive:!0})}async put(e){let t=Q(e.key),n=we(this.root,t);return Re(this.root,n),Ye(ln(n),{recursive:!0}),dn(n,e.body),{key:t,uri:`file://${n}`}}async getText(e){let t=Q(e),n=we(this.root,t);return Re(this.root,n),un(n,"utf8")}async exists(e){let t=Q(e),n=we(this.root,t);return Re(this.root,n),cn(n)}}class 
Qe{options;type="s3";canRead=!0;canWrite=!0;client;constructor(e){this.options=e;this.client=e.client}async getClient(){if(this.client)return this.client;let[{S3Client:e},{fromIni:t}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]);return this.client=new e({region:this.options.region,credentials:this.options.profile?t({profile:this.options.profile}):void 0,maxAttempts:this.options.max_attempts}),this.client}objectKey(e){let t=Q(e),n=this.options.prefix?Q(this.options.prefix):"";return n?`${n}/${t}`:t}async put(e){let[{PutObjectCommand:t},n]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(e.key);return await n.send(new t({Bucket:this.options.bucket,Key:r,Body:e.body,ContentType:e.content_type,Metadata:e.metadata,ServerSideEncryption:this.options.server_side_encryption,SSEKMSKeyId:this.options.kms_key_id})),{key:r,uri:`s3://${this.options.bucket}/${r}`}}async getText(e){let[{GetObjectCommand:t},n]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(e),i=await n.send(new t({Bucket:this.options.bucket,Key:r}));if(!i.Body)return"";return await i.Body.transformToString()}async exists(e){let[{HeadObjectCommand:t},n]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(e);try{return await n.send(new t({Bucket:this.options.bucket,Key:r})),!0}catch(i){let s=i instanceof Error?i.name:"";if(s==="NotFound"||s==="NoSuchKey"||s==="NotFoundError")return!1;throw i}}}function Ze(e,t){if(e.storage.type==="s3"){if(!e.storage.s3?.bucket)throw Error("S3 artifact storage requires storage.s3.bucket");return new Qe({bucket:e.storage.s3.bucket,prefix:e.storage.s3.prefix,region:e.storage.s3.region,profile:e.storage.s3.profile,max_attempts:e.storage.s3.max_attempts,server_side_encryption:e.storage.s3.server_side_encryption,kms_key_id:e.storage.s3.kms_key_id})}return new Ve(t.artifactsDir)}import{createHash as ct}from"crypto";var 
et={openai:{api_key_env:"OPENAI_API_KEY",default_model:"gpt-5.2"},anthropic:{api_key_env:"ANTHROPIC_API_KEY",default_model:"claude-sonnet-4-6"},deepseek:{api_key_env:"DEEPSEEK_API_KEY",default_model:"deepseek-chat"}},gn={openai:{text_generation:!0,structured_output:!0,tool_usage:!0,tool_streaming:!0,image_input:!0,native_web_search:!0,reasoning:!0,embeddings:!0},anthropic:{text_generation:!0,structured_output:!0,tool_usage:!0,tool_streaming:!0,image_input:!0,native_web_search:!1,reasoning:!0,embeddings:!1},deepseek:{text_generation:!0,structured_output:!0,tool_usage:!0,tool_streaming:!0,image_input:!1,native_web_search:!1,reasoning:!0,embeddings:!1}},pn={default:"openai:gpt-5.2",fast:"openai:gpt-5-mini",reasoning:"anthropic:claude-opus-4-6",sonnet:"anthropic:claude-sonnet-4-6",deepseek:"deepseek:deepseek-chat","deepseek-reasoning":"deepseek:deepseek-reasoner"};function tt(e){return e.providers??{}}function Oe(e,t){let n=tt(e)[t]??{};return{...et[t],...n}}function nt(e){let t=tt(e);return{...pn,...t.default_model?{default:t.default_model}:{},...t.aliases??{}}}function W(e){let[t,...n]=e.split(":"),r=n.join(":");if(t!=="openai"&&t!=="anthropic"&&t!=="deepseek")throw Error(`Unsupported AI provider: ${t}`);if(!r)throw Error(`Invalid model ref: ${e}. 
Expected provider:model.`);return{provider:t,model:r}}function Ne(e,t){return nt(t)[e]??e}function Ae(e){let t=nt(e);return Object.entries(t).map(([n,r])=>{let i=W(r);return{alias:n,model_ref:r,provider:i.provider,model:i.model,default:n==="default",capabilities:gn[i.provider]}})}function rt(e,t=process.env){return Object.keys(et).map((n)=>{let r=Oe(e,n),i=Boolean(t[r.api_key_env]);return{provider:n,api_key_env:r.api_key_env,configured:i,source:i?"env":"missing",base_url:r.base_url??null,default_model:r.default_model}})}function it(e,t=process.env){return{default_model:Ne("default",e),providers:rt(e,t),models:Ae(e)}}function oe(e,t,n=process.env){let r=rt(t,n).find((i)=>i.provider===e);if(!r)throw Error(`Unsupported AI provider: ${e}`);if(!r.configured)throw Error(`Missing ${r.api_key_env} for ${e}. Set the env var to use this provider.`);return r}function hn(e){return["deleted","stale","invalidated","reindex_required"].includes((e??"").toLowerCase())}function K(e){let t=e.status??null;return{source_owner:"open-files",source_ref:e.source_ref??null,source_uri:e.source_uri??null,source_kind:e.source_kind??null,source_revision_id:e.source_revision_id??null,revision:e.revision??null,hash:e.hash??null,chunk_id:e.chunk_id??null,start_offset:e.start_offset??null,end_offset:e.end_offset??null,status:t,read_only:!0,citation_required:!0,resolver:e.resolver??null,stale:hn(t)}}function Ie(e){return{source_owner:"open-files",generated_from:e.generated_from,artifact_key:e.artifact_key,source_refs:e.source_refs??[],read_only_sources:!0,citation_required:e.citation_required??!0,raw_source_bytes_stored_in_open_knowledge:!1}}function st(e,t){return{...e,provenance:t}}var mn="openai:text-embedding-3-small",ut=1536;function ae(e){return e?.embeddings??{}}function ot(e,t){return`${e}_${ct("sha256").update(t).digest("hex").slice(0,20)}`}function Ce(e){if(!e)return{};try{let t=JSON.parse(e);return t&&typeof t==="object"&&!Array.isArray(t)?t:{}}catch{return{}}}function U(e,t){for(let n of 
t){let r=e[n];if(typeof r==="string"&&r.length>0)return r}return null}function at(e,t){for(let n of t){let r=e[n];if(typeof r==="number"&&Number.isFinite(r))return r}return null}function Le(e){return Math.sqrt(e.reduce((t,n)=>t+n*n,0))}function En(e,t,n=Le(t)){let r=Le(e);if(r===0||n===0)return 0;let i=Math.min(e.length,t.length),s=0;for(let d=0;d<i;d+=1)s+=e[d]*t[d];return s/(r*n)}function kn(e,t){let n=ct("sha256").update(e).digest();return Array.from({length:t},(r,i)=>{let s=n[i%n.length]/255;return Number((s*2-1).toFixed(6))})}async function yn(e,t,n=process.env){oe("openai",t,n);let r=Oe(t,"openai"),{createOpenAI:i}=await import("@ai-sdk/openai"),s=i({apiKey:n[r.api_key_env],baseURL:r.base_url});if(s.embeddingModel)return s.embeddingModel(e);if(s.textEmbedding)return s.textEmbedding(e);if(s.textEmbeddingModel)return s.textEmbeddingModel(e);throw Error("OpenAI provider does not expose an embedding model factory.")}function De(e,t){if(!e||e==="default"||e==="embedding")return ae(t).default_model??mn;return e}async function dt(e,t={}){let n=De(t.modelRef,t.config),r=W(n);if(r.provider!=="openai")throw Error(`Embedding provider ${r.provider} is not supported yet. Use openai:text-embedding-3-small.`);let i=t.dimensions??ae(t.config).dimensions??ut;if(t.fake)return{provider:r.provider,model:r.model,dimensions:i,vectors:e.map((o)=>kn(o,i)),usage:{input_tokens:e.reduce((o,c)=>o+Math.max(1,Math.ceil(c.split(/\s+/).filter(Boolean).length*1.25)),0)}};let{embedMany:s}=await import("ai"),d=await yn(r.model,t.config,t.env),l=await s({model:d,values:e,maxParallelCalls:t.maxParallelCalls??ae(t.config).max_parallel_calls,providerOptions:{openai:{dimensions:i}}}),a=l.embeddings;return{provider:r.provider,model:r.model,dimensions:a[0]?.length??i,vectors:a,usage:{input_tokens:l.usage?.tokens??0}}}function bn(e,t){if(t.sourceRevisionId)return e.query(`SELECT
|
|
247
|
+
c.id,
|
|
248
|
+
c.text,
|
|
249
|
+
c.token_count,
|
|
250
|
+
c.start_offset,
|
|
251
|
+
c.end_offset,
|
|
252
|
+
c.metadata_json,
|
|
253
|
+
c.source_revision_id,
|
|
254
|
+
sr.revision,
|
|
255
|
+
sr.hash,
|
|
256
|
+
s.uri AS source_uri,
|
|
257
|
+
s.kind AS source_kind
|
|
258
|
+
FROM chunks c
|
|
259
|
+
LEFT JOIN source_revisions sr ON sr.id = c.source_revision_id
|
|
260
|
+
LEFT JOIN sources s ON s.id = sr.source_id
|
|
261
|
+
LEFT JOIN vector_index_entries v
|
|
262
|
+
ON v.chunk_id = c.id AND v.provider = ? AND v.model = ?
|
|
263
|
+
WHERE v.id IS NULL AND c.source_revision_id = ?
|
|
264
|
+
ORDER BY c.created_at ASC, c.ordinal ASC
|
|
265
|
+
LIMIT ?`).all(t.provider,t.model,t.sourceRevisionId,t.limit);return e.query(`SELECT
|
|
266
|
+
c.id,
|
|
267
|
+
c.text,
|
|
268
|
+
c.token_count,
|
|
269
|
+
c.start_offset,
|
|
270
|
+
c.end_offset,
|
|
271
|
+
c.metadata_json,
|
|
272
|
+
c.source_revision_id,
|
|
273
|
+
sr.revision,
|
|
274
|
+
sr.hash,
|
|
275
|
+
s.uri AS source_uri,
|
|
276
|
+
s.kind AS source_kind
|
|
277
|
+
FROM chunks c
|
|
278
|
+
LEFT JOIN source_revisions sr ON sr.id = c.source_revision_id
|
|
279
|
+
LEFT JOIN sources s ON s.id = sr.source_id
|
|
280
|
+
LEFT JOIN vector_index_entries v
|
|
281
|
+
ON v.chunk_id = c.id AND v.provider = ? AND v.model = ?
|
|
282
|
+
WHERE v.id IS NULL
|
|
283
|
+
ORDER BY c.created_at ASC, c.ordinal ASC
|
|
284
|
+
LIMIT ?`).all(t.provider,t.model,t.limit)}function Tn(e){let t=Ce(e.metadata_json),n=t.provenance;if(n&&typeof n==="object"&&!Array.isArray(n))return n;return K({source_ref:U(t,["source_ref"]),source_uri:e.source_uri??U(t,["source_uri"]),source_kind:e.source_kind??U(t,["source_kind"]),source_revision_id:e.source_revision_id,revision:e.revision??U(t,["revision"]),hash:e.hash??U(t,["hash"]),chunk_id:e.id,start_offset:e.start_offset??at(t,["start_offset"]),end_offset:e.end_offset??at(t,["end_offset"]),status:U(t,["status"]),resolver:"open-files-read-only"})}function vn(e,t,n,r){let i=e.prepare(`
|
|
285
|
+
INSERT INTO chunk_embeddings (id, chunk_id, provider, model, dimensions, vector_json, created_at)
|
|
286
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
287
|
+
ON CONFLICT(chunk_id, provider, model) DO UPDATE SET
|
|
288
|
+
dimensions = excluded.dimensions,
|
|
289
|
+
vector_json = excluded.vector_json,
|
|
290
|
+
created_at = excluded.created_at
|
|
291
|
+
`),s=e.prepare(`
|
|
292
|
+
INSERT INTO vector_index_entries (
|
|
293
|
+
id, chunk_id, source_revision_id, provider, model, dimensions, vector_json, vector_norm,
|
|
294
|
+
source_uri, source_ref, revision, hash, start_offset, end_offset, token_count, status,
|
|
295
|
+
metadata_json, created_at, updated_at
|
|
296
|
+
)
|
|
297
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
298
|
+
ON CONFLICT(chunk_id, provider, model) DO UPDATE SET
|
|
299
|
+
source_revision_id = excluded.source_revision_id,
|
|
300
|
+
dimensions = excluded.dimensions,
|
|
301
|
+
vector_json = excluded.vector_json,
|
|
302
|
+
vector_norm = excluded.vector_norm,
|
|
303
|
+
source_uri = excluded.source_uri,
|
|
304
|
+
source_ref = excluded.source_ref,
|
|
305
|
+
revision = excluded.revision,
|
|
306
|
+
hash = excluded.hash,
|
|
307
|
+
start_offset = excluded.start_offset,
|
|
308
|
+
end_offset = excluded.end_offset,
|
|
309
|
+
token_count = excluded.token_count,
|
|
310
|
+
status = excluded.status,
|
|
311
|
+
metadata_json = excluded.metadata_json,
|
|
312
|
+
updated_at = excluded.updated_at
|
|
313
|
+
`);return e.transaction(()=>{for(let l=0;l<t.length;l+=1){let a=t[l],o=n.vectors[l];if(!o)continue;let c=Ce(a.metadata_json),u=Tn(a),_=u.source_ref??U(c,["source_ref"]),f=u.source_uri??a.source_uri??U(c,["source_uri"]),m=u.revision??a.revision??U(c,["revision"]),y=u.hash??a.hash??U(c,["hash"]),k=u.status??U(c,["status"])??"active",g=JSON.stringify(o);i.run(ot("emb",`${a.id}\x00${n.provider}\x00${n.model}`),a.id,n.provider,n.model,n.dimensions,g,r),s.run(ot("vec",`${a.id}\x00${n.provider}\x00${n.model}`),a.id,a.source_revision_id,n.provider,n.model,n.dimensions,g,Le(o),f,_,m,y,u.start_offset,u.end_offset,a.token_count,k,JSON.stringify({...c,provenance:u,embedded_at:r}),r,r)}})(),t.length}async function lt(e){let t=De(e.modelRef,e.config),n=W(t);if(n.provider!=="openai")throw Error(`Embedding provider ${n.provider} is not supported yet.`);let r=(e.now??new Date).toISOString(),i=Math.max(1,Math.min(e.limit??100,1000));w(e.dbPath);let s=S(e.dbPath),d;try{d=bn(s,{provider:n.provider,model:n.model,limit:i,sourceRevisionId:e.sourceRevisionId})}finally{s.close()}if(d.length===0)return{provider:n.provider,model:n.model,dimensions:e.dimensions??ae(e.config).dimensions??ut,chunks_seen:0,chunks_embedded:0,embeddings_upserted:0,vector_entries_upserted:0,usage:{input_tokens:0}};let l=await dt(d.map((o)=>o.text),e),a=S(e.dbPath);try{let o=vn(a,d,l,r);return{provider:l.provider,model:l.model,dimensions:l.dimensions,chunks_seen:d.length,chunks_embedded:d.length,embeddings_upserted:o,vector_entries_upserted:o,usage:l.usage}}finally{a.close()}}function _t(e){w(e);let t=S(e);try{let n=t.query("SELECT COUNT(*) AS n FROM chunk_embeddings").get()?.n??0,r=t.query("SELECT COUNT(*) AS n FROM vector_index_entries").get()?.n??0,i=t.query(`SELECT provider, model, dimensions, COUNT(*) AS entries, MAX(updated_at) AS updated_at
|
|
314
|
+
FROM vector_index_entries
|
|
315
|
+
GROUP BY provider, model, dimensions
|
|
316
|
+
ORDER BY provider, model`).all();return{total_embeddings:n,total_vector_entries:r,indexes:i}}finally{t.close()}}async function ce(e){let t=De(e.modelRef,e.config),n=W(t),r=Math.max(1,Math.min(e.limit??10,100)),i=await dt([e.query],e),s=i.vectors[0]??[];w(e.dbPath);let d=S(e.dbPath);try{let a=d.query(`SELECT
|
|
317
|
+
v.chunk_id,
|
|
318
|
+
c.text,
|
|
319
|
+
v.vector_json,
|
|
320
|
+
v.vector_norm,
|
|
321
|
+
v.source_uri,
|
|
322
|
+
v.source_ref,
|
|
323
|
+
v.revision,
|
|
324
|
+
v.hash,
|
|
325
|
+
v.metadata_json
|
|
326
|
+
FROM vector_index_entries v
|
|
327
|
+
JOIN chunks c ON c.id = v.chunk_id
|
|
328
|
+
WHERE v.provider = ? AND v.model = ? AND v.status = 'active'`).all(n.provider,n.model).map((o)=>{let c=JSON.parse(o.vector_json),u=Ce(o.metadata_json),_=u.provenance&&typeof u.provenance==="object"&&!Array.isArray(u.provenance)?u.provenance:null;return{chunk_id:o.chunk_id,score:En(s,c,o.vector_norm),text:o.text,source_uri:o.source_uri,source_ref:o.source_ref,revision:o.revision,hash:o.hash,provenance:_}}).sort((o,c)=>c.score-o.score).slice(0,r);return{provider:n.provider,model:n.model,dimensions:i.dimensions,query:e.query,results:a}}finally{d.close()}}import{createHash as Pn,randomUUID as Un}from"crypto";import{existsSync as jn,readFileSync as Mn}from"fs";import{basename as Kn}from"path";function ft(e,t){if(!e)throw Error(t);return e}function xn(e){let n=e.slice(13).split("/").filter(Boolean),r=n[0];if(r!=="file"&&r!=="source")throw Error("Invalid open-files ref. Expected open-files://file/<id>, open-files://file/<id>/revision/<revision_id>, or open-files://source/<id>/path/<path>.");let i=ft(n[1],"Invalid open-files ref. Missing id.");if(r==="file"){if(n.length===2)return{kind:"open-files",uri:e,entity:r,id:i};if(n[2]==="revision"&&n[3]&&n.length===4)return{kind:"open-files",uri:e,entity:r,id:i,revision_id:decodeURIComponent(n[3])};throw Error("Invalid open-files file ref. Expected open-files://file/<id>/revision/<revision_id>.")}let s=n.indexOf("path"),d=s>=0?decodeURIComponent(n.slice(s+1).join("/")):void 0;return{kind:"open-files",uri:e,entity:r,id:i,path:d}}function Sn(e){let t=new URL(e),n=ft(t.hostname,"Invalid s3 ref. Missing bucket."),r=decodeURIComponent(t.pathname.replace(/^\/+/,""));if(!r)throw Error("Invalid s3 ref. 
Missing object key.");return{kind:"s3",uri:e,bucket:n,key:r}}function wn(e){let t=new URL(e);return{kind:"file",uri:e,path:decodeURIComponent(t.pathname)}}function Rn(e){let t=new URL(e);return{kind:"web",uri:e,url:t.toString()}}function P(e){if(e.startsWith("open-files://"))return xn(e);if(e.startsWith("s3://"))return Sn(e);if(e.startsWith("file://"))return wn(e);if(e.startsWith("https://")||e.startsWith("http://"))return Rn(e);throw Error(`Unsupported source ref scheme: ${e}`)}function gt(e,t=P(e)){if(t.kind==="open-files"&&t.entity==="file"&&t.revision_id)return e.replace(/\/revision\/[^/]+$/,"");return e}function pt(e){let t=P(e);return t.kind==="open-files"&&t.entity==="file"?t.revision_id??null:null}import{createHash as On,randomUUID as Pe}from"crypto";import{relative as Nn,resolve as mt,sep as An}from"path";function ht(e){let t=process.env[e];return t==="1"||t==="true"||t==="yes"}function Et(e,t){let n=e,r=new Set(n.safety?.network?.allowed_s3_buckets??[]);if(e.storage.type==="s3"&&e.storage.s3?.bucket)r.add(e.storage.s3.bucket);if(process.env.HASNA_KNOWLEDGE_ALLOWED_S3_BUCKETS)for(let i of process.env.HASNA_KNOWLEDGE_ALLOWED_S3_BUCKETS.split(",").map((s)=>s.trim()).filter(Boolean))r.add(i);return{mode:e.mode,allowWriteRoots:[t.home,t.artifactsDir,t.cacheDir,t.exportsDir,t.indexesDir,t.logsDir,t.runsDir,t.schemasDir,t.wikiDir].map((i)=>mt(i)),readOnlySourceAccess:!0,network:{webSearchEnabled:n.safety?.network?.web_search_enabled??ht("HASNA_KNOWLEDGE_WEB_SEARCH"),s3ReadsEnabled:n.safety?.network?.s3_reads_enabled??ht("HASNA_KNOWLEDGE_ALLOW_S3_READS"),allowedS3Buckets:[...r].sort()},redaction:{enabled:n.safety?.redaction?.enabled??!0},approvals:{generatedWritesRequireApproval:n.safety?.approvals?.generated_writes_require_approval??!0}}}function In(e,t){let n=Nn(e,t);return n===""||!n.startsWith("..")&&n!==".."&&!n.startsWith(`..${An}`)}function X(e,t){let n=mt(e);if(!t.allowWriteRoots.some((r)=>In(r,n)))throw Error(`Safety policy denied write outside 
.hasna/apps/knowledge: ${e}`)}function F(e,t){let r=new URL(e).hostname;if(!t.network.s3ReadsEnabled)throw Error("Safety policy denied S3 read. Set safety.network.s3_reads_enabled=true or HASNA_KNOWLEDGE_ALLOW_S3_READS=1.");if(!t.network.allowedS3Buckets.includes(r))throw Error(`Safety policy denied S3 bucket "${r}". Add it to safety.network.allowed_s3_buckets or HASNA_KNOWLEDGE_ALLOWED_S3_BUCKETS.`)}function ue(e){if(!e.network.webSearchEnabled)throw Error("Safety policy denied web search. Set safety.network.web_search_enabled=true or HASNA_KNOWLEDGE_WEB_SEARCH=1.")}var Ln=[{type:"private_key_block",severity:"high",regex:/-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/g,replacement:"[REDACTED:private_key_block]"},{type:"secret_assignment",severity:"high",regex:/\b(?:api[_-]?key|secret|token|password)\s*[:=]\s*['"]?[^'"\s]{8,}/gi,replacement:"[REDACTED:secret_assignment]"},{type:"openai_api_key",severity:"high",regex:/\bsk-[A-Za-z0-9_-]{20,}\b/g,replacement:"[REDACTED:openai_api_key]"},{type:"anthropic_api_key",severity:"high",regex:/\bsk-ant-[A-Za-z0-9_-]{20,}\b/g,replacement:"[REDACTED:anthropic_api_key]"},{type:"aws_access_key_id",severity:"high",regex:/\bA(?:KIA|SIA)[A-Z0-9]{16}\b/g,replacement:"[REDACTED:aws_access_key_id]"}];function de(e,t){if(t&&!t.redaction.enabled)return{text:e,findings:[]};let n=e,r=[];for(let i of Ln)n=n.replace(i.regex,(s,...d)=>{let l=typeof d.at(-2)==="number"?d.at(-2):n.indexOf(s);return r.push({type:i.type,severity:i.severity,start:Math.max(0,l),end:Math.max(0,l+s.length)}),i.replacement});return{text:n,findings:r}}function Cn(e){return`audit_${On("sha256").update(`${e.event_type}\x00${e.action}\x00${e.target_uri??""}\x00${e.created_at??""}\x00${JSON.stringify(e.metadata??{})}\x00${Pe()}`).digest("hex").slice(0,24)}`}function R(e,t){let n=t.created_at??new Date().toISOString(),r=Cn({...t,created_at:n});return e.run(`INSERT INTO audit_events (id, event_type, action, target_uri, decision, metadata_json, 
created_at)
|
|
329
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)`,[r,t.event_type,t.action,t.target_uri??null,t.decision,JSON.stringify(t.metadata??{}),n]),r}function le(e,t){let n=t.created_at??new Date().toISOString();for(let r of t.findings)e.run(`INSERT INTO redaction_findings (id, source_uri, run_id, severity, finding_type, metadata_json, created_at)
|
|
330
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)`,[`redact_${Pe()}`,t.source_uri??null,t.run_id??null,r.severity,r.type,JSON.stringify({...t.metadata??{},start:r.start,end:r.end}),n]);return t.findings.length}function kt(e,t){let n=t.created_at??new Date().toISOString(),r=`approval_${Pe()}`;return e.run(`INSERT INTO approval_gates (id, action, target_uri, status, reason, approved_by, metadata_json, created_at, updated_at)
|
|
331
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,[r,t.action,t.target_uri??null,"approved",t.reason??null,t.approved_by??"local-cli",JSON.stringify(t.metadata??{}),n,n]),{id:r,status:"approved"}}function Dn(e,t,n){let r=e.query(`SELECT id FROM approval_gates
|
|
219
332
|
WHERE action = ? AND status = 'approved' AND (target_uri IS NULL OR target_uri = ? OR ? IS NULL)
|
|
220
|
-
ORDER BY updated_at DESC LIMIT 1`).get(t,
|
|
333
|
+
ORDER BY updated_at DESC LIMIT 1`).get(t,n??null,n??null);return Boolean(r)}function yt(e,t,n,r){let i=n==="generated_write"&&t.approvals.generatedWritesRequireApproval,s=!i||Dn(e,n,r);return{action:n,target_uri:r??null,approval_required:i,approved:s,decision:s?"allow":"requires_approval"}}function _e(e,t){return`${e}_${Pn("sha256").update(t).digest("hex").slice(0,20)}`}function H(e){return e&&typeof e==="object"&&!Array.isArray(e)?e:void 0}function v(e){return typeof e==="string"&&e.length>0?e:void 0}function Fn(e){let t=v(e.source_ref)??v(e.source_uri)??v(e.uri);if(t)return t;let n=v(e.file_id);if(n){let s=v(e.revision_id)??v(e.revision),d=`open-files://file/${encodeURIComponent(n)}`;return s?`${d}/revision/${encodeURIComponent(s)}`:d}let r=v(e.source_id),i=v(e.path);if(r&&i)return`open-files://source/${encodeURIComponent(r)}/path/${encodeURIComponent(i)}`;throw Error("Outbox event is missing source_ref, file_id, or source_id/path.")}function Wn(e,t){if(t.kind==="open-files"&&t.entity==="file"&&t.revision_id)return e.replace(/\/revision\/[^/]+$/,"");return e}function Xn(e){return v(e.hash)??v(e.checksum)??v(e.sha256)??null}function $n(e,t,n){return v(e.revision_id)??v(e.revision)??v(e.version_id)??(t.kind==="open-files"?t.revision_id:void 0)??n??null}function Bn(e){return(v(e.event)??v(e.type)??v(e.action)??v(e.change_type)??"changed").toLowerCase()}function Hn(e){let t=v(e.path);return v(e.title)??v(e.name)??(t?Kn(t):null)}function qn(e,t){let n=Fn(e),r=P(n),i=Xn(e);return{raw:e,eventType:Bn(e),sourceRef:n,sourceUri:Wn(n,r),kind:r.kind,title:Hn(e),revision:$n(e,r,i),hash:i,status:v(e.status)?.toLowerCase()??null,updatedAt:v(e.updated_at)??t,acl:e.permissions??e.acl??void 0}}function zn(e){let t=e.trim();if(!t)return[];if(t.startsWith("[")){let n=JSON.parse(t);if(!Array.isArray(n))throw Error("Outbox array parse failed.");return n.map((r)=>{let i=H(r);if(!i)throw Error("Outbox array entries must be objects.");return i})}if(t.startsWith("{"))try{let 
n=JSON.parse(t),r=H(n);if(!r)throw Error("Outbox object parse failed.");if(Array.isArray(r.events))return r.events.map((i)=>{let s=H(i);if(!s)throw Error("Outbox events entries must be objects.");return s});if("source_ref"in r||"source_uri"in r||"file_id"in r)return[r]}catch(n){let r=t.split(/\r?\n/).filter((i)=>i.trim().length>0);if(r.length<=1)throw n;return r.map((i)=>{let s=H(JSON.parse(i));if(!s)throw Error("Outbox JSONL entries must be objects.");return s})}return t.split(/\r?\n/).filter((n)=>n.trim().length>0).map((n)=>{let r=H(JSON.parse(n));if(!r)throw Error("Outbox JSONL entries must be objects.");return r})}async function Gn(e,t,n){let r=new URL(e),i=r.hostname,s=decodeURIComponent(r.pathname.replace(/^\/+/,""));if(!i||!s)throw Error(`Invalid S3 outbox URI: ${e}`);if(n)F(e,n);let[{S3Client:d,GetObjectCommand:l},{fromIni:a}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]),o=t?.storage.type==="s3"&&t.storage.s3?.bucket===i?t.storage.s3:void 0,u=await new d({region:o?.region,credentials:o?.profile?a({profile:o.profile}):void 0,maxAttempts:o?.max_attempts}).send(new l({Bucket:i,Key:s}));if(!u.Body)return"";return await u.Body.transformToString()}async function Jn(e,t,n){if(e.startsWith("s3://"))return Gn(e,t,n);if(!jn(e))throw Error(`Outbox not found: ${e}`);return Mn(e,"utf8")}function bt(e,t){let n={};if(e)try{n=H(JSON.parse(e))??{}}catch{n={}}return JSON.stringify({...n,...t})}function Yn(e,t,n){let r=_e("src",t.sourceUri);e.run(`INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
|
|
221
334
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
222
335
|
ON CONFLICT(uri) DO UPDATE SET
|
|
223
336
|
kind = excluded.kind,
|
|
224
337
|
title = COALESCE(excluded.title, sources.title),
|
|
225
|
-
updated_at = excluded.updated_at`,[
|
|
338
|
+
updated_at = excluded.updated_at`,[r,t.sourceUri,t.kind,t.title,JSON.stringify({source_ref:t.sourceRef,source_uri:t.sourceUri,status:t.status,last_outbox_event:t.eventType}),JSON.stringify(t.acl??{}),n,t.updatedAt]);let i=e.query("SELECT id, metadata_json, acl_json FROM sources WHERE uri = ?").get(t.sourceUri);if(!i)throw Error(`Failed to upsert source for outbox event: ${t.sourceUri}`);let s={source_ref:t.sourceRef,source_uri:t.sourceUri,last_outbox_event:t.eventType,last_outbox_at:t.updatedAt};if(t.status)s.status=t.status;if(v(t.raw.path))s.path=t.raw.path;return e.run("UPDATE sources SET metadata_json = ?, acl_json = CASE WHEN ? IS NULL THEN acl_json ELSE ? END, updated_at = ? WHERE id = ?",[bt(i.metadata_json,s),t.acl===void 0?null:JSON.stringify(t.acl),t.acl===void 0?null:JSON.stringify(t.acl),t.updatedAt,i.id]),i.id}function Vn(e,t,n,r){if(!n.revision)return null;let i=_e("rev",`${t}\x00${n.revision}`),s={source_ref:n.sourceRef,source_uri:n.sourceUri,status:n.status,last_outbox_event:n.eventType,reindex_required:!0};return e.run(`INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)
|
|
226
339
|
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
227
340
|
ON CONFLICT(source_id, revision) DO UPDATE SET
|
|
228
341
|
hash = COALESCE(excluded.hash, source_revisions.hash),
|
|
229
|
-
metadata_json = excluded.metadata_json`,[i,t,
|
|
230
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,[s,"open-files-outbox",e.input,"completed","local","open-files-outbox",JSON.stringify({path:e.input,events:
|
|
231
|
-
VALUES (?, ?, ?, ?, ?, ?)`,[
|
|
232
|
-
VALUES (?, ?, ?, ?, 0, 0, 0, ?, ?)`,[
|
|
233
|
-
`);if(!
|
|
342
|
+
metadata_json = excluded.metadata_json`,[i,t,n.revision,n.hash,v(n.raw.extracted_text_ref)??null,JSON.stringify(s),r]),e.query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?").get(t,n.revision)?.id??null}function Qn(e,t,n){if(n.revision)return e.query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?").all(t,n.revision).map((r)=>r.id);if(n.hash)return e.query("SELECT id FROM source_revisions WHERE source_id = ? AND hash = ?").all(t,n.hash).map((r)=>r.id);return e.query("SELECT id FROM source_revisions WHERE source_id = ?").all(t).map((r)=>r.id)}function Zn(e,t){let n=e.query("SELECT id FROM chunks WHERE source_revision_id = ?").all(t),r=0,i=0;for(let d of n){let l=e.query("SELECT COUNT(*) AS n FROM chunk_embeddings WHERE chunk_id = ?").get(d.id);r+=l?.n??0;let a=e.query("SELECT COUNT(*) AS n FROM vector_index_entries WHERE chunk_id = ?").get(d.id);i+=a?.n??0,e.run("DELETE FROM vector_index_entries WHERE chunk_id = ?",[d.id]),e.run("DELETE FROM chunk_embeddings WHERE chunk_id = ?",[d.id]),e.run("DELETE FROM chunks_fts WHERE chunk_id = ?",[d.id])}e.run("DELETE FROM chunks WHERE source_revision_id = ?",[t]);let s=e.query("SELECT metadata_json FROM source_revisions WHERE id = ?").get(t);return e.run("UPDATE source_revisions SET metadata_json = ? 
WHERE id = ?",[bt(s?.metadata_json,{reindex_required:!0,invalidated_at:new Date().toISOString()}),t]),{chunksDeleted:n.length,embeddingsDeleted:r,vectorEntriesDeleted:i}}function er(e,t){return t==="deleted"||["delete","deleted","remove","removed"].includes(e)}function tr(e){return["move","moved","rename","renamed","path_changed"].includes(e)}function nr(e){return["permission","permissions","permission_changed","acl_changed"].includes(e)}async function Tt(e){let t=(e.now??new Date).toISOString();if(e.safetyPolicy)X(e.dbPath,e.safetyPolicy);w(e.dbPath);let n=await Jn(e.input,e.config,e.safetyPolicy),r=zn(n),i=S(e.dbPath),s=`run_${Un()}`;try{return i.transaction(()=>{i.run(`INSERT INTO runs (id, type, prompt, status, provider, model, metadata_json, created_at, updated_at)
|
|
343
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,[s,"open-files-outbox",e.input,"completed","local","open-files-outbox",JSON.stringify({path:e.input,events:r.length}),t,t]);let d=new Set,l=new Set,a=0,o=0,c=0,u=0,_=0,f=0,m=0;return R(i,{event_type:"source_read",action:e.input.startsWith("s3://")?"s3_outbox_read":"local_outbox_read",target_uri:e.input,decision:"allow",metadata:{events:r.length,read_only:!0},created_at:t}),r.forEach((y,k)=>{let g=qn(y,t),b=Yn(i,g,t);d.add(b);let x=Vn(i,b,g,t);if(x)l.add(x);let p=Qn(i,b,g);for(let A of p){l.add(A);let h=Zn(i,A);a+=h.chunksDeleted,o+=h.embeddingsDeleted,c+=h.vectorEntriesDeleted,u+=1}if(er(g.eventType,g.status))_+=1;if(tr(g.eventType))f+=1;if(nr(g.eventType)||g.acl!==void 0)m+=1;i.run(`INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
|
|
344
|
+
VALUES (?, ?, ?, ?, ?, ?)`,[_e("evt",`${s}\x00${k}\x00${g.sourceRef}\x00${g.eventType}`),s,"info",g.eventType,JSON.stringify({source_ref:g.sourceRef,source_uri:g.sourceUri,revision:g.revision,hash:g.hash,status:g.status,affected_revisions:p.length}),g.updatedAt])}),i.run(`INSERT INTO provider_usage (id, run_id, provider, model, input_tokens, output_tokens, cost_usd, metadata_json, created_at)
|
|
345
|
+
VALUES (?, ?, ?, ?, 0, 0, 0, ?, ?)`,[_e("usage",s),s,"local","open-files-outbox",JSON.stringify({note:"No model provider used for outbox invalidation."}),t]),R(i,{event_type:"write",action:"knowledge_outbox_invalidation",target_uri:e.dbPath,decision:"allow",metadata:{run_id:s,events:r.length,sources:d.size,revisions:l.size,chunks_deleted:a,embeddings_deleted:o,vector_entries_deleted:c},created_at:t}),{path:e.input,db_path:e.dbPath,run_id:s,events_seen:r.length,sources_touched:d.size,revisions_touched:l.size,chunks_deleted:a,embeddings_deleted:o,vector_entries_deleted:c,stale_revisions:u,deleted_sources:_,moved_sources:f,permission_updates:m}})()}finally{i.close()}}import{createHash as rr}from"crypto";import{existsSync as ir,readFileSync as sr}from"fs";import{basename as or}from"path";function Ue(e,t){return`${e}_${rr("sha256").update(t).digest("hex").slice(0,20)}`}function q(e){return e&&typeof e==="object"&&!Array.isArray(e)?e:void 0}function T(e){return typeof e==="string"&&e.length>0?e:void 0}function ar(e){return typeof e==="number"&&Number.isFinite(e)?e:void 0}function cr(e){let t=T(e.source_ref)??T(e.source_uri)??T(e.uri);if(t)return t;let n=T(e.file_id);if(n){let s=T(e.revision_id)??T(e.revision),d=`open-files://file/${encodeURIComponent(n)}`;return s?`${d}/revision/${encodeURIComponent(s)}`:d}let r=T(e.source_id),i=T(e.path);if(r&&i)return`open-files://source/${encodeURIComponent(r)}/path/${encodeURIComponent(i)}`;throw Error("Manifest item is missing source_ref, file_id, or source_id/path.")}function ur(e,t){if(t.kind==="open-files"&&t.entity==="file"&&t.revision_id)return e.replace(/\/revision\/[^/]+$/,"");return e}function dr(e){let t=T(e.extracted_text)??T(e.text)??T(e.content_text)??T(e.markdown);if(t!==void 0)return t;let n=e.content;return typeof n==="string"?n:null}function lr(e){let t=T(e.extracted_text_ref)??T(e.extracted_text_uri)??T(e.text_ref);if(t)return t;let n=q(e.content);return 
T(n?.extracted_text_ref)??T(n?.extracted_text_uri)??null}function _r(e){let t=T(e.path);return T(e.title)??T(e.name)??(t?or(t):null)}function fr(e){return T(e.hash)??T(e.checksum)??T(e.sha256)??null}function gr(e,t,n){return T(e.revision_id)??T(e.revision)??T(e.version_id)??(t.kind==="open-files"?t.revision_id:void 0)??n??T(e.updated_at)??"current"}function pr(e,t){let n={};for(let[r,i]of Object.entries(e)){if(["text","content","content_text","extracted_text","markdown"].includes(r))continue;n[r]=i}return n.source_ref=t.sourceRef,n.source_uri=t.sourceUri,n.status=t.status,n}function hr(e,t){let n=cr(e),r=P(n),i=ur(n,r),s=fr(e),d=T(e.status)??"active";return{raw:e,sourceRef:n,sourceUri:i,kind:r.kind,title:_r(e),revision:gr(e,r,s),hash:s,extractedTextUri:lr(e),text:dr(e),metadata:pr(e,{sourceRef:n,sourceUri:i,status:d}),acl:e.permissions??e.acl??{},status:d,updatedAt:T(e.updated_at)??t}}function mr(e){let t=e.trim();if(!t)return[];if(t.startsWith("[")){let n=JSON.parse(t);if(!Array.isArray(n))throw Error("Manifest array parse failed.");return n.map((r)=>{let i=q(r);if(!i)throw Error("Manifest array entries must be objects.");return i})}if(t.startsWith("{"))try{let n=JSON.parse(t),r=q(n);if(!r)throw Error("Manifest object parse failed.");if(Array.isArray(r.items))return r.items.map((i)=>{let s=q(i);if(!s)throw Error("Manifest items entries must be objects.");return s});if("source_ref"in r||"source_uri"in r||"file_id"in r)return[r]}catch(n){let r=t.split(/\r?\n/).filter((i)=>i.trim().length>0);if(r.length<=1)throw n;return r.map((i)=>{let s=q(JSON.parse(i));if(!s)throw Error("Manifest JSONL entries must be objects.");return s})}return t.split(/\r?\n/).filter((n)=>n.trim().length>0).map((n)=>{let r=q(JSON.parse(n));if(!r)throw Error("Manifest JSONL entries must be objects.");return r})}async function Er(e,t,n){let r=new URL(e),i=r.hostname,s=decodeURIComponent(r.pathname.replace(/^\/+/,""));if(!i||!s)throw Error(`Invalid S3 manifest URI: 
${e}`);if(n)F(e,n);let[{S3Client:d,GetObjectCommand:l},{fromIni:a}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]),o=t?.storage.type==="s3"&&t.storage.s3?.bucket===i?t.storage.s3:void 0,u=await new d({region:o?.region,credentials:o?.profile?a({profile:o.profile}):void 0,maxAttempts:o?.max_attempts}).send(new l({Bucket:i,Key:s}));if(!u.Body)return"";return await u.Body.transformToString()}async function kr(e,t,n){if(e.startsWith("s3://"))return Er(e,t,n);if(!ir(e))throw Error(`Manifest not found: ${e}`);return sr(e,"utf8")}function yr(e,t,n){let r=e.replace(/\r\n/g,`
|
|
346
|
+
`);if(!r.trim())return[];let i=[],s=0;while(s<r.length){let d=Math.min(r.length,s+t),l=d;if(d<r.length){let o=r.lastIndexOf(`
|
|
234
347
|
|
|
235
|
-
`,
|
|
348
|
+
`,d),c=r.lastIndexOf(". ",d),u=Math.max(o,c);if(u>s+Math.floor(t*0.5))l=u+(u===o?2:1)}let a=r.slice(s,l).trim();if(a)i.push({ordinal:i.length,text:a,startOffset:s,endOffset:l});if(l>=r.length)break;s=Math.max(0,l-n)}return i}function br(e){let t=e.trim().split(/\s+/).filter(Boolean).length;return Math.max(1,Math.ceil(t*1.25))}function Tr(e,t){let n=e.query("SELECT id FROM chunks WHERE source_revision_id = ?").all(t);for(let r of n)e.run("DELETE FROM chunks_fts WHERE chunk_id = ?",[r.id]);return e.run("DELETE FROM chunks WHERE source_revision_id = ?",[t]),n.length}function vr(e,t,n){let r=Ue("src",t.sourceUri);e.run(`INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
|
|
236
349
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
237
350
|
ON CONFLICT(uri) DO UPDATE SET
|
|
238
351
|
kind = excluded.kind,
|
|
239
352
|
title = excluded.title,
|
|
240
353
|
metadata_json = excluded.metadata_json,
|
|
241
354
|
acl_json = excluded.acl_json,
|
|
242
|
-
updated_at = excluded.updated_at`,[
|
|
355
|
+
updated_at = excluded.updated_at`,[r,t.sourceUri,t.kind,t.title,JSON.stringify(t.metadata),JSON.stringify(t.acl??{}),n,t.updatedAt]);let i=e.query("SELECT id FROM sources WHERE uri = ?").get(t.sourceUri);if(!i)throw Error(`Failed to upsert source: ${t.sourceUri}`);return i.id}function xr(e,t,n,r){let i=Ue("rev",`${t}\x00${n.revision}`);e.run(`INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)
|
|
243
356
|
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
244
357
|
ON CONFLICT(source_id, revision) DO UPDATE SET
|
|
245
358
|
hash = excluded.hash,
|
|
246
359
|
extracted_text_uri = excluded.extracted_text_uri,
|
|
247
|
-
metadata_json = excluded.metadata_json`,[i,t,
|
|
248
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,[c,t,"source",o.ordinal,o.text,
|
|
360
|
+
metadata_json = excluded.metadata_json`,[i,t,n.revision,n.hash,n.extractedTextUri,JSON.stringify(n.metadata),r]);let s=e.query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?").get(t,n.revision);if(!s)throw Error(`Failed to upsert source revision: ${n.sourceRef}`);return s.id}function Sr(e,t,n,r,i,s,d){if(!n.text||n.status.toLowerCase()==="deleted")return{chunksInserted:0,redactions:0};let l=de(n.text,d);if(l.findings.length>0)le(e,{source_uri:n.sourceUri,findings:l.findings,metadata:{source_ref:n.sourceRef,revision:n.revision},created_at:r}),R(e,{event_type:"redaction",action:"source_text_redact",target_uri:n.sourceUri,decision:"redacted",metadata:{findings:l.findings.length,source_ref:n.sourceRef,revision:n.revision},created_at:r});let a=yr(l.text,i,s);for(let o of a){let c=Ue("chk",`${t}\x00${o.ordinal}\x00${o.text}`),u=K({source_ref:n.sourceRef,source_uri:n.sourceUri,source_kind:n.kind,source_revision_id:t,revision:n.revision,hash:n.hash,chunk_id:c,start_offset:o.startOffset,end_offset:o.endOffset,status:n.status,resolver:"open-files-read-only"}),_=st({source_ref:n.sourceRef,source_uri:n.sourceUri,source_kind:n.kind,source_revision_id:t,revision:n.revision,hash:n.hash,status:n.status,path:T(n.raw.path)??null,mime:T(n.raw.mime)??T(n.raw.content_type)??null,size:ar(n.raw.size)??null},u);e.run(`INSERT INTO chunks (id, source_revision_id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json, created_at)
|
|
361
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,[c,t,"source",o.ordinal,o.text,br(o.text),o.startOffset,o.endOffset,JSON.stringify(_),r]),e.run("INSERT INTO chunks_fts (chunk_id, text, title, source_uri) VALUES (?, ?, ?, ?)",[c,o.text,n.title??"",n.sourceUri])}return{chunksInserted:a.length,redactions:l.findings.length}}async function vt(e){let t=e.now??new Date;if(e.safetyPolicy)X(e.dbPath,e.safetyPolicy);w(e.dbPath);let n=await kr(e.input,e.config,e.safetyPolicy),r=mr(n);return je({dbPath:e.dbPath,items:r,sourceLabel:e.input,safetyPolicy:e.safetyPolicy,now:t,maxChunkChars:e.maxChunkChars,chunkOverlapChars:e.chunkOverlapChars})}async function je(e){let t=(e.now??new Date).toISOString(),n=e.maxChunkChars??4000,r=e.chunkOverlapChars??200;if(n<500)throw Error("maxChunkChars must be at least 500.");if(r<0||r>=n)throw Error("chunkOverlapChars must be less than maxChunkChars.");if(e.safetyPolicy)X(e.dbPath,e.safetyPolicy);w(e.dbPath);let i=S(e.dbPath);try{return i.transaction(()=>{let d=new Set,l=new Set,a=0,o=0,c=0,u=0;R(i,{event_type:"source_read",action:e.readAction??(e.sourceLabel.startsWith("s3://")?"s3_manifest_read":"local_manifest_read"),target_uri:e.sourceLabel,decision:"allow",metadata:{items:e.items.length,read_only:!0},created_at:t});for(let _ of e.items){let f=hr(_,t),m=vr(i,f,t),y=xr(i,m,f,t);if(d.add(m),l.add(y),f.text||f.status.toLowerCase()==="deleted")o+=Tr(i,y);let k=Sr(i,y,f,t,n,r,e.safetyPolicy);a+=k.chunksInserted,c+=k.redactions}return R(i,{event_type:"write",action:"knowledge_manifest_ingest",target_uri:e.dbPath,decision:"allow",metadata:{items:e.items.length,sources:d.size,revisions:l.size,chunks_inserted:a,redactions:c},created_at:t}),{path:e.sourceLabel,db_path:e.dbPath,items_seen:e.items.length,sources_upserted:d.size,revisions_upserted:l.size,chunks_inserted:a,chunks_deleted:o,redactions:c,skipped:u}})()}finally{i.close()}}import{createHash as Lr}from"crypto";import{existsSync as Cr,readFileSync as Dr}from"fs";import{basename as 
pe}from"path";function fe(e){if(!e)return{};try{let t=JSON.parse(e);return t&&typeof t==="object"&&!Array.isArray(t)?t:{}}catch{return{}}}function $(e,t){for(let n of t){let r=e[n];if(typeof r==="string"&&r.length>0)return r}return null}function xt(e,t){for(let n of t){let r=e[n];if(typeof r==="number"&&Number.isFinite(r))return r}return null}function wr(e,t){let n=e.mode;if(typeof n==="string"&&n!=="read_only")throw Error(`Source resolver denied ${t}. Permission mode is ${n}, expected read_only.`);let r=e.denied_purposes;if(Array.isArray(r)&&r.includes(t))throw Error(`Source resolver denied ${t}. Purpose is explicitly denied.`);let i=e.allowed_purposes;if(Array.isArray(i)&&i.length>0&&!i.includes(t))throw Error(`Source resolver denied ${t}. Allowed purposes: ${i.join(", ")}`)}function Rr(e,t,n){if(!t)return n;try{let r=P(e);if(r.kind==="open-files"&&r.entity==="file")return`${e}/revision/${encodeURIComponent(t.revision)}`}catch{return n}return n}function Or(e,t,n){return e.query(`SELECT id, uri, kind, title, metadata_json, acl_json, updated_at
|
|
249
362
|
FROM sources
|
|
250
363
|
WHERE uri = ? OR uri = ?
|
|
251
364
|
ORDER BY CASE WHEN uri = ? THEN 0 ELSE 1 END
|
|
252
|
-
LIMIT 1`).get(t,
|
|
365
|
+
LIMIT 1`).get(t,n,t)??null}function Nr(e,t,n){if(n)return e.query(`SELECT id, revision, hash, extracted_text_uri, metadata_json, created_at
|
|
253
366
|
FROM source_revisions
|
|
254
367
|
WHERE source_id = ? AND revision = ?
|
|
255
|
-
LIMIT 1`).get(t,
|
|
368
|
+
LIMIT 1`).get(t,n)??null;return e.query(`SELECT id, revision, hash, extracted_text_uri, metadata_json, created_at
|
|
256
369
|
FROM source_revisions
|
|
257
370
|
WHERE source_id = ?
|
|
258
371
|
ORDER BY created_at DESC, revision DESC
|
|
259
|
-
LIMIT 1`).get(t)??null}function
|
|
372
|
+
LIMIT 1`).get(t)??null}function Ar(e,t){if(!t)return 0;return e.query("SELECT COUNT(*) AS n FROM chunks WHERE source_revision_id = ?").get(t)?.n??0}function Ir(e,t,n){if(!t||n<=0)return[];return e.query(`SELECT id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json
|
|
260
373
|
FROM chunks
|
|
261
374
|
WHERE source_revision_id = ?
|
|
262
375
|
ORDER BY ordinal ASC
|
|
263
|
-
LIMIT ?`).all(t,
|
|
376
|
+
LIMIT ?`).all(t,n)}async function ge(e){let t=e.purpose??"knowledge_answer",n=Math.max(0,Math.min(e.limit??10,100)),r=(e.now??new Date).toISOString(),i=P(e.sourceRef),s=gt(e.sourceRef,i),d=pt(e.sourceRef);if(e.safetyPolicy){if(!e.safetyPolicy.readOnlySourceAccess)throw Error("Safety policy denied source resolution.");X(e.dbPath,e.safetyPolicy)}w(e.dbPath);let l=S(e.dbPath);try{return l.transaction(()=>{let a=Or(l,s,e.sourceRef);if(!a)return R(l,{event_type:"source_read",action:"open_files_resolve_missing",target_uri:e.sourceRef,decision:"allow",metadata:{purpose:t,read_only:!0,source_uri:s},created_at:r}),{source_ref:e.sourceRef,source_uri:s,purpose:t,read_only:!0,resolved:!1,resolver:{name:"open-files-read-only",mode:"local_catalog",contract:"open-files-knowledge-source-v1"},source:null,revision:null,content:{mime:null,size:null,hash:null,text_available:!1,chunks_total:0,chunks_returned:0,char_count_returned:0,extracted_text_ref:null,bytes_available:!1,bytes_exposed:!1},chunks:[],citations:[]};let o=fe(a.metadata_json),c=fe(a.acl_json);try{wr(c,t)}catch(p){throw R(l,{event_type:"source_read",action:"open_files_resolve",target_uri:e.sourceRef,decision:"deny",metadata:{purpose:t,read_only:!0,source_uri:a.uri,error:p instanceof Error?p.message:String(p)},created_at:r}),p}let u=Nr(l,a.id,d),_=fe(u?.metadata_json),f=Ar(l,u?.id??null),m=Ir(l,u?.id??null,n),y=Rr(a.uri,u,e.sourceRef),k=m.map((p)=>{let 
A=fe(p.metadata_json),h={resolver:"open-files-read-only",mode:"local_catalog",purpose:t,read_only:!0,source_ref:$(A,["source_ref"])??y,source_uri:a.uri,source_revision_id:u?.id??null,revision:u?.revision??null,hash:u?.hash??$(A,["hash"]),chunk_id:p.id,start_offset:p.start_offset,end_offset:p.end_offset,resolved_at:r},G=K({source_ref:h.source_ref,source_uri:h.source_uri,source_kind:a.kind,source_revision_id:h.source_revision_id,revision:h.revision,hash:h.hash,chunk_id:p.id,start_offset:p.start_offset,end_offset:p.end_offset,status:$(A,["status"]),resolver:h.resolver});return{id:p.id,kind:p.kind,ordinal:p.ordinal,text:p.text,token_count:p.token_count,start_offset:p.start_offset,end_offset:p.end_offset,metadata:A,evidence:h,provenance:G}}),g=k.map((p)=>({source_ref:p.evidence.source_ref,source_uri:a.uri,chunk_id:p.id,quote:p.text.slice(0,500),start_offset:p.start_offset,end_offset:p.end_offset,evidence:p.evidence,provenance:p.provenance}));R(l,{event_type:"source_read",action:"open_files_resolve",target_uri:e.sourceRef,decision:"allow",metadata:{purpose:t,read_only:!0,source_uri:a.uri,revision:u?.revision??null,chunks_returned:k.length,chunks_total:f},created_at:r});let 
b=$(o,["mime","content_type"])??$(_,["mime","content_type"]),x=xt(o,["size","size_bytes"])??xt(_,["size","size_bytes"]);return{source_ref:y,source_uri:a.uri,purpose:t,read_only:!0,resolved:!0,resolver:{name:"open-files-read-only",mode:"local_catalog",contract:"open-files-knowledge-source-v1"},source:{id:a.id,uri:a.uri,kind:a.kind,title:a.title,metadata:o,permissions:c,updated_at:a.updated_at},revision:u?{id:u.id,revision:u.revision,hash:u.hash,extracted_text_uri:u.extracted_text_uri,metadata:_,created_at:u.created_at,reindex_required:_.reindex_required===!0}:null,content:{mime:b,size:x,hash:u?.hash??$(o,["hash","checksum","sha256"]),text_available:f>0,chunks_total:f,chunks_returned:k.length,char_count_returned:k.reduce((p,A)=>p+A.text.length,0),extracted_text_ref:u?.extracted_text_uri??$(_,["extracted_text_ref","extracted_text_uri"]),bytes_available:!1,bytes_exposed:!1},chunks:k,citations:g}})()}finally{l.close()}}function z(e){return`sha256:${Lr("sha256").update(e).digest("hex")}`}function Pr(e){return e.replace(/<script[\s\S]*?<\/script>/gi," ").replace(/<style[\s\S]*?<\/style>/gi," ").replace(/<[^>]+>/g," ").replace(/ /g," ").replace(/&/g,"&").replace(/</g,"<").replace(/>/g,">").replace(/\s+\n/g,`
|
|
264
377
|
`).replace(/\n\s+/g,`
|
|
265
|
-
`).replace(/[ \t]{2,}/g," ").trim()}async function
|
|
266
|
-
|
|
267
|
-
`);return{text:
|
|
378
|
+
`).replace(/[ \t]{2,}/g," ").trim()}async function Ur(e,t,n){let r=new URL(e),i=r.hostname,s=decodeURIComponent(r.pathname.replace(/^\/+/,""));if(!i||!s)throw Error(`Invalid S3 source URI: ${e}`);if(n)F(e,n);let[{S3Client:d,GetObjectCommand:l},{fromIni:a}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]),o=t?.storage.type==="s3"&&t.storage.s3?.bucket===i?t.storage.s3:void 0,u=await new d({region:o?.region,credentials:o?.profile?a({profile:o.profile}):void 0,maxAttempts:o?.max_attempts}).send(new l({Bucket:i,Key:s}));if(!u.Body)return"";return await u.Body.transformToString()}async function jr(e,t){if(t)ue(t);let n=await fetch(e,{headers:{accept:"text/markdown,text/plain,text/html,application/json;q=0.8,*/*;q=0.5","user-agent":"@hasna/knowledge source-ingest"}});if(!n.ok)throw Error(`Web source read failed ${n.status}: ${e}`);let r=n.headers.get("content-type"),i=await n.text();return{text:r?.includes("html")?Pr(i):i,mime:r}}function he(e){if(e.kind==="file")return pe(e.path);if(e.kind==="s3")return pe(e.key);if(e.kind==="web")return pe(new URL(e.url).pathname)||e.url;return e.path?pe(e.path):e.id}async function St(e,t,n){if(e.kind==="file"){if(!Cr(e.path))throw Error(`Source file not found: ${e.path}`);let r=Dr(e.path,"utf8");return{text:r,contentSource:"file",title:he(e),mime:"text/plain",size:r.length,hash:z(r),revision:null,extractedTextRef:null,metadata:{path:e.path},permissions:{mode:"read_only"}}}if(e.kind==="s3"){let r=await Ur(e.uri,t,n);return{text:r,contentSource:"s3",title:he(e),mime:"text/plain",size:r.length,hash:z(r),revision:null,extractedTextRef:null,metadata:{bucket:e.bucket,key:e.key},permissions:{mode:"read_only"}}}if(e.kind==="web"){let r=await jr(e.url,n);return{text:r.text,contentSource:"web",title:he(e),mime:r.mime,size:r.text.length,hash:z(r.text),revision:null,extractedTextRef:null,metadata:{url:e.url},permissions:{mode:"read_only"}}}throw Error(`Direct source reading is not available for 
${e.uri}`)}async function Mr(e,t,n){if(e.startsWith("open-files://"))throw Error("Open-files extracted text refs require an open-files resolver API. Ingest an open-files manifest with extracted_text or an extracted_text_ref using file://, s3://, or https://.");let r=P(e);return{text:(await St(r,t,n)).text,contentSource:"extracted_text_ref"}}async function Kr(e){let t=await ge({dbPath:e.dbPath,sourceRef:e.sourceRef,purpose:e.purpose??"knowledge_index",limit:100,safetyPolicy:e.safetyPolicy,now:e.now});if(!t.resolved)throw Error("Open-files source is not in the local knowledge catalog. Ingest an open-files manifest first or use the open-files resolver API.");if(t.revision?.extracted_text_uri&&!t.content.text_available){let r=await Mr(t.revision.extracted_text_uri,e.config,e.safetyPolicy);return{text:r.text,contentSource:r.contentSource,title:t.source?.title??null,mime:t.content.mime,size:r.text.length,hash:t.revision.hash??z(r.text),revision:t.revision.revision,extractedTextRef:t.revision.extracted_text_uri,metadata:t.source?.metadata??{},permissions:t.source?.permissions??{mode:"read_only"}}}if(t.chunks.length===0)throw Error("Open-files source has no extracted text chunks yet. Ingest an open-files manifest with extracted_text or extracted_text_ref first.");let n=t.chunks.map((r)=>r.text).join(`
|
|
379
|
+
|
|
380
|
+
`);return{text:n,contentSource:"catalog_chunks",title:t.source?.title??null,mime:t.content.mime,size:n.length,hash:t.revision?.hash??z(n),revision:t.revision?.revision??null,extractedTextRef:t.revision?.extracted_text_uri??null,metadata:t.source?.metadata??{},permissions:t.source?.permissions??{mode:"read_only"}}}function Fr(e,t,n,r){let i=n.hash??z(n.text),s={...n.metadata,source_ref:e,content_source:n.contentSource,read_only:!0},d={source_ref:e,name:n.title??he(t),mime:n.mime??"text/plain",size:n.size??n.text.length,hash:i,revision:n.revision??i,status:"active",updated_at:new Date().toISOString(),permissions:{mode:"read_only",allowed_purposes:[r],...n.permissions},metadata:s,extracted_text_ref:n.extractedTextRef,extracted_text:n.text};if(t.kind==="open-files"){if(t.entity==="file")d.file_id=t.id;if(t.entity==="source")d.source_id=t.id,d.path=t.path}if(t.kind==="file")d.path=t.path;if(t.kind==="s3")d.path=t.key;if(t.kind==="web")d.url=t.url;return d}async function wt(e){let t=e.purpose??"knowledge_index",n=P(e.sourceRef),r=n.kind==="open-files"?await Kr(e):await St(n,e.config,e.safetyPolicy),i=Fr(e.sourceRef,n,r,t);return{...await je({dbPath:e.dbPath,items:[i],sourceLabel:e.sourceRef,readAction:"source_ref_ingest_read",safetyPolicy:e.safetyPolicy,now:e.now}),source_ref:e.sourceRef,content_source:r.contentSource,read_only:!0,hash:String(i.hash)}}function Ee(e){if(!e)return{};try{let t=JSON.parse(e);return t&&typeof t==="object"&&!Array.isArray(t)?t:{}}catch{return{}}}function D(e,t){for(let n of t){let r=e[n];if(typeof r==="string"&&r.length>0)return r}return null}function Rt(e,t){for(let n of t){let r=e[n];if(typeof r==="number"&&Number.isFinite(r))return r}return null}function Ot(e){return Array.from(new Set(e))}function Wr(e){let t=e.normalize("NFKC").toLowerCase().match(/[\p{L}\p{N}_]+/gu)??[];return Ot(t.filter((n)=>n.length>0)).slice(0,16)}function Xr(e){if(e.length===0)return null;return e.map((t)=>`${t}*`).join(" OR ")}function $r(e){return 
e.replace(/[\\%_]/g,(t)=>`\\${t}`)}function Nt(e,t){return e.flatMap((n)=>Array.from({length:t},()=>`%${$r(n)}%`))}function Br(e,t){let n=Number.isFinite(e)?1/(1+Math.abs(e)):0,r=1/(1+t);return ke(Math.max(n,r))}function At(e,t){if(t.length===0)return 0;let n=t.filter((r)=>e.includes(r)).length;if(n===0)return 0;return ke(Math.min(0.85,0.35+n/t.length*0.5))}function Hr(e){return ke(Math.max(0,Math.min(1,(e+1)/2)))}function ke(e){return Number(e.toFixed(6))}function Z(e,t){let n=e.keyword??0,r=e.semantic??0,i=e.catalog??0,s=t?.chunk_id?0.05:0;return ke(Math.min(1,n*0.55+r*0.4+i*0.35+s))}function Me(e){let t=e.provenance;return t&&typeof t==="object"&&!Array.isArray(t)?t:null}function qr(e){let t=Ee(e.chunk_metadata_json),n=Me(t);if(n)return n;if(!e.source_revision_id&&!e.source_uri)return null;return K({source_ref:D(t,["source_ref"]),source_uri:e.source_uri??D(t,["source_uri"]),source_kind:e.source_kind??D(t,["source_kind"]),source_revision_id:e.source_revision_id,revision:e.revision??D(t,["revision"]),hash:e.hash??D(t,["hash"]),chunk_id:e.chunk_id,start_offset:e.start_offset??Rt(t,["start_offset"]),end_offset:e.end_offset??Rt(t,["end_offset"]),status:D(t,["status"]),resolver:"open-files-read-only"})}function zr(e,t,n){if(!t)return[];return e.query(`SELECT
|
|
381
|
+
chunks_fts.chunk_id,
|
|
382
|
+
c.kind AS chunk_kind,
|
|
383
|
+
c.wiki_page_id,
|
|
384
|
+
c.text,
|
|
385
|
+
c.token_count,
|
|
386
|
+
c.start_offset,
|
|
387
|
+
c.end_offset,
|
|
388
|
+
c.metadata_json AS chunk_metadata_json,
|
|
389
|
+
c.source_revision_id,
|
|
390
|
+
sr.revision,
|
|
391
|
+
sr.hash,
|
|
392
|
+
s.uri AS source_uri,
|
|
393
|
+
s.kind AS source_kind,
|
|
394
|
+
s.title AS source_title,
|
|
395
|
+
wp.path AS wiki_path,
|
|
396
|
+
wp.title AS wiki_title,
|
|
397
|
+
wp.artifact_uri AS wiki_artifact_uri,
|
|
398
|
+
wp.content_hash AS wiki_content_hash,
|
|
399
|
+
wp.status AS wiki_status,
|
|
400
|
+
wp.metadata_json AS wiki_metadata_json,
|
|
401
|
+
bm25(chunks_fts) AS rank
|
|
402
|
+
FROM chunks_fts
|
|
403
|
+
JOIN chunks c ON c.id = chunks_fts.chunk_id
|
|
404
|
+
LEFT JOIN source_revisions sr ON sr.id = c.source_revision_id
|
|
405
|
+
LEFT JOIN sources s ON s.id = sr.source_id
|
|
406
|
+
LEFT JOIN wiki_pages wp ON wp.id = c.wiki_page_id
|
|
407
|
+
WHERE chunks_fts MATCH ?
|
|
408
|
+
ORDER BY rank ASC
|
|
409
|
+
LIMIT ?`).all(t,n)}function It(e,t){if(t.length===0)return"1 = 0";return t.map(()=>`(${e.map((r)=>`lower(COALESCE(${r}, '')) LIKE ? ESCAPE '\\'`).join(" OR ")})`).join(" OR ")}function Gr(e,t,n){let r=["path","title","artifact_uri","metadata_json"];return e.query(`SELECT id, path, title, artifact_uri, content_hash, status, metadata_json
|
|
410
|
+
FROM wiki_pages
|
|
411
|
+
WHERE status = 'active' AND (${It(r,t)})
|
|
412
|
+
ORDER BY updated_at DESC
|
|
413
|
+
LIMIT ?`).all(...Nt(t,r.length),n)}function Jr(e,t,n){let r=["kind","name","shard_key","artifact_uri","metadata_json"];return e.query(`SELECT id, kind, name, artifact_uri, shard_key, metadata_json
|
|
414
|
+
FROM knowledge_indexes
|
|
415
|
+
WHERE ${It(r,t)}
|
|
416
|
+
ORDER BY updated_at DESC
|
|
417
|
+
LIMIT ?`).all(...Nt(t,r.length),n)}function Yr(e,t){let n=Ee(e.chunk_metadata_json),r=qr(e),i=D(n,["source_ref"]),s=e.source_uri??D(n,["source_uri"]),d=Boolean(e.wiki_page_id),l={kind:d?"wiki_chunk":"source_chunk",id:e.chunk_id,title:d?e.wiki_title:e.source_title,text:e.text,score:0,scores:{keyword:t},source:s||i?{uri:s,ref:i,kind:e.source_kind??D(n,["source_kind"]),revision:e.revision??D(n,["revision"]),hash:e.hash??D(n,["hash"])}:null,citation:{chunk_id:e.chunk_id,start_offset:e.start_offset,end_offset:e.end_offset},artifact:d?{uri:e.wiki_artifact_uri,path:e.wiki_path,hash:e.wiki_content_hash,shard_key:e.wiki_path}:null,provenance:r,reasons:["keyword_match"]};return l.score=Z(l.scores,l.citation),l}function Vr(e,t){let n=Ee(e.metadata_json),r=At(`${e.path} ${e.title} ${e.artifact_uri??""} ${e.metadata_json}`.toLowerCase(),t),i={kind:"wiki_page",id:e.id,title:e.title,text:null,score:0,scores:{catalog:r},source:null,citation:null,artifact:{uri:e.artifact_uri,path:e.path,hash:e.content_hash,shard_key:e.path},provenance:Me(n),reasons:["wiki_catalog_match"]};return i.score=Z(i.scores,i.citation),i}function Qr(e,t){let n=Ee(e.metadata_json),r=At(`${e.kind} ${e.name} ${e.shard_key??""} ${e.artifact_uri??""} ${e.metadata_json}`.toLowerCase(),t),i={kind:"knowledge_index",id:e.id,title:e.name,text:null,score:0,scores:{catalog:r},source:null,citation:null,artifact:{uri:e.artifact_uri,path:D(n,["artifact_key"]),hash:D(n,["content_hash"]),shard_key:e.shard_key},provenance:Me(n),reasons:["index_catalog_match"]};return i.score=Z(i.scores,i.citation),i}function me(e,t){let n=`${t.kind}:${t.id}`,r=e.get(n);if(!r){e.set(n,t);return}r.scores={keyword:Math.max(r.scores.keyword??0,t.scores.keyword??0)||void 0,semantic:Math.max(r.scores.semantic??0,t.scores.semantic??0)||void 0,catalog:Math.max(r.scores.catalog??0,t.scores.catalog??0)||void 
0},r.reasons=Ot([...r.reasons,...t.reasons]),r.text=r.text??t.text,r.title=r.title??t.title,r.source=r.source??t.source,r.citation=r.citation??t.citation,r.artifact=r.artifact??t.artifact,r.provenance=r.provenance??t.provenance,r.score=Z(r.scores,r.citation)}function Zr(e){let t={source_chunk:0,wiki_chunk:1,wiki_page:2,knowledge_index:3};return e.sort((n,r)=>{if(r.score!==n.score)return r.score-n.score;return t[n.kind]-t[r.kind]||n.id.localeCompare(r.id)})}async function Lt(e){let t=e.query.trim();if(!t)throw Error("Search query is required.");let n=Math.max(1,Math.min(e.limit??10,100)),r=Wr(t),i=Xr(r),s=e.semantic===!0||e.fake===!0||Boolean(e.modelRef),d=[],l=null,a=null,o=null,c=0,u=0,_=0,f=new Map;w(e.dbPath);let m=S(e.dbPath);try{let k=zr(m,i,Math.max(n*3,20));c=k.length,k.forEach((x,p)=>me(f,Yr(x,Br(x.rank,p))));let g=Gr(m,r,Math.max(n,10)),b=Jr(m,r,Math.max(n,10));u=g.length+b.length,g.forEach((x)=>me(f,Vr(x,r))),b.forEach((x)=>me(f,Qr(x,r)))}finally{m.close()}if(s)try{let k=await ce({dbPath:e.dbPath,query:t,limit:Math.max(n*3,20),config:e.config,env:e.env,modelRef:e.modelRef,dimensions:e.dimensions,fake:e.fake,batchSize:e.batchSize,maxParallelCalls:e.maxParallelCalls});l=k.provider,a=k.model,o=k.dimensions,_=k.results.length;for(let g of k.results){let b={kind:"source_chunk",id:g.chunk_id,title:null,text:g.text,score:0,scores:{semantic:Hr(g.score)},source:{uri:g.source_uri,ref:g.source_ref,kind:g.provenance?.source_kind??null,revision:g.revision,hash:g.hash},citation:{chunk_id:g.chunk_id,start_offset:g.provenance?.start_offset??null,end_offset:g.provenance?.end_offset??null},artifact:null,provenance:g.provenance,reasons:["semantic_match"]};b.score=Z(b.scores,b.citation),me(f,b)}}catch(k){d.push(`semantic_search_failed: ${k instanceof Error?k.message:String(k)}`)}let 
y=Zr(Array.from(f.values())).slice(0,n);return{query:t,limit:n,mode:{keyword:!0,catalog:!0,semantic:s},semantic_provider:l,semantic_model:a,semantic_dimensions:o,counts:{keyword_results:c,catalog_results:u,semantic_results:_,merged_results:y.length},warnings:d,results:y}}import{createHash as ei,randomUUID as ti}from"crypto";var Ct=[{kind:"schema",prefix:"schemas/",description:"Machine-readable agent schemas and source rules."},{kind:"index",prefix:"indexes/",description:"Small orientation indexes and future shard manifests."},{kind:"log",prefix:"logs/",description:"Append-only JSONL run and wiki-maintenance log partitions."},{kind:"run",prefix:"runs/",description:"Prompt/tool/cost ledgers and generated output records."},{kind:"wiki_page",prefix:"wiki/",description:"Generated cited Markdown pages, not raw source files."},{kind:"export",prefix:"exports/",description:"Portable exports and snapshots of derived knowledge state."}];function Dt(e){let t=typeof e==="string"?Buffer.from(e):Buffer.from(e);return{hash:`sha256:${ei("sha256").update(t).digest("hex")}`,size_bytes:t.byteLength}}function Pt(e){return Ct.find((n)=>e.startsWith(n.prefix))?.kind??"artifact"}function Ut(e,t,n="global"){let 
r=Ke(e,t),i=e.storage.s3??null,s=i?.prefix?.replace(/^\/+|\/+$/g,"")??"",d=i?`s3://${i.bucket}/${s?`${s}/`:""}`:"";return{scope:n,mode:e.mode,storage_type:e.storage.type,workspace_home:t.home,local_layout:{app_path:Y,config_path:t.configPath,json_store_path:t.jsonStorePath,knowledge_db_path:t.knowledgeDbPath,directories:{artifacts:t.artifactsDir,cache:t.cacheDir,exports:t.exportsDir,indexes:t.indexesDir,logs:t.logsDir,runs:t.runsDir,schemas:t.schemasDir,wiki:t.wikiDir}},artifact_store:{type:e.storage.type,artifacts_root:e.storage.artifacts_root,uri_prefix:e.storage.type==="s3"?d:`file://${t.artifactsDir}/`,s3:i?{bucket:i.bucket,prefix:s,region:i.region??null,profile:i.profile??null,server_side_encryption:i.server_side_encryption??null,kms_key_configured:Boolean(i.kms_key_id)}:null},source_ownership:{owner:"open-files",preferred_ref:e.sources.preferred_ref,allowed_schemes:e.sources.allowed_schemes,raw_source_bytes_stored_in_open_knowledge:!1,stores:["source refs","source revisions and hashes","citation spans","redacted extracted chunks","embeddings","generated wiki artifacts","indexes","run ledgers"],does_not_store:["raw open-files bytes","S3 object credentials","connector secrets","hosted tenant ownership state"]},generated_artifacts:Ct,scalability:{catalog:"knowledge.db tracks sources, revisions, chunks, citations, indexes, runs, and storage_objects.",indexes:"Indexes are cataloged DB rows plus sharded artifacts, not one giant index.md.",logs:"Logs use dated JSONL partitions under logs/yyyy/mm/dd.jsonl.",markdown:"Markdown pages are the readable wiki layer over DB/object-store state."},warnings:r.warnings}}function Ke(e,t){let n=[],r=[];if(!t.home.endsWith(Y))r.push(`Workspace home does not end with ${Y}: ${t.home}`);if(e.storage.type==="s3"){if(!e.storage.s3?.bucket)n.push("storage.s3.bucket is required when storage.type is s3.");if(!e.storage.s3?.prefix)r.push("storage.s3.prefix is empty; generated knowledge artifacts will be written at the bucket 
root.");if(e.mode==="local")r.push("storage.type is s3 while mode is local; this is valid for BYO S3, but hosted wrappers should set mode to hosted.")}if(e.storage.type==="local"&&e.storage.s3)r.push("storage.s3 is configured but ignored while storage.type is local.");if(e.sources.preferred_ref!=="open-files")r.push("sources.preferred_ref should stay open-files for durable company knowledge.");if(!e.sources.allowed_schemes.includes("open-files"))n.push("sources.allowed_schemes must include open-files.");return{ok:n.length===0,errors:n,warnings:r}}function jt(e,t,n=new Date){let r=n.toISOString(),i=e.prepare(`
|
|
268
418
|
INSERT INTO storage_objects (
|
|
269
419
|
id, artifact_uri, kind, content_type, hash, size_bytes, metadata_json, created_at, updated_at
|
|
270
420
|
)
|
|
@@ -276,7 +426,7 @@ VALUES (3, datetime('now'));
|
|
|
276
426
|
size_bytes = excluded.size_bytes,
|
|
277
427
|
metadata_json = excluded.metadata_json,
|
|
278
428
|
updated_at = excluded.updated_at
|
|
279
|
-
`);e.transaction((
|
|
429
|
+
`);e.transaction((d)=>{for(let l of d)i.run(ti(),l.uri,l.kind,l.content_type??null,l.hash??null,l.size_bytes??null,JSON.stringify({key:l.key,...l.metadata??{}}),r,r)})(t)}import{createHash as ni}from"crypto";function ri(e){let t=String(e.getUTCFullYear()),n=String(e.getUTCMonth()+1).padStart(2,"0"),r=String(e.getUTCDate()).padStart(2,"0");return{year:t,month:n,day:r}}function Fe(e,t){return`${e}_${ni("sha256").update(t).digest("hex").slice(0,20)}`}function ii(e){let t=e.trim().split(/\s+/).filter(Boolean).length;return Math.max(1,Math.ceil(t*1.25))}function si(){return`# Knowledge Agent Schema v1
|
|
280
430
|
|
|
281
431
|
## Source Rules
|
|
282
432
|
|
|
@@ -301,7 +451,7 @@ VALUES (3, datetime('now'));
|
|
|
301
451
|
## Lint Rules
|
|
302
452
|
|
|
303
453
|
- Flag stale pages, missing citations, contradictions, orphan pages, duplicate pages, and unresolved source refs.
|
|
304
|
-
`}function
|
|
454
|
+
`}function oi(){return`# Knowledge Index
|
|
305
455
|
|
|
306
456
|
This is a compact orientation index for agents. It is not the full search index.
|
|
307
457
|
|
|
@@ -316,18 +466,19 @@ This is a compact orientation index for agents. It is not the full search index.
|
|
|
316
466
|
|
|
317
467
|
Raw source files are resolved through open-files. This app stores source refs,
|
|
318
468
|
citations, chunks, generated wiki artifacts, indexes, and run records.
|
|
319
|
-
`}function
|
|
469
|
+
`}function Mt(){return`# Wiki
|
|
320
470
|
|
|
321
471
|
Generated durable knowledge pages live here.
|
|
322
472
|
|
|
323
473
|
Pages should be concise, cited, and organized for both humans and agents.
|
|
324
|
-
`}async function
|
|
325
|
-
`,content_type:"application/x-ndjson"}],u=await Promise.all(c.map(async(
|
|
474
|
+
`}async function Kt(e,t=new Date){let{year:n,month:r,day:i}=ri(t),s="schemas/v1.md",d="indexes/root.md",l="wiki/README.md",a=`logs/${n}/${r}/${i}.jsonl`,o={ts:t.toISOString(),event:"wiki_layout_initialized",schema_key:"schemas/v1.md",root_index_key:"indexes/root.md",wiki_readme_key:"wiki/README.md"},c=[{key:"schemas/v1.md",body:si(),content_type:"text/markdown"},{key:"indexes/root.md",body:oi(),content_type:"text/markdown"},{key:"wiki/README.md",body:Mt(),content_type:"text/markdown"},{key:a,body:`${JSON.stringify(o)}
|
|
475
|
+
`,content_type:"application/x-ndjson"}],u=await Promise.all(c.map(async(_)=>{let f=await e.put(_);return{key:f.key,uri:f.uri,kind:Pt(_.key),content_type:_.content_type,metadata:{provenance:Ie({generated_from:"wiki_layout_init",artifact_key:_.key,citation_required:_.key.startsWith("wiki/")||_.key.startsWith("indexes/")})},...Dt(_.body)}}));return{schema_key:"schemas/v1.md",root_index_key:"indexes/root.md",wiki_readme_key:"wiki/README.md",log_key:a,artifacts:u,written:["schemas/v1.md","indexes/root.md","wiki/README.md",a]}}function We(e){let t=e.metadata?.provenance;if(t&&typeof t==="object"&&!Array.isArray(t))return t;return Ie({generated_from:"wiki_layout_init",artifact_key:e.key})}function ai(e,t,n,r,i,s){let d=We(r),l=Fe("chk",`${t}\x00${r.hash??r.uri}`),a=e.query("SELECT id FROM chunks WHERE wiki_page_id = ?").all(t);for(let o of a)e.run("DELETE FROM chunks_fts WHERE chunk_id = ?",[o.id]);e.run("DELETE FROM chunks WHERE wiki_page_id = ?",[t]),e.run(`INSERT INTO chunks (id, wiki_page_id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json, created_at)
|
|
476
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,[l,t,"wiki",0,i,ii(i),0,i.length,JSON.stringify({artifact_key:r.key,artifact_uri:r.uri,content_hash:r.hash??null,provenance:d}),s]),e.run("INSERT INTO chunks_fts (chunk_id, text, title, source_uri) VALUES (?, ?, ?, ?)",[l,i,n,r.uri])}function Ft(e,t,n=new Date){let r=n.toISOString(),i=t.find((d)=>d.key.endsWith("indexes/root.md")),s=t.find((d)=>d.key.endsWith("wiki/README.md"));if(i)e.run(`INSERT INTO knowledge_indexes (id, kind, name, artifact_uri, shard_key, metadata_json, created_at, updated_at)
|
|
326
477
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
327
478
|
ON CONFLICT(kind, name, shard_key) DO UPDATE SET
|
|
328
479
|
artifact_uri = excluded.artifact_uri,
|
|
329
480
|
metadata_json = excluded.metadata_json,
|
|
330
|
-
updated_at = excluded.updated_at`,[
|
|
481
|
+
updated_at = excluded.updated_at`,[Fe("idx","root:indexes/root.md"),"root","root",i.uri,"root",JSON.stringify({artifact_key:i.key,content_hash:i.hash??null,provenance:We(i)}),r,r]);if(s){let d=Fe("wiki","wiki/README.md");e.run(`INSERT INTO wiki_pages (id, path, title, artifact_uri, content_hash, status, metadata_json, created_at, updated_at)
|
|
331
482
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
332
483
|
ON CONFLICT(path) DO UPDATE SET
|
|
333
484
|
title = excluded.title,
|
|
@@ -335,7 +486,7 @@ Pages should be concise, cited, and organized for both humans and agents.
|
|
|
335
486
|
content_hash = excluded.content_hash,
|
|
336
487
|
status = excluded.status,
|
|
337
488
|
metadata_json = excluded.metadata_json,
|
|
338
|
-
updated_at = excluded.updated_at`,[
|
|
489
|
+
updated_at = excluded.updated_at`,[d,"wiki/README.md","Wiki",s.uri,s.hash??null,"active",JSON.stringify({artifact_key:s.key,provenance:We(s)}),r,r]),ai(e,d,"Wiki",s,Mt(),r)}}class Wt{options;ensuredWorkspace;cachedConfig;constructor(e={}){this.options=e}get scope(){return this.options.scope??"global"}get workspace(){return this.ensuredWorkspace??Be(this.options.scope,this.options.cwd)}ensureWorkspace(){if(!this.ensuredWorkspace)this.ensuredWorkspace=$e(this.workspace.home);return this.ensuredWorkspace}jsonStorePath(){return this.ensureWorkspace().jsonStorePath}config(){if(!this.cachedConfig){let e=this.ensureWorkspace();this.cachedConfig=He(e.configPath)}return this.cachedConfig}safetyPolicy(){return Et(this.config(),this.ensureWorkspace())}artifactStore(){return Ze(this.config(),this.ensureWorkspace())}storageContract(){return Ut(this.config(),this.ensureWorkspace(),this.scope)}validateStorage(){return Ke(this.config(),this.ensureWorkspace())}paths(){let e=this.ensureWorkspace();return{ok:!0,scope:this.scope,home:e.home,config_path:e.configPath,json_store_path:e.jsonStorePath,knowledge_db_path:e.knowledgeDbPath,artifacts_dir:e.artifactsDir,indexes_dir:e.indexesDir,logs_dir:e.logsDir,runs_dir:e.runsDir,schemas_dir:e.schemasDir,wiki_dir:e.wikiDir,config:this.config(),message:e.home}}initDb(){return w(this.ensureWorkspace().knowledgeDbPath)}dbStats(){let e=this.ensureWorkspace();return w(e.knowledgeDbPath),Je(e.knowledgeDbPath)}async initWiki(){let e=this.ensureWorkspace();w(e.knowledgeDbPath);let t=await Kt(this.artifactStore()),n=S(e.knowledgeDbPath);try{jt(n,t.artifacts),Ft(n,t.artifacts)}finally{n.close()}return t}async ingestManifest(e){let t=this.ensureWorkspace();return vt({dbPath:t.knowledgeDbPath,input:e,config:this.config(),safetyPolicy:this.safetyPolicy()})}async ingestSource(e,t){let n=this.ensureWorkspace();return wt({dbPath:n.knowledgeDbPath,sourceRef:e,purpose:t,config:this.config(),safetyPolicy:this.safetyPolicy()})}async resolveSource(e,t={}){let 
n=this.ensureWorkspace();return ge({dbPath:n.knowledgeDbPath,sourceRef:e,purpose:t.purpose,limit:t.limit,safetyPolicy:this.safetyPolicy()})}async consumeOutbox(e){let t=this.ensureWorkspace();return Tt({dbPath:t.knowledgeDbPath,input:e,config:this.config(),safetyPolicy:this.safetyPolicy()})}providerStatus(e=process.env){return it(this.config(),e)}modelRegistry(){return Ae(this.config())}embeddingStatus(){let e=this.ensureWorkspace();return _t(e.knowledgeDbPath)}async indexEmbeddings(e={}){let t=this.ensureWorkspace();return lt({...e,dbPath:t.knowledgeDbPath,config:this.config()})}async semanticSearch(e){let t=this.ensureWorkspace();return ce({...e,dbPath:t.knowledgeDbPath,config:this.config()})}async search(e){let t=this.ensureWorkspace();return Lt({...e,dbPath:t.knowledgeDbPath,config:this.config()})}}function Xt(e={}){return new Wt(e)}var ee={name:"@hasna/knowledge",version:"0.2.15",description:"Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",type:"module",bin:{"open-knowledge":"bin/open-knowledge.js","open-knowledge-mcp":"bin/open-knowledge-mcp.js"},files:["bin","src","docs","LICENSE","README.md"],scripts:{test:"bun test","test:cli":"bun test tests/cli.test.ts",build:"bun build --target=bun --outfile=bin/open-knowledge.js --minify --external @aws-sdk/client-s3 --external @aws-sdk/credential-providers --external ai --external @ai-sdk/openai --external @ai-sdk/anthropic --external @ai-sdk/deepseek src/cli.ts && bun build --target=bun --outfile=bin/open-knowledge-mcp.js --external @modelcontextprotocol/sdk --external @aws-sdk/client-s3 --external @aws-sdk/credential-providers --external ai --external @ai-sdk/openai --external @ai-sdk/anthropic --external @ai-sdk/deepseek src/mcp.js",prepublishOnly:"bun run build",postinstall:"bun run 
build"},keywords:["knowledge","cli","agents","json","notes","local","store"],license:"Apache-2.0",publishConfig:{registry:"https://registry.npmjs.org",access:"public"},repository:{type:"git",url:"git+https://github.com/hasna/knowledge.git"},bugs:{url:"https://github.com/hasna/knowledge/issues"},author:"Hasna Inc. <hasna@example.com>",engines:{bun:">=1.0",node:">=18"},dependencies:{"@aws-sdk/client-s3":"^3.1063.0","@aws-sdk/credential-providers":"^3.1063.0","@ai-sdk/anthropic":"^3.0.81","@ai-sdk/deepseek":"^2.0.35","@ai-sdk/openai":"^3.0.68","@modelcontextprotocol/sdk":"^1.29.0",ai:"^6.0.197",zod:"^4.3.6"},devDependencies:{"@types/bun":"^1.3.14"}};var $t={debug:0,info:1,warn:2,error:3},ui=()=>{if(process.env.DEBUG)return"debug";if(process.env.LOG_LEVEL==="debug")return"debug";if(process.env.LOG_LEVEL==="warn")return"warn";if(process.env.LOG_LEVEL==="error")return"error";return"info"};function B(e,t,n){if($t[e]<$t[ui()])return;let r={debug:"[DEBUG]",info:"[INFO]",warn:"[WARN]",error:"[ERROR]"}[e],i=n?`${r} ${t} ${JSON.stringify(n)}`:`${r} ${t}`;if(e==="error")console.error(i);else console.error(i)}var di=["add","list","get","delete","update","archive","restore","upsert","untag","export","prune","dedupe","stats","paths","storage","db","wiki","source","ingest","reindex","search","embeddings","providers","safety","help"],Bt={ls:"list",rm:"delete",edit:"update",unarchive:"restore"};function li(e){let t=[],n={};for(let r=0;r<e.length;r+=1){let 
i=e[r];if(!i.startsWith("-")){t.push(i);continue}switch(i){case"--json":n.json=!0;break;case"--yes":case"-y":n.yes=!0;break;case"--help":case"-h":n.help=!0;break;case"--version":case"-v":n.version=!0;break;case"--desc":n.desc=!0;break;case"--page":case"-p":n.page=Number(e[r+1]),r+=1;break;case"--limit":case"-l":n.limit=Number(e[r+1]),r+=1;break;case"--search":case"-s":n.search=e[r+1],r+=1;break;case"--sort":n.sort=e[r+1],r+=1;break;case"--id":n.id=e[r+1],r+=1;break;case"--store":n.store=e[r+1],r+=1;break;case"--title":n.title=e[r+1],r+=1;break;case"--content":n.content=e[r+1],r+=1;break;case"--url":n.url=e[r+1],r+=1;break;case"--tag":case"-t":n.tag=e[r+1],r+=1;break;case"--format":n.format=e[r+1],r+=1;break;case"--completions":n.completions=e[r+1],r+=1;break;case"--purpose":n.purpose=e[r+1],r+=1;break;case"--model":n.model=e[r+1],r+=1;break;case"--dimensions":n.dimensions=Number(e[r+1]),r+=1;break;case"--semantic":n.semantic=!0;break;case"--fake":n.fake=!0;break;case"--no-color":n.noColor=!0;break;case"--scope":n.scope=e[r+1],r+=1;break;case"--older-than":n.olderThan=Number(e[r+1]),r+=1;break;case"--empty":n.empty=!0;break;case"--archived":n.archived=!0;break;case"--include-archived":n.includeArchived=!0;break;default:throw Error(`Unknown flag: ${i}. Run 'open-knowledge --help' for valid options.`)}}return{positional:t,flags:n}}function _i(e){if(!e)return"";return Bt[e]??e}function fi(e,t){let n=Array.from({length:e.length+1},()=>Array(t.length+1).fill(0));for(let r=0;r<=e.length;r+=1)n[r][0]=r;for(let r=0;r<=t.length;r+=1)n[0][r]=r;for(let r=1;r<=e.length;r+=1)for(let i=1;i<=t.length;i+=1){let s=e[r-1]===t[i-1]?0:1;n[r][i]=Math.min(n[r-1][i]+1,n[r][i-1]+1,n[r-1][i-1]+s)}return n[e.length][t.length]}function gi(e){if(!e)return"";let t=[...di,...Object.keys(Bt)],n="",r=Number.POSITIVE_INFINITY;for(let i of t){let s=fi(e,i);if(s<r)r=s,n=i}return r<=3?n:""}function pi(){console.log(`open-knowledge - local agent knowledge store
|
|
339
490
|
|
|
340
491
|
Usage:
|
|
341
492
|
open-knowledge <command> [options]
|
|
@@ -362,6 +513,8 @@ Commands:
|
|
|
362
513
|
ingest manifest <file|s3://> Ingest an open-files manifest into knowledge.db
|
|
363
514
|
ingest source <source-ref> Ingest a read-only source ref into knowledge.db
|
|
364
515
|
reindex outbox <file|s3://> Consume open-files change events and invalidate chunks
|
|
516
|
+
search <query> Hybrid search sources, wiki pages, and indexes
|
|
517
|
+
embeddings status|index|search Build/query local vector embeddings
|
|
365
518
|
providers status|models|check Inspect AI SDK provider config and credentials
|
|
366
519
|
safety status|check|approve|audit|redact
|
|
367
520
|
help [command] Show help
|
|
@@ -370,6 +523,10 @@ Global Options:
|
|
|
370
523
|
--json Output JSON
|
|
371
524
|
--store <path> Override store path
|
|
372
525
|
--purpose <name> Read-only source purpose (default: knowledge_answer)
|
|
526
|
+
--model <provider:model> AI/embedding model ref
|
|
527
|
+
--dimensions <n> Embedding dimensions for local/fake providers
|
|
528
|
+
--semantic Include vector semantic results in search
|
|
529
|
+
--fake Use deterministic fake embeddings for local tests
|
|
373
530
|
--scope local|global|project Store scope (default: global ~/.hasna/apps/knowledge/)
|
|
374
531
|
--no-color Disable color output
|
|
375
532
|
--completions <shell> Output completions for bash|zsh|fish
|
|
@@ -406,5 +563,5 @@ Export Options:
|
|
|
406
563
|
|
|
407
564
|
Prune Options:
|
|
408
565
|
--older-than <days> Remove items older than N days
|
|
409
|
-
--empty Remove items with empty content`)}function
|
|
410
|
-
_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex providers safety help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"{created,title}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--purpose)--purpose[purpose]:" "(--no-color)--no-color[disable color]" "(--scope)--scope"{local,global,project}:" }; _open_knowledge`);else if(a==="fish")console.log('complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex providers safety help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l 
completions; complete -c open-knowledge -l purpose; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"');else throw Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");return}let n=En(t[0]);if(!n||r.help||n==="help"){bn(t[1]);return}let i=pt({scope:r.scope}),s=r.store;if(!s)if(r.scope==="project"||r.scope==="local")s=i.jsonStorePath();else s=fe();if(n==="paths"){E(i.paths(),r.json);return}if(n==="storage"){let a=t[1]??"status";if(a==="status"){let o=i.storageContract(),c=i.validateStorage();E({ok:c.ok,...o,validation:c,message:`${o.storage_type} artifact storage at ${o.artifact_store.uri_prefix}`},r.json);return}if(a==="validate"){let o=i.validateStorage();E({ok:o.ok,validation:o,message:o.ok?"Storage contract valid":`Storage contract invalid: ${o.errors.join("; ")}`},r.json);return}throw Error("Invalid storage action. Use 'status' or 'validate'.")}if(n==="db"){let a=t[1]??"init";if(a!=="init"&&a!=="stats")throw Error("Invalid db action. Use 'init' or 'stats'.");if(a==="init"){let c=i.initDb();E({ok:!0,...c,message:`Initialized ${c.path}`},r.json);return}let o=i.dbStats();E({ok:!0,path:i.workspace.knowledgeDbPath,...o,message:`knowledge.db schema v${o.schema_version}`},r.json);return}if(n==="wiki"){if((t[1]??"init")!=="init")throw Error("Invalid wiki action. 
Use 'init'.");let o=await i.initWiki();E({ok:!0,...o,message:`Initialized wiki layout in ${i.workspace.home}`},r.json);return}if(n==="safety"){let a=t[1]??"status",o=i.ensureWorkspace(),c=i.safetyPolicy();i.initDb();let u=A(o.knowledgeDbPath);try{if(a==="status"){E({ok:!0,mode:c.mode,workspace:o.home,allow_write_roots:c.allowWriteRoots,read_only_source_access:c.readOnlySourceAccess,network:c.network,redaction:c.redaction,approvals:c.approvals,message:`Safety policy: ${c.mode}`},r.json);return}if(a==="check"){let d=t[2]??"generated_write",l=t[3]??null,m;try{if(d==="web_search")te(c),m={action:d,target_uri:l,approval_required:!1,approved:!0,decision:"allow"};else if(d==="s3_read"){if(!l)throw Error("safety check s3_read requires an s3:// target.");j(l,c),m={action:d,target_uri:l,approval_required:!1,approved:!0,decision:"allow"}}else m=ze(u,c,d,l);w(u,{event_type:"safety_check",action:d,target_uri:l,decision:m.decision==="allow"?"allow":"requires_approval",metadata:m}),E({ok:!0,...m,message:`Safety check ${m.decision}`},r.json);return}catch(T){throw w(u,{event_type:"safety_check",action:d,target_uri:l,decision:"deny",metadata:{error:T instanceof Error?T.message:String(T)}}),T}}if(a==="approve"){let d=t[2]??"generated_write",l=t[3]??null,m=Be(u,{action:d,target_uri:l,reason:"local-cli approval",metadata:{scope:r.scope??"global"}});w(u,{event_type:"approval",action:d,target_uri:l,decision:"allow",metadata:{approval_id:m.id}}),E({ok:!0,...m,action:d,target_uri:l,message:`Approved ${d}`},r.json);return}if(a==="audit"){let d=u.query("SELECT id, event_type, action, target_uri, decision, metadata_json, created_at FROM audit_events ORDER BY created_at DESC LIMIT 50").all().map((l)=>({id:l.id,event_type:l.event_type,action:l.action,target_uri:l.target_uri,decision:l.decision,metadata:JSON.parse(l.metadata_json),created_at:l.created_at}));E({ok:!0,events:d,message:`${d.length} audit event(s)`},r.json);return}if(a==="redact"){let d=t.slice(2).join(" ");if(!d)throw Error("Usage: 
open-knowledge safety redact <text>");let l=re(d,c);if(l.findings.length>0)ne(u,{source_uri:"safety://redact",findings:l.findings,metadata:{command:"safety redact"}});w(u,{event_type:"redaction",action:"safety_redact",target_uri:"safety://redact",decision:l.findings.length>0?"redacted":"allow",metadata:{findings:l.findings.length}}),E({ok:!0,text:l.text,findings:l.findings,message:`Redacted ${l.findings.length} finding(s)`},r.json);return}throw Error("Invalid safety action. Use 'status', 'check', 'approve', 'audit', or 'redact'.")}finally{u.close()}}if(n==="source"){if((t[1]??"")!=="resolve")throw Error("Invalid source action. Use 'resolve'.");let o=t[2];if(!o)throw Error("Usage: open-knowledge source resolve <source-ref>");let c=await i.resolveSource(o,{purpose:r.purpose,limit:r.limit});E({ok:!0,...c,message:c.resolved?`Resolved ${c.source_ref} (${c.content.chunks_returned}/${c.content.chunks_total} chunks)`:`Source not indexed: ${o}`},r.json);return}if(n==="ingest"){let a=t[1]??"";if(a==="manifest"){let o=t[2];if(!o)throw Error("Usage: open-knowledge ingest manifest <file|s3://bucket/key>");let c=await i.ingestManifest(o);E({ok:!0,...c,message:`Ingested ${c.items_seen} manifest item(s)`},r.json);return}if(a==="source"){let o=t[2];if(!o)throw Error("Usage: open-knowledge ingest source <source-ref>");let c=await i.ingestSource(o,r.purpose);E({ok:!0,...c,message:`Ingested source ${c.source_ref} (${c.chunks_inserted} chunks)`},r.json);return}throw Error("Invalid ingest action. Use 'manifest' or 'source'.")}if(n==="reindex"){if((t[1]??"")!=="outbox")throw Error("Invalid reindex action. 
Use 'outbox'.");let o=t[2];if(!o)throw Error("Usage: open-knowledge reindex outbox <file|s3://bucket/key>");let c=await i.consumeOutbox(o);E({ok:!0,...c,message:`Consumed ${c.events_seen} outbox event(s)`},r.json);return}if(n==="providers"){let a=t[1]??"status";if(a==="status"){let o=i.providerStatus(),c=o.providers.filter((u)=>u.configured).length;E({ok:!0,...o,message:`${c}/${o.providers.length} provider credential(s) configured`},r.json);return}if(a==="models"){let o=i.modelRegistry();E({ok:!0,models:o,message:`${o.length} model alias(es)`},r.json);return}if(a==="check"){let o=t[2]??"default",c=we(o,i.config()),u=be(c),d=it(u.provider,i.config());E({ok:!0,target:o,model_ref:c,provider:u.provider,model:u.model,credential:d,message:`${u.provider} credentials configured`},r.json);return}throw Error("Invalid providers action. Use 'status', 'models', or 'check'.")}if(ge(s),n==="add"){let a=t[1],o=t[2];if(!a||!o)throw Error("Usage: open-knowledge add <title> <content>");O(s,()=>{let c=v(s),u={id:pe(),title:a,content:o,url:r.url??null,tags:r.tag?[r.tag]:[],created_at:new Date().toISOString(),updated_at:new Date().toISOString()};c.items.push(u),U(s,c),M("info","Item added",{id:u.id,title:u.title}),E({ok:!0,item:u,message:`Added ${u.id}`},r.json)});return}if(n==="list"){if(r.format!==void 0&&r.format!=="table"&&r.format!=="json")throw Error("Invalid --format value for list. 
Use 'table' or 'json'.");O(s,()=>{let a=v(s),o=Number.isFinite(r.page)&&r.page>0?r.page:1,c=Number.isFinite(r.limit)&&r.limit>0?r.limit:20,u=r.search?String(r.search).toLowerCase():"",d=r.tag?String(r.tag).toLowerCase():"",l=r.format==="table"||!r.json&&!r.format&&wn(r),m=r.json||r.format==="json",T=a.items;if(r.archived)T=T.filter((h)=>h.archived===!0);else if(!r.includeArchived)T=T.filter((h)=>!h.archived);if(u)T=T.filter((h)=>h.title.toLowerCase().includes(u)||h.content.toLowerCase().includes(u));if(d)T=T.filter((h)=>h.tags&&h.tags.map((B)=>B.toLowerCase()).includes(d));let{sorted:p,sort:I,direction:k}=Sn(T,r),S=(o-1)*c,g=p.slice(S,S+c),R=Math.max(1,Math.ceil(p.length/c));if(m){E({ok:!0,page:o,limit:c,total:p.length,total_pages:R,sort:I,direction:k,items:g},!0);return}if(g.length===0){E(`No items found (search=${u||"none"}, tag=${d||"none"})`,!1);return}if(l){let h=(P)=>P,B=`${h("ID")} ${h("TITLE")} ${h("CREATED")} ${h("URL")} ${h("TAGS")}`;console.log(B);for(let P of g)console.log(`${P.id} ${h(P.title)} ${P.created_at} ${P.url?h(P.url):""} ${P.tags?.length?h(`[${P.tags.join(", ")}]`):""}`);console.log(`Page ${o}/${R} | showing ${g.length} of ${p.length} | sort=${I} ${k} | search=${u||"none"} | tag=${d||"none"}`)}else{for(let h of g)console.log(`${h.id} ${h.title} ${h.created_at}${h.url?` ${h.url}`:""}${h.tags?.length?` [${h.tags.join(", ")}]`:""}`);console.log(`Page ${o}/${R} | showing ${g.length} of ${p.length} | sort=${I} ${k} | search=${u||"none"} | tag=${d||"none"}`)}});return}if(n==="get"){Y(r),O(s,()=>{let o=v(s).items.find((c)=>c.id===r.id||c.short_id===r.id);if(!o)throw Error(`Item not found: ${r.id}`);E({ok:!0,item:o,message:`${o.id}: ${o.title}`},r.json)});return}if(n==="update"){Y(r),O(s,()=>{let a=v(s),o=a.items.findIndex((u)=>u.id===r.id||u.short_id===r.id);if(o===-1)throw Error(`Item not found: ${r.id}`);let c=a.items[o];if(r.title!==void 0)c.title=r.title;if(r.content!==void 0)c.content=r.content;if(r.url!==void 0)c.url=r.url;if(r.tag!==void 
0){if(c.tags=c.tags||[],!c.tags.map((u)=>u.toLowerCase()).includes(r.tag.toLowerCase()))c.tags.push(r.tag)}c.updated_at=new Date().toISOString(),a.items[o]=c,U(s,a),E({ok:!0,item:c,message:`Updated ${c.id}`},r.json)});return}if(n==="archive"||n==="restore"){Y(r),O(s,()=>{let a=v(s),o=a.items.findIndex((u)=>u.id===r.id||u.short_id===r.id);if(o===-1)throw Error(`Item not found: ${r.id}`);let c=a.items[o];c.archived=n==="archive",c.updated_at=new Date().toISOString(),a.items[o]=c,U(s,a),E({ok:!0,item:c,message:`${n==="archive"?"Archived":"Restored"} ${c.id}`},r.json)});return}if(n==="untag"){if(Y(r),!r.tag)throw Error("Missing required --tag. Example: open-knowledge untag --id <id> -t <tag>");O(s,()=>{let a=v(s),o=a.items.findIndex((d)=>d.id===r.id||d.short_id===r.id);if(o===-1)throw Error(`Item not found: ${r.id}`);let c=a.items[o],u=c.tags?.length??0;c.tags=(c.tags??[]).filter((d)=>d.toLowerCase()!==r.tag.toLowerCase()),c.updated_at=new Date().toISOString(),a.items[o]=c,U(s,a),E({ok:!0,item:c,removed:u-c.tags.length,message:`Removed tag from ${c.id}`},r.json)});return}if(n==="upsert"){let a=r.title??t[1],o=r.content??t[2];O(s,()=>{let c=v(s),u=r.id?c.items.findIndex((m)=>m.id===r.id||m.short_id===r.id):-1,d=new Date().toISOString();if(u===-1){if(!a||!o)throw Error("New item requires title and content. 
Example: open-knowledge upsert <title> <content> [--id <id>]");let m=r.id??pe(),T={id:m,short_id:Ie(m),title:a,content:o,url:r.url??null,tags:r.tag?[r.tag]:[],metadata:{},archived:!1,created_at:d,updated_at:d};c.items.push(T),U(s,c),E({ok:!0,created:!0,item:T,message:`Upserted ${T.id}`},r.json);return}let l=c.items[u];if(a!==void 0)l.title=a;if(o!==void 0)l.content=o;if(r.url!==void 0)l.url=r.url;if(r.tag!==void 0){if(l.tags=l.tags||[],!l.tags.map((m)=>m.toLowerCase()).includes(r.tag.toLowerCase()))l.tags.push(r.tag)}l.updated_at=d,c.items[u]=l,U(s,c),E({ok:!0,created:!1,item:l,message:`Upserted ${l.id}`},r.json)});return}if(n==="delete"){if(Y(r),!r.yes)throw Error("Refusing delete without --yes. Re-run with: open-knowledge delete --id <id> --yes");O(s,()=>{let a=v(s),o=a.items.length;a.items=a.items.filter((u)=>u.id!==r.id&&u.short_id!==r.id);let c=o!==a.items.length;if(U(s,a),!c)throw Error(`Item not found: ${r.id}`);M("info","Item deleted",{id:r.id}),E({ok:!0,deleted_id:r.id,message:`Deleted ${r.id}`},r.json)});return}if(n==="export"){let a=r.format??"json";if(a!=="json"&&a!=="jsonl")throw Error("Invalid --format. Use 'json' or 'jsonl'.");O(s,()=>{let o=v(s);if(a==="jsonl")for(let c of o.items)console.log(JSON.stringify(c));else E({ok:!0,items:o.items},r.json)});return}if(n==="prune"){if(!r.yes)throw Error("Refusing prune without --yes. Re-run with: open-knowledge prune --yes [--older-than <days>] [--empty]");O(s,()=>{let a=v(s),o=a.items.length;if(r.olderThan!==void 0){let u=new Date;u.setDate(u.getDate()-r.olderThan),a.items=a.items.filter((d)=>new Date(d.created_at)>=u)}if(r.empty)a.items=a.items.filter((u)=>u.content.trim().length>0);let c=o-a.items.length;U(s,a),M("info","Prune completed",{pruned:c,remaining:a.items.length}),E({ok:!0,pruned:c,remaining:a.items.length,message:`Pruned ${c} item(s)`},r.json)});return}if(n==="dedupe"){if(!r.yes)throw Error("Refusing dedupe without --yes. 
Re-run with: open-knowledge dedupe --yes [--json]");O(s,()=>{let a=v(s),o=new Set,c=a.items.length;a.items=a.items.filter((d)=>{let l=`${d.title}\x00${d.content}`;if(o.has(l))return!1;return o.add(l),!0});let u=c-a.items.length;U(s,a),M("info","Dedupe completed",{removed:u,remaining:a.items.length}),E({ok:!0,removed:u,remaining:a.items.length,message:`Dedupe removed ${u} duplicate(s)`},r.json)});return}if(n==="stats"){O(s,()=>{let a=v(s),o=a.items.filter((k)=>!k.archived),c=o.length,u=a.items.length-c,d=o.filter((k)=>k.url).length,l=o.filter((k)=>k.tags&&k.tags.length>0).length,m=c>0?o.map((k)=>k.created_at).sort()[0]:null,T=c>0?o.map((k)=>k.created_at).sort()[c-1]:null,p={};for(let k of o)for(let S of k.tags||[])p[S]=(p[S]||0)+1;let I=Object.entries(p).sort((k,S)=>S[1]-k[1]).slice(0,5).map(([k,S])=>({tag:k,count:S}));E({ok:!0,total:c,archived:u,with_url:d,with_tags:l,oldest:m,newest:T,top_tags:I,message:`${c} items | ${d} with URL | ${l} with tags`},r.json)});return}let _=Tn(t[0]),f=_?` Did you mean '${_}'?`:"";throw M("warn","Unknown command",{input:t[0],suggestion:_}),Error(`Unknown command: ${t[0]}.${f} Run 'open-knowledge --help' for available commands.`)}if(import.meta.main)xn(process.argv.slice(2)).catch((e)=>{let t=e instanceof Error?e.message:String(e);M("error","CLI error",{message:t,stack:e instanceof Error?e.stack:void 0}),console.error(`Error: ${t}`),process.exitCode=1});export{Tn as suggestCommand,Sn as sortItems,xn as run,yn as parseArgs};
|
|
566
|
+
--empty Remove items with empty content`)}/** Print the usage line for one command (e is the help topic; ls, edit, unarchive and rm share the line of their main command). Any unrecognized or missing topic falls through to the general help from pi(). */function hi(e){if(e==="add"){console.log("Usage: open-knowledge add <title> <content> [--url <url>] [-t <tag>] [--json]");return}if(e==="list"||e==="ls"){console.log("Usage: open-knowledge list|ls [--format table|json] [-p <page>] [-l <limit>] [-s <search>] [-t <tag>] [--sort created|title] [--desc] [--json]");return}if(e==="get"){console.log("Usage: open-knowledge get --id <id> [--json]");return}if(e==="update"||e==="edit"){console.log("Usage: open-knowledge update|edit --id <id> [--title <title>] [--content <content>] [--url <url>] [-t <tag>] [--json]");return}if(e==="archive"){console.log("Usage: open-knowledge archive --id <id> [--json]");return}if(e==="restore"||e==="unarchive"){console.log("Usage: open-knowledge restore|unarchive --id <id> [--json]");return}if(e==="upsert"){console.log("Usage: open-knowledge upsert [title] [content] [--id <id>] [--title <title>] [--content <content>] [--url <url>] [-t <tag>] [--json]");return}if(e==="untag"){console.log("Usage: open-knowledge untag --id <id> -t <tag> [--json]");return}if(e==="delete"||e==="rm"){console.log("Usage: open-knowledge delete|rm --id <id> -y [--json]");return}if(e==="export"){console.log("Usage: open-knowledge export [--format jsonl] [--json]");return}if(e==="prune"){console.log("Usage: open-knowledge prune --yes [--older-than <days>] [--empty] [--json]");return}if(e==="dedupe"){console.log("Usage: open-knowledge dedupe --yes [--json]");return}if(e==="stats"){console.log("Usage: open-knowledge stats [--json]");return}if(e==="paths"){console.log("Usage: open-knowledge paths [--scope local|global|project] [--json]");return}if(e==="storage"){console.log("Usage: open-knowledge storage status|validate [--scope local|global|project] [--json]");return}if(e==="db"){console.log("Usage: open-knowledge db init|stats [--scope local|global|project] [--json]");return}if(e==="wiki"){console.log("Usage: open-knowledge wiki init [--scope local|global|project] 
[--json]");return}if(e==="source"){console.log("Usage: open-knowledge source resolve <source-ref> [--purpose knowledge_answer|knowledge_index] [--limit <n>] [--scope local|global|project] [--json]");return}if(e==="ingest"){console.log("Usage: open-knowledge ingest manifest <file|s3://bucket/key> | source <source-ref> [--purpose knowledge_index] [--scope local|global|project] [--json]");return}if(e==="reindex"){console.log("Usage: open-knowledge reindex outbox <file|s3://bucket/key> [--scope local|global|project] [--json]");return}if(e==="search"){console.log("Usage: open-knowledge search <query> [--semantic] [--model openai:text-embedding-3-small] [--limit <n>] [--dimensions <n>] [--fake] [--scope local|global|project] [--json]");return}if(e==="embeddings"){console.log("Usage: open-knowledge embeddings status|index|search [query] [--model openai:text-embedding-3-small] [--limit <n>] [--dimensions <n>] [--fake] [--scope local|global|project] [--json]");return}if(e==="providers"){console.log("Usage: open-knowledge providers status|models|check [provider|model-alias] [--scope local|global|project] [--json]");return}if(e==="safety"){console.log("Usage: open-knowledge safety status|check|approve|audit|redact [args] [--scope local|global|project] [--json]");return}pi()}/** Color/TTY check: false when e.noColor or env NO_COLOR is set, true when env FORCE_COLOR is set, otherwise true only if process.stdout.isTTY is exactly true. */function mi(e){if(e.noColor||process.env.NO_COLOR)return!1;if(process.env.FORCE_COLOR)return!0;return process.stdout.isTTY===!0}/** Print a result. With t truthy, e is printed as 2-space-indented JSON; otherwise a string e is printed as is and an object prints e.message, falling back to its JSON form when message is null or undefined. The third parameter n is never read. */function E(e,t,n){if(t){console.log(JSON.stringify(e,null,2));return}if(typeof e==="string"){console.log(e);return}console.log(e.message??JSON.stringify(e,null,2))}/** Guard: throws when the parsed flags object e has no id. */function te(e){if(!e.id)throw Error("Missing required --id. Example: open-knowledge get --id <id>")}/** Sort a copy of the items in e by t.sort (created when unset, or title; any other value throws), reverse it when t.desc is set, and return the sorted array together with the sort key and the direction (asc or desc). */function Ei(e,t){let n=t.sort??"created";if(n!=="created"&&n!=="title")throw Error("Invalid --sort value. 
Use 'created' or 'title'.");let r=[...e].sort((i,s)=>{if(n==="title")return i.title.localeCompare(s.title);return i.created_at.localeCompare(s.created_at)});if(t.desc)r.reverse();return{sorted:r,sort:n,direction:t.desc?"desc":"asc"}}/** CLI entry point. Parses argv e via li(), logs the invocation through B(), answers --version and --completions directly, and otherwise continues with the command dispatch on the following lines. */async function ki(e){let{positional:t,flags:n}=li(e);if(B("debug","CLI invoked",{command:t[0],flags:{json:n.json,store:n.store}}),n.version){console.log(n.json?JSON.stringify({name:ee.name,version:ee.version},null,2):`${ee.name} ${ee.version}`);return}if(n.completions){let a=n.completions;if(a==="bash")console.log('_open_knowledge() { local cur; cur="${COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search embeddings providers safety help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --purpose --model --dimensions --semantic --fake --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge');else if(a==="zsh")console.log(`#compdef open-knowledge
|
|
567
|
+
_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search embeddings providers safety help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(--semantic)--semantic" "(--fake)--fake" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"{created,title}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--purpose)--purpose[purpose]:" "(--model)--model[model ref]:" "(--dimensions)--dimensions[embedding dimensions]:number:" "(--no-color)--no-color[disable color]" "(--scope)--scope"{local,global,project}:" }; _open_knowledge`);else if(a==="fish")console.log('complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search embeddings providers safety help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -l semantic; complete -c open-knowledge -l fake; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c 
open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l purpose; complete -c open-knowledge -l model; complete -c open-knowledge -l dimensions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"');else throw Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");return}let r=_i(t[0]);if(!r||n.help||r==="help"){hi(t[1]);return}let i=Xt({scope:n.scope}),s=n.store;if(!s)if(n.scope==="project"||n.scope==="local")s=i.jsonStorePath();else s=ve();if(r==="paths"){E(i.paths(),n.json);return}if(r==="storage"){let a=t[1]??"status";if(a==="status"){let o=i.storageContract(),c=i.validateStorage();E({ok:c.ok,...o,validation:c,message:`${o.storage_type} artifact storage at ${o.artifact_store.uri_prefix}`},n.json);return}if(a==="validate"){let o=i.validateStorage();E({ok:o.ok,validation:o,message:o.ok?"Storage contract valid":`Storage contract invalid: ${o.errors.join("; ")}`},n.json);return}throw Error("Invalid storage action. Use 'status' or 'validate'.")}if(r==="db"){let a=t[1]??"init";if(a!=="init"&&a!=="stats")throw Error("Invalid db action. Use 'init' or 'stats'.");if(a==="init"){let c=i.initDb();E({ok:!0,...c,message:`Initialized ${c.path}`},n.json);return}let o=i.dbStats();E({ok:!0,path:i.workspace.knowledgeDbPath,...o,message:`knowledge.db schema v${o.schema_version}`},n.json);return}if(r==="wiki"){if((t[1]??"init")!=="init")throw Error("Invalid wiki action. 
Use 'init'.");let o=await i.initWiki();E({ok:!0,...o,message:`Initialized wiki layout in ${i.workspace.home}`},n.json);return}if(r==="safety"){let a=t[1]??"status",o=i.ensureWorkspace(),c=i.safetyPolicy();i.initDb();let u=S(o.knowledgeDbPath);try{if(a==="status"){E({ok:!0,mode:c.mode,workspace:o.home,allow_write_roots:c.allowWriteRoots,read_only_source_access:c.readOnlySourceAccess,network:c.network,redaction:c.redaction,approvals:c.approvals,message:`Safety policy: ${c.mode}`},n.json);return}if(a==="check"){let _=t[2]??"generated_write",f=t[3]??null,m;try{if(_==="web_search")ue(c),m={action:_,target_uri:f,approval_required:!1,approved:!0,decision:"allow"};else if(_==="s3_read"){if(!f)throw Error("safety check s3_read requires an s3:// target.");F(f,c),m={action:_,target_uri:f,approval_required:!1,approved:!0,decision:"allow"}}else m=yt(u,c,_,f);R(u,{event_type:"safety_check",action:_,target_uri:f,decision:m.decision==="allow"?"allow":"requires_approval",metadata:m}),E({ok:!0,...m,message:`Safety check ${m.decision}`},n.json);return}catch(y){throw R(u,{event_type:"safety_check",action:_,target_uri:f,decision:"deny",metadata:{error:y instanceof Error?y.message:String(y)}}),y}}if(a==="approve"){let _=t[2]??"generated_write",f=t[3]??null,m=kt(u,{action:_,target_uri:f,reason:"local-cli approval",metadata:{scope:n.scope??"global"}});R(u,{event_type:"approval",action:_,target_uri:f,decision:"allow",metadata:{approval_id:m.id}}),E({ok:!0,...m,action:_,target_uri:f,message:`Approved ${_}`},n.json);return}if(a==="audit"){let _=u.query("SELECT id, event_type, action, target_uri, decision, metadata_json, created_at FROM audit_events ORDER BY created_at DESC LIMIT 50").all().map((f)=>({id:f.id,event_type:f.event_type,action:f.action,target_uri:f.target_uri,decision:f.decision,metadata:JSON.parse(f.metadata_json),created_at:f.created_at}));E({ok:!0,events:_,message:`${_.length} audit event(s)`},n.json);return}if(a==="redact"){let _=t.slice(2).join(" ");if(!_)throw Error("Usage: 
open-knowledge safety redact <text>");let f=de(_,c);if(f.findings.length>0)le(u,{source_uri:"safety://redact",findings:f.findings,metadata:{command:"safety redact"}});R(u,{event_type:"redaction",action:"safety_redact",target_uri:"safety://redact",decision:f.findings.length>0?"redacted":"allow",metadata:{findings:f.findings.length}}),E({ok:!0,text:f.text,findings:f.findings,message:`Redacted ${f.findings.length} finding(s)`},n.json);return}throw Error("Invalid safety action. Use 'status', 'check', 'approve', 'audit', or 'redact'.")}finally{u.close()}}if(r==="source"){if((t[1]??"")!=="resolve")throw Error("Invalid source action. Use 'resolve'.");let o=t[2];if(!o)throw Error("Usage: open-knowledge source resolve <source-ref>");let c=await i.resolveSource(o,{purpose:n.purpose,limit:n.limit});E({ok:!0,...c,message:c.resolved?`Resolved ${c.source_ref} (${c.content.chunks_returned}/${c.content.chunks_total} chunks)`:`Source not indexed: ${o}`},n.json);return}if(r==="ingest"){let a=t[1]??"";if(a==="manifest"){let o=t[2];if(!o)throw Error("Usage: open-knowledge ingest manifest <file|s3://bucket/key>");let c=await i.ingestManifest(o);E({ok:!0,...c,message:`Ingested ${c.items_seen} manifest item(s)`},n.json);return}if(a==="source"){let o=t[2];if(!o)throw Error("Usage: open-knowledge ingest source <source-ref>");let c=await i.ingestSource(o,n.purpose);E({ok:!0,...c,message:`Ingested source ${c.source_ref} (${c.chunks_inserted} chunks)`},n.json);return}throw Error("Invalid ingest action. Use 'manifest' or 'source'.")}if(r==="reindex"){if((t[1]??"")!=="outbox")throw Error("Invalid reindex action. 
Use 'outbox'.");let o=t[2];if(!o)throw Error("Usage: open-knowledge reindex outbox <file|s3://bucket/key>");let c=await i.consumeOutbox(o);E({ok:!0,...c,message:`Consumed ${c.events_seen} outbox event(s)`},n.json);return}if(r==="embeddings"){let a=t[1]??"status";if(a==="status"){let o=i.embeddingStatus();E({ok:!0,...o,message:`${o.total_vector_entries} vector index entries`},n.json);return}if(a==="index"){let o=await i.indexEmbeddings({limit:n.limit,modelRef:n.model,dimensions:n.dimensions,fake:n.fake});E({ok:!0,...o,message:`Embedded ${o.chunks_embedded} chunk(s)`},n.json);return}if(a==="search"){let o=t.slice(2).join(" ");if(!o)throw Error("Usage: open-knowledge embeddings search <query>");let c=await i.semanticSearch({query:o,limit:n.limit,modelRef:n.model,dimensions:n.dimensions,fake:n.fake});E({ok:!0,...c,message:`${c.results.length} semantic result(s)`},n.json);return}throw Error("Invalid embeddings action. Use 'status', 'index', or 'search'.")}if(r==="search"){let a=t.slice(1).join(" ");if(!a)throw Error("Usage: open-knowledge search <query>");let o=await i.search({query:a,limit:n.limit,semantic:n.semantic,modelRef:n.model,dimensions:n.dimensions,fake:n.fake});E({ok:!0,...o,message:`${o.results.length} search result(s)`},n.json);return}if(r==="providers"){let a=t[1]??"status";if(a==="status"){let o=i.providerStatus(),c=o.providers.filter((u)=>u.configured).length;E({ok:!0,...o,message:`${c}/${o.providers.length} provider credential(s) configured`},n.json);return}if(a==="models"){let o=i.modelRegistry();E({ok:!0,models:o,message:`${o.length} model alias(es)`},n.json);return}if(a==="check"){let o=t[2]??"default",c=Ne(o,i.config()),u=W(c),_=oe(u.provider,i.config());E({ok:!0,target:o,model_ref:c,provider:u.provider,model:u.model,credential:_,message:`${u.provider} credentials configured`},n.json);return}throw Error("Invalid providers action. 
Use 'status', 'models', or 'check'.")}if(xe(s),r==="add"){let a=t[1],o=t[2];if(!a||!o)throw Error("Usage: open-knowledge add <title> <content>");C(s,()=>{let c=L(s),u={id:Se(),title:a,content:o,url:n.url??null,tags:n.tag?[n.tag]:[],created_at:new Date().toISOString(),updated_at:new Date().toISOString()};c.items.push(u),j(s,c),B("info","Item added",{id:u.id,title:u.title}),E({ok:!0,item:u,message:`Added ${u.id}`},n.json)});return}if(r==="list"){if(n.format!==void 0&&n.format!=="table"&&n.format!=="json")throw Error("Invalid --format value for list. Use 'table' or 'json'.");C(s,()=>{let a=L(s),o=Number.isFinite(n.page)&&n.page>0?n.page:1,c=Number.isFinite(n.limit)&&n.limit>0?n.limit:20,u=n.search?String(n.search).toLowerCase():"",_=n.tag?String(n.tag).toLowerCase():"",f=n.format==="table"||!n.json&&!n.format&&mi(n),m=n.json||n.format==="json",y=a.items;if(n.archived)y=y.filter((h)=>h.archived===!0);else if(!n.includeArchived)y=y.filter((h)=>!h.archived);if(u)y=y.filter((h)=>h.title.toLowerCase().includes(u)||h.content.toLowerCase().includes(u));if(_)y=y.filter((h)=>h.tags&&h.tags.map((G)=>G.toLowerCase()).includes(_));let{sorted:k,sort:g,direction:b}=Ei(y,n),x=(o-1)*c,p=k.slice(x,x+c),A=Math.max(1,Math.ceil(k.length/c));if(m){E({ok:!0,page:o,limit:c,total:k.length,total_pages:A,sort:g,direction:b,items:p},!0);return}if(p.length===0){E(`No items found (search=${u||"none"}, tag=${_||"none"})`,!1);return}if(f){let h=(M)=>M,G=`${h("ID")} ${h("TITLE")} ${h("CREATED")} ${h("URL")} ${h("TAGS")}`;console.log(G);for(let M of p)console.log(`${M.id} ${h(M.title)} ${M.created_at} ${M.url?h(M.url):""} ${M.tags?.length?h(`[${M.tags.join(", ")}]`):""}`);console.log(`Page ${o}/${A} | showing ${p.length} of ${k.length} | sort=${g} ${b} | search=${u||"none"} | tag=${_||"none"}`)}else{for(let h of p)console.log(`${h.id} ${h.title} ${h.created_at}${h.url?` ${h.url}`:""}${h.tags?.length?` [${h.tags.join(", ")}]`:""}`);console.log(`Page ${o}/${A} | showing ${p.length} of ${k.length} | 
sort=${g} ${b} | search=${u||"none"} | tag=${_||"none"}`)}});return}if(r==="get"){te(n),C(s,()=>{let o=L(s).items.find((c)=>c.id===n.id||c.short_id===n.id);if(!o)throw Error(`Item not found: ${n.id}`);E({ok:!0,item:o,message:`${o.id}: ${o.title}`},n.json)});return}if(r==="update"){te(n),C(s,()=>{let a=L(s),o=a.items.findIndex((u)=>u.id===n.id||u.short_id===n.id);if(o===-1)throw Error(`Item not found: ${n.id}`);let c=a.items[o];if(n.title!==void 0)c.title=n.title;if(n.content!==void 0)c.content=n.content;if(n.url!==void 0)c.url=n.url;if(n.tag!==void 0){if(c.tags=c.tags||[],!c.tags.map((u)=>u.toLowerCase()).includes(n.tag.toLowerCase()))c.tags.push(n.tag)}c.updated_at=new Date().toISOString(),a.items[o]=c,j(s,a),E({ok:!0,item:c,message:`Updated ${c.id}`},n.json)});return}if(r==="archive"||r==="restore"){te(n),C(s,()=>{let a=L(s),o=a.items.findIndex((u)=>u.id===n.id||u.short_id===n.id);if(o===-1)throw Error(`Item not found: ${n.id}`);let c=a.items[o];c.archived=r==="archive",c.updated_at=new Date().toISOString(),a.items[o]=c,j(s,a),E({ok:!0,item:c,message:`${r==="archive"?"Archived":"Restored"} ${c.id}`},n.json)});return}if(r==="untag"){if(te(n),!n.tag)throw Error("Missing required --tag. Example: open-knowledge untag --id <id> -t <tag>");C(s,()=>{let a=L(s),o=a.items.findIndex((_)=>_.id===n.id||_.short_id===n.id);if(o===-1)throw Error(`Item not found: ${n.id}`);let c=a.items[o],u=c.tags?.length??0;c.tags=(c.tags??[]).filter((_)=>_.toLowerCase()!==n.tag.toLowerCase()),c.updated_at=new Date().toISOString(),a.items[o]=c,j(s,a),E({ok:!0,item:c,removed:u-c.tags.length,message:`Removed tag from ${c.id}`},n.json)});return}if(r==="upsert"){let a=n.title??t[1],o=n.content??t[2];C(s,()=>{let c=L(s),u=n.id?c.items.findIndex((m)=>m.id===n.id||m.short_id===n.id):-1,_=new Date().toISOString();if(u===-1){if(!a||!o)throw Error("New item requires title and content. 
Example: open-knowledge upsert <title> <content> [--id <id>]");let m=n.id??Se(),y={id:m,short_id:Ge(m),title:a,content:o,url:n.url??null,tags:n.tag?[n.tag]:[],metadata:{},archived:!1,created_at:_,updated_at:_};c.items.push(y),j(s,c),E({ok:!0,created:!0,item:y,message:`Upserted ${y.id}`},n.json);return}let f=c.items[u];if(a!==void 0)f.title=a;if(o!==void 0)f.content=o;if(n.url!==void 0)f.url=n.url;if(n.tag!==void 0){if(f.tags=f.tags||[],!f.tags.map((m)=>m.toLowerCase()).includes(n.tag.toLowerCase()))f.tags.push(n.tag)}f.updated_at=_,c.items[u]=f,j(s,c),E({ok:!0,created:!1,item:f,message:`Upserted ${f.id}`},n.json)});return}if(r==="delete"){if(te(n),!n.yes)throw Error("Refusing delete without --yes. Re-run with: open-knowledge delete --id <id> --yes");C(s,()=>{let a=L(s),o=a.items.length;a.items=a.items.filter((u)=>u.id!==n.id&&u.short_id!==n.id);let c=o!==a.items.length;if(j(s,a),!c)throw Error(`Item not found: ${n.id}`);B("info","Item deleted",{id:n.id}),E({ok:!0,deleted_id:n.id,message:`Deleted ${n.id}`},n.json)});return}if(r==="export"){let a=n.format??"json";if(a!=="json"&&a!=="jsonl")throw Error("Invalid --format. Use 'json' or 'jsonl'.");C(s,()=>{let o=L(s);if(a==="jsonl")for(let c of o.items)console.log(JSON.stringify(c));else E({ok:!0,items:o.items},n.json)});return}if(r==="prune"){if(!n.yes)throw Error("Refusing prune without --yes. Re-run with: open-knowledge prune --yes [--older-than <days>] [--empty]");C(s,()=>{let a=L(s),o=a.items.length;if(n.olderThan!==void 0){let u=new Date;u.setDate(u.getDate()-n.olderThan),a.items=a.items.filter((_)=>new Date(_.created_at)>=u)}if(n.empty)a.items=a.items.filter((u)=>u.content.trim().length>0);let c=o-a.items.length;j(s,a),B("info","Prune completed",{pruned:c,remaining:a.items.length}),E({ok:!0,pruned:c,remaining:a.items.length,message:`Pruned ${c} item(s)`},n.json)});return}if(r==="dedupe"){if(!n.yes)throw Error("Refusing dedupe without --yes. 
Re-run with: open-knowledge dedupe --yes [--json]");C(s,()=>{let a=L(s),o=new Set,c=a.items.length;a.items=a.items.filter((_)=>{let f=`${_.title}\x00${_.content}`;if(o.has(f))return!1;return o.add(f),!0});let u=c-a.items.length;j(s,a),B("info","Dedupe completed",{removed:u,remaining:a.items.length}),E({ok:!0,removed:u,remaining:a.items.length,message:`Dedupe removed ${u} duplicate(s)`},n.json)});return}if(r==="stats"){C(s,()=>{let a=L(s),o=a.items.filter((b)=>!b.archived),c=o.length,u=a.items.length-c,_=o.filter((b)=>b.url).length,f=o.filter((b)=>b.tags&&b.tags.length>0).length,m=c>0?o.map((b)=>b.created_at).sort()[0]:null,y=c>0?o.map((b)=>b.created_at).sort()[c-1]:null,k={};for(let b of o)for(let x of b.tags||[])k[x]=(k[x]||0)+1;let g=Object.entries(k).sort((b,x)=>x[1]-b[1]).slice(0,5).map(([b,x])=>({tag:b,count:x}));E({ok:!0,total:c,archived:u,with_url:_,with_tags:f,oldest:m,newest:y,top_tags:g,message:`${c} items | ${_} with URL | ${f} with tags`},n.json)});return}let d=gi(t[0]),l=d?` Did you mean '${d}'?`:"";throw B("warn","Unknown command",{input:t[0],suggestion:d}),Error(`Unknown command: ${t[0]}.${l} Run 'open-knowledge --help' for available commands.`)}if(import.meta.main)ki(process.argv.slice(2)).catch((e)=>{let t=e instanceof Error?e.message:String(e);B("error","CLI error",{message:t,stack:e instanceof Error?e.stack:void 0}),console.error(`Error: ${t}`),process.exitCode=1});export{gi as suggestCommand,Ei as sortItems,ki as run,li as parseArgs};
|