@hasna/knowledge 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +97 -4
- package/bin/open-knowledge-mcp.js +1067 -1569
- package/bin/open-knowledge.js +257 -4
- package/docs/architecture/ai-native-knowledge-base.md +191 -0
- package/docs/architecture/hybrid-semantic-search.md +135 -0
- package/package.json +12 -7
- package/src/artifact-store.ts +184 -0
- package/src/cli.ts +662 -0
- package/src/knowledge-db.ts +247 -0
- package/src/manifest-ingest.ts +423 -0
- package/src/mcp.js +533 -0
- package/src/schema.js +25 -0
- package/src/source-ref.ts +92 -0
- package/src/store.ts +16 -6
- package/src/wiki-layout.ts +104 -0
- package/src/workspace.ts +123 -0
package/bin/open-knowledge.js
CHANGED
|
@@ -1,6 +1,249 @@
|
|
|
1
1
|
#!/usr/bin/env bun
|
|
2
2
|
// @bun
|
|
3
|
-
import{
|
|
3
|
+
var I=import.meta.require;import{readFileSync as Y,writeFileSync as B,existsSync as $,renameSync as Ae,unlinkSync as se}from"fs";import{randomUUID as oe}from"crypto";import{existsSync as ye,mkdirSync as J,readFileSync as Re,writeFileSync as Oe}from"fs";import{homedir as re}from"os";import{dirname as ke,join as R,resolve as le}from"path";var we=R(".hasna","apps","knowledge");function H(){return R(re(),".open-knowledge","db.json")}function Q(){return R(re(),".hasna","apps","knowledge")}function Ue(e=process.cwd()){return le(e,we)}function D(e){return{home:e,configPath:R(e,"config.json"),jsonStorePath:R(e,"db.json"),knowledgeDbPath:R(e,"knowledge.db"),artifactsDir:R(e,"artifacts"),cacheDir:R(e,"cache"),exportsDir:R(e,"exports"),indexesDir:R(e,"indexes"),logsDir:R(e,"logs"),runsDir:R(e,"runs"),schemasDir:R(e,"schemas"),wikiDir:R(e,"wiki")}}function Se(){return{version:1,mode:"local",storage:{type:"local",artifacts_root:"artifacts"},sources:{preferred_ref:"open-files",allowed_schemes:["open-files","s3","file","https","http"]}}}function b(e){let t=D(e);J(t.home,{recursive:!0});for(let n of[t.artifactsDir,t.cacheDir,t.exportsDir,t.indexesDir,t.logsDir,t.runsDir,t.schemasDir,t.wikiDir])J(n,{recursive:!0});if(!ye(t.configPath))Oe(t.configPath,`${JSON.stringify(Se(),null,2)}
|
|
4
|
+
`);return t}function ie(e,t=process.cwd()){if(e==="project"||e==="local")return D(Ue(t));return D(Q())}function K(e){J(ke(e),{recursive:!0})}function v(e){let t=Re(e,"utf8");return JSON.parse(t)}function V(){return D(Q()).jsonStorePath}function q(e){if(!$(e))if(K(e),e===V()&&$(H()))B(e,Y(H(),"utf8"));else B(e,JSON.stringify({items:[]},null,2))}function Ie(e){return`${e}.lock`}function xe(e,t){let c=Date.now();while(Date.now()-c<5000){try{if(!$(e)){B(e,JSON.stringify({owner:t,ts:Date.now()}));return}let d=JSON.parse(Y(e,"utf8"));if(Date.now()-d.ts>1e4)se(e)}catch{}let s=Date.now();while(Date.now()-s<50);}throw Error(`Could not acquire lock on ${e} after 5000ms`)}function Xe(e,t){try{if($(e)){if(JSON.parse(Y(e,"utf8")).owner===t)se(e)}}catch{}}function k(e){q(e);let t=Y(e,"utf8"),n=JSON.parse(t);if(!n||!Array.isArray(n.items))return{items:[]};return n}function w(e,t){let n=`${e}.tmp.${oe()}`;B(n,JSON.stringify(t,null,2)),Ae(n,e)}function l(e,t){let n=oe(),r=Ie(e);xe(r,n);try{return t()}finally{Xe(r,n)}}function P(){return`k_${Date.now().toString(36)}_${Math.random().toString(36).slice(2,8)}`}function ce(e){return e.replace(/^k_/,"").slice(0,12)}import{Database as be}from"bun:sqlite";var ge=`
|
|
5
|
+
PRAGMA journal_mode = WAL;
|
|
6
|
+
PRAGMA foreign_keys = ON;
|
|
7
|
+
|
|
8
|
+
CREATE TABLE IF NOT EXISTS schema_versions (
|
|
9
|
+
version INTEGER PRIMARY KEY,
|
|
10
|
+
applied_at TEXT NOT NULL
|
|
11
|
+
);
|
|
12
|
+
|
|
13
|
+
CREATE TABLE IF NOT EXISTS sources (
|
|
14
|
+
id TEXT PRIMARY KEY,
|
|
15
|
+
uri TEXT NOT NULL UNIQUE,
|
|
16
|
+
kind TEXT NOT NULL,
|
|
17
|
+
title TEXT,
|
|
18
|
+
metadata_json TEXT NOT NULL DEFAULT '{}',
|
|
19
|
+
acl_json TEXT NOT NULL DEFAULT '{}',
|
|
20
|
+
created_at TEXT NOT NULL,
|
|
21
|
+
updated_at TEXT NOT NULL
|
|
22
|
+
);
|
|
23
|
+
|
|
24
|
+
CREATE TABLE IF NOT EXISTS source_revisions (
|
|
25
|
+
id TEXT PRIMARY KEY,
|
|
26
|
+
source_id TEXT NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
|
|
27
|
+
revision TEXT NOT NULL,
|
|
28
|
+
hash TEXT,
|
|
29
|
+
extracted_text_uri TEXT,
|
|
30
|
+
metadata_json TEXT NOT NULL DEFAULT '{}',
|
|
31
|
+
created_at TEXT NOT NULL,
|
|
32
|
+
UNIQUE(source_id, revision)
|
|
33
|
+
);
|
|
34
|
+
|
|
35
|
+
CREATE TABLE IF NOT EXISTS chunks (
|
|
36
|
+
id TEXT PRIMARY KEY,
|
|
37
|
+
source_revision_id TEXT REFERENCES source_revisions(id) ON DELETE CASCADE,
|
|
38
|
+
wiki_page_id TEXT,
|
|
39
|
+
kind TEXT NOT NULL,
|
|
40
|
+
ordinal INTEGER NOT NULL,
|
|
41
|
+
text TEXT NOT NULL,
|
|
42
|
+
token_count INTEGER,
|
|
43
|
+
start_offset INTEGER,
|
|
44
|
+
end_offset INTEGER,
|
|
45
|
+
metadata_json TEXT NOT NULL DEFAULT '{}',
|
|
46
|
+
created_at TEXT NOT NULL
|
|
47
|
+
);
|
|
48
|
+
|
|
49
|
+
CREATE TABLE IF NOT EXISTS chunk_embeddings (
|
|
50
|
+
id TEXT PRIMARY KEY,
|
|
51
|
+
chunk_id TEXT NOT NULL REFERENCES chunks(id) ON DELETE CASCADE,
|
|
52
|
+
provider TEXT NOT NULL,
|
|
53
|
+
model TEXT NOT NULL,
|
|
54
|
+
dimensions INTEGER NOT NULL,
|
|
55
|
+
vector_json TEXT NOT NULL,
|
|
56
|
+
created_at TEXT NOT NULL,
|
|
57
|
+
UNIQUE(chunk_id, provider, model)
|
|
58
|
+
);
|
|
59
|
+
|
|
60
|
+
CREATE TABLE IF NOT EXISTS wiki_pages (
|
|
61
|
+
id TEXT PRIMARY KEY,
|
|
62
|
+
path TEXT NOT NULL UNIQUE,
|
|
63
|
+
title TEXT NOT NULL,
|
|
64
|
+
artifact_uri TEXT,
|
|
65
|
+
content_hash TEXT,
|
|
66
|
+
status TEXT NOT NULL DEFAULT 'active',
|
|
67
|
+
metadata_json TEXT NOT NULL DEFAULT '{}',
|
|
68
|
+
created_at TEXT NOT NULL,
|
|
69
|
+
updated_at TEXT NOT NULL
|
|
70
|
+
);
|
|
71
|
+
|
|
72
|
+
CREATE TABLE IF NOT EXISTS wiki_backlinks (
|
|
73
|
+
from_page_id TEXT NOT NULL REFERENCES wiki_pages(id) ON DELETE CASCADE,
|
|
74
|
+
to_page_id TEXT NOT NULL REFERENCES wiki_pages(id) ON DELETE CASCADE,
|
|
75
|
+
label TEXT,
|
|
76
|
+
created_at TEXT NOT NULL,
|
|
77
|
+
PRIMARY KEY(from_page_id, to_page_id)
|
|
78
|
+
);
|
|
79
|
+
|
|
80
|
+
CREATE TABLE IF NOT EXISTS citations (
|
|
81
|
+
id TEXT PRIMARY KEY,
|
|
82
|
+
wiki_page_id TEXT REFERENCES wiki_pages(id) ON DELETE CASCADE,
|
|
83
|
+
chunk_id TEXT REFERENCES chunks(id) ON DELETE SET NULL,
|
|
84
|
+
source_uri TEXT NOT NULL,
|
|
85
|
+
quote TEXT,
|
|
86
|
+
start_offset INTEGER,
|
|
87
|
+
end_offset INTEGER,
|
|
88
|
+
metadata_json TEXT NOT NULL DEFAULT '{}',
|
|
89
|
+
created_at TEXT NOT NULL
|
|
90
|
+
);
|
|
91
|
+
|
|
92
|
+
CREATE TABLE IF NOT EXISTS knowledge_indexes (
|
|
93
|
+
id TEXT PRIMARY KEY,
|
|
94
|
+
kind TEXT NOT NULL,
|
|
95
|
+
name TEXT NOT NULL,
|
|
96
|
+
artifact_uri TEXT,
|
|
97
|
+
shard_key TEXT,
|
|
98
|
+
metadata_json TEXT NOT NULL DEFAULT '{}',
|
|
99
|
+
created_at TEXT NOT NULL,
|
|
100
|
+
updated_at TEXT NOT NULL,
|
|
101
|
+
UNIQUE(kind, name, shard_key)
|
|
102
|
+
);
|
|
103
|
+
|
|
104
|
+
CREATE TABLE IF NOT EXISTS runs (
|
|
105
|
+
id TEXT PRIMARY KEY,
|
|
106
|
+
type TEXT NOT NULL,
|
|
107
|
+
prompt TEXT,
|
|
108
|
+
status TEXT NOT NULL,
|
|
109
|
+
provider TEXT,
|
|
110
|
+
model TEXT,
|
|
111
|
+
cost_tokens INTEGER NOT NULL DEFAULT 0,
|
|
112
|
+
cost_usd REAL NOT NULL DEFAULT 0,
|
|
113
|
+
metadata_json TEXT NOT NULL DEFAULT '{}',
|
|
114
|
+
created_at TEXT NOT NULL,
|
|
115
|
+
updated_at TEXT NOT NULL
|
|
116
|
+
);
|
|
117
|
+
|
|
118
|
+
CREATE TABLE IF NOT EXISTS run_events (
|
|
119
|
+
id TEXT PRIMARY KEY,
|
|
120
|
+
run_id TEXT NOT NULL REFERENCES runs(id) ON DELETE CASCADE,
|
|
121
|
+
level TEXT NOT NULL,
|
|
122
|
+
event TEXT NOT NULL,
|
|
123
|
+
metadata_json TEXT NOT NULL DEFAULT '{}',
|
|
124
|
+
created_at TEXT NOT NULL
|
|
125
|
+
);
|
|
126
|
+
|
|
127
|
+
CREATE TABLE IF NOT EXISTS provider_usage (
|
|
128
|
+
id TEXT PRIMARY KEY,
|
|
129
|
+
run_id TEXT REFERENCES runs(id) ON DELETE SET NULL,
|
|
130
|
+
provider TEXT NOT NULL,
|
|
131
|
+
model TEXT NOT NULL,
|
|
132
|
+
input_tokens INTEGER NOT NULL DEFAULT 0,
|
|
133
|
+
output_tokens INTEGER NOT NULL DEFAULT 0,
|
|
134
|
+
cost_usd REAL NOT NULL DEFAULT 0,
|
|
135
|
+
metadata_json TEXT NOT NULL DEFAULT '{}',
|
|
136
|
+
created_at TEXT NOT NULL
|
|
137
|
+
);
|
|
138
|
+
|
|
139
|
+
CREATE TABLE IF NOT EXISTS redaction_findings (
|
|
140
|
+
id TEXT PRIMARY KEY,
|
|
141
|
+
source_uri TEXT,
|
|
142
|
+
run_id TEXT REFERENCES runs(id) ON DELETE SET NULL,
|
|
143
|
+
severity TEXT NOT NULL,
|
|
144
|
+
finding_type TEXT NOT NULL,
|
|
145
|
+
metadata_json TEXT NOT NULL DEFAULT '{}',
|
|
146
|
+
created_at TEXT NOT NULL
|
|
147
|
+
);
|
|
148
|
+
|
|
149
|
+
CREATE TABLE IF NOT EXISTS storage_objects (
|
|
150
|
+
id TEXT PRIMARY KEY,
|
|
151
|
+
artifact_uri TEXT NOT NULL UNIQUE,
|
|
152
|
+
kind TEXT NOT NULL,
|
|
153
|
+
content_type TEXT,
|
|
154
|
+
hash TEXT,
|
|
155
|
+
size_bytes INTEGER,
|
|
156
|
+
metadata_json TEXT NOT NULL DEFAULT '{}',
|
|
157
|
+
created_at TEXT NOT NULL,
|
|
158
|
+
updated_at TEXT NOT NULL
|
|
159
|
+
);
|
|
160
|
+
|
|
161
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
|
|
162
|
+
text,
|
|
163
|
+
title,
|
|
164
|
+
source_uri,
|
|
165
|
+
content='',
|
|
166
|
+
tokenize='porter unicode61'
|
|
167
|
+
);
|
|
168
|
+
|
|
169
|
+
INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
170
|
+
VALUES (1, datetime('now'));
|
|
171
|
+
`,Ce=`
|
|
172
|
+
DROP TABLE IF EXISTS chunks_fts;
|
|
173
|
+
|
|
174
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
|
|
175
|
+
chunk_id UNINDEXED,
|
|
176
|
+
text,
|
|
177
|
+
title,
|
|
178
|
+
source_uri,
|
|
179
|
+
tokenize='porter unicode61'
|
|
180
|
+
);
|
|
181
|
+
|
|
182
|
+
INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
183
|
+
VALUES (2, datetime('now'));
|
|
184
|
+
`;function G(e){K(e);let t=new be(e);return t.exec("PRAGMA foreign_keys = ON;"),t}function m(e){let t=G(e);try{if(t.exec(ge),Z(t)<2)t.exec(Ce);return{path:e,schema_version:Z(t)}}finally{t.close()}}function Z(e){return e.query("SELECT MAX(version) AS version FROM schema_versions").get()?.version??0}function A(e,t){return e.query(`SELECT COUNT(*) AS n FROM ${t}`).get()?.n??0}function ue(e){let t=G(e);try{return{schema_version:Z(t),sources:A(t,"sources"),source_revisions:A(t,"source_revisions"),chunks:A(t,"chunks"),wiki_pages:A(t,"wiki_pages"),citations:A(t,"citations"),indexes:A(t,"knowledge_indexes"),runs:A(t,"runs"),run_events:A(t,"run_events")}}finally{t.close()}}import{existsSync as De,mkdirSync as Te,readFileSync as me,writeFileSync as Fe}from"fs";import{dirname as Me,join as ee,relative as je,sep as Ke}from"path";function F(e){let t=e.replace(/\\/g,"/").trim();if(!t||t.startsWith("/"))throw Error(`Invalid artifact key: ${e}`);let n=t.split("/").filter(Boolean);if(n.length===0||n.some((r)=>r==="."||r===".."))throw Error(`Invalid artifact key: ${e}`);return n.join("/")}function ne(e,t){let n=je(e,t);if(n.startsWith("..")||n===".."||n.startsWith(`..${Ke}`))throw Error(`Artifact path escapes root: ${t}`)}class Ee{root;type="local";canRead=!0;canWrite=!0;constructor(e){this.root=e;Te(e,{recursive:!0})}async put(e){let t=F(e.key),n=ee(this.root,t);return ne(this.root,n),Te(Me(n),{recursive:!0}),Fe(n,e.body),{key:t,uri:`file://${n}`}}async getText(e){let t=F(e),n=ee(this.root,t);return ne(this.root,n),me(n,"utf8")}async exists(e){let t=F(e),n=ee(this.root,t);return ne(this.root,n),De(n)}}class de{options;type="s3";canRead=!0;canWrite=!0;client;constructor(e){this.options=e;this.client=e.client}async getClient(){if(this.client)return this.client;let[{S3Client:e},{fromIni:t}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]);return this.client=new 
e({region:this.options.region,credentials:this.options.profile?t({profile:this.options.profile}):void 0,maxAttempts:this.options.max_attempts}),this.client}objectKey(e){let t=F(e),n=this.options.prefix?F(this.options.prefix):"";return n?`${n}/${t}`:t}async put(e){let[{PutObjectCommand:t},n]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(e.key);return await n.send(new t({Bucket:this.options.bucket,Key:r,Body:e.body,ContentType:e.content_type,Metadata:e.metadata,ServerSideEncryption:this.options.server_side_encryption,SSEKMSKeyId:this.options.kms_key_id})),{key:r,uri:`s3://${this.options.bucket}/${r}`}}async getText(e){let[{GetObjectCommand:t},n]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(e),c=await n.send(new t({Bucket:this.options.bucket,Key:r}));if(!c.Body)return"";return await c.Body.transformToString()}async exists(e){let[{HeadObjectCommand:t},n]=await Promise.all([import("@aws-sdk/client-s3"),this.getClient()]),r=this.objectKey(e);try{return await n.send(new t({Bucket:this.options.bucket,Key:r})),!0}catch(c){let s=c instanceof Error?c.name:"";if(s==="NotFound"||s==="NoSuchKey"||s==="NotFoundError")return!1;throw c}}}function ae(e,t){if(e.storage.type==="s3"){if(!e.storage.s3?.bucket)throw Error("S3 artifact storage requires storage.s3.bucket");return new de({bucket:e.storage.s3.bucket,prefix:e.storage.s3.prefix,region:e.storage.s3.region,profile:e.storage.s3.profile,max_attempts:e.storage.s3.max_attempts,server_side_encryption:e.storage.s3.server_side_encryption,kms_key_id:e.storage.s3.kms_key_id})}return new Ee(t.artifactsDir)}function ve(e){let t=String(e.getUTCFullYear()),n=String(e.getUTCMonth()+1).padStart(2,"0"),r=String(e.getUTCDate()).padStart(2,"0");return{year:t,month:n,day:r}}function Be(){return`# Knowledge Agent Schema v1
|
|
185
|
+
|
|
186
|
+
## Source Rules
|
|
187
|
+
|
|
188
|
+
- Treat open-files source references as the preferred source of truth.
|
|
189
|
+
- Do not copy raw source files into open-knowledge.
|
|
190
|
+
- Cite every durable fact with a source URI, revision/hash when available, and optional span.
|
|
191
|
+
- Mark uncertainty explicitly when sources disagree or are incomplete.
|
|
192
|
+
|
|
193
|
+
## Wiki Rules
|
|
194
|
+
|
|
195
|
+
- Write generated knowledge as Markdown pages under wiki/.
|
|
196
|
+
- Keep root indexes small; use topic, team, project, and machine-readable shards for scale.
|
|
197
|
+
- Preserve backlinks between related pages and decisions.
|
|
198
|
+
- Prefer updating existing pages over creating near-duplicates.
|
|
199
|
+
|
|
200
|
+
## Query Rules
|
|
201
|
+
|
|
202
|
+
- Search wiki pages first, then source chunks, then deeper read-only source refs.
|
|
203
|
+
- Use web search only when requested or when current external context is required.
|
|
204
|
+
- File useful answers back into the wiki only after approval or approved auto-write mode.
|
|
205
|
+
|
|
206
|
+
## Lint Rules
|
|
207
|
+
|
|
208
|
+
- Flag stale pages, missing citations, contradictions, orphan pages, duplicate pages, and unresolved source refs.
|
|
209
|
+
`}function $e(){return`# Knowledge Index
|
|
210
|
+
|
|
211
|
+
This is a compact orientation index for agents. It is not the full search index.
|
|
212
|
+
|
|
213
|
+
## Shards
|
|
214
|
+
|
|
215
|
+
- wiki/
|
|
216
|
+
- indexes/
|
|
217
|
+
- schemas/
|
|
218
|
+
- logs/
|
|
219
|
+
|
|
220
|
+
## Source Ownership
|
|
221
|
+
|
|
222
|
+
Raw source files are resolved through open-files. This app stores source refs,
|
|
223
|
+
citations, chunks, generated wiki artifacts, indexes, and run records.
|
|
224
|
+
`}function Ye(){return`# Wiki
|
|
225
|
+
|
|
226
|
+
Generated durable knowledge pages live here.
|
|
227
|
+
|
|
228
|
+
Pages should be concise, cited, and organized for both humans and agents.
|
|
229
|
+
`}async function pe(e,t=new Date){let{year:n,month:r,day:c}=ve(t),s="schemas/v1.md",d="indexes/root.md",_="wiki/README.md",i=`logs/${n}/${r}/${c}.jsonl`,u={ts:t.toISOString(),event:"wiki_layout_initialized",schema_key:"schemas/v1.md",root_index_key:"indexes/root.md",wiki_readme_key:"wiki/README.md"},o=[e.put({key:"schemas/v1.md",body:Be(),content_type:"text/markdown"}),e.put({key:"indexes/root.md",body:$e(),content_type:"text/markdown"}),e.put({key:"wiki/README.md",body:Ye(),content_type:"text/markdown"}),e.put({key:i,body:`${JSON.stringify(u)}
|
|
230
|
+
`,content_type:"application/x-ndjson"})];return await Promise.all(o),{schema_key:"schemas/v1.md",root_index_key:"indexes/root.md",wiki_readme_key:"wiki/README.md",log_key:i,written:["schemas/v1.md","indexes/root.md","wiki/README.md",i]}}import{createHash as He}from"crypto";import{existsSync as Qe,readFileSync as Ve}from"fs";import{basename as qe}from"path";function fe(e,t){if(!e)throw Error(t);return e}function Ge(e){let n=e.slice(13).split("/").filter(Boolean),r=n[0];if(r!=="file"&&r!=="source")throw Error("Invalid open-files ref. Expected open-files://file/<id>, open-files://file/<id>/revision/<revision_id>, or open-files://source/<id>/path/<path>.");let c=fe(n[1],"Invalid open-files ref. Missing id.");if(r==="file"){if(n.length===2)return{kind:"open-files",uri:e,entity:r,id:c};if(n[2]==="revision"&&n[3]&&n.length===4)return{kind:"open-files",uri:e,entity:r,id:c,revision_id:decodeURIComponent(n[3])};throw Error("Invalid open-files file ref. Expected open-files://file/<id>/revision/<revision_id>.")}let s=n.indexOf("path"),d=s>=0?decodeURIComponent(n.slice(s+1).join("/")):void 0;return{kind:"open-files",uri:e,entity:r,id:c,path:d}}function ze(e){let t=new URL(e),n=fe(t.hostname,"Invalid s3 ref. Missing bucket."),r=decodeURIComponent(t.pathname.replace(/^\/+/,""));if(!r)throw Error("Invalid s3 ref. 
Missing object key.");return{kind:"s3",uri:e,bucket:n,key:r}}function We(e){let t=new URL(e);return{kind:"file",uri:e,path:decodeURIComponent(t.pathname)}}function Je(e){let t=new URL(e);return{kind:"web",uri:e,url:t.toString()}}function _e(e){if(e.startsWith("open-files://"))return Ge(e);if(e.startsWith("s3://"))return ze(e);if(e.startsWith("file://"))return We(e);if(e.startsWith("https://")||e.startsWith("http://"))return Je(e);throw Error(`Unsupported source ref scheme: ${e}`)}function te(e,t){return`${e}_${He("sha256").update(t).digest("hex").slice(0,20)}`}function g(e){return e&&typeof e==="object"&&!Array.isArray(e)?e:void 0}function E(e){return typeof e==="string"&&e.length>0?e:void 0}function Pe(e){return typeof e==="number"&&Number.isFinite(e)?e:void 0}function Ze(e){let t=E(e.source_ref)??E(e.source_uri)??E(e.uri);if(t)return t;let n=E(e.file_id);if(n){let s=E(e.revision_id)??E(e.revision),d=`open-files://file/${encodeURIComponent(n)}`;return s?`${d}/revision/${encodeURIComponent(s)}`:d}let r=E(e.source_id),c=E(e.path);if(r&&c)return`open-files://source/${encodeURIComponent(r)}/path/${encodeURIComponent(c)}`;throw Error("Manifest item is missing source_ref, file_id, or source_id/path.")}function en(e,t){if(t.kind==="open-files"&&t.entity==="file"&&t.revision_id)return e.replace(/\/revision\/[^/]+$/,"");return e}function nn(e){let t=E(e.extracted_text)??E(e.text)??E(e.content_text)??E(e.markdown);if(t!==void 0)return t;let n=e.content;return typeof n==="string"?n:null}function tn(e){let t=E(e.extracted_text_ref)??E(e.extracted_text_uri)??E(e.text_ref);if(t)return t;let n=g(e.content);return E(n?.extracted_text_ref)??E(n?.extracted_text_uri)??null}function rn(e){let t=E(e.path);return E(e.title)??E(e.name)??(t?qe(t):null)}function sn(e){return E(e.hash)??E(e.checksum)??E(e.sha256)??null}function on(e,t,n){return E(e.revision_id)??E(e.revision)??E(e.version_id)??(t.kind==="open-files"?t.revision_id:void 0)??n??E(e.updated_at)??"current"}function cn(e,t){let 
n={};for(let[r,c]of Object.entries(e)){if(["text","content","content_text","extracted_text","markdown"].includes(r))continue;n[r]=c}return n.source_ref=t.sourceRef,n.source_uri=t.sourceUri,n.status=t.status,n}function un(e,t){let n=Ze(e),r=_e(n),c=en(n,r),s=sn(e),d=E(e.status)??"active";return{raw:e,sourceRef:n,sourceUri:c,kind:r.kind,title:rn(e),revision:on(e,r,s),hash:s,extractedTextUri:tn(e),text:nn(e),metadata:cn(e,{sourceRef:n,sourceUri:c,status:d}),acl:e.permissions??e.acl??{},status:d,updatedAt:E(e.updated_at)??t}}function Tn(e){let t=e.trim();if(!t)return[];if(t.startsWith("[")){let n=JSON.parse(t);if(!Array.isArray(n))throw Error("Manifest array parse failed.");return n.map((r)=>{let c=g(r);if(!c)throw Error("Manifest array entries must be objects.");return c})}if(t.startsWith("{"))try{let n=JSON.parse(t),r=g(n);if(!r)throw Error("Manifest object parse failed.");if(Array.isArray(r.items))return r.items.map((c)=>{let s=g(c);if(!s)throw Error("Manifest items entries must be objects.");return s});if("source_ref"in r||"source_uri"in r||"file_id"in r)return[r]}catch(n){let r=t.split(/\r?\n/).filter((c)=>c.trim().length>0);if(r.length<=1)throw n;return r.map((c)=>{let s=g(JSON.parse(c));if(!s)throw Error("Manifest JSONL entries must be objects.");return s})}return t.split(/\r?\n/).filter((n)=>n.trim().length>0).map((n)=>{let r=g(JSON.parse(n));if(!r)throw Error("Manifest JSONL entries must be objects.");return r})}async function En(e,t){let n=new URL(e),r=n.hostname,c=decodeURIComponent(n.pathname.replace(/^\/+/,""));if(!r||!c)throw Error(`Invalid S3 manifest URI: ${e}`);let[{S3Client:s,GetObjectCommand:d},{fromIni:_}]=await Promise.all([import("@aws-sdk/client-s3"),import("@aws-sdk/credential-providers")]),i=t?.storage.type==="s3"&&t.storage.s3?.bucket===r?t.storage.s3:void 0,o=await new s({region:i?.region,credentials:i?.profile?_({profile:i.profile}):void 0,maxAttempts:i?.max_attempts}).send(new d({Bucket:r,Key:c}));if(!o.Body)return"";return await 
o.Body.transformToString()}async function dn(e,t){if(e.startsWith("s3://"))return En(e,t);if(!Qe(e))throw Error(`Manifest not found: ${e}`);return Ve(e,"utf8")}function an(e,t,n){let r=e.replace(/\r\n/g,`
|
|
231
|
+
`);if(!r.trim())return[];let c=[],s=0;while(s<r.length){let d=Math.min(r.length,s+t),_=d;if(d<r.length){let u=r.lastIndexOf(`
|
|
232
|
+
|
|
233
|
+
`,d),o=r.lastIndexOf(". ",d),T=Math.max(u,o);if(T>s+Math.floor(t*0.5))_=T+(T===u?2:1)}let i=r.slice(s,_).trim();if(i)c.push({ordinal:c.length,text:i,startOffset:s,endOffset:_});if(_>=r.length)break;s=Math.max(0,_-n)}return c}function pn(e){let t=e.trim().split(/\s+/).filter(Boolean).length;return Math.max(1,Math.ceil(t*1.25))}function fn(e,t){let n=e.query("SELECT id FROM chunks WHERE source_revision_id = ?").all(t);for(let r of n)e.run("DELETE FROM chunks_fts WHERE chunk_id = ?",[r.id]);return e.run("DELETE FROM chunks WHERE source_revision_id = ?",[t]),n.length}function _n(e,t,n){let r=te("src",t.sourceUri);e.run(`INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
|
|
234
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
235
|
+
ON CONFLICT(uri) DO UPDATE SET
|
|
236
|
+
kind = excluded.kind,
|
|
237
|
+
title = excluded.title,
|
|
238
|
+
metadata_json = excluded.metadata_json,
|
|
239
|
+
acl_json = excluded.acl_json,
|
|
240
|
+
updated_at = excluded.updated_at`,[r,t.sourceUri,t.kind,t.title,JSON.stringify(t.metadata),JSON.stringify(t.acl??{}),n,t.updatedAt]);let c=e.query("SELECT id FROM sources WHERE uri = ?").get(t.sourceUri);if(!c)throw Error(`Failed to upsert source: ${t.sourceUri}`);return c.id}function Nn(e,t,n,r){let c=te("rev",`${t}\x00${n.revision}`);e.run(`INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)
|
|
241
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
242
|
+
ON CONFLICT(source_id, revision) DO UPDATE SET
|
|
243
|
+
hash = excluded.hash,
|
|
244
|
+
extracted_text_uri = excluded.extracted_text_uri,
|
|
245
|
+
metadata_json = excluded.metadata_json`,[c,t,n.revision,n.hash,n.extractedTextUri,JSON.stringify(n.metadata),r]);let s=e.query("SELECT id FROM source_revisions WHERE source_id = ? AND revision = ?").get(t,n.revision);if(!s)throw Error(`Failed to upsert source revision: ${n.sourceRef}`);return s.id}function Ln(e,t,n,r,c,s){if(!n.text||n.status.toLowerCase()==="deleted")return 0;let d=an(n.text,c,s);for(let _ of d){let i=te("chk",`${t}\x00${_.ordinal}\x00${_.text}`),u={source_ref:n.sourceRef,source_uri:n.sourceUri,hash:n.hash,status:n.status,path:E(n.raw.path)??null,mime:E(n.raw.mime)??E(n.raw.content_type)??null,size:Pe(n.raw.size)??null};e.run(`INSERT INTO chunks (id, source_revision_id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json, created_at)
|
|
246
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,[i,t,"source",_.ordinal,_.text,pn(_.text),_.startOffset,_.endOffset,JSON.stringify(u),r]),e.run("INSERT INTO chunks_fts (chunk_id, text, title, source_uri) VALUES (?, ?, ?, ?)",[i,_.text,n.title??"",n.sourceUri])}return d.length}async function Ne(e){let t=(e.now??new Date).toISOString(),n=e.maxChunkChars??4000,r=e.chunkOverlapChars??200;if(n<500)throw Error("maxChunkChars must be at least 500.");if(r<0||r>=n)throw Error("chunkOverlapChars must be less than maxChunkChars.");m(e.dbPath);let c=await dn(e.input,e.config),s=Tn(c),d=G(e.dbPath);try{return d.transaction(()=>{let i=new Set,u=new Set,o=0,T=0,a=0;for(let f of s){let y=un(f,t),h=_n(d,y,t),O=Nn(d,h,y,t);if(i.add(h),u.add(O),y.text||y.status.toLowerCase()==="deleted")T+=fn(d,O);o+=Ln(d,O,y,t,n,r)}return{path:e.input,db_path:e.dbPath,items_seen:s.length,sources_upserted:i.size,revisions_upserted:u.size,chunks_inserted:o,chunks_deleted:T,skipped:a}})()}finally{d.close()}}var M={name:"@hasna/knowledge",version:"0.2.5",description:"Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",type:"module",bin:{"open-knowledge":"bin/open-knowledge.js","open-knowledge-mcp":"bin/open-knowledge-mcp.js"},files:["bin","src","docs","LICENSE","README.md"],scripts:{test:"bun test","test:cli":"bun test tests/cli.test.ts",build:"bun build --target=bun --outfile=bin/open-knowledge.js --minify --external @aws-sdk/client-s3 --external @aws-sdk/credential-providers src/cli.ts && bun build --target=bun --outfile=bin/open-knowledge-mcp.js --external @modelcontextprotocol/sdk src/mcp.js",prepublishOnly:"bun run build",postinstall:"bun run build"},keywords:["knowledge","cli","agents","json","notes","local","store"],license:"Apache-2.0",publishConfig:{registry:"https://registry.npmjs.org",access:"public"},repository:{type:"git",url:"git+https://github.com/hasna/knowledge.git"},bugs:{url:"https://github.com/hasna/knowledge/issues"},author:"Hasna Inc. 
<hasna@example.com>",engines:{bun:">=1.0",node:">=18"},dependencies:{"@aws-sdk/client-s3":"^3.1063.0","@aws-sdk/credential-providers":"^3.1063.0","@modelcontextprotocol/sdk":"^1.29.0",zod:"^4.3.6"},devDependencies:{"@types/bun":"^1.3.14"}};var Le={debug:0,info:1,warn:2,error:3},yn=()=>{if(process.env.DEBUG)return"debug";if(process.env.LOG_LEVEL==="debug")return"debug";if(process.env.LOG_LEVEL==="warn")return"warn";if(process.env.LOG_LEVEL==="error")return"error";return"info"};function x(e,t,n){if(Le[e]<Le[yn()])return;let r={debug:"[DEBUG]",info:"[INFO]",warn:"[WARN]",error:"[ERROR]"}[e],c=n?`${r} ${t} ${JSON.stringify(n)}`:`${r} ${t}`;if(e==="error")console.error(c);else console.error(c)}var Rn=["add","list","get","delete","update","archive","restore","upsert","untag","export","prune","dedupe","stats","paths","db","wiki","ingest","help"],he={ls:"list",rm:"delete",edit:"update",unarchive:"restore"};function On(e){let t=[],n={};for(let r=0;r<e.length;r+=1){let c=e[r];if(!c.startsWith("-")){t.push(c);continue}switch(c){case"--json":n.json=!0;break;case"--yes":case"-y":n.yes=!0;break;case"--help":case"-h":n.help=!0;break;case"--version":case"-v":n.version=!0;break;case"--desc":n.desc=!0;break;case"--page":case"-p":n.page=Number(e[r+1]),r+=1;break;case"--limit":case"-l":n.limit=Number(e[r+1]),r+=1;break;case"--search":case"-s":n.search=e[r+1],r+=1;break;case"--sort":n.sort=e[r+1],r+=1;break;case"--id":n.id=e[r+1],r+=1;break;case"--store":n.store=e[r+1],r+=1;break;case"--title":n.title=e[r+1],r+=1;break;case"--content":n.content=e[r+1],r+=1;break;case"--url":n.url=e[r+1],r+=1;break;case"--tag":case"-t":n.tag=e[r+1],r+=1;break;case"--format":n.format=e[r+1],r+=1;break;case"--completions":n.completions=e[r+1],r+=1;break;case"--no-color":n.noColor=!0;break;case"--scope":n.scope=e[r+1],r+=1;break;case"--older-than":n.olderThan=Number(e[r+1]),r+=1;break;case"--empty":n.empty=!0;break;case"--archived":n.archived=!0;break;case"--include-archived":n.includeArchived=!0;break;defa
ult:throw Error(`Unknown flag: ${c}. Run 'open-knowledge --help' for valid options.`)}}return{positional:t,flags:n}}function kn(e){if(!e)return"";return he[e]??e}function ln(e,t){let n=Array.from({length:e.length+1},()=>Array(t.length+1).fill(0));for(let r=0;r<=e.length;r+=1)n[r][0]=r;for(let r=0;r<=t.length;r+=1)n[0][r]=r;for(let r=1;r<=e.length;r+=1)for(let c=1;c<=t.length;c+=1){let s=e[r-1]===t[c-1]?0:1;n[r][c]=Math.min(n[r-1][c]+1,n[r][c-1]+1,n[r-1][c-1]+s)}return n[e.length][t.length]}function wn(e){if(!e)return"";let t=[...Rn,...Object.keys(he)],n="",r=Number.POSITIVE_INFINITY;for(let c of t){let s=ln(e,c);if(s<r)r=s,n=c}return r<=3?n:""}function Un(){console.log(`open-knowledge - local agent knowledge store
|
|
4
247
|
|
|
5
248
|
Usage:
|
|
6
249
|
open-knowledge <command> [options]
|
|
@@ -10,17 +253,25 @@ Commands:
|
|
|
10
253
|
list (alias: ls) List items (supports pagination/search/sort/tag)
|
|
11
254
|
get --id <id> Get one item
|
|
12
255
|
update --id <id> Update an item (--title, --content, --url, --tag)
|
|
256
|
+
archive --id <id> Archive an item
|
|
257
|
+
restore --id <id> Restore an archived item
|
|
258
|
+
upsert [title] [content] Create or update an item by --id
|
|
259
|
+
untag --id <id> -t <tag> Remove a tag from an item
|
|
13
260
|
delete (alias: rm) --id <id> Delete item (requires --yes)
|
|
14
261
|
export Export all items (--format jsonl)
|
|
15
262
|
prune Remove old/empty items (requires --yes)
|
|
16
263
|
dedupe Remove duplicate items by title+content (requires --yes)
|
|
17
264
|
stats Show knowledge base statistics
|
|
265
|
+
paths Show resolved workspace/store paths
|
|
266
|
+
db init|stats Initialize or inspect local knowledge.db
|
|
267
|
+
wiki init Initialize scalable wiki/schema/index/log artifacts
|
|
268
|
+
ingest manifest <file|s3://> Ingest an open-files manifest into knowledge.db
|
|
18
269
|
help [command] Show help
|
|
19
270
|
|
|
20
271
|
Global Options:
|
|
21
272
|
--json Output JSON
|
|
22
273
|
--store <path> Override store path
|
|
23
|
-
--scope local|global|project Store scope (default: global ~/.
|
|
274
|
+
--scope local|global|project Store scope (default: global ~/.hasna/apps/knowledge/)
|
|
24
275
|
--no-color Disable color output
|
|
25
276
|
--completions <shell> Output completions for bash|zsh|fish
|
|
26
277
|
-v, --version Show version
|
|
@@ -34,6 +285,8 @@ List Options:
|
|
|
34
285
|
-t, --tag <tag> Filter by tag
|
|
35
286
|
--sort <created|title> Sort field (default: created)
|
|
36
287
|
--desc Sort descending
|
|
288
|
+
--archived Show only archived items
|
|
289
|
+
--include-archived Include archived items
|
|
37
290
|
|
|
38
291
|
Add/Update Options:
|
|
39
292
|
--url <url> Attach source URL
|
|
@@ -54,5 +307,5 @@ Export Options:
|
|
|
54
307
|
|
|
55
308
|
Prune Options:
|
|
56
309
|
--older-than <days> Remove items older than N days
|
|
57
|
-
--empty Remove items with empty content`)}function
|
|
58
|
-
_open_knowledge() { _arguments -C "1: :(add list get update delete export help ls rm edit)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"{created,title}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--no-color)--no-color[disable color]" "(--scope)--scope"{local,global,project}:" }; _open_knowledge`);else if(Q==="fish")console.log('complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update delete export help ls rm edit"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"');else throw Error("Invalid --completions value. 
Use 'bash', 'zsh', or 'fish'.");return}let B=t(W[0]);if(!B||z.help||B==="help"){B0(W[1]);return}let K=z.store;if(!K)if(z.scope==="project")K="./.open-knowledge/db.json";else K=p();if(b(K),B==="add"){let Q=W[1],Y=W[2];if(!Q||!Y)throw Error("Usage: open-knowledge add <title> <content>");G(K,()=>{let X=V(K),Z={id:v(),title:Q,content:Y,url:z.url??null,tags:z.tag?[z.tag]:[],created_at:new Date().toISOString(),updated_at:new Date().toISOString()};X.items.push(Z),j(K,X),T("info","Item added",{id:Z.id,title:Z.title}),O({ok:!0,item:Z,message:`Added ${Z.id}`},z.json)});return}if(B==="list"){if(z.format!==void 0&&z.format!=="table"&&z.format!=="json")throw Error("Invalid --format value for list. Use 'table' or 'json'.");G(K,()=>{let Q=V(K),Y=Number.isFinite(z.page)&&z.page>0?z.page:1,X=Number.isFinite(z.limit)&&z.limit>0?z.limit:20,Z=z.search?String(z.search).toLowerCase():"",E=z.tag?String(z.tag).toLowerCase():"",_=z.format==="table"||!z.json&&!z.format&&K0(z),A=z.json||z.format==="json",D=Q.items;if(Z)D=D.filter(($)=>$.title.toLowerCase().includes(Z)||$.content.toLowerCase().includes(Z));if(E)D=D.filter(($)=>$.tags&&$.tags.map((I)=>I.toLowerCase()).includes(E));let{sorted:H,sort:J,direction:C}=Q0(D,z),w=(Y-1)*X,M=H.slice(w,w+X),y=Math.max(1,Math.ceil(H.length/X));if(A){O({ok:!0,page:Y,limit:X,total:H.length,total_pages:y,sort:J,direction:C,items:M},!0);return}if(M.length===0){O(`No items found (search=${Z||"none"}, tag=${E||"none"})`,!1);return}if(_){let $=(q)=>q,I=`${$("ID")} ${$("TITLE")} ${$("CREATED")} ${$("URL")} ${$("TAGS")}`;console.log(I);for(let q of M)console.log(`${q.id} ${$(q.title)} ${q.created_at} ${q.url?$(q.url):""} ${q.tags?.length?$(`[${q.tags.join(", ")}]`):""}`);console.log(`Page ${Y}/${y} | showing ${M.length} of ${H.length} | sort=${J} ${C} | search=${Z||"none"} | tag=${E||"none"}`)}else{for(let $ of M)console.log(`${$.id} ${$.title} ${$.created_at}${$.url?` ${$.url}`:""}${$.tags?.length?` [${$.tags.join(", ")}]`:""}`);console.log(`Page ${Y}/${y} | 
showing ${M.length} of ${H.length} | sort=${J} ${C} | search=${Z||"none"} | tag=${E||"none"}`)}});return}if(B==="get"){x(z),G(K,()=>{let Y=V(K).items.find((X)=>X.id===z.id);if(!Y)throw Error(`Item not found: ${z.id}`);O({ok:!0,item:Y,message:`${Y.id}: ${Y.title}`},z.json)});return}if(B==="update"){x(z),G(K,()=>{let Q=V(K),Y=Q.items.findIndex((Z)=>Z.id===z.id);if(Y===-1)throw Error(`Item not found: ${z.id}`);let X=Q.items[Y];if(z.title!==void 0)X.title=z.title;if(z.content!==void 0)X.content=z.content;if(z.url!==void 0)X.url=z.url;if(z.tag!==void 0){if(X.tags=X.tags||[],!X.tags.map((Z)=>Z.toLowerCase()).includes(z.tag.toLowerCase()))X.tags.push(z.tag)}X.updated_at=new Date().toISOString(),Q.items[Y]=X,j(K,Q),O({ok:!0,item:X,message:`Updated ${X.id}`},z.json)});return}if(B==="delete"){if(x(z),!z.yes)throw Error("Refusing delete without --yes. Re-run with: open-knowledge delete --id <id> --yes");G(K,()=>{let Q=V(K),Y=Q.items.length;Q.items=Q.items.filter((Z)=>Z.id!==z.id);let X=Y!==Q.items.length;if(j(K,Q),!X)throw Error(`Item not found: ${z.id}`);T("info","Item deleted",{id:z.id}),O({ok:!0,deleted_id:z.id,message:`Deleted ${z.id}`},z.json)});return}if(B==="export"){let Q=z.format??"json";if(Q!=="json"&&Q!=="jsonl")throw Error("Invalid --format. Use 'json' or 'jsonl'.");G(K,()=>{let Y=V(K);if(Q==="jsonl")for(let X of Y.items)console.log(JSON.stringify(X));else O({ok:!0,items:Y.items},z.json)});return}if(B==="prune"){if(!z.yes)throw Error("Refusing prune without --yes. 
Re-run with: open-knowledge prune --yes [--older-than <days>] [--empty]");G(K,()=>{let Q=V(K),Y=Q.items.length;if(z.olderThan!==void 0){let Z=new Date;Z.setDate(Z.getDate()-z.olderThan),Q.items=Q.items.filter((E)=>new Date(E.created_at)>=Z)}if(z.empty)Q.items=Q.items.filter((Z)=>Z.content.trim().length>0);let X=Y-Q.items.length;j(K,Q),T("info","Prune completed",{pruned:X,remaining:Q.items.length}),O({ok:!0,pruned:X,remaining:Q.items.length,message:`Pruned ${X} item(s)`},z.json)});return}if(B==="dedupe"){if(!z.yes)throw Error("Refusing dedupe without --yes. Re-run with: open-knowledge dedupe --yes [--json]");G(K,()=>{let Q=V(K),Y=new Set,X=Q.items.length;Q.items=Q.items.filter((E)=>{let _=`${E.title}\x00${E.content}`;if(Y.has(_))return!1;return Y.add(_),!0});let Z=X-Q.items.length;j(K,Q),T("info","Dedupe completed",{removed:Z,remaining:Q.items.length}),O({ok:!0,removed:Z,remaining:Q.items.length,message:`Dedupe removed ${Z} duplicate(s)`},z.json)});return}if(B==="stats"){G(K,()=>{let Q=V(K),Y=Q.items.length,X=Q.items.filter((H)=>H.url).length,Z=Q.items.filter((H)=>H.tags&&H.tags.length>0).length,E=Y>0?Q.items.map((H)=>H.created_at).sort()[0]:null,_=Y>0?Q.items.map((H)=>H.created_at).sort()[Y-1]:null,A={};for(let H of Q.items)for(let J of H.tags||[])A[J]=(A[J]||0)+1;let D=Object.entries(A).sort((H,J)=>J[1]-H[1]).slice(0,5).map(([H,J])=>({tag:H,count:J}));O({ok:!0,total:Y,with_url:X,with_tags:Z,oldest:E,newest:_,top_tags:D,message:`${Y} items | ${X} with URL | ${Z} with tags`},z.json)});return}let N=z0(W[0]),U=N?` Did you mean '${N}'?`:"";throw T("warn","Unknown command",{input:W[0],suggestion:N}),Error(`Unknown command: ${W[0]}.${U} Run 'open-knowledge --help' for available commands.`)}if(import.meta.main)try{W0(process.argv.slice(2))}catch(R){let W=R instanceof Error?R.message:String(R);T("error","CLI error",{message:W,stack:R instanceof Error?R.stack:void 0}),console.error(`Error: ${W}`),process.exitCode=1}export{z0 as suggestCommand,Q0 as sortItems,W0 as run,o as 
parseArgs};
|
|
310
|
+
--empty Remove items with empty content`)}function Sn(e){if(e==="add"){console.log("Usage: open-knowledge add <title> <content> [--url <url>] [-t <tag>] [--json]");return}if(e==="list"||e==="ls"){console.log("Usage: open-knowledge list|ls [--format table|json] [-p <page>] [-l <limit>] [-s <search>] [-t <tag>] [--sort created|title] [--desc] [--json]");return}if(e==="get"){console.log("Usage: open-knowledge get --id <id> [--json]");return}if(e==="update"||e==="edit"){console.log("Usage: open-knowledge update|edit --id <id> [--title <title>] [--content <content>] [--url <url>] [-t <tag>] [--json]");return}if(e==="archive"){console.log("Usage: open-knowledge archive --id <id> [--json]");return}if(e==="restore"||e==="unarchive"){console.log("Usage: open-knowledge restore|unarchive --id <id> [--json]");return}if(e==="upsert"){console.log("Usage: open-knowledge upsert [title] [content] [--id <id>] [--title <title>] [--content <content>] [--url <url>] [-t <tag>] [--json]");return}if(e==="untag"){console.log("Usage: open-knowledge untag --id <id> -t <tag> [--json]");return}if(e==="delete"||e==="rm"){console.log("Usage: open-knowledge delete|rm --id <id> -y [--json]");return}if(e==="export"){console.log("Usage: open-knowledge export [--format jsonl] [--json]");return}if(e==="prune"){console.log("Usage: open-knowledge prune --yes [--older-than <days>] [--empty] [--json]");return}if(e==="dedupe"){console.log("Usage: open-knowledge dedupe --yes [--json]");return}if(e==="stats"){console.log("Usage: open-knowledge stats [--json]");return}if(e==="paths"){console.log("Usage: open-knowledge paths [--scope local|global|project] [--json]");return}if(e==="db"){console.log("Usage: open-knowledge db init|stats [--scope local|global|project] [--json]");return}if(e==="wiki"){console.log("Usage: open-knowledge wiki init [--scope local|global|project] [--json]");return}if(e==="ingest"){console.log("Usage: open-knowledge ingest manifest <file|s3://bucket/key> [--scope local|global|project] 
[--json]");return}Un()}function An(e){if(e.noColor||process.env.NO_COLOR)return!1;if(process.env.FORCE_COLOR)return!0;return process.stdout.isTTY===!0}function L(e,t,n){if(t){console.log(JSON.stringify(e,null,2));return}if(typeof e==="string"){console.log(e);return}console.log(e.message??JSON.stringify(e,null,2))}function j(e){if(!e.id)throw Error("Missing required --id. Example: open-knowledge get --id <id>")}function In(e,t){let n=t.sort??"created";if(n!=="created"&&n!=="title")throw Error("Invalid --sort value. Use 'created' or 'title'.");let r=[...e].sort((c,s)=>{if(n==="title")return c.title.localeCompare(s.title);return c.created_at.localeCompare(s.created_at)});if(t.desc)r.reverse();return{sorted:r,sort:n,direction:t.desc?"desc":"asc"}}async function xn(e){let{positional:t,flags:n}=On(e);if(x("debug","CLI invoked",{command:t[0],flags:{json:n.json,store:n.store}}),n.version){console.log(n.json?JSON.stringify({name:M.name,version:M.version},null,2):`${M.name} ${M.version}`);return}if(n.completions){let i=n.completions;if(i==="bash")console.log('_open_knowledge() { local cur; cur="${COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge');else if(i==="zsh")console.log(`#compdef open-knowledge
|
|
311
|
+
_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"{created,title}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--no-color)--no-color[disable color]" "(--scope)--scope"{local,global,project}:" }; _open_knowledge`);else if(i==="fish")console.log('complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths db wiki ingest help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"');else 
throw Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");return}let r=kn(t[0]);if(!r||n.help||r==="help"){Sn(t[1]);return}let c=ie(n.scope),s=n.store;if(!s)if(n.scope==="project"||n.scope==="local")s=b(c.home).jsonStorePath;else s=V();if(r==="paths"){let i=b(c.home);L({ok:!0,scope:n.scope??"global",home:i.home,config_path:i.configPath,json_store_path:i.jsonStorePath,knowledge_db_path:i.knowledgeDbPath,artifacts_dir:i.artifactsDir,indexes_dir:i.indexesDir,logs_dir:i.logsDir,runs_dir:i.runsDir,schemas_dir:i.schemasDir,wiki_dir:i.wikiDir,config:v(i.configPath),message:i.home},n.json);return}if(r==="db"){let i=t[1]??"init",u=b(c.home);if(i!=="init"&&i!=="stats")throw Error("Invalid db action. Use 'init' or 'stats'.");if(i==="init"){let T=m(u.knowledgeDbPath);L({ok:!0,...T,message:`Initialized ${T.path}`},n.json);return}m(u.knowledgeDbPath);let o=ue(u.knowledgeDbPath);L({ok:!0,path:u.knowledgeDbPath,...o,message:`knowledge.db schema v${o.schema_version}`},n.json);return}if(r==="wiki"){if((t[1]??"init")!=="init")throw Error("Invalid wiki action. Use 'init'.");let u=b(c.home),o=v(u.configPath),T=ae(o,u),a=await pe(T);L({ok:!0,...a,message:`Initialized wiki layout in ${u.home}`},n.json);return}if(r==="ingest"){if((t[1]??"")!=="manifest")throw Error("Invalid ingest action. 
Use 'manifest'.");let u=t[2];if(!u)throw Error("Usage: open-knowledge ingest manifest <file|s3://bucket/key>");let o=b(c.home),T=v(o.configPath),a=await Ne({dbPath:o.knowledgeDbPath,input:u,config:T});L({ok:!0,...a,message:`Ingested ${a.items_seen} manifest item(s)`},n.json);return}if(q(s),r==="add"){let i=t[1],u=t[2];if(!i||!u)throw Error("Usage: open-knowledge add <title> <content>");l(s,()=>{let o=k(s),T={id:P(),title:i,content:u,url:n.url??null,tags:n.tag?[n.tag]:[],created_at:new Date().toISOString(),updated_at:new Date().toISOString()};o.items.push(T),w(s,o),x("info","Item added",{id:T.id,title:T.title}),L({ok:!0,item:T,message:`Added ${T.id}`},n.json)});return}if(r==="list"){if(n.format!==void 0&&n.format!=="table"&&n.format!=="json")throw Error("Invalid --format value for list. Use 'table' or 'json'.");l(s,()=>{let i=k(s),u=Number.isFinite(n.page)&&n.page>0?n.page:1,o=Number.isFinite(n.limit)&&n.limit>0?n.limit:20,T=n.search?String(n.search).toLowerCase():"",a=n.tag?String(n.tag).toLowerCase():"",f=n.format==="table"||!n.json&&!n.format&&An(n),y=n.json||n.format==="json",h=i.items;if(n.archived)h=h.filter((p)=>p.archived===!0);else if(!n.includeArchived)h=h.filter((p)=>!p.archived);if(T)h=h.filter((p)=>p.title.toLowerCase().includes(T)||p.content.toLowerCase().includes(T));if(a)h=h.filter((p)=>p.tags&&p.tags.map((W)=>W.toLowerCase()).includes(a));let{sorted:O,sort:C,direction:N}=In(h,n),U=(u-1)*o,X=O.slice(U,U+o),z=Math.max(1,Math.ceil(O.length/o));if(y){L({ok:!0,page:u,limit:o,total:O.length,total_pages:z,sort:C,direction:N,items:X},!0);return}if(X.length===0){L(`No items found (search=${T||"none"}, tag=${a||"none"})`,!1);return}if(f){let p=(S)=>S,W=`${p("ID")} ${p("TITLE")} ${p("CREATED")} ${p("URL")} ${p("TAGS")}`;console.log(W);for(let S of X)console.log(`${S.id} ${p(S.title)} ${S.created_at} ${S.url?p(S.url):""} ${S.tags?.length?p(`[${S.tags.join(", ")}]`):""}`);console.log(`Page ${u}/${z} | showing ${X.length} of ${O.length} | sort=${C} ${N} | 
search=${T||"none"} | tag=${a||"none"}`)}else{for(let p of X)console.log(`${p.id} ${p.title} ${p.created_at}${p.url?` ${p.url}`:""}${p.tags?.length?` [${p.tags.join(", ")}]`:""}`);console.log(`Page ${u}/${z} | showing ${X.length} of ${O.length} | sort=${C} ${N} | search=${T||"none"} | tag=${a||"none"}`)}});return}if(r==="get"){j(n),l(s,()=>{let u=k(s).items.find((o)=>o.id===n.id||o.short_id===n.id);if(!u)throw Error(`Item not found: ${n.id}`);L({ok:!0,item:u,message:`${u.id}: ${u.title}`},n.json)});return}if(r==="update"){j(n),l(s,()=>{let i=k(s),u=i.items.findIndex((T)=>T.id===n.id||T.short_id===n.id);if(u===-1)throw Error(`Item not found: ${n.id}`);let o=i.items[u];if(n.title!==void 0)o.title=n.title;if(n.content!==void 0)o.content=n.content;if(n.url!==void 0)o.url=n.url;if(n.tag!==void 0){if(o.tags=o.tags||[],!o.tags.map((T)=>T.toLowerCase()).includes(n.tag.toLowerCase()))o.tags.push(n.tag)}o.updated_at=new Date().toISOString(),i.items[u]=o,w(s,i),L({ok:!0,item:o,message:`Updated ${o.id}`},n.json)});return}if(r==="archive"||r==="restore"){j(n),l(s,()=>{let i=k(s),u=i.items.findIndex((T)=>T.id===n.id||T.short_id===n.id);if(u===-1)throw Error(`Item not found: ${n.id}`);let o=i.items[u];o.archived=r==="archive",o.updated_at=new Date().toISOString(),i.items[u]=o,w(s,i),L({ok:!0,item:o,message:`${r==="archive"?"Archived":"Restored"} ${o.id}`},n.json)});return}if(r==="untag"){if(j(n),!n.tag)throw Error("Missing required --tag. 
Example: open-knowledge untag --id <id> -t <tag>");l(s,()=>{let i=k(s),u=i.items.findIndex((a)=>a.id===n.id||a.short_id===n.id);if(u===-1)throw Error(`Item not found: ${n.id}`);let o=i.items[u],T=o.tags?.length??0;o.tags=(o.tags??[]).filter((a)=>a.toLowerCase()!==n.tag.toLowerCase()),o.updated_at=new Date().toISOString(),i.items[u]=o,w(s,i),L({ok:!0,item:o,removed:T-o.tags.length,message:`Removed tag from ${o.id}`},n.json)});return}if(r==="upsert"){let i=n.title??t[1],u=n.content??t[2];l(s,()=>{let o=k(s),T=n.id?o.items.findIndex((y)=>y.id===n.id||y.short_id===n.id):-1,a=new Date().toISOString();if(T===-1){if(!i||!u)throw Error("New item requires title and content. Example: open-knowledge upsert <title> <content> [--id <id>]");let y=n.id??P(),h={id:y,short_id:ce(y),title:i,content:u,url:n.url??null,tags:n.tag?[n.tag]:[],metadata:{},archived:!1,created_at:a,updated_at:a};o.items.push(h),w(s,o),L({ok:!0,created:!0,item:h,message:`Upserted ${h.id}`},n.json);return}let f=o.items[T];if(i!==void 0)f.title=i;if(u!==void 0)f.content=u;if(n.url!==void 0)f.url=n.url;if(n.tag!==void 0){if(f.tags=f.tags||[],!f.tags.map((y)=>y.toLowerCase()).includes(n.tag.toLowerCase()))f.tags.push(n.tag)}f.updated_at=a,o.items[T]=f,w(s,o),L({ok:!0,created:!1,item:f,message:`Upserted ${f.id}`},n.json)});return}if(r==="delete"){if(j(n),!n.yes)throw Error("Refusing delete without --yes. Re-run with: open-knowledge delete --id <id> --yes");l(s,()=>{let i=k(s),u=i.items.length;i.items=i.items.filter((T)=>T.id!==n.id&&T.short_id!==n.id);let o=u!==i.items.length;if(w(s,i),!o)throw Error(`Item not found: ${n.id}`);x("info","Item deleted",{id:n.id}),L({ok:!0,deleted_id:n.id,message:`Deleted ${n.id}`},n.json)});return}if(r==="export"){let i=n.format??"json";if(i!=="json"&&i!=="jsonl")throw Error("Invalid --format. 
Use 'json' or 'jsonl'.");l(s,()=>{let u=k(s);if(i==="jsonl")for(let o of u.items)console.log(JSON.stringify(o));else L({ok:!0,items:u.items},n.json)});return}if(r==="prune"){if(!n.yes)throw Error("Refusing prune without --yes. Re-run with: open-knowledge prune --yes [--older-than <days>] [--empty]");l(s,()=>{let i=k(s),u=i.items.length;if(n.olderThan!==void 0){let T=new Date;T.setDate(T.getDate()-n.olderThan),i.items=i.items.filter((a)=>new Date(a.created_at)>=T)}if(n.empty)i.items=i.items.filter((T)=>T.content.trim().length>0);let o=u-i.items.length;w(s,i),x("info","Prune completed",{pruned:o,remaining:i.items.length}),L({ok:!0,pruned:o,remaining:i.items.length,message:`Pruned ${o} item(s)`},n.json)});return}if(r==="dedupe"){if(!n.yes)throw Error("Refusing dedupe without --yes. Re-run with: open-knowledge dedupe --yes [--json]");l(s,()=>{let i=k(s),u=new Set,o=i.items.length;i.items=i.items.filter((a)=>{let f=`${a.title}\x00${a.content}`;if(u.has(f))return!1;return u.add(f),!0});let T=o-i.items.length;w(s,i),x("info","Dedupe completed",{removed:T,remaining:i.items.length}),L({ok:!0,removed:T,remaining:i.items.length,message:`Dedupe removed ${T} duplicate(s)`},n.json)});return}if(r==="stats"){l(s,()=>{let i=k(s),u=i.items.filter((N)=>!N.archived),o=u.length,T=i.items.length-o,a=u.filter((N)=>N.url).length,f=u.filter((N)=>N.tags&&N.tags.length>0).length,y=o>0?u.map((N)=>N.created_at).sort()[0]:null,h=o>0?u.map((N)=>N.created_at).sort()[o-1]:null,O={};for(let N of u)for(let U of N.tags||[])O[U]=(O[U]||0)+1;let C=Object.entries(O).sort((N,U)=>U[1]-N[1]).slice(0,5).map(([N,U])=>({tag:N,count:U}));L({ok:!0,total:o,archived:T,with_url:a,with_tags:f,oldest:y,newest:h,top_tags:C,message:`${o} items | ${a} with URL | ${f} with tags`},n.json)});return}let d=wn(t[0]),_=d?` Did you mean '${d}'?`:"";throw x("warn","Unknown command",{input:t[0],suggestion:d}),Error(`Unknown command: ${t[0]}.${_} Run 'open-knowledge --help' for available 
commands.`)}if(import.meta.main)xn(process.argv.slice(2)).catch((e)=>{let t=e instanceof Error?e.message:String(e);x("error","CLI error",{message:t,stack:e instanceof Error?e.stack:void 0}),console.error(`Error: ${t}`),process.exitCode=1});export{wn as suggestCommand,In as sortItems,xn as run,On as parseArgs};
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
# AI-Native Knowledge Base Architecture
|
|
2
|
+
|
|
3
|
+
`open-knowledge` is the local-first knowledge engine for Hasna projects and
|
|
4
|
+
agents. It should make company knowledge durable, searchable, citable, and safe
|
|
5
|
+
for agents to reuse. It is not the raw file bucket. Raw source bytes belong to
|
|
6
|
+
`open-files`.
|
|
7
|
+
|
|
8
|
+
## Product Boundary
|
|
9
|
+
|
|
10
|
+
The open source package owns:
|
|
11
|
+
|
|
12
|
+
- Local CLI and MCP interfaces.
|
|
13
|
+
- Shared service modules used by CLI, MCP, tests, and future SDK callers.
|
|
14
|
+
- Local project workspace under `.hasna/apps/knowledge`.
|
|
15
|
+
- Source references, citations, extracted metadata, chunks, generated wiki
|
|
16
|
+
artifacts, schemas, indexes, run ledgers, and search state.
|
|
17
|
+
- Hybrid retrieval over keyword search, semantic vectors, wiki pages, citations,
|
|
18
|
+
and graph signals.
|
|
19
|
+
- Provider/runtime contracts for local BYOK agent workflows.
|
|
20
|
+
- Hosted-aware client contracts that can talk to a future SaaS wrapper.
|
|
21
|
+
|
|
22
|
+
`open-files` owns:
|
|
23
|
+
|
|
24
|
+
- Raw source bytes and snapshots.
|
|
25
|
+
- Local, S3, Google Drive, and future connector-backed source storage.
|
|
26
|
+
- Source ids, file ids, revisions, hashes, MIME metadata, storage locations, and
|
|
27
|
+
extraction outputs.
|
|
28
|
+
- Read-only content resolution for knowledge agents.
|
|
29
|
+
- Source manifests and source-change events that drive reindexing.
|
|
30
|
+
|
|
31
|
+
The future hosted/SaaS wrapper owns:
|
|
32
|
+
|
|
33
|
+
- Users, orgs, projects, memberships, API keys, and permissions.
|
|
34
|
+
- Billing, limits, queues, workers, hosted databases, hosted object storage
|
|
35
|
+
policies, connector secrets, audit, observability, and web UI.
|
|
36
|
+
- Permission-aware retrieval enforcement across tenants.
|
|
37
|
+
- Remote job orchestration for ingestion, embedding, web search, compile, lint,
|
|
38
|
+
and sync runs.
|
|
39
|
+
|
|
40
|
+
The OSS package must stay useful without a hosted account. Hosted mode should be
|
|
41
|
+
an optional remote client over explicit API contracts.
|
|
42
|
+
|
|
43
|
+
## Local Workspace
|
|
44
|
+
|
|
45
|
+
Project-local state lives at:
|
|
46
|
+
|
|
47
|
+
```text
|
|
48
|
+
.hasna/apps/knowledge/
|
|
49
|
+
config.json
|
|
50
|
+
knowledge.db
|
|
51
|
+
artifacts/
|
|
52
|
+
cache/
|
|
53
|
+
exports/
|
|
54
|
+
indexes/
|
|
55
|
+
logs/
|
|
56
|
+
runs/
|
|
57
|
+
schemas/
|
|
58
|
+
wiki/
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
The legacy JSON store at `~/.open-knowledge/db.json` remains readable for
|
|
62
|
+
migration and compatibility. New project mode should prefer
|
|
63
|
+
`.hasna/apps/knowledge/knowledge.db` and generated artifacts under the same app
|
|
64
|
+
home.
|
|
65
|
+
|
|
66
|
+
Global/user state may use a Hasna data directory, but project mode is the
|
|
67
|
+
default for company knowledge because it keeps artifacts close to the repo or
|
|
68
|
+
workspace they describe.
|
|
69
|
+
|
|
70
|
+
## Source References
|
|
71
|
+
|
|
72
|
+
`open-knowledge` stores references, not raw source bytes. Supported source ref
|
|
73
|
+
forms:
|
|
74
|
+
|
|
75
|
+
```text
|
|
76
|
+
open-files://file/<file_id>
|
|
77
|
+
open-files://file/<file_id>/revision/<revision_id>
|
|
78
|
+
open-files://source/<source_id>/path/<path>
|
|
79
|
+
s3://bucket/key
|
|
80
|
+
file:///absolute/path
|
|
81
|
+
https://example.com/page
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
For durable company knowledge, `open-files://` is preferred because it can carry
|
|
85
|
+
file revisions, hashes, extraction state, permissions, and storage metadata.
|
|
86
|
+
Direct `s3://`, `file://`, and `https://` refs are useful for bootstrap and
|
|
87
|
+
interop, but should be normalized into source records when possible.
|
|
88
|
+
|
|
89
|
+
## Remote And S3 Mode
|
|
90
|
+
|
|
91
|
+
Local mode writes artifacts to `.hasna/apps/knowledge`.
|
|
92
|
+
|
|
93
|
+
Remote/cloud mode can store generated knowledge artifacts in S3:
|
|
94
|
+
|
|
95
|
+
```text
|
|
96
|
+
s3://<knowledge-bucket>/<org>/<project>/knowledge/
|
|
97
|
+
artifacts/
|
|
98
|
+
indexes/
|
|
99
|
+
logs/
|
|
100
|
+
runs/
|
|
101
|
+
schemas/
|
|
102
|
+
wiki/
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
Raw files still route through `open-files`. Knowledge S3 storage is for derived
|
|
106
|
+
artifacts such as wiki pages, index shards, schema versions, logs, exports, and
|
|
107
|
+
run outputs.
|
|
108
|
+
|
|
109
|
+
## Wiki Model
|
|
110
|
+
|
|
111
|
+
The Karpathy-style wiki pattern is implemented as scalable artifacts, not three
|
|
112
|
+
giant files (one `index.md`, one `schema.md`, and one `log.md`).
|
|
113
|
+
|
|
114
|
+
Small repositories may expose root Markdown summaries:
|
|
115
|
+
|
|
116
|
+
```text
|
|
117
|
+
wiki/index.md
|
|
118
|
+
schemas/current.md
|
|
119
|
+
logs/latest.md
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Large knowledge bases use:
|
|
123
|
+
|
|
124
|
+
```text
|
|
125
|
+
schemas/
|
|
126
|
+
v1.md
|
|
127
|
+
v2.md
|
|
128
|
+
indexes/
|
|
129
|
+
root.md
|
|
130
|
+
engineering.md
|
|
131
|
+
product.md
|
|
132
|
+
machine/
|
|
133
|
+
engineering.json
|
|
134
|
+
logs/
|
|
135
|
+
2026/
|
|
136
|
+
06/
|
|
137
|
+
08.jsonl
|
|
138
|
+
wiki/
|
|
139
|
+
engineering/
|
|
140
|
+
product/
|
|
141
|
+
operations/
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
The database catalog tracks every schema, index shard, log partition, wiki page,
|
|
145
|
+
source citation, and generated artifact. Markdown remains the readable layer;
|
|
146
|
+
SQLite/Postgres and object storage carry the scalable catalog.
|
|
147
|
+
|
|
148
|
+
## Search Model
|
|
149
|
+
|
|
150
|
+
Search is hybrid. Indexing (steps 1-3) and querying (steps 4-5) work as follows:
|
|
151
|
+
|
|
152
|
+
1. `open-files` supplies source manifests, revisions, hashes, and extracted text.
|
|
153
|
+
2. `open-knowledge` chunks extracted text and generated wiki pages.
|
|
154
|
+
3. Chunks and pages are indexed with keyword search and embeddings.
|
|
155
|
+
4. Queries run through keyword FTS, vector search, and wiki/citation graph
|
|
156
|
+
expansion.
|
|
157
|
+
5. Results are merged, deduped, reranked, permission-filtered, and returned with
|
|
158
|
+
citations.
|
|
159
|
+
|
|
160
|
+
Local mode should start with SQLite FTS and a local vector-index option. Hosted
|
|
161
|
+
mode can use Postgres with pgvector or a managed vector index. Permission
|
|
162
|
+
filters must be applied before agent context is assembled.
|
|
163
|
+
|
|
164
|
+
## Agent Workflow
|
|
165
|
+
|
|
166
|
+
The target user flow is:
|
|
167
|
+
|
|
168
|
+
```bash
|
|
169
|
+
knowledge "<prompt>"
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
The command should:
|
|
173
|
+
|
|
174
|
+
1. Search existing wiki and indexed source chunks.
|
|
175
|
+
2. Resolve deeper read-only source content through `open-files` if needed.
|
|
176
|
+
3. Optionally use provider-native web search.
|
|
177
|
+
4. Produce an answer with citations.
|
|
178
|
+
5. Propose durable wiki/index/schema/log updates.
|
|
179
|
+
6. Write generated artifacts only after approval or in an explicitly approved
|
|
180
|
+
auto-write mode.
|
|
181
|
+
7. Record a run ledger with tool calls, sources, costs, outputs, and generated
|
|
182
|
+
records.
|
|
183
|
+
|
|
184
|
+
## Non-Goals
|
|
185
|
+
|
|
186
|
+
- Do not make `open-knowledge` own raw source files.
|
|
187
|
+
- Do not make hosted account, billing, worker, or tenant state required for local
|
|
188
|
+
use.
|
|
189
|
+
- Do not let semantic search bypass permissions.
|
|
190
|
+
- Do not treat a single `index.md`, `schema.md`, or `log.md` as the final scalable
|
|
191
|
+
representation for a large company knowledge base.
|