@digitalvibes/ai-knowledge-db 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,292 @@
1
+ // src/client.ts
2
+ import pg from "pg";
3
+
4
+ // src/config.ts
5
/**
 * Vector size produced by each embedding model this package knows about.
 * resolveConfig() falls back to 1536 for any model missing from this table.
 */
var MODEL_DIMENSIONS = {
  "text-embedding-3-small": 1536,
  "text-embedding-3-large": 3072,
  "text-embedding-ada-002": 1536
};

/**
 * Read an environment variable, trimmed.
 *
 * @param {string} key Name of the variable in process.env.
 * @returns {string | undefined} The trimmed value, or undefined when the
 *   variable is unset or contains only whitespace.
 */
var env = (key) => {
  const trimmed = process.env[key]?.trim();
  return trimmed ? trimmed : void 0;
};

/**
 * Merge explicit options with environment defaults into a complete config.
 *
 * Explicit options take precedence over the environment. For
 * `connectionString`, `openaiApiKey` and `embeddingModel`, a blank or
 * whitespace-only string is treated the same way env() treats a blank
 * variable: as "not provided", so the env value (or the built-in default
 * model) is used instead of the empty string.
 *
 * @param {object} [config] Optional overrides: connectionString,
 *   openaiApiKey, embeddingModel, clientId, projectId.
 * @returns {object} connectionString, openaiApiKey, embeddingModel,
 *   embeddingDimensions, clientId and projectId (the last two may be
 *   undefined).
 * @throws {Error} When no connection string or no OpenAI key can be found in
 *   either the options or the environment.
 */
function resolveConfig(config = {}) {
  // Explicit value first, trimmed; blank strings fall through to the env var.
  const setting = (explicit, envKey) => {
    const given = typeof explicit === "string" ? explicit.trim() : explicit;
    return given == null || given === "" ? env(envKey) : given;
  };

  const connectionString = setting(config.connectionString, "KNOWLEDGE_DB_URL");
  if (!connectionString) {
    throw new Error(
      "[ai-knowledge-db] Missing connection string. Set KNOWLEDGE_DB_URL in your project's .env or pass { connectionString } to createKnowledgeDB()."
    );
  }

  const openaiApiKey = setting(config.openaiApiKey, "OPENAI_API_KEY");
  if (!openaiApiKey) {
    throw new Error(
      "[ai-knowledge-db] Missing OpenAI key. Set OPENAI_API_KEY in your project's .env or pass { openaiApiKey } to createKnowledgeDB()."
    );
  }

  const embeddingModel =
    setting(config.embeddingModel, "KNOWLEDGE_EMBED_MODEL") ?? "text-embedding-3-small";
  // Model names missing from the table are assumed to be 1536-dimensional.
  const embeddingDimensions = MODEL_DIMENSIONS[embeddingModel] ?? 1536;

  return {
    connectionString,
    openaiApiKey,
    embeddingModel,
    embeddingDimensions,
    // clientId / projectId are deliberately left as they were: any explicit
    // non-nullish value, including "", is returned unchanged.
    clientId: config.clientId ?? env("KNOWLEDGE_CLIENT_ID"),
    projectId: config.projectId ?? env("KNOWLEDGE_PROJECT_ID")
  };
}
38
+
39
+ // src/embeddings.ts
40
+ import OpenAI from "openai";
41
/**
 * Thin wrapper around the OpenAI embeddings endpoint that turns strings into
 * embedding vectors using the configured model.
 */
var Embedder = class {
  client;
  model;

  /**
   * @param {object} config Resolved config; `openaiApiKey` and
   *   `embeddingModel` are read from it.
   */
  constructor(config) {
    this.client = new OpenAI({ apiKey: config.openaiApiKey });
    this.model = config.embeddingModel;
  }

  /**
   * Embed a batch of strings in one API call.
   *
   * @param {string[]} texts Strings to embed.
   * @returns {Promise<number[][]>} One vector per input, in input order.
   *   Resolves to [] without calling the API when `texts` is empty.
   * @throws {Error} When the API returns a different number of vectors than
   *   inputs, since callers pair chunks and vectors by position.
   */
  async embed(texts) {
    if (texts.length === 0) return [];
    const res = await this.client.embeddings.create({
      model: this.model,
      input: texts
    });
    if (res.data.length !== texts.length) {
      throw new Error(
        `[ai-knowledge-db] Embedding API returned ${res.data.length} vectors for ${texts.length} inputs.`
      );
    }
    // Sort a copy by `index` so the response object itself is never mutated.
    return [...res.data].sort((a, b) => a.index - b.index).map((d) => d.embedding);
  }

  /**
   * Embed a single string.
   *
   * @param {string} text
   * @returns {Promise<number[]>} The embedding vector for `text`.
   */
  async embedOne(text) {
    const [vec] = await this.embed([text]);
    return vec;
  }
};
62
/**
 * Split text into overlapping chunks for embedding.
 *
 * Windows line endings are normalised to "\n" and the text is trimmed. Text
 * that already fits in `maxChars` is returned as a single chunk (or no chunk
 * at all when it is blank). Otherwise the text is split on blank lines,
 * oversized paragraphs are broken up by splitLongUnit(), and the pieces are
 * packed greedily, joined by a blank line. When `overlap > 0`, every chunk
 * after the first starts with the last `overlap` characters accumulated for
 * the previous chunk, so such a chunk can be up to `overlap + 2` characters
 * longer than `maxChars`.
 *
 * @param {string} text Text to split.
 * @param {{ maxChars?: number, overlap?: number }} [opts] `maxChars` defaults
 *   to 1200 and `overlap` to 150.
 * @returns {string[]} Trimmed, non-empty chunks in document order.
 * @throws {RangeError} When `maxChars` is not a number >= 1. Previously a
 *   zero or negative value made the hard-cut loop spin forever.
 */
function chunkText(text, opts = {}) {
  const maxChars = opts.maxChars ?? 1200;
  const overlap = opts.overlap ?? 150;
  // Written as a negated comparison so NaN is rejected as well.
  if (!(maxChars >= 1)) {
    throw new RangeError(`[ai-knowledge-db] chunkText() maxChars must be >= 1, got ${maxChars}.`);
  }

  const clean = text.replace(/\r\n/g, "\n").trim();
  if (clean.length <= maxChars) return clean ? [clean] : [];

  // Paragraph-sized units first; anything still too long is split further.
  const units = clean.split(/\n{2,}/).flatMap((p) => splitLongUnit(p, maxChars));

  const chunks = [];
  let current = "";
  for (const unit of units) {
    // "+ 2" accounts for the blank-line separator added when joining.
    if (current && current.length + unit.length + 2 > maxChars) {
      chunks.push(current.trim());
      // Carry the tail of the finished chunk into the next one.
      current = overlap > 0 ? current.slice(-overlap) + "\n\n" + unit : unit;
    } else {
      current = current ? current + "\n\n" + unit : unit;
    }
  }
  if (current.trim()) chunks.push(current.trim());
  return chunks;
}

/**
 * Break one oversized paragraph into pieces of at most `maxChars` characters,
 * preferring sentence boundaries and falling back to hard character cuts for
 * a single sentence that is itself too long.
 *
 * @param {string} unit A paragraph (no blank lines inside).
 * @param {number} maxChars Upper bound per piece; must be >= 1 (chunkText
 *   validates this before calling).
 * @returns {string[]} Pieces whose concatenation reproduces `unit`.
 */
function splitLongUnit(unit, maxChars) {
  if (unit.length <= maxChars) return [unit];

  // A sentence is a run of text plus ALL of its trailing terminators and
  // whitespace. The second alternative picks up punctuation-only runs, so
  // sequences such as "..." or "?!" are kept rather than silently dropped.
  const sentences = unit.match(/[^.!?\n]+[.!?]*\s*|[.!?]+\s*/g) ?? [unit];

  const out = [];
  let buf = "";
  for (const s of sentences) {
    if (s.length > maxChars) {
      // Flush what we have, then hard-cut the over-long sentence.
      if (buf) {
        out.push(buf);
        buf = "";
      }
      for (let i = 0; i < s.length; i += maxChars) out.push(s.slice(i, i + maxChars));
    } else if (buf.length + s.length > maxChars) {
      out.push(buf);
      buf = s;
    } else {
      buf += s;
    }
  }
  if (buf) out.push(buf);
  return out;
}
103
+
104
+ // src/schema.ts
105
// DDL that KnowledgeDB.init() sends to Postgres in a single query: it enables
// the vector and pgcrypto extensions, then creates the `knowledge` table and
// its indexes. Every statement uses "if not exists".
// NOTE(review): the embedding column is fixed at vector(1536) while
// MODEL_DIMENSIONS maps text-embedding-3-large to 3072 — confirm the column is
// changed by hand before that model is used.
+ var SCHEMA_SQL = `-- AI Knowledge DB schema (Postgres + pgvector)
106
+ -- Run once against your Hetzner/EasyPanel Postgres instance, or via kb.init().
107
+ -- If you switch to text-embedding-3-large, change 1536 -> 3072 and re-index.
108
+
109
+ create extension if not exists vector;
110
+ create extension if not exists "pgcrypto"; -- for gen_random_uuid()
111
+
112
+ create table if not exists knowledge (
113
+ id uuid primary key default gen_random_uuid(),
114
+ scope text not null check (scope in ('global', 'client', 'project')),
115
+ client_id text,
116
+ project_id text,
117
+ source text,
118
+ content text not null,
119
+ embedding vector(1536) not null,
120
+ metadata jsonb not null default '{}',
121
+ created_at timestamptz not null default now()
122
+ );
123
+
124
+ create index if not exists knowledge_client_idx on knowledge (client_id);
125
+ create index if not exists knowledge_project_idx on knowledge (project_id);
126
+ create index if not exists knowledge_scope_idx on knowledge (scope);
127
+ create index if not exists knowledge_metadata_idx on knowledge using gin (metadata);
128
+
129
+ create index if not exists knowledge_embedding_idx
130
+ on knowledge using hnsw (embedding vector_cosine_ops);
131
+ `;
132
+
133
+ // src/client.ts
134
+ var { Pool } = pg;
135
/**
 * Client for the `knowledge` table: content is chunked and embedded on write,
 * and ranked by cosine similarity to the query on read.
 */
var KnowledgeDB = class {
  pool;
  embedder;
  config;

  /**
   * @param {object} [config] Options handed to resolveConfig(); the resolved
   *   result is kept on `this.config` and used to build the connection pool
   *   and the embedder.
   */
  constructor(config = {}) {
    this.config = resolveConfig(config);
    this.pool = new Pool({ connectionString: this.config.connectionString });
    this.embedder = new Embedder(this.config);
  }

  /** Create the extension, table, and indexes if they don't exist. Safe to call repeatedly. */
  async init() {
    await this.pool.query(SCHEMA_SQL);
  }

  /**
   * Add knowledge. The content is chunked, embedded, and stored inside one
   * transaction, so either every chunk is written or none is.
   *
   * Scope/client/project fall back to the configured defaults. When the
   * content yields more than one chunk, each row's metadata additionally
   * carries `chunk` (0-based position) and `chunks` (total count).
   *
   * @param {object} input `content` plus optional `scope`, `clientId`,
   *   `projectId`, `source`, `metadata` and `chunking`.
   * @returns {Promise<string[]>} Ids of the stored rows, one per chunk; []
   *   when the content produces no chunks.
   */
  async add(input) {
    const batch = await prepareBatch(this.config, this.embedder, input);
    if (batch.chunks.length === 0) return [];
    return inTransaction(this.pool, (client) => insertBatch(client, batch));
  }

  /**
   * Replace knowledge from a given source. Existing rows with the same
   * scope + ids + source are deleted and the new chunks inserted. Use this for
   * idempotent re-ingest of a file or URL so duplicates don't accumulate.
   *
   * Embedding happens BEFORE anything is deleted, and the delete and inserts
   * share one transaction, so a failed embedding call or insert leaves the
   * previously stored rows untouched.
   *
   * @param {object} input Same shape as add(), with a required `source`.
   * @returns {Promise<string[]>} Ids of the newly stored rows.
   * @throws {Error} When `source` is empty; without it the delete would match
   *   every row in the scope rather than one source.
   */
  async upsertSource(input) {
    if (!input.source) {
      throw new Error("[ai-knowledge-db] upsertSource() requires a non-empty source.");
    }
    const batch = await prepareBatch(this.config, this.embedder, input);
    return inTransaction(this.pool, async (client) => {
      await deleteMatching(client, {
        scope: batch.scope,
        clientId: batch.clientId ?? void 0,
        projectId: batch.projectId ?? void 0,
        source: input.source
      });
      return insertBatch(client, batch);
    });
  }

  /**
   * Semantic search, scoped to client / project / global knowledge.
   *
   * Rows are eligible when they are project rows for the effective project,
   * client rows for the effective client (unless `includeClientKnowledge` is
   * false), or global rows (unless `includeGlobal` is false); `opts.scopes`
   * further restricts which of the three are considered. When the options
   * leave no eligible scope, the result is [] and neither the embedding API
   * nor the database is called. Earlier versions fell back to global rows in
   * that situation, returning data the caller had excluded.
   *
   * @param {string} query Natural-language query to embed.
   * @param {object} [opts] `clientId`, `projectId`, `includeClientKnowledge`,
   *   `includeGlobal`, `scopes`, `metadata` (jsonb containment filter),
   *   `limit` (default 8) and `minScore` (default 0).
   * @returns {Promise<object[]>} Matches ordered by similarity, each with a
   *   `score` of 1 minus the cosine distance. `minScore` is applied after the
   *   SQL `limit`, so fewer than `limit` rows may come back.
   */
  async search(query, opts = {}) {
    const clientId = opts.clientId ?? this.config.clientId;
    const projectId = opts.projectId ?? this.config.projectId;
    const includeClient = opts.includeClientKnowledge ?? true;
    const includeGlobal = opts.includeGlobal ?? true;
    const limit = opts.limit ?? 8;
    const minScore = opts.minScore ?? 0;

    // $1 is reserved for the query vector; it is filled in only once we know
    // a query will actually run.
    const params = [void 0];
    const p = (v) => `$${params.push(v)}`;

    const scopeClauses = [];
    if (projectId && allows(opts.scopes, "project")) {
      scopeClauses.push(`(scope = 'project' and project_id = ${p(projectId)})`);
    }
    if (clientId && includeClient && allows(opts.scopes, "client")) {
      scopeClauses.push(`(scope = 'client' and client_id = ${p(clientId)})`);
    }
    if (includeGlobal && allows(opts.scopes, "global")) {
      scopeClauses.push(`scope = 'global'`);
    }
    // No clause means global was excluded too, so nothing may be returned.
    if (scopeClauses.length === 0) return [];

    const where = [`(${scopeClauses.join(" or ")})`];
    if (opts.metadata) {
      where.push(`metadata @> ${p(JSON.stringify(opts.metadata))}::jsonb`);
    }
    const limitRef = p(limit);

    params[0] = toVector(await this.embedder.embedOne(query));
    const { rows } = await this.pool.query(
      `select id, scope, client_id, project_id, source, content, metadata, created_at,
 1 - (embedding <=> $1) as score
 from knowledge
 where ${where.join(" and ")}
 order by embedding <=> $1
 limit ${limitRef}`,
      params
    );
    return rows.map(rowToResult).filter((r) => r.score >= minScore);
  }

  /**
   * Delete rows matching a filter.
   *
   * @param {object} filter Any of `id`, `scope`, `clientId`, `projectId`,
   *   `source`; the given fields are combined with AND.
   * @returns {Promise<number>} The number of rows deleted.
   * @throws {Error} When no filter field is set.
   */
  async delete(filter) {
    return deleteMatching(this.pool, filter);
  }

  /** Close the connection pool. Call on shutdown. */
  async close() {
    await this.pool.end();
  }
};

/** Resolve scope/ids for an input, chunk its content and embed every chunk. */
async function prepareBatch(config, embedder, input) {
  const clientId = input.clientId ?? config.clientId ?? null;
  const projectId = input.projectId ?? config.projectId ?? null;
  const scope = input.scope ?? defaultScope(clientId, projectId);
  const chunks = chunkText(input.content, input.chunking);
  const vectors = chunks.length === 0 ? [] : await embedder.embed(chunks);
  return {
    scope,
    clientId,
    projectId,
    source: input.source ?? null,
    metadata: input.metadata ?? {},
    chunks,
    vectors
  };
}

/** Insert one row per chunk through `client` and return the new row ids. */
async function insertBatch(client, batch) {
  const total = batch.chunks.length;
  const ids = [];
  for (let i = 0; i < total; i++) {
    // Position info is only added when the content was actually split.
    const chunkMeta = total > 1 ? { ...batch.metadata, chunk: i, chunks: total } : batch.metadata;
    const { rows } = await client.query(
      `insert into knowledge (scope, client_id, project_id, source, content, embedding, metadata)
 values ($1, $2, $3, $4, $5, $6, $7) returning id`,
      [
        batch.scope,
        batch.clientId,
        batch.projectId,
        batch.source,
        batch.chunks[i],
        toVector(batch.vectors[i]),
        chunkMeta
      ]
    );
    ids.push(rows[0].id);
  }
  return ids;
}

/** Run a filtered delete through `executor` (the pool or a transaction client). */
async function deleteMatching(executor, filter) {
  const where = [];
  const params = [];
  const p = (v) => `$${params.push(v)}`;
  if (filter.id) where.push(`id = ${p(filter.id)}`);
  if (filter.scope) where.push(`scope = ${p(filter.scope)}`);
  if (filter.clientId) where.push(`client_id = ${p(filter.clientId)}`);
  if (filter.projectId) where.push(`project_id = ${p(filter.projectId)}`);
  if (filter.source) where.push(`source = ${p(filter.source)}`);
  if (where.length === 0) {
    throw new Error("[ai-knowledge-db] delete() requires at least one filter to avoid wiping the table.");
  }
  const { rowCount } = await executor.query(
    `delete from knowledge where ${where.join(" and ")}`,
    params
  );
  return rowCount ?? 0;
}

/** Run `work(client)` between begin/commit on one pooled connection, rolling back on failure. */
async function inTransaction(pool, work) {
  const client = await pool.connect();
  try {
    await client.query("begin");
    const result = await work(client);
    await client.query("commit");
    return result;
  } catch (err) {
    try {
      await client.query("rollback");
    } catch {
      // A failed rollback must not hide the error that caused it.
    }
    throw err;
  } finally {
    client.release();
  }
}
257
/**
 * Convenience factory: builds a KnowledgeDB from the given options.
 *
 * @param {object} [config] Options forwarded unchanged to the KnowledgeDB
 *   constructor.
 * @returns {KnowledgeDB} The new instance.
 */
function createKnowledgeDB(config) {
  const db = new KnowledgeDB(config);
  return db;
}
260
/**
 * Pick the narrowest scope implied by the ids that are present.
 *
 * @param {string | null | undefined} clientId
 * @param {string | null | undefined} projectId
 * @returns {"project" | "client" | "global"} "project" when a project id is
 *   set, otherwise "client" when a client id is set, otherwise "global".
 */
function defaultScope(clientId, projectId) {
  return projectId ? "project" : clientId ? "client" : "global";
}
265
/**
 * Tell whether a scope passes an optional allow-list.
 *
 * @param {string[] | null | undefined} scopes Allow-list; a missing list
 *   permits every scope.
 * @param {string} scope Scope to check.
 * @returns {boolean} true when no list was given or the list contains `scope`.
 */
function allows(scopes, scope) {
  if (scopes) {
    return scopes.includes(scope);
  }
  return true;
}
268
/**
 * Serialise an embedding as a bracketed, comma-separated literal such as
 * "[0.1,0.2]", the form passed as the vector query parameter.
 *
 * @param {number[]} vec Embedding values.
 * @returns {string} The values joined by commas and wrapped in brackets.
 */
function toVector(vec) {
  const body = vec.join(",");
  return "[" + body + "]";
}
271
/**
 * Convert a search row into a result: the record fields plus a numeric
 * `score` (the driver value is passed through Number()).
 *
 * @param {object} row Row as returned by the search query.
 * @returns {object} The record with `score` added.
 */
function rowToResult(row) {
  const record = rowToRecord(row);
  const score = Number(row.score);
  return { ...record, score };
}

/**
 * Map a snake_case database row onto the camelCase record shape.
 *
 * @param {object} row Row with id, scope, client_id, project_id, source,
 *   content, metadata and created_at columns.
 * @returns {object} Record with `metadata` defaulting to {} and `createdAt`
 *   as an ISO string when the column arrived as a Date (any other value is
 *   passed through unchanged).
 */
function rowToRecord(row) {
  const created = row.created_at;
  const createdAt = created instanceof Date ? created.toISOString() : created;
  const metadata = row.metadata ?? {};
  return {
    id: row.id,
    scope: row.scope,
    clientId: row.client_id,
    projectId: row.project_id,
    source: row.source,
    content: row.content,
    metadata,
    createdAt
  };
}
286
+ export {
287
+ KnowledgeDB,
288
+ chunkText,
289
+ createKnowledgeDB,
290
+ resolveConfig
291
+ };
292
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/client.ts","../src/config.ts","../src/embeddings.ts","../src/schema.ts"],"sourcesContent":["import pg from \"pg\";\nimport { resolveConfig, type KnowledgeConfig, type ResolvedConfig } from \"./config.js\";\nimport { Embedder, chunkText } from \"./embeddings.js\";\nimport { SCHEMA_SQL } from \"./schema.js\";\nimport type {\n AddInput,\n DeleteFilter,\n KnowledgeRecord,\n Scope,\n SearchOptions,\n SearchResult,\n} from \"./types.js\";\n\nconst { Pool } = pg;\n\nexport class KnowledgeDB {\n private pool: pg.Pool;\n private embedder: Embedder;\n readonly config: ResolvedConfig;\n\n constructor(config: KnowledgeConfig = {}) {\n this.config = resolveConfig(config);\n this.pool = new Pool({ connectionString: this.config.connectionString });\n this.embedder = new Embedder(this.config);\n }\n\n /** Create the extension, table, and indexes if they don't exist. Safe to call repeatedly. */\n async init(): Promise<void> {\n await this.pool.query(SCHEMA_SQL);\n }\n\n /**\n * Add knowledge. The content is chunked, embedded, and stored. Returns the\n * ids of the stored rows (one per chunk). Scope/client/project fall back to\n * the env-configured defaults.\n */\n async add(input: AddInput): Promise<string[]> {\n const clientId = input.clientId ?? this.config.clientId ?? null;\n const projectId = input.projectId ?? this.config.projectId ?? null;\n const scope = input.scope ?? defaultScope(clientId, projectId);\n const source = input.source ?? null;\n const metadata = input.metadata ?? {};\n\n const chunks = chunkText(input.content, input.chunking);\n if (chunks.length === 0) return [];\n\n const vectors = await this.embedder.embed(chunks);\n const ids: string[] = [];\n\n const client = await this.pool.connect();\n try {\n await client.query(\"begin\");\n for (let i = 0; i < chunks.length; i++) {\n const chunkMeta =\n chunks.length > 1\n ? 
{ ...metadata, chunk: i, chunks: chunks.length }\n : metadata;\n const { rows } = await client.query(\n `insert into knowledge (scope, client_id, project_id, source, content, embedding, metadata)\n values ($1, $2, $3, $4, $5, $6, $7) returning id`,\n [scope, clientId, projectId, source, chunks[i], toVector(vectors[i]), chunkMeta],\n );\n ids.push(rows[0].id);\n }\n await client.query(\"commit\");\n } catch (err) {\n await client.query(\"rollback\");\n throw err;\n } finally {\n client.release();\n }\n return ids;\n }\n\n /**\n * Replace knowledge from a given source. Deletes existing rows that match the\n * same scope + ids + source, then re-adds. Use this for idempotent re-ingest\n * of a file or URL so you don't accumulate duplicates.\n */\n async upsertSource(input: AddInput & { source: string }): Promise<string[]> {\n const clientId = input.clientId ?? this.config.clientId ?? null;\n const projectId = input.projectId ?? this.config.projectId ?? null;\n const scope = input.scope ?? defaultScope(clientId, projectId);\n await this.delete({ scope, clientId: clientId ?? undefined, projectId: projectId ?? undefined, source: input.source });\n return this.add(input);\n }\n\n /** Semantic search, scoped to client / project / global knowledge. */\n async search(query: string, opts: SearchOptions = {}): Promise<SearchResult[]> {\n const clientId = opts.clientId ?? this.config.clientId;\n const projectId = opts.projectId ?? this.config.projectId;\n const includeClient = opts.includeClientKnowledge ?? true;\n const includeGlobal = opts.includeGlobal ?? true;\n const limit = opts.limit ?? 8;\n const minScore = opts.minScore ?? 
0;\n\n const queryVec = toVector(await this.embedder.embedOne(query));\n\n // Build a scope clause: project rows, optionally the client's shared rows,\n // optionally global rows — restricted to the requested scopes if given.\n const orClauses: string[] = [];\n const params: unknown[] = [queryVec];\n const p = (v: unknown) => `$${params.push(v)}`;\n\n if (projectId && allows(opts.scopes, \"project\")) {\n orClauses.push(`(scope = 'project' and project_id = ${p(projectId)})`);\n }\n if (clientId && includeClient && allows(opts.scopes, \"client\")) {\n orClauses.push(`(scope = 'client' and client_id = ${p(clientId)})`);\n }\n if (includeGlobal && allows(opts.scopes, \"global\")) {\n orClauses.push(`scope = 'global'`);\n }\n // If nothing matched (e.g. no ids at all), fall back to global-only.\n const scopeClause = orClauses.length ? `(${orClauses.join(\" or \")})` : `scope = 'global'`;\n\n const where: string[] = [scopeClause];\n if (opts.metadata) {\n where.push(`metadata @> ${p(JSON.stringify(opts.metadata))}::jsonb`);\n }\n\n const { rows } = await this.pool.query(\n `select id, scope, client_id, project_id, source, content, metadata, created_at,\n 1 - (embedding <=> $1) as score\n from knowledge\n where ${where.join(\" and \")}\n order by embedding <=> $1\n limit ${p(limit)}`,\n params,\n );\n\n return rows\n .map(rowToResult)\n .filter((r) => r.score >= minScore);\n }\n\n /** Delete rows matching a filter. Returns the number deleted. 
*/\n async delete(filter: DeleteFilter): Promise<number> {\n const where: string[] = [];\n const params: unknown[] = [];\n const p = (v: unknown) => `$${params.push(v)}`;\n if (filter.id) where.push(`id = ${p(filter.id)}`);\n if (filter.scope) where.push(`scope = ${p(filter.scope)}`);\n if (filter.clientId) where.push(`client_id = ${p(filter.clientId)}`);\n if (filter.projectId) where.push(`project_id = ${p(filter.projectId)}`);\n if (filter.source) where.push(`source = ${p(filter.source)}`);\n if (where.length === 0) {\n throw new Error(\"[ai-knowledge-db] delete() requires at least one filter to avoid wiping the table.\");\n }\n const { rowCount } = await this.pool.query(\n `delete from knowledge where ${where.join(\" and \")}`,\n params,\n );\n return rowCount ?? 0;\n }\n\n /** Close the connection pool. Call on shutdown. */\n async close(): Promise<void> {\n await this.pool.end();\n }\n}\n\nexport function createKnowledgeDB(config?: KnowledgeConfig): KnowledgeDB {\n return new KnowledgeDB(config);\n}\n\nfunction defaultScope(clientId: string | null, projectId: string | null): Scope {\n if (projectId) return \"project\";\n if (clientId) return \"client\";\n return \"global\";\n}\n\nfunction allows(scopes: Scope[] | undefined, scope: Scope): boolean {\n return !scopes || scopes.includes(scope);\n}\n\n/** pgvector accepts a vector literal like '[0.1,0.2,...]'. */\nfunction toVector(vec: number[]): string {\n return `[${vec.join(\",\")}]`;\n}\n\nfunction rowToResult(row: any): SearchResult {\n return { ...rowToRecord(row), score: Number(row.score) };\n}\n\nfunction rowToRecord(row: any): KnowledgeRecord {\n return {\n id: row.id,\n scope: row.scope,\n clientId: row.client_id,\n projectId: row.project_id,\n source: row.source,\n content: row.content,\n metadata: row.metadata ?? {},\n createdAt: row.created_at instanceof Date ? 
row.created_at.toISOString() : row.created_at,\n };\n}\n","/**\n * All sensitive / per-deployment values live in the *consuming* project's\n * environment — never in this package. A website repo that installs\n * `@dibe/ai-knowledge-db` sets these in its own `.env`:\n *\n * KNOWLEDGE_DB_URL=postgres://user:pass@host:5432/knowledge (Hetzner/EasyPanel)\n * OPENAI_API_KEY=sk-...\n * KNOWLEDGE_CLIENT_ID=acme-corp # default client for this repo\n * KNOWLEDGE_PROJECT_ID=acme-website-2026 # default project for this repo\n *\n * Anything passed explicitly to createKnowledgeDB() overrides the env value,\n * but env is the intended default so callers usually pass nothing.\n */\n\nexport interface KnowledgeConfig {\n /** Postgres connection string. Defaults to env KNOWLEDGE_DB_URL. */\n connectionString?: string;\n /** OpenAI API key. Defaults to env OPENAI_API_KEY. */\n openaiApiKey?: string;\n /** Embedding model. Defaults to env KNOWLEDGE_EMBED_MODEL or text-embedding-3-small. */\n embeddingModel?: string;\n /** Default client scope for this repo. Defaults to env KNOWLEDGE_CLIENT_ID. */\n clientId?: string;\n /** Default project scope for this repo. Defaults to env KNOWLEDGE_PROJECT_ID. */\n projectId?: string;\n}\n\nexport interface ResolvedConfig {\n connectionString: string;\n openaiApiKey: string;\n embeddingModel: string;\n embeddingDimensions: number;\n clientId?: string;\n projectId?: string;\n}\n\n/** text-embedding-3-small → 1536, text-embedding-3-large → 3072. */\nconst MODEL_DIMENSIONS: Record<string, number> = {\n \"text-embedding-3-small\": 1536,\n \"text-embedding-3-large\": 3072,\n \"text-embedding-ada-002\": 1536,\n};\n\nconst env = (key: string): string | undefined => {\n const v = process.env[key];\n return v && v.trim() !== \"\" ? v.trim() : undefined;\n};\n\nexport function resolveConfig(config: KnowledgeConfig = {}): ResolvedConfig {\n const connectionString = config.connectionString ?? 
env(\"KNOWLEDGE_DB_URL\");\n if (!connectionString) {\n throw new Error(\n \"[ai-knowledge-db] Missing connection string. Set KNOWLEDGE_DB_URL in your project's .env \" +\n \"or pass { connectionString } to createKnowledgeDB().\",\n );\n }\n\n const openaiApiKey = config.openaiApiKey ?? env(\"OPENAI_API_KEY\");\n if (!openaiApiKey) {\n throw new Error(\n \"[ai-knowledge-db] Missing OpenAI key. Set OPENAI_API_KEY in your project's .env \" +\n \"or pass { openaiApiKey } to createKnowledgeDB().\",\n );\n }\n\n const embeddingModel =\n config.embeddingModel ?? env(\"KNOWLEDGE_EMBED_MODEL\") ?? \"text-embedding-3-small\";\n const embeddingDimensions = MODEL_DIMENSIONS[embeddingModel] ?? 1536;\n\n return {\n connectionString,\n openaiApiKey,\n embeddingModel,\n embeddingDimensions,\n clientId: config.clientId ?? env(\"KNOWLEDGE_CLIENT_ID\"),\n projectId: config.projectId ?? env(\"KNOWLEDGE_PROJECT_ID\"),\n };\n}\n","import OpenAI from \"openai\";\nimport type { ResolvedConfig } from \"./config.js\";\nimport type { ChunkOptions } from \"./types.js\";\n\nexport class Embedder {\n private client: OpenAI;\n private model: string;\n\n constructor(config: ResolvedConfig) {\n this.client = new OpenAI({ apiKey: config.openaiApiKey });\n this.model = config.embeddingModel;\n }\n\n /** Embed a batch of strings in one API call. */\n async embed(texts: string[]): Promise<number[][]> {\n if (texts.length === 0) return [];\n const res = await this.client.embeddings.create({\n model: this.model,\n input: texts,\n });\n // OpenAI preserves input order in the response.\n return res.data\n .sort((a, b) => a.index - b.index)\n .map((d) => d.embedding as number[]);\n }\n\n async embedOne(text: string): Promise<number[]> {\n const [vec] = await this.embed([text]);\n return vec;\n }\n}\n\n/**\n * Split text into overlapping chunks. 
Prefers paragraph boundaries, then\n * sentence boundaries, falling back to hard character cuts for very long runs.\n */\nexport function chunkText(text: string, opts: ChunkOptions = {}): string[] {\n const maxChars = opts.maxChars ?? 1200;\n const overlap = opts.overlap ?? 150;\n const clean = text.replace(/\\r\\n/g, \"\\n\").trim();\n if (clean.length <= maxChars) return clean ? [clean] : [];\n\n // Split into paragraph-ish units first.\n const units = clean.split(/\\n{2,}/).flatMap((p) => splitLongUnit(p, maxChars));\n\n const chunks: string[] = [];\n let current = \"\";\n for (const unit of units) {\n if (current && current.length + unit.length + 2 > maxChars) {\n chunks.push(current.trim());\n // carry overlap from the tail of the previous chunk\n current = overlap > 0 ? current.slice(-overlap) + \"\\n\\n\" + unit : unit;\n } else {\n current = current ? current + \"\\n\\n\" + unit : unit;\n }\n }\n if (current.trim()) chunks.push(current.trim());\n return chunks;\n}\n\n/** Break a single oversized paragraph on sentence, then hard, boundaries. */\nfunction splitLongUnit(unit: string, maxChars: number): string[] {\n if (unit.length <= maxChars) return [unit];\n const sentences = unit.match(/[^.!?\\n]+[.!?]?\\s*/g) ?? [unit];\n const out: string[] = [];\n let buf = \"\";\n for (const s of sentences) {\n if (s.length > maxChars) {\n if (buf) {\n out.push(buf);\n buf = \"\";\n }\n for (let i = 0; i < s.length; i += maxChars) out.push(s.slice(i, i + maxChars));\n } else if (buf.length + s.length > maxChars) {\n out.push(buf);\n buf = s;\n } else {\n buf += s;\n }\n }\n if (buf) out.push(buf);\n return out;\n}\n","/**\n * Canonical schema (Postgres + pgvector), kept as a string so the library never\n * has to read from disk — works identically in the ESM and CJS builds. The\n * build also writes this out to dist/schema.sql for the `./schema.sql` export\n * and for running by hand. 
Vector size matches text-embedding-3-small (1536).\n */\nexport const SCHEMA_SQL = `-- AI Knowledge DB schema (Postgres + pgvector)\n-- Run once against your Hetzner/EasyPanel Postgres instance, or via kb.init().\n-- If you switch to text-embedding-3-large, change 1536 -> 3072 and re-index.\n\ncreate extension if not exists vector;\ncreate extension if not exists \"pgcrypto\"; -- for gen_random_uuid()\n\ncreate table if not exists knowledge (\n id uuid primary key default gen_random_uuid(),\n scope text not null check (scope in ('global', 'client', 'project')),\n client_id text,\n project_id text,\n source text,\n content text not null,\n embedding vector(1536) not null,\n metadata jsonb not null default '{}',\n created_at timestamptz not null default now()\n);\n\ncreate index if not exists knowledge_client_idx on knowledge (client_id);\ncreate index if not exists knowledge_project_idx on knowledge (project_id);\ncreate index if not exists knowledge_scope_idx on knowledge (scope);\ncreate index if not exists knowledge_metadata_idx on knowledge using gin (metadata);\n\ncreate index if not exists knowledge_embedding_idx\n on knowledge using hnsw (embedding 
vector_cosine_ops);\n`;\n"],"mappings":";AAAA,OAAO,QAAQ;;;ACqCf,IAAM,mBAA2C;AAAA,EAC/C,0BAA0B;AAAA,EAC1B,0BAA0B;AAAA,EAC1B,0BAA0B;AAC5B;AAEA,IAAM,MAAM,CAAC,QAAoC;AAC/C,QAAM,IAAI,QAAQ,IAAI,GAAG;AACzB,SAAO,KAAK,EAAE,KAAK,MAAM,KAAK,EAAE,KAAK,IAAI;AAC3C;AAEO,SAAS,cAAc,SAA0B,CAAC,GAAmB;AAC1E,QAAM,mBAAmB,OAAO,oBAAoB,IAAI,kBAAkB;AAC1E,MAAI,CAAC,kBAAkB;AACrB,UAAM,IAAI;AAAA,MACR;AAAA,IAEF;AAAA,EACF;AAEA,QAAM,eAAe,OAAO,gBAAgB,IAAI,gBAAgB;AAChE,MAAI,CAAC,cAAc;AACjB,UAAM,IAAI;AAAA,MACR;AAAA,IAEF;AAAA,EACF;AAEA,QAAM,iBACJ,OAAO,kBAAkB,IAAI,uBAAuB,KAAK;AAC3D,QAAM,sBAAsB,iBAAiB,cAAc,KAAK;AAEhE,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,UAAU,OAAO,YAAY,IAAI,qBAAqB;AAAA,IACtD,WAAW,OAAO,aAAa,IAAI,sBAAsB;AAAA,EAC3D;AACF;;;AC7EA,OAAO,YAAY;AAIZ,IAAM,WAAN,MAAe;AAAA,EACZ;AAAA,EACA;AAAA,EAER,YAAY,QAAwB;AAClC,SAAK,SAAS,IAAI,OAAO,EAAE,QAAQ,OAAO,aAAa,CAAC;AACxD,SAAK,QAAQ,OAAO;AAAA,EACtB;AAAA;AAAA,EAGA,MAAM,MAAM,OAAsC;AAChD,QAAI,MAAM,WAAW,EAAG,QAAO,CAAC;AAChC,UAAM,MAAM,MAAM,KAAK,OAAO,WAAW,OAAO;AAAA,MAC9C,OAAO,KAAK;AAAA,MACZ,OAAO;AAAA,IACT,CAAC;AAED,WAAO,IAAI,KACR,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK,EAChC,IAAI,CAAC,MAAM,EAAE,SAAqB;AAAA,EACvC;AAAA,EAEA,MAAM,SAAS,MAAiC;AAC9C,UAAM,CAAC,GAAG,IAAI,MAAM,KAAK,MAAM,CAAC,IAAI,CAAC;AACrC,WAAO;AAAA,EACT;AACF;AAMO,SAAS,UAAU,MAAc,OAAqB,CAAC,GAAa;AACzE,QAAM,WAAW,KAAK,YAAY;AAClC,QAAM,UAAU,KAAK,WAAW;AAChC,QAAM,QAAQ,KAAK,QAAQ,SAAS,IAAI,EAAE,KAAK;AAC/C,MAAI,MAAM,UAAU,SAAU,QAAO,QAAQ,CAAC,KAAK,IAAI,CAAC;AAGxD,QAAM,QAAQ,MAAM,MAAM,QAAQ,EAAE,QAAQ,CAAC,MAAM,cAAc,GAAG,QAAQ,CAAC;AAE7E,QAAM,SAAmB,CAAC;AAC1B,MAAI,UAAU;AACd,aAAW,QAAQ,OAAO;AACxB,QAAI,WAAW,QAAQ,SAAS,KAAK,SAAS,IAAI,UAAU;AAC1D,aAAO,KAAK,QAAQ,KAAK,CAAC;AAE1B,gBAAU,UAAU,IAAI,QAAQ,MAAM,CAAC,OAAO,IAAI,SAAS,OAAO;AAAA,IACpE,OAAO;AACL,gBAAU,UAAU,UAAU,SAAS,OAAO;AAAA,IAChD;AAAA,EACF;AACA,MAAI,QAAQ,KAAK,EAAG,QAAO,KAAK,QAAQ,KAAK,CAAC;AAC9C,SAAO;AACT;AAGA,SAAS,cAAc,MAAc,UAA4B;AAC/D,MAAI,KAAK,UAAU,SAAU,QAAO,CAAC,IAAI;AACzC,QAAM,YAAY,KAAK,MAAM,qBAAqB,KAAK,CAAC,IAAI;AAC5D,QAAM,MAAgB,CAAC;AACvB,MAAI,MAAM;AACV,aA
AW,KAAK,WAAW;AACzB,QAAI,EAAE,SAAS,UAAU;AACvB,UAAI,KAAK;AACP,YAAI,KAAK,GAAG;AACZ,cAAM;AAAA,MACR;AACA,eAAS,IAAI,GAAG,IAAI,EAAE,QAAQ,KAAK,SAAU,KAAI,KAAK,EAAE,MAAM,GAAG,IAAI,QAAQ,CAAC;AAAA,IAChF,WAAW,IAAI,SAAS,EAAE,SAAS,UAAU;AAC3C,UAAI,KAAK,GAAG;AACZ,YAAM;AAAA,IACR,OAAO;AACL,aAAO;AAAA,IACT;AAAA,EACF;AACA,MAAI,IAAK,KAAI,KAAK,GAAG;AACrB,SAAO;AACT;;;AC5EO,IAAM,aAAa;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;AHO1B,IAAM,EAAE,KAAK,IAAI;AAEV,IAAM,cAAN,MAAkB;AAAA,EACf;AAAA,EACA;AAAA,EACC;AAAA,EAET,YAAY,SAA0B,CAAC,GAAG;AACxC,SAAK,SAAS,cAAc,MAAM;AAClC,SAAK,OAAO,IAAI,KAAK,EAAE,kBAAkB,KAAK,OAAO,iBAAiB,CAAC;AACvE,SAAK,WAAW,IAAI,SAAS,KAAK,MAAM;AAAA,EAC1C;AAAA;AAAA,EAGA,MAAM,OAAsB;AAC1B,UAAM,KAAK,KAAK,MAAM,UAAU;AAAA,EAClC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,IAAI,OAAoC;AAC5C,UAAM,WAAW,MAAM,YAAY,KAAK,OAAO,YAAY;AAC3D,UAAM,YAAY,MAAM,aAAa,KAAK,OAAO,aAAa;AAC9D,UAAM,QAAQ,MAAM,SAAS,aAAa,UAAU,SAAS;AAC7D,UAAM,SAAS,MAAM,UAAU;AAC/B,UAAM,WAAW,MAAM,YAAY,CAAC;AAEpC,UAAM,SAAS,UAAU,MAAM,SAAS,MAAM,QAAQ;AACtD,QAAI,OAAO,WAAW,EAAG,QAAO,CAAC;AAEjC,UAAM,UAAU,MAAM,KAAK,SAAS,MAAM,MAAM;AAChD,UAAM,MAAgB,CAAC;AAEvB,UAAM,SAAS,MAAM,KAAK,KAAK,QAAQ;AACvC,QAAI;AACF,YAAM,OAAO,MAAM,OAAO;AAC1B,eAAS,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;AACtC,cAAM,YACJ,OAAO,SAAS,IACZ,EAAE,GAAG,UAAU,OAAO,GAAG,QAAQ,OAAO,OAAO,IAC/C;AACN,cAAM,EAAE,KAAK,IAAI,MAAM,OAAO;AAAA,UAC5B;AAAA;AAAA,UAEA,CAAC,OAAO,UAAU,WAAW,QAAQ,OAAO,CAAC,GAAG,SAAS,QAAQ,CAAC,CAAC,GAAG,SAAS;AAAA,QACjF;AACA,YAAI,KAAK,KAAK,CAAC,EAAE,EAAE;AAAA,MACrB;AACA,YAAM,OAAO,MAAM,QAAQ;AAAA,IAC7B,SAAS,KAAK;AACZ,YAAM,OAAO,MAAM,UAAU;AAC7B,YAAM;AAAA,IACR,UAAE;AACA,aAAO,QAAQ;AAAA,IACjB;AACA,WAAO;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,aAAa,OAAyD;AAC1E,UAAM,WAAW,MAAM,YAAY,KAAK,OAAO,YAAY;AAC3D,UAAM,YAAY,MAAM,aAAa,KAAK,OAAO,aAAa;AAC9D,UAAM,QAAQ,MAAM,SAAS,aAAa,UAAU,SAAS;AAC7D,UAAM,KAAK,OAAO,EAAE,OAAO,UAAU,YAAY,QAAW,WAAW,aAAa,QAAW,QAAQ,MAAM,OAAO,CAAC;AACrH,WAAO,KAAK,IAAI,KAAK;AAAA,EACvB;AAAA;AAAA,EAGA
,MAAM,OAAO,OAAe,OAAsB,CAAC,GAA4B;AAC7E,UAAM,WAAW,KAAK,YAAY,KAAK,OAAO;AAC9C,UAAM,YAAY,KAAK,aAAa,KAAK,OAAO;AAChD,UAAM,gBAAgB,KAAK,0BAA0B;AACrD,UAAM,gBAAgB,KAAK,iBAAiB;AAC5C,UAAM,QAAQ,KAAK,SAAS;AAC5B,UAAM,WAAW,KAAK,YAAY;AAElC,UAAM,WAAW,SAAS,MAAM,KAAK,SAAS,SAAS,KAAK,CAAC;AAI7D,UAAM,YAAsB,CAAC;AAC7B,UAAM,SAAoB,CAAC,QAAQ;AACnC,UAAM,IAAI,CAAC,MAAe,IAAI,OAAO,KAAK,CAAC,CAAC;AAE5C,QAAI,aAAa,OAAO,KAAK,QAAQ,SAAS,GAAG;AAC/C,gBAAU,KAAK,uCAAuC,EAAE,SAAS,CAAC,GAAG;AAAA,IACvE;AACA,QAAI,YAAY,iBAAiB,OAAO,KAAK,QAAQ,QAAQ,GAAG;AAC9D,gBAAU,KAAK,qCAAqC,EAAE,QAAQ,CAAC,GAAG;AAAA,IACpE;AACA,QAAI,iBAAiB,OAAO,KAAK,QAAQ,QAAQ,GAAG;AAClD,gBAAU,KAAK,kBAAkB;AAAA,IACnC;AAEA,UAAM,cAAc,UAAU,SAAS,IAAI,UAAU,KAAK,MAAM,CAAC,MAAM;AAEvE,UAAM,QAAkB,CAAC,WAAW;AACpC,QAAI,KAAK,UAAU;AACjB,YAAM,KAAK,eAAe,EAAE,KAAK,UAAU,KAAK,QAAQ,CAAC,CAAC,SAAS;AAAA,IACrE;AAEA,UAAM,EAAE,KAAK,IAAI,MAAM,KAAK,KAAK;AAAA,MAC/B;AAAA;AAAA;AAAA,eAGS,MAAM,KAAK,OAAO,CAAC;AAAA;AAAA,eAEnB,EAAE,KAAK,CAAC;AAAA,MACjB;AAAA,IACF;AAEA,WAAO,KACJ,IAAI,WAAW,EACf,OAAO,CAAC,MAAM,EAAE,SAAS,QAAQ;AAAA,EACtC;AAAA;AAAA,EAGA,MAAM,OAAO,QAAuC;AAClD,UAAM,QAAkB,CAAC;AACzB,UAAM,SAAoB,CAAC;AAC3B,UAAM,IAAI,CAAC,MAAe,IAAI,OAAO,KAAK,CAAC,CAAC;AAC5C,QAAI,OAAO,GAAI,OAAM,KAAK,QAAQ,EAAE,OAAO,EAAE,CAAC,EAAE;AAChD,QAAI,OAAO,MAAO,OAAM,KAAK,WAAW,EAAE,OAAO,KAAK,CAAC,EAAE;AACzD,QAAI,OAAO,SAAU,OAAM,KAAK,eAAe,EAAE,OAAO,QAAQ,CAAC,EAAE;AACnE,QAAI,OAAO,UAAW,OAAM,KAAK,gBAAgB,EAAE,OAAO,SAAS,CAAC,EAAE;AACtE,QAAI,OAAO,OAAQ,OAAM,KAAK,YAAY,EAAE,OAAO,MAAM,CAAC,EAAE;AAC5D,QAAI,MAAM,WAAW,GAAG;AACtB,YAAM,IAAI,MAAM,oFAAoF;AAAA,IACtG;AACA,UAAM,EAAE,SAAS,IAAI,MAAM,KAAK,KAAK;AAAA,MACnC,+BAA+B,MAAM,KAAK,OAAO,CAAC;AAAA,MAClD;AAAA,IACF;AACA,WAAO,YAAY;AAAA,EACrB;AAAA;AAAA,EAGA,MAAM,QAAuB;AAC3B,UAAM,KAAK,KAAK,IAAI;AAAA,EACtB;AACF;AAEO,SAAS,kBAAkB,QAAuC;AACvE,SAAO,IAAI,YAAY,MAAM;AAC/B;AAEA,SAAS,aAAa,UAAyB,WAAiC;AAC9E,MAAI,UAAW,QAAO;AACtB,MAAI,SAAU,QAAO;AACrB,SAAO;AACT;AAEA,SAAS,OAAO,QAA6B,OAAuB;AAClE,SAAO,CAAC,UAAU,OAAO,SAAS,KAAK;AACzC;AAGA,SAAS,SAAS,KAAuB;AACvC,SAAO,IAAI,IAAI,KAAK,GAAG,CAAC;AAC1B;AAEA,
SAAS,YAAY,KAAwB;AAC3C,SAAO,EAAE,GAAG,YAAY,GAAG,GAAG,OAAO,OAAO,IAAI,KAAK,EAAE;AACzD;AAEA,SAAS,YAAY,KAA2B;AAC9C,SAAO;AAAA,IACL,IAAI,IAAI;AAAA,IACR,OAAO,IAAI;AAAA,IACX,UAAU,IAAI;AAAA,IACd,WAAW,IAAI;AAAA,IACf,QAAQ,IAAI;AAAA,IACZ,SAAS,IAAI;AAAA,IACb,UAAU,IAAI,YAAY,CAAC;AAAA,IAC3B,WAAW,IAAI,sBAAsB,OAAO,IAAI,WAAW,YAAY,IAAI,IAAI;AAAA,EACjF;AACF;","names":[]}
@@ -0,0 +1,30 @@
1
+ -- AI Knowledge DB schema (Postgres + pgvector)
2
+ -- Run once against your Hetzner/EasyPanel Postgres instance, or via kb.init().
3
+ -- The vector dimension below matches text-embedding-3-small (1536).
4
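+ -- If you switch to text-embedding-3-large, change 1536 -> 3072 and re-index. Note: pgvector's HNSW index supports at most 2,000 dimensions for the vector type, so the embedding index at the bottom of this file must then be changed as well (e.g. index a halfvec cast, which supports up to 4,000 dimensions) or removed.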
5
+
6
+ create extension if not exists vector;
7
+ create extension if not exists "pgcrypto"; -- for gen_random_uuid()
8
+
9
+ create table if not exists knowledge (
10
+ id uuid primary key default gen_random_uuid(),
11
+ scope text not null check (scope in ('global', 'client', 'project')),
12
+ client_id text,
13
+ project_id text,
14
+ source text,
15
+ content text not null,
16
+ embedding vector(1536) not null,
17
+ metadata jsonb not null default '{}',
18
+ created_at timestamptz not null default now()
19
+ );
20
+
21
+ -- Scope filtering happens on every search, so index the columns we filter by.
22
+ create index if not exists knowledge_client_idx on knowledge (client_id);
23
+ create index if not exists knowledge_project_idx on knowledge (project_id);
24
+ create index if not exists knowledge_scope_idx on knowledge (scope);
25
+ create index if not exists knowledge_metadata_idx on knowledge using gin (metadata);
26
+
27
+ -- Approximate nearest-neighbour index for fast cosine search.
28
+ -- HNSW gives good recall/speed without needing to tune a `lists` parameter, as IVFFlat does.
29
+ create index if not exists knowledge_embedding_idx
30
+ on knowledge using hnsw (embedding vector_cosine_ops);
package/package.json ADDED
@@ -0,0 +1,63 @@
1
+ {
2
+ "name": "@digitalvibes/ai-knowledge-db",
3
+ "version": "0.1.0",
4
+ "description": "AI knowledge vector storage (Postgres + pgvector) for Dibe website/project work. Stores client- and project-scoped knowledge and serves it back via semantic search.",
5
+ "type": "module",
6
+ "license": "MIT",
7
+ "packageManager": "pnpm@9.15.2",
8
+ "publishConfig": {
9
+ "access": "public"
10
+ },
11
+ "sideEffects": false,
12
+ "main": "./dist/index.cjs",
13
+ "module": "./dist/index.js",
14
+ "types": "./dist/index.d.ts",
15
+ "exports": {
16
+ ".": {
17
+ "types": "./dist/index.d.ts",
18
+ "import": "./dist/index.js",
19
+ "require": "./dist/index.cjs",
20
+ "default": "./dist/index.js"
21
+ },
22
+ "./schema.sql": "./migrations/001_init_knowledge.sql"
23
+ },
24
+ "files": [
25
+ "dist",
26
+ "migrations",
27
+ "skills",
28
+ "README.md"
29
+ ],
30
+ "bin": {
31
+ "knowledge-db": "./dist/cli.js"
32
+ },
33
+ "scripts": {
34
+ "build": "tsup",
35
+ "dev": "tsup --watch",
36
+ "typecheck": "tsc --noEmit",
37
+ "prepack": "pnpm build",
38
+ "prepublishOnly": "pnpm typecheck && pnpm build"
39
+ },
40
+ "keywords": [
41
+ "vector",
42
+ "embeddings",
43
+ "pgvector",
44
+ "rag",
45
+ "knowledge-base",
46
+ "openai",
47
+ "ai"
48
+ ],
49
+ "author": "Dibe",
50
+ "dependencies": {
51
+ "openai": "^4.77.0",
52
+ "pg": "^8.13.1"
53
+ },
54
+ "devDependencies": {
55
+ "@types/node": "^22.10.0",
56
+ "@types/pg": "^8.11.10",
57
+ "tsup": "^8.3.5",
58
+ "typescript": "^5.7.2"
59
+ },
60
+ "engines": {
61
+ "node": ">=18"
62
+ }
63
+ }
@@ -0,0 +1,87 @@
1
+ ---
2
+ name: knowledge-db
3
+ description: Store and retrieve client- and project-scoped knowledge from the Dibe AI Knowledge vector DB (Postgres + pgvector). Use whenever you need durable project/client context — brand voice, decisions, specs, past work — or when the user says "remember this", "save to knowledge", "what do we know about X", "look up the client's …", or is building a website/app that should pull from stored knowledge.
4
+ ---
5
+
6
+ # Dibe AI Knowledge DB
7
+
8
+ A small vector store (`@digitalvibes/ai-knowledge-db`) that holds **client**, **project**, and **global** knowledge and serves it back via semantic search. Use it to persist context that should survive across sessions and to retrieve relevant context before doing work.
9
+
10
+ ## When to use this
11
+
12
+ - **Retrieve** before building: pull brand voice, design tokens, prior decisions, client facts before writing copy/components.
13
+ - **Add** durable facts: "remember that the client prefers X", project specs, naming conventions, completed-work notes.
14
+ - **Update** a source that changed: re-ingest a file/URL idempotently (no duplicates).
15
+
16
+ Do NOT use it for throwaway chat context or secrets/credentials.
17
+
18
+ ## Setup (once per consuming project)
19
+
20
+ The package reads everything from the project's own `.env` — nothing is hardcoded:
21
+
22
+ ```
23
+ KNOWLEDGE_DB_URL=postgres://… # Hetzner/EasyPanel pgvector instance
24
+ OPENAI_API_KEY=sk-…
25
+ KNOWLEDGE_CLIENT_ID=acme-corp # default client for this repo
26
+ KNOWLEDGE_PROJECT_ID=acme-website # default project for this repo
27
+ ```
28
+
29
+ Initialise the schema once: `npx knowledge-db init`
30
+
31
+ ## Scopes (the mental model)
32
+
33
+ | scope | meaning | example |
34
+ |-------|---------|---------|
35
+ | `global` | applies to every client/project | Dibe's own conventions, reusable patterns |
36
+ | `client` | shared across all of one client's projects | brand voice, logo rules, tone of voice |
37
+ | `project` | specific to one project | this site's sitemap, copy decisions, specs |
38
+
39
+ A search scoped to a project also automatically pulls in that **client's** shared knowledge and **global** knowledge, with all results ranked together by relevance.
40
+
41
+ ## CLI (preferred for quick ops)
42
+
43
+ ```bash
44
+ # retrieve — scoping comes from env defaults unless overridden
45
+ npx knowledge-db search "what is the brand voice?"
46
+ npx knowledge-db search "homepage hero copy" --project acme-website --json
47
+
48
+ # add a fact (when --scope is omitted, scope auto-derives: project if KNOWLEDGE_PROJECT_ID is set, else client, else global)
49
+ npx knowledge-db add "Client prefers sentence case for all headings." --scope client
50
+ npx knowledge-db add "Hero CTA finalised as 'Start free'." --source decisions.md
51
+
52
+ # add / update a file or doc idempotently (re-running replaces prior rows for that source)
53
+ npx knowledge-db add-file ./brand/voice.md --scope client --source brand/voice.md
54
+
55
+ # remove
56
+ npx knowledge-db delete --source brand/voice.md
57
+ npx knowledge-db delete --id <uuid>
58
+ ```
59
+
60
+ `--json` on `search` returns structured results (id, scope, source, score, content) — use it when you need to feed results back into reasoning.
61
+
62
+ ## Library (for app/server code)
63
+
64
+ ```ts
65
+ import { createKnowledgeDB } from "@digitalvibes/ai-knowledge-db";
66
+
67
+ const kb = createKnowledgeDB(); // reads .env
68
+
69
+ // retrieve relevant context before generating
70
+ const hits = await kb.search("brand voice and tone", { limit: 5 });
71
+ const context = hits.map((h) => h.content).join("\n\n");
72
+
73
+ // store a durable fact
74
+ await kb.add({ content: "Client approved the green palette.", scope: "client" });
75
+
76
+ // idempotently re-ingest a doc
77
+ await kb.upsertSource({ content: fileText, source: "specs/sitemap.md" });
78
+
79
+ await kb.close();
80
+ ```
81
+
82
+ ## Workflow guidance for the agent
83
+
84
+ 1. **Before** building website copy/components or answering a "what do we know" question, run a `search` and ground your output in the results. Cite the `source` when relevant.
85
+ 2. **After** the user states a durable preference/decision ("remember…", "from now on…", "the client wants…"), `add` it with the right scope. Prefer `client` scope for anything reusable across the client's projects; `project` for this build only.
86
+ 3. When re-ingesting a known file/URL, use `add-file` / `upsertSource` (not `add`) so you don't create duplicates.
87
+ 4. If a search returns nothing useful, say so rather than inventing context.