@digitalvibes/ai-knowledge-db 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -5
- package/dist/cli.cjs +199 -60
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +199 -60
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +153 -55
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +56 -10
- package/dist/index.d.ts +56 -10
- package/dist/index.js +153 -55
- package/dist/index.js.map +1 -1
- package/migrations/001_init_knowledge.sql +23 -17
- package/migrations/002_add_history.sql +13 -0
- package/package.json +1 -1
- package/skills/knowledge-db/SKILL.md +32 -6
package/dist/index.d.ts
CHANGED
|
@@ -39,10 +39,15 @@ interface KnowledgeRecord {
|
|
|
39
39
|
scope: Scope;
|
|
40
40
|
clientId: string | null;
|
|
41
41
|
projectId: string | null;
|
|
42
|
+
/** Stable identity for a versioned fact (null for free-form `add` entries). */
|
|
43
|
+
key: string | null;
|
|
42
44
|
source: string | null;
|
|
43
45
|
content: string;
|
|
44
46
|
metadata: Record<string, unknown>;
|
|
47
|
+
/** Valid-from timestamp. */
|
|
45
48
|
createdAt: string;
|
|
49
|
+
/** Valid-to timestamp; null = this is the current/active version. */
|
|
50
|
+
supersededAt: string | null;
|
|
46
51
|
}
|
|
47
52
|
interface AddInput {
|
|
48
53
|
/** The text to store. Will be chunked + embedded automatically. */
|
|
@@ -60,6 +65,15 @@ interface AddInput {
|
|
|
60
65
|
/** Override default chunking for this call. */
|
|
61
66
|
chunking?: ChunkOptions;
|
|
62
67
|
}
|
|
68
|
+
/**
|
|
69
|
+
* A versioned write. Writing a `key` that already exists supersedes the prior
|
|
70
|
+
* active version(s) (kept as history) and makes this the current one — so e.g.
|
|
71
|
+
* updated pricing always wins in search without losing the old value.
|
|
72
|
+
*/
|
|
73
|
+
interface PutInput extends AddInput {
|
|
74
|
+
/** Stable identity for this fact, e.g. 'pricing.basic-plan' or 'office.hours'. */
|
|
75
|
+
key: string;
|
|
76
|
+
}
|
|
63
77
|
interface ChunkOptions {
|
|
64
78
|
/** Target chunk size in characters. Default 1200. */
|
|
65
79
|
maxChars?: number;
|
|
@@ -82,6 +96,10 @@ interface SearchOptions {
|
|
|
82
96
|
minScore?: number;
|
|
83
97
|
/** Extra equality filters against the metadata jsonb column. */
|
|
84
98
|
metadata?: Record<string, unknown>;
|
|
99
|
+
/** Include superseded (historical) versions too. Default false → current only. */
|
|
100
|
+
includeHistory?: boolean;
|
|
101
|
+
/** Point-in-time search: return the versions that were current at this instant. Overrides includeHistory. */
|
|
102
|
+
asOf?: string | Date;
|
|
85
103
|
}
|
|
86
104
|
interface SearchResult extends KnowledgeRecord {
|
|
87
105
|
/** Cosine similarity, 0–1 (higher = closer). */
|
|
@@ -93,6 +111,14 @@ interface DeleteFilter {
|
|
|
93
111
|
clientId?: string;
|
|
94
112
|
projectId?: string;
|
|
95
113
|
source?: string;
|
|
114
|
+
key?: string;
|
|
115
|
+
}
|
|
116
|
+
interface HistoryOptions {
|
|
117
|
+
scope?: Scope;
|
|
118
|
+
clientId?: string;
|
|
119
|
+
projectId?: string;
|
|
120
|
+
/** Include superseded versions. Default true (history is the point). */
|
|
121
|
+
includeHistory?: boolean;
|
|
96
122
|
}
|
|
97
123
|
|
|
98
124
|
declare class KnowledgeDB {
|
|
@@ -100,28 +126,48 @@ declare class KnowledgeDB {
|
|
|
100
126
|
private embedder;
|
|
101
127
|
readonly config: ResolvedConfig;
|
|
102
128
|
constructor(config?: KnowledgeConfig);
|
|
103
|
-
/** Create the extension, table, and indexes if they don't exist. Safe to call repeatedly. */
|
|
129
|
+
/** Create/upgrade the extension, table, and indexes. Safe to call repeatedly. */
|
|
104
130
|
init(): Promise<void>;
|
|
105
131
|
/**
|
|
106
|
-
*
|
|
107
|
-
*
|
|
108
|
-
*
|
|
132
|
+
* Append free-form knowledge (no version identity). Use for notes that
|
|
133
|
+
* accumulate. For facts that change over time (pricing, hours, contact), use
|
|
134
|
+
* put() or upsertSource() so newer versions supersede older ones.
|
|
109
135
|
*/
|
|
110
136
|
add(input: AddInput): Promise<string[]>;
|
|
111
137
|
/**
|
|
112
|
-
*
|
|
113
|
-
*
|
|
114
|
-
*
|
|
138
|
+
* Versioned write keyed by `key`. Any existing *active* rows with the same
|
|
139
|
+
* key (+ scope/client/project) are stamped superseded_at = now() and kept as
|
|
140
|
+
* history; the new content becomes the current version. So updated pricing
|
|
141
|
+
* wins in search while the old value remains for audit / point-in-time.
|
|
142
|
+
*/
|
|
143
|
+
put(input: PutInput): Promise<string[]>;
|
|
144
|
+
/**
|
|
145
|
+
* Re-ingest a document by `source`, superseding (not deleting) the prior
|
|
146
|
+
* active version for that source. Safe to re-run: each run keeps history and
|
|
147
|
+
* makes the latest content current.
|
|
115
148
|
*/
|
|
116
149
|
upsertSource(input: AddInput & {
|
|
117
150
|
source: string;
|
|
118
151
|
}): Promise<string[]>;
|
|
119
|
-
/** Semantic search, scoped to client / project / global knowledge. */
|
|
152
|
+
/** Semantic search. Returns only current versions unless includeHistory/asOf. */
|
|
120
153
|
search(query: string, opts?: SearchOptions): Promise<SearchResult[]>;
|
|
121
|
-
/**
|
|
154
|
+
/**
|
|
155
|
+
* Return all versions of a `key` within the resolved scope/client/project,
|
|
156
|
+
* newest first, including superseded ones. Useful for audit / "what did we
|
|
157
|
+
* say before".
|
|
158
|
+
*/
|
|
159
|
+
history(key: string, opts?: HistoryOptions): Promise<KnowledgeRecord[]>;
|
|
160
|
+
/** Hard-delete rows matching a filter (removes history too). Returns count. */
|
|
122
161
|
delete(filter: DeleteFilter): Promise<number>;
|
|
123
162
|
/** Close the connection pool. Call on shutdown. */
|
|
124
163
|
close(): Promise<void>;
|
|
164
|
+
private resolveTarget;
|
|
165
|
+
/** Supersede prior active rows matching column=value, then insert new active rows. */
|
|
166
|
+
private versionedWrite;
|
|
167
|
+
/** Chunk and embed the content, then insert the rows in their own transaction on a pooled connection. */
|
|
168
|
+
private insertChunks;
|
|
169
|
+
/** Insert chunk rows on an existing transaction client. */
|
|
170
|
+
private insertChunksTx;
|
|
125
171
|
}
|
|
126
172
|
declare function createKnowledgeDB(config?: KnowledgeConfig): KnowledgeDB;
|
|
127
173
|
|
|
@@ -131,4 +177,4 @@ declare function createKnowledgeDB(config?: KnowledgeConfig): KnowledgeDB;
|
|
|
131
177
|
*/
|
|
132
178
|
declare function chunkText(text: string, opts?: ChunkOptions): string[];
|
|
133
179
|
|
|
134
|
-
export { type AddInput, type ChunkOptions, type DeleteFilter, type KnowledgeConfig, KnowledgeDB, type KnowledgeRecord, type ResolvedConfig, type Scope, type SearchOptions, type SearchResult, chunkText, createKnowledgeDB, resolveConfig };
|
|
180
|
+
export { type AddInput, type ChunkOptions, type DeleteFilter, type HistoryOptions, type KnowledgeConfig, KnowledgeDB, type KnowledgeRecord, type PutInput, type ResolvedConfig, type Scope, type SearchOptions, type SearchResult, chunkText, createKnowledgeDB, resolveConfig };
|
package/dist/index.js
CHANGED
|
@@ -110,20 +110,28 @@ create extension if not exists vector;
|
|
|
110
110
|
create extension if not exists "pgcrypto"; -- for gen_random_uuid()
|
|
111
111
|
|
|
112
112
|
create table if not exists knowledge (
|
|
113
|
-
id
|
|
114
|
-
scope
|
|
115
|
-
client_id
|
|
116
|
-
project_id
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
113
|
+
id uuid primary key default gen_random_uuid(),
|
|
114
|
+
scope text not null check (scope in ('global', 'client', 'project')),
|
|
115
|
+
client_id text,
|
|
116
|
+
project_id text,
|
|
117
|
+
key text, -- stable identity for a versioned fact (e.g. 'pricing.basic-plan')
|
|
118
|
+
source text,
|
|
119
|
+
content text not null,
|
|
120
|
+
embedding vector(1536) not null,
|
|
121
|
+
metadata jsonb not null default '{}',
|
|
122
|
+
created_at timestamptz not null default now(), -- valid from
|
|
123
|
+
superseded_at timestamptz -- valid to; null = current
|
|
122
124
|
);
|
|
123
125
|
|
|
126
|
+
-- Upgrade existing (v0.1) installs in place.
|
|
127
|
+
alter table knowledge add column if not exists key text;
|
|
128
|
+
alter table knowledge add column if not exists superseded_at timestamptz;
|
|
129
|
+
|
|
124
130
|
create index if not exists knowledge_client_idx on knowledge (client_id);
|
|
125
131
|
create index if not exists knowledge_project_idx on knowledge (project_id);
|
|
126
132
|
create index if not exists knowledge_scope_idx on knowledge (scope);
|
|
133
|
+
create index if not exists knowledge_key_idx on knowledge (key);
|
|
134
|
+
create index if not exists knowledge_active_idx on knowledge (superseded_at);
|
|
127
135
|
create index if not exists knowledge_metadata_idx on knowledge using gin (metadata);
|
|
128
136
|
|
|
129
137
|
create index if not exists knowledge_embedding_idx
|
|
@@ -141,59 +149,37 @@ var KnowledgeDB = class {
|
|
|
141
149
|
this.pool = new Pool({ connectionString: this.config.connectionString });
|
|
142
150
|
this.embedder = new Embedder(this.config);
|
|
143
151
|
}
|
|
144
|
-
/** Create the extension, table, and indexes if they don't exist. Safe to call repeatedly. */
|
|
152
|
+
/** Create/upgrade the extension, table, and indexes. Safe to call repeatedly. */
|
|
145
153
|
async init() {
|
|
146
154
|
await this.pool.query(SCHEMA_SQL);
|
|
147
155
|
}
|
|
148
156
|
/**
|
|
149
|
-
*
|
|
150
|
-
*
|
|
151
|
-
*
|
|
157
|
+
* Append free-form knowledge (no version identity). Use for notes that
|
|
158
|
+
* accumulate. For facts that change over time (pricing, hours, contact), use
|
|
159
|
+
* put() or upsertSource() so newer versions supersede older ones.
|
|
152
160
|
*/
|
|
153
161
|
async add(input) {
|
|
154
|
-
const
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
await client.query("begin");
|
|
166
|
-
for (let i = 0; i < chunks.length; i++) {
|
|
167
|
-
const chunkMeta = chunks.length > 1 ? { ...metadata, chunk: i, chunks: chunks.length } : metadata;
|
|
168
|
-
const { rows } = await client.query(
|
|
169
|
-
`insert into knowledge (scope, client_id, project_id, source, content, embedding, metadata)
|
|
170
|
-
values ($1, $2, $3, $4, $5, $6, $7) returning id`,
|
|
171
|
-
[scope, clientId, projectId, source, chunks[i], toVector(vectors[i]), chunkMeta]
|
|
172
|
-
);
|
|
173
|
-
ids.push(rows[0].id);
|
|
174
|
-
}
|
|
175
|
-
await client.query("commit");
|
|
176
|
-
} catch (err) {
|
|
177
|
-
await client.query("rollback");
|
|
178
|
-
throw err;
|
|
179
|
-
} finally {
|
|
180
|
-
client.release();
|
|
181
|
-
}
|
|
182
|
-
return ids;
|
|
162
|
+
const t = this.resolveTarget(input);
|
|
163
|
+
return this.insertChunks(this.pool, t, input, null);
|
|
164
|
+
}
|
|
165
|
+
/**
|
|
166
|
+
* Versioned write keyed by `key`. Any existing *active* rows with the same
|
|
167
|
+
* key (+ scope/client/project) are stamped superseded_at = now() and kept as
|
|
168
|
+
* history; the new content becomes the current version. So updated pricing
|
|
169
|
+
* wins in search while the old value remains for audit / point-in-time.
|
|
170
|
+
*/
|
|
171
|
+
async put(input) {
|
|
172
|
+
return this.versionedWrite("key", input.key, input);
|
|
183
173
|
}
|
|
184
174
|
/**
|
|
185
|
-
*
|
|
186
|
-
*
|
|
187
|
-
*
|
|
175
|
+
* Re-ingest a document by `source`, superseding (not deleting) the prior
|
|
176
|
+
* active version for that source. Safe to re-run: each run keeps history and
|
|
177
|
+
* makes the latest content current.
|
|
188
178
|
*/
|
|
189
179
|
async upsertSource(input) {
|
|
190
|
-
|
|
191
|
-
const projectId = input.projectId ?? this.config.projectId ?? null;
|
|
192
|
-
const scope = input.scope ?? defaultScope(clientId, projectId);
|
|
193
|
-
await this.delete({ scope, clientId: clientId ?? void 0, projectId: projectId ?? void 0, source: input.source });
|
|
194
|
-
return this.add(input);
|
|
180
|
+
return this.versionedWrite("source", input.source, input);
|
|
195
181
|
}
|
|
196
|
-
/** Semantic search, scoped to client / project / global knowledge. */
|
|
182
|
+
/** Semantic search. Returns only current versions unless includeHistory/asOf. */
|
|
197
183
|
async search(query, opts = {}) {
|
|
198
184
|
const clientId = opts.clientId ?? this.config.clientId;
|
|
199
185
|
const projectId = opts.projectId ?? this.config.projectId;
|
|
@@ -202,9 +188,9 @@ var KnowledgeDB = class {
|
|
|
202
188
|
const limit = opts.limit ?? 8;
|
|
203
189
|
const minScore = opts.minScore ?? 0;
|
|
204
190
|
const queryVec = toVector(await this.embedder.embedOne(query));
|
|
205
|
-
const orClauses = [];
|
|
206
191
|
const params = [queryVec];
|
|
207
192
|
const p = (v) => `$${params.push(v)}`;
|
|
193
|
+
const orClauses = [];
|
|
208
194
|
if (projectId && allows(opts.scopes, "project")) {
|
|
209
195
|
orClauses.push(`(scope = 'project' and project_id = ${p(projectId)})`);
|
|
210
196
|
}
|
|
@@ -219,8 +205,16 @@ var KnowledgeDB = class {
|
|
|
219
205
|
if (opts.metadata) {
|
|
220
206
|
where.push(`metadata @> ${p(JSON.stringify(opts.metadata))}::jsonb`);
|
|
221
207
|
}
|
|
208
|
+
if (opts.asOf !== void 0) {
|
|
209
|
+
const at = typeof opts.asOf === "string" ? opts.asOf : opts.asOf.toISOString();
|
|
210
|
+
where.push(
|
|
211
|
+
`created_at <= ${p(at)}::timestamptz and (superseded_at is null or superseded_at > ${p(at)}::timestamptz)`
|
|
212
|
+
);
|
|
213
|
+
} else if (!opts.includeHistory) {
|
|
214
|
+
where.push(`superseded_at is null`);
|
|
215
|
+
}
|
|
222
216
|
const { rows } = await this.pool.query(
|
|
223
|
-
`select id, scope, client_id, project_id, source, content, metadata, created_at,
|
|
217
|
+
`select id, scope, client_id, project_id, key, source, content, metadata, created_at, superseded_at,
|
|
224
218
|
1 - (embedding <=> $1) as score
|
|
225
219
|
from knowledge
|
|
226
220
|
where ${where.join(" and ")}
|
|
@@ -230,7 +224,33 @@ var KnowledgeDB = class {
|
|
|
230
224
|
);
|
|
231
225
|
return rows.map(rowToResult).filter((r) => r.score >= minScore);
|
|
232
226
|
}
|
|
233
|
-
/**
|
|
227
|
+
/**
|
|
228
|
+
* Return all versions of a `key` within the resolved scope/client/project,
|
|
229
|
+
* newest first, including superseded ones. Useful for audit / "what did we
|
|
230
|
+
* say before".
|
|
231
|
+
*/
|
|
232
|
+
async history(key, opts = {}) {
|
|
233
|
+
const clientId = opts.clientId ?? this.config.clientId ?? null;
|
|
234
|
+
const projectId = opts.projectId ?? this.config.projectId ?? null;
|
|
235
|
+
const scope = opts.scope ?? defaultScope(clientId, projectId);
|
|
236
|
+
const includeHistory = opts.includeHistory ?? true;
|
|
237
|
+
const where = [
|
|
238
|
+
`key = $1`,
|
|
239
|
+
`scope = $2`,
|
|
240
|
+
`client_id is not distinct from $3`,
|
|
241
|
+
`project_id is not distinct from $4`
|
|
242
|
+
];
|
|
243
|
+
if (!includeHistory) where.push(`superseded_at is null`);
|
|
244
|
+
const { rows } = await this.pool.query(
|
|
245
|
+
`select id, scope, client_id, project_id, key, source, content, metadata, created_at, superseded_at
|
|
246
|
+
from knowledge
|
|
247
|
+
where ${where.join(" and ")}
|
|
248
|
+
order by created_at desc`,
|
|
249
|
+
[key, scope, clientId, projectId]
|
|
250
|
+
);
|
|
251
|
+
return rows.map(rowToRecord);
|
|
252
|
+
}
|
|
253
|
+
/** Hard-delete rows matching a filter (removes history too). Returns count. */
|
|
234
254
|
async delete(filter) {
|
|
235
255
|
const where = [];
|
|
236
256
|
const params = [];
|
|
@@ -240,6 +260,7 @@ var KnowledgeDB = class {
|
|
|
240
260
|
if (filter.clientId) where.push(`client_id = ${p(filter.clientId)}`);
|
|
241
261
|
if (filter.projectId) where.push(`project_id = ${p(filter.projectId)}`);
|
|
242
262
|
if (filter.source) where.push(`source = ${p(filter.source)}`);
|
|
263
|
+
if (filter.key) where.push(`key = ${p(filter.key)}`);
|
|
243
264
|
if (where.length === 0) {
|
|
244
265
|
throw new Error("[ai-knowledge-db] delete() requires at least one filter to avoid wiping the table.");
|
|
245
266
|
}
|
|
@@ -253,6 +274,78 @@ var KnowledgeDB = class {
|
|
|
253
274
|
async close() {
|
|
254
275
|
await this.pool.end();
|
|
255
276
|
}
|
|
277
|
+
// --- internals ---------------------------------------------------------
|
|
278
|
+
resolveTarget(input) {
|
|
279
|
+
const clientId = input.clientId ?? this.config.clientId ?? null;
|
|
280
|
+
const projectId = input.projectId ?? this.config.projectId ?? null;
|
|
281
|
+
return {
|
|
282
|
+
clientId,
|
|
283
|
+
projectId,
|
|
284
|
+
scope: input.scope ?? defaultScope(clientId, projectId),
|
|
285
|
+
source: input.source ?? null
|
|
286
|
+
};
|
|
287
|
+
}
|
|
288
|
+
/** Supersede prior active rows matching column=value, then insert new active rows. */
|
|
289
|
+
async versionedWrite(matchColumn, matchValue, input) {
|
|
290
|
+
const t = this.resolveTarget(input);
|
|
291
|
+
const key = matchColumn === "key" ? matchValue : null;
|
|
292
|
+
const chunks = chunkText(input.content, input.chunking);
|
|
293
|
+
if (chunks.length === 0) return [];
|
|
294
|
+
const vectors = await this.embedder.embed(chunks);
|
|
295
|
+
const client = await this.pool.connect();
|
|
296
|
+
try {
|
|
297
|
+
await client.query("begin");
|
|
298
|
+
await client.query(
|
|
299
|
+
`update knowledge set superseded_at = now()
|
|
300
|
+
where ${matchColumn} = $1 and scope = $2
|
|
301
|
+
and client_id is not distinct from $3
|
|
302
|
+
and project_id is not distinct from $4
|
|
303
|
+
and superseded_at is null`,
|
|
304
|
+
[matchValue, t.scope, t.clientId, t.projectId]
|
|
305
|
+
);
|
|
306
|
+
const ids = await this.insertChunksTx(client, t, input, key, chunks, vectors);
|
|
307
|
+
await client.query("commit");
|
|
308
|
+
return ids;
|
|
309
|
+
} catch (err) {
|
|
310
|
+
await client.query("rollback");
|
|
311
|
+
throw err;
|
|
312
|
+
} finally {
|
|
313
|
+
client.release();
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
/** Chunk and embed the content, then insert the rows in their own transaction on a connection taken from `runner` (the pool). */
|
|
317
|
+
async insertChunks(runner, t, input, key) {
|
|
318
|
+
const chunks = chunkText(input.content, input.chunking);
|
|
319
|
+
if (chunks.length === 0) return [];
|
|
320
|
+
const vectors = await this.embedder.embed(chunks);
|
|
321
|
+
const client = await runner.connect();
|
|
322
|
+
try {
|
|
323
|
+
await client.query("begin");
|
|
324
|
+
const ids = await this.insertChunksTx(client, t, input, key, chunks, vectors);
|
|
325
|
+
await client.query("commit");
|
|
326
|
+
return ids;
|
|
327
|
+
} catch (err) {
|
|
328
|
+
await client.query("rollback");
|
|
329
|
+
throw err;
|
|
330
|
+
} finally {
|
|
331
|
+
client.release();
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
/** Insert chunk rows on an existing transaction client. */
|
|
335
|
+
async insertChunksTx(client, t, input, key, chunks, vectors) {
|
|
336
|
+
const metadata = input.metadata ?? {};
|
|
337
|
+
const ids = [];
|
|
338
|
+
for (let i = 0; i < chunks.length; i++) {
|
|
339
|
+
const chunkMeta = chunks.length > 1 ? { ...metadata, chunk: i, chunks: chunks.length } : metadata;
|
|
340
|
+
const { rows } = await client.query(
|
|
341
|
+
`insert into knowledge (scope, client_id, project_id, key, source, content, embedding, metadata)
|
|
342
|
+
values ($1, $2, $3, $4, $5, $6, $7, $8) returning id`,
|
|
343
|
+
[t.scope, t.clientId, t.projectId, key, t.source, chunks[i], toVector(vectors[i]), chunkMeta]
|
|
344
|
+
);
|
|
345
|
+
ids.push(rows[0].id);
|
|
346
|
+
}
|
|
347
|
+
return ids;
|
|
348
|
+
}
|
|
256
349
|
};
|
|
257
350
|
function createKnowledgeDB(config) {
|
|
258
351
|
return new KnowledgeDB(config);
|
|
@@ -277,12 +370,17 @@ function rowToRecord(row) {
|
|
|
277
370
|
scope: row.scope,
|
|
278
371
|
clientId: row.client_id,
|
|
279
372
|
projectId: row.project_id,
|
|
373
|
+
key: row.key ?? null,
|
|
280
374
|
source: row.source,
|
|
281
375
|
content: row.content,
|
|
282
376
|
metadata: row.metadata ?? {},
|
|
283
|
-
createdAt: row.created_at
|
|
377
|
+
createdAt: toIso(row.created_at),
|
|
378
|
+
supersededAt: row.superseded_at ? toIso(row.superseded_at) : null
|
|
284
379
|
};
|
|
285
380
|
}
|
|
381
|
+
function toIso(v) {
|
|
382
|
+
return v instanceof Date ? v.toISOString() : String(v);
|
|
383
|
+
}
|
|
286
384
|
export {
|
|
287
385
|
KnowledgeDB,
|
|
288
386
|
chunkText,
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/client.ts","../src/config.ts","../src/embeddings.ts","../src/schema.ts"],"sourcesContent":["import pg from \"pg\";\nimport { resolveConfig, type KnowledgeConfig, type ResolvedConfig } from \"./config.js\";\nimport { Embedder, chunkText } from \"./embeddings.js\";\nimport { SCHEMA_SQL } from \"./schema.js\";\nimport type {\n AddInput,\n DeleteFilter,\n KnowledgeRecord,\n Scope,\n SearchOptions,\n SearchResult,\n} from \"./types.js\";\n\nconst { Pool } = pg;\n\nexport class KnowledgeDB {\n private pool: pg.Pool;\n private embedder: Embedder;\n readonly config: ResolvedConfig;\n\n constructor(config: KnowledgeConfig = {}) {\n this.config = resolveConfig(config);\n this.pool = new Pool({ connectionString: this.config.connectionString });\n this.embedder = new Embedder(this.config);\n }\n\n /** Create the extension, table, and indexes if they don't exist. Safe to call repeatedly. */\n async init(): Promise<void> {\n await this.pool.query(SCHEMA_SQL);\n }\n\n /**\n * Add knowledge. The content is chunked, embedded, and stored. Returns the\n * ids of the stored rows (one per chunk). Scope/client/project fall back to\n * the env-configured defaults.\n */\n async add(input: AddInput): Promise<string[]> {\n const clientId = input.clientId ?? this.config.clientId ?? null;\n const projectId = input.projectId ?? this.config.projectId ?? null;\n const scope = input.scope ?? defaultScope(clientId, projectId);\n const source = input.source ?? null;\n const metadata = input.metadata ?? {};\n\n const chunks = chunkText(input.content, input.chunking);\n if (chunks.length === 0) return [];\n\n const vectors = await this.embedder.embed(chunks);\n const ids: string[] = [];\n\n const client = await this.pool.connect();\n try {\n await client.query(\"begin\");\n for (let i = 0; i < chunks.length; i++) {\n const chunkMeta =\n chunks.length > 1\n ? 
{ ...metadata, chunk: i, chunks: chunks.length }\n : metadata;\n const { rows } = await client.query(\n `insert into knowledge (scope, client_id, project_id, source, content, embedding, metadata)\n values ($1, $2, $3, $4, $5, $6, $7) returning id`,\n [scope, clientId, projectId, source, chunks[i], toVector(vectors[i]), chunkMeta],\n );\n ids.push(rows[0].id);\n }\n await client.query(\"commit\");\n } catch (err) {\n await client.query(\"rollback\");\n throw err;\n } finally {\n client.release();\n }\n return ids;\n }\n\n /**\n * Replace knowledge from a given source. Deletes existing rows that match the\n * same scope + ids + source, then re-adds. Use this for idempotent re-ingest\n * of a file or URL so you don't accumulate duplicates.\n */\n async upsertSource(input: AddInput & { source: string }): Promise<string[]> {\n const clientId = input.clientId ?? this.config.clientId ?? null;\n const projectId = input.projectId ?? this.config.projectId ?? null;\n const scope = input.scope ?? defaultScope(clientId, projectId);\n await this.delete({ scope, clientId: clientId ?? undefined, projectId: projectId ?? undefined, source: input.source });\n return this.add(input);\n }\n\n /** Semantic search, scoped to client / project / global knowledge. */\n async search(query: string, opts: SearchOptions = {}): Promise<SearchResult[]> {\n const clientId = opts.clientId ?? this.config.clientId;\n const projectId = opts.projectId ?? this.config.projectId;\n const includeClient = opts.includeClientKnowledge ?? true;\n const includeGlobal = opts.includeGlobal ?? true;\n const limit = opts.limit ?? 8;\n const minScore = opts.minScore ?? 
0;\n\n const queryVec = toVector(await this.embedder.embedOne(query));\n\n // Build a scope clause: project rows, optionally the client's shared rows,\n // optionally global rows — restricted to the requested scopes if given.\n const orClauses: string[] = [];\n const params: unknown[] = [queryVec];\n const p = (v: unknown) => `$${params.push(v)}`;\n\n if (projectId && allows(opts.scopes, \"project\")) {\n orClauses.push(`(scope = 'project' and project_id = ${p(projectId)})`);\n }\n if (clientId && includeClient && allows(opts.scopes, \"client\")) {\n orClauses.push(`(scope = 'client' and client_id = ${p(clientId)})`);\n }\n if (includeGlobal && allows(opts.scopes, \"global\")) {\n orClauses.push(`scope = 'global'`);\n }\n // If nothing matched (e.g. no ids at all), fall back to global-only.\n const scopeClause = orClauses.length ? `(${orClauses.join(\" or \")})` : `scope = 'global'`;\n\n const where: string[] = [scopeClause];\n if (opts.metadata) {\n where.push(`metadata @> ${p(JSON.stringify(opts.metadata))}::jsonb`);\n }\n\n const { rows } = await this.pool.query(\n `select id, scope, client_id, project_id, source, content, metadata, created_at,\n 1 - (embedding <=> $1) as score\n from knowledge\n where ${where.join(\" and \")}\n order by embedding <=> $1\n limit ${p(limit)}`,\n params,\n );\n\n return rows\n .map(rowToResult)\n .filter((r) => r.score >= minScore);\n }\n\n /** Delete rows matching a filter. Returns the number deleted. 
*/\n async delete(filter: DeleteFilter): Promise<number> {\n const where: string[] = [];\n const params: unknown[] = [];\n const p = (v: unknown) => `$${params.push(v)}`;\n if (filter.id) where.push(`id = ${p(filter.id)}`);\n if (filter.scope) where.push(`scope = ${p(filter.scope)}`);\n if (filter.clientId) where.push(`client_id = ${p(filter.clientId)}`);\n if (filter.projectId) where.push(`project_id = ${p(filter.projectId)}`);\n if (filter.source) where.push(`source = ${p(filter.source)}`);\n if (where.length === 0) {\n throw new Error(\"[ai-knowledge-db] delete() requires at least one filter to avoid wiping the table.\");\n }\n const { rowCount } = await this.pool.query(\n `delete from knowledge where ${where.join(\" and \")}`,\n params,\n );\n return rowCount ?? 0;\n }\n\n /** Close the connection pool. Call on shutdown. */\n async close(): Promise<void> {\n await this.pool.end();\n }\n}\n\nexport function createKnowledgeDB(config?: KnowledgeConfig): KnowledgeDB {\n return new KnowledgeDB(config);\n}\n\nfunction defaultScope(clientId: string | null, projectId: string | null): Scope {\n if (projectId) return \"project\";\n if (clientId) return \"client\";\n return \"global\";\n}\n\nfunction allows(scopes: Scope[] | undefined, scope: Scope): boolean {\n return !scopes || scopes.includes(scope);\n}\n\n/** pgvector accepts a vector literal like '[0.1,0.2,...]'. */\nfunction toVector(vec: number[]): string {\n return `[${vec.join(\",\")}]`;\n}\n\nfunction rowToResult(row: any): SearchResult {\n return { ...rowToRecord(row), score: Number(row.score) };\n}\n\nfunction rowToRecord(row: any): KnowledgeRecord {\n return {\n id: row.id,\n scope: row.scope,\n clientId: row.client_id,\n projectId: row.project_id,\n source: row.source,\n content: row.content,\n metadata: row.metadata ?? {},\n createdAt: row.created_at instanceof Date ? 
row.created_at.toISOString() : row.created_at,\n };\n}\n","/**\n * All sensitive / per-deployment values live in the *consuming* project's\n * environment — never in this package. A website repo that installs\n * `@dibe/ai-knowledge-db` sets these in its own `.env`:\n *\n * KNOWLEDGE_DB_URL=postgres://user:pass@host:5432/knowledge (Hetzner/EasyPanel)\n * OPENAI_API_KEY=sk-...\n * KNOWLEDGE_CLIENT_ID=acme-corp # default client for this repo\n * KNOWLEDGE_PROJECT_ID=acme-website-2026 # default project for this repo\n *\n * Anything passed explicitly to createKnowledgeDB() overrides the env value,\n * but env is the intended default so callers usually pass nothing.\n */\n\nexport interface KnowledgeConfig {\n /** Postgres connection string. Defaults to env KNOWLEDGE_DB_URL. */\n connectionString?: string;\n /** OpenAI API key. Defaults to env OPENAI_API_KEY. */\n openaiApiKey?: string;\n /** Embedding model. Defaults to env KNOWLEDGE_EMBED_MODEL or text-embedding-3-small. */\n embeddingModel?: string;\n /** Default client scope for this repo. Defaults to env KNOWLEDGE_CLIENT_ID. */\n clientId?: string;\n /** Default project scope for this repo. Defaults to env KNOWLEDGE_PROJECT_ID. */\n projectId?: string;\n}\n\nexport interface ResolvedConfig {\n connectionString: string;\n openaiApiKey: string;\n embeddingModel: string;\n embeddingDimensions: number;\n clientId?: string;\n projectId?: string;\n}\n\n/** text-embedding-3-small → 1536, text-embedding-3-large → 3072. */\nconst MODEL_DIMENSIONS: Record<string, number> = {\n \"text-embedding-3-small\": 1536,\n \"text-embedding-3-large\": 3072,\n \"text-embedding-ada-002\": 1536,\n};\n\nconst env = (key: string): string | undefined => {\n const v = process.env[key];\n return v && v.trim() !== \"\" ? v.trim() : undefined;\n};\n\nexport function resolveConfig(config: KnowledgeConfig = {}): ResolvedConfig {\n const connectionString = config.connectionString ?? 
env(\"KNOWLEDGE_DB_URL\");\n if (!connectionString) {\n throw new Error(\n \"[ai-knowledge-db] Missing connection string. Set KNOWLEDGE_DB_URL in your project's .env \" +\n \"or pass { connectionString } to createKnowledgeDB().\",\n );\n }\n\n const openaiApiKey = config.openaiApiKey ?? env(\"OPENAI_API_KEY\");\n if (!openaiApiKey) {\n throw new Error(\n \"[ai-knowledge-db] Missing OpenAI key. Set OPENAI_API_KEY in your project's .env \" +\n \"or pass { openaiApiKey } to createKnowledgeDB().\",\n );\n }\n\n const embeddingModel =\n config.embeddingModel ?? env(\"KNOWLEDGE_EMBED_MODEL\") ?? \"text-embedding-3-small\";\n const embeddingDimensions = MODEL_DIMENSIONS[embeddingModel] ?? 1536;\n\n return {\n connectionString,\n openaiApiKey,\n embeddingModel,\n embeddingDimensions,\n clientId: config.clientId ?? env(\"KNOWLEDGE_CLIENT_ID\"),\n projectId: config.projectId ?? env(\"KNOWLEDGE_PROJECT_ID\"),\n };\n}\n","import OpenAI from \"openai\";\nimport type { ResolvedConfig } from \"./config.js\";\nimport type { ChunkOptions } from \"./types.js\";\n\nexport class Embedder {\n private client: OpenAI;\n private model: string;\n\n constructor(config: ResolvedConfig) {\n this.client = new OpenAI({ apiKey: config.openaiApiKey });\n this.model = config.embeddingModel;\n }\n\n /** Embed a batch of strings in one API call. */\n async embed(texts: string[]): Promise<number[][]> {\n if (texts.length === 0) return [];\n const res = await this.client.embeddings.create({\n model: this.model,\n input: texts,\n });\n // OpenAI preserves input order in the response.\n return res.data\n .sort((a, b) => a.index - b.index)\n .map((d) => d.embedding as number[]);\n }\n\n async embedOne(text: string): Promise<number[]> {\n const [vec] = await this.embed([text]);\n return vec;\n }\n}\n\n/**\n * Split text into overlapping chunks. 
Prefers paragraph boundaries, then\n * sentence boundaries, falling back to hard character cuts for very long runs.\n */\nexport function chunkText(text: string, opts: ChunkOptions = {}): string[] {\n const maxChars = opts.maxChars ?? 1200;\n const overlap = opts.overlap ?? 150;\n const clean = text.replace(/\\r\\n/g, \"\\n\").trim();\n if (clean.length <= maxChars) return clean ? [clean] : [];\n\n // Split into paragraph-ish units first.\n const units = clean.split(/\\n{2,}/).flatMap((p) => splitLongUnit(p, maxChars));\n\n const chunks: string[] = [];\n let current = \"\";\n for (const unit of units) {\n if (current && current.length + unit.length + 2 > maxChars) {\n chunks.push(current.trim());\n // carry overlap from the tail of the previous chunk\n current = overlap > 0 ? current.slice(-overlap) + \"\\n\\n\" + unit : unit;\n } else {\n current = current ? current + \"\\n\\n\" + unit : unit;\n }\n }\n if (current.trim()) chunks.push(current.trim());\n return chunks;\n}\n\n/** Break a single oversized paragraph on sentence, then hard, boundaries. */\nfunction splitLongUnit(unit: string, maxChars: number): string[] {\n if (unit.length <= maxChars) return [unit];\n const sentences = unit.match(/[^.!?\\n]+[.!?]?\\s*/g) ?? [unit];\n const out: string[] = [];\n let buf = \"\";\n for (const s of sentences) {\n if (s.length > maxChars) {\n if (buf) {\n out.push(buf);\n buf = \"\";\n }\n for (let i = 0; i < s.length; i += maxChars) out.push(s.slice(i, i + maxChars));\n } else if (buf.length + s.length > maxChars) {\n out.push(buf);\n buf = s;\n } else {\n buf += s;\n }\n }\n if (buf) out.push(buf);\n return out;\n}\n","/**\n * Canonical schema (Postgres + pgvector), kept as a string so the library never\n * has to read from disk — works identically in the ESM and CJS builds. The\n * build also writes this out to dist/schema.sql for the `./schema.sql` export\n * and for running by hand. 
Vector size matches text-embedding-3-small (1536).\n */\nexport const SCHEMA_SQL = `-- AI Knowledge DB schema (Postgres + pgvector)\n-- Run once against your Hetzner/EasyPanel Postgres instance, or via kb.init().\n-- If you switch to text-embedding-3-large, change 1536 -> 3072 and re-index.\n\ncreate extension if not exists vector;\ncreate extension if not exists \"pgcrypto\"; -- for gen_random_uuid()\n\ncreate table if not exists knowledge (\n id uuid primary key default gen_random_uuid(),\n scope text not null check (scope in ('global', 'client', 'project')),\n client_id text,\n project_id text,\n source text,\n content text not null,\n embedding vector(1536) not null,\n metadata jsonb not null default '{}',\n created_at timestamptz not null default now()\n);\n\ncreate index if not exists knowledge_client_idx on knowledge (client_id);\ncreate index if not exists knowledge_project_idx on knowledge (project_id);\ncreate index if not exists knowledge_scope_idx on knowledge (scope);\ncreate index if not exists knowledge_metadata_idx on knowledge using gin (metadata);\n\ncreate index if not exists knowledge_embedding_idx\n on knowledge using hnsw (embedding 
vector_cosine_ops);\n`;\n"],"mappings":";AAAA,OAAO,QAAQ;;;ACqCf,IAAM,mBAA2C;AAAA,EAC/C,0BAA0B;AAAA,EAC1B,0BAA0B;AAAA,EAC1B,0BAA0B;AAC5B;AAEA,IAAM,MAAM,CAAC,QAAoC;AAC/C,QAAM,IAAI,QAAQ,IAAI,GAAG;AACzB,SAAO,KAAK,EAAE,KAAK,MAAM,KAAK,EAAE,KAAK,IAAI;AAC3C;AAEO,SAAS,cAAc,SAA0B,CAAC,GAAmB;AAC1E,QAAM,mBAAmB,OAAO,oBAAoB,IAAI,kBAAkB;AAC1E,MAAI,CAAC,kBAAkB;AACrB,UAAM,IAAI;AAAA,MACR;AAAA,IAEF;AAAA,EACF;AAEA,QAAM,eAAe,OAAO,gBAAgB,IAAI,gBAAgB;AAChE,MAAI,CAAC,cAAc;AACjB,UAAM,IAAI;AAAA,MACR;AAAA,IAEF;AAAA,EACF;AAEA,QAAM,iBACJ,OAAO,kBAAkB,IAAI,uBAAuB,KAAK;AAC3D,QAAM,sBAAsB,iBAAiB,cAAc,KAAK;AAEhE,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,UAAU,OAAO,YAAY,IAAI,qBAAqB;AAAA,IACtD,WAAW,OAAO,aAAa,IAAI,sBAAsB;AAAA,EAC3D;AACF;;;AC7EA,OAAO,YAAY;AAIZ,IAAM,WAAN,MAAe;AAAA,EACZ;AAAA,EACA;AAAA,EAER,YAAY,QAAwB;AAClC,SAAK,SAAS,IAAI,OAAO,EAAE,QAAQ,OAAO,aAAa,CAAC;AACxD,SAAK,QAAQ,OAAO;AAAA,EACtB;AAAA;AAAA,EAGA,MAAM,MAAM,OAAsC;AAChD,QAAI,MAAM,WAAW,EAAG,QAAO,CAAC;AAChC,UAAM,MAAM,MAAM,KAAK,OAAO,WAAW,OAAO;AAAA,MAC9C,OAAO,KAAK;AAAA,MACZ,OAAO;AAAA,IACT,CAAC;AAED,WAAO,IAAI,KACR,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK,EAChC,IAAI,CAAC,MAAM,EAAE,SAAqB;AAAA,EACvC;AAAA,EAEA,MAAM,SAAS,MAAiC;AAC9C,UAAM,CAAC,GAAG,IAAI,MAAM,KAAK,MAAM,CAAC,IAAI,CAAC;AACrC,WAAO;AAAA,EACT;AACF;AAMO,SAAS,UAAU,MAAc,OAAqB,CAAC,GAAa;AACzE,QAAM,WAAW,KAAK,YAAY;AAClC,QAAM,UAAU,KAAK,WAAW;AAChC,QAAM,QAAQ,KAAK,QAAQ,SAAS,IAAI,EAAE,KAAK;AAC/C,MAAI,MAAM,UAAU,SAAU,QAAO,QAAQ,CAAC,KAAK,IAAI,CAAC;AAGxD,QAAM,QAAQ,MAAM,MAAM,QAAQ,EAAE,QAAQ,CAAC,MAAM,cAAc,GAAG,QAAQ,CAAC;AAE7E,QAAM,SAAmB,CAAC;AAC1B,MAAI,UAAU;AACd,aAAW,QAAQ,OAAO;AACxB,QAAI,WAAW,QAAQ,SAAS,KAAK,SAAS,IAAI,UAAU;AAC1D,aAAO,KAAK,QAAQ,KAAK,CAAC;AAE1B,gBAAU,UAAU,IAAI,QAAQ,MAAM,CAAC,OAAO,IAAI,SAAS,OAAO;AAAA,IACpE,OAAO;AACL,gBAAU,UAAU,UAAU,SAAS,OAAO;AAAA,IAChD;AAAA,EACF;AACA,MAAI,QAAQ,KAAK,EAAG,QAAO,KAAK,QAAQ,KAAK,CAAC;AAC9C,SAAO;AACT;AAGA,SAAS,cAAc,MAAc,UAA4B;AAC/D,MAAI,KAAK,UAAU,SAAU,QAAO,CAAC,IAAI;AACzC,QAAM,YAAY,KAAK,MAAM,qBAAqB,KAAK,CAAC,IAAI;AAC5D,QAAM,MAAgB,CAAC;AACvB,MAAI,MAAM;AACV,aA
AW,KAAK,WAAW;AACzB,QAAI,EAAE,SAAS,UAAU;AACvB,UAAI,KAAK;AACP,YAAI,KAAK,GAAG;AACZ,cAAM;AAAA,MACR;AACA,eAAS,IAAI,GAAG,IAAI,EAAE,QAAQ,KAAK,SAAU,KAAI,KAAK,EAAE,MAAM,GAAG,IAAI,QAAQ,CAAC;AAAA,IAChF,WAAW,IAAI,SAAS,EAAE,SAAS,UAAU;AAC3C,UAAI,KAAK,GAAG;AACZ,YAAM;AAAA,IACR,OAAO;AACL,aAAO;AAAA,IACT;AAAA,EACF;AACA,MAAI,IAAK,KAAI,KAAK,GAAG;AACrB,SAAO;AACT;;;AC5EO,IAAM,aAAa;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;AHO1B,IAAM,EAAE,KAAK,IAAI;AAEV,IAAM,cAAN,MAAkB;AAAA,EACf;AAAA,EACA;AAAA,EACC;AAAA,EAET,YAAY,SAA0B,CAAC,GAAG;AACxC,SAAK,SAAS,cAAc,MAAM;AAClC,SAAK,OAAO,IAAI,KAAK,EAAE,kBAAkB,KAAK,OAAO,iBAAiB,CAAC;AACvE,SAAK,WAAW,IAAI,SAAS,KAAK,MAAM;AAAA,EAC1C;AAAA;AAAA,EAGA,MAAM,OAAsB;AAC1B,UAAM,KAAK,KAAK,MAAM,UAAU;AAAA,EAClC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,IAAI,OAAoC;AAC5C,UAAM,WAAW,MAAM,YAAY,KAAK,OAAO,YAAY;AAC3D,UAAM,YAAY,MAAM,aAAa,KAAK,OAAO,aAAa;AAC9D,UAAM,QAAQ,MAAM,SAAS,aAAa,UAAU,SAAS;AAC7D,UAAM,SAAS,MAAM,UAAU;AAC/B,UAAM,WAAW,MAAM,YAAY,CAAC;AAEpC,UAAM,SAAS,UAAU,MAAM,SAAS,MAAM,QAAQ;AACtD,QAAI,OAAO,WAAW,EAAG,QAAO,CAAC;AAEjC,UAAM,UAAU,MAAM,KAAK,SAAS,MAAM,MAAM;AAChD,UAAM,MAAgB,CAAC;AAEvB,UAAM,SAAS,MAAM,KAAK,KAAK,QAAQ;AACvC,QAAI;AACF,YAAM,OAAO,MAAM,OAAO;AAC1B,eAAS,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;AACtC,cAAM,YACJ,OAAO,SAAS,IACZ,EAAE,GAAG,UAAU,OAAO,GAAG,QAAQ,OAAO,OAAO,IAC/C;AACN,cAAM,EAAE,KAAK,IAAI,MAAM,OAAO;AAAA,UAC5B;AAAA;AAAA,UAEA,CAAC,OAAO,UAAU,WAAW,QAAQ,OAAO,CAAC,GAAG,SAAS,QAAQ,CAAC,CAAC,GAAG,SAAS;AAAA,QACjF;AACA,YAAI,KAAK,KAAK,CAAC,EAAE,EAAE;AAAA,MACrB;AACA,YAAM,OAAO,MAAM,QAAQ;AAAA,IAC7B,SAAS,KAAK;AACZ,YAAM,OAAO,MAAM,UAAU;AAC7B,YAAM;AAAA,IACR,UAAE;AACA,aAAO,QAAQ;AAAA,IACjB;AACA,WAAO;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,aAAa,OAAyD;AAC1E,UAAM,WAAW,MAAM,YAAY,KAAK,OAAO,YAAY;AAC3D,UAAM,YAAY,MAAM,aAAa,KAAK,OAAO,aAAa;AAC9D,UAAM,QAAQ,MAAM,SAAS,aAAa,UAAU,SAAS;AAC7D,UAAM,KAAK,OAAO,EAAE,OAAO,UAAU,YAAY,QAAW,WAAW,aAAa,QAAW,QAAQ,MAAM,OAAO,CAAC;AACrH,WAAO,KAAK,IAAI,KAAK;AAAA,EACvB;AAAA;AAAA,EAGA
,MAAM,OAAO,OAAe,OAAsB,CAAC,GAA4B;AAC7E,UAAM,WAAW,KAAK,YAAY,KAAK,OAAO;AAC9C,UAAM,YAAY,KAAK,aAAa,KAAK,OAAO;AAChD,UAAM,gBAAgB,KAAK,0BAA0B;AACrD,UAAM,gBAAgB,KAAK,iBAAiB;AAC5C,UAAM,QAAQ,KAAK,SAAS;AAC5B,UAAM,WAAW,KAAK,YAAY;AAElC,UAAM,WAAW,SAAS,MAAM,KAAK,SAAS,SAAS,KAAK,CAAC;AAI7D,UAAM,YAAsB,CAAC;AAC7B,UAAM,SAAoB,CAAC,QAAQ;AACnC,UAAM,IAAI,CAAC,MAAe,IAAI,OAAO,KAAK,CAAC,CAAC;AAE5C,QAAI,aAAa,OAAO,KAAK,QAAQ,SAAS,GAAG;AAC/C,gBAAU,KAAK,uCAAuC,EAAE,SAAS,CAAC,GAAG;AAAA,IACvE;AACA,QAAI,YAAY,iBAAiB,OAAO,KAAK,QAAQ,QAAQ,GAAG;AAC9D,gBAAU,KAAK,qCAAqC,EAAE,QAAQ,CAAC,GAAG;AAAA,IACpE;AACA,QAAI,iBAAiB,OAAO,KAAK,QAAQ,QAAQ,GAAG;AAClD,gBAAU,KAAK,kBAAkB;AAAA,IACnC;AAEA,UAAM,cAAc,UAAU,SAAS,IAAI,UAAU,KAAK,MAAM,CAAC,MAAM;AAEvE,UAAM,QAAkB,CAAC,WAAW;AACpC,QAAI,KAAK,UAAU;AACjB,YAAM,KAAK,eAAe,EAAE,KAAK,UAAU,KAAK,QAAQ,CAAC,CAAC,SAAS;AAAA,IACrE;AAEA,UAAM,EAAE,KAAK,IAAI,MAAM,KAAK,KAAK;AAAA,MAC/B;AAAA;AAAA;AAAA,eAGS,MAAM,KAAK,OAAO,CAAC;AAAA;AAAA,eAEnB,EAAE,KAAK,CAAC;AAAA,MACjB;AAAA,IACF;AAEA,WAAO,KACJ,IAAI,WAAW,EACf,OAAO,CAAC,MAAM,EAAE,SAAS,QAAQ;AAAA,EACtC;AAAA;AAAA,EAGA,MAAM,OAAO,QAAuC;AAClD,UAAM,QAAkB,CAAC;AACzB,UAAM,SAAoB,CAAC;AAC3B,UAAM,IAAI,CAAC,MAAe,IAAI,OAAO,KAAK,CAAC,CAAC;AAC5C,QAAI,OAAO,GAAI,OAAM,KAAK,QAAQ,EAAE,OAAO,EAAE,CAAC,EAAE;AAChD,QAAI,OAAO,MAAO,OAAM,KAAK,WAAW,EAAE,OAAO,KAAK,CAAC,EAAE;AACzD,QAAI,OAAO,SAAU,OAAM,KAAK,eAAe,EAAE,OAAO,QAAQ,CAAC,EAAE;AACnE,QAAI,OAAO,UAAW,OAAM,KAAK,gBAAgB,EAAE,OAAO,SAAS,CAAC,EAAE;AACtE,QAAI,OAAO,OAAQ,OAAM,KAAK,YAAY,EAAE,OAAO,MAAM,CAAC,EAAE;AAC5D,QAAI,MAAM,WAAW,GAAG;AACtB,YAAM,IAAI,MAAM,oFAAoF;AAAA,IACtG;AACA,UAAM,EAAE,SAAS,IAAI,MAAM,KAAK,KAAK;AAAA,MACnC,+BAA+B,MAAM,KAAK,OAAO,CAAC;AAAA,MAClD;AAAA,IACF;AACA,WAAO,YAAY;AAAA,EACrB;AAAA;AAAA,EAGA,MAAM,QAAuB;AAC3B,UAAM,KAAK,KAAK,IAAI;AAAA,EACtB;AACF;AAEO,SAAS,kBAAkB,QAAuC;AACvE,SAAO,IAAI,YAAY,MAAM;AAC/B;AAEA,SAAS,aAAa,UAAyB,WAAiC;AAC9E,MAAI,UAAW,QAAO;AACtB,MAAI,SAAU,QAAO;AACrB,SAAO;AACT;AAEA,SAAS,OAAO,QAA6B,OAAuB;AAClE,SAAO,CAAC,UAAU,OAAO,SAAS,KAAK;AACzC;AAGA,SAAS,SAAS,KAAuB;AACvC,SAAO,IAAI,IAAI,KAAK,GAAG,CAAC;AAC1B;AAEA,
SAAS,YAAY,KAAwB;AAC3C,SAAO,EAAE,GAAG,YAAY,GAAG,GAAG,OAAO,OAAO,IAAI,KAAK,EAAE;AACzD;AAEA,SAAS,YAAY,KAA2B;AAC9C,SAAO;AAAA,IACL,IAAI,IAAI;AAAA,IACR,OAAO,IAAI;AAAA,IACX,UAAU,IAAI;AAAA,IACd,WAAW,IAAI;AAAA,IACf,QAAQ,IAAI;AAAA,IACZ,SAAS,IAAI;AAAA,IACb,UAAU,IAAI,YAAY,CAAC;AAAA,IAC3B,WAAW,IAAI,sBAAsB,OAAO,IAAI,WAAW,YAAY,IAAI,IAAI;AAAA,EACjF;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/client.ts","../src/config.ts","../src/embeddings.ts","../src/schema.ts"],"sourcesContent":["import pg from \"pg\";\nimport { resolveConfig, type KnowledgeConfig, type ResolvedConfig } from \"./config.js\";\nimport { Embedder, chunkText } from \"./embeddings.js\";\nimport { SCHEMA_SQL } from \"./schema.js\";\nimport type {\n AddInput,\n DeleteFilter,\n HistoryOptions,\n KnowledgeRecord,\n PutInput,\n Scope,\n SearchOptions,\n SearchResult,\n} from \"./types.js\";\n\nconst { Pool } = pg;\n\nexport class KnowledgeDB {\n private pool: pg.Pool;\n private embedder: Embedder;\n readonly config: ResolvedConfig;\n\n constructor(config: KnowledgeConfig = {}) {\n this.config = resolveConfig(config);\n this.pool = new Pool({ connectionString: this.config.connectionString });\n this.embedder = new Embedder(this.config);\n }\n\n /** Create/upgrade the extension, table, and indexes. Safe to call repeatedly. */\n async init(): Promise<void> {\n await this.pool.query(SCHEMA_SQL);\n }\n\n /**\n * Append free-form knowledge (no version identity). Use for notes that\n * accumulate. For facts that change over time (pricing, hours, contact), use\n * put() or upsertSource() so newer versions supersede older ones.\n */\n async add(input: AddInput): Promise<string[]> {\n const t = this.resolveTarget(input);\n return this.insertChunks(this.pool, t, input, null);\n }\n\n /**\n * Versioned write keyed by `key`. Any existing *active* rows with the same\n * key (+ scope/client/project) are stamped superseded_at = now() and kept as\n * history; the new content becomes the current version. So updated pricing\n * wins in search while the old value remains for audit / point-in-time.\n */\n async put(input: PutInput): Promise<string[]> {\n return this.versionedWrite(\"key\", input.key, input);\n }\n\n /**\n * Re-ingest a document by `source`, superseding (not deleting) the prior\n * active version for that source. 
Idempotent: re-running keeps history and\n * makes the latest content current.\n */\n async upsertSource(input: AddInput & { source: string }): Promise<string[]> {\n return this.versionedWrite(\"source\", input.source, input);\n }\n\n /** Semantic search. Returns only current versions unless includeHistory/asOf. */\n async search(query: string, opts: SearchOptions = {}): Promise<SearchResult[]> {\n const clientId = opts.clientId ?? this.config.clientId;\n const projectId = opts.projectId ?? this.config.projectId;\n const includeClient = opts.includeClientKnowledge ?? true;\n const includeGlobal = opts.includeGlobal ?? true;\n const limit = opts.limit ?? 8;\n const minScore = opts.minScore ?? 0;\n\n const queryVec = toVector(await this.embedder.embedOne(query));\n\n const params: unknown[] = [queryVec];\n const p = (v: unknown) => `$${params.push(v)}`;\n\n const orClauses: string[] = [];\n if (projectId && allows(opts.scopes, \"project\")) {\n orClauses.push(`(scope = 'project' and project_id = ${p(projectId)})`);\n }\n if (clientId && includeClient && allows(opts.scopes, \"client\")) {\n orClauses.push(`(scope = 'client' and client_id = ${p(clientId)})`);\n }\n if (includeGlobal && allows(opts.scopes, \"global\")) {\n orClauses.push(`scope = 'global'`);\n }\n const scopeClause = orClauses.length ? `(${orClauses.join(\" or \")})` : `scope = 'global'`;\n\n const where: string[] = [scopeClause];\n if (opts.metadata) {\n where.push(`metadata @> ${p(JSON.stringify(opts.metadata))}::jsonb`);\n }\n // Recency / history filtering.\n if (opts.asOf !== undefined) {\n const at = typeof opts.asOf === \"string\" ? 
opts.asOf : opts.asOf.toISOString();\n where.push(\n `created_at <= ${p(at)}::timestamptz and (superseded_at is null or superseded_at > ${p(at)}::timestamptz)`,\n );\n } else if (!opts.includeHistory) {\n where.push(`superseded_at is null`);\n }\n\n const { rows } = await this.pool.query(\n `select id, scope, client_id, project_id, key, source, content, metadata, created_at, superseded_at,\n 1 - (embedding <=> $1) as score\n from knowledge\n where ${where.join(\" and \")}\n order by embedding <=> $1\n limit ${p(limit)}`,\n params,\n );\n\n return rows.map(rowToResult).filter((r) => r.score >= minScore);\n }\n\n /**\n * Return all versions of a `key` (or all rows for a source via metadata),\n * newest first, including superseded ones. Useful for audit / \"what did we\n * say before\".\n */\n async history(key: string, opts: HistoryOptions = {}): Promise<KnowledgeRecord[]> {\n const clientId = opts.clientId ?? this.config.clientId ?? null;\n const projectId = opts.projectId ?? this.config.projectId ?? null;\n const scope = opts.scope ?? defaultScope(clientId, projectId);\n const includeHistory = opts.includeHistory ?? true;\n const where = [\n `key = $1`,\n `scope = $2`,\n `client_id is not distinct from $3`,\n `project_id is not distinct from $4`,\n ];\n if (!includeHistory) where.push(`superseded_at is null`);\n const { rows } = await this.pool.query(\n `select id, scope, client_id, project_id, key, source, content, metadata, created_at, superseded_at\n from knowledge\n where ${where.join(\" and \")}\n order by created_at desc`,\n [key, scope, clientId, projectId],\n );\n return rows.map(rowToRecord);\n }\n\n /** Hard-delete rows matching a filter (removes history too). Returns count. 
*/\n async delete(filter: DeleteFilter): Promise<number> {\n const where: string[] = [];\n const params: unknown[] = [];\n const p = (v: unknown) => `$${params.push(v)}`;\n if (filter.id) where.push(`id = ${p(filter.id)}`);\n if (filter.scope) where.push(`scope = ${p(filter.scope)}`);\n if (filter.clientId) where.push(`client_id = ${p(filter.clientId)}`);\n if (filter.projectId) where.push(`project_id = ${p(filter.projectId)}`);\n if (filter.source) where.push(`source = ${p(filter.source)}`);\n if (filter.key) where.push(`key = ${p(filter.key)}`);\n if (where.length === 0) {\n throw new Error(\"[ai-knowledge-db] delete() requires at least one filter to avoid wiping the table.\");\n }\n const { rowCount } = await this.pool.query(\n `delete from knowledge where ${where.join(\" and \")}`,\n params,\n );\n return rowCount ?? 0;\n }\n\n /** Close the connection pool. Call on shutdown. */\n async close(): Promise<void> {\n await this.pool.end();\n }\n\n // --- internals ---------------------------------------------------------\n\n private resolveTarget(input: AddInput): {\n scope: Scope;\n clientId: string | null;\n projectId: string | null;\n source: string | null;\n } {\n const clientId = input.clientId ?? this.config.clientId ?? null;\n const projectId = input.projectId ?? this.config.projectId ?? null;\n return {\n clientId,\n projectId,\n scope: input.scope ?? defaultScope(clientId, projectId),\n source: input.source ?? null,\n };\n }\n\n /** Supersede prior active rows matching column=value, then insert new active rows. */\n private async versionedWrite(\n matchColumn: \"key\" | \"source\",\n matchValue: string,\n input: AddInput,\n ): Promise<string[]> {\n const t = this.resolveTarget(input);\n const key = matchColumn === \"key\" ? 
matchValue : null;\n const chunks = chunkText(input.content, input.chunking);\n if (chunks.length === 0) return [];\n const vectors = await this.embedder.embed(chunks);\n\n const client = await this.pool.connect();\n try {\n await client.query(\"begin\");\n await client.query(\n `update knowledge set superseded_at = now()\n where ${matchColumn} = $1 and scope = $2\n and client_id is not distinct from $3\n and project_id is not distinct from $4\n and superseded_at is null`,\n [matchValue, t.scope, t.clientId, t.projectId],\n );\n const ids = await this.insertChunksTx(client, t, input, key, chunks, vectors);\n await client.query(\"commit\");\n return ids;\n } catch (err) {\n await client.query(\"rollback\");\n throw err;\n } finally {\n client.release();\n }\n }\n\n /** Insert chunks on a fresh connection (embeds inside). */\n private async insertChunks(\n runner: pg.Pool,\n t: { scope: Scope; clientId: string | null; projectId: string | null; source: string | null },\n input: AddInput,\n key: string | null,\n ): Promise<string[]> {\n const chunks = chunkText(input.content, input.chunking);\n if (chunks.length === 0) return [];\n const vectors = await this.embedder.embed(chunks);\n const client = await runner.connect();\n try {\n await client.query(\"begin\");\n const ids = await this.insertChunksTx(client, t, input, key, chunks, vectors);\n await client.query(\"commit\");\n return ids;\n } catch (err) {\n await client.query(\"rollback\");\n throw err;\n } finally {\n client.release();\n }\n }\n\n /** Insert chunk rows on an existing transaction client. */\n private async insertChunksTx(\n client: pg.PoolClient,\n t: { scope: Scope; clientId: string | null; projectId: string | null; source: string | null },\n input: AddInput,\n key: string | null,\n chunks: string[],\n vectors: number[][],\n ): Promise<string[]> {\n const metadata = input.metadata ?? {};\n const ids: string[] = [];\n for (let i = 0; i < chunks.length; i++) {\n const chunkMeta =\n chunks.length > 1 ? 
{ ...metadata, chunk: i, chunks: chunks.length } : metadata;\n const { rows } = await client.query(\n `insert into knowledge (scope, client_id, project_id, key, source, content, embedding, metadata)\n values ($1, $2, $3, $4, $5, $6, $7, $8) returning id`,\n [t.scope, t.clientId, t.projectId, key, t.source, chunks[i], toVector(vectors[i]), chunkMeta],\n );\n ids.push(rows[0].id);\n }\n return ids;\n }\n}\n\nexport function createKnowledgeDB(config?: KnowledgeConfig): KnowledgeDB {\n return new KnowledgeDB(config);\n}\n\nfunction defaultScope(clientId: string | null, projectId: string | null): Scope {\n if (projectId) return \"project\";\n if (clientId) return \"client\";\n return \"global\";\n}\n\nfunction allows(scopes: Scope[] | undefined, scope: Scope): boolean {\n return !scopes || scopes.includes(scope);\n}\n\n/** pgvector accepts a vector literal like '[0.1,0.2,...]'. */\nfunction toVector(vec: number[]): string {\n return `[${vec.join(\",\")}]`;\n}\n\nfunction rowToResult(row: any): SearchResult {\n return { ...rowToRecord(row), score: Number(row.score) };\n}\n\nfunction rowToRecord(row: any): KnowledgeRecord {\n return {\n id: row.id,\n scope: row.scope,\n clientId: row.client_id,\n projectId: row.project_id,\n key: row.key ?? null,\n source: row.source,\n content: row.content,\n metadata: row.metadata ?? {},\n createdAt: toIso(row.created_at),\n supersededAt: row.superseded_at ? toIso(row.superseded_at) : null,\n };\n}\n\nfunction toIso(v: unknown): string {\n return v instanceof Date ? v.toISOString() : String(v);\n}\n","/**\n * All sensitive / per-deployment values live in the *consuming* project's\n * environment — never in this package. 
A website repo that installs\n * `@dibe/ai-knowledge-db` sets these in its own `.env`:\n *\n * KNOWLEDGE_DB_URL=postgres://user:pass@host:5432/knowledge (Hetzner/EasyPanel)\n * OPENAI_API_KEY=sk-...\n * KNOWLEDGE_CLIENT_ID=acme-corp # default client for this repo\n * KNOWLEDGE_PROJECT_ID=acme-website-2026 # default project for this repo\n *\n * Anything passed explicitly to createKnowledgeDB() overrides the env value,\n * but env is the intended default so callers usually pass nothing.\n */\n\nexport interface KnowledgeConfig {\n /** Postgres connection string. Defaults to env KNOWLEDGE_DB_URL. */\n connectionString?: string;\n /** OpenAI API key. Defaults to env OPENAI_API_KEY. */\n openaiApiKey?: string;\n /** Embedding model. Defaults to env KNOWLEDGE_EMBED_MODEL or text-embedding-3-small. */\n embeddingModel?: string;\n /** Default client scope for this repo. Defaults to env KNOWLEDGE_CLIENT_ID. */\n clientId?: string;\n /** Default project scope for this repo. Defaults to env KNOWLEDGE_PROJECT_ID. */\n projectId?: string;\n}\n\nexport interface ResolvedConfig {\n connectionString: string;\n openaiApiKey: string;\n embeddingModel: string;\n embeddingDimensions: number;\n clientId?: string;\n projectId?: string;\n}\n\n/** text-embedding-3-small → 1536, text-embedding-3-large → 3072. */\nconst MODEL_DIMENSIONS: Record<string, number> = {\n \"text-embedding-3-small\": 1536,\n \"text-embedding-3-large\": 3072,\n \"text-embedding-ada-002\": 1536,\n};\n\nconst env = (key: string): string | undefined => {\n const v = process.env[key];\n return v && v.trim() !== \"\" ? v.trim() : undefined;\n};\n\nexport function resolveConfig(config: KnowledgeConfig = {}): ResolvedConfig {\n const connectionString = config.connectionString ?? env(\"KNOWLEDGE_DB_URL\");\n if (!connectionString) {\n throw new Error(\n \"[ai-knowledge-db] Missing connection string. 
Set KNOWLEDGE_DB_URL in your project's .env \" +\n \"or pass { connectionString } to createKnowledgeDB().\",\n );\n }\n\n const openaiApiKey = config.openaiApiKey ?? env(\"OPENAI_API_KEY\");\n if (!openaiApiKey) {\n throw new Error(\n \"[ai-knowledge-db] Missing OpenAI key. Set OPENAI_API_KEY in your project's .env \" +\n \"or pass { openaiApiKey } to createKnowledgeDB().\",\n );\n }\n\n const embeddingModel =\n config.embeddingModel ?? env(\"KNOWLEDGE_EMBED_MODEL\") ?? \"text-embedding-3-small\";\n const embeddingDimensions = MODEL_DIMENSIONS[embeddingModel] ?? 1536;\n\n return {\n connectionString,\n openaiApiKey,\n embeddingModel,\n embeddingDimensions,\n clientId: config.clientId ?? env(\"KNOWLEDGE_CLIENT_ID\"),\n projectId: config.projectId ?? env(\"KNOWLEDGE_PROJECT_ID\"),\n };\n}\n","import OpenAI from \"openai\";\nimport type { ResolvedConfig } from \"./config.js\";\nimport type { ChunkOptions } from \"./types.js\";\n\nexport class Embedder {\n private client: OpenAI;\n private model: string;\n\n constructor(config: ResolvedConfig) {\n this.client = new OpenAI({ apiKey: config.openaiApiKey });\n this.model = config.embeddingModel;\n }\n\n /** Embed a batch of strings in one API call. */\n async embed(texts: string[]): Promise<number[][]> {\n if (texts.length === 0) return [];\n const res = await this.client.embeddings.create({\n model: this.model,\n input: texts,\n });\n // OpenAI preserves input order in the response.\n return res.data\n .sort((a, b) => a.index - b.index)\n .map((d) => d.embedding as number[]);\n }\n\n async embedOne(text: string): Promise<number[]> {\n const [vec] = await this.embed([text]);\n return vec;\n }\n}\n\n/**\n * Split text into overlapping chunks. Prefers paragraph boundaries, then\n * sentence boundaries, falling back to hard character cuts for very long runs.\n */\nexport function chunkText(text: string, opts: ChunkOptions = {}): string[] {\n const maxChars = opts.maxChars ?? 1200;\n const overlap = opts.overlap ?? 
150;\n const clean = text.replace(/\\r\\n/g, \"\\n\").trim();\n if (clean.length <= maxChars) return clean ? [clean] : [];\n\n // Split into paragraph-ish units first.\n const units = clean.split(/\\n{2,}/).flatMap((p) => splitLongUnit(p, maxChars));\n\n const chunks: string[] = [];\n let current = \"\";\n for (const unit of units) {\n if (current && current.length + unit.length + 2 > maxChars) {\n chunks.push(current.trim());\n // carry overlap from the tail of the previous chunk\n current = overlap > 0 ? current.slice(-overlap) + \"\\n\\n\" + unit : unit;\n } else {\n current = current ? current + \"\\n\\n\" + unit : unit;\n }\n }\n if (current.trim()) chunks.push(current.trim());\n return chunks;\n}\n\n/** Break a single oversized paragraph on sentence, then hard, boundaries. */\nfunction splitLongUnit(unit: string, maxChars: number): string[] {\n if (unit.length <= maxChars) return [unit];\n const sentences = unit.match(/[^.!?\\n]+[.!?]?\\s*/g) ?? [unit];\n const out: string[] = [];\n let buf = \"\";\n for (const s of sentences) {\n if (s.length > maxChars) {\n if (buf) {\n out.push(buf);\n buf = \"\";\n }\n for (let i = 0; i < s.length; i += maxChars) out.push(s.slice(i, i + maxChars));\n } else if (buf.length + s.length > maxChars) {\n out.push(buf);\n buf = s;\n } else {\n buf += s;\n }\n }\n if (buf) out.push(buf);\n return out;\n}\n","/**\n * Canonical schema (Postgres + pgvector), kept as a string so the library never\n * has to read from disk — works identically in the ESM and CJS builds. It is\n * idempotent AND upgrade-safe: running it via kb.init() on a fresh database\n * creates everything, and on an existing v0.1 database it adds the v0.2 history\n * columns in place. 
Vector size matches text-embedding-3-small (1536).\n *\n * Versioning model (v0.2): a row's lifetime is [created_at, superseded_at).\n * superseded_at IS NULL => the row is the current/active version.\n * Writing a new version of the same `key` (or `source`) stamps the old rows\n * with superseded_at = now() instead of deleting them, so history is retained\n * and searches return only the newest version by default.\n */\nexport const SCHEMA_SQL = `-- AI Knowledge DB schema (Postgres + pgvector)\n-- Run once against your Hetzner/EasyPanel Postgres instance, or via kb.init().\n-- If you switch to text-embedding-3-large, change 1536 -> 3072 and re-index.\n\ncreate extension if not exists vector;\ncreate extension if not exists \"pgcrypto\"; -- for gen_random_uuid()\n\ncreate table if not exists knowledge (\n id uuid primary key default gen_random_uuid(),\n scope text not null check (scope in ('global', 'client', 'project')),\n client_id text,\n project_id text,\n key text, -- stable identity for a versioned fact (e.g. 
'pricing.basic-plan')\n source text,\n content text not null,\n embedding vector(1536) not null,\n metadata jsonb not null default '{}',\n created_at timestamptz not null default now(), -- valid from\n superseded_at timestamptz -- valid to; null = current\n);\n\n-- Upgrade existing (v0.1) installs in place.\nalter table knowledge add column if not exists key text;\nalter table knowledge add column if not exists superseded_at timestamptz;\n\ncreate index if not exists knowledge_client_idx on knowledge (client_id);\ncreate index if not exists knowledge_project_idx on knowledge (project_id);\ncreate index if not exists knowledge_scope_idx on knowledge (scope);\ncreate index if not exists knowledge_key_idx on knowledge (key);\ncreate index if not exists knowledge_active_idx on knowledge (superseded_at);\ncreate index if not exists knowledge_metadata_idx on knowledge using gin (metadata);\n\ncreate index if not exists knowledge_embedding_idx\n on knowledge using hnsw (embedding vector_cosine_ops);\n`;\n"],"mappings":";AAAA,OAAO,QAAQ;;;ACqCf,IAAM,mBAA2C;AAAA,EAC/C,0BAA0B;AAAA,EAC1B,0BAA0B;AAAA,EAC1B,0BAA0B;AAC5B;AAEA,IAAM,MAAM,CAAC,QAAoC;AAC/C,QAAM,IAAI,QAAQ,IAAI,GAAG;AACzB,SAAO,KAAK,EAAE,KAAK,MAAM,KAAK,EAAE,KAAK,IAAI;AAC3C;AAEO,SAAS,cAAc,SAA0B,CAAC,GAAmB;AAC1E,QAAM,mBAAmB,OAAO,oBAAoB,IAAI,kBAAkB;AAC1E,MAAI,CAAC,kBAAkB;AACrB,UAAM,IAAI;AAAA,MACR;AAAA,IAEF;AAAA,EACF;AAEA,QAAM,eAAe,OAAO,gBAAgB,IAAI,gBAAgB;AAChE,MAAI,CAAC,cAAc;AACjB,UAAM,IAAI;AAAA,MACR;AAAA,IAEF;AAAA,EACF;AAEA,QAAM,iBACJ,OAAO,kBAAkB,IAAI,uBAAuB,KAAK;AAC3D,QAAM,sBAAsB,iBAAiB,cAAc,KAAK;AAEhE,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,UAAU,OAAO,YAAY,IAAI,qBAAqB;AAAA,IACtD,WAAW,OAAO,aAAa,IAAI,sBAAsB;AAAA,EAC3D;AACF;;;AC7EA,OAAO,YAAY;AAIZ,IAAM,WAAN,MAAe;AAAA,EACZ;AAAA,EACA;AAAA,EAER,YAAY,QAAwB;AAClC,SAAK,SAAS,IAAI,OAAO,EAAE,QAAQ,OAAO,aAAa,CAAC;AACxD,SAAK,QAAQ,OAAO;AAAA,EACtB;AAAA;AAAA,EAGA,MAAM,MAAM,OAAsC;AAChD,QAAI,MAAM,WAAW,EAAG,QAAO,CAAC;AAChC,UAAM,MAAM,MAAM,KAAK,OAAO,WAAW,OAAO;AAAA,MAC9C,OAAO,KAAK;
AAAA,MACZ,OAAO;AAAA,IACT,CAAC;AAED,WAAO,IAAI,KACR,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK,EAChC,IAAI,CAAC,MAAM,EAAE,SAAqB;AAAA,EACvC;AAAA,EAEA,MAAM,SAAS,MAAiC;AAC9C,UAAM,CAAC,GAAG,IAAI,MAAM,KAAK,MAAM,CAAC,IAAI,CAAC;AACrC,WAAO;AAAA,EACT;AACF;AAMO,SAAS,UAAU,MAAc,OAAqB,CAAC,GAAa;AACzE,QAAM,WAAW,KAAK,YAAY;AAClC,QAAM,UAAU,KAAK,WAAW;AAChC,QAAM,QAAQ,KAAK,QAAQ,SAAS,IAAI,EAAE,KAAK;AAC/C,MAAI,MAAM,UAAU,SAAU,QAAO,QAAQ,CAAC,KAAK,IAAI,CAAC;AAGxD,QAAM,QAAQ,MAAM,MAAM,QAAQ,EAAE,QAAQ,CAAC,MAAM,cAAc,GAAG,QAAQ,CAAC;AAE7E,QAAM,SAAmB,CAAC;AAC1B,MAAI,UAAU;AACd,aAAW,QAAQ,OAAO;AACxB,QAAI,WAAW,QAAQ,SAAS,KAAK,SAAS,IAAI,UAAU;AAC1D,aAAO,KAAK,QAAQ,KAAK,CAAC;AAE1B,gBAAU,UAAU,IAAI,QAAQ,MAAM,CAAC,OAAO,IAAI,SAAS,OAAO;AAAA,IACpE,OAAO;AACL,gBAAU,UAAU,UAAU,SAAS,OAAO;AAAA,IAChD;AAAA,EACF;AACA,MAAI,QAAQ,KAAK,EAAG,QAAO,KAAK,QAAQ,KAAK,CAAC;AAC9C,SAAO;AACT;AAGA,SAAS,cAAc,MAAc,UAA4B;AAC/D,MAAI,KAAK,UAAU,SAAU,QAAO,CAAC,IAAI;AACzC,QAAM,YAAY,KAAK,MAAM,qBAAqB,KAAK,CAAC,IAAI;AAC5D,QAAM,MAAgB,CAAC;AACvB,MAAI,MAAM;AACV,aAAW,KAAK,WAAW;AACzB,QAAI,EAAE,SAAS,UAAU;AACvB,UAAI,KAAK;AACP,YAAI,KAAK,GAAG;AACZ,cAAM;AAAA,MACR;AACA,eAAS,IAAI,GAAG,IAAI,EAAE,QAAQ,KAAK,SAAU,KAAI,KAAK,EAAE,MAAM,GAAG,IAAI,QAAQ,CAAC;AAAA,IAChF,WAAW,IAAI,SAAS,EAAE,SAAS,UAAU;AAC3C,UAAI,KAAK,GAAG;AACZ,YAAM;AAAA,IACR,OAAO;AACL,aAAO;AAAA,IACT;AAAA,EACF;AACA,MAAI,IAAK,KAAI,KAAK,GAAG;AACrB,SAAO;AACT;;;ACrEO,IAAM,aAAa;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;AHE1B,IAAM,EAAE,KAAK,IAAI;AAEV,IAAM,cAAN,MAAkB;AAAA,EACf;AAAA,EACA;AAAA,EACC;AAAA,EAET,YAAY,SAA0B,CAAC,GAAG;AACxC,SAAK,SAAS,cAAc,MAAM;AAClC,SAAK,OAAO,IAAI,KAAK,EAAE,kBAAkB,KAAK,OAAO,iBAAiB,CAAC;AACvE,SAAK,WAAW,IAAI,SAAS,KAAK,MAAM;AAAA,EAC1C;AAAA;AAAA,EAGA,MAAM,OAAsB;AAC1B,UAAM,KAAK,KAAK,MAAM,UAAU;AAAA,EAClC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,IAAI,OAAoC;AAC5C,UAAM,IAAI,KAAK,cAAc,KAAK;AAClC,WAAO,KAAK,aAAa,KAAK,MAAM,GAAG,OAAO,IAAI;AAAA,EACpD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;A
AAA,EAQA,MAAM,IAAI,OAAoC;AAC5C,WAAO,KAAK,eAAe,OAAO,MAAM,KAAK,KAAK;AAAA,EACpD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,aAAa,OAAyD;AAC1E,WAAO,KAAK,eAAe,UAAU,MAAM,QAAQ,KAAK;AAAA,EAC1D;AAAA;AAAA,EAGA,MAAM,OAAO,OAAe,OAAsB,CAAC,GAA4B;AAC7E,UAAM,WAAW,KAAK,YAAY,KAAK,OAAO;AAC9C,UAAM,YAAY,KAAK,aAAa,KAAK,OAAO;AAChD,UAAM,gBAAgB,KAAK,0BAA0B;AACrD,UAAM,gBAAgB,KAAK,iBAAiB;AAC5C,UAAM,QAAQ,KAAK,SAAS;AAC5B,UAAM,WAAW,KAAK,YAAY;AAElC,UAAM,WAAW,SAAS,MAAM,KAAK,SAAS,SAAS,KAAK,CAAC;AAE7D,UAAM,SAAoB,CAAC,QAAQ;AACnC,UAAM,IAAI,CAAC,MAAe,IAAI,OAAO,KAAK,CAAC,CAAC;AAE5C,UAAM,YAAsB,CAAC;AAC7B,QAAI,aAAa,OAAO,KAAK,QAAQ,SAAS,GAAG;AAC/C,gBAAU,KAAK,uCAAuC,EAAE,SAAS,CAAC,GAAG;AAAA,IACvE;AACA,QAAI,YAAY,iBAAiB,OAAO,KAAK,QAAQ,QAAQ,GAAG;AAC9D,gBAAU,KAAK,qCAAqC,EAAE,QAAQ,CAAC,GAAG;AAAA,IACpE;AACA,QAAI,iBAAiB,OAAO,KAAK,QAAQ,QAAQ,GAAG;AAClD,gBAAU,KAAK,kBAAkB;AAAA,IACnC;AACA,UAAM,cAAc,UAAU,SAAS,IAAI,UAAU,KAAK,MAAM,CAAC,MAAM;AAEvE,UAAM,QAAkB,CAAC,WAAW;AACpC,QAAI,KAAK,UAAU;AACjB,YAAM,KAAK,eAAe,EAAE,KAAK,UAAU,KAAK,QAAQ,CAAC,CAAC,SAAS;AAAA,IACrE;AAEA,QAAI,KAAK,SAAS,QAAW;AAC3B,YAAM,KAAK,OAAO,KAAK,SAAS,WAAW,KAAK,OAAO,KAAK,KAAK,YAAY;AAC7E,YAAM;AAAA,QACJ,iBAAiB,EAAE,EAAE,CAAC,+DAA+D,EAAE,EAAE,CAAC;AAAA,MAC5F;AAAA,IACF,WAAW,CAAC,KAAK,gBAAgB;AAC/B,YAAM,KAAK,uBAAuB;AAAA,IACpC;AAEA,UAAM,EAAE,KAAK,IAAI,MAAM,KAAK,KAAK;AAAA,MAC/B;AAAA;AAAA;AAAA,eAGS,MAAM,KAAK,OAAO,CAAC;AAAA;AAAA,eAEnB,EAAE,KAAK,CAAC;AAAA,MACjB;AAAA,IACF;AAEA,WAAO,KAAK,IAAI,WAAW,EAAE,OAAO,CAAC,MAAM,EAAE,SAAS,QAAQ;AAAA,EAChE;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,QAAQ,KAAa,OAAuB,CAAC,GAA+B;AAChF,UAAM,WAAW,KAAK,YAAY,KAAK,OAAO,YAAY;AAC1D,UAAM,YAAY,KAAK,aAAa,KAAK,OAAO,aAAa;AAC7D,UAAM,QAAQ,KAAK,SAAS,aAAa,UAAU,SAAS;AAC5D,UAAM,iBAAiB,KAAK,kBAAkB;AAC9C,UAAM,QAAQ;AAAA,MACZ;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AACA,QAAI,CAAC,eAAgB,OAAM,KAAK,uBAAuB;AACvD,UAAM,EAAE,KAAK,IAAI,MAAM,KAAK,KAAK;AAAA,MAC/B;AAAA;AAAA,eAES,MAAM,KAAK,OAAO,CAAC;AAAA;AAAA,MAE5B,CAAC,KAAK,OAAO,UAAU,SAAS;AAAA,IAClC;AACA,WAAO,KAAK,IAAI,WAAW;AAAA,EAC7B;AAAA;AAAA,EAGA,MAAM,OAAO,QAAuC;AAClD,UAAM,QAAkB
,CAAC;AACzB,UAAM,SAAoB,CAAC;AAC3B,UAAM,IAAI,CAAC,MAAe,IAAI,OAAO,KAAK,CAAC,CAAC;AAC5C,QAAI,OAAO,GAAI,OAAM,KAAK,QAAQ,EAAE,OAAO,EAAE,CAAC,EAAE;AAChD,QAAI,OAAO,MAAO,OAAM,KAAK,WAAW,EAAE,OAAO,KAAK,CAAC,EAAE;AACzD,QAAI,OAAO,SAAU,OAAM,KAAK,eAAe,EAAE,OAAO,QAAQ,CAAC,EAAE;AACnE,QAAI,OAAO,UAAW,OAAM,KAAK,gBAAgB,EAAE,OAAO,SAAS,CAAC,EAAE;AACtE,QAAI,OAAO,OAAQ,OAAM,KAAK,YAAY,EAAE,OAAO,MAAM,CAAC,EAAE;AAC5D,QAAI,OAAO,IAAK,OAAM,KAAK,SAAS,EAAE,OAAO,GAAG,CAAC,EAAE;AACnD,QAAI,MAAM,WAAW,GAAG;AACtB,YAAM,IAAI,MAAM,oFAAoF;AAAA,IACtG;AACA,UAAM,EAAE,SAAS,IAAI,MAAM,KAAK,KAAK;AAAA,MACnC,+BAA+B,MAAM,KAAK,OAAO,CAAC;AAAA,MAClD;AAAA,IACF;AACA,WAAO,YAAY;AAAA,EACrB;AAAA;AAAA,EAGA,MAAM,QAAuB;AAC3B,UAAM,KAAK,KAAK,IAAI;AAAA,EACtB;AAAA;AAAA,EAIQ,cAAc,OAKpB;AACA,UAAM,WAAW,MAAM,YAAY,KAAK,OAAO,YAAY;AAC3D,UAAM,YAAY,MAAM,aAAa,KAAK,OAAO,aAAa;AAC9D,WAAO;AAAA,MACL;AAAA,MACA;AAAA,MACA,OAAO,MAAM,SAAS,aAAa,UAAU,SAAS;AAAA,MACtD,QAAQ,MAAM,UAAU;AAAA,IAC1B;AAAA,EACF;AAAA;AAAA,EAGA,MAAc,eACZ,aACA,YACA,OACmB;AACnB,UAAM,IAAI,KAAK,cAAc,KAAK;AAClC,UAAM,MAAM,gBAAgB,QAAQ,aAAa;AACjD,UAAM,SAAS,UAAU,MAAM,SAAS,MAAM,QAAQ;AACtD,QAAI,OAAO,WAAW,EAAG,QAAO,CAAC;AACjC,UAAM,UAAU,MAAM,KAAK,SAAS,MAAM,MAAM;AAEhD,UAAM,SAAS,MAAM,KAAK,KAAK,QAAQ;AACvC,QAAI;AACF,YAAM,OAAO,MAAM,OAAO;AAC1B,YAAM,OAAO;AAAA,QACX;AAAA,iBACS,WAAW;AAAA;AAAA;AAAA;AAAA,QAIpB,CAAC,YAAY,EAAE,OAAO,EAAE,UAAU,EAAE,SAAS;AAAA,MAC/C;AACA,YAAM,MAAM,MAAM,KAAK,eAAe,QAAQ,GAAG,OAAO,KAAK,QAAQ,OAAO;AAC5E,YAAM,OAAO,MAAM,QAAQ;AAC3B,aAAO;AAAA,IACT,SAAS,KAAK;AACZ,YAAM,OAAO,MAAM,UAAU;AAC7B,YAAM;AAAA,IACR,UAAE;AACA,aAAO,QAAQ;AAAA,IACjB;AAAA,EACF;AAAA;AAAA,EAGA,MAAc,aACZ,QACA,GACA,OACA,KACmB;AACnB,UAAM,SAAS,UAAU,MAAM,SAAS,MAAM,QAAQ;AACtD,QAAI,OAAO,WAAW,EAAG,QAAO,CAAC;AACjC,UAAM,UAAU,MAAM,KAAK,SAAS,MAAM,MAAM;AAChD,UAAM,SAAS,MAAM,OAAO,QAAQ;AACpC,QAAI;AACF,YAAM,OAAO,MAAM,OAAO;AAC1B,YAAM,MAAM,MAAM,KAAK,eAAe,QAAQ,GAAG,OAAO,KAAK,QAAQ,OAAO;AAC5E,YAAM,OAAO,MAAM,QAAQ;AAC3B,aAAO;AAAA,IACT,SAAS,KAAK;AACZ,YAAM,OAAO,MAAM,UAAU;AAC7B,YAAM;AAAA,IACR,UAAE;AACA,aAAO,QAAQ;AAAA,IACjB;AAAA,EACF;AAAA;AAAA,EAGA,MAAc,e
ACZ,QACA,GACA,OACA,KACA,QACA,SACmB;AACnB,UAAM,WAAW,MAAM,YAAY,CAAC;AACpC,UAAM,MAAgB,CAAC;AACvB,aAAS,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;AACtC,YAAM,YACJ,OAAO,SAAS,IAAI,EAAE,GAAG,UAAU,OAAO,GAAG,QAAQ,OAAO,OAAO,IAAI;AACzE,YAAM,EAAE,KAAK,IAAI,MAAM,OAAO;AAAA,QAC5B;AAAA;AAAA,QAEA,CAAC,EAAE,OAAO,EAAE,UAAU,EAAE,WAAW,KAAK,EAAE,QAAQ,OAAO,CAAC,GAAG,SAAS,QAAQ,CAAC,CAAC,GAAG,SAAS;AAAA,MAC9F;AACA,UAAI,KAAK,KAAK,CAAC,EAAE,EAAE;AAAA,IACrB;AACA,WAAO;AAAA,EACT;AACF;AAEO,SAAS,kBAAkB,QAAuC;AACvE,SAAO,IAAI,YAAY,MAAM;AAC/B;AAEA,SAAS,aAAa,UAAyB,WAAiC;AAC9E,MAAI,UAAW,QAAO;AACtB,MAAI,SAAU,QAAO;AACrB,SAAO;AACT;AAEA,SAAS,OAAO,QAA6B,OAAuB;AAClE,SAAO,CAAC,UAAU,OAAO,SAAS,KAAK;AACzC;AAGA,SAAS,SAAS,KAAuB;AACvC,SAAO,IAAI,IAAI,KAAK,GAAG,CAAC;AAC1B;AAEA,SAAS,YAAY,KAAwB;AAC3C,SAAO,EAAE,GAAG,YAAY,GAAG,GAAG,OAAO,OAAO,IAAI,KAAK,EAAE;AACzD;AAEA,SAAS,YAAY,KAA2B;AAC9C,SAAO;AAAA,IACL,IAAI,IAAI;AAAA,IACR,OAAO,IAAI;AAAA,IACX,UAAU,IAAI;AAAA,IACd,WAAW,IAAI;AAAA,IACf,KAAK,IAAI,OAAO;AAAA,IAChB,QAAQ,IAAI;AAAA,IACZ,SAAS,IAAI;AAAA,IACb,UAAU,IAAI,YAAY,CAAC;AAAA,IAC3B,WAAW,MAAM,IAAI,UAAU;AAAA,IAC/B,cAAc,IAAI,gBAAgB,MAAM,IAAI,aAAa,IAAI;AAAA,EAC/D;AACF;AAEA,SAAS,MAAM,GAAoB;AACjC,SAAO,aAAa,OAAO,EAAE,YAAY,IAAI,OAAO,CAAC;AACvD;","names":[]}
|
|
@@ -1,30 +1,36 @@
|
|
|
1
|
-
-- AI Knowledge DB schema (Postgres + pgvector)
|
|
1
|
+
-- AI Knowledge DB schema (Postgres + pgvector) — current, idempotent + upgrade-safe.
|
|
2
2
|
-- Run once against your Hetzner/EasyPanel Postgres instance, or via kb.init().
|
|
3
|
-
-- The vector dimension below matches text-embedding-3-small (1536).
|
|
4
3
|
-- If you switch to text-embedding-3-large, change 1536 -> 3072 and re-index.
|
|
4
|
+
-- This file mirrors the schema the library applies at runtime (see 002_add_history.sql
|
|
5
|
+
-- for the v0.2 history delta in isolation).
|
|
5
6
|
|
|
6
7
|
create extension if not exists vector;
|
|
7
8
|
create extension if not exists "pgcrypto"; -- for gen_random_uuid()
|
|
8
9
|
|
|
9
10
|
create table if not exists knowledge (
|
|
10
|
-
id
|
|
11
|
-
scope
|
|
12
|
-
client_id
|
|
13
|
-
project_id
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
11
|
+
id uuid primary key default gen_random_uuid(),
|
|
12
|
+
scope text not null check (scope in ('global', 'client', 'project')),
|
|
13
|
+
client_id text,
|
|
14
|
+
project_id text,
|
|
15
|
+
key text, -- stable identity for a versioned fact (e.g. 'pricing.basic-plan')
|
|
16
|
+
source text,
|
|
17
|
+
content text not null,
|
|
18
|
+
embedding vector(1536) not null,
|
|
19
|
+
metadata jsonb not null default '{}',
|
|
20
|
+
created_at timestamptz not null default now(), -- valid from
|
|
21
|
+
superseded_at timestamptz -- valid to; null = current
|
|
19
22
|
);
|
|
20
23
|
|
|
21
|
-
--
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
24
|
+
-- Upgrade existing (v0.1) installs in place.
|
|
25
|
+
alter table knowledge add column if not exists key text;
|
|
26
|
+
alter table knowledge add column if not exists superseded_at timestamptz;
|
|
27
|
+
|
|
28
|
+
create index if not exists knowledge_client_idx on knowledge (client_id);
|
|
29
|
+
create index if not exists knowledge_project_idx on knowledge (project_id);
|
|
30
|
+
create index if not exists knowledge_scope_idx on knowledge (scope);
|
|
31
|
+
create index if not exists knowledge_key_idx on knowledge (key);
|
|
32
|
+
create index if not exists knowledge_active_idx on knowledge (superseded_at);
|
|
25
33
|
create index if not exists knowledge_metadata_idx on knowledge using gin (metadata);
|
|
26
34
|
|
|
27
|
-
-- Approximate nearest-neighbour index for fast cosine search.
|
|
28
|
-
-- HNSW gives good recall/speed without tuning lists like IVFFlat.
|
|
29
35
|
create index if not exists knowledge_embedding_idx
|
|
30
36
|
on knowledge using hnsw (embedding vector_cosine_ops);
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
-- v0.2.0: versioned knowledge (keep history, prefer newest)
|
|
2
|
+
-- Safe to run on a v0.1 database. kb.init() applies this automatically.
|
|
3
|
+
--
|
|
4
|
+
-- A row's lifetime is [created_at, superseded_at). superseded_at IS NULL means
|
|
5
|
+
-- it's the current version. put()/upsertSource() stamp the old rows superseded
|
|
6
|
+
-- instead of deleting them, so history is retained and searches return the
|
|
7
|
+
-- newest version by default.
|
|
8
|
+
|
|
9
|
+
alter table knowledge add column if not exists key text;
|
|
10
|
+
alter table knowledge add column if not exists superseded_at timestamptz;
|
|
11
|
+
|
|
12
|
+
create index if not exists knowledge_key_idx on knowledge (key);
|
|
13
|
+
create index if not exists knowledge_active_idx on knowledge (superseded_at);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@digitalvibes/ai-knowledge-db",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "AI knowledge vector storage (Postgres + pgvector) for Dibe website/project work. Stores client- and project-scoped knowledge and serves it back via semantic search.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|