@rce-mcp/data-plane 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.tsbuildinfo +1 -0
- package/dist/index.d.ts +633 -0
- package/dist/index.js +2207 -0
- package/dist/sqlite-runtime.d.ts +18 -0
- package/dist/sqlite-runtime.js +71 -0
- package/package.json +22 -0
- package/src/index.ts +3393 -0
- package/src/sqlite-runtime.ts +137 -0
- package/test/ioredis-mock.d.ts +1 -0
- package/test/queue.integration.test.ts +129 -0
- package/test/runtime-mode.test.ts +56 -0
- package/test/sqlite-queue.integration.test.ts +54 -0
- package/test/usage-metering.integration.test.ts +71 -0
- package/tsconfig.build.json +13 -0
- package/tsconfig.json +4 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,2207 @@
|
|
|
1
|
+
import { createHash, randomUUID } from "node:crypto";
|
|
2
|
+
import { mkdirSync } from "node:fs";
|
|
3
|
+
import { dirname, resolve as resolvePath } from "node:path";
|
|
4
|
+
import { Pool } from "pg";
|
|
5
|
+
import { Redis } from "ioredis";
|
|
6
|
+
import { openSqliteDatabase } from "./sqlite-runtime.js";
|
|
7
|
+
export { sqliteRuntimeDriverName } from "./sqlite-runtime.js";
|
|
8
|
+
const RUNTIME_MODES = new Set(["cloud", "local", "hybrid"]);
|
|
9
|
+
export const CLOUD_REQUIRED_ENV_VARS = [
|
|
10
|
+
"DATABASE_URL",
|
|
11
|
+
"REDIS_URL",
|
|
12
|
+
"S3_BUCKET",
|
|
13
|
+
"S3_REGION",
|
|
14
|
+
"S3_ACCESS_KEY_ID",
|
|
15
|
+
"S3_SECRET_ACCESS_KEY"
|
|
16
|
+
];
|
|
17
|
+
export function parseRuntimeMode(value) {
|
|
18
|
+
const normalized = (value ?? "hybrid").trim().toLowerCase();
|
|
19
|
+
if (RUNTIME_MODES.has(normalized)) {
|
|
20
|
+
return normalized;
|
|
21
|
+
}
|
|
22
|
+
throw new Error(`invalid RCE_RUNTIME_MODE: ${value ?? ""}. Expected cloud|local|hybrid.`);
|
|
23
|
+
}
|
|
24
|
+
export function missingCloudRuntimeEnvVars(env) {
|
|
25
|
+
return CLOUD_REQUIRED_ENV_VARS.filter((key) => {
|
|
26
|
+
const value = env[key];
|
|
27
|
+
return typeof value !== "string" || value.trim() === "";
|
|
28
|
+
});
|
|
29
|
+
}
|
|
30
|
+
export function isCloudRuntimeConfigured(env) {
|
|
31
|
+
return missingCloudRuntimeEnvVars(env).length === 0;
|
|
32
|
+
}
|
|
33
|
+
export function resolveRuntimeMode(env) {
|
|
34
|
+
const requested_mode = parseRuntimeMode(env.RCE_RUNTIME_MODE);
|
|
35
|
+
const missing_cloud_vars = missingCloudRuntimeEnvVars(env);
|
|
36
|
+
const cloud_configured = missing_cloud_vars.length === 0;
|
|
37
|
+
if (requested_mode === "cloud") {
|
|
38
|
+
if (!cloud_configured) {
|
|
39
|
+
throw new Error(`RCE_RUNTIME_MODE=cloud requires: ${missing_cloud_vars.join(", ")}`);
|
|
40
|
+
}
|
|
41
|
+
return {
|
|
42
|
+
requested_mode,
|
|
43
|
+
effective_mode: "cloud",
|
|
44
|
+
cloud_configured,
|
|
45
|
+
missing_cloud_vars
|
|
46
|
+
};
|
|
47
|
+
}
|
|
48
|
+
if (requested_mode === "local") {
|
|
49
|
+
return {
|
|
50
|
+
requested_mode,
|
|
51
|
+
effective_mode: "local",
|
|
52
|
+
cloud_configured,
|
|
53
|
+
missing_cloud_vars
|
|
54
|
+
};
|
|
55
|
+
}
|
|
56
|
+
return {
|
|
57
|
+
requested_mode,
|
|
58
|
+
effective_mode: cloud_configured ? "cloud" : "local",
|
|
59
|
+
cloud_configured,
|
|
60
|
+
missing_cloud_vars
|
|
61
|
+
};
|
|
62
|
+
}
|
|
63
|
+
const DEFAULT_CANDIDATE_SCORE_WEIGHTS = {
|
|
64
|
+
lexical_weight: 0.6,
|
|
65
|
+
vector_weight: 0.4,
|
|
66
|
+
path_match_boost: 0.2,
|
|
67
|
+
recency_boost: 0.1,
|
|
68
|
+
generated_penalty: 0.2
|
|
69
|
+
};
|
|
70
|
+
function sha256(value) {
|
|
71
|
+
return createHash("sha256").update(value).digest("hex");
|
|
72
|
+
}
|
|
73
|
+
function parseEmbedding(raw) {
|
|
74
|
+
if (Array.isArray(raw)) {
|
|
75
|
+
return raw.map((value) => Number(value));
|
|
76
|
+
}
|
|
77
|
+
if (typeof raw === "string") {
|
|
78
|
+
const trimmed = raw.trim();
|
|
79
|
+
const body = trimmed.startsWith("[") && trimmed.endsWith("]") ? trimmed.slice(1, -1) : trimmed;
|
|
80
|
+
if (body.length === 0) {
|
|
81
|
+
return [];
|
|
82
|
+
}
|
|
83
|
+
return body.split(",").map((value) => Number.parseFloat(value.trim()));
|
|
84
|
+
}
|
|
85
|
+
return [];
|
|
86
|
+
}
|
|
87
|
+
function toVectorLiteral(embedding) {
|
|
88
|
+
return `[${embedding.join(",")}]`;
|
|
89
|
+
}
|
|
90
|
+
function toIsoString(value) {
|
|
91
|
+
if (value instanceof Date) {
|
|
92
|
+
return value.toISOString();
|
|
93
|
+
}
|
|
94
|
+
if (typeof value === "string") {
|
|
95
|
+
return value;
|
|
96
|
+
}
|
|
97
|
+
return new Date(String(value)).toISOString();
|
|
98
|
+
}
|
|
99
|
+
async function runTx(pool, fn) {
|
|
100
|
+
const client = await pool.connect();
|
|
101
|
+
try {
|
|
102
|
+
await client.query("BEGIN");
|
|
103
|
+
const output = await fn(client);
|
|
104
|
+
await client.query("COMMIT");
|
|
105
|
+
return output;
|
|
106
|
+
}
|
|
107
|
+
catch (error) {
|
|
108
|
+
await client.query("ROLLBACK");
|
|
109
|
+
throw error;
|
|
110
|
+
}
|
|
111
|
+
finally {
|
|
112
|
+
client.release();
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
function ensureSqliteParent(dbPath) {
|
|
116
|
+
if (dbPath === ":memory:") {
|
|
117
|
+
return;
|
|
118
|
+
}
|
|
119
|
+
mkdirSync(dirname(resolvePath(dbPath)), { recursive: true });
|
|
120
|
+
}
|
|
121
|
+
function compileGlob(glob) {
|
|
122
|
+
const escaped = glob.replace(/[.+^${}()|[\]\\]/g, "\\$&");
|
|
123
|
+
return new RegExp(`^${escaped.replace(/\*/g, ".*").replace(/\?/g, ".")}$`);
|
|
124
|
+
}
|
|
125
|
+
function sqliteBool(value) {
|
|
126
|
+
return Number(value) === 1;
|
|
127
|
+
}
|
|
128
|
+
function nowIso() {
|
|
129
|
+
return new Date().toISOString();
|
|
130
|
+
}
|
|
131
|
+
function percentile(values, p) {
|
|
132
|
+
if (values.length === 0) {
|
|
133
|
+
return 0;
|
|
134
|
+
}
|
|
135
|
+
const sorted = [...values].sort((a, b) => a - b);
|
|
136
|
+
const idx = Math.min(sorted.length - 1, Math.max(0, Math.ceil(sorted.length * p) - 1));
|
|
137
|
+
return sorted[idx] ?? 0;
|
|
138
|
+
}
|
|
139
|
+
function tokenizeForRanking(text) {
|
|
140
|
+
const coarseTokens = text
|
|
141
|
+
.toLowerCase()
|
|
142
|
+
.split(/[^a-z0-9_./-]+/)
|
|
143
|
+
.map((token) => token.trim())
|
|
144
|
+
.filter(Boolean);
|
|
145
|
+
const expandedTokens = [];
|
|
146
|
+
for (const token of coarseTokens) {
|
|
147
|
+
expandedTokens.push(token);
|
|
148
|
+
for (const part of token.split(/[./_-]+/).filter(Boolean)) {
|
|
149
|
+
expandedTokens.push(part);
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
return [...new Set(expandedTokens)];
|
|
153
|
+
}
|
|
154
|
+
function lexicalScoreForRanking(queryTokens, haystack) {
|
|
155
|
+
if (queryTokens.length === 0) {
|
|
156
|
+
return 0;
|
|
157
|
+
}
|
|
158
|
+
const haystackTokens = new Set(tokenizeForRanking(haystack));
|
|
159
|
+
let overlap = 0;
|
|
160
|
+
for (const token of queryTokens) {
|
|
161
|
+
if (haystackTokens.has(token)) {
|
|
162
|
+
overlap += 1;
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
return overlap / queryTokens.length;
|
|
166
|
+
}
|
|
167
|
+
function cosineSimilarity(a, b) {
|
|
168
|
+
if (a.length === 0 || b.length === 0) {
|
|
169
|
+
return 0;
|
|
170
|
+
}
|
|
171
|
+
const max = Math.min(a.length, b.length);
|
|
172
|
+
let dot = 0;
|
|
173
|
+
let normA = 0;
|
|
174
|
+
let normB = 0;
|
|
175
|
+
for (let i = 0; i < max; i += 1) {
|
|
176
|
+
dot += (a[i] ?? 0) * (b[i] ?? 0);
|
|
177
|
+
normA += (a[i] ?? 0) * (a[i] ?? 0);
|
|
178
|
+
normB += (b[i] ?? 0) * (b[i] ?? 0);
|
|
179
|
+
}
|
|
180
|
+
if (normA === 0 || normB === 0) {
|
|
181
|
+
return 0;
|
|
182
|
+
}
|
|
183
|
+
return dot / (Math.sqrt(normA) * Math.sqrt(normB));
|
|
184
|
+
}
|
|
185
|
+
export class SqliteIndexRepository {
|
|
186
|
+
dbPath;
|
|
187
|
+
db;
|
|
188
|
+
constructor(dbPath) {
|
|
189
|
+
this.dbPath = dbPath;
|
|
190
|
+
ensureSqliteParent(dbPath);
|
|
191
|
+
this.db = openSqliteDatabase(dbPath);
|
|
192
|
+
this.db.exec("PRAGMA journal_mode = WAL;");
|
|
193
|
+
this.db.exec("PRAGMA foreign_keys = ON;");
|
|
194
|
+
}
|
|
195
|
+
close() {
|
|
196
|
+
this.db.close();
|
|
197
|
+
}
|
|
198
|
+
async migrate() {
|
|
199
|
+
this.db.exec(`
|
|
200
|
+
CREATE TABLE IF NOT EXISTS workspaces (
|
|
201
|
+
id TEXT PRIMARY KEY,
|
|
202
|
+
tenant_id TEXT NOT NULL,
|
|
203
|
+
name TEXT NOT NULL,
|
|
204
|
+
project_root_path TEXT NOT NULL,
|
|
205
|
+
created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')),
|
|
206
|
+
UNIQUE (tenant_id, project_root_path)
|
|
207
|
+
);
|
|
208
|
+
|
|
209
|
+
CREATE TABLE IF NOT EXISTS indexes (
|
|
210
|
+
id TEXT PRIMARY KEY,
|
|
211
|
+
tenant_id TEXT NOT NULL,
|
|
212
|
+
workspace_id TEXT NOT NULL,
|
|
213
|
+
version TEXT NOT NULL,
|
|
214
|
+
status TEXT NOT NULL,
|
|
215
|
+
created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')),
|
|
216
|
+
updated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')),
|
|
217
|
+
UNIQUE (workspace_id, version)
|
|
218
|
+
);
|
|
219
|
+
|
|
220
|
+
CREATE TABLE IF NOT EXISTS manifests (
|
|
221
|
+
id TEXT PRIMARY KEY,
|
|
222
|
+
index_id TEXT NOT NULL,
|
|
223
|
+
object_key TEXT NOT NULL,
|
|
224
|
+
checksum TEXT NOT NULL,
|
|
225
|
+
created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))
|
|
226
|
+
);
|
|
227
|
+
|
|
228
|
+
CREATE TABLE IF NOT EXISTS index_metadata (
|
|
229
|
+
index_id TEXT PRIMARY KEY,
|
|
230
|
+
tenant_id TEXT NOT NULL,
|
|
231
|
+
embedding_provider TEXT NOT NULL,
|
|
232
|
+
embedding_model TEXT,
|
|
233
|
+
embedding_dimensions INTEGER NOT NULL,
|
|
234
|
+
embedding_version TEXT,
|
|
235
|
+
chunking_strategy TEXT NOT NULL,
|
|
236
|
+
chunking_fallback_strategy TEXT NOT NULL,
|
|
237
|
+
created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))
|
|
238
|
+
);
|
|
239
|
+
|
|
240
|
+
CREATE TABLE IF NOT EXISTS files (
|
|
241
|
+
id TEXT PRIMARY KEY,
|
|
242
|
+
tenant_id TEXT NOT NULL,
|
|
243
|
+
index_id TEXT NOT NULL,
|
|
244
|
+
repo_path TEXT NOT NULL,
|
|
245
|
+
content_hash TEXT NOT NULL,
|
|
246
|
+
size_bytes INTEGER NOT NULL,
|
|
247
|
+
language TEXT,
|
|
248
|
+
warning_metadata TEXT,
|
|
249
|
+
updated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')),
|
|
250
|
+
UNIQUE (index_id, repo_path)
|
|
251
|
+
);
|
|
252
|
+
|
|
253
|
+
CREATE TABLE IF NOT EXISTS chunks (
|
|
254
|
+
id TEXT PRIMARY KEY,
|
|
255
|
+
tenant_id TEXT NOT NULL,
|
|
256
|
+
file_id TEXT NOT NULL,
|
|
257
|
+
repo_path TEXT NOT NULL,
|
|
258
|
+
start_line INTEGER NOT NULL,
|
|
259
|
+
end_line INTEGER NOT NULL,
|
|
260
|
+
text TEXT NOT NULL,
|
|
261
|
+
embedding TEXT NOT NULL,
|
|
262
|
+
generated INTEGER NOT NULL DEFAULT 0,
|
|
263
|
+
lexical_doc TEXT,
|
|
264
|
+
updated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))
|
|
265
|
+
);
|
|
266
|
+
`);
|
|
267
|
+
this.db.exec("CREATE INDEX IF NOT EXISTS idx_sqlite_workspaces_tenant_path ON workspaces(tenant_id, project_root_path)");
|
|
268
|
+
this.db.exec("CREATE INDEX IF NOT EXISTS idx_sqlite_indexes_workspace_status ON indexes(workspace_id, status, created_at DESC)");
|
|
269
|
+
this.db.exec("CREATE INDEX IF NOT EXISTS idx_sqlite_index_metadata_tenant_index ON index_metadata(tenant_id, index_id)");
|
|
270
|
+
this.db.exec("CREATE INDEX IF NOT EXISTS idx_sqlite_files_index_path ON files(index_id, repo_path)");
|
|
271
|
+
this.db.exec("CREATE INDEX IF NOT EXISTS idx_sqlite_chunks_file_id ON chunks(file_id)");
|
|
272
|
+
this.db.exec("CREATE INDEX IF NOT EXISTS idx_sqlite_chunks_repo_path ON chunks(repo_path)");
|
|
273
|
+
this.db.exec("CREATE INDEX IF NOT EXISTS idx_sqlite_chunks_lexical_doc ON chunks(lexical_doc)");
|
|
274
|
+
}
|
|
275
|
+
async upsertWorkspace(input) {
|
|
276
|
+
this.db
|
|
277
|
+
.prepare(`
|
|
278
|
+
INSERT INTO workspaces (id, tenant_id, name, project_root_path)
|
|
279
|
+
VALUES (?, ?, ?, ?)
|
|
280
|
+
ON CONFLICT (id)
|
|
281
|
+
DO UPDATE SET tenant_id = excluded.tenant_id, name = excluded.name, project_root_path = excluded.project_root_path
|
|
282
|
+
`)
|
|
283
|
+
.run(input.workspace_id, input.tenant_id, input.name, input.project_root_path);
|
|
284
|
+
}
|
|
285
|
+
async resolveWorkspaceByProjectRoot(tenant_id, project_root_path) {
|
|
286
|
+
const row = this.db
|
|
287
|
+
.prepare(`
|
|
288
|
+
SELECT id AS workspace_id, tenant_id, project_root_path, name
|
|
289
|
+
FROM workspaces
|
|
290
|
+
WHERE tenant_id = ? AND project_root_path = ?
|
|
291
|
+
LIMIT 1
|
|
292
|
+
`)
|
|
293
|
+
.get(tenant_id, project_root_path);
|
|
294
|
+
if (!row) {
|
|
295
|
+
return undefined;
|
|
296
|
+
}
|
|
297
|
+
return {
|
|
298
|
+
workspace_id: row.workspace_id,
|
|
299
|
+
tenant_id: row.tenant_id,
|
|
300
|
+
project_root_path: row.project_root_path,
|
|
301
|
+
name: row.name
|
|
302
|
+
};
|
|
303
|
+
}
|
|
304
|
+
async resolveWorkspaceByWorkspaceId(tenant_id, workspace_id) {
|
|
305
|
+
const row = this.db
|
|
306
|
+
.prepare(`
|
|
307
|
+
SELECT id AS workspace_id, tenant_id, project_root_path, name
|
|
308
|
+
FROM workspaces
|
|
309
|
+
WHERE tenant_id = ? AND id = ?
|
|
310
|
+
LIMIT 1
|
|
311
|
+
`)
|
|
312
|
+
.get(tenant_id, workspace_id);
|
|
313
|
+
if (!row) {
|
|
314
|
+
return undefined;
|
|
315
|
+
}
|
|
316
|
+
return {
|
|
317
|
+
workspace_id: row.workspace_id,
|
|
318
|
+
tenant_id: row.tenant_id,
|
|
319
|
+
project_root_path: row.project_root_path,
|
|
320
|
+
name: row.name
|
|
321
|
+
};
|
|
322
|
+
}
|
|
323
|
+
async createIndexVersion(input) {
|
|
324
|
+
const index_id = `idx_${randomUUID()}`;
|
|
325
|
+
const status = input.status ?? "indexing";
|
|
326
|
+
const now = nowIso();
|
|
327
|
+
this.db
|
|
328
|
+
.prepare(`
|
|
329
|
+
INSERT INTO indexes (id, tenant_id, workspace_id, version, status, created_at, updated_at)
|
|
330
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
331
|
+
`)
|
|
332
|
+
.run(index_id, input.tenant_id, input.workspace_id, input.index_version, status, now, now);
|
|
333
|
+
return {
|
|
334
|
+
index_id,
|
|
335
|
+
workspace_id: input.workspace_id,
|
|
336
|
+
tenant_id: input.tenant_id,
|
|
337
|
+
index_version: input.index_version,
|
|
338
|
+
status,
|
|
339
|
+
created_at: now,
|
|
340
|
+
updated_at: now
|
|
341
|
+
};
|
|
342
|
+
}
|
|
343
|
+
async markIndexStatus(input) {
|
|
344
|
+
this.db
|
|
345
|
+
.prepare(`
|
|
346
|
+
UPDATE indexes
|
|
347
|
+
SET status = ?, updated_at = ?
|
|
348
|
+
WHERE id = ? AND tenant_id = ? AND workspace_id = ?
|
|
349
|
+
`)
|
|
350
|
+
.run(input.status, nowIso(), input.index_id, input.tenant_id, input.workspace_id);
|
|
351
|
+
}
|
|
352
|
+
async getIndexByVersion(input) {
|
|
353
|
+
const row = this.db
|
|
354
|
+
.prepare(`
|
|
355
|
+
SELECT id AS index_id, workspace_id, tenant_id, version AS index_version, status, created_at, updated_at
|
|
356
|
+
FROM indexes
|
|
357
|
+
WHERE tenant_id = ? AND workspace_id = ? AND version = ?
|
|
358
|
+
LIMIT 1
|
|
359
|
+
`)
|
|
360
|
+
.get(input.tenant_id, input.workspace_id, input.index_version);
|
|
361
|
+
if (!row) {
|
|
362
|
+
return undefined;
|
|
363
|
+
}
|
|
364
|
+
return {
|
|
365
|
+
...row,
|
|
366
|
+
created_at: toIsoString(row.created_at),
|
|
367
|
+
updated_at: toIsoString(row.updated_at)
|
|
368
|
+
};
|
|
369
|
+
}
|
|
370
|
+
async resetIndexContent(input) {
|
|
371
|
+
this.db.exec("BEGIN");
|
|
372
|
+
try {
|
|
373
|
+
this.db
|
|
374
|
+
.prepare(`
|
|
375
|
+
DELETE FROM chunks
|
|
376
|
+
WHERE tenant_id = ? AND file_id IN (
|
|
377
|
+
SELECT id FROM files WHERE tenant_id = ? AND index_id = ?
|
|
378
|
+
)
|
|
379
|
+
`)
|
|
380
|
+
.run(input.tenant_id, input.tenant_id, input.index_id);
|
|
381
|
+
this.db.prepare("DELETE FROM files WHERE tenant_id = ? AND index_id = ?").run(input.tenant_id, input.index_id);
|
|
382
|
+
this.db
|
|
383
|
+
.prepare(`
|
|
384
|
+
DELETE FROM manifests
|
|
385
|
+
WHERE index_id = ?
|
|
386
|
+
`)
|
|
387
|
+
.run(input.index_id);
|
|
388
|
+
this.db.exec("COMMIT");
|
|
389
|
+
}
|
|
390
|
+
catch (error) {
|
|
391
|
+
this.db.exec("ROLLBACK");
|
|
392
|
+
throw error;
|
|
393
|
+
}
|
|
394
|
+
}
|
|
395
|
+
async getLatestReadyIndex(input) {
|
|
396
|
+
const row = this.db
|
|
397
|
+
.prepare(`
|
|
398
|
+
SELECT id AS index_id, workspace_id, tenant_id, version AS index_version, status, created_at, updated_at
|
|
399
|
+
FROM indexes
|
|
400
|
+
WHERE tenant_id = ? AND workspace_id = ? AND status = 'ready'
|
|
401
|
+
ORDER BY created_at DESC
|
|
402
|
+
LIMIT 1
|
|
403
|
+
`)
|
|
404
|
+
.get(input.tenant_id, input.workspace_id);
|
|
405
|
+
if (!row) {
|
|
406
|
+
return undefined;
|
|
407
|
+
}
|
|
408
|
+
return {
|
|
409
|
+
...row,
|
|
410
|
+
created_at: toIsoString(row.created_at),
|
|
411
|
+
updated_at: toIsoString(row.updated_at)
|
|
412
|
+
};
|
|
413
|
+
}
|
|
414
|
+
async getFilesByIndex(input) {
|
|
415
|
+
const rows = this.db
|
|
416
|
+
.prepare(`
|
|
417
|
+
SELECT id AS file_id, repo_path, content_hash, language
|
|
418
|
+
FROM files
|
|
419
|
+
WHERE tenant_id = ? AND index_id = ?
|
|
420
|
+
`)
|
|
421
|
+
.all(input.tenant_id, input.index_id);
|
|
422
|
+
return rows.map((row) => ({
|
|
423
|
+
file_id: row.file_id,
|
|
424
|
+
repo_path: row.repo_path,
|
|
425
|
+
content_hash: row.content_hash,
|
|
426
|
+
...(row.language ? { language: row.language } : {})
|
|
427
|
+
}));
|
|
428
|
+
}
|
|
429
|
+
async copyFileFromIndex(input) {
|
|
430
|
+
this.db.exec("BEGIN");
|
|
431
|
+
try {
|
|
432
|
+
const sourceFile = this.db
|
|
433
|
+
.prepare(`
|
|
434
|
+
SELECT id AS file_id, repo_path, content_hash, size_bytes, language, warning_metadata, updated_at
|
|
435
|
+
FROM files
|
|
436
|
+
WHERE tenant_id = ? AND index_id = ? AND repo_path = ?
|
|
437
|
+
LIMIT 1
|
|
438
|
+
`)
|
|
439
|
+
.get(input.tenant_id, input.source_index_id, input.repo_path);
|
|
440
|
+
if (!sourceFile) {
|
|
441
|
+
this.db.exec("COMMIT");
|
|
442
|
+
return;
|
|
443
|
+
}
|
|
444
|
+
const targetFileId = `fil_${randomUUID()}`;
|
|
445
|
+
this.db
|
|
446
|
+
.prepare(`
|
|
447
|
+
INSERT INTO files (id, tenant_id, index_id, repo_path, content_hash, size_bytes, language, warning_metadata, updated_at)
|
|
448
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
449
|
+
`)
|
|
450
|
+
.run(targetFileId, input.tenant_id, input.target_index_id, sourceFile.repo_path, sourceFile.content_hash, sourceFile.size_bytes, sourceFile.language, sourceFile.warning_metadata, toIsoString(sourceFile.updated_at));
|
|
451
|
+
const chunks = this.db
|
|
452
|
+
.prepare(`
|
|
453
|
+
SELECT repo_path, start_line, end_line, text, embedding, generated, updated_at
|
|
454
|
+
FROM chunks
|
|
455
|
+
WHERE tenant_id = ? AND file_id = ?
|
|
456
|
+
`)
|
|
457
|
+
.all(input.tenant_id, sourceFile.file_id);
|
|
458
|
+
const insertChunk = this.db.prepare(`
|
|
459
|
+
INSERT INTO chunks (id, tenant_id, file_id, repo_path, start_line, end_line, text, embedding, generated, lexical_doc, updated_at)
|
|
460
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
461
|
+
`);
|
|
462
|
+
for (const chunk of chunks) {
|
|
463
|
+
insertChunk.run(`chk_${randomUUID()}`, input.tenant_id, targetFileId, chunk.repo_path, chunk.start_line, chunk.end_line, chunk.text, chunk.embedding, chunk.generated, chunk.text, toIsoString(chunk.updated_at));
|
|
464
|
+
}
|
|
465
|
+
this.db.exec("COMMIT");
|
|
466
|
+
}
|
|
467
|
+
catch (error) {
|
|
468
|
+
this.db.exec("ROLLBACK");
|
|
469
|
+
throw error;
|
|
470
|
+
}
|
|
471
|
+
}
|
|
472
|
+
async upsertFile(input) {
|
|
473
|
+
const existing = this.db
|
|
474
|
+
.prepare(`
|
|
475
|
+
SELECT id AS file_id
|
|
476
|
+
FROM files
|
|
477
|
+
WHERE index_id = ? AND repo_path = ?
|
|
478
|
+
LIMIT 1
|
|
479
|
+
`)
|
|
480
|
+
.get(input.index_id, input.repo_path);
|
|
481
|
+
const warningMetadata = JSON.stringify(input.warning_metadata ?? null);
|
|
482
|
+
if (existing) {
|
|
483
|
+
this.db
|
|
484
|
+
.prepare(`
|
|
485
|
+
UPDATE files
|
|
486
|
+
SET content_hash = ?, size_bytes = ?, language = ?, warning_metadata = ?, updated_at = ?
|
|
487
|
+
WHERE id = ?
|
|
488
|
+
`)
|
|
489
|
+
.run(input.content_hash, input.size_bytes, input.language ?? null, warningMetadata, input.updated_at ?? nowIso(), existing.file_id);
|
|
490
|
+
return { file_id: existing.file_id };
|
|
491
|
+
}
|
|
492
|
+
const file_id = `fil_${randomUUID()}`;
|
|
493
|
+
this.db
|
|
494
|
+
.prepare(`
|
|
495
|
+
INSERT INTO files (id, tenant_id, index_id, repo_path, content_hash, size_bytes, language, warning_metadata, updated_at)
|
|
496
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
497
|
+
`)
|
|
498
|
+
.run(file_id, input.tenant_id, input.index_id, input.repo_path, input.content_hash, input.size_bytes, input.language ?? null, warningMetadata, input.updated_at ?? nowIso());
|
|
499
|
+
return { file_id };
|
|
500
|
+
}
|
|
501
|
+
async replaceFileChunks(input) {
|
|
502
|
+
this.db.exec("BEGIN");
|
|
503
|
+
try {
|
|
504
|
+
this.db.prepare("DELETE FROM chunks WHERE tenant_id = ? AND file_id = ?").run(input.tenant_id, input.file_id);
|
|
505
|
+
const insert = this.db.prepare(`
|
|
506
|
+
INSERT INTO chunks (id, tenant_id, file_id, repo_path, start_line, end_line, text, embedding, generated, lexical_doc, updated_at)
|
|
507
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
508
|
+
`);
|
|
509
|
+
for (const chunk of input.chunks) {
|
|
510
|
+
insert.run(`chk_${randomUUID()}`, input.tenant_id, input.file_id, input.repo_path, chunk.start_line, chunk.end_line, chunk.snippet, JSON.stringify(chunk.embedding), chunk.generated ? 1 : 0, chunk.snippet, chunk.updated_at ?? nowIso());
|
|
511
|
+
}
|
|
512
|
+
this.db.exec("COMMIT");
|
|
513
|
+
}
|
|
514
|
+
catch (error) {
|
|
515
|
+
this.db.exec("ROLLBACK");
|
|
516
|
+
throw error;
|
|
517
|
+
}
|
|
518
|
+
}
|
|
519
|
+
async saveManifest(input) {
|
|
520
|
+
this.db
|
|
521
|
+
.prepare(`
|
|
522
|
+
INSERT INTO manifests (id, index_id, object_key, checksum, created_at)
|
|
523
|
+
VALUES (?, ?, ?, ?, ?)
|
|
524
|
+
`)
|
|
525
|
+
.run(`mft_${randomUUID()}`, input.index_id, input.object_key, input.checksum, nowIso());
|
|
526
|
+
}
|
|
527
|
+
async saveIndexMetadata(input) {
|
|
528
|
+
this.db
|
|
529
|
+
.prepare(`
|
|
530
|
+
INSERT INTO index_metadata (
|
|
531
|
+
index_id,
|
|
532
|
+
tenant_id,
|
|
533
|
+
embedding_provider,
|
|
534
|
+
embedding_model,
|
|
535
|
+
embedding_dimensions,
|
|
536
|
+
embedding_version,
|
|
537
|
+
chunking_strategy,
|
|
538
|
+
chunking_fallback_strategy,
|
|
539
|
+
created_at
|
|
540
|
+
)
|
|
541
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
542
|
+
ON CONFLICT (index_id)
|
|
543
|
+
DO UPDATE SET
|
|
544
|
+
tenant_id = excluded.tenant_id,
|
|
545
|
+
embedding_provider = excluded.embedding_provider,
|
|
546
|
+
embedding_model = excluded.embedding_model,
|
|
547
|
+
embedding_dimensions = excluded.embedding_dimensions,
|
|
548
|
+
embedding_version = excluded.embedding_version,
|
|
549
|
+
chunking_strategy = excluded.chunking_strategy,
|
|
550
|
+
chunking_fallback_strategy = excluded.chunking_fallback_strategy
|
|
551
|
+
`)
|
|
552
|
+
.run(input.index_id, input.tenant_id, input.embedding_provider, input.embedding_model ?? null, input.embedding_dimensions, input.embedding_version ?? null, input.chunking_strategy, input.chunking_fallback_strategy, nowIso());
|
|
553
|
+
}
|
|
554
|
+
async getIndexMetadata(input) {
|
|
555
|
+
const row = this.db
|
|
556
|
+
.prepare(`
|
|
557
|
+
SELECT
|
|
558
|
+
embedding_provider,
|
|
559
|
+
embedding_model,
|
|
560
|
+
embedding_dimensions,
|
|
561
|
+
embedding_version,
|
|
562
|
+
chunking_strategy,
|
|
563
|
+
chunking_fallback_strategy,
|
|
564
|
+
created_at
|
|
565
|
+
FROM index_metadata
|
|
566
|
+
WHERE tenant_id = ? AND index_id = ?
|
|
567
|
+
LIMIT 1
|
|
568
|
+
`)
|
|
569
|
+
.get(input.tenant_id, input.index_id);
|
|
570
|
+
if (!row) {
|
|
571
|
+
return undefined;
|
|
572
|
+
}
|
|
573
|
+
return {
|
|
574
|
+
embedding_provider: row.embedding_provider,
|
|
575
|
+
...(row.embedding_model ? { embedding_model: row.embedding_model } : {}),
|
|
576
|
+
embedding_dimensions: row.embedding_dimensions,
|
|
577
|
+
...(row.embedding_version ? { embedding_version: row.embedding_version } : {}),
|
|
578
|
+
chunking_strategy: row.chunking_strategy,
|
|
579
|
+
chunking_fallback_strategy: row.chunking_fallback_strategy,
|
|
580
|
+
created_at: toIsoString(row.created_at)
|
|
581
|
+
};
|
|
582
|
+
}
|
|
583
|
+
async listChunksByIndex(input) {
|
|
584
|
+
const where = ["f.tenant_id = ?", "f.index_id = ?"];
|
|
585
|
+
const params = [input.tenant_id, input.index_id];
|
|
586
|
+
if (input.filters?.language) {
|
|
587
|
+
where.push("f.language = ?");
|
|
588
|
+
params.push(input.filters.language);
|
|
589
|
+
}
|
|
590
|
+
if (input.filters?.path_prefix) {
|
|
591
|
+
where.push("c.repo_path LIKE ?");
|
|
592
|
+
params.push(`${input.filters.path_prefix}%`);
|
|
593
|
+
}
|
|
594
|
+
const rows = this.db
|
|
595
|
+
.prepare(`
|
|
596
|
+
SELECT
|
|
597
|
+
c.id AS chunk_id,
|
|
598
|
+
c.file_id,
|
|
599
|
+
c.repo_path AS path,
|
|
600
|
+
c.start_line,
|
|
601
|
+
c.end_line,
|
|
602
|
+
c.text AS snippet,
|
|
603
|
+
f.language,
|
|
604
|
+
c.generated,
|
|
605
|
+
c.updated_at,
|
|
606
|
+
c.embedding
|
|
607
|
+
FROM chunks c
|
|
608
|
+
INNER JOIN files f ON f.id = c.file_id
|
|
609
|
+
WHERE ${where.join(" AND ")}
|
|
610
|
+
`)
|
|
611
|
+
.all(...params);
|
|
612
|
+
const globRegex = input.filters?.glob ? compileGlob(input.filters.glob) : undefined;
|
|
613
|
+
return rows
|
|
614
|
+
.filter((row) => (globRegex ? globRegex.test(row.path) : true))
|
|
615
|
+
.map((row) => ({
|
|
616
|
+
chunk_id: row.chunk_id,
|
|
617
|
+
file_id: row.file_id,
|
|
618
|
+
path: row.path,
|
|
619
|
+
start_line: row.start_line,
|
|
620
|
+
end_line: row.end_line,
|
|
621
|
+
snippet: row.snippet,
|
|
622
|
+
...(row.language ? { language: row.language } : {}),
|
|
623
|
+
...(sqliteBool(row.generated) ? { generated: true } : {}),
|
|
624
|
+
updated_at: toIsoString(row.updated_at),
|
|
625
|
+
embedding: parseEmbedding(row.embedding)
|
|
626
|
+
}));
|
|
627
|
+
}
|
|
628
|
+
async rankChunksByIndex(input) {
|
|
629
|
+
const weights = input.candidate_weights ?? DEFAULT_CANDIDATE_SCORE_WEIGHTS;
|
|
630
|
+
const where = ["f.tenant_id = ?", "f.index_id = ?"];
|
|
631
|
+
const params = [input.tenant_id, input.index_id];
|
|
632
|
+
if (input.filters?.language) {
|
|
633
|
+
where.push("f.language = ?");
|
|
634
|
+
params.push(input.filters.language);
|
|
635
|
+
}
|
|
636
|
+
if (input.filters?.path_prefix) {
|
|
637
|
+
where.push("c.repo_path LIKE ?");
|
|
638
|
+
params.push(`${input.filters.path_prefix}%`);
|
|
639
|
+
}
|
|
640
|
+
const rows = this.db
|
|
641
|
+
.prepare(`
|
|
642
|
+
SELECT
|
|
643
|
+
c.id AS chunk_id,
|
|
644
|
+
c.file_id,
|
|
645
|
+
c.repo_path AS path,
|
|
646
|
+
c.start_line,
|
|
647
|
+
c.end_line,
|
|
648
|
+
c.text AS snippet,
|
|
649
|
+
f.language,
|
|
650
|
+
c.generated,
|
|
651
|
+
c.updated_at,
|
|
652
|
+
c.embedding
|
|
653
|
+
FROM chunks c
|
|
654
|
+
INNER JOIN files f ON f.id = c.file_id
|
|
655
|
+
WHERE ${where.join(" AND ")}
|
|
656
|
+
`)
|
|
657
|
+
.all(...params);
|
|
658
|
+
const globRegex = input.filters?.glob ? compileGlob(input.filters.glob) : undefined;
|
|
659
|
+
const ranked = rows
|
|
660
|
+
.filter((row) => (globRegex ? globRegex.test(row.path) : true))
|
|
661
|
+
.map((row) => {
|
|
662
|
+
const lexical = lexicalScoreForRanking(input.query_tokens, `${row.path}\n${row.snippet}`);
|
|
663
|
+
const vector = cosineSimilarity(input.query_embedding, parseEmbedding(row.embedding));
|
|
664
|
+
const pathMatch = input.query_tokens.some((token) => row.path.toLowerCase().includes(token));
|
|
665
|
+
const recencyBoost = Date.now() - new Date(toIsoString(row.updated_at)).getTime() < 14 * 24 * 3600 * 1_000;
|
|
666
|
+
let score = lexical * weights.lexical_weight + vector * weights.vector_weight;
|
|
667
|
+
if (pathMatch) {
|
|
668
|
+
score += weights.path_match_boost;
|
|
669
|
+
}
|
|
670
|
+
if (recencyBoost) {
|
|
671
|
+
score += weights.recency_boost;
|
|
672
|
+
}
|
|
673
|
+
if (sqliteBool(row.generated)) {
|
|
674
|
+
score -= weights.generated_penalty;
|
|
675
|
+
}
|
|
676
|
+
return {
|
|
677
|
+
chunk_id: row.chunk_id,
|
|
678
|
+
file_id: row.file_id,
|
|
679
|
+
path: row.path,
|
|
680
|
+
start_line: row.start_line,
|
|
681
|
+
end_line: row.end_line,
|
|
682
|
+
snippet: row.snippet,
|
|
683
|
+
...(row.language ? { language: row.language } : {}),
|
|
684
|
+
...(sqliteBool(row.generated) ? { generated: true } : {}),
|
|
685
|
+
updated_at: toIsoString(row.updated_at),
|
|
686
|
+
score,
|
|
687
|
+
lexical_score: lexical,
|
|
688
|
+
vector_score: vector,
|
|
689
|
+
path_match: pathMatch,
|
|
690
|
+
recency_boosted: recencyBoost
|
|
691
|
+
};
|
|
692
|
+
})
|
|
693
|
+
.sort((a, b) => b.score - a.score)
|
|
694
|
+
.slice(0, Math.max(input.top_k * 4, input.top_k));
|
|
695
|
+
return ranked;
|
|
696
|
+
}
|
|
697
|
+
}
|
|
698
|
+
export class PostgresIndexRepository {
|
|
699
|
+
pool;
|
|
700
|
+
options;
|
|
701
|
+
embeddingStorage = "vector";
|
|
702
|
+
constructor(pool, options = {}) {
|
|
703
|
+
this.pool = pool;
|
|
704
|
+
this.options = options;
|
|
705
|
+
}
|
|
706
|
+
async migrate() {
|
|
707
|
+
const dimensions = this.options.chunkEmbeddingDimensions ?? 24;
|
|
708
|
+
const preferPgVector = this.options.preferPgVector ?? true;
|
|
709
|
+
this.embeddingStorage = "array";
|
|
710
|
+
if (preferPgVector) {
|
|
711
|
+
try {
|
|
712
|
+
await this.pool.query("CREATE EXTENSION IF NOT EXISTS vector");
|
|
713
|
+
await this.pool.query("SELECT '[1,2]'::vector");
|
|
714
|
+
this.embeddingStorage = "vector";
|
|
715
|
+
}
|
|
716
|
+
catch {
|
|
717
|
+
this.embeddingStorage = "array";
|
|
718
|
+
}
|
|
719
|
+
}
|
|
720
|
+
await this.pool.query(`
|
|
721
|
+
CREATE TABLE IF NOT EXISTS tenants (
|
|
722
|
+
id TEXT PRIMARY KEY,
|
|
723
|
+
name TEXT NOT NULL,
|
|
724
|
+
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
|
725
|
+
);
|
|
726
|
+
`);
|
|
727
|
+
await this.pool.query(`
|
|
728
|
+
CREATE TABLE IF NOT EXISTS workspaces (
|
|
729
|
+
id TEXT PRIMARY KEY,
|
|
730
|
+
tenant_id TEXT NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,
|
|
731
|
+
name TEXT NOT NULL,
|
|
732
|
+
project_root_path TEXT NOT NULL,
|
|
733
|
+
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
734
|
+
UNIQUE (tenant_id, project_root_path)
|
|
735
|
+
);
|
|
736
|
+
`);
|
|
737
|
+
await this.pool.query(`
|
|
738
|
+
CREATE TABLE IF NOT EXISTS indexes (
|
|
739
|
+
id TEXT PRIMARY KEY,
|
|
740
|
+
tenant_id TEXT NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,
|
|
741
|
+
workspace_id TEXT NOT NULL REFERENCES workspaces(id) ON DELETE CASCADE,
|
|
742
|
+
version TEXT NOT NULL,
|
|
743
|
+
status TEXT NOT NULL,
|
|
744
|
+
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
745
|
+
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
746
|
+
UNIQUE (workspace_id, version)
|
|
747
|
+
);
|
|
748
|
+
`);
|
|
749
|
+
await this.pool.query(`
|
|
750
|
+
CREATE TABLE IF NOT EXISTS manifests (
|
|
751
|
+
id TEXT PRIMARY KEY,
|
|
752
|
+
index_id TEXT NOT NULL REFERENCES indexes(id) ON DELETE CASCADE,
|
|
753
|
+
object_key TEXT NOT NULL,
|
|
754
|
+
checksum TEXT NOT NULL,
|
|
755
|
+
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
|
756
|
+
);
|
|
757
|
+
`);
|
|
758
|
+
await this.pool.query(`
|
|
759
|
+
CREATE TABLE IF NOT EXISTS index_metadata (
|
|
760
|
+
index_id TEXT PRIMARY KEY REFERENCES indexes(id) ON DELETE CASCADE,
|
|
761
|
+
tenant_id TEXT NOT NULL,
|
|
762
|
+
embedding_provider TEXT NOT NULL,
|
|
763
|
+
embedding_model TEXT,
|
|
764
|
+
embedding_dimensions INTEGER NOT NULL,
|
|
765
|
+
embedding_version TEXT,
|
|
766
|
+
chunking_strategy TEXT NOT NULL,
|
|
767
|
+
chunking_fallback_strategy TEXT NOT NULL,
|
|
768
|
+
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
|
769
|
+
);
|
|
770
|
+
`);
|
|
771
|
+
await this.pool.query(`
|
|
772
|
+
CREATE TABLE IF NOT EXISTS files (
|
|
773
|
+
id TEXT PRIMARY KEY,
|
|
774
|
+
tenant_id TEXT NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,
|
|
775
|
+
index_id TEXT NOT NULL REFERENCES indexes(id) ON DELETE CASCADE,
|
|
776
|
+
repo_path TEXT NOT NULL,
|
|
777
|
+
content_hash TEXT NOT NULL,
|
|
778
|
+
size_bytes INTEGER NOT NULL,
|
|
779
|
+
language TEXT,
|
|
780
|
+
warning_metadata JSONB,
|
|
781
|
+
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
782
|
+
UNIQUE (index_id, repo_path)
|
|
783
|
+
);
|
|
784
|
+
`);
|
|
785
|
+
const embeddingType = this.embeddingStorage === "vector" ? `vector(${dimensions})` : "DOUBLE PRECISION[]";
|
|
786
|
+
await this.pool.query(`
|
|
787
|
+
CREATE TABLE IF NOT EXISTS chunks (
|
|
788
|
+
id TEXT PRIMARY KEY,
|
|
789
|
+
tenant_id TEXT NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,
|
|
790
|
+
file_id TEXT NOT NULL REFERENCES files(id) ON DELETE CASCADE,
|
|
791
|
+
repo_path TEXT NOT NULL,
|
|
792
|
+
start_line INTEGER NOT NULL,
|
|
793
|
+
end_line INTEGER NOT NULL,
|
|
794
|
+
text TEXT NOT NULL,
|
|
795
|
+
embedding ${embeddingType} NOT NULL,
|
|
796
|
+
generated BOOLEAN NOT NULL DEFAULT FALSE,
|
|
797
|
+
lexical_doc TEXT,
|
|
798
|
+
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
|
799
|
+
);
|
|
800
|
+
`);
|
|
801
|
+
await this.pool.query("CREATE INDEX IF NOT EXISTS idx_workspaces_tenant_path ON workspaces(tenant_id, project_root_path)");
|
|
802
|
+
await this.pool.query("CREATE INDEX IF NOT EXISTS idx_indexes_workspace_status ON indexes(workspace_id, status, created_at DESC)");
|
|
803
|
+
await this.pool.query("CREATE INDEX IF NOT EXISTS idx_index_metadata_tenant_index ON index_metadata(tenant_id, index_id)");
|
|
804
|
+
await this.pool.query("CREATE INDEX IF NOT EXISTS idx_files_index_path ON files(index_id, repo_path)");
|
|
805
|
+
await this.pool.query("CREATE INDEX IF NOT EXISTS idx_chunks_file_id ON chunks(file_id)");
|
|
806
|
+
await this.pool.query("CREATE INDEX IF NOT EXISTS idx_chunks_repo_path ON chunks(repo_path)");
|
|
807
|
+
await this.pool.query("CREATE INDEX IF NOT EXISTS idx_chunks_lexical_doc ON chunks(lexical_doc)");
|
|
808
|
+
if (this.embeddingStorage === "vector") {
|
|
809
|
+
try {
|
|
810
|
+
await this.pool.query("CREATE INDEX IF NOT EXISTS idx_chunks_embedding_vector ON chunks USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100)");
|
|
811
|
+
}
|
|
812
|
+
catch {
|
|
813
|
+
// Some test DBs cannot create ivfflat indexes.
|
|
814
|
+
}
|
|
815
|
+
}
|
|
816
|
+
}
|
|
817
|
+
async upsertWorkspace(input) {
|
|
818
|
+
await this.pool.query("INSERT INTO tenants (id, name) VALUES ($1, $2) ON CONFLICT (id) DO NOTHING", [input.tenant_id, input.tenant_id]);
|
|
819
|
+
await this.pool.query(`
|
|
820
|
+
INSERT INTO workspaces (id, tenant_id, name, project_root_path)
|
|
821
|
+
VALUES ($1, $2, $3, $4)
|
|
822
|
+
ON CONFLICT (id)
|
|
823
|
+
DO UPDATE SET tenant_id = EXCLUDED.tenant_id, name = EXCLUDED.name, project_root_path = EXCLUDED.project_root_path
|
|
824
|
+
`, [input.workspace_id, input.tenant_id, input.name, input.project_root_path]);
|
|
825
|
+
}
|
|
826
|
+
async resolveWorkspaceByProjectRoot(tenant_id, project_root_path) {
|
|
827
|
+
const result = await this.pool.query(`
|
|
828
|
+
SELECT id AS workspace_id, tenant_id, project_root_path, name
|
|
829
|
+
FROM workspaces
|
|
830
|
+
WHERE tenant_id = $1 AND project_root_path = $2
|
|
831
|
+
LIMIT 1
|
|
832
|
+
`, [tenant_id, project_root_path]);
|
|
833
|
+
const row = result.rows[0];
|
|
834
|
+
if (!row) {
|
|
835
|
+
return undefined;
|
|
836
|
+
}
|
|
837
|
+
return {
|
|
838
|
+
workspace_id: row.workspace_id,
|
|
839
|
+
tenant_id: row.tenant_id,
|
|
840
|
+
project_root_path: row.project_root_path,
|
|
841
|
+
name: row.name
|
|
842
|
+
};
|
|
843
|
+
}
|
|
844
|
+
async resolveWorkspaceByWorkspaceId(tenant_id, workspace_id) {
|
|
845
|
+
const result = await this.pool.query(`
|
|
846
|
+
SELECT id AS workspace_id, tenant_id, project_root_path, name
|
|
847
|
+
FROM workspaces
|
|
848
|
+
WHERE tenant_id = $1 AND id = $2
|
|
849
|
+
LIMIT 1
|
|
850
|
+
`, [tenant_id, workspace_id]);
|
|
851
|
+
const row = result.rows[0];
|
|
852
|
+
if (!row) {
|
|
853
|
+
return undefined;
|
|
854
|
+
}
|
|
855
|
+
return {
|
|
856
|
+
workspace_id: row.workspace_id,
|
|
857
|
+
tenant_id: row.tenant_id,
|
|
858
|
+
project_root_path: row.project_root_path,
|
|
859
|
+
name: row.name
|
|
860
|
+
};
|
|
861
|
+
}
|
|
862
|
+
async createIndexVersion(input) {
|
|
863
|
+
const status = input.status ?? "indexing";
|
|
864
|
+
const indexId = `idx_${randomUUID()}`;
|
|
865
|
+
const result = await this.pool.query(`
|
|
866
|
+
INSERT INTO indexes (id, tenant_id, workspace_id, version, status)
|
|
867
|
+
VALUES ($1, $2, $3, $4, $5)
|
|
868
|
+
RETURNING id AS index_id, workspace_id, tenant_id, version AS index_version, status, created_at, updated_at
|
|
869
|
+
`, [indexId, input.tenant_id, input.workspace_id, input.index_version, status]);
|
|
870
|
+
const row = result.rows[0];
|
|
871
|
+
return {
|
|
872
|
+
...row,
|
|
873
|
+
created_at: toIsoString(row.created_at),
|
|
874
|
+
updated_at: toIsoString(row.updated_at)
|
|
875
|
+
};
|
|
876
|
+
}
|
|
877
|
+
async markIndexStatus(input) {
|
|
878
|
+
await this.pool.query(`
|
|
879
|
+
UPDATE indexes
|
|
880
|
+
SET status = $4, updated_at = NOW()
|
|
881
|
+
WHERE id = $1 AND tenant_id = $2 AND workspace_id = $3
|
|
882
|
+
`, [input.index_id, input.tenant_id, input.workspace_id, input.status]);
|
|
883
|
+
}
|
|
884
|
+
async getIndexByVersion(input) {
|
|
885
|
+
const result = await this.pool.query(`
|
|
886
|
+
SELECT id AS index_id, workspace_id, tenant_id, version AS index_version, status, created_at, updated_at
|
|
887
|
+
FROM indexes
|
|
888
|
+
WHERE tenant_id = $1 AND workspace_id = $2 AND version = $3
|
|
889
|
+
LIMIT 1
|
|
890
|
+
`, [input.tenant_id, input.workspace_id, input.index_version]);
|
|
891
|
+
const row = result.rows[0];
|
|
892
|
+
if (!row) {
|
|
893
|
+
return undefined;
|
|
894
|
+
}
|
|
895
|
+
return {
|
|
896
|
+
...row,
|
|
897
|
+
created_at: toIsoString(row.created_at),
|
|
898
|
+
updated_at: toIsoString(row.updated_at)
|
|
899
|
+
};
|
|
900
|
+
}
|
|
901
|
+
async resetIndexContent(input) {
|
|
902
|
+
await runTx(this.pool, async (client) => {
|
|
903
|
+
await client.query(`
|
|
904
|
+
DELETE FROM chunks
|
|
905
|
+
WHERE tenant_id = $1 AND file_id IN (
|
|
906
|
+
SELECT id FROM files WHERE tenant_id = $1 AND index_id = $2
|
|
907
|
+
)
|
|
908
|
+
`, [input.tenant_id, input.index_id]);
|
|
909
|
+
await client.query("DELETE FROM files WHERE tenant_id = $1 AND index_id = $2", [input.tenant_id, input.index_id]);
|
|
910
|
+
await client.query("DELETE FROM manifests WHERE index_id = $1", [input.index_id]);
|
|
911
|
+
});
|
|
912
|
+
}
|
|
913
|
+
async getLatestReadyIndex(input) {
|
|
914
|
+
const result = await this.pool.query(`
|
|
915
|
+
SELECT id AS index_id, workspace_id, tenant_id, version AS index_version, status, created_at, updated_at
|
|
916
|
+
FROM indexes
|
|
917
|
+
WHERE tenant_id = $1 AND workspace_id = $2 AND status = 'ready'
|
|
918
|
+
ORDER BY created_at DESC
|
|
919
|
+
LIMIT 1
|
|
920
|
+
`, [input.tenant_id, input.workspace_id]);
|
|
921
|
+
const row = result.rows[0];
|
|
922
|
+
if (!row) {
|
|
923
|
+
return undefined;
|
|
924
|
+
}
|
|
925
|
+
return {
|
|
926
|
+
...row,
|
|
927
|
+
created_at: toIsoString(row.created_at),
|
|
928
|
+
updated_at: toIsoString(row.updated_at)
|
|
929
|
+
};
|
|
930
|
+
}
|
|
931
|
+
async getFilesByIndex(input) {
|
|
932
|
+
const result = await this.pool.query(`
|
|
933
|
+
SELECT id AS file_id, repo_path, content_hash, language
|
|
934
|
+
FROM files
|
|
935
|
+
WHERE tenant_id = $1 AND index_id = $2
|
|
936
|
+
`, [input.tenant_id, input.index_id]);
|
|
937
|
+
return result.rows.map((row) => ({
|
|
938
|
+
file_id: row.file_id,
|
|
939
|
+
repo_path: row.repo_path,
|
|
940
|
+
content_hash: row.content_hash,
|
|
941
|
+
...(row.language ? { language: row.language } : {})
|
|
942
|
+
}));
|
|
943
|
+
}
|
|
944
|
+
async copyFileFromIndex(input) {
|
|
945
|
+
await runTx(this.pool, async (client) => {
|
|
946
|
+
const sourceFileResult = await client.query(`
|
|
947
|
+
SELECT id AS file_id, repo_path, content_hash, size_bytes, language, warning_metadata, updated_at
|
|
948
|
+
FROM files
|
|
949
|
+
WHERE tenant_id = $1 AND index_id = $2 AND repo_path = $3
|
|
950
|
+
LIMIT 1
|
|
951
|
+
`, [input.tenant_id, input.source_index_id, input.repo_path]);
|
|
952
|
+
const sourceFile = sourceFileResult.rows[0];
|
|
953
|
+
if (!sourceFile) {
|
|
954
|
+
return;
|
|
955
|
+
}
|
|
956
|
+
const targetFileId = `fil_${randomUUID()}`;
|
|
957
|
+
await client.query(`
|
|
958
|
+
INSERT INTO files (id, tenant_id, index_id, repo_path, content_hash, size_bytes, language, warning_metadata, updated_at)
|
|
959
|
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8::jsonb, $9::timestamptz)
|
|
960
|
+
`, [
|
|
961
|
+
targetFileId,
|
|
962
|
+
input.tenant_id,
|
|
963
|
+
input.target_index_id,
|
|
964
|
+
sourceFile.repo_path,
|
|
965
|
+
sourceFile.content_hash,
|
|
966
|
+
sourceFile.size_bytes,
|
|
967
|
+
sourceFile.language,
|
|
968
|
+
JSON.stringify(sourceFile.warning_metadata ?? null),
|
|
969
|
+
toIsoString(sourceFile.updated_at)
|
|
970
|
+
]);
|
|
971
|
+
const chunkRows = await client.query(`
|
|
972
|
+
SELECT repo_path, start_line, end_line, text, embedding, generated, updated_at
|
|
973
|
+
FROM chunks
|
|
974
|
+
WHERE tenant_id = $1 AND file_id = $2
|
|
975
|
+
`, [input.tenant_id, sourceFile.file_id]);
|
|
976
|
+
for (const chunk of chunkRows.rows) {
|
|
977
|
+
const embedding = parseEmbedding(chunk.embedding);
|
|
978
|
+
await this.insertChunk(client, {
|
|
979
|
+
tenant_id: input.tenant_id,
|
|
980
|
+
file_id: targetFileId,
|
|
981
|
+
repo_path: chunk.repo_path,
|
|
982
|
+
start_line: chunk.start_line,
|
|
983
|
+
end_line: chunk.end_line,
|
|
984
|
+
snippet: chunk.text,
|
|
985
|
+
embedding,
|
|
986
|
+
generated: chunk.generated,
|
|
987
|
+
updated_at: toIsoString(chunk.updated_at)
|
|
988
|
+
});
|
|
989
|
+
}
|
|
990
|
+
});
|
|
991
|
+
}
|
|
992
|
+
async upsertFile(input) {
|
|
993
|
+
const fileId = `fil_${randomUUID()}`;
|
|
994
|
+
const result = await this.pool.query(`
|
|
995
|
+
INSERT INTO files (id, tenant_id, index_id, repo_path, content_hash, size_bytes, language, warning_metadata, updated_at)
|
|
996
|
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8::jsonb, $9::timestamptz)
|
|
997
|
+
ON CONFLICT (index_id, repo_path)
|
|
998
|
+
DO UPDATE SET
|
|
999
|
+
content_hash = EXCLUDED.content_hash,
|
|
1000
|
+
size_bytes = EXCLUDED.size_bytes,
|
|
1001
|
+
language = EXCLUDED.language,
|
|
1002
|
+
warning_metadata = EXCLUDED.warning_metadata,
|
|
1003
|
+
updated_at = EXCLUDED.updated_at
|
|
1004
|
+
RETURNING id AS file_id
|
|
1005
|
+
`, [
|
|
1006
|
+
fileId,
|
|
1007
|
+
input.tenant_id,
|
|
1008
|
+
input.index_id,
|
|
1009
|
+
input.repo_path,
|
|
1010
|
+
input.content_hash,
|
|
1011
|
+
input.size_bytes,
|
|
1012
|
+
input.language ?? null,
|
|
1013
|
+
JSON.stringify(input.warning_metadata ?? null),
|
|
1014
|
+
input.updated_at ?? new Date().toISOString()
|
|
1015
|
+
]);
|
|
1016
|
+
return result.rows[0];
|
|
1017
|
+
}
|
|
1018
|
+
async replaceFileChunks(input) {
|
|
1019
|
+
await runTx(this.pool, async (client) => {
|
|
1020
|
+
await client.query("DELETE FROM chunks WHERE tenant_id = $1 AND file_id = $2", [input.tenant_id, input.file_id]);
|
|
1021
|
+
for (const chunk of input.chunks) {
|
|
1022
|
+
await this.insertChunk(client, {
|
|
1023
|
+
tenant_id: input.tenant_id,
|
|
1024
|
+
file_id: input.file_id,
|
|
1025
|
+
repo_path: input.repo_path,
|
|
1026
|
+
start_line: chunk.start_line,
|
|
1027
|
+
end_line: chunk.end_line,
|
|
1028
|
+
snippet: chunk.snippet,
|
|
1029
|
+
embedding: chunk.embedding,
|
|
1030
|
+
generated: chunk.generated,
|
|
1031
|
+
updated_at: chunk.updated_at ?? new Date().toISOString()
|
|
1032
|
+
});
|
|
1033
|
+
}
|
|
1034
|
+
});
|
|
1035
|
+
}
|
|
1036
|
+
async saveManifest(input) {
|
|
1037
|
+
await this.pool.query(`
|
|
1038
|
+
INSERT INTO manifests (id, index_id, object_key, checksum)
|
|
1039
|
+
VALUES ($1, $2, $3, $4)
|
|
1040
|
+
`, [`mft_${randomUUID()}`, input.index_id, input.object_key, input.checksum]);
|
|
1041
|
+
}
|
|
1042
|
+
async saveIndexMetadata(input) {
|
|
1043
|
+
await this.pool.query(`
|
|
1044
|
+
INSERT INTO index_metadata (
|
|
1045
|
+
index_id,
|
|
1046
|
+
tenant_id,
|
|
1047
|
+
embedding_provider,
|
|
1048
|
+
embedding_model,
|
|
1049
|
+
embedding_dimensions,
|
|
1050
|
+
embedding_version,
|
|
1051
|
+
chunking_strategy,
|
|
1052
|
+
chunking_fallback_strategy
|
|
1053
|
+
)
|
|
1054
|
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
|
|
1055
|
+
ON CONFLICT (index_id)
|
|
1056
|
+
DO UPDATE SET
|
|
1057
|
+
tenant_id = EXCLUDED.tenant_id,
|
|
1058
|
+
embedding_provider = EXCLUDED.embedding_provider,
|
|
1059
|
+
embedding_model = EXCLUDED.embedding_model,
|
|
1060
|
+
embedding_dimensions = EXCLUDED.embedding_dimensions,
|
|
1061
|
+
embedding_version = EXCLUDED.embedding_version,
|
|
1062
|
+
chunking_strategy = EXCLUDED.chunking_strategy,
|
|
1063
|
+
chunking_fallback_strategy = EXCLUDED.chunking_fallback_strategy
|
|
1064
|
+
`, [
|
|
1065
|
+
input.index_id,
|
|
1066
|
+
input.tenant_id,
|
|
1067
|
+
input.embedding_provider,
|
|
1068
|
+
input.embedding_model ?? null,
|
|
1069
|
+
input.embedding_dimensions,
|
|
1070
|
+
input.embedding_version ?? null,
|
|
1071
|
+
input.chunking_strategy,
|
|
1072
|
+
input.chunking_fallback_strategy
|
|
1073
|
+
]);
|
|
1074
|
+
}
|
|
1075
|
+
async getIndexMetadata(input) {
|
|
1076
|
+
const result = await this.pool.query(`
|
|
1077
|
+
SELECT
|
|
1078
|
+
embedding_provider,
|
|
1079
|
+
embedding_model,
|
|
1080
|
+
embedding_dimensions,
|
|
1081
|
+
embedding_version,
|
|
1082
|
+
chunking_strategy,
|
|
1083
|
+
chunking_fallback_strategy,
|
|
1084
|
+
created_at
|
|
1085
|
+
FROM index_metadata
|
|
1086
|
+
WHERE tenant_id = $1 AND index_id = $2
|
|
1087
|
+
LIMIT 1
|
|
1088
|
+
`, [input.tenant_id, input.index_id]);
|
|
1089
|
+
const row = result.rows[0];
|
|
1090
|
+
if (!row) {
|
|
1091
|
+
return undefined;
|
|
1092
|
+
}
|
|
1093
|
+
return {
|
|
1094
|
+
embedding_provider: row.embedding_provider,
|
|
1095
|
+
...(row.embedding_model ? { embedding_model: row.embedding_model } : {}),
|
|
1096
|
+
embedding_dimensions: row.embedding_dimensions,
|
|
1097
|
+
...(row.embedding_version ? { embedding_version: row.embedding_version } : {}),
|
|
1098
|
+
chunking_strategy: row.chunking_strategy,
|
|
1099
|
+
chunking_fallback_strategy: row.chunking_fallback_strategy,
|
|
1100
|
+
created_at: toIsoString(row.created_at)
|
|
1101
|
+
};
|
|
1102
|
+
}
|
|
1103
|
+
async listChunksByIndex(input) {
|
|
1104
|
+
const params = [input.tenant_id, input.index_id];
|
|
1105
|
+
const where = ["f.tenant_id = $1", "f.index_id = $2"];
|
|
1106
|
+
if (input.filters?.language) {
|
|
1107
|
+
params.push(input.filters.language);
|
|
1108
|
+
where.push(`f.language = $${params.length}`);
|
|
1109
|
+
}
|
|
1110
|
+
if (input.filters?.path_prefix) {
|
|
1111
|
+
params.push(`${input.filters.path_prefix}%`);
|
|
1112
|
+
where.push(`c.repo_path LIKE $${params.length}`);
|
|
1113
|
+
}
|
|
1114
|
+
if (input.filters?.glob) {
|
|
1115
|
+
const regex = this.globToPostgresRegex(input.filters.glob);
|
|
1116
|
+
params.push(regex);
|
|
1117
|
+
where.push(`c.repo_path ~ $${params.length}`);
|
|
1118
|
+
}
|
|
1119
|
+
const result = await this.pool.query(`
|
|
1120
|
+
SELECT
|
|
1121
|
+
c.id AS chunk_id,
|
|
1122
|
+
c.file_id,
|
|
1123
|
+
c.repo_path AS path,
|
|
1124
|
+
c.start_line,
|
|
1125
|
+
c.end_line,
|
|
1126
|
+
c.text AS snippet,
|
|
1127
|
+
f.language,
|
|
1128
|
+
c.generated,
|
|
1129
|
+
c.updated_at,
|
|
1130
|
+
c.embedding
|
|
1131
|
+
FROM chunks c
|
|
1132
|
+
INNER JOIN files f ON f.id = c.file_id
|
|
1133
|
+
WHERE ${where.join(" AND ")}
|
|
1134
|
+
`, params);
|
|
1135
|
+
return result.rows.map((row) => ({
|
|
1136
|
+
chunk_id: row.chunk_id,
|
|
1137
|
+
file_id: row.file_id,
|
|
1138
|
+
path: row.path,
|
|
1139
|
+
start_line: row.start_line,
|
|
1140
|
+
end_line: row.end_line,
|
|
1141
|
+
snippet: row.snippet,
|
|
1142
|
+
...(row.language ? { language: row.language } : {}),
|
|
1143
|
+
...(row.generated ? { generated: row.generated } : {}),
|
|
1144
|
+
updated_at: toIsoString(row.updated_at),
|
|
1145
|
+
embedding: parseEmbedding(row.embedding)
|
|
1146
|
+
}));
|
|
1147
|
+
}
|
|
1148
|
+
async rankChunksByIndex(input) {
|
|
1149
|
+
const weights = input.candidate_weights ?? DEFAULT_CANDIDATE_SCORE_WEIGHTS;
|
|
1150
|
+
const params = [input.tenant_id, input.index_id];
|
|
1151
|
+
const where = ["f.tenant_id = $1", "f.index_id = $2"];
|
|
1152
|
+
if (input.filters?.language) {
|
|
1153
|
+
params.push(input.filters.language);
|
|
1154
|
+
where.push(`f.language = $${params.length}`);
|
|
1155
|
+
}
|
|
1156
|
+
if (input.filters?.path_prefix) {
|
|
1157
|
+
params.push(`${input.filters.path_prefix}%`);
|
|
1158
|
+
where.push(`c.repo_path LIKE $${params.length}`);
|
|
1159
|
+
}
|
|
1160
|
+
if (input.filters?.glob) {
|
|
1161
|
+
const regex = this.globToPostgresRegex(input.filters.glob);
|
|
1162
|
+
params.push(regex);
|
|
1163
|
+
where.push(`c.repo_path ~ $${params.length}`);
|
|
1164
|
+
}
|
|
1165
|
+
if (this.embeddingStorage === "vector") {
|
|
1166
|
+
const normalizedTokens = input.query_tokens.map((token) => token.toLowerCase()).filter((token) => token.length > 0);
|
|
1167
|
+
params.push(normalizedTokens.length > 0 ? normalizedTokens : [""]);
|
|
1168
|
+
const tokenArrayIndex = params.length;
|
|
1169
|
+
params.push(toVectorLiteral(input.query_embedding));
|
|
1170
|
+
const queryVectorIndex = params.length;
|
|
1171
|
+
params.push(Math.max(input.top_k * 4, input.top_k));
|
|
1172
|
+
const limitIndex = params.length;
|
|
1173
|
+
params.push(weights.lexical_weight);
|
|
1174
|
+
const lexicalWeightIndex = params.length;
|
|
1175
|
+
params.push(weights.vector_weight);
|
|
1176
|
+
const vectorWeightIndex = params.length;
|
|
1177
|
+
params.push(weights.path_match_boost);
|
|
1178
|
+
const pathMatchBoostIndex = params.length;
|
|
1179
|
+
params.push(weights.recency_boost);
|
|
1180
|
+
const recencyBoostIndex = params.length;
|
|
1181
|
+
params.push(weights.generated_penalty);
|
|
1182
|
+
const generatedPenaltyIndex = params.length;
|
|
1183
|
+
const result = await this.pool.query(`
|
|
1184
|
+
WITH scored AS (
|
|
1185
|
+
SELECT
|
|
1186
|
+
c.id AS chunk_id,
|
|
1187
|
+
c.file_id,
|
|
1188
|
+
c.repo_path AS path,
|
|
1189
|
+
c.start_line,
|
|
1190
|
+
c.end_line,
|
|
1191
|
+
c.text AS snippet,
|
|
1192
|
+
f.language,
|
|
1193
|
+
c.generated,
|
|
1194
|
+
c.updated_at,
|
|
1195
|
+
(
|
|
1196
|
+
SELECT COALESCE(COUNT(*), 0)
|
|
1197
|
+
FROM unnest($${tokenArrayIndex}::text[]) AS token
|
|
1198
|
+
WHERE token <> '' AND (
|
|
1199
|
+
position(token in lower(c.repo_path)) > 0
|
|
1200
|
+
OR position(token in lower(c.lexical_doc)) > 0
|
|
1201
|
+
)
|
|
1202
|
+
)::double precision / GREATEST(array_length($${tokenArrayIndex}::text[], 1), 1)::double precision AS lexical_score,
|
|
1203
|
+
(1 - (c.embedding <=> $${queryVectorIndex}::vector))::double precision AS vector_score,
|
|
1204
|
+
EXISTS (
|
|
1205
|
+
SELECT 1
|
|
1206
|
+
FROM unnest($${tokenArrayIndex}::text[]) AS token
|
|
1207
|
+
WHERE token <> '' AND position(token in lower(c.repo_path)) > 0
|
|
1208
|
+
) AS path_match,
|
|
1209
|
+
(c.updated_at >= NOW() - INTERVAL '14 days') AS recency_boosted
|
|
1210
|
+
FROM chunks c
|
|
1211
|
+
INNER JOIN files f ON f.id = c.file_id
|
|
1212
|
+
WHERE ${where.join(" AND ")}
|
|
1213
|
+
)
|
|
1214
|
+
SELECT
|
|
1215
|
+
chunk_id,
|
|
1216
|
+
file_id,
|
|
1217
|
+
path,
|
|
1218
|
+
start_line,
|
|
1219
|
+
end_line,
|
|
1220
|
+
snippet,
|
|
1221
|
+
language,
|
|
1222
|
+
generated,
|
|
1223
|
+
updated_at,
|
|
1224
|
+
(lexical_score * $${lexicalWeightIndex})
|
|
1225
|
+
+ (vector_score * $${vectorWeightIndex})
|
|
1226
|
+
+ (CASE WHEN path_match THEN $${pathMatchBoostIndex} ELSE 0 END)
|
|
1227
|
+
+ (CASE WHEN recency_boosted THEN $${recencyBoostIndex} ELSE 0 END)
|
|
1228
|
+
+ (CASE WHEN generated THEN -$${generatedPenaltyIndex} ELSE 0 END) AS score,
|
|
1229
|
+
lexical_score,
|
|
1230
|
+
vector_score,
|
|
1231
|
+
path_match,
|
|
1232
|
+
recency_boosted
|
|
1233
|
+
FROM scored
|
|
1234
|
+
ORDER BY score DESC
|
|
1235
|
+
LIMIT $${limitIndex}
|
|
1236
|
+
`, params);
|
|
1237
|
+
return result.rows.map((row) => ({
|
|
1238
|
+
chunk_id: row.chunk_id,
|
|
1239
|
+
file_id: row.file_id,
|
|
1240
|
+
path: row.path,
|
|
1241
|
+
start_line: row.start_line,
|
|
1242
|
+
end_line: row.end_line,
|
|
1243
|
+
snippet: row.snippet,
|
|
1244
|
+
...(row.language ? { language: row.language } : {}),
|
|
1245
|
+
...(row.generated ? { generated: row.generated } : {}),
|
|
1246
|
+
updated_at: toIsoString(row.updated_at),
|
|
1247
|
+
score: Number(row.score),
|
|
1248
|
+
lexical_score: Number(row.lexical_score),
|
|
1249
|
+
vector_score: Number(row.vector_score),
|
|
1250
|
+
path_match: row.path_match,
|
|
1251
|
+
recency_boosted: row.recency_boosted
|
|
1252
|
+
}));
|
|
1253
|
+
}
|
|
1254
|
+
const rows = await this.listChunksByIndex({
|
|
1255
|
+
tenant_id: input.tenant_id,
|
|
1256
|
+
index_id: input.index_id,
|
|
1257
|
+
filters: input.filters
|
|
1258
|
+
});
|
|
1259
|
+
return rows
|
|
1260
|
+
.map((row) => {
|
|
1261
|
+
const lexical = lexicalScoreForRanking(input.query_tokens, `${row.path}\n${row.snippet}`);
|
|
1262
|
+
const vector = cosineSimilarity(input.query_embedding, row.embedding);
|
|
1263
|
+
const pathMatch = input.query_tokens.some((token) => row.path.toLowerCase().includes(token));
|
|
1264
|
+
const recencyBoost = Date.now() - new Date(row.updated_at).getTime() < 14 * 24 * 3600 * 1_000;
|
|
1265
|
+
let score = lexical * weights.lexical_weight + vector * weights.vector_weight;
|
|
1266
|
+
if (pathMatch) {
|
|
1267
|
+
score += weights.path_match_boost;
|
|
1268
|
+
}
|
|
1269
|
+
if (recencyBoost) {
|
|
1270
|
+
score += weights.recency_boost;
|
|
1271
|
+
}
|
|
1272
|
+
if (row.generated) {
|
|
1273
|
+
score -= weights.generated_penalty;
|
|
1274
|
+
}
|
|
1275
|
+
return {
|
|
1276
|
+
chunk_id: row.chunk_id,
|
|
1277
|
+
file_id: row.file_id,
|
|
1278
|
+
path: row.path,
|
|
1279
|
+
start_line: row.start_line,
|
|
1280
|
+
end_line: row.end_line,
|
|
1281
|
+
snippet: row.snippet,
|
|
1282
|
+
...(row.language ? { language: row.language } : {}),
|
|
1283
|
+
...(row.generated ? { generated: true } : {}),
|
|
1284
|
+
updated_at: row.updated_at,
|
|
1285
|
+
score,
|
|
1286
|
+
lexical_score: lexical,
|
|
1287
|
+
vector_score: vector,
|
|
1288
|
+
path_match: pathMatch,
|
|
1289
|
+
recency_boosted: recencyBoost
|
|
1290
|
+
};
|
|
1291
|
+
})
|
|
1292
|
+
.sort((a, b) => b.score - a.score)
|
|
1293
|
+
.slice(0, Math.max(input.top_k * 4, input.top_k));
|
|
1294
|
+
}
|
|
1295
|
+
async insertChunk(client, input) {
|
|
1296
|
+
const embedding = this.embeddingStorage === "vector" ? toVectorLiteral(input.embedding) : input.embedding;
|
|
1297
|
+
const cast = this.embeddingStorage === "vector" ? "::vector" : "::double precision[]";
|
|
1298
|
+
await client.query(`
|
|
1299
|
+
INSERT INTO chunks (id, tenant_id, file_id, repo_path, start_line, end_line, text, embedding, generated, lexical_doc, updated_at)
|
|
1300
|
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8${cast}, $9, $7, $10::timestamptz)
|
|
1301
|
+
`, [
|
|
1302
|
+
`chk_${randomUUID()}`,
|
|
1303
|
+
input.tenant_id,
|
|
1304
|
+
input.file_id,
|
|
1305
|
+
input.repo_path,
|
|
1306
|
+
input.start_line,
|
|
1307
|
+
input.end_line,
|
|
1308
|
+
input.snippet,
|
|
1309
|
+
embedding,
|
|
1310
|
+
input.generated ?? false,
|
|
1311
|
+
input.updated_at
|
|
1312
|
+
]);
|
|
1313
|
+
}
|
|
1314
|
+
globToPostgresRegex(glob) {
|
|
1315
|
+
const escaped = glob.replace(/[.+^${}()|[\]\\]/g, "\\$&");
|
|
1316
|
+
return `^${escaped.replace(/\*/g, ".*").replace(/\?/g, ".")}$`;
|
|
1317
|
+
}
|
|
1318
|
+
}
|
|
1319
|
+
export function createPostgresPool(databaseUrl) {
|
|
1320
|
+
return new Pool({ connectionString: databaseUrl });
|
|
1321
|
+
}
|
|
export class RedisQueryCache {
    redis;
    keyPrefix;
    constructor(redis, options) {
        this.redis = redis;
        this.keyPrefix = options?.keyPrefix ?? "rce:query_cache";
    }
    async get(cacheKey) {
        const raw = await this.redis.get(this.wrapKey(cacheKey));
        if (!raw) {
            return undefined;
        }
        return JSON.parse(raw);
    }
    async set(cacheKey, value, ttlSeconds = 60) {
        await this.redis.set(this.wrapKey(cacheKey), JSON.stringify(value), "EX", ttlSeconds);
    }
    async invalidateWorkspace(workspace_id) {
        const keys = await this.redis.keys(`${this.keyPrefix}:${workspace_id}:*`);
        if (keys.length > 0) {
            await this.redis.del(...keys);
        }
    }
    wrapKey(cacheKey) {
        return `${this.keyPrefix}:${cacheKey}`;
    }
}
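// Minimal usage sketch for the Redis-backed query cache (illustrative only, not part
// of the published bundle; the REDIS_URL fallback, workspace/index identifiers and
// cached payload are placeholders):
async function exampleRedisQueryCacheUsage() {
    const redis = createRedisClient(process.env.REDIS_URL ?? "redis://localhost:6379");
    const cache = new RedisQueryCache(redis);
    const cacheKey = buildQueryCacheKey({
        workspace_id: "ws_demo",
        index_version: "iv_1",
        query: "where is the retry logic?",
        top_k: 8,
        filters: null
    });
    const cached = await cache.get(cacheKey);
    if (!cached) {
        // ...run the real search here, then cache the result for 60 seconds.
        await cache.set(cacheKey, { results: [] }, 60);
    }
    // After a re-index, drop every cached query for the workspace in one call.
    await cache.invalidateWorkspace("ws_demo");
}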
export class InMemoryQueryCache {
    cache = new Map();
    async get(cacheKey) {
        const hit = this.cache.get(cacheKey);
        if (!hit || hit.expires_at <= Date.now()) {
            return undefined;
        }
        return hit.value;
    }
    async set(cacheKey, value, ttlSeconds = 60) {
        this.cache.set(cacheKey, {
            value,
            expires_at: Date.now() + ttlSeconds * 1_000
        });
    }
    async invalidateWorkspace(workspace_id) {
        for (const key of this.cache.keys()) {
            if (key.startsWith(`${workspace_id}:`)) {
                this.cache.delete(key);
            }
        }
    }
}
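// Note that InMemoryQueryCache.invalidateWorkspace matches keys on the
// `${workspace_id}:` prefix, which is the shape produced by buildQueryCacheKey at
// the end of this module; keys built another way will not be swept.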
function workspaceIdFromCacheKey(cacheKey) {
    const split = cacheKey.split(":", 1)[0];
    return split && split.length > 0 ? split : "unknown";
}
export class SqliteQueryCache {
    dbPath;
    db;
    constructor(dbPath) {
        this.dbPath = dbPath;
        ensureSqliteParent(dbPath);
        this.db = openSqliteDatabase(dbPath);
        this.db.exec("PRAGMA journal_mode = WAL;");
        this.db.exec(`
            CREATE TABLE IF NOT EXISTS query_cache (
                cache_key TEXT PRIMARY KEY,
                workspace_id TEXT NOT NULL,
                value TEXT NOT NULL,
                expires_at INTEGER NOT NULL
            );
            CREATE INDEX IF NOT EXISTS idx_sqlite_query_cache_workspace ON query_cache(workspace_id);
            CREATE INDEX IF NOT EXISTS idx_sqlite_query_cache_expires ON query_cache(expires_at);
        `);
    }
    close() {
        this.db.close();
    }
    async get(cacheKey) {
        const row = this.db
            .prepare("SELECT value, expires_at FROM query_cache WHERE cache_key = ? LIMIT 1")
            .get(cacheKey);
        if (!row) {
            return undefined;
        }
        if (row.expires_at <= Date.now()) {
            this.db.prepare("DELETE FROM query_cache WHERE cache_key = ?").run(cacheKey);
            return undefined;
        }
        return JSON.parse(row.value);
    }
    async set(cacheKey, value, ttlSeconds = 60) {
        const expires_at = Date.now() + ttlSeconds * 1_000;
        this.db
            .prepare(`
                INSERT INTO query_cache (cache_key, workspace_id, value, expires_at)
                VALUES (?, ?, ?, ?)
                ON CONFLICT (cache_key)
                DO UPDATE SET workspace_id = excluded.workspace_id, value = excluded.value, expires_at = excluded.expires_at
            `)
            .run(cacheKey, workspaceIdFromCacheKey(cacheKey), JSON.stringify(value), expires_at);
    }
    async invalidateWorkspace(workspace_id) {
        this.db.prepare("DELETE FROM query_cache WHERE workspace_id = ?").run(workspace_id);
    }
}
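// Minimal usage sketch for the SQLite-backed cache (illustrative; the database path
// and identifiers are placeholders):
async function exampleSqliteQueryCacheUsage() {
    const cache = new SqliteQueryCache("./.rce/query-cache.db");
    const cacheKey = buildQueryCacheKey({
        workspace_id: "ws_demo",
        index_version: "iv_1",
        query: "list dead letters",
        top_k: 5,
        filters: null
    });
    await cache.set(cacheKey, { results: [] }, 30);
    const hit = await cache.get(cacheKey); // undefined once the 30 s TTL has passed
    await cache.invalidateWorkspace("ws_demo");
    cache.close();
    return hit;
}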
export class RedisIndexJobQueue {
    redis;
    pendingKey;
    processingKey;
    deadLetterKey;
    maxAttempts;
    leasePrefix;
    claimLeaseSeconds;
    reconnectRetries;
    reconnectDelayMs;
    constructor(redis, options) {
        this.redis = redis;
        const prefix = options?.keyPrefix ?? "rce:index_jobs";
        this.pendingKey = `${prefix}:pending`;
        this.processingKey = `${prefix}:processing`;
        this.deadLetterKey = `${prefix}:dead`;
        this.leasePrefix = `${prefix}:lease`;
        this.maxAttempts = options?.maxAttempts ?? 3;
        this.claimLeaseSeconds = Math.max(5, options?.claimLeaseSeconds ?? 120);
        this.reconnectRetries = Math.max(0, options?.reconnectRetries ?? 1);
        this.reconnectDelayMs = Math.max(0, options?.reconnectDelayMs ?? 100);
    }
    async enqueue(job) {
        const payload = {
            job_id: job.job_id ?? `job_${randomUUID()}`,
            tenant_id: job.tenant_id,
            workspace_id: job.workspace_id,
            index_version: job.index_version,
            manifest_key: job.manifest_key,
            attempts: 0,
            enqueued_at: new Date().toISOString()
        };
        await this.runRedisOperation(() => this.redis.rpush(this.pendingKey, JSON.stringify(payload)));
        return payload;
    }
    async claimNext(timeoutSeconds = 1) {
        const raw = await this.runRedisOperation(() => this.redis.brpoplpush(this.pendingKey, this.processingKey, timeoutSeconds));
        if (!raw) {
            return undefined;
        }
        const payload = JSON.parse(raw);
        const claimedAt = nowIso();
        payload.claimed_at = claimedAt;
        const claimedRaw = JSON.stringify(payload);
        if (claimedRaw !== raw) {
            await this.runRedisOperation(() => this.redis.lrem(this.processingKey, 1, raw));
            await this.runRedisOperation(() => this.redis.rpush(this.processingKey, claimedRaw));
        }
        await this.runRedisOperation(() => this.redis.set(this.leaseKey(payload.job_id), claimedAt, "EX", this.claimLeaseSeconds));
        return { raw: claimedRaw, payload };
    }
    async ack(claimed) {
        await this.runRedisOperation(() => this.redis.lrem(this.processingKey, 1, claimed.raw));
        await this.runRedisOperation(() => this.redis.del(this.leaseKey(claimed.payload.job_id)));
    }
    async retryOrDeadLetter(claimed, errorMessage) {
        await this.runRedisOperation(() => this.redis.lrem(this.processingKey, 1, claimed.raw));
        await this.runRedisOperation(() => this.redis.del(this.leaseKey(claimed.payload.job_id)));
        const attempts = claimed.payload.attempts + 1;
        if (attempts >= this.maxAttempts) {
            const failed = {
                ...claimed.payload,
                attempts,
                failed_at: new Date().toISOString(),
                last_error: errorMessage
            };
            await this.runRedisOperation(() => this.redis.rpush(this.deadLetterKey, JSON.stringify(failed)));
            return;
        }
        const retryPayload = {
            ...claimed.payload,
            attempts,
            claimed_at: undefined
        };
        await this.runRedisOperation(() => this.redis.rpush(this.pendingKey, JSON.stringify(retryPayload)));
    }
    async reclaimOrphaned(maxClaimAgeSeconds = this.claimLeaseSeconds) {
        const processing = await this.runRedisOperation(() => this.redis.lrange(this.processingKey, 0, -1));
        let reclaimed = 0;
        const ageThresholdMs = Math.max(1, maxClaimAgeSeconds) * 1_000;
        for (const raw of processing) {
            const payload = JSON.parse(raw);
            const lease = await this.runRedisOperation(() => this.redis.get(this.leaseKey(payload.job_id)));
            const claimedAtMs = payload.claimed_at ? new Date(payload.claimed_at).getTime() : 0;
            const staleByAge = claimedAtMs > 0 && Date.now() - claimedAtMs >= ageThresholdMs;
            if (lease && !staleByAge) {
                continue;
            }
            if (payload.claimed_at && !staleByAge) {
                continue;
            }
            const removed = await this.runRedisOperation(() => this.redis.lrem(this.processingKey, 1, raw));
            if (removed > 0) {
                const retryPayload = {
                    ...payload,
                    claimed_at: undefined
                };
                await this.runRedisOperation(() => this.redis.rpush(this.pendingKey, JSON.stringify(retryPayload)));
                reclaimed += 1;
            }
        }
        return reclaimed;
    }
    async deadLetterCount() {
        return this.runRedisOperation(() => this.redis.llen(this.deadLetterKey));
    }
    async pendingCount() {
        return this.runRedisOperation(() => this.redis.llen(this.pendingKey));
    }
    async processingCount() {
        return this.runRedisOperation(() => this.redis.llen(this.processingKey));
    }
    async listDeadLetters() {
        const rows = await this.runRedisOperation(() => this.redis.lrange(this.deadLetterKey, 0, -1));
        return rows.map((row) => JSON.parse(row));
    }
    async runRedisOperation(operation) {
        for (let attempt = 0;; attempt += 1) {
            try {
                return await operation();
            }
            catch (error) {
                const finalAttempt = attempt >= this.reconnectRetries;
                if (finalAttempt || !this.isRecoverableRedisError(error)) {
                    throw error;
                }
                try {
                    if (typeof this.redis.connect === "function") {
                        await this.redis.connect();
                    }
                }
                catch {
                    // Ignore reconnect failures here; next operation attempt will fail if redis is still unavailable.
                }
                if (this.reconnectDelayMs > 0) {
                    await waitMs(this.reconnectDelayMs);
                }
            }
        }
    }
    isRecoverableRedisError(error) {
        if (!(error instanceof Error)) {
            return false;
        }
        const message = error.message.toLowerCase();
        return (message.includes("connection") ||
            message.includes("socket") ||
            message.includes("econnrefused") ||
            message.includes("econnreset") ||
            message.includes("closed"));
    }
    leaseKey(jobId) {
        return `${this.leasePrefix}:${jobId}`;
    }
}
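// Sketch of a worker pass over the Redis-backed job queue (illustrative; the
// handler, REDIS_URL fallback and polling cadence are placeholders, not part of
// the package):
async function exampleIndexJobWorkerPass(handleIndexJob) {
    const queue = new RedisIndexJobQueue(createRedisClient(process.env.REDIS_URL ?? "redis://localhost:6379"));
    // Requeue jobs whose lease expired, e.g. left behind by a crashed worker.
    await queue.reclaimOrphaned();
    const claimed = await queue.claimNext(5);
    if (!claimed) {
        return;
    }
    try {
        await handleIndexJob(claimed.payload);
        await queue.ack(claimed);
    }
    catch (error) {
        // After maxAttempts (3 by default) the job lands on the dead-letter list.
        await queue.retryOrDeadLetter(claimed, error instanceof Error ? error.message : String(error));
    }
}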
export class InMemoryIndexJobQueue {
    pending = [];
    dead = [];
    processing = new Map();
    maxAttempts;
    constructor(options) {
        this.maxAttempts = options?.maxAttempts ?? 3;
    }
    async enqueue(job) {
        const payload = {
            job_id: job.job_id ?? `job_${randomUUID()}`,
            tenant_id: job.tenant_id,
            workspace_id: job.workspace_id,
            index_version: job.index_version,
            manifest_key: job.manifest_key,
            attempts: 0,
            enqueued_at: new Date().toISOString()
        };
        this.pending.push(payload);
        return payload;
    }
    async claimNext() {
        const next = this.pending.shift();
        if (!next) {
            return undefined;
        }
        const claimed = {
            ...next,
            claimed_at: nowIso()
        };
        this.processing.set(claimed.job_id, claimed);
        return {
            payload: claimed,
            raw: JSON.stringify(next)
        };
    }
    async ack(claimed) {
        this.processing.delete(claimed.payload.job_id);
        return;
    }
    async retryOrDeadLetter(claimed, errorMessage) {
        this.processing.delete(claimed.payload.job_id);
        const attempts = claimed.payload.attempts + 1;
        if (attempts >= this.maxAttempts) {
            this.dead.push({
                ...claimed.payload,
                attempts,
                failed_at: new Date().toISOString(),
                last_error: errorMessage
            });
            return;
        }
        this.pending.push({
            ...claimed.payload,
            attempts,
            claimed_at: undefined
        });
    }
    async reclaimOrphaned(maxClaimAgeSeconds = 120) {
        const cutoff = Date.now() - Math.max(1, maxClaimAgeSeconds) * 1_000;
        let reclaimed = 0;
        for (const [jobId, payload] of this.processing.entries()) {
            const claimedAt = payload.claimed_at ? new Date(payload.claimed_at).getTime() : 0;
            if (claimedAt > cutoff) {
                continue;
            }
            this.processing.delete(jobId);
            this.pending.push({
                ...payload,
                claimed_at: undefined
            });
            reclaimed += 1;
        }
        return reclaimed;
    }
    async deadLetterCount() {
        return this.dead.length;
    }
    async pendingCount() {
        return this.pending.length;
    }
    async processingCount() {
        return this.processing.size;
    }
    async listDeadLetters() {
        return [...this.dead];
    }
}
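// Unlike the Redis queue, InMemoryIndexJobQueue.claimNext takes no timeout and
// returns immediately when the pending list is empty; the drivers otherwise share
// the same enqueue/claimNext/ack/retryOrDeadLetter/reclaimOrphaned surface.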
function waitMs(delay) {
    return new Promise((resolve) => setTimeout(resolve, delay));
}
export class SqliteIndexJobQueue {
    dbPath;
    db;
    maxAttempts;
    claimTtlSeconds;
    constructor(dbPath, options) {
        this.dbPath = dbPath;
        ensureSqliteParent(dbPath);
        this.db = openSqliteDatabase(dbPath);
        this.db.exec("PRAGMA journal_mode = WAL;");
        this.db.exec(`
            CREATE TABLE IF NOT EXISTS index_jobs (
                job_id TEXT PRIMARY KEY,
                tenant_id TEXT NOT NULL,
                workspace_id TEXT NOT NULL,
                index_version TEXT NOT NULL,
                manifest_key TEXT NOT NULL,
                attempts INTEGER NOT NULL,
                enqueued_at TEXT NOT NULL,
                status TEXT NOT NULL,
                claimed_at TEXT,
                failed_at TEXT,
                last_error TEXT
            );
            CREATE INDEX IF NOT EXISTS idx_sqlite_index_jobs_status_enqueued ON index_jobs(status, enqueued_at);
            CREATE INDEX IF NOT EXISTS idx_sqlite_index_jobs_status_failed ON index_jobs(status, failed_at);
        `);
        this.maxAttempts = options?.maxAttempts ?? 3;
        this.claimTtlSeconds = Math.max(5, options?.claimTtlSeconds ?? 120);
    }
    close() {
        this.db.close();
    }
    async enqueue(job) {
        const payload = {
            job_id: job.job_id ?? `job_${randomUUID()}`,
            tenant_id: job.tenant_id,
            workspace_id: job.workspace_id,
            index_version: job.index_version,
            manifest_key: job.manifest_key,
            attempts: 0,
            enqueued_at: nowIso()
        };
        this.db
            .prepare(`
                INSERT INTO index_jobs (job_id, tenant_id, workspace_id, index_version, manifest_key, attempts, enqueued_at, status)
                VALUES (?, ?, ?, ?, ?, ?, ?, 'pending')
            `)
            .run(payload.job_id, payload.tenant_id, payload.workspace_id, payload.index_version, payload.manifest_key, payload.attempts, payload.enqueued_at);
        return payload;
    }
    async claimNext(timeoutSeconds = 1) {
        const deadline = Date.now() + Math.max(0, timeoutSeconds) * 1_000;
        do {
            const claimed = this.claimNextImmediate();
            if (claimed) {
                return claimed;
            }
            if (timeoutSeconds <= 0) {
                break;
            }
            await waitMs(50);
        } while (Date.now() <= deadline);
        return undefined;
    }
    async ack(claimed) {
        this.db.prepare("DELETE FROM index_jobs WHERE job_id = ?").run(claimed.payload.job_id);
    }
    async retryOrDeadLetter(claimed, errorMessage) {
        const attempts = claimed.payload.attempts + 1;
        if (attempts >= this.maxAttempts) {
            this.db
                .prepare(`
                    UPDATE index_jobs
                    SET attempts = ?, status = 'dead', failed_at = ?, last_error = ?, claimed_at = NULL
                    WHERE job_id = ?
                `)
                .run(attempts, nowIso(), errorMessage, claimed.payload.job_id);
            return;
        }
        this.db
            .prepare(`
                UPDATE index_jobs
                SET attempts = ?, status = 'pending', last_error = ?, claimed_at = NULL
                WHERE job_id = ?
            `)
            .run(attempts, errorMessage, claimed.payload.job_id);
    }
    async reclaimOrphaned(maxClaimAgeSeconds = this.claimTtlSeconds) {
        const cutoff = new Date(Date.now() - Math.max(1, maxClaimAgeSeconds) * 1_000).toISOString();
        const updated = this.db
            .prepare(`
                UPDATE index_jobs
                SET status = 'pending', claimed_at = NULL, last_error = COALESCE(last_error, 'reclaimed orphaned in-flight job')
                WHERE status = 'processing' AND claimed_at IS NOT NULL AND claimed_at < ?
            `)
            .run(cutoff);
        return Number(updated.changes ?? 0);
    }
    async deadLetterCount() {
        const row = this.db.prepare("SELECT COUNT(*) AS count FROM index_jobs WHERE status = 'dead'").get();
        return Number(row.count);
    }
    async pendingCount() {
        const row = this.db.prepare("SELECT COUNT(*) AS count FROM index_jobs WHERE status = 'pending'").get();
        return Number(row.count);
    }
    async processingCount() {
        const row = this.db.prepare("SELECT COUNT(*) AS count FROM index_jobs WHERE status = 'processing'").get();
        return Number(row.count);
    }
    async listDeadLetters() {
        const rows = this.db
            .prepare(`
                SELECT job_id, tenant_id, workspace_id, index_version, manifest_key, attempts, enqueued_at, failed_at, last_error
                FROM index_jobs
                WHERE status = 'dead'
                ORDER BY failed_at DESC
            `)
            .all();
        return rows.map((row) => ({
            job_id: row.job_id,
            tenant_id: row.tenant_id,
            workspace_id: row.workspace_id,
            index_version: row.index_version,
            manifest_key: row.manifest_key,
            attempts: Number(row.attempts),
            enqueued_at: row.enqueued_at,
            failed_at: row.failed_at,
            last_error: row.last_error
        }));
    }
    claimNextImmediate() {
        const row = this.db
            .prepare(`
                SELECT job_id, tenant_id, workspace_id, index_version, manifest_key, attempts, enqueued_at
                FROM index_jobs
                WHERE status = 'pending'
                ORDER BY enqueued_at ASC
                LIMIT 1
            `)
            .get();
        if (!row) {
            return undefined;
        }
        const claimedAt = nowIso();
        const changed = this.db
            .prepare(`
                UPDATE index_jobs
                SET status = 'processing', claimed_at = ?
                WHERE job_id = ? AND status = 'pending'
            `)
            .run(claimedAt, row.job_id);
        if (Number(changed.changes) !== 1) {
            return undefined;
        }
        const payload = {
            job_id: row.job_id,
            tenant_id: row.tenant_id,
            workspace_id: row.workspace_id,
            index_version: row.index_version,
            manifest_key: row.manifest_key,
            attempts: Number(row.attempts),
            enqueued_at: row.enqueued_at,
            claimed_at: claimedAt
        };
        return {
            payload,
            raw: JSON.stringify(payload)
        };
    }
}
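// Minimal round-trip sketch against the SQLite-backed queue (illustrative; the
// database path and job fields are placeholders):
async function exampleSqliteQueueRoundTrip() {
    const queue = new SqliteIndexJobQueue("./.rce/index-jobs.db", { maxAttempts: 2 });
    await queue.enqueue({
        tenant_id: "ten_demo",
        workspace_id: "ws_demo",
        index_version: "iv_1",
        manifest_key: "manifests/ws_demo/iv_1.json"
    });
    const claimed = await queue.claimNext(0); // poll once, no waiting
    if (claimed) {
        await queue.ack(claimed);
    }
    queue.close();
}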
export class InMemoryUsageMeterStore {
    usage = [];
    audit = [];
    async migrate() {
        return;
    }
    async recordUsage(input) {
        this.usage.push({
            ...input,
            created_at: input.created_at ?? nowIso()
        });
    }
    async recordAuditEvent(input) {
        this.audit.push({
            event_id: `aud_${randomUUID()}`,
            tenant_id: input.tenant_id,
            subject: input.subject,
            action: input.action,
            resource: input.resource,
            status: input.status,
            trace_id: input.trace_id,
            ...(input.details ? { details: input.details } : {}),
            created_at: input.created_at ?? nowIso()
        });
    }
    async listUsageSummary(input) {
        const fromMs = input?.from ? new Date(input.from).getTime() : Number.NEGATIVE_INFINITY;
        const toMs = input?.to ? new Date(input.to).getTime() : Number.POSITIVE_INFINITY;
        const rows = this.usage.filter((row) => {
            if (input?.tenant_id && row.tenant_id !== input.tenant_id) {
                return false;
            }
            const ts = new Date(row.created_at ?? nowIso()).getTime();
            return ts >= fromMs && ts <= toMs;
        });
        const grouped = new Map();
        for (const row of rows) {
            const key = `${row.tenant_id}:${row.tool_name}`;
            const existing = grouped.get(key) ??
                {
                    tenant_id: row.tenant_id,
                    tool_name: row.tool_name,
                    request_count: 0,
                    error_count: 0,
                    total_units: 0,
                    latencies: [],
                    last: row.created_at ?? nowIso()
                };
            existing.request_count += 1;
            existing.error_count += row.status === "error" ? 1 : 0;
            existing.total_units += row.units;
            existing.latencies.push(row.latency_ms);
            if ((row.created_at ?? nowIso()) > existing.last) {
                existing.last = row.created_at ?? nowIso();
            }
            grouped.set(key, existing);
        }
        return [...grouped.values()].map((row) => ({
            tenant_id: row.tenant_id,
            tool_name: row.tool_name,
            request_count: row.request_count,
            error_count: row.error_count,
            total_units: row.total_units,
            p95_latency_ms: percentile(row.latencies, 0.95),
            last_seen_at: row.last
        }));
    }
    async listAuditEvents(input) {
        const limit = Math.max(1, input?.limit ?? 100);
        return this.audit
            .filter((row) => (input?.tenant_id ? row.tenant_id === input.tenant_id : true))
            .sort((a, b) => b.created_at.localeCompare(a.created_at))
            .slice(0, limit);
    }
}
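// Minimal usage-metering sketch (illustrative; the tool name, trace id and latency
// values are placeholders):
async function exampleUsageMetering() {
    const meter = new InMemoryUsageMeterStore();
    await meter.migrate();
    await meter.recordUsage({
        tenant_id: "ten_demo",
        workspace_id: "ws_demo",
        tool_name: "code_search",
        trace_id: "trc_123",
        status: "ok",
        latency_ms: 42,
        result_count: 8,
        units: 1
    });
    // Aggregated per tenant and tool: request/error counts, total units, p95 latency.
    return meter.listUsageSummary({ tenant_id: "ten_demo" });
}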
export class SqliteUsageMeterStore {
    dbPath;
    db;
    constructor(dbPath) {
        this.dbPath = dbPath;
        ensureSqliteParent(dbPath);
        this.db = openSqliteDatabase(dbPath);
        this.db.exec("PRAGMA journal_mode = WAL;");
    }
    close() {
        this.db.close();
    }
    async migrate() {
        this.db.exec(`
            CREATE TABLE IF NOT EXISTS usage_metering (
                id TEXT PRIMARY KEY,
                tenant_id TEXT NOT NULL,
                workspace_id TEXT,
                tool_name TEXT NOT NULL,
                trace_id TEXT NOT NULL,
                status TEXT NOT NULL,
                latency_ms INTEGER NOT NULL,
                result_count INTEGER NOT NULL,
                units INTEGER NOT NULL,
                created_at TEXT NOT NULL
            );
            CREATE INDEX IF NOT EXISTS idx_usage_metering_tenant_created ON usage_metering(tenant_id, created_at DESC);
            CREATE TABLE IF NOT EXISTS audit_events (
                id TEXT PRIMARY KEY,
                tenant_id TEXT NOT NULL,
                subject TEXT NOT NULL,
                action TEXT NOT NULL,
                resource TEXT NOT NULL,
                status TEXT NOT NULL,
                trace_id TEXT NOT NULL,
                details TEXT,
                created_at TEXT NOT NULL
            );
            CREATE INDEX IF NOT EXISTS idx_audit_events_tenant_created ON audit_events(tenant_id, created_at DESC);
        `);
    }
    async recordUsage(input) {
        this.db
            .prepare(`
                INSERT INTO usage_metering
                    (id, tenant_id, workspace_id, tool_name, trace_id, status, latency_ms, result_count, units, created_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            `)
            .run(`usg_${randomUUID()}`, input.tenant_id, input.workspace_id ?? null, input.tool_name, input.trace_id, input.status, Math.max(0, Math.floor(input.latency_ms)), Math.max(0, Math.floor(input.result_count)), Math.max(0, Math.floor(input.units)), input.created_at ?? nowIso());
    }
    async recordAuditEvent(input) {
        this.db
            .prepare(`
                INSERT INTO audit_events
                    (id, tenant_id, subject, action, resource, status, trace_id, details, created_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            `)
            .run(`aud_${randomUUID()}`, input.tenant_id, input.subject, input.action, input.resource, input.status, input.trace_id, input.details ? JSON.stringify(input.details) : null, input.created_at ?? nowIso());
    }
    async listUsageSummary(input) {
        const where = [];
        const params = [];
        if (input?.tenant_id) {
            where.push("tenant_id = ?");
            params.push(input.tenant_id);
        }
        if (input?.from) {
            where.push("created_at >= ?");
            params.push(input.from);
        }
        if (input?.to) {
            where.push("created_at <= ?");
            params.push(input.to);
        }
        const rows = this.db
            .prepare(`
                SELECT tenant_id, tool_name, status, latency_ms, units, created_at
                FROM usage_metering
                ${where.length > 0 ? `WHERE ${where.join(" AND ")}` : ""}
            `)
            .all(...params);
        const grouped = new Map();
        for (const row of rows) {
            const key = `${row.tenant_id}:${row.tool_name}`;
            const existing = grouped.get(key) ??
                {
                    tenant_id: row.tenant_id,
                    tool_name: row.tool_name,
                    request_count: 0,
                    error_count: 0,
                    total_units: 0,
                    latencies: [],
                    last: row.created_at
                };
            existing.request_count += 1;
            existing.error_count += row.status === "error" ? 1 : 0;
            existing.total_units += Number(row.units);
            existing.latencies.push(Number(row.latency_ms));
            if (row.created_at > existing.last) {
                existing.last = row.created_at;
            }
            grouped.set(key, existing);
        }
        return [...grouped.values()].map((row) => ({
            tenant_id: row.tenant_id,
            tool_name: row.tool_name,
            request_count: row.request_count,
            error_count: row.error_count,
            total_units: row.total_units,
            p95_latency_ms: percentile(row.latencies, 0.95),
            last_seen_at: row.last
        }));
    }
    async listAuditEvents(input) {
        const where = input?.tenant_id ? "WHERE tenant_id = ?" : "";
        const limit = Math.max(1, input?.limit ?? 100);
        const rows = this.db
            .prepare(`
                SELECT id AS event_id, tenant_id, subject, action, resource, status, trace_id, details, created_at
                FROM audit_events
                ${where}
                ORDER BY created_at DESC
                LIMIT ?
            `)
            .all(...(input?.tenant_id ? [input.tenant_id, limit] : [limit]));
        return rows.map((row) => ({
            event_id: row.event_id,
            tenant_id: row.tenant_id,
            subject: row.subject,
            action: row.action,
            resource: row.resource,
            status: row.status,
            trace_id: row.trace_id,
            ...(row.details ? { details: JSON.parse(row.details) } : {}),
            created_at: row.created_at
        }));
    }
}
export class PostgresUsageMeterStore {
    pool;
    constructor(pool) {
        this.pool = pool;
    }
    async migrate() {
        await this.pool.query(`
            CREATE TABLE IF NOT EXISTS usage_metering (
                id TEXT PRIMARY KEY,
                tenant_id TEXT NOT NULL,
                workspace_id TEXT,
                tool_name TEXT NOT NULL,
                trace_id TEXT NOT NULL,
                status TEXT NOT NULL,
                latency_ms INTEGER NOT NULL,
                result_count INTEGER NOT NULL,
                units INTEGER NOT NULL,
                created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
            );
        `);
        await this.pool.query("CREATE INDEX IF NOT EXISTS idx_usage_metering_tenant_created ON usage_metering(tenant_id, created_at DESC)");
        await this.pool.query(`
            CREATE TABLE IF NOT EXISTS audit_events (
                id TEXT PRIMARY KEY,
                tenant_id TEXT NOT NULL,
                subject TEXT NOT NULL,
                action TEXT NOT NULL,
                resource TEXT NOT NULL,
                status TEXT NOT NULL,
                trace_id TEXT NOT NULL,
                details JSONB,
                created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
            );
        `);
        await this.pool.query("CREATE INDEX IF NOT EXISTS idx_audit_events_tenant_created ON audit_events(tenant_id, created_at DESC)");
    }
    async recordUsage(input) {
        await this.pool.query(`
            INSERT INTO usage_metering
                (id, tenant_id, workspace_id, tool_name, trace_id, status, latency_ms, result_count, units, created_at)
            VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10::timestamptz)
        `, [
            `usg_${randomUUID()}`,
            input.tenant_id,
            input.workspace_id ?? null,
            input.tool_name,
            input.trace_id,
            input.status,
            Math.max(0, Math.floor(input.latency_ms)),
            Math.max(0, Math.floor(input.result_count)),
            Math.max(0, Math.floor(input.units)),
            input.created_at ?? nowIso()
        ]);
    }
    async recordAuditEvent(input) {
        await this.pool.query(`
            INSERT INTO audit_events
                (id, tenant_id, subject, action, resource, status, trace_id, details, created_at)
            VALUES ($1, $2, $3, $4, $5, $6, $7, $8::jsonb, $9::timestamptz)
        `, [
            `aud_${randomUUID()}`,
            input.tenant_id,
            input.subject,
            input.action,
            input.resource,
            input.status,
            input.trace_id,
            JSON.stringify(input.details ?? null),
            input.created_at ?? nowIso()
        ]);
    }
    async listUsageSummary(input) {
        const where = [];
        const params = [];
        if (input?.tenant_id) {
            params.push(input.tenant_id);
            where.push(`tenant_id = $${params.length}`);
        }
        if (input?.from) {
            params.push(input.from);
            where.push(`created_at >= $${params.length}::timestamptz`);
        }
        if (input?.to) {
            params.push(input.to);
            where.push(`created_at <= $${params.length}::timestamptz`);
        }
        const result = await this.pool.query(`
            SELECT
                tenant_id,
                tool_name,
                COUNT(*)::int AS request_count,
                COUNT(*) FILTER (WHERE status = 'error')::int AS error_count,
                COALESCE(SUM(units), 0)::int AS total_units,
                COALESCE(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY latency_ms), 0)::double precision AS p95_latency_ms,
                MAX(created_at) AS last_seen_at
            FROM usage_metering
            ${where.length > 0 ? `WHERE ${where.join(" AND ")}` : ""}
            GROUP BY tenant_id, tool_name
            ORDER BY tenant_id, tool_name
        `, params);
        return result.rows.map((row) => ({
            tenant_id: row.tenant_id,
            tool_name: row.tool_name,
            request_count: Number(row.request_count),
            error_count: Number(row.error_count),
            total_units: Number(row.total_units),
            p95_latency_ms: Number(row.p95_latency_ms),
            ...(row.last_seen_at ? { last_seen_at: toIsoString(row.last_seen_at) } : {})
        }));
    }
    async listAuditEvents(input) {
        const params = [];
        const where = [];
        if (input?.tenant_id) {
            params.push(input.tenant_id);
            where.push(`tenant_id = $${params.length}`);
        }
        params.push(Math.max(1, input?.limit ?? 100));
        const limitIndex = params.length;
        const result = await this.pool.query(`
            SELECT id AS event_id, tenant_id, subject, action, resource, status, trace_id, details, created_at
            FROM audit_events
            ${where.length > 0 ? `WHERE ${where.join(" AND ")}` : ""}
            ORDER BY created_at DESC
            LIMIT $${limitIndex}
        `, params);
        return result.rows.map((row) => ({
            event_id: row.event_id,
            tenant_id: row.tenant_id,
            subject: row.subject,
            action: row.action,
            resource: row.resource,
            status: row.status,
            trace_id: row.trace_id,
            ...(row.details ? { details: row.details } : {}),
            created_at: toIsoString(row.created_at)
        }));
    }
}
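// Sketch of wiring the Postgres-backed meter store (illustrative; the connection
// string fallback and audit fields are placeholders):
async function examplePostgresAudit() {
    const pool = createPostgresPool(process.env.DATABASE_URL ?? "postgres://localhost:5432/rce");
    const meter = new PostgresUsageMeterStore(pool);
    await meter.migrate(); // idempotent: CREATE TABLE / INDEX IF NOT EXISTS
    await meter.recordAuditEvent({
        tenant_id: "ten_demo",
        subject: "user:alice",
        action: "index.rebuild",
        resource: "workspace/ws_demo",
        status: "ok",
        trace_id: "trc_456",
        details: { reason: "manual trigger" }
    });
    return meter.listAuditEvents({ tenant_id: "ten_demo", limit: 20 });
}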
export function createRedisClient(url) {
    return new Redis(url);
}
export function buildQueryCacheKey(input) {
    const digest = sha256(JSON.stringify({
        query: input.query,
        top_k: input.top_k,
        filters: input.filters ?? null,
        index_version: input.index_version
    }));
    return `${input.workspace_id}:${input.index_version}:${digest}`;
}
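// Cache keys therefore take the shape `<workspace_id>:<index_version>:<digest>`,
// which is what the workspace-prefix matching in the cache drivers above relies on.
// A minimal sketch with placeholder identifiers:
function exampleCacheKeyShape() {
    // Bumping index_version changes the key, so results cached for an older index
    // version are never served back.
    return buildQueryCacheKey({
        workspace_id: "ws_demo",
        index_version: "iv_2",
        query: "usage metering schema",
        top_k: 10
    });
}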