kiro-memory 1.9.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -1
- package/package.json +5 -5
- package/plugin/dist/cli/contextkit.js +2611 -345
- package/plugin/dist/hooks/agentSpawn.js +853 -223
- package/plugin/dist/hooks/kiro-hooks.js +841 -211
- package/plugin/dist/hooks/postToolUse.js +853 -222
- package/plugin/dist/hooks/stop.js +850 -220
- package/plugin/dist/hooks/userPromptSubmit.js +848 -216
- package/plugin/dist/index.js +843 -340
- package/plugin/dist/plugins/github/github-client.js +152 -0
- package/plugin/dist/plugins/github/index.js +412 -0
- package/plugin/dist/plugins/github/issue-parser.js +54 -0
- package/plugin/dist/plugins/slack/formatter.js +90 -0
- package/plugin/dist/plugins/slack/index.js +215 -0
- package/plugin/dist/sdk/index.js +841 -215
- package/plugin/dist/servers/mcp-server.js +4461 -397
- package/plugin/dist/services/search/EmbeddingService.js +146 -37
- package/plugin/dist/services/search/HybridSearch.js +564 -116
- package/plugin/dist/services/search/VectorSearch.js +187 -60
- package/plugin/dist/services/search/index.js +565 -254
- package/plugin/dist/services/sqlite/Backup.js +416 -0
- package/plugin/dist/services/sqlite/Database.js +126 -153
- package/plugin/dist/services/sqlite/ImportExport.js +452 -0
- package/plugin/dist/services/sqlite/Observations.js +314 -19
- package/plugin/dist/services/sqlite/Prompts.js +1 -1
- package/plugin/dist/services/sqlite/Search.js +41 -29
- package/plugin/dist/services/sqlite/Summaries.js +4 -4
- package/plugin/dist/services/sqlite/index.js +1428 -208
- package/plugin/dist/viewer.css +1 -0
- package/plugin/dist/viewer.html +2 -179
- package/plugin/dist/viewer.js +23 -24942
- package/plugin/dist/viewer.js.map +7 -0
- package/plugin/dist/worker-service.js +427 -5569
- package/plugin/dist/worker-service.js.map +7 -0
|
@@ -22,13 +22,13 @@ __export(Search_exports, {
|
|
|
22
22
|
searchObservationsLIKE: () => searchObservationsLIKE,
|
|
23
23
|
searchSummariesFiltered: () => searchSummariesFiltered
|
|
24
24
|
});
|
|
25
|
-
import { existsSync as
|
|
25
|
+
import { existsSync as existsSync2, statSync } from "fs";
|
|
26
26
|
function escapeLikePattern(input) {
|
|
27
27
|
return input.replace(/[%_\\]/g, "\\$&");
|
|
28
28
|
}
|
|
29
29
|
function sanitizeFTS5Query(query) {
|
|
30
30
|
const trimmed = query.length > 1e4 ? query.substring(0, 1e4) : query;
|
|
31
|
-
const terms = trimmed.replace(/[""]/g, "").split(/\s+/).filter((t) => t.length > 0).slice(0, 100).map((t) => `"${t}"`);
|
|
31
|
+
const terms = trimmed.replace(/[""\u0022]/g, "").split(/\s+/).filter((t) => t.length > 0).slice(0, 100).map((t) => `"${t}"`);
|
|
32
32
|
return terms.join(" ");
|
|
33
33
|
}
|
|
34
34
|
function searchObservationsFTS(db, query, filters = {}) {
|
|
@@ -125,7 +125,7 @@ function searchObservationsLIKE(db, query, filters = {}) {
|
|
|
125
125
|
sql += " AND created_at_epoch <= ?";
|
|
126
126
|
params.push(filters.dateEnd);
|
|
127
127
|
}
|
|
128
|
-
sql += " ORDER BY created_at_epoch DESC LIMIT ?";
|
|
128
|
+
sql += " ORDER BY created_at_epoch DESC, id DESC LIMIT ?";
|
|
129
129
|
params.push(limit);
|
|
130
130
|
const stmt = db.query(sql);
|
|
131
131
|
return stmt.all(...params);
|
|
@@ -150,7 +150,7 @@ function searchSummariesFiltered(db, query, filters = {}) {
|
|
|
150
150
|
sql += " AND created_at_epoch <= ?";
|
|
151
151
|
params.push(filters.dateEnd);
|
|
152
152
|
}
|
|
153
|
-
sql += " ORDER BY created_at_epoch DESC LIMIT ?";
|
|
153
|
+
sql += " ORDER BY created_at_epoch DESC, id DESC LIMIT ?";
|
|
154
154
|
params.push(limit);
|
|
155
155
|
const stmt = db.query(sql);
|
|
156
156
|
return stmt.all(...params);
|
|
@@ -160,7 +160,7 @@ function getObservationsByIds(db, ids) {
|
|
|
160
160
|
const validIds = ids.filter((id) => typeof id === "number" && Number.isInteger(id) && id > 0).slice(0, 500);
|
|
161
161
|
if (validIds.length === 0) return [];
|
|
162
162
|
const placeholders = validIds.map(() => "?").join(",");
|
|
163
|
-
const sql = `SELECT * FROM observations WHERE id IN (${placeholders}) ORDER BY created_at_epoch DESC`;
|
|
163
|
+
const sql = `SELECT * FROM observations WHERE id IN (${placeholders}) ORDER BY created_at_epoch DESC, id DESC`;
|
|
164
164
|
const stmt = db.query(sql);
|
|
165
165
|
return stmt.all(...validIds);
|
|
166
166
|
}
|
|
@@ -172,11 +172,11 @@ function getTimeline(db, anchorId, depthBefore = 5, depthAfter = 5) {
|
|
|
172
172
|
const beforeStmt = db.query(`
|
|
173
173
|
SELECT id, 'observation' as type, title, text as content, project, created_at, created_at_epoch
|
|
174
174
|
FROM observations
|
|
175
|
-
WHERE created_at_epoch < ?
|
|
176
|
-
ORDER BY created_at_epoch DESC
|
|
175
|
+
WHERE (created_at_epoch < ? OR (created_at_epoch = ? AND id < ?))
|
|
176
|
+
ORDER BY created_at_epoch DESC, id DESC
|
|
177
177
|
LIMIT ?
|
|
178
178
|
`);
|
|
179
|
-
const before = beforeStmt.all(anchorEpoch, depthBefore).reverse();
|
|
179
|
+
const before = beforeStmt.all(anchorEpoch, anchorEpoch, anchorId, depthBefore).reverse();
|
|
180
180
|
const selfStmt = db.query(`
|
|
181
181
|
SELECT id, 'observation' as type, title, text as content, project, created_at, created_at_epoch
|
|
182
182
|
FROM observations WHERE id = ?
|
|
@@ -185,34 +185,46 @@ function getTimeline(db, anchorId, depthBefore = 5, depthAfter = 5) {
|
|
|
185
185
|
const afterStmt = db.query(`
|
|
186
186
|
SELECT id, 'observation' as type, title, text as content, project, created_at, created_at_epoch
|
|
187
187
|
FROM observations
|
|
188
|
-
WHERE created_at_epoch > ?
|
|
189
|
-
ORDER BY created_at_epoch ASC
|
|
188
|
+
WHERE (created_at_epoch > ? OR (created_at_epoch = ? AND id > ?))
|
|
189
|
+
ORDER BY created_at_epoch ASC, id ASC
|
|
190
190
|
LIMIT ?
|
|
191
191
|
`);
|
|
192
|
-
const after = afterStmt.all(anchorEpoch, depthAfter);
|
|
192
|
+
const after = afterStmt.all(anchorEpoch, anchorEpoch, anchorId, depthAfter);
|
|
193
193
|
return [...before, ...self, ...after];
|
|
194
194
|
}
|
|
195
195
|
function getProjectStats(db, project) {
|
|
196
|
-
const
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
196
|
+
const sql = `
|
|
197
|
+
WITH
|
|
198
|
+
obs_stats AS (
|
|
199
|
+
SELECT
|
|
200
|
+
COUNT(*) as count,
|
|
201
|
+
COALESCE(SUM(discovery_tokens), 0) as discovery_tokens,
|
|
202
|
+
COALESCE(SUM(
|
|
203
|
+
CAST((LENGTH(COALESCE(title, '')) + LENGTH(COALESCE(narrative, ''))) / 4 AS INTEGER)
|
|
204
|
+
), 0) as read_tokens
|
|
205
|
+
FROM observations WHERE project = ?
|
|
206
|
+
),
|
|
207
|
+
sum_count AS (SELECT COUNT(*) as count FROM summaries WHERE project = ?),
|
|
208
|
+
ses_count AS (SELECT COUNT(*) as count FROM sessions WHERE project = ?),
|
|
209
|
+
prm_count AS (SELECT COUNT(*) as count FROM prompts WHERE project = ?)
|
|
210
|
+
SELECT
|
|
211
|
+
obs_stats.count as observations,
|
|
212
|
+
obs_stats.discovery_tokens,
|
|
213
|
+
obs_stats.read_tokens,
|
|
214
|
+
sum_count.count as summaries,
|
|
215
|
+
ses_count.count as sessions,
|
|
216
|
+
prm_count.count as prompts
|
|
217
|
+
FROM obs_stats, sum_count, ses_count, prm_count
|
|
218
|
+
`;
|
|
219
|
+
const row = db.query(sql).get(project, project, project, project);
|
|
220
|
+
const discoveryTokens = row?.discovery_tokens || 0;
|
|
221
|
+
const readTokens = row?.read_tokens || 0;
|
|
210
222
|
const savings = Math.max(0, discoveryTokens - readTokens);
|
|
211
223
|
return {
|
|
212
|
-
observations:
|
|
213
|
-
summaries:
|
|
214
|
-
sessions:
|
|
215
|
-
prompts:
|
|
224
|
+
observations: row?.observations || 0,
|
|
225
|
+
summaries: row?.summaries || 0,
|
|
226
|
+
sessions: row?.sessions || 0,
|
|
227
|
+
prompts: row?.prompts || 0,
|
|
216
228
|
tokenEconomics: { discoveryTokens, readTokens, savings }
|
|
217
229
|
};
|
|
218
230
|
}
|
|
@@ -220,7 +232,7 @@ function getStaleObservations(db, project) {
|
|
|
220
232
|
const rows = db.query(`
|
|
221
233
|
SELECT * FROM observations
|
|
222
234
|
WHERE project = ? AND files_modified IS NOT NULL AND files_modified != ''
|
|
223
|
-
ORDER BY created_at_epoch DESC
|
|
235
|
+
ORDER BY created_at_epoch DESC, id DESC
|
|
224
236
|
LIMIT 500
|
|
225
237
|
`).all(project);
|
|
226
238
|
const staleObs = [];
|
|
@@ -230,7 +242,7 @@ function getStaleObservations(db, project) {
|
|
|
230
242
|
let isStale = false;
|
|
231
243
|
for (const filepath of files) {
|
|
232
244
|
try {
|
|
233
|
-
if (!
|
|
245
|
+
if (!existsSync2(filepath)) continue;
|
|
234
246
|
const stat = statSync(filepath);
|
|
235
247
|
if (stat.mtimeMs > obs.created_at_epoch) {
|
|
236
248
|
isStale = true;
|
|
@@ -263,6 +275,290 @@ var init_Search = __esm({
|
|
|
263
275
|
}
|
|
264
276
|
});
|
|
265
277
|
|
|
278
|
+
// src/utils/secrets.ts
|
|
279
|
+
function redactSecrets(text) {
|
|
280
|
+
if (!text) return text;
|
|
281
|
+
let redacted = text;
|
|
282
|
+
for (const { pattern } of SECRET_PATTERNS) {
|
|
283
|
+
pattern.lastIndex = 0;
|
|
284
|
+
redacted = redacted.replace(pattern, (match) => {
|
|
285
|
+
const prefix = match.substring(0, Math.min(4, match.length));
|
|
286
|
+
return `${prefix}***REDACTED***`;
|
|
287
|
+
});
|
|
288
|
+
}
|
|
289
|
+
return redacted;
|
|
290
|
+
}
|
|
291
|
+
var SECRET_PATTERNS;
|
|
292
|
+
var init_secrets = __esm({
|
|
293
|
+
"src/utils/secrets.ts"() {
|
|
294
|
+
"use strict";
|
|
295
|
+
SECRET_PATTERNS = [
|
|
296
|
+
// AWS Access Keys (AKIA, ABIA, ACCA, ASIA prefixes + 16 alphanumeric chars)
|
|
297
|
+
{ name: "aws-key", pattern: /(?:AKIA|ABIA|ACCA|ASIA)[A-Z0-9]{16}/g },
|
|
298
|
+
// JWT tokens (three base64url segments separated by dots)
|
|
299
|
+
{ name: "jwt", pattern: /eyJ[a-zA-Z0-9_-]{10,}\.eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}/g },
|
|
300
|
+
// Generic API keys in key=value or key: value assignments
|
|
301
|
+
{ name: "api-key", pattern: /(?:api[_-]?key|apikey|api[_-]?secret)\s*[:=]\s*['"]?([a-zA-Z0-9_\-]{20,})['"]?/gi },
|
|
302
|
+
// Password/secret/token in variable assignments
|
|
303
|
+
{ name: "credential", pattern: /(?:password|passwd|pwd|secret|token|auth[_-]?token|access[_-]?token|bearer)\s*[:=]\s*['"]?([^\s'"]{8,})['"]?/gi },
|
|
304
|
+
// Credentials embedded in URLs (user:pass@host)
|
|
305
|
+
{ name: "url-credential", pattern: /(?:https?:\/\/)([^:]+):([^@]+)@/g },
|
|
306
|
+
// PEM-encoded private keys (RSA, EC, DSA, OpenSSH)
|
|
307
|
+
{ name: "private-key", pattern: /-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----/g },
|
|
308
|
+
// GitHub personal access tokens (ghp_, gho_, ghu_, ghs_, ghr_ prefixes)
|
|
309
|
+
{ name: "github-token", pattern: /gh[pousr]_[a-zA-Z0-9]{36,}/g },
|
|
310
|
+
// Slack bot/user/app tokens
|
|
311
|
+
{ name: "slack-token", pattern: /xox[bpoas]-[a-zA-Z0-9-]{10,}/g },
|
|
312
|
+
// HTTP Authorization Bearer header values
|
|
313
|
+
{ name: "bearer-header", pattern: /\bBearer\s+([a-zA-Z0-9_\-\.]{20,})/g },
|
|
314
|
+
// Generic hex secrets (32+ hex chars after a key/secret/token/password label)
|
|
315
|
+
{ name: "hex-secret", pattern: /(?:key|secret|token|password)\s*[:=]\s*['"]?([0-9a-f]{32,})['"]?/gi }
|
|
316
|
+
];
|
|
317
|
+
}
|
|
318
|
+
});
|
|
319
|
+
|
|
320
|
+
// src/utils/categorizer.ts
|
|
321
|
+
function categorize(input) {
|
|
322
|
+
const scores = /* @__PURE__ */ new Map();
|
|
323
|
+
const searchText = [
|
|
324
|
+
input.title,
|
|
325
|
+
input.text || "",
|
|
326
|
+
input.narrative || "",
|
|
327
|
+
input.concepts || ""
|
|
328
|
+
].join(" ").toLowerCase();
|
|
329
|
+
const allFiles = [input.filesModified || "", input.filesRead || ""].join(",");
|
|
330
|
+
for (const rule of CATEGORY_RULES) {
|
|
331
|
+
let score = 0;
|
|
332
|
+
for (const kw of rule.keywords) {
|
|
333
|
+
if (searchText.includes(kw.toLowerCase())) {
|
|
334
|
+
score += rule.weight;
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
if (rule.types && rule.types.includes(input.type)) {
|
|
338
|
+
score += rule.weight * 2;
|
|
339
|
+
}
|
|
340
|
+
if (rule.filePatterns && allFiles) {
|
|
341
|
+
for (const pattern of rule.filePatterns) {
|
|
342
|
+
if (pattern.test(allFiles)) {
|
|
343
|
+
score += rule.weight;
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
}
|
|
347
|
+
if (score > 0) {
|
|
348
|
+
scores.set(rule.category, (scores.get(rule.category) || 0) + score);
|
|
349
|
+
}
|
|
350
|
+
}
|
|
351
|
+
let bestCategory = "general";
|
|
352
|
+
let bestScore = 0;
|
|
353
|
+
for (const [category, score] of scores) {
|
|
354
|
+
if (score > bestScore) {
|
|
355
|
+
bestScore = score;
|
|
356
|
+
bestCategory = category;
|
|
357
|
+
}
|
|
358
|
+
}
|
|
359
|
+
return bestCategory;
|
|
360
|
+
}
|
|
361
|
+
var CATEGORY_RULES;
|
|
362
|
+
var init_categorizer = __esm({
|
|
363
|
+
"src/utils/categorizer.ts"() {
|
|
364
|
+
"use strict";
|
|
365
|
+
CATEGORY_RULES = [
|
|
366
|
+
{
|
|
367
|
+
category: "security",
|
|
368
|
+
keywords: [
|
|
369
|
+
"security",
|
|
370
|
+
"vulnerability",
|
|
371
|
+
"cve",
|
|
372
|
+
"xss",
|
|
373
|
+
"csrf",
|
|
374
|
+
"injection",
|
|
375
|
+
"sanitize",
|
|
376
|
+
"escape",
|
|
377
|
+
"auth",
|
|
378
|
+
"authentication",
|
|
379
|
+
"authorization",
|
|
380
|
+
"permission",
|
|
381
|
+
"helmet",
|
|
382
|
+
"cors",
|
|
383
|
+
"rate-limit",
|
|
384
|
+
"token",
|
|
385
|
+
"encrypt",
|
|
386
|
+
"decrypt",
|
|
387
|
+
"secret",
|
|
388
|
+
"redact",
|
|
389
|
+
"owasp"
|
|
390
|
+
],
|
|
391
|
+
filePatterns: [/security/i, /auth/i, /secrets?\.ts/i],
|
|
392
|
+
weight: 10
|
|
393
|
+
},
|
|
394
|
+
{
|
|
395
|
+
category: "testing",
|
|
396
|
+
keywords: [
|
|
397
|
+
"test",
|
|
398
|
+
"spec",
|
|
399
|
+
"expect",
|
|
400
|
+
"assert",
|
|
401
|
+
"mock",
|
|
402
|
+
"stub",
|
|
403
|
+
"fixture",
|
|
404
|
+
"coverage",
|
|
405
|
+
"jest",
|
|
406
|
+
"vitest",
|
|
407
|
+
"bun test",
|
|
408
|
+
"unit test",
|
|
409
|
+
"integration test",
|
|
410
|
+
"e2e"
|
|
411
|
+
],
|
|
412
|
+
types: ["test"],
|
|
413
|
+
filePatterns: [/\.test\./i, /\.spec\./i, /tests?\//i, /__tests__/i],
|
|
414
|
+
weight: 8
|
|
415
|
+
},
|
|
416
|
+
{
|
|
417
|
+
category: "debugging",
|
|
418
|
+
keywords: [
|
|
419
|
+
"debug",
|
|
420
|
+
"fix",
|
|
421
|
+
"bug",
|
|
422
|
+
"error",
|
|
423
|
+
"crash",
|
|
424
|
+
"stacktrace",
|
|
425
|
+
"stack trace",
|
|
426
|
+
"exception",
|
|
427
|
+
"breakpoint",
|
|
428
|
+
"investigate",
|
|
429
|
+
"root cause",
|
|
430
|
+
"troubleshoot",
|
|
431
|
+
"diagnose",
|
|
432
|
+
"bisect",
|
|
433
|
+
"regression"
|
|
434
|
+
],
|
|
435
|
+
types: ["bugfix"],
|
|
436
|
+
weight: 8
|
|
437
|
+
},
|
|
438
|
+
{
|
|
439
|
+
category: "architecture",
|
|
440
|
+
keywords: [
|
|
441
|
+
"architect",
|
|
442
|
+
"design",
|
|
443
|
+
"pattern",
|
|
444
|
+
"modular",
|
|
445
|
+
"migration",
|
|
446
|
+
"schema",
|
|
447
|
+
"database",
|
|
448
|
+
"api design",
|
|
449
|
+
"abstract",
|
|
450
|
+
"dependency injection",
|
|
451
|
+
"singleton",
|
|
452
|
+
"factory",
|
|
453
|
+
"observer",
|
|
454
|
+
"middleware",
|
|
455
|
+
"pipeline",
|
|
456
|
+
"microservice",
|
|
457
|
+
"monolith"
|
|
458
|
+
],
|
|
459
|
+
types: ["decision", "constraint"],
|
|
460
|
+
weight: 7
|
|
461
|
+
},
|
|
462
|
+
{
|
|
463
|
+
category: "refactoring",
|
|
464
|
+
keywords: [
|
|
465
|
+
"refactor",
|
|
466
|
+
"rename",
|
|
467
|
+
"extract",
|
|
468
|
+
"inline",
|
|
469
|
+
"move",
|
|
470
|
+
"split",
|
|
471
|
+
"merge",
|
|
472
|
+
"simplify",
|
|
473
|
+
"cleanup",
|
|
474
|
+
"clean up",
|
|
475
|
+
"dead code",
|
|
476
|
+
"consolidate",
|
|
477
|
+
"reorganize",
|
|
478
|
+
"restructure",
|
|
479
|
+
"decouple"
|
|
480
|
+
],
|
|
481
|
+
weight: 6
|
|
482
|
+
},
|
|
483
|
+
{
|
|
484
|
+
category: "config",
|
|
485
|
+
keywords: [
|
|
486
|
+
"config",
|
|
487
|
+
"configuration",
|
|
488
|
+
"env",
|
|
489
|
+
"environment",
|
|
490
|
+
"dotenv",
|
|
491
|
+
".env",
|
|
492
|
+
"settings",
|
|
493
|
+
"tsconfig",
|
|
494
|
+
"eslint",
|
|
495
|
+
"prettier",
|
|
496
|
+
"webpack",
|
|
497
|
+
"vite",
|
|
498
|
+
"esbuild",
|
|
499
|
+
"docker",
|
|
500
|
+
"ci/cd",
|
|
501
|
+
"github actions",
|
|
502
|
+
"deploy",
|
|
503
|
+
"build",
|
|
504
|
+
"bundle",
|
|
505
|
+
"package.json"
|
|
506
|
+
],
|
|
507
|
+
filePatterns: [
|
|
508
|
+
/\.config\./i,
|
|
509
|
+
/\.env/i,
|
|
510
|
+
/tsconfig/i,
|
|
511
|
+
/\.ya?ml/i,
|
|
512
|
+
/Dockerfile/i,
|
|
513
|
+
/docker-compose/i
|
|
514
|
+
],
|
|
515
|
+
weight: 5
|
|
516
|
+
},
|
|
517
|
+
{
|
|
518
|
+
category: "docs",
|
|
519
|
+
keywords: [
|
|
520
|
+
"document",
|
|
521
|
+
"readme",
|
|
522
|
+
"changelog",
|
|
523
|
+
"jsdoc",
|
|
524
|
+
"comment",
|
|
525
|
+
"explain",
|
|
526
|
+
"guide",
|
|
527
|
+
"tutorial",
|
|
528
|
+
"api doc",
|
|
529
|
+
"openapi",
|
|
530
|
+
"swagger"
|
|
531
|
+
],
|
|
532
|
+
types: ["docs"],
|
|
533
|
+
filePatterns: [/\.md$/i, /docs?\//i, /readme/i, /changelog/i],
|
|
534
|
+
weight: 5
|
|
535
|
+
},
|
|
536
|
+
{
|
|
537
|
+
category: "feature-dev",
|
|
538
|
+
keywords: [
|
|
539
|
+
"feature",
|
|
540
|
+
"implement",
|
|
541
|
+
"add",
|
|
542
|
+
"create",
|
|
543
|
+
"new",
|
|
544
|
+
"endpoint",
|
|
545
|
+
"component",
|
|
546
|
+
"module",
|
|
547
|
+
"service",
|
|
548
|
+
"handler",
|
|
549
|
+
"route",
|
|
550
|
+
"hook",
|
|
551
|
+
"plugin",
|
|
552
|
+
"integration"
|
|
553
|
+
],
|
|
554
|
+
types: ["feature", "file-write"],
|
|
555
|
+
weight: 3
|
|
556
|
+
// lowest — generic catch-all for development
|
|
557
|
+
}
|
|
558
|
+
];
|
|
559
|
+
}
|
|
560
|
+
});
|
|
561
|
+
|
|
266
562
|
// src/services/sqlite/Observations.ts
|
|
267
563
|
var Observations_exports = {};
|
|
268
564
|
__export(Observations_exports, {
|
|
@@ -288,11 +584,23 @@ function isDuplicateObservation(db, contentHash, windowMs = 3e4) {
|
|
|
288
584
|
}
|
|
289
585
|
function createObservation(db, memorySessionId, project, type, title, subtitle, text, narrative, facts, concepts, filesRead, filesModified, promptNumber, contentHash = null, discoveryTokens = 0) {
|
|
290
586
|
const now = /* @__PURE__ */ new Date();
|
|
587
|
+
const safeTitle = redactSecrets(title);
|
|
588
|
+
const safeText = text ? redactSecrets(text) : text;
|
|
589
|
+
const safeNarrative = narrative ? redactSecrets(narrative) : narrative;
|
|
590
|
+
const autoCategory = categorize({
|
|
591
|
+
type,
|
|
592
|
+
title: safeTitle,
|
|
593
|
+
text: safeText,
|
|
594
|
+
narrative: safeNarrative,
|
|
595
|
+
concepts,
|
|
596
|
+
filesModified,
|
|
597
|
+
filesRead
|
|
598
|
+
});
|
|
291
599
|
const result = db.run(
|
|
292
600
|
`INSERT INTO observations
|
|
293
|
-
(memory_session_id, project, type, title, subtitle, text, narrative, facts, concepts, files_read, files_modified, prompt_number, created_at, created_at_epoch, content_hash, discovery_tokens)
|
|
294
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
295
|
-
[memorySessionId, project, type,
|
|
601
|
+
(memory_session_id, project, type, title, subtitle, text, narrative, facts, concepts, files_read, files_modified, prompt_number, created_at, created_at_epoch, content_hash, discovery_tokens, auto_category)
|
|
602
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
603
|
+
[memorySessionId, project, type, safeTitle, subtitle, safeText, safeNarrative, facts, concepts, filesRead, filesModified, promptNumber, now.toISOString(), now.getTime(), contentHash, discoveryTokens, autoCategory]
|
|
296
604
|
);
|
|
297
605
|
return Number(result.lastInsertRowid);
|
|
298
606
|
}
|
|
@@ -304,16 +612,16 @@ function getObservationsBySession(db, memorySessionId) {
|
|
|
304
612
|
}
|
|
305
613
|
function getObservationsByProject(db, project, limit = 100) {
|
|
306
614
|
const query = db.query(
|
|
307
|
-
"SELECT * FROM observations WHERE project = ? ORDER BY created_at_epoch DESC LIMIT ?"
|
|
615
|
+
"SELECT * FROM observations WHERE project = ? ORDER BY created_at_epoch DESC, id DESC LIMIT ?"
|
|
308
616
|
);
|
|
309
617
|
return query.all(project, limit);
|
|
310
618
|
}
|
|
311
619
|
function searchObservations(db, searchTerm, project) {
|
|
312
620
|
const sql = project ? `SELECT * FROM observations
|
|
313
621
|
WHERE project = ? AND (title LIKE ? ESCAPE '\\' OR text LIKE ? ESCAPE '\\' OR narrative LIKE ? ESCAPE '\\')
|
|
314
|
-
ORDER BY created_at_epoch DESC` : `SELECT * FROM observations
|
|
622
|
+
ORDER BY created_at_epoch DESC, id DESC` : `SELECT * FROM observations
|
|
315
623
|
WHERE title LIKE ? ESCAPE '\\' OR text LIKE ? ESCAPE '\\' OR narrative LIKE ? ESCAPE '\\'
|
|
316
|
-
ORDER BY created_at_epoch DESC`;
|
|
624
|
+
ORDER BY created_at_epoch DESC, id DESC`;
|
|
317
625
|
const pattern = `%${escapeLikePattern2(searchTerm)}%`;
|
|
318
626
|
const query = db.query(sql);
|
|
319
627
|
if (project) {
|
|
@@ -346,21 +654,32 @@ function consolidateObservations(db, project, options = {}) {
|
|
|
346
654
|
ORDER BY cnt DESC
|
|
347
655
|
`).all(project, minGroupSize);
|
|
348
656
|
if (groups.length === 0) return { merged: 0, removed: 0 };
|
|
349
|
-
|
|
350
|
-
|
|
657
|
+
if (options.dryRun) {
|
|
658
|
+
let totalMerged = 0;
|
|
659
|
+
let totalRemoved = 0;
|
|
660
|
+
for (const group of groups) {
|
|
661
|
+
const obsIds = group.ids.split(",").map(Number);
|
|
662
|
+
const placeholders = obsIds.map(() => "?").join(",");
|
|
663
|
+
const count = db.query(
|
|
664
|
+
`SELECT COUNT(*) as cnt FROM observations WHERE id IN (${placeholders})`
|
|
665
|
+
).get(...obsIds)?.cnt || 0;
|
|
666
|
+
if (count >= minGroupSize) {
|
|
667
|
+
totalMerged += 1;
|
|
668
|
+
totalRemoved += count - 1;
|
|
669
|
+
}
|
|
670
|
+
}
|
|
671
|
+
return { merged: totalMerged, removed: totalRemoved };
|
|
672
|
+
}
|
|
351
673
|
const runConsolidation = db.transaction(() => {
|
|
674
|
+
let merged = 0;
|
|
675
|
+
let removed = 0;
|
|
352
676
|
for (const group of groups) {
|
|
353
677
|
const obsIds = group.ids.split(",").map(Number);
|
|
354
678
|
const placeholders = obsIds.map(() => "?").join(",");
|
|
355
679
|
const observations = db.query(
|
|
356
|
-
`SELECT * FROM observations WHERE id IN (${placeholders}) ORDER BY created_at_epoch DESC`
|
|
680
|
+
`SELECT * FROM observations WHERE id IN (${placeholders}) ORDER BY created_at_epoch DESC, id DESC`
|
|
357
681
|
).all(...obsIds);
|
|
358
682
|
if (observations.length < minGroupSize) continue;
|
|
359
|
-
if (options.dryRun) {
|
|
360
|
-
totalMerged += 1;
|
|
361
|
-
totalRemoved += observations.length - 1;
|
|
362
|
-
continue;
|
|
363
|
-
}
|
|
364
683
|
const keeper = observations[0];
|
|
365
684
|
const others = observations.slice(1);
|
|
366
685
|
const uniqueTexts = /* @__PURE__ */ new Set();
|
|
@@ -373,31 +692,27 @@ function consolidateObservations(db, project, options = {}) {
|
|
|
373
692
|
const consolidatedText = Array.from(uniqueTexts).join("\n---\n").substring(0, 1e5);
|
|
374
693
|
db.run(
|
|
375
694
|
"UPDATE observations SET text = ?, title = ? WHERE id = ?",
|
|
376
|
-
[consolidatedText, `[
|
|
695
|
+
[consolidatedText, `[consolidated x${observations.length}] ${keeper.title}`, keeper.id]
|
|
377
696
|
);
|
|
378
697
|
const removeIds = others.map((o) => o.id);
|
|
379
698
|
const removePlaceholders = removeIds.map(() => "?").join(",");
|
|
380
699
|
db.run(`DELETE FROM observations WHERE id IN (${removePlaceholders})`, removeIds);
|
|
381
700
|
db.run(`DELETE FROM observation_embeddings WHERE observation_id IN (${removePlaceholders})`, removeIds);
|
|
382
|
-
|
|
383
|
-
|
|
701
|
+
merged += 1;
|
|
702
|
+
removed += removeIds.length;
|
|
384
703
|
}
|
|
704
|
+
return { merged, removed };
|
|
385
705
|
});
|
|
386
|
-
runConsolidation();
|
|
387
|
-
return { merged: totalMerged, removed: totalRemoved };
|
|
706
|
+
return runConsolidation();
|
|
388
707
|
}
|
|
389
708
|
var init_Observations = __esm({
|
|
390
709
|
"src/services/sqlite/Observations.ts"() {
|
|
391
710
|
"use strict";
|
|
711
|
+
init_secrets();
|
|
712
|
+
init_categorizer();
|
|
392
713
|
}
|
|
393
714
|
});
|
|
394
715
|
|
|
395
|
-
// src/services/search/ChromaManager.ts
|
|
396
|
-
import { ChromaClient } from "chromadb";
|
|
397
|
-
import { join as join2 } from "path";
|
|
398
|
-
import { homedir as homedir2 } from "os";
|
|
399
|
-
import { existsSync as existsSync2, mkdirSync as mkdirSync2 } from "fs";
|
|
400
|
-
|
|
401
716
|
// src/utils/logger.ts
|
|
402
717
|
import { appendFileSync, existsSync, mkdirSync, readFileSync } from "fs";
|
|
403
718
|
import { join } from "path";
|
|
@@ -617,144 +932,49 @@ ${data.stack}` : ` ${data.message}`;
|
|
|
617
932
|
};
|
|
618
933
|
var logger = new Logger();
|
|
619
934
|
|
|
620
|
-
// src/services/search/
|
|
621
|
-
var
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
}
|
|
634
|
-
/**
|
|
635
|
-
* Initialize ChromaDB connection and collection
|
|
636
|
-
*/
|
|
637
|
-
async initialize() {
|
|
638
|
-
try {
|
|
639
|
-
await this.client.heartbeat();
|
|
640
|
-
this.collection = await this.client.getOrCreateCollection({
|
|
641
|
-
name: "kiro-memory-observations",
|
|
642
|
-
metadata: { description: "Kiro Memory observation embeddings" }
|
|
643
|
-
});
|
|
644
|
-
this.isAvailable = true;
|
|
645
|
-
logger.info("CHROMA", "ChromaDB initialized successfully");
|
|
646
|
-
return true;
|
|
647
|
-
} catch (error) {
|
|
648
|
-
logger.warn("CHROMA", "ChromaDB not available, falling back to SQLite search", {}, error);
|
|
649
|
-
this.isAvailable = false;
|
|
650
|
-
return false;
|
|
651
|
-
}
|
|
652
|
-
}
|
|
653
|
-
/**
|
|
654
|
-
* Add observation embedding to ChromaDB
|
|
655
|
-
*/
|
|
656
|
-
async addObservation(id, content, metadata) {
|
|
657
|
-
if (!this.isAvailable || !this.collection) {
|
|
658
|
-
logger.debug("CHROMA", "ChromaDB not available, skipping embedding");
|
|
659
|
-
return;
|
|
660
|
-
}
|
|
661
|
-
try {
|
|
662
|
-
await this.collection.add({
|
|
663
|
-
ids: [id],
|
|
664
|
-
documents: [content],
|
|
665
|
-
metadatas: [metadata]
|
|
666
|
-
});
|
|
667
|
-
logger.debug("CHROMA", `Added observation ${id} to vector DB`);
|
|
668
|
-
} catch (error) {
|
|
669
|
-
logger.error("CHROMA", `Failed to add observation ${id}`, {}, error);
|
|
670
|
-
}
|
|
671
|
-
}
|
|
672
|
-
/**
|
|
673
|
-
* Search observations by semantic similarity
|
|
674
|
-
*/
|
|
675
|
-
async search(query, options = {}) {
|
|
676
|
-
if (!this.isAvailable || !this.collection) {
|
|
677
|
-
logger.debug("CHROMA", "ChromaDB not available, returning empty results");
|
|
678
|
-
return [];
|
|
679
|
-
}
|
|
680
|
-
try {
|
|
681
|
-
const where = options.project ? { project: options.project } : void 0;
|
|
682
|
-
const results = await this.collection.query({
|
|
683
|
-
queryTexts: [query],
|
|
684
|
-
nResults: options.limit || 10,
|
|
685
|
-
where
|
|
686
|
-
});
|
|
687
|
-
const hits = [];
|
|
688
|
-
if (results.ids && results.ids[0]) {
|
|
689
|
-
for (let i = 0; i < results.ids[0].length; i++) {
|
|
690
|
-
hits.push({
|
|
691
|
-
id: results.ids[0][i],
|
|
692
|
-
content: results.documents?.[0]?.[i] || "",
|
|
693
|
-
metadata: results.metadatas?.[0]?.[i] || {},
|
|
694
|
-
distance: results.distances?.[0]?.[i] || 0
|
|
695
|
-
});
|
|
696
|
-
}
|
|
697
|
-
}
|
|
698
|
-
logger.debug("CHROMA", `Search returned ${hits.length} results`);
|
|
699
|
-
return hits;
|
|
700
|
-
} catch (error) {
|
|
701
|
-
logger.error("CHROMA", "Search failed", {}, error);
|
|
702
|
-
return [];
|
|
703
|
-
}
|
|
704
|
-
}
|
|
705
|
-
/**
|
|
706
|
-
* Delete observation from ChromaDB
|
|
707
|
-
*/
|
|
708
|
-
async deleteObservation(id) {
|
|
709
|
-
if (!this.isAvailable || !this.collection) {
|
|
710
|
-
return;
|
|
711
|
-
}
|
|
712
|
-
try {
|
|
713
|
-
await this.collection.delete({ ids: [id] });
|
|
714
|
-
logger.debug("CHROMA", `Deleted observation ${id}`);
|
|
715
|
-
} catch (error) {
|
|
716
|
-
logger.error("CHROMA", `Failed to delete observation ${id}`, {}, error);
|
|
717
|
-
}
|
|
718
|
-
}
|
|
719
|
-
/**
|
|
720
|
-
* Check if ChromaDB is available
|
|
721
|
-
*/
|
|
722
|
-
isChromaAvailable() {
|
|
723
|
-
return this.isAvailable;
|
|
724
|
-
}
|
|
725
|
-
/**
|
|
726
|
-
* Get collection stats
|
|
727
|
-
*/
|
|
728
|
-
async getStats() {
|
|
729
|
-
if (!this.isAvailable || !this.collection) {
|
|
730
|
-
return { count: 0 };
|
|
731
|
-
}
|
|
732
|
-
try {
|
|
733
|
-
const count = await this.collection.count();
|
|
734
|
-
return { count };
|
|
735
|
-
} catch (error) {
|
|
736
|
-
logger.error("CHROMA", "Failed to get stats", {}, error);
|
|
737
|
-
return { count: 0 };
|
|
738
|
-
}
|
|
935
|
+
// src/services/search/EmbeddingService.ts
|
|
936
|
+
var MODEL_CONFIGS = {
|
|
937
|
+
"all-MiniLM-L6-v2": {
|
|
938
|
+
modelId: "Xenova/all-MiniLM-L6-v2",
|
|
939
|
+
dimensions: 384
|
|
940
|
+
},
|
|
941
|
+
"jina-code-v2": {
|
|
942
|
+
modelId: "jinaai/jina-embeddings-v2-base-code",
|
|
943
|
+
dimensions: 768
|
|
944
|
+
},
|
|
945
|
+
"bge-small-en": {
|
|
946
|
+
modelId: "BAAI/bge-small-en-v1.5",
|
|
947
|
+
dimensions: 384
|
|
739
948
|
}
|
|
740
949
|
};
|
|
741
|
-
var
|
|
742
|
-
function getChromaManager() {
|
|
743
|
-
if (!chromaManager) {
|
|
744
|
-
chromaManager = new ChromaManager();
|
|
745
|
-
}
|
|
746
|
-
return chromaManager;
|
|
747
|
-
}
|
|
748
|
-
|
|
749
|
-
// src/services/search/EmbeddingService.ts
|
|
950
|
+
var FASTEMBED_COMPATIBLE_MODELS = /* @__PURE__ */ new Set(["all-MiniLM-L6-v2", "bge-small-en"]);
|
|
750
951
|
var EmbeddingService = class {
|
|
751
952
|
provider = null;
|
|
752
953
|
model = null;
|
|
753
954
|
initialized = false;
|
|
754
955
|
initializing = null;
|
|
956
|
+
config;
|
|
957
|
+
configName;
|
|
958
|
+
constructor() {
|
|
959
|
+
const envModel = process.env.KIRO_MEMORY_EMBEDDING_MODEL || "all-MiniLM-L6-v2";
|
|
960
|
+
this.configName = envModel;
|
|
961
|
+
if (MODEL_CONFIGS[envModel]) {
|
|
962
|
+
this.config = MODEL_CONFIGS[envModel];
|
|
963
|
+
} else if (envModel.includes("/")) {
|
|
964
|
+
const dimensions = parseInt(process.env.KIRO_MEMORY_EMBEDDING_DIMENSIONS || "384", 10);
|
|
965
|
+
this.config = {
|
|
966
|
+
modelId: envModel,
|
|
967
|
+
dimensions: isNaN(dimensions) ? 384 : dimensions
|
|
968
|
+
};
|
|
969
|
+
} else {
|
|
970
|
+
logger.warn("EMBEDDING", `Unknown model name '${envModel}', falling back to 'all-MiniLM-L6-v2'`);
|
|
971
|
+
this.configName = "all-MiniLM-L6-v2";
|
|
972
|
+
this.config = MODEL_CONFIGS["all-MiniLM-L6-v2"];
|
|
973
|
+
}
|
|
974
|
+
}
|
|
755
975
|
/**
|
|
756
|
-
*
|
|
757
|
-
*
|
|
976
|
+
* Initialize the embedding service.
|
|
977
|
+
* Tries fastembed (when compatible), then @huggingface/transformers, then falls back to null.
|
|
758
978
|
*/
|
|
759
979
|
async initialize() {
|
|
760
980
|
if (this.initialized) return this.provider !== null;
|
|
@@ -765,45 +985,48 @@ var EmbeddingService = class {
|
|
|
765
985
|
return result;
|
|
766
986
|
}
|
|
767
987
|
async _doInitialize() {
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
988
|
+
const fastembedCompatible = FASTEMBED_COMPATIBLE_MODELS.has(this.configName);
|
|
989
|
+
if (fastembedCompatible) {
|
|
990
|
+
try {
|
|
991
|
+
const fastembed = await import("fastembed");
|
|
992
|
+
const EmbeddingModel = fastembed.EmbeddingModel || fastembed.default?.EmbeddingModel;
|
|
993
|
+
const FlagEmbedding = fastembed.FlagEmbedding || fastembed.default?.FlagEmbedding;
|
|
994
|
+
if (FlagEmbedding && EmbeddingModel) {
|
|
995
|
+
this.model = await FlagEmbedding.init({
|
|
996
|
+
model: EmbeddingModel.BGESmallENV15
|
|
997
|
+
});
|
|
998
|
+
this.provider = "fastembed";
|
|
999
|
+
this.initialized = true;
|
|
1000
|
+
logger.info("EMBEDDING", `Initialized with fastembed (BGE-small-en-v1.5) for model '${this.configName}'`);
|
|
1001
|
+
return true;
|
|
1002
|
+
}
|
|
1003
|
+
} catch (error) {
|
|
1004
|
+
logger.debug("EMBEDDING", `fastembed not available: ${error}`);
|
|
780
1005
|
}
|
|
781
|
-
} catch (error) {
|
|
782
|
-
logger.debug("EMBEDDING", `fastembed non disponibile: ${error}`);
|
|
783
1006
|
}
|
|
784
1007
|
try {
|
|
785
1008
|
const transformers = await import("@huggingface/transformers");
|
|
786
1009
|
const pipeline = transformers.pipeline || transformers.default?.pipeline;
|
|
787
1010
|
if (pipeline) {
|
|
788
|
-
this.model = await pipeline("feature-extraction",
|
|
1011
|
+
this.model = await pipeline("feature-extraction", this.config.modelId, {
|
|
789
1012
|
quantized: true
|
|
790
1013
|
});
|
|
791
1014
|
this.provider = "transformers";
|
|
792
1015
|
this.initialized = true;
|
|
793
|
-
logger.info("EMBEDDING",
|
|
1016
|
+
logger.info("EMBEDDING", `Initialized with @huggingface/transformers (${this.config.modelId})`);
|
|
794
1017
|
return true;
|
|
795
1018
|
}
|
|
796
1019
|
} catch (error) {
|
|
797
|
-
logger.debug("EMBEDDING", `@huggingface/transformers
|
|
1020
|
+
logger.debug("EMBEDDING", `@huggingface/transformers not available: ${error}`);
|
|
798
1021
|
}
|
|
799
1022
|
this.provider = null;
|
|
800
1023
|
this.initialized = true;
|
|
801
|
-
logger.warn("EMBEDDING", "
|
|
1024
|
+
logger.warn("EMBEDDING", "No embedding provider available, semantic search disabled");
|
|
802
1025
|
return false;
|
|
803
1026
|
}
|
|
804
1027
|
/**
|
|
805
|
-
*
|
|
806
|
-
*
|
|
1028
|
+
* Generate embedding for a single text.
|
|
1029
|
+
* Returns Float32Array with configured dimensions, or null if not available.
|
|
807
1030
|
*/
|
|
808
1031
|
async embed(text) {
|
|
809
1032
|
if (!this.initialized) await this.initialize();
|
|
@@ -816,46 +1039,118 @@ var EmbeddingService = class {
|
|
|
816
1039
|
return await this._embedTransformers(truncated);
|
|
817
1040
|
}
|
|
818
1041
|
} catch (error) {
|
|
819
|
-
logger.error("EMBEDDING", `
|
|
1042
|
+
logger.error("EMBEDDING", `Error generating embedding: ${error}`);
|
|
820
1043
|
}
|
|
821
1044
|
return null;
|
|
822
1045
|
}
|
|
823
1046
|
/**
|
|
824
|
-
*
|
|
1047
|
+
* Generate embeddings in batch.
|
|
1048
|
+
* Uses native batch support when available (fastembed, transformers),
|
|
1049
|
+
* falls back to serial processing on batch failure.
|
|
825
1050
|
*/
|
|
826
1051
|
async embedBatch(texts) {
|
|
827
1052
|
if (!this.initialized) await this.initialize();
|
|
828
1053
|
if (!this.provider || !this.model) return texts.map(() => null);
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
}
|
|
835
|
-
|
|
1054
|
+
if (texts.length === 0) return [];
|
|
1055
|
+
const truncated = texts.map((t) => t.substring(0, 2e3));
|
|
1056
|
+
try {
|
|
1057
|
+
if (this.provider === "fastembed") {
|
|
1058
|
+
return await this._embedBatchFastembed(truncated);
|
|
1059
|
+
} else if (this.provider === "transformers") {
|
|
1060
|
+
return await this._embedBatchTransformers(truncated);
|
|
836
1061
|
}
|
|
1062
|
+
} catch (error) {
|
|
1063
|
+
logger.warn("EMBEDDING", `Batch embedding failed, falling back to serial: ${error}`);
|
|
837
1064
|
}
|
|
838
|
-
return
|
|
1065
|
+
return this._embedBatchSerial(truncated);
|
|
839
1066
|
}
|
|
840
1067
|
/**
|
|
841
|
-
*
|
|
1068
|
+
* Check if the service is available.
|
|
842
1069
|
*/
|
|
843
1070
|
isAvailable() {
|
|
844
1071
|
return this.initialized && this.provider !== null;
|
|
845
1072
|
}
|
|
846
1073
|
/**
|
|
847
|
-
*
|
|
1074
|
+
* Name of the active provider.
|
|
848
1075
|
*/
|
|
849
1076
|
getProvider() {
|
|
850
1077
|
return this.provider;
|
|
851
1078
|
}
|
|
852
1079
|
/**
|
|
853
|
-
*
|
|
1080
|
+
* Embedding vector dimensions for the active model configuration.
|
|
854
1081
|
*/
|
|
855
1082
|
getDimensions() {
|
|
856
|
-
return
|
|
1083
|
+
return this.config.dimensions;
|
|
1084
|
+
}
|
|
1085
|
+
/**
|
|
1086
|
+
* Human-readable model name used as identifier in the observation_embeddings table.
|
|
1087
|
+
* Returns the short name (e.g., 'all-MiniLM-L6-v2') or the full HF model ID for custom models.
|
|
1088
|
+
*/
|
|
1089
|
+
getModelName() {
|
|
1090
|
+
return this.configName;
|
|
1091
|
+
}
|
|
1092
|
+
// --- Batch implementations ---
|
|
1093
|
+
/**
|
|
1094
|
+
* Native batch embedding with fastembed.
|
|
1095
|
+
* FlagEmbedding.embed() accepts string[] and returns an async iterable of batches.
|
|
1096
|
+
*/
|
|
1097
|
+
async _embedBatchFastembed(texts) {
|
|
1098
|
+
const results = [];
|
|
1099
|
+
const embeddings = this.model.embed(texts, texts.length);
|
|
1100
|
+
for await (const batch of embeddings) {
|
|
1101
|
+
if (batch) {
|
|
1102
|
+
for (const vec of batch) {
|
|
1103
|
+
results.push(vec instanceof Float32Array ? vec : new Float32Array(vec));
|
|
1104
|
+
}
|
|
1105
|
+
}
|
|
1106
|
+
}
|
|
1107
|
+
while (results.length < texts.length) {
|
|
1108
|
+
results.push(null);
|
|
1109
|
+
}
|
|
1110
|
+
return results;
|
|
857
1111
|
}
|
|
858
|
-
|
|
1112
|
+
/**
|
|
1113
|
+
* Batch embedding with @huggingface/transformers pipeline.
|
|
1114
|
+
* The pipeline accepts string[] and returns a Tensor with shape [N, dims].
|
|
1115
|
+
*/
|
|
1116
|
+
async _embedBatchTransformers(texts) {
|
|
1117
|
+
const output = await this.model(texts, {
|
|
1118
|
+
pooling: "mean",
|
|
1119
|
+
normalize: true
|
|
1120
|
+
});
|
|
1121
|
+
if (!output?.data) {
|
|
1122
|
+
return texts.map(() => null);
|
|
1123
|
+
}
|
|
1124
|
+
const dims = this.getDimensions();
|
|
1125
|
+
const data = output.data instanceof Float32Array ? output.data : new Float32Array(output.data);
|
|
1126
|
+
const results = [];
|
|
1127
|
+
for (let i = 0; i < texts.length; i++) {
|
|
1128
|
+
const offset = i * dims;
|
|
1129
|
+
if (offset + dims <= data.length) {
|
|
1130
|
+
results.push(data.slice(offset, offset + dims));
|
|
1131
|
+
} else {
|
|
1132
|
+
results.push(null);
|
|
1133
|
+
}
|
|
1134
|
+
}
|
|
1135
|
+
return results;
|
|
1136
|
+
}
|
|
1137
|
+
/**
|
|
1138
|
+
* Serial fallback: embed texts one at a time.
|
|
1139
|
+
* Used when native batch fails.
|
|
1140
|
+
*/
|
|
1141
|
+
async _embedBatchSerial(texts) {
|
|
1142
|
+
const results = [];
|
|
1143
|
+
for (const text of texts) {
|
|
1144
|
+
try {
|
|
1145
|
+
const embedding = await this.embed(text);
|
|
1146
|
+
results.push(embedding);
|
|
1147
|
+
} catch {
|
|
1148
|
+
results.push(null);
|
|
1149
|
+
}
|
|
1150
|
+
}
|
|
1151
|
+
return results;
|
|
1152
|
+
}
|
|
1153
|
+
// --- Single-text provider implementations ---
|
|
859
1154
|
async _embedFastembed(text) {
|
|
860
1155
|
const embeddings = this.model.embed([text], 1);
|
|
861
1156
|
for await (const batch of embeddings) {
|
|
@@ -886,17 +1181,21 @@ function getEmbeddingService() {
|
|
|
886
1181
|
}
|
|
887
1182
|
|
|
888
1183
|
// src/services/search/VectorSearch.ts
|
|
1184
|
+
// Upper bound on candidate rows loaded by a vector search when the caller
// does not pass options.maxCandidates.
var DEFAULT_MAX_CANDIDATES = 2000;
|
|
889
1185
|
/**
 * Cosine similarity between two numeric vectors.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} dot(a, b) / (|a| * |b|); 0 when the lengths differ, when
 *   either vector has zero norm, or when the result is not a finite number.
 */
function cosineSimilarity(a, b) {
  const len = a.length;
  if (len !== b.length) return 0;
  let dotProduct = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < len; i++) {
    const ai = a[i];
    const bi = b[i];
    dotProduct += ai * bi;
    normA += ai * ai;
    normB += bi * bi;
  }
  // Take the square roots separately: the product normA * normB can overflow to
  // Infinity or underflow to 0 even though each norm is representable, which
  // would report 0 for vectors that are in fact parallel.
  const denominator = Math.sqrt(normA) * Math.sqrt(normB);
  if (denominator === 0 || !Number.isFinite(denominator)) return 0;
  const similarity = dotProduct / denominator;
  // NaN/Infinity in the inputs must not leak into ranking comparisons.
  return Number.isFinite(similarity) ? similarity : 0;
}
|
|
@@ -909,23 +1208,36 @@ function bufferToFloat32(buf) {
|
|
|
909
1208
|
}
|
|
910
1209
|
var VectorSearch = class {
|
|
911
1210
|
/**
|
|
912
|
-
*
|
|
1211
|
+
* Semantic search with SQL pre-filtering for scalability.
|
|
1212
|
+
*
|
|
1213
|
+
* 2-phase strategy:
|
|
1214
|
+
* 1. SQL pre-filters by project + sorts by recency (loads max N candidates)
|
|
1215
|
+
* 2. JS computes cosine similarity only on filtered candidates
|
|
1216
|
+
*
|
|
1217
|
+
* With 50k observations and maxCandidates=2000, loads only ~4% of data.
|
|
913
1218
|
*/
|
|
914
1219
|
async search(db, queryEmbedding, options = {}) {
|
|
915
1220
|
const limit = options.limit || 10;
|
|
916
1221
|
const threshold = options.threshold || 0.3;
|
|
1222
|
+
const maxCandidates = options.maxCandidates || DEFAULT_MAX_CANDIDATES;
|
|
917
1223
|
try {
|
|
918
|
-
|
|
1224
|
+
const conditions = [];
|
|
1225
|
+
const params = [];
|
|
1226
|
+
if (options.project) {
|
|
1227
|
+
conditions.push("o.project = ?");
|
|
1228
|
+
params.push(options.project);
|
|
1229
|
+
}
|
|
1230
|
+
const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
|
|
1231
|
+
const sql = `
|
|
919
1232
|
SELECT e.observation_id, e.embedding,
|
|
920
1233
|
o.title, o.text, o.type, o.project, o.created_at, o.created_at_epoch
|
|
921
1234
|
FROM observation_embeddings e
|
|
922
1235
|
JOIN observations o ON o.id = e.observation_id
|
|
1236
|
+
${whereClause}
|
|
1237
|
+
ORDER BY o.created_at_epoch DESC
|
|
1238
|
+
LIMIT ?
|
|
923
1239
|
`;
|
|
924
|
-
|
|
925
|
-
if (options.project) {
|
|
926
|
-
sql += " WHERE o.project = ?";
|
|
927
|
-
params.push(options.project);
|
|
928
|
-
}
|
|
1240
|
+
params.push(maxCandidates);
|
|
929
1241
|
const rows = db.query(sql).all(...params);
|
|
930
1242
|
const scored = [];
|
|
931
1243
|
for (const row of rows) {
|
|
@@ -946,14 +1258,15 @@ var VectorSearch = class {
|
|
|
946
1258
|
}
|
|
947
1259
|
}
|
|
948
1260
|
scored.sort((a, b) => b.similarity - a.similarity);
|
|
1261
|
+
logger.debug("VECTOR", `Search: ${rows.length} candidates \u2192 ${scored.length} above threshold \u2192 ${Math.min(scored.length, limit)} results`);
|
|
949
1262
|
return scored.slice(0, limit);
|
|
950
1263
|
} catch (error) {
|
|
951
|
-
logger.error("VECTOR", `
|
|
1264
|
+
logger.error("VECTOR", `Vector search error: ${error}`);
|
|
952
1265
|
return [];
|
|
953
1266
|
}
|
|
954
1267
|
}
|
|
955
1268
|
/**
|
|
956
|
-
*
|
|
1269
|
+
* Store embedding for an observation.
|
|
957
1270
|
*/
|
|
958
1271
|
async storeEmbedding(db, observationId, embedding, model) {
|
|
959
1272
|
try {
|
|
@@ -969,18 +1282,18 @@ var VectorSearch = class {
|
|
|
969
1282
|
embedding.length,
|
|
970
1283
|
(/* @__PURE__ */ new Date()).toISOString()
|
|
971
1284
|
);
|
|
972
|
-
logger.debug("VECTOR", `Embedding
|
|
1285
|
+
logger.debug("VECTOR", `Embedding saved for observation ${observationId}`);
|
|
973
1286
|
} catch (error) {
|
|
974
|
-
logger.error("VECTOR", `
|
|
1287
|
+
logger.error("VECTOR", `Error saving embedding: ${error}`);
|
|
975
1288
|
}
|
|
976
1289
|
}
|
|
977
1290
|
/**
|
|
978
|
-
*
|
|
1291
|
+
* Generate embeddings for observations that don't have them yet.
|
|
979
1292
|
*/
|
|
980
1293
|
async backfillEmbeddings(db, batchSize = 50) {
|
|
981
1294
|
const embeddingService2 = getEmbeddingService();
|
|
982
1295
|
if (!await embeddingService2.initialize()) {
|
|
983
|
-
logger.warn("VECTOR", "Embedding service
|
|
1296
|
+
logger.warn("VECTOR", "Embedding service not available, backfill skipped");
|
|
984
1297
|
return 0;
|
|
985
1298
|
}
|
|
986
1299
|
const rows = db.query(`
|
|
@@ -993,7 +1306,7 @@ var VectorSearch = class {
|
|
|
993
1306
|
`).all(batchSize);
|
|
994
1307
|
if (rows.length === 0) return 0;
|
|
995
1308
|
let count = 0;
|
|
996
|
-
const model = embeddingService2.
|
|
1309
|
+
const model = embeddingService2.getModelName();
|
|
997
1310
|
for (const row of rows) {
|
|
998
1311
|
const parts = [row.title];
|
|
999
1312
|
if (row.text) parts.push(row.text);
|
|
@@ -1006,11 +1319,11 @@ var VectorSearch = class {
|
|
|
1006
1319
|
count++;
|
|
1007
1320
|
}
|
|
1008
1321
|
}
|
|
1009
|
-
logger.info("VECTOR", `Backfill
|
|
1322
|
+
logger.info("VECTOR", `Backfill completed: ${count}/${rows.length} embeddings generated`);
|
|
1010
1323
|
return count;
|
|
1011
1324
|
}
|
|
1012
1325
|
/**
|
|
1013
|
-
*
|
|
1326
|
+
* Embedding statistics.
|
|
1014
1327
|
*/
|
|
1015
1328
|
getStats(db) {
|
|
1016
1329
|
try {
|
|
@@ -1098,21 +1411,21 @@ function estimateTokens(text) {
|
|
|
1098
1411
|
var HybridSearch = class {
|
|
1099
1412
|
embeddingInitialized = false;
|
|
1100
1413
|
/**
|
|
1101
|
-
*
|
|
1414
|
+
* Initialize the embedding service (lazy, non-blocking)
|
|
1102
1415
|
*/
|
|
1103
1416
|
async initialize() {
|
|
1104
1417
|
try {
|
|
1105
1418
|
const embeddingService2 = getEmbeddingService();
|
|
1106
1419
|
await embeddingService2.initialize();
|
|
1107
1420
|
this.embeddingInitialized = embeddingService2.isAvailable();
|
|
1108
|
-
logger.info("SEARCH", `HybridSearch
|
|
1421
|
+
logger.info("SEARCH", `HybridSearch initialized (embedding: ${this.embeddingInitialized ? "active" : "disabled"})`);
|
|
1109
1422
|
} catch (error) {
|
|
1110
|
-
logger.warn("SEARCH", "
|
|
1423
|
+
logger.warn("SEARCH", "Embedding initialization failed, using only FTS5", {}, error);
|
|
1111
1424
|
this.embeddingInitialized = false;
|
|
1112
1425
|
}
|
|
1113
1426
|
}
|
|
1114
1427
|
/**
|
|
1115
|
-
*
|
|
1428
|
+
* Hybrid search with 4-signal scoring
|
|
1116
1429
|
*/
|
|
1117
1430
|
async search(db, query, options = {}) {
|
|
1118
1431
|
const limit = options.limit || 10;
|
|
@@ -1128,7 +1441,7 @@ var HybridSearch = class {
|
|
|
1128
1441
|
const vectorResults = await vectorSearch2.search(db, queryEmbedding, {
|
|
1129
1442
|
project: options.project,
|
|
1130
1443
|
limit: limit * 2,
|
|
1131
|
-
//
|
|
1444
|
+
// Fetch more results for ranking
|
|
1132
1445
|
threshold: 0.3
|
|
1133
1446
|
});
|
|
1134
1447
|
for (const hit of vectorResults) {
|
|
@@ -1145,10 +1458,10 @@ var HybridSearch = class {
|
|
|
1145
1458
|
source: "vector"
|
|
1146
1459
|
});
|
|
1147
1460
|
}
|
|
1148
|
-
logger.debug("SEARCH", `Vector search: ${vectorResults.length}
|
|
1461
|
+
logger.debug("SEARCH", `Vector search: ${vectorResults.length} results`);
|
|
1149
1462
|
}
|
|
1150
1463
|
} catch (error) {
|
|
1151
|
-
logger.warn("SEARCH", "
|
|
1464
|
+
logger.warn("SEARCH", "Vector search failed, using only keyword", {}, error);
|
|
1152
1465
|
}
|
|
1153
1466
|
}
|
|
1154
1467
|
try {
|
|
@@ -1178,9 +1491,9 @@ var HybridSearch = class {
|
|
|
1178
1491
|
});
|
|
1179
1492
|
}
|
|
1180
1493
|
}
|
|
1181
|
-
logger.debug("SEARCH", `Keyword search: ${keywordResults.length}
|
|
1494
|
+
logger.debug("SEARCH", `Keyword search: ${keywordResults.length} results`);
|
|
1182
1495
|
} catch (error) {
|
|
1183
|
-
logger.error("SEARCH", "
|
|
1496
|
+
logger.error("SEARCH", "Keyword search failed", {}, error);
|
|
1184
1497
|
}
|
|
1185
1498
|
if (rawItems.size === 0) return [];
|
|
1186
1499
|
const allFTS5Ranks = Array.from(rawItems.values()).filter((item) => item.fts5Rank !== null).map((item) => item.fts5Rank);
|
|
@@ -1233,14 +1546,12 @@ function getHybridSearch() {
|
|
|
1233
1546
|
}
|
|
1234
1547
|
export {
|
|
1235
1548
|
CONTEXT_WEIGHTS,
|
|
1236
|
-
ChromaManager,
|
|
1237
1549
|
HybridSearch,
|
|
1238
1550
|
KNOWLEDGE_TYPE_BOOST,
|
|
1239
1551
|
SEARCH_WEIGHTS,
|
|
1240
1552
|
accessRecencyScore,
|
|
1241
1553
|
computeCompositeScore,
|
|
1242
1554
|
estimateTokens,
|
|
1243
|
-
getChromaManager,
|
|
1244
1555
|
getHybridSearch,
|
|
1245
1556
|
knowledgeTypeBoost,
|
|
1246
1557
|
normalizeFTS5Rank,
|