kiro-memory 1.9.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -1
- package/package.json +5 -5
- package/plugin/dist/cli/contextkit.js +2611 -345
- package/plugin/dist/hooks/agentSpawn.js +853 -223
- package/plugin/dist/hooks/kiro-hooks.js +841 -211
- package/plugin/dist/hooks/postToolUse.js +853 -222
- package/plugin/dist/hooks/stop.js +850 -220
- package/plugin/dist/hooks/userPromptSubmit.js +848 -216
- package/plugin/dist/index.js +843 -340
- package/plugin/dist/plugins/github/github-client.js +152 -0
- package/plugin/dist/plugins/github/index.js +412 -0
- package/plugin/dist/plugins/github/issue-parser.js +54 -0
- package/plugin/dist/plugins/slack/formatter.js +90 -0
- package/plugin/dist/plugins/slack/index.js +215 -0
- package/plugin/dist/sdk/index.js +841 -215
- package/plugin/dist/servers/mcp-server.js +4461 -397
- package/plugin/dist/services/search/EmbeddingService.js +146 -37
- package/plugin/dist/services/search/HybridSearch.js +564 -116
- package/plugin/dist/services/search/VectorSearch.js +187 -60
- package/plugin/dist/services/search/index.js +565 -254
- package/plugin/dist/services/sqlite/Backup.js +416 -0
- package/plugin/dist/services/sqlite/Database.js +126 -153
- package/plugin/dist/services/sqlite/ImportExport.js +452 -0
- package/plugin/dist/services/sqlite/Observations.js +314 -19
- package/plugin/dist/services/sqlite/Prompts.js +1 -1
- package/plugin/dist/services/sqlite/Search.js +41 -29
- package/plugin/dist/services/sqlite/Summaries.js +4 -4
- package/plugin/dist/services/sqlite/index.js +1428 -208
- package/plugin/dist/viewer.css +1 -0
- package/plugin/dist/viewer.html +2 -179
- package/plugin/dist/viewer.js +23 -24942
- package/plugin/dist/viewer.js.map +7 -0
- package/plugin/dist/worker-service.js +427 -5569
- package/plugin/dist/worker-service.js.map +7 -0
|
@@ -28,7 +28,7 @@ function escapeLikePattern(input) {
|
|
|
28
28
|
}
|
|
29
29
|
function sanitizeFTS5Query(query) {
|
|
30
30
|
const trimmed = query.length > 1e4 ? query.substring(0, 1e4) : query;
|
|
31
|
-
const terms = trimmed.replace(/[""]/g, "").split(/\s+/).filter((t) => t.length > 0).slice(0, 100).map((t) => `"${t}"`);
|
|
31
|
+
const terms = trimmed.replace(/[""\u0022]/g, "").split(/\s+/).filter((t) => t.length > 0).slice(0, 100).map((t) => `"${t}"`);
|
|
32
32
|
return terms.join(" ");
|
|
33
33
|
}
|
|
34
34
|
function searchObservationsFTS(db, query, filters = {}) {
|
|
@@ -125,7 +125,7 @@ function searchObservationsLIKE(db, query, filters = {}) {
|
|
|
125
125
|
sql += " AND created_at_epoch <= ?";
|
|
126
126
|
params.push(filters.dateEnd);
|
|
127
127
|
}
|
|
128
|
-
sql += " ORDER BY created_at_epoch DESC LIMIT ?";
|
|
128
|
+
sql += " ORDER BY created_at_epoch DESC, id DESC LIMIT ?";
|
|
129
129
|
params.push(limit);
|
|
130
130
|
const stmt = db.query(sql);
|
|
131
131
|
return stmt.all(...params);
|
|
@@ -150,7 +150,7 @@ function searchSummariesFiltered(db, query, filters = {}) {
|
|
|
150
150
|
sql += " AND created_at_epoch <= ?";
|
|
151
151
|
params.push(filters.dateEnd);
|
|
152
152
|
}
|
|
153
|
-
sql += " ORDER BY created_at_epoch DESC LIMIT ?";
|
|
153
|
+
sql += " ORDER BY created_at_epoch DESC, id DESC LIMIT ?";
|
|
154
154
|
params.push(limit);
|
|
155
155
|
const stmt = db.query(sql);
|
|
156
156
|
return stmt.all(...params);
|
|
@@ -160,7 +160,7 @@ function getObservationsByIds(db, ids) {
|
|
|
160
160
|
const validIds = ids.filter((id) => typeof id === "number" && Number.isInteger(id) && id > 0).slice(0, 500);
|
|
161
161
|
if (validIds.length === 0) return [];
|
|
162
162
|
const placeholders = validIds.map(() => "?").join(",");
|
|
163
|
-
const sql = `SELECT * FROM observations WHERE id IN (${placeholders}) ORDER BY created_at_epoch DESC`;
|
|
163
|
+
const sql = `SELECT * FROM observations WHERE id IN (${placeholders}) ORDER BY created_at_epoch DESC, id DESC`;
|
|
164
164
|
const stmt = db.query(sql);
|
|
165
165
|
return stmt.all(...validIds);
|
|
166
166
|
}
|
|
@@ -172,11 +172,11 @@ function getTimeline(db, anchorId, depthBefore = 5, depthAfter = 5) {
|
|
|
172
172
|
const beforeStmt = db.query(`
|
|
173
173
|
SELECT id, 'observation' as type, title, text as content, project, created_at, created_at_epoch
|
|
174
174
|
FROM observations
|
|
175
|
-
WHERE created_at_epoch < ?
|
|
176
|
-
ORDER BY created_at_epoch DESC
|
|
175
|
+
WHERE (created_at_epoch < ? OR (created_at_epoch = ? AND id < ?))
|
|
176
|
+
ORDER BY created_at_epoch DESC, id DESC
|
|
177
177
|
LIMIT ?
|
|
178
178
|
`);
|
|
179
|
-
const before = beforeStmt.all(anchorEpoch, depthBefore).reverse();
|
|
179
|
+
const before = beforeStmt.all(anchorEpoch, anchorEpoch, anchorId, depthBefore).reverse();
|
|
180
180
|
const selfStmt = db.query(`
|
|
181
181
|
SELECT id, 'observation' as type, title, text as content, project, created_at, created_at_epoch
|
|
182
182
|
FROM observations WHERE id = ?
|
|
@@ -185,34 +185,46 @@ function getTimeline(db, anchorId, depthBefore = 5, depthAfter = 5) {
|
|
|
185
185
|
const afterStmt = db.query(`
|
|
186
186
|
SELECT id, 'observation' as type, title, text as content, project, created_at, created_at_epoch
|
|
187
187
|
FROM observations
|
|
188
|
-
WHERE created_at_epoch > ?
|
|
189
|
-
ORDER BY created_at_epoch ASC
|
|
188
|
+
WHERE (created_at_epoch > ? OR (created_at_epoch = ? AND id > ?))
|
|
189
|
+
ORDER BY created_at_epoch ASC, id ASC
|
|
190
190
|
LIMIT ?
|
|
191
191
|
`);
|
|
192
|
-
const after = afterStmt.all(anchorEpoch, depthAfter);
|
|
192
|
+
const after = afterStmt.all(anchorEpoch, anchorEpoch, anchorId, depthAfter);
|
|
193
193
|
return [...before, ...self, ...after];
|
|
194
194
|
}
|
|
195
195
|
function getProjectStats(db, project) {
|
|
196
|
-
const
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
196
|
+
const sql = `
|
|
197
|
+
WITH
|
|
198
|
+
obs_stats AS (
|
|
199
|
+
SELECT
|
|
200
|
+
COUNT(*) as count,
|
|
201
|
+
COALESCE(SUM(discovery_tokens), 0) as discovery_tokens,
|
|
202
|
+
COALESCE(SUM(
|
|
203
|
+
CAST((LENGTH(COALESCE(title, '')) + LENGTH(COALESCE(narrative, ''))) / 4 AS INTEGER)
|
|
204
|
+
), 0) as read_tokens
|
|
205
|
+
FROM observations WHERE project = ?
|
|
206
|
+
),
|
|
207
|
+
sum_count AS (SELECT COUNT(*) as count FROM summaries WHERE project = ?),
|
|
208
|
+
ses_count AS (SELECT COUNT(*) as count FROM sessions WHERE project = ?),
|
|
209
|
+
prm_count AS (SELECT COUNT(*) as count FROM prompts WHERE project = ?)
|
|
210
|
+
SELECT
|
|
211
|
+
obs_stats.count as observations,
|
|
212
|
+
obs_stats.discovery_tokens,
|
|
213
|
+
obs_stats.read_tokens,
|
|
214
|
+
sum_count.count as summaries,
|
|
215
|
+
ses_count.count as sessions,
|
|
216
|
+
prm_count.count as prompts
|
|
217
|
+
FROM obs_stats, sum_count, ses_count, prm_count
|
|
218
|
+
`;
|
|
219
|
+
const row = db.query(sql).get(project, project, project, project);
|
|
220
|
+
const discoveryTokens = row?.discovery_tokens || 0;
|
|
221
|
+
const readTokens = row?.read_tokens || 0;
|
|
210
222
|
const savings = Math.max(0, discoveryTokens - readTokens);
|
|
211
223
|
return {
|
|
212
|
-
observations:
|
|
213
|
-
summaries:
|
|
214
|
-
sessions:
|
|
215
|
-
prompts:
|
|
224
|
+
observations: row?.observations || 0,
|
|
225
|
+
summaries: row?.summaries || 0,
|
|
226
|
+
sessions: row?.sessions || 0,
|
|
227
|
+
prompts: row?.prompts || 0,
|
|
216
228
|
tokenEconomics: { discoveryTokens, readTokens, savings }
|
|
217
229
|
};
|
|
218
230
|
}
|
|
@@ -220,7 +232,7 @@ function getStaleObservations(db, project) {
|
|
|
220
232
|
const rows = db.query(`
|
|
221
233
|
SELECT * FROM observations
|
|
222
234
|
WHERE project = ? AND files_modified IS NOT NULL AND files_modified != ''
|
|
223
|
-
ORDER BY created_at_epoch DESC
|
|
235
|
+
ORDER BY created_at_epoch DESC, id DESC
|
|
224
236
|
LIMIT 500
|
|
225
237
|
`).all(project);
|
|
226
238
|
const staleObs = [];
|
|
@@ -263,6 +275,290 @@ var init_Search = __esm({
|
|
|
263
275
|
}
|
|
264
276
|
});
|
|
265
277
|
|
|
278
|
+
// src/utils/secrets.ts
|
|
279
|
+
function redactSecrets(text) {
|
|
280
|
+
if (!text) return text;
|
|
281
|
+
let redacted = text;
|
|
282
|
+
for (const { pattern } of SECRET_PATTERNS) {
|
|
283
|
+
pattern.lastIndex = 0;
|
|
284
|
+
redacted = redacted.replace(pattern, (match) => {
|
|
285
|
+
const prefix = match.substring(0, Math.min(4, match.length));
|
|
286
|
+
return `${prefix}***REDACTED***`;
|
|
287
|
+
});
|
|
288
|
+
}
|
|
289
|
+
return redacted;
|
|
290
|
+
}
|
|
291
|
+
var SECRET_PATTERNS;
|
|
292
|
+
var init_secrets = __esm({
|
|
293
|
+
"src/utils/secrets.ts"() {
|
|
294
|
+
"use strict";
|
|
295
|
+
SECRET_PATTERNS = [
|
|
296
|
+
// AWS Access Keys (AKIA, ABIA, ACCA, ASIA prefixes + 16 alphanumeric chars)
|
|
297
|
+
{ name: "aws-key", pattern: /(?:AKIA|ABIA|ACCA|ASIA)[A-Z0-9]{16}/g },
|
|
298
|
+
// JWT tokens (three base64url segments separated by dots)
|
|
299
|
+
{ name: "jwt", pattern: /eyJ[a-zA-Z0-9_-]{10,}\.eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}/g },
|
|
300
|
+
// Generic API keys in key=value or key: value assignments
|
|
301
|
+
{ name: "api-key", pattern: /(?:api[_-]?key|apikey|api[_-]?secret)\s*[:=]\s*['"]?([a-zA-Z0-9_\-]{20,})['"]?/gi },
|
|
302
|
+
// Password/secret/token in variable assignments
|
|
303
|
+
{ name: "credential", pattern: /(?:password|passwd|pwd|secret|token|auth[_-]?token|access[_-]?token|bearer)\s*[:=]\s*['"]?([^\s'"]{8,})['"]?/gi },
|
|
304
|
+
// Credentials embedded in URLs (user:pass@host)
|
|
305
|
+
{ name: "url-credential", pattern: /(?:https?:\/\/)([^:]+):([^@]+)@/g },
|
|
306
|
+
// PEM-encoded private keys (RSA, EC, DSA, OpenSSH)
|
|
307
|
+
{ name: "private-key", pattern: /-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----/g },
|
|
308
|
+
// GitHub personal access tokens (ghp_, gho_, ghu_, ghs_, ghr_ prefixes)
|
|
309
|
+
{ name: "github-token", pattern: /gh[pousr]_[a-zA-Z0-9]{36,}/g },
|
|
310
|
+
// Slack bot/user/app tokens
|
|
311
|
+
{ name: "slack-token", pattern: /xox[bpoas]-[a-zA-Z0-9-]{10,}/g },
|
|
312
|
+
// HTTP Authorization Bearer header values
|
|
313
|
+
{ name: "bearer-header", pattern: /\bBearer\s+([a-zA-Z0-9_\-\.]{20,})/g },
|
|
314
|
+
// Generic hex secrets (32+ hex chars after a key/secret/token/password label)
|
|
315
|
+
{ name: "hex-secret", pattern: /(?:key|secret|token|password)\s*[:=]\s*['"]?([0-9a-f]{32,})['"]?/gi }
|
|
316
|
+
];
|
|
317
|
+
}
|
|
318
|
+
});
|
|
319
|
+
|
|
320
|
+
// src/utils/categorizer.ts
|
|
321
|
+
function categorize(input) {
|
|
322
|
+
const scores = /* @__PURE__ */ new Map();
|
|
323
|
+
const searchText = [
|
|
324
|
+
input.title,
|
|
325
|
+
input.text || "",
|
|
326
|
+
input.narrative || "",
|
|
327
|
+
input.concepts || ""
|
|
328
|
+
].join(" ").toLowerCase();
|
|
329
|
+
const allFiles = [input.filesModified || "", input.filesRead || ""].join(",");
|
|
330
|
+
for (const rule of CATEGORY_RULES) {
|
|
331
|
+
let score = 0;
|
|
332
|
+
for (const kw of rule.keywords) {
|
|
333
|
+
if (searchText.includes(kw.toLowerCase())) {
|
|
334
|
+
score += rule.weight;
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
if (rule.types && rule.types.includes(input.type)) {
|
|
338
|
+
score += rule.weight * 2;
|
|
339
|
+
}
|
|
340
|
+
if (rule.filePatterns && allFiles) {
|
|
341
|
+
for (const pattern of rule.filePatterns) {
|
|
342
|
+
if (pattern.test(allFiles)) {
|
|
343
|
+
score += rule.weight;
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
}
|
|
347
|
+
if (score > 0) {
|
|
348
|
+
scores.set(rule.category, (scores.get(rule.category) || 0) + score);
|
|
349
|
+
}
|
|
350
|
+
}
|
|
351
|
+
let bestCategory = "general";
|
|
352
|
+
let bestScore = 0;
|
|
353
|
+
for (const [category, score] of scores) {
|
|
354
|
+
if (score > bestScore) {
|
|
355
|
+
bestScore = score;
|
|
356
|
+
bestCategory = category;
|
|
357
|
+
}
|
|
358
|
+
}
|
|
359
|
+
return bestCategory;
|
|
360
|
+
}
|
|
361
|
+
var CATEGORY_RULES;
|
|
362
|
+
var init_categorizer = __esm({
|
|
363
|
+
"src/utils/categorizer.ts"() {
|
|
364
|
+
"use strict";
|
|
365
|
+
CATEGORY_RULES = [
|
|
366
|
+
{
|
|
367
|
+
category: "security",
|
|
368
|
+
keywords: [
|
|
369
|
+
"security",
|
|
370
|
+
"vulnerability",
|
|
371
|
+
"cve",
|
|
372
|
+
"xss",
|
|
373
|
+
"csrf",
|
|
374
|
+
"injection",
|
|
375
|
+
"sanitize",
|
|
376
|
+
"escape",
|
|
377
|
+
"auth",
|
|
378
|
+
"authentication",
|
|
379
|
+
"authorization",
|
|
380
|
+
"permission",
|
|
381
|
+
"helmet",
|
|
382
|
+
"cors",
|
|
383
|
+
"rate-limit",
|
|
384
|
+
"token",
|
|
385
|
+
"encrypt",
|
|
386
|
+
"decrypt",
|
|
387
|
+
"secret",
|
|
388
|
+
"redact",
|
|
389
|
+
"owasp"
|
|
390
|
+
],
|
|
391
|
+
filePatterns: [/security/i, /auth/i, /secrets?\.ts/i],
|
|
392
|
+
weight: 10
|
|
393
|
+
},
|
|
394
|
+
{
|
|
395
|
+
category: "testing",
|
|
396
|
+
keywords: [
|
|
397
|
+
"test",
|
|
398
|
+
"spec",
|
|
399
|
+
"expect",
|
|
400
|
+
"assert",
|
|
401
|
+
"mock",
|
|
402
|
+
"stub",
|
|
403
|
+
"fixture",
|
|
404
|
+
"coverage",
|
|
405
|
+
"jest",
|
|
406
|
+
"vitest",
|
|
407
|
+
"bun test",
|
|
408
|
+
"unit test",
|
|
409
|
+
"integration test",
|
|
410
|
+
"e2e"
|
|
411
|
+
],
|
|
412
|
+
types: ["test"],
|
|
413
|
+
filePatterns: [/\.test\./i, /\.spec\./i, /tests?\//i, /__tests__/i],
|
|
414
|
+
weight: 8
|
|
415
|
+
},
|
|
416
|
+
{
|
|
417
|
+
category: "debugging",
|
|
418
|
+
keywords: [
|
|
419
|
+
"debug",
|
|
420
|
+
"fix",
|
|
421
|
+
"bug",
|
|
422
|
+
"error",
|
|
423
|
+
"crash",
|
|
424
|
+
"stacktrace",
|
|
425
|
+
"stack trace",
|
|
426
|
+
"exception",
|
|
427
|
+
"breakpoint",
|
|
428
|
+
"investigate",
|
|
429
|
+
"root cause",
|
|
430
|
+
"troubleshoot",
|
|
431
|
+
"diagnose",
|
|
432
|
+
"bisect",
|
|
433
|
+
"regression"
|
|
434
|
+
],
|
|
435
|
+
types: ["bugfix"],
|
|
436
|
+
weight: 8
|
|
437
|
+
},
|
|
438
|
+
{
|
|
439
|
+
category: "architecture",
|
|
440
|
+
keywords: [
|
|
441
|
+
"architect",
|
|
442
|
+
"design",
|
|
443
|
+
"pattern",
|
|
444
|
+
"modular",
|
|
445
|
+
"migration",
|
|
446
|
+
"schema",
|
|
447
|
+
"database",
|
|
448
|
+
"api design",
|
|
449
|
+
"abstract",
|
|
450
|
+
"dependency injection",
|
|
451
|
+
"singleton",
|
|
452
|
+
"factory",
|
|
453
|
+
"observer",
|
|
454
|
+
"middleware",
|
|
455
|
+
"pipeline",
|
|
456
|
+
"microservice",
|
|
457
|
+
"monolith"
|
|
458
|
+
],
|
|
459
|
+
types: ["decision", "constraint"],
|
|
460
|
+
weight: 7
|
|
461
|
+
},
|
|
462
|
+
{
|
|
463
|
+
category: "refactoring",
|
|
464
|
+
keywords: [
|
|
465
|
+
"refactor",
|
|
466
|
+
"rename",
|
|
467
|
+
"extract",
|
|
468
|
+
"inline",
|
|
469
|
+
"move",
|
|
470
|
+
"split",
|
|
471
|
+
"merge",
|
|
472
|
+
"simplify",
|
|
473
|
+
"cleanup",
|
|
474
|
+
"clean up",
|
|
475
|
+
"dead code",
|
|
476
|
+
"consolidate",
|
|
477
|
+
"reorganize",
|
|
478
|
+
"restructure",
|
|
479
|
+
"decouple"
|
|
480
|
+
],
|
|
481
|
+
weight: 6
|
|
482
|
+
},
|
|
483
|
+
{
|
|
484
|
+
category: "config",
|
|
485
|
+
keywords: [
|
|
486
|
+
"config",
|
|
487
|
+
"configuration",
|
|
488
|
+
"env",
|
|
489
|
+
"environment",
|
|
490
|
+
"dotenv",
|
|
491
|
+
".env",
|
|
492
|
+
"settings",
|
|
493
|
+
"tsconfig",
|
|
494
|
+
"eslint",
|
|
495
|
+
"prettier",
|
|
496
|
+
"webpack",
|
|
497
|
+
"vite",
|
|
498
|
+
"esbuild",
|
|
499
|
+
"docker",
|
|
500
|
+
"ci/cd",
|
|
501
|
+
"github actions",
|
|
502
|
+
"deploy",
|
|
503
|
+
"build",
|
|
504
|
+
"bundle",
|
|
505
|
+
"package.json"
|
|
506
|
+
],
|
|
507
|
+
filePatterns: [
|
|
508
|
+
/\.config\./i,
|
|
509
|
+
/\.env/i,
|
|
510
|
+
/tsconfig/i,
|
|
511
|
+
/\.ya?ml/i,
|
|
512
|
+
/Dockerfile/i,
|
|
513
|
+
/docker-compose/i
|
|
514
|
+
],
|
|
515
|
+
weight: 5
|
|
516
|
+
},
|
|
517
|
+
{
|
|
518
|
+
category: "docs",
|
|
519
|
+
keywords: [
|
|
520
|
+
"document",
|
|
521
|
+
"readme",
|
|
522
|
+
"changelog",
|
|
523
|
+
"jsdoc",
|
|
524
|
+
"comment",
|
|
525
|
+
"explain",
|
|
526
|
+
"guide",
|
|
527
|
+
"tutorial",
|
|
528
|
+
"api doc",
|
|
529
|
+
"openapi",
|
|
530
|
+
"swagger"
|
|
531
|
+
],
|
|
532
|
+
types: ["docs"],
|
|
533
|
+
filePatterns: [/\.md$/i, /docs?\//i, /readme/i, /changelog/i],
|
|
534
|
+
weight: 5
|
|
535
|
+
},
|
|
536
|
+
{
|
|
537
|
+
category: "feature-dev",
|
|
538
|
+
keywords: [
|
|
539
|
+
"feature",
|
|
540
|
+
"implement",
|
|
541
|
+
"add",
|
|
542
|
+
"create",
|
|
543
|
+
"new",
|
|
544
|
+
"endpoint",
|
|
545
|
+
"component",
|
|
546
|
+
"module",
|
|
547
|
+
"service",
|
|
548
|
+
"handler",
|
|
549
|
+
"route",
|
|
550
|
+
"hook",
|
|
551
|
+
"plugin",
|
|
552
|
+
"integration"
|
|
553
|
+
],
|
|
554
|
+
types: ["feature", "file-write"],
|
|
555
|
+
weight: 3
|
|
556
|
+
// lowest — generic catch-all for development
|
|
557
|
+
}
|
|
558
|
+
];
|
|
559
|
+
}
|
|
560
|
+
});
|
|
561
|
+
|
|
266
562
|
// src/services/sqlite/Observations.ts
|
|
267
563
|
var Observations_exports = {};
|
|
268
564
|
__export(Observations_exports, {
|
|
@@ -288,11 +584,23 @@ function isDuplicateObservation(db, contentHash, windowMs = 3e4) {
|
|
|
288
584
|
}
|
|
289
585
|
function createObservation(db, memorySessionId, project, type, title, subtitle, text, narrative, facts, concepts, filesRead, filesModified, promptNumber, contentHash = null, discoveryTokens = 0) {
|
|
290
586
|
const now = /* @__PURE__ */ new Date();
|
|
587
|
+
const safeTitle = redactSecrets(title);
|
|
588
|
+
const safeText = text ? redactSecrets(text) : text;
|
|
589
|
+
const safeNarrative = narrative ? redactSecrets(narrative) : narrative;
|
|
590
|
+
const autoCategory = categorize({
|
|
591
|
+
type,
|
|
592
|
+
title: safeTitle,
|
|
593
|
+
text: safeText,
|
|
594
|
+
narrative: safeNarrative,
|
|
595
|
+
concepts,
|
|
596
|
+
filesModified,
|
|
597
|
+
filesRead
|
|
598
|
+
});
|
|
291
599
|
const result = db.run(
|
|
292
600
|
`INSERT INTO observations
|
|
293
|
-
(memory_session_id, project, type, title, subtitle, text, narrative, facts, concepts, files_read, files_modified, prompt_number, created_at, created_at_epoch, content_hash, discovery_tokens)
|
|
294
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
295
|
-
[memorySessionId, project, type,
|
|
601
|
+
(memory_session_id, project, type, title, subtitle, text, narrative, facts, concepts, files_read, files_modified, prompt_number, created_at, created_at_epoch, content_hash, discovery_tokens, auto_category)
|
|
602
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
603
|
+
[memorySessionId, project, type, safeTitle, subtitle, safeText, safeNarrative, facts, concepts, filesRead, filesModified, promptNumber, now.toISOString(), now.getTime(), contentHash, discoveryTokens, autoCategory]
|
|
296
604
|
);
|
|
297
605
|
return Number(result.lastInsertRowid);
|
|
298
606
|
}
|
|
@@ -304,16 +612,16 @@ function getObservationsBySession(db, memorySessionId) {
|
|
|
304
612
|
}
|
|
305
613
|
function getObservationsByProject(db, project, limit = 100) {
|
|
306
614
|
const query = db.query(
|
|
307
|
-
"SELECT * FROM observations WHERE project = ? ORDER BY created_at_epoch DESC LIMIT ?"
|
|
615
|
+
"SELECT * FROM observations WHERE project = ? ORDER BY created_at_epoch DESC, id DESC LIMIT ?"
|
|
308
616
|
);
|
|
309
617
|
return query.all(project, limit);
|
|
310
618
|
}
|
|
311
619
|
function searchObservations(db, searchTerm, project) {
|
|
312
620
|
const sql = project ? `SELECT * FROM observations
|
|
313
621
|
WHERE project = ? AND (title LIKE ? ESCAPE '\\' OR text LIKE ? ESCAPE '\\' OR narrative LIKE ? ESCAPE '\\')
|
|
314
|
-
ORDER BY created_at_epoch DESC` : `SELECT * FROM observations
|
|
622
|
+
ORDER BY created_at_epoch DESC, id DESC` : `SELECT * FROM observations
|
|
315
623
|
WHERE title LIKE ? ESCAPE '\\' OR text LIKE ? ESCAPE '\\' OR narrative LIKE ? ESCAPE '\\'
|
|
316
|
-
ORDER BY created_at_epoch DESC`;
|
|
624
|
+
ORDER BY created_at_epoch DESC, id DESC`;
|
|
317
625
|
const pattern = `%${escapeLikePattern2(searchTerm)}%`;
|
|
318
626
|
const query = db.query(sql);
|
|
319
627
|
if (project) {
|
|
@@ -346,21 +654,32 @@ function consolidateObservations(db, project, options = {}) {
|
|
|
346
654
|
ORDER BY cnt DESC
|
|
347
655
|
`).all(project, minGroupSize);
|
|
348
656
|
if (groups.length === 0) return { merged: 0, removed: 0 };
|
|
349
|
-
|
|
350
|
-
|
|
657
|
+
if (options.dryRun) {
|
|
658
|
+
let totalMerged = 0;
|
|
659
|
+
let totalRemoved = 0;
|
|
660
|
+
for (const group of groups) {
|
|
661
|
+
const obsIds = group.ids.split(",").map(Number);
|
|
662
|
+
const placeholders = obsIds.map(() => "?").join(",");
|
|
663
|
+
const count = db.query(
|
|
664
|
+
`SELECT COUNT(*) as cnt FROM observations WHERE id IN (${placeholders})`
|
|
665
|
+
).get(...obsIds)?.cnt || 0;
|
|
666
|
+
if (count >= minGroupSize) {
|
|
667
|
+
totalMerged += 1;
|
|
668
|
+
totalRemoved += count - 1;
|
|
669
|
+
}
|
|
670
|
+
}
|
|
671
|
+
return { merged: totalMerged, removed: totalRemoved };
|
|
672
|
+
}
|
|
351
673
|
const runConsolidation = db.transaction(() => {
|
|
674
|
+
let merged = 0;
|
|
675
|
+
let removed = 0;
|
|
352
676
|
for (const group of groups) {
|
|
353
677
|
const obsIds = group.ids.split(",").map(Number);
|
|
354
678
|
const placeholders = obsIds.map(() => "?").join(",");
|
|
355
679
|
const observations = db.query(
|
|
356
|
-
`SELECT * FROM observations WHERE id IN (${placeholders}) ORDER BY created_at_epoch DESC`
|
|
680
|
+
`SELECT * FROM observations WHERE id IN (${placeholders}) ORDER BY created_at_epoch DESC, id DESC`
|
|
357
681
|
).all(...obsIds);
|
|
358
682
|
if (observations.length < minGroupSize) continue;
|
|
359
|
-
if (options.dryRun) {
|
|
360
|
-
totalMerged += 1;
|
|
361
|
-
totalRemoved += observations.length - 1;
|
|
362
|
-
continue;
|
|
363
|
-
}
|
|
364
683
|
const keeper = observations[0];
|
|
365
684
|
const others = observations.slice(1);
|
|
366
685
|
const uniqueTexts = /* @__PURE__ */ new Set();
|
|
@@ -373,22 +692,24 @@ function consolidateObservations(db, project, options = {}) {
|
|
|
373
692
|
const consolidatedText = Array.from(uniqueTexts).join("\n---\n").substring(0, 1e5);
|
|
374
693
|
db.run(
|
|
375
694
|
"UPDATE observations SET text = ?, title = ? WHERE id = ?",
|
|
376
|
-
[consolidatedText, `[
|
|
695
|
+
[consolidatedText, `[consolidated x${observations.length}] ${keeper.title}`, keeper.id]
|
|
377
696
|
);
|
|
378
697
|
const removeIds = others.map((o) => o.id);
|
|
379
698
|
const removePlaceholders = removeIds.map(() => "?").join(",");
|
|
380
699
|
db.run(`DELETE FROM observations WHERE id IN (${removePlaceholders})`, removeIds);
|
|
381
700
|
db.run(`DELETE FROM observation_embeddings WHERE observation_id IN (${removePlaceholders})`, removeIds);
|
|
382
|
-
|
|
383
|
-
|
|
701
|
+
merged += 1;
|
|
702
|
+
removed += removeIds.length;
|
|
384
703
|
}
|
|
704
|
+
return { merged, removed };
|
|
385
705
|
});
|
|
386
|
-
runConsolidation();
|
|
387
|
-
return { merged: totalMerged, removed: totalRemoved };
|
|
706
|
+
return runConsolidation();
|
|
388
707
|
}
|
|
389
708
|
var init_Observations = __esm({
|
|
390
709
|
"src/services/sqlite/Observations.ts"() {
|
|
391
710
|
"use strict";
|
|
711
|
+
init_secrets();
|
|
712
|
+
init_categorizer();
|
|
392
713
|
}
|
|
393
714
|
});
|
|
394
715
|
|
|
@@ -612,14 +933,48 @@ ${data.stack}` : ` ${data.message}`;
|
|
|
612
933
|
var logger = new Logger();
|
|
613
934
|
|
|
614
935
|
// src/services/search/EmbeddingService.ts
|
|
936
|
+
var MODEL_CONFIGS = {
|
|
937
|
+
"all-MiniLM-L6-v2": {
|
|
938
|
+
modelId: "Xenova/all-MiniLM-L6-v2",
|
|
939
|
+
dimensions: 384
|
|
940
|
+
},
|
|
941
|
+
"jina-code-v2": {
|
|
942
|
+
modelId: "jinaai/jina-embeddings-v2-base-code",
|
|
943
|
+
dimensions: 768
|
|
944
|
+
},
|
|
945
|
+
"bge-small-en": {
|
|
946
|
+
modelId: "BAAI/bge-small-en-v1.5",
|
|
947
|
+
dimensions: 384
|
|
948
|
+
}
|
|
949
|
+
};
|
|
950
|
+
var FASTEMBED_COMPATIBLE_MODELS = /* @__PURE__ */ new Set(["all-MiniLM-L6-v2", "bge-small-en"]);
|
|
615
951
|
var EmbeddingService = class {
|
|
616
952
|
provider = null;
|
|
617
953
|
model = null;
|
|
618
954
|
initialized = false;
|
|
619
955
|
initializing = null;
|
|
956
|
+
config;
|
|
957
|
+
configName;
|
|
958
|
+
constructor() {
|
|
959
|
+
const envModel = process.env.KIRO_MEMORY_EMBEDDING_MODEL || "all-MiniLM-L6-v2";
|
|
960
|
+
this.configName = envModel;
|
|
961
|
+
if (MODEL_CONFIGS[envModel]) {
|
|
962
|
+
this.config = MODEL_CONFIGS[envModel];
|
|
963
|
+
} else if (envModel.includes("/")) {
|
|
964
|
+
const dimensions = parseInt(process.env.KIRO_MEMORY_EMBEDDING_DIMENSIONS || "384", 10);
|
|
965
|
+
this.config = {
|
|
966
|
+
modelId: envModel,
|
|
967
|
+
dimensions: isNaN(dimensions) ? 384 : dimensions
|
|
968
|
+
};
|
|
969
|
+
} else {
|
|
970
|
+
logger.warn("EMBEDDING", `Unknown model name '${envModel}', falling back to 'all-MiniLM-L6-v2'`);
|
|
971
|
+
this.configName = "all-MiniLM-L6-v2";
|
|
972
|
+
this.config = MODEL_CONFIGS["all-MiniLM-L6-v2"];
|
|
973
|
+
}
|
|
974
|
+
}
|
|
620
975
|
/**
|
|
621
|
-
*
|
|
622
|
-
*
|
|
976
|
+
* Initialize the embedding service.
|
|
977
|
+
* Tries fastembed (when compatible), then @huggingface/transformers, then falls back to null.
|
|
623
978
|
*/
|
|
624
979
|
async initialize() {
|
|
625
980
|
if (this.initialized) return this.provider !== null;
|
|
@@ -630,45 +985,48 @@ var EmbeddingService = class {
|
|
|
630
985
|
return result;
|
|
631
986
|
}
|
|
632
987
|
async _doInitialize() {
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
988
|
+
const fastembedCompatible = FASTEMBED_COMPATIBLE_MODELS.has(this.configName);
|
|
989
|
+
if (fastembedCompatible) {
|
|
990
|
+
try {
|
|
991
|
+
const fastembed = await import("fastembed");
|
|
992
|
+
const EmbeddingModel = fastembed.EmbeddingModel || fastembed.default?.EmbeddingModel;
|
|
993
|
+
const FlagEmbedding = fastembed.FlagEmbedding || fastembed.default?.FlagEmbedding;
|
|
994
|
+
if (FlagEmbedding && EmbeddingModel) {
|
|
995
|
+
this.model = await FlagEmbedding.init({
|
|
996
|
+
model: EmbeddingModel.BGESmallENV15
|
|
997
|
+
});
|
|
998
|
+
this.provider = "fastembed";
|
|
999
|
+
this.initialized = true;
|
|
1000
|
+
logger.info("EMBEDDING", `Initialized with fastembed (BGE-small-en-v1.5) for model '${this.configName}'`);
|
|
1001
|
+
return true;
|
|
1002
|
+
}
|
|
1003
|
+
} catch (error) {
|
|
1004
|
+
logger.debug("EMBEDDING", `fastembed not available: ${error}`);
|
|
645
1005
|
}
|
|
646
|
-
} catch (error) {
|
|
647
|
-
logger.debug("EMBEDDING", `fastembed non disponibile: ${error}`);
|
|
648
1006
|
}
|
|
649
1007
|
try {
|
|
650
1008
|
const transformers = await import("@huggingface/transformers");
|
|
651
1009
|
const pipeline = transformers.pipeline || transformers.default?.pipeline;
|
|
652
1010
|
if (pipeline) {
|
|
653
|
-
this.model = await pipeline("feature-extraction",
|
|
1011
|
+
this.model = await pipeline("feature-extraction", this.config.modelId, {
|
|
654
1012
|
quantized: true
|
|
655
1013
|
});
|
|
656
1014
|
this.provider = "transformers";
|
|
657
1015
|
this.initialized = true;
|
|
658
|
-
logger.info("EMBEDDING",
|
|
1016
|
+
logger.info("EMBEDDING", `Initialized with @huggingface/transformers (${this.config.modelId})`);
|
|
659
1017
|
return true;
|
|
660
1018
|
}
|
|
661
1019
|
} catch (error) {
|
|
662
|
-
logger.debug("EMBEDDING", `@huggingface/transformers
|
|
1020
|
+
logger.debug("EMBEDDING", `@huggingface/transformers not available: ${error}`);
|
|
663
1021
|
}
|
|
664
1022
|
this.provider = null;
|
|
665
1023
|
this.initialized = true;
|
|
666
|
-
logger.warn("EMBEDDING", "
|
|
1024
|
+
logger.warn("EMBEDDING", "No embedding provider available, semantic search disabled");
|
|
667
1025
|
return false;
|
|
668
1026
|
}
|
|
669
1027
|
/**
|
|
670
|
-
*
|
|
671
|
-
*
|
|
1028
|
+
* Generate embedding for a single text.
|
|
1029
|
+
* Returns Float32Array with configured dimensions, or null if not available.
|
|
672
1030
|
*/
|
|
673
1031
|
async embed(text) {
|
|
674
1032
|
if (!this.initialized) await this.initialize();
|
|
@@ -681,46 +1039,118 @@ var EmbeddingService = class {
|
|
|
681
1039
|
return await this._embedTransformers(truncated);
|
|
682
1040
|
}
|
|
683
1041
|
} catch (error) {
|
|
684
|
-
logger.error("EMBEDDING", `
|
|
1042
|
+
logger.error("EMBEDDING", `Error generating embedding: ${error}`);
|
|
685
1043
|
}
|
|
686
1044
|
return null;
|
|
687
1045
|
}
|
|
688
1046
|
/**
|
|
689
|
-
*
|
|
1047
|
+
* Generate embeddings in batch.
|
|
1048
|
+
* Uses native batch support when available (fastembed, transformers),
|
|
1049
|
+
* falls back to serial processing on batch failure.
|
|
690
1050
|
*/
|
|
691
1051
|
async embedBatch(texts) {
|
|
692
1052
|
if (!this.initialized) await this.initialize();
|
|
693
1053
|
if (!this.provider || !this.model) return texts.map(() => null);
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
}
|
|
700
|
-
|
|
1054
|
+
if (texts.length === 0) return [];
|
|
1055
|
+
const truncated = texts.map((t) => t.substring(0, 2e3));
|
|
1056
|
+
try {
|
|
1057
|
+
if (this.provider === "fastembed") {
|
|
1058
|
+
return await this._embedBatchFastembed(truncated);
|
|
1059
|
+
} else if (this.provider === "transformers") {
|
|
1060
|
+
return await this._embedBatchTransformers(truncated);
|
|
701
1061
|
}
|
|
1062
|
+
} catch (error) {
|
|
1063
|
+
logger.warn("EMBEDDING", `Batch embedding failed, falling back to serial: ${error}`);
|
|
702
1064
|
}
|
|
703
|
-
return
|
|
1065
|
+
return this._embedBatchSerial(truncated);
|
|
704
1066
|
}
|
|
705
1067
|
/**
|
|
706
|
-
*
|
|
1068
|
+
* Check if the service is available.
|
|
707
1069
|
*/
|
|
708
1070
|
isAvailable() {
|
|
709
1071
|
return this.initialized && this.provider !== null;
|
|
710
1072
|
}
|
|
711
1073
|
/**
|
|
712
|
-
*
|
|
1074
|
+
* Name of the active provider.
|
|
713
1075
|
*/
|
|
714
1076
|
getProvider() {
|
|
715
1077
|
return this.provider;
|
|
716
1078
|
}
|
|
717
1079
|
/**
|
|
718
|
-
*
|
|
1080
|
+
* Embedding vector dimensions for the active model configuration.
|
|
719
1081
|
*/
|
|
720
1082
|
getDimensions() {
|
|
721
|
-
return
|
|
1083
|
+
return this.config.dimensions;
|
|
1084
|
+
}
|
|
1085
|
+
/**
|
|
1086
|
+
* Human-readable model name used as identifier in the observation_embeddings table.
|
|
1087
|
+
* Returns the short name (e.g., 'all-MiniLM-L6-v2') or the full HF model ID for custom models.
|
|
1088
|
+
*/
|
|
1089
|
+
getModelName() {
|
|
1090
|
+
return this.configName;
|
|
1091
|
+
}
|
|
1092
|
+
// --- Batch implementations ---
|
|
1093
|
+
/**
|
|
1094
|
+
* Native batch embedding with fastembed.
|
|
1095
|
+
* FlagEmbedding.embed() accepts string[] and returns an async iterable of batches.
|
|
1096
|
+
*/
|
|
1097
|
+
async _embedBatchFastembed(texts) {
|
|
1098
|
+
const results = [];
|
|
1099
|
+
const embeddings = this.model.embed(texts, texts.length);
|
|
1100
|
+
for await (const batch of embeddings) {
|
|
1101
|
+
if (batch) {
|
|
1102
|
+
for (const vec of batch) {
|
|
1103
|
+
results.push(vec instanceof Float32Array ? vec : new Float32Array(vec));
|
|
1104
|
+
}
|
|
1105
|
+
}
|
|
1106
|
+
}
|
|
1107
|
+
while (results.length < texts.length) {
|
|
1108
|
+
results.push(null);
|
|
1109
|
+
}
|
|
1110
|
+
return results;
|
|
722
1111
|
}
|
|
723
|
-
|
|
1112
|
+
/**
|
|
1113
|
+
* Batch embedding with @huggingface/transformers pipeline.
|
|
1114
|
+
* The pipeline accepts string[] and returns a Tensor with shape [N, dims].
|
|
1115
|
+
*/
|
|
1116
|
+
async _embedBatchTransformers(texts) {
|
|
1117
|
+
const output = await this.model(texts, {
|
|
1118
|
+
pooling: "mean",
|
|
1119
|
+
normalize: true
|
|
1120
|
+
});
|
|
1121
|
+
if (!output?.data) {
|
|
1122
|
+
return texts.map(() => null);
|
|
1123
|
+
}
|
|
1124
|
+
const dims = this.getDimensions();
|
|
1125
|
+
const data = output.data instanceof Float32Array ? output.data : new Float32Array(output.data);
|
|
1126
|
+
const results = [];
|
|
1127
|
+
for (let i = 0; i < texts.length; i++) {
|
|
1128
|
+
const offset = i * dims;
|
|
1129
|
+
if (offset + dims <= data.length) {
|
|
1130
|
+
results.push(data.slice(offset, offset + dims));
|
|
1131
|
+
} else {
|
|
1132
|
+
results.push(null);
|
|
1133
|
+
}
|
|
1134
|
+
}
|
|
1135
|
+
return results;
|
|
1136
|
+
}
|
|
1137
|
+
/**
|
|
1138
|
+
* Serial fallback: embed texts one at a time.
|
|
1139
|
+
* Used when native batch fails.
|
|
1140
|
+
*/
|
|
1141
|
+
async _embedBatchSerial(texts) {
|
|
1142
|
+
const results = [];
|
|
1143
|
+
for (const text of texts) {
|
|
1144
|
+
try {
|
|
1145
|
+
const embedding = await this.embed(text);
|
|
1146
|
+
results.push(embedding);
|
|
1147
|
+
} catch {
|
|
1148
|
+
results.push(null);
|
|
1149
|
+
}
|
|
1150
|
+
}
|
|
1151
|
+
return results;
|
|
1152
|
+
}
|
|
1153
|
+
// --- Single-text provider implementations ---
|
|
724
1154
|
async _embedFastembed(text) {
|
|
725
1155
|
const embeddings = this.model.embed([text], 1);
|
|
726
1156
|
for await (const batch of embeddings) {
|
|
@@ -751,17 +1181,21 @@ function getEmbeddingService() {
|
|
|
751
1181
|
}
|
|
752
1182
|
|
|
753
1183
|
// src/services/search/VectorSearch.ts
|
|
1184
|
+
// Fallback cap on candidate rows (bound to the SQL LIMIT) when VectorSearch.search() is called without options.maxCandidates.
var DEFAULT_MAX_CANDIDATES = 2e3;
|
|
754
1185
|
/**
 * Cosine similarity between two numeric vectors.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} dot(a, b) / (|a| * |b|); 0 when the lengths differ or when
 *   either vector has zero magnitude (including two empty vectors).
 */
function cosineSimilarity(a, b) {
  const len = a.length;
  if (len !== b.length) return 0;
  let dotProduct = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < len; i++) {
    const ai = a[i];
    const bi = b[i];
    dotProduct += ai * bi;
    normA += ai * ai;
    normB += bi * bi;
  }
  // Take the roots separately: multiplying the two squared norms first can
  // overflow to Infinity or underflow to 0 even though each norm on its own is
  // representable, which would wrongly report parallel vectors as unrelated.
  const denominator = Math.sqrt(normA) * Math.sqrt(normB);
  if (denominator === 0) return 0;
  return dotProduct / denominator;
}
|
|
@@ -774,23 +1208,36 @@ function bufferToFloat32(buf) {
|
|
|
774
1208
|
}
|
|
775
1209
|
var VectorSearch = class {
|
|
776
1210
|
/**
|
|
777
|
-
*
|
|
1211
|
+
* Semantic search with SQL pre-filtering for scalability.
|
|
1212
|
+
*
|
|
1213
|
+
* 2-phase strategy:
|
|
1214
|
+
* 1. SQL pre-filters by project + sorts by recency (loads max N candidates)
|
|
1215
|
+
* 2. JS computes cosine similarity only on filtered candidates
|
|
1216
|
+
*
|
|
1217
|
+
* With 50k observations and maxCandidates=2000, loads only ~4% of data.
|
|
778
1218
|
*/
|
|
779
1219
|
async search(db, queryEmbedding, options = {}) {
|
|
780
1220
|
const limit = options.limit || 10;
|
|
781
1221
|
const threshold = options.threshold || 0.3;
|
|
1222
|
+
const maxCandidates = options.maxCandidates || DEFAULT_MAX_CANDIDATES;
|
|
782
1223
|
try {
|
|
783
|
-
|
|
1224
|
+
const conditions = [];
|
|
1225
|
+
const params = [];
|
|
1226
|
+
if (options.project) {
|
|
1227
|
+
conditions.push("o.project = ?");
|
|
1228
|
+
params.push(options.project);
|
|
1229
|
+
}
|
|
1230
|
+
const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
|
|
1231
|
+
const sql = `
|
|
784
1232
|
SELECT e.observation_id, e.embedding,
|
|
785
1233
|
o.title, o.text, o.type, o.project, o.created_at, o.created_at_epoch
|
|
786
1234
|
FROM observation_embeddings e
|
|
787
1235
|
JOIN observations o ON o.id = e.observation_id
|
|
1236
|
+
${whereClause}
|
|
1237
|
+
ORDER BY o.created_at_epoch DESC
|
|
1238
|
+
LIMIT ?
|
|
788
1239
|
`;
|
|
789
|
-
|
|
790
|
-
if (options.project) {
|
|
791
|
-
sql += " WHERE o.project = ?";
|
|
792
|
-
params.push(options.project);
|
|
793
|
-
}
|
|
1240
|
+
params.push(maxCandidates);
|
|
794
1241
|
const rows = db.query(sql).all(...params);
|
|
795
1242
|
const scored = [];
|
|
796
1243
|
for (const row of rows) {
|
|
@@ -811,14 +1258,15 @@ var VectorSearch = class {
|
|
|
811
1258
|
}
|
|
812
1259
|
}
|
|
813
1260
|
scored.sort((a, b) => b.similarity - a.similarity);
|
|
1261
|
+
logger.debug("VECTOR", `Search: ${rows.length} candidates \u2192 ${scored.length} above threshold \u2192 ${Math.min(scored.length, limit)} results`);
|
|
814
1262
|
return scored.slice(0, limit);
|
|
815
1263
|
} catch (error) {
|
|
816
|
-
logger.error("VECTOR", `
|
|
1264
|
+
logger.error("VECTOR", `Vector search error: ${error}`);
|
|
817
1265
|
return [];
|
|
818
1266
|
}
|
|
819
1267
|
}
|
|
820
1268
|
/**
|
|
821
|
-
*
|
|
1269
|
+
* Store embedding for an observation.
|
|
822
1270
|
*/
|
|
823
1271
|
async storeEmbedding(db, observationId, embedding, model) {
|
|
824
1272
|
try {
|
|
@@ -834,18 +1282,18 @@ var VectorSearch = class {
|
|
|
834
1282
|
embedding.length,
|
|
835
1283
|
(/* @__PURE__ */ new Date()).toISOString()
|
|
836
1284
|
);
|
|
837
|
-
logger.debug("VECTOR", `Embedding
|
|
1285
|
+
logger.debug("VECTOR", `Embedding saved for observation ${observationId}`);
|
|
838
1286
|
} catch (error) {
|
|
839
|
-
logger.error("VECTOR", `
|
|
1287
|
+
logger.error("VECTOR", `Error saving embedding: ${error}`);
|
|
840
1288
|
}
|
|
841
1289
|
}
|
|
842
1290
|
/**
|
|
843
|
-
*
|
|
1291
|
+
* Generate embeddings for observations that don't have them yet.
|
|
844
1292
|
*/
|
|
845
1293
|
async backfillEmbeddings(db, batchSize = 50) {
|
|
846
1294
|
const embeddingService2 = getEmbeddingService();
|
|
847
1295
|
if (!await embeddingService2.initialize()) {
|
|
848
|
-
logger.warn("VECTOR", "Embedding service
|
|
1296
|
+
logger.warn("VECTOR", "Embedding service not available, backfill skipped");
|
|
849
1297
|
return 0;
|
|
850
1298
|
}
|
|
851
1299
|
const rows = db.query(`
|
|
@@ -858,7 +1306,7 @@ var VectorSearch = class {
|
|
|
858
1306
|
`).all(batchSize);
|
|
859
1307
|
if (rows.length === 0) return 0;
|
|
860
1308
|
let count = 0;
|
|
861
|
-
const model = embeddingService2.
|
|
1309
|
+
const model = embeddingService2.getModelName();
|
|
862
1310
|
for (const row of rows) {
|
|
863
1311
|
const parts = [row.title];
|
|
864
1312
|
if (row.text) parts.push(row.text);
|
|
@@ -871,11 +1319,11 @@ var VectorSearch = class {
|
|
|
871
1319
|
count++;
|
|
872
1320
|
}
|
|
873
1321
|
}
|
|
874
|
-
logger.info("VECTOR", `Backfill
|
|
1322
|
+
logger.info("VECTOR", `Backfill completed: ${count}/${rows.length} embeddings generated`);
|
|
875
1323
|
return count;
|
|
876
1324
|
}
|
|
877
1325
|
/**
|
|
878
|
-
*
|
|
1326
|
+
* Embedding statistics.
|
|
879
1327
|
*/
|
|
880
1328
|
getStats(db) {
|
|
881
1329
|
try {
|
|
@@ -942,21 +1390,21 @@ function knowledgeTypeBoost(type) {
|
|
|
942
1390
|
var HybridSearch = class {
|
|
943
1391
|
embeddingInitialized = false;
|
|
944
1392
|
/**
|
|
945
|
-
*
|
|
1393
|
+
* Initialize the embedding service (lazy, non-blocking)
|
|
946
1394
|
*/
|
|
947
1395
|
async initialize() {
|
|
948
1396
|
try {
|
|
949
1397
|
const embeddingService2 = getEmbeddingService();
|
|
950
1398
|
await embeddingService2.initialize();
|
|
951
1399
|
this.embeddingInitialized = embeddingService2.isAvailable();
|
|
952
|
-
logger.info("SEARCH", `HybridSearch
|
|
1400
|
+
logger.info("SEARCH", `HybridSearch initialized (embedding: ${this.embeddingInitialized ? "active" : "disabled"})`);
|
|
953
1401
|
} catch (error) {
|
|
954
|
-
logger.warn("SEARCH", "
|
|
1402
|
+
logger.warn("SEARCH", "Embedding initialization failed, using only FTS5", {}, error);
|
|
955
1403
|
this.embeddingInitialized = false;
|
|
956
1404
|
}
|
|
957
1405
|
}
|
|
958
1406
|
/**
|
|
959
|
-
*
|
|
1407
|
+
* Hybrid search with 4-signal scoring
|
|
960
1408
|
*/
|
|
961
1409
|
async search(db, query, options = {}) {
|
|
962
1410
|
const limit = options.limit || 10;
|
|
@@ -972,7 +1420,7 @@ var HybridSearch = class {
|
|
|
972
1420
|
const vectorResults = await vectorSearch2.search(db, queryEmbedding, {
|
|
973
1421
|
project: options.project,
|
|
974
1422
|
limit: limit * 2,
|
|
975
|
-
//
|
|
1423
|
+
// Fetch more results for ranking
|
|
976
1424
|
threshold: 0.3
|
|
977
1425
|
});
|
|
978
1426
|
for (const hit of vectorResults) {
|
|
@@ -989,10 +1437,10 @@ var HybridSearch = class {
|
|
|
989
1437
|
source: "vector"
|
|
990
1438
|
});
|
|
991
1439
|
}
|
|
992
|
-
logger.debug("SEARCH", `Vector search: ${vectorResults.length}
|
|
1440
|
+
logger.debug("SEARCH", `Vector search: ${vectorResults.length} results`);
|
|
993
1441
|
}
|
|
994
1442
|
} catch (error) {
|
|
995
|
-
logger.warn("SEARCH", "
|
|
1443
|
+
logger.warn("SEARCH", "Vector search failed, using only keyword", {}, error);
|
|
996
1444
|
}
|
|
997
1445
|
}
|
|
998
1446
|
try {
|
|
@@ -1022,9 +1470,9 @@ var HybridSearch = class {
|
|
|
1022
1470
|
});
|
|
1023
1471
|
}
|
|
1024
1472
|
}
|
|
1025
|
-
logger.debug("SEARCH", `Keyword search: ${keywordResults.length}
|
|
1473
|
+
logger.debug("SEARCH", `Keyword search: ${keywordResults.length} results`);
|
|
1026
1474
|
} catch (error) {
|
|
1027
|
-
logger.error("SEARCH", "
|
|
1475
|
+
logger.error("SEARCH", "Keyword search failed", {}, error);
|
|
1028
1476
|
}
|
|
1029
1477
|
if (rawItems.size === 0) return [];
|
|
1030
1478
|
const allFTS5Ranks = Array.from(rawItems.values()).filter((item) => item.fts5Rank !== null).map((item) => item.fts5Rank);
|