kiro-memory 1.6.0 → 1.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,15 +9,260 @@ var __export = (target, all) => {
9
9
  __defProp(target, name, { get: all[name], enumerable: true });
10
10
  };
11
11
 
12
+ // src/services/sqlite/Search.ts
13
+ var Search_exports = {};
14
+ __export(Search_exports, {
15
+ getObservationsByIds: () => getObservationsByIds,
16
+ getProjectStats: () => getProjectStats,
17
+ getStaleObservations: () => getStaleObservations,
18
+ getTimeline: () => getTimeline,
19
+ markObservationsStale: () => markObservationsStale,
20
+ searchObservationsFTS: () => searchObservationsFTS,
21
+ searchObservationsFTSWithRank: () => searchObservationsFTSWithRank,
22
+ searchObservationsLIKE: () => searchObservationsLIKE,
23
+ searchSummariesFiltered: () => searchSummariesFiltered
24
+ });
25
+ import { existsSync as existsSync2, statSync } from "fs";
26
/**
 * Escape the characters that are special inside a SQL LIKE pattern.
 *
 * `%`, `_` and the backslash itself are each prefixed with a backslash so the
 * input is matched literally by queries that declare `ESCAPE '\'`.
 *
 * @param {string} input - Raw search text.
 * @returns {string} The text with LIKE metacharacters escaped.
 */
function escapeLikePattern(input) {
  const likeMetaChars = /[%_\\]/g;
  return input.replace(likeMetaChars, (ch) => `\\${ch}`);
}
29
/**
 * Turn free-form user text into an FTS5 MATCH expression made only of quoted
 * terms, so operators typed by the user are treated as plain words.
 *
 * The input is capped at 10,000 characters, double quotes are stripped, the
 * text is split on whitespace and at most the first 100 non-empty terms are
 * kept, each wrapped in double quotes and joined with a space.
 *
 * @param {string} query - Raw user query.
 * @returns {string} Quoted terms, or "" when no term survives.
 */
function sanitizeFTS5Query(query) {
  const maxChars = 1e4;
  const maxTerms = 100;
  const bounded = query.length > maxChars ? query.substring(0, maxChars) : query;
  const quotedTerms = [];
  for (const token of bounded.replace(/[""]/g, "").split(/\s+/)) {
    if (token.length === 0) continue;
    if (quotedTerms.length === maxTerms) break;
    quotedTerms.push(`"${token}"`);
  }
  return quotedTerms.join(" ");
}
34
/**
 * Full-text search over observations through the `observations_fts` index,
 * ordered by FTS rank.
 *
 * Falls back to `searchObservationsLIKE` when the sanitized query is empty or
 * when anything in the FTS path throws.
 *
 * @param {object} db - Database handle exposing `query(sql).all(...params)`.
 * @param {string} query - Raw user query.
 * @param {object} [filters] - Optional `project`, `type`, `dateStart`,
 *   `dateEnd` and `limit` (defaults to 50). Falsy filter values are ignored.
 * @returns {Array<object>} Matching observation rows.
 */
function searchObservationsFTS(db, query, filters = {}) {
  const maxRows = filters.limit || 50;
  try {
    const matchExpr = sanitizeFTS5Query(query);
    if (!matchExpr) return searchObservationsLIKE(db, query, filters);
    const optionalClauses = [
      [" AND o.project = ?", filters.project],
      [" AND o.type = ?", filters.type],
      [" AND o.created_at_epoch >= ?", filters.dateStart],
      [" AND o.created_at_epoch <= ?", filters.dateEnd]
    ];
    let sql = `
    SELECT o.* FROM observations o
    JOIN observations_fts fts ON o.id = fts.rowid
    WHERE observations_fts MATCH ?
    `;
    const bindings = [matchExpr];
    for (const [clause, value] of optionalClauses) {
      if (!value) continue;
      sql += clause;
      bindings.push(value);
    }
    sql += " ORDER BY rank LIMIT ?";
    bindings.push(maxRows);
    return db.query(sql).all(...bindings);
  } catch {
    // Any FTS failure degrades to the LIKE scan.
    return searchObservationsLIKE(db, query, filters);
  }
}
69
/**
 * Full-text search that also returns the raw FTS rank of every row as
 * `fts5_rank`.
 *
 * Unlike `searchObservationsFTS` there is no LIKE fallback: an empty
 * sanitized query or any error yields an empty array.
 *
 * @param {object} db - Database handle exposing `query(sql).all(...params)`.
 * @param {string} query - Raw user query.
 * @param {object} [filters] - Optional `project`, `type`, `dateStart`,
 *   `dateEnd` and `limit` (defaults to 50). Falsy filter values are ignored.
 * @returns {Array<object>} Observation rows with an extra `fts5_rank` column.
 */
function searchObservationsFTSWithRank(db, query, filters = {}) {
  const maxRows = filters.limit || 50;
  try {
    const matchExpr = sanitizeFTS5Query(query);
    if (!matchExpr) return [];
    const sqlParts = [`
    SELECT o.*, rank as fts5_rank FROM observations o
    JOIN observations_fts fts ON o.id = fts.rowid
    WHERE observations_fts MATCH ?
    `];
    const bindings = [matchExpr];
    const addFilter = (clause, value) => {
      if (value) {
        sqlParts.push(clause);
        bindings.push(value);
      }
    };
    addFilter(" AND o.project = ?", filters.project);
    addFilter(" AND o.type = ?", filters.type);
    addFilter(" AND o.created_at_epoch >= ?", filters.dateStart);
    addFilter(" AND o.created_at_epoch <= ?", filters.dateEnd);
    sqlParts.push(" ORDER BY rank LIMIT ?");
    bindings.push(maxRows);
    return db.query(sqlParts.join("")).all(...bindings);
  } catch {
    return [];
  }
}
104
/**
 * Substring search over observations using LIKE on `title`, `text`,
 * `narrative` and `concepts`, newest first.
 *
 * The query is escaped so `%`, `_` and `\` match literally.
 *
 * @param {object} db - Database handle exposing `query(sql).all(...params)`.
 * @param {string} query - Raw user query.
 * @param {object} [filters] - Optional `project`, `type`, `dateStart`,
 *   `dateEnd` and `limit` (defaults to 50). Falsy filter values are ignored.
 * @returns {Array<object>} Matching observation rows.
 */
function searchObservationsLIKE(db, query, filters = {}) {
  const maxRows = filters.limit || 50;
  const needle = `%${escapeLikePattern(query)}%`;
  const optionalClauses = [
    [" AND project = ?", filters.project],
    [" AND type = ?", filters.type],
    [" AND created_at_epoch >= ?", filters.dateStart],
    [" AND created_at_epoch <= ?", filters.dateEnd]
  ];
  let sql = `
    SELECT * FROM observations
    WHERE (title LIKE ? ESCAPE '\\' OR text LIKE ? ESCAPE '\\' OR narrative LIKE ? ESCAPE '\\' OR concepts LIKE ? ESCAPE '\\')
  `;
  // One binding per searched column.
  const bindings = Array(4).fill(needle);
  for (const [clause, value] of optionalClauses) {
    if (!value) continue;
    sql += clause;
    bindings.push(value);
  }
  sql += " ORDER BY created_at_epoch DESC LIMIT ?";
  bindings.push(maxRows);
  return db.query(sql).all(...bindings);
}
133
/**
 * Substring search over summaries using LIKE on `request`, `learned`,
 * `completed`, `notes` and `next_steps`, newest first.
 *
 * @param {object} db - Database handle exposing `query(sql).all(...params)`.
 * @param {string} query - Raw user query; LIKE metacharacters are escaped.
 * @param {object} [filters] - Optional `project`, `dateStart`, `dateEnd` and
 *   `limit` (defaults to 20). Falsy filter values are ignored.
 * @returns {Array<object>} Matching summary rows.
 */
function searchSummariesFiltered(db, query, filters = {}) {
  const maxRows = filters.limit || 20;
  const needle = `%${escapeLikePattern(query)}%`;
  const sqlParts = [`
    SELECT * FROM summaries
    WHERE (request LIKE ? ESCAPE '\\' OR learned LIKE ? ESCAPE '\\' OR completed LIKE ? ESCAPE '\\' OR notes LIKE ? ESCAPE '\\' OR next_steps LIKE ? ESCAPE '\\')
  `];
  // One binding per searched column.
  const bindings = Array(5).fill(needle);
  const addFilter = (clause, value) => {
    if (value) {
      sqlParts.push(clause);
      bindings.push(value);
    }
  };
  addFilter(" AND project = ?", filters.project);
  addFilter(" AND created_at_epoch >= ?", filters.dateStart);
  addFilter(" AND created_at_epoch <= ?", filters.dateEnd);
  sqlParts.push(" ORDER BY created_at_epoch DESC LIMIT ?");
  bindings.push(maxRows);
  return db.query(sqlParts.join("")).all(...bindings);
}
158
/**
 * Fetch observations by primary key, newest first.
 *
 * Only positive integer ids are used, and only the first 500 of those; every
 * other entry is ignored. Returns an empty array when nothing usable remains.
 *
 * @param {object} db - Database handle exposing `query(sql).all(...params)`.
 * @param {Array<*>} ids - Candidate ids.
 * @returns {Array<object>} Matching observation rows.
 */
function getObservationsByIds(db, ids) {
  if (!Array.isArray(ids) || ids.length === 0) return [];
  const usableIds = [];
  for (const candidate of ids) {
    if (usableIds.length === 500) break;
    if (Number.isInteger(candidate) && candidate > 0) usableIds.push(candidate);
  }
  if (usableIds.length === 0) return [];
  const marks = Array(usableIds.length).fill("?").join(",");
  return db
    .query(`SELECT * FROM observations WHERE id IN (${marks}) ORDER BY created_at_epoch DESC`)
    .all(...usableIds);
}
167
/**
 * Build a chronological window of observations around an anchor observation.
 *
 * Rows are ordered by `(created_at_epoch, id)`. The id acts as a tie-breaker
 * so observations that share the anchor's timestamp are still placed before
 * or after it, instead of being dropped by a strict `<` / `>` comparison on
 * the timestamp alone.
 * NOTE(review): this assumes ids grow with insertion order — confirm against
 * the schema.
 *
 * @param {object} db - Database handle exposing `query(sql).get/all`.
 * @param {number} anchorId - Id of the observation at the centre of the window.
 * @param {number} [depthBefore=5] - Maximum number of rows returned before the anchor.
 * @param {number} [depthAfter=5] - Maximum number of rows returned after the anchor.
 * @returns {Array<object>} Oldest-to-newest rows (`id`, `type`, `title`,
 *   `content`, `project`, `created_at`, `created_at_epoch`), or an empty
 *   array when the anchor does not exist.
 */
function getTimeline(db, anchorId, depthBefore = 5, depthAfter = 5) {
  const columns = "id, 'observation' as type, title, text as content, project, created_at, created_at_epoch";
  // A single lookup supplies both the anchor timestamp and the anchor's own timeline row.
  const anchor = db.query(`
    SELECT ${columns}
    FROM observations WHERE id = ?
  `).get(anchorId);
  if (!anchor) return [];
  const anchorEpoch = anchor.created_at_epoch;
  const anchorRowId = anchor.id;
  const before = db.query(`
    SELECT ${columns}
    FROM observations
    WHERE created_at_epoch < ? OR (created_at_epoch = ? AND id < ?)
    ORDER BY created_at_epoch DESC, id DESC
    LIMIT ?
  `).all(anchorEpoch, anchorEpoch, anchorRowId, depthBefore).reverse();
  const after = db.query(`
    SELECT ${columns}
    FROM observations
    WHERE created_at_epoch > ? OR (created_at_epoch = ? AND id > ?)
    ORDER BY created_at_epoch ASC, id ASC
    LIMIT ?
  `).all(anchorEpoch, anchorEpoch, anchorRowId, depthAfter);
  return [...before, anchor, ...after];
}
195
/**
 * Count the rows a project owns in each of the four main tables.
 *
 * @param {object} db - Database handle exposing `query(sql).get(...params)`.
 * @param {string} project - Project name to count rows for.
 * @returns {{observations: number, summaries: number, sessions: number, prompts: number}}
 *   Row counts; a missing or falsy count is reported as 0.
 */
function getProjectStats(db, project) {
  const tables = ["observations", "summaries", "sessions", "prompts"];
  // Prepare every statement before running any of them.
  const statements = tables.map(
    (table) => db.query(`SELECT COUNT(*) as count FROM ${table} WHERE project = ?`)
  );
  const stats = {};
  tables.forEach((table, index) => {
    stats[table] = statements[index].get(project)?.count || 0;
  });
  return stats;
}
207
/**
 * Find observations of a project whose referenced files changed on disk after
 * the observation was recorded.
 *
 * The 500 most recent observations with a non-empty `files_modified` are
 * examined. `files_modified` is split on commas; an observation is stale as
 * soon as one of its files exists and has an mtime (ms) greater than the
 * observation's `created_at_epoch`. Missing files and stat errors are skipped.
 *
 * @param {object} db - Database handle exposing `query(sql).all(...params)`.
 * @param {string} project - Project name.
 * @returns {Array<object>} The stale observation rows, in query order.
 */
function getStaleObservations(db, project) {
  const candidates = db.query(`
    SELECT * FROM observations
    WHERE project = ? AND files_modified IS NOT NULL AND files_modified != ''
    ORDER BY created_at_epoch DESC
    LIMIT 500
  `).all(project);
  // True only when the file exists and was modified after `epoch`; errors count as "not modified".
  const modifiedAfter = (filepath, epoch) => {
    try {
      return existsSync2(filepath) && statSync(filepath).mtimeMs > epoch;
    } catch {
      return false;
    }
  };
  return candidates.filter((obs) => {
    if (!obs.files_modified) return false;
    return obs.files_modified
      .split(",")
      .map((entry) => entry.trim())
      .filter(Boolean)
      .some((filepath) => modifiedAfter(filepath, obs.created_at_epoch));
  });
}
236
/**
 * Set or clear the `is_stale` flag on a batch of observations.
 *
 * Only positive integer ids are used, and only the first 500 of those.
 * Nothing is executed when no usable id remains.
 *
 * @param {object} db - Database handle exposing `run(sql, params)`.
 * @param {Array<*>} ids - Candidate observation ids.
 * @param {*} stale - Truthy stores 1, falsy stores 0.
 * @returns {void}
 */
function markObservationsStale(db, ids, stale) {
  if (!Array.isArray(ids) || ids.length === 0) return;
  const targets = ids.filter((id) => Number.isInteger(id) && id > 0).slice(0, 500);
  if (targets.length === 0) return;
  const flag = stale ? 1 : 0;
  const marks = Array(targets.length).fill("?").join(",");
  db.run(`UPDATE observations SET is_stale = ? WHERE id IN (${marks})`, [flag].concat(targets));
}
246
+ var init_Search = __esm({
247
+ "src/services/sqlite/Search.ts"() {
248
+ "use strict";
249
+ }
250
+ });
251
+
12
252
  // src/services/sqlite/Observations.ts
13
253
  var Observations_exports = {};
14
254
  __export(Observations_exports, {
255
+ consolidateObservations: () => consolidateObservations,
15
256
  createObservation: () => createObservation,
16
257
  deleteObservation: () => deleteObservation,
17
258
  getObservationsByProject: () => getObservationsByProject,
18
259
  getObservationsBySession: () => getObservationsBySession,
19
- searchObservations: () => searchObservations
260
+ searchObservations: () => searchObservations,
261
+ updateLastAccessed: () => updateLastAccessed
20
262
  });
263
/**
 * Escape `%`, `_` and `\` so the text can be embedded in a LIKE pattern that
 * uses backslash as its ESCAPE character.
 *
 * @param {string} input - Raw search text.
 * @returns {string} The escaped text.
 */
function escapeLikePattern2(input) {
  const prefixWithBackslash = (matched) => "\\" + matched;
  return input.replace(/[%_\\]/g, prefixWithBackslash);
}
21
266
  function createObservation(db, memorySessionId, project, type, title, subtitle, text, narrative, facts, concepts, filesRead, filesModified, promptNumber) {
22
267
  const now = /* @__PURE__ */ new Date();
23
268
  const result = db.run(
@@ -41,12 +286,12 @@ function getObservationsByProject(db, project, limit = 100) {
41
286
  return query.all(project, limit);
42
287
  }
43
288
  function searchObservations(db, searchTerm, project) {
44
- const sql = project ? `SELECT * FROM observations
45
- WHERE project = ? AND (title LIKE ? OR text LIKE ? OR narrative LIKE ?)
46
- ORDER BY created_at_epoch DESC` : `SELECT * FROM observations
47
- WHERE title LIKE ? OR text LIKE ? OR narrative LIKE ?
289
+ const sql = project ? `SELECT * FROM observations
290
+ WHERE project = ? AND (title LIKE ? ESCAPE '\\' OR text LIKE ? ESCAPE '\\' OR narrative LIKE ? ESCAPE '\\')
291
+ ORDER BY created_at_epoch DESC` : `SELECT * FROM observations
292
+ WHERE title LIKE ? ESCAPE '\\' OR text LIKE ? ESCAPE '\\' OR narrative LIKE ? ESCAPE '\\'
48
293
  ORDER BY created_at_epoch DESC`;
49
- const pattern = `%${searchTerm}%`;
294
+ const pattern = `%${escapeLikePattern2(searchTerm)}%`;
50
295
  const query = db.query(sql);
51
296
  if (project) {
52
297
  return query.all(project, pattern, pattern, pattern);
@@ -56,18 +301,71 @@ function searchObservations(db, searchTerm, project) {
56
301
  function deleteObservation(db, id) {
57
302
  db.run("DELETE FROM observations WHERE id = ?", [id]);
58
303
  }
304
/**
 * Stamp a batch of observations with the current time in
 * `last_accessed_epoch` (milliseconds from `Date.now()`).
 *
 * Only positive integer ids are used, and only the first 500 of those.
 * Nothing is executed when no usable id remains.
 *
 * @param {object} db - Database handle exposing `run(sql, params)`.
 * @param {Array<*>} ids - Candidate observation ids.
 * @returns {void}
 */
function updateLastAccessed(db, ids) {
  if (!Array.isArray(ids) || ids.length === 0) return;
  const targets = ids.filter((id) => Number.isInteger(id) && id > 0).slice(0, 500);
  if (targets.length === 0) return;
  const accessedAt = Date.now();
  const marks = Array(targets.length).fill("?").join(",");
  db.run(
    `UPDATE observations SET last_accessed_epoch = ? WHERE id IN (${marks})`,
    [accessedAt].concat(targets)
  );
}
315
/**
 * Merge groups of observations that share the same `type` and
 * `files_modified` within a project.
 *
 * For every group with at least `minGroupSize` members, the most recent
 * observation is kept: its text becomes the distinct texts of the group
 * joined by "\n---\n" (capped at 100,000 characters) and its title gets a
 * "[consolidato xN]" prefix. The other members and their embeddings are
 * deleted.
 *
 * NOTE(review): the keeper's own embedding is left untouched although its
 * title and text change — confirm whether it should be regenerated.
 *
 * @param {object} db - Database handle exposing `query(sql).all` and `run`.
 * @param {string} project - Project name.
 * @param {object} [options]
 * @param {number} [options.minGroupSize=3] - Minimum group size to merge.
 * @param {boolean} [options.dryRun] - When truthy, only count; write nothing.
 * @returns {{merged: number, removed: number}} Groups merged and rows removed
 *   (or that would be, in a dry run).
 */
function consolidateObservations(db, project, options = {}) {
  const minGroupSize = options.minGroupSize || 3;
  const groups = db.query(`
    SELECT type, files_modified, COUNT(*) as cnt, GROUP_CONCAT(id) as ids
    FROM observations
    WHERE project = ? AND files_modified IS NOT NULL AND files_modified != ''
    GROUP BY type, files_modified
    HAVING cnt >= ?
    ORDER BY cnt DESC
  `).all(project, minGroupSize);
  if (groups.length === 0) return { merged: 0, removed: 0 };
  let totalMerged = 0;
  let totalRemoved = 0;
  for (const group of groups) {
    const obsIds = group.ids.split(",").map(Number);
    const placeholders = obsIds.map(() => "?").join(",");
    const observations = db.query(
      `SELECT * FROM observations WHERE id IN (${placeholders}) ORDER BY created_at_epoch DESC`
    ).all(...obsIds);
    if (observations.length < minGroupSize) continue;
    if (options.dryRun) {
      totalMerged += 1;
      totalRemoved += observations.length - 1;
      continue;
    }
    // Rows come back newest first, so the first one survives.
    const [keeper, ...others] = observations;
    // A Set keeps insertion order and ignores duplicates on its own.
    const uniqueTexts = new Set();
    for (const obs of observations) {
      if (obs.text) uniqueTexts.add(obs.text);
    }
    const consolidatedText = Array.from(uniqueTexts).join("\n---\n").substring(0, 1e5);
    // A missing title must not be rendered as the literal string "null".
    const keeperTitle = keeper.title ?? "";
    db.run(
      "UPDATE observations SET text = ?, title = ? WHERE id = ?",
      [consolidatedText, `[consolidato x${observations.length}] ${keeperTitle}`, keeper.id]
    );
    const removeIds = others.map((o) => o.id);
    const removePlaceholders = removeIds.map(() => "?").join(",");
    // Delete child rows before their parents so the parent delete cannot be
    // rejected by a child that still references it.
    db.run(`DELETE FROM observation_embeddings WHERE observation_id IN (${removePlaceholders})`, removeIds);
    db.run(`DELETE FROM observations WHERE id IN (${removePlaceholders})`, removeIds);
    totalMerged += 1;
    totalRemoved += removeIds.length;
  }
  return { merged: totalMerged, removed: totalRemoved };
}
59
363
  var init_Observations = __esm({
60
364
  "src/services/sqlite/Observations.ts"() {
61
365
  "use strict";
62
366
  }
63
367
  });
64
368
 
65
- // src/services/search/ChromaManager.ts
66
- import { ChromaClient } from "chromadb";
67
- import { join as join2 } from "path";
68
- import { homedir as homedir2 } from "os";
69
- import { existsSync as existsSync2, mkdirSync as mkdirSync2 } from "fs";
70
-
71
369
  // src/utils/logger.ts
72
370
  import { appendFileSync, existsSync, mkdirSync, readFileSync } from "fs";
73
371
  import { join } from "path";
@@ -287,207 +585,461 @@ ${data.stack}` : ` ${data.message}`;
287
585
  };
288
586
  var logger = new Logger();
289
587
 
290
- // src/services/search/ChromaManager.ts
291
- var VECTOR_DB_DIR = join2(homedir2(), ".contextkit", "vector-db");
292
- var ChromaManager = class {
293
- client;
294
- collection = null;
295
- isAvailable = false;
296
- constructor() {
297
- if (!existsSync2(VECTOR_DB_DIR)) {
298
- mkdirSync2(VECTOR_DB_DIR, { recursive: true });
299
- }
300
- this.client = new ChromaClient({
301
- path: process.env.CHROMADB_URL || "http://localhost:8000"
302
- });
303
- }
588
+ // src/services/search/EmbeddingService.ts
589
/**
 * Lazily initialised text-embedding service.
 *
 * Initialisation tries the optional `fastembed` package first, then
 * `@huggingface/transformers`; when neither loads, the service stays
 * initialised with no provider and every embedding request resolves to null.
 */
var EmbeddingService = class {
  /** Active provider: "fastembed", "transformers" or null when none is available. */
  provider = null;
  /** Provider-specific model handle. */
  model = null;
  /** True once an initialisation attempt has completed. */
  initialized = false;
  /** In-flight initialisation promise, shared by concurrent callers. */
  initializing = null;
  /**
   * Initialise the service once; concurrent callers share the same attempt.
   *
   * @returns {Promise<boolean>} True when a provider is available.
   */
  async initialize() {
    if (this.initialized) return this.provider !== null;
    if (this.initializing) return this.initializing;
    this.initializing = this._doInitialize();
    try {
      return await this.initializing;
    } finally {
      // Always drop the in-flight promise, even on rejection, so a failed
      // attempt cannot be handed to every later caller.
      this.initializing = null;
    }
  }
  /**
   * Try each provider in order and record the first one that loads.
   *
   * @returns {Promise<boolean>} True when a provider was initialised.
   */
  async _doInitialize() {
    try {
      const fastembed = await import("fastembed");
      const EmbeddingModel = fastembed.EmbeddingModel || fastembed.default?.EmbeddingModel;
      const FlagEmbedding = fastembed.FlagEmbedding || fastembed.default?.FlagEmbedding;
      if (FlagEmbedding && EmbeddingModel) {
        this.model = await FlagEmbedding.init({
          model: EmbeddingModel.BGESmallENV15
        });
        this.provider = "fastembed";
        this.initialized = true;
        logger.info("EMBEDDING", "Inizializzato con fastembed (BGE-small-en-v1.5)");
        return true;
      }
    } catch (error) {
      logger.debug("EMBEDDING", `fastembed non disponibile: ${error}`);
    }
    try {
      const transformers = await import("@huggingface/transformers");
      const pipeline = transformers.pipeline || transformers.default?.pipeline;
      if (pipeline) {
        this.model = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2", {
          quantized: true
        });
        this.provider = "transformers";
        this.initialized = true;
        logger.info("EMBEDDING", "Inizializzato con @huggingface/transformers (all-MiniLM-L6-v2)");
        return true;
      }
    } catch (error) {
      logger.debug("EMBEDDING", `@huggingface/transformers non disponibile: ${error}`);
    }
    // No provider: remember that we tried so the imports are not retried on every call.
    this.provider = null;
    this.initialized = true;
    logger.warn("EMBEDDING", "Nessun provider embedding disponibile, ricerca semantica disabilitata");
    return false;
  }
  /**
   * Embed a single text, using at most its first 2,000 characters.
   *
   * @param {string} text - Text to embed.
   * @returns {Promise<Float32Array|null>} The vector, or null when no provider
   *   is available or embedding fails.
   */
  async embed(text) {
    if (!this.initialized) await this.initialize();
    if (!this.provider || !this.model) return null;
    try {
      const truncated = text.substring(0, 2e3);
      if (this.provider === "fastembed") {
        return await this._embedFastembed(truncated);
      } else if (this.provider === "transformers") {
        return await this._embedTransformers(truncated);
      }
    } catch (error) {
      logger.error("EMBEDDING", `Errore generazione embedding: ${error}`);
    }
    return null;
  }
  /**
   * Embed several texts one after another.
   *
   * @param {string[]} texts - Texts to embed.
   * @returns {Promise<Array<Float32Array|null>>} One entry per input text;
   *   null where embedding was not possible.
   */
  async embedBatch(texts) {
    if (!this.initialized) await this.initialize();
    if (!this.provider || !this.model) return texts.map(() => null);
    const results = [];
    for (const text of texts) {
      try {
        const embedding = await this.embed(text);
        results.push(embedding);
      } catch {
        results.push(null);
      }
    }
    return results;
  }
  /**
   * @returns {boolean} True when initialisation completed with a provider.
   */
  isAvailable() {
    return this.initialized && this.provider !== null;
  }
  /**
   * @returns {string|null} Name of the active provider, or null.
   */
  getProvider() {
    return this.provider;
  }
  /**
   * @returns {number} Embedding vector size reported by this service (384).
   */
  getDimensions() {
    return 384;
  }
  // --- Provider-specific helpers ---
  /**
   * Embed with the fastembed model and return the first vector it yields.
   *
   * @param {string} text - Already truncated text.
   * @returns {Promise<Float32Array|null>} The vector, or null when nothing is yielded.
   */
  async _embedFastembed(text) {
    const embeddings = this.model.embed([text], 1);
    for await (const batch of embeddings) {
      if (batch && batch.length > 0) {
        const vec = batch[0];
        return vec instanceof Float32Array ? vec : new Float32Array(vec);
      }
    }
    return null;
  }
  /**
   * Embed with the transformers pipeline, requesting mean pooling and normalisation.
   *
   * @param {string} text - Already truncated text.
   * @returns {Promise<Float32Array|null>} The vector, or null when the output has no data.
   */
  async _embedTransformers(text) {
    const output = await this.model(text, {
      pooling: "mean",
      normalize: true
    });
    if (output?.data) {
      return output.data instanceof Float32Array ? output.data : new Float32Array(output.data);
    }
    return null;
  }
};
719
// Shared EmbeddingService instance, created on first use.
var embeddingService = null;
/**
 * Return the shared EmbeddingService, creating it on the first call.
 *
 * @returns {EmbeddingService} The shared instance.
 */
function getEmbeddingService() {
  if (embeddingService) return embeddingService;
  embeddingService = new EmbeddingService();
  return embeddingService;
}
726
+
727
+ // src/services/search/VectorSearch.ts
728
/**
 * Cosine similarity between two numeric vectors.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} The similarity, or 0 when the lengths differ or either
 *   vector has zero magnitude.
 */
function cosineSimilarity(a, b) {
  const size = a.length;
  if (size !== b.length) return 0;
  let dot = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  let index = 0;
  while (index < size) {
    const x = a[index];
    const y = b[index];
    dot += x * y;
    sumSquaresA += x * x;
    sumSquaresB += y * y;
    index += 1;
  }
  const magnitude = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
  return magnitude === 0 ? 0 : dot / magnitude;
}
742
/**
 * View the bytes of a Float32Array as a Node Buffer.
 *
 * The Buffer is created over the array's underlying ArrayBuffer, limited to
 * the array's own offset and length.
 *
 * @param {Float32Array} arr - Source vector.
 * @returns {Buffer} Buffer over the same bytes.
 */
function float32ToBuffer(arr) {
  const { buffer, byteOffset, byteLength } = arr;
  return Buffer.from(buffer, byteOffset, byteLength);
}
745
/**
 * Decode a byte buffer into a Float32Array.
 *
 * The bytes are copied into a fresh ArrayBuffer first, so the source view may
 * start at any byte offset of its backing buffer.
 *
 * @param {Buffer|Uint8Array} buf - Bytes holding 32-bit floats.
 * @returns {Float32Array} A new array over a copy of the bytes.
 */
function bufferToFloat32(buf) {
  const start = buf.byteOffset;
  const end = start + buf.byteLength;
  const ownedBytes = buf.buffer.slice(start, end);
  return new Float32Array(ownedBytes);
}
749
/**
 * Brute-force vector search over embeddings stored in the
 * `observation_embeddings` table.
 */
var VectorSearch = class {
  /**
   * Semantic search: score every stored embedding against the query vector
   * with cosine similarity and return the best matches.
   *
   * Rows whose embedding cannot be decoded are skipped, so one corrupt row
   * does not empty the whole result.
   *
   * @param {object} db - Database handle exposing `query(sql).all(...params)`.
   * @param {Float32Array} queryEmbedding - Embedding of the query text.
   * @param {object} [options]
   * @param {string} [options.project] - Restrict to this project.
   * @param {number} [options.limit=10] - Maximum number of hits.
   * @param {number} [options.threshold=0.3] - Minimum similarity; 0 is honoured.
   * @returns {Promise<Array<object>>} Hits sorted by descending similarity;
   *   empty on error.
   */
  async search(db, queryEmbedding, options = {}) {
    const limit = options.limit || 10;
    // `??` rather than `||`, so an explicit threshold of 0 is not replaced by the default.
    const threshold = options.threshold ?? 0.3;
    try {
      let sql = `
        SELECT e.observation_id, e.embedding,
               o.title, o.text, o.type, o.project, o.created_at, o.created_at_epoch
        FROM observation_embeddings e
        JOIN observations o ON o.id = e.observation_id
      `;
      const params = [];
      if (options.project) {
        sql += " WHERE o.project = ?";
        params.push(options.project);
      }
      const rows = db.query(sql).all(...params);
      const scored = [];
      for (const row of rows) {
        let similarity;
        try {
          similarity = cosineSimilarity(queryEmbedding, bufferToFloat32(row.embedding));
        } catch (error) {
          // An undecodable blob only disqualifies its own row.
          logger.debug("VECTOR", `Embedding non valido ignorato per osservazione ${row.observation_id}: ${error}`);
          continue;
        }
        if (similarity >= threshold) {
          scored.push({
            id: row.observation_id,
            observationId: row.observation_id,
            similarity,
            title: row.title,
            text: row.text,
            type: row.type,
            project: row.project,
            created_at: row.created_at,
            created_at_epoch: row.created_at_epoch
          });
        }
      }
      scored.sort((a, b) => b.similarity - a.similarity);
      return scored.slice(0, limit);
    } catch (error) {
      logger.error("VECTOR", `Errore ricerca vettoriale: ${error}`);
      return [];
    }
  }
  /**
   * Insert or replace the embedding of one observation.
   *
   * @param {object} db - Database handle exposing `query(sql).run(...params)`.
   * @param {number} observationId - Observation the embedding belongs to.
   * @param {Float32Array} embedding - Vector to store.
   * @param {string} model - Label stored alongside the vector.
   * @returns {Promise<boolean>} True when stored; false when the write failed
   *   (the error is logged, not thrown).
   */
  async storeEmbedding(db, observationId, embedding, model) {
    try {
      const blob = float32ToBuffer(embedding);
      db.query(`
        INSERT OR REPLACE INTO observation_embeddings
        (observation_id, embedding, model, dimensions, created_at)
        VALUES (?, ?, ?, ?, ?)
      `).run(
        observationId,
        blob,
        model,
        embedding.length,
        new Date().toISOString()
      );
      logger.debug("VECTOR", `Embedding salvato per osservazione ${observationId}`);
      return true;
    } catch (error) {
      logger.error("VECTOR", `Errore salvataggio embedding: ${error}`);
      return false;
    }
  }
  /**
   * Generate embeddings for observations that do not have one yet, newest first.
   *
   * @param {object} db - Database handle.
   * @param {number} [batchSize=50] - Maximum observations handled per call.
   * @returns {Promise<number>} Number of embeddings actually stored.
   */
  async backfillEmbeddings(db, batchSize = 50) {
    const service = getEmbeddingService();
    if (!await service.initialize()) {
      logger.warn("VECTOR", "Embedding service non disponibile, backfill saltato");
      return 0;
    }
    const rows = db.query(`
      SELECT o.id, o.title, o.text, o.narrative, o.concepts
      FROM observations o
      LEFT JOIN observation_embeddings e ON e.observation_id = o.id
      WHERE e.observation_id IS NULL
      ORDER BY o.created_at_epoch DESC
      LIMIT ?
    `).all(batchSize);
    if (rows.length === 0) return 0;
    let count = 0;
    const model = service.getProvider() || "unknown";
    for (const row of rows) {
      const parts = [row.title];
      if (row.text) parts.push(row.text);
      if (row.narrative) parts.push(row.narrative);
      if (row.concepts) parts.push(row.concepts);
      const fullText = parts.join(" ").substring(0, 2e3);
      const embedding = await service.embed(fullText);
      // Count only embeddings that were really written, so the log and the
      // return value do not over-report.
      if (embedding && await this.storeEmbedding(db, row.id, embedding, model)) {
        count++;
      }
    }
    logger.info("VECTOR", `Backfill completato: ${count}/${rows.length} embeddings generati`);
    return count;
  }
  /**
   * Embedding coverage statistics.
   *
   * @param {object} db - Database handle exposing `query(sql).get()`.
   * @returns {{total: number, embedded: number, percentage: number}}
   *   Observation count, embedding count and the rounded percentage; all
   *   zeros when a query fails.
   */
  getStats(db) {
    try {
      const totalRow = db.query("SELECT COUNT(*) as count FROM observations").get();
      const embeddedRow = db.query("SELECT COUNT(*) as count FROM observation_embeddings").get();
      const total = totalRow?.count || 0;
      const embedded = embeddedRow?.count || 0;
      const percentage = total > 0 ? Math.round(embedded / total * 100) : 0;
      return { total, embedded, percentage };
    } catch {
      return { total: 0, embedded: 0, percentage: 0 };
    }
  }
};
867
// Shared VectorSearch instance, created on first use.
var vectorSearch = null;
/**
 * Return the shared VectorSearch, creating it on the first call.
 *
 * @returns {VectorSearch} The shared instance.
 */
function getVectorSearch() {
  if (vectorSearch) return vectorSearch;
  vectorSearch = new VectorSearch();
  return vectorSearch;
}
874
+
875
+ // src/services/search/ScoringEngine.ts
876
// Default weight of each ranking signal in the composite search score.
var SEARCH_WEIGHTS = {
  // Embedding similarity.
  semantic: 0.4,
  // Normalised FTS5 rank.
  fts5: 0.3,
  // Age-based decay.
  recency: 0.2,
  // Project-name match.
  projectMatch: 0.1
};
882
/**
 * Exponential-decay recency score: the score halves every `halfLifeHours`.
 *
 * @param {number} createdAtEpoch - Creation time in milliseconds since the epoch.
 * @param {number} [halfLifeHours=168] - Half-life of the decay, in hours.
 * @returns {number} 0 for a missing or non-positive timestamp, 1 for a
 *   timestamp that is not in the past, otherwise a decayed value.
 */
function recencyScore(createdAtEpoch, halfLifeHours = 168) {
  const hasTimestamp = Boolean(createdAtEpoch) && createdAtEpoch > 0;
  if (!hasTimestamp) return 0;
  const elapsedMs = Date.now() - createdAtEpoch;
  if (elapsedMs <= 0) return 1;
  const msPerHour = 1e3 * 60 * 60;
  const ageHours = elapsedMs / msPerHour;
  return Math.exp(-ageHours * Math.LN2 / halfLifeHours);
}
890
/**
 * Map an FTS5 rank onto [0, 1] relative to the ranks of the whole result set.
 * The smallest rank in the set maps to 1 and the largest to 0.
 *
 * @param {number} rank - Rank of the row being scored.
 * @param {number[]} allRanks - Ranks of every row in the result set.
 * @returns {number} 0 for an empty set, 1 for a single-row set or when all
 *   ranks are equal, otherwise the linearly scaled value.
 */
function normalizeFTS5Rank(rank, allRanks) {
  switch (allRanks.length) {
    case 0:
      return 0;
    case 1:
      return 1;
    default:
      break;
  }
  const lowest = Math.min(...allRanks);
  const highest = Math.max(...allRanks);
  return lowest === highest ? 1 : (highest - rank) / (highest - lowest);
}
898
/**
 * Score whether an item belongs to the target project, ignoring case.
 *
 * @param {string} itemProject - Project of the item.
 * @param {string} targetProject - Project being searched.
 * @returns {number} 1 on a case-insensitive match, 0 otherwise or when either
 *   name is missing.
 */
function projectMatchScore(itemProject, targetProject) {
  const bothGiven = Boolean(itemProject) && Boolean(targetProject);
  if (!bothGiven) return 0;
  const sameProject = itemProject.toLowerCase() === targetProject.toLowerCase();
  return Number(sameProject);
}
902
/**
 * Weighted sum of the four ranking signals.
 *
 * @param {{semantic: number, fts5: number, recency: number, projectMatch: number}} signals
 *   Per-signal scores.
 * @param {{semantic: number, fts5: number, recency: number, projectMatch: number}} weights
 *   Per-signal weights.
 * @returns {number} Each signal multiplied by its weight, summed in the order
 *   semantic, fts5, recency, projectMatch.
 */
function computeCompositeScore(signals, weights) {
  const semanticPart = signals.semantic * weights.semantic;
  const fts5Part = signals.fts5 * weights.fts5;
  const recencyPart = signals.recency * weights.recency;
  const projectPart = signals.projectMatch * weights.projectMatch;
  return semanticPart + fts5Part + recencyPart + projectPart;
}
905
// Score multipliers for observation types that are boosted; other types get 1.
var KNOWLEDGE_TYPE_BOOST = {
  constraint: 1.3,
  decision: 1.25,
  heuristic: 1.15,
  rejected: 1.1
};
/**
 * Score multiplier for an observation type.
 *
 * Only the table's own keys are consulted. A plain `table[type]` lookup would
 * also find inherited members such as "constructor" or "toString" and return
 * a function instead of a number.
 *
 * @param {string} type - Observation type.
 * @returns {number} The configured boost, or 1 for any other type.
 */
function knowledgeTypeBoost(type) {
  const isConfigured = Object.prototype.hasOwnProperty.call(KNOWLEDGE_TYPE_BOOST, type);
  return isConfigured ? KNOWLEDGE_TYPE_BOOST[type] : 1;
}
411
914
 
412
915
  // src/services/search/HybridSearch.ts
413
916
  var HybridSearch = class {
414
- chromaManager;
415
- constructor() {
416
- this.chromaManager = new ChromaManager();
417
- }
917
+ embeddingInitialized = false;
418
918
  /**
419
- * Initialize search (connects to ChromaDB if available)
919
+ * Inizializza il servizio di embedding (lazy, non bloccante)
420
920
  */
421
921
  async initialize() {
422
- await this.chromaManager.initialize();
922
+ try {
923
+ const embeddingService2 = getEmbeddingService();
924
+ await embeddingService2.initialize();
925
+ this.embeddingInitialized = embeddingService2.isAvailable();
926
+ logger.info("SEARCH", `HybridSearch inizializzato (embedding: ${this.embeddingInitialized ? "attivo" : "disattivato"})`);
927
+ } catch (error) {
928
+ logger.warn("SEARCH", "Inizializzazione embedding fallita, uso solo FTS5", {}, error);
929
+ this.embeddingInitialized = false;
930
+ }
423
931
  }
424
932
  /**
425
- * Perform hybrid search combining vector and keyword results
933
+ * Hybrid search with 4-signal scoring.
+ *
+ * Gathers candidates from two stages into one map keyed by observation id:
+ * a vector stage (run only when this.embeddingInitialized is truthy) and an
+ * FTS5 keyword stage. Each candidate is scored from four signals (semantic,
+ * fts5, recency, projectMatch) via computeCompositeScore, multiplied by a
+ * hybrid boost and knowledgeTypeBoost(type), and capped at 1.
+ * A failure in either stage is logged and does not abort the search.
+ * After ranking, updateLastAccessed is called for the returned ids; any error
+ * from that step is swallowed.
+ *
+ * @param db - database handle, passed through to the search helpers
+ * @param query - search text, used both for the embedding and the FTS5 lookup
+ * @param options - optional settings: limit (falls back to 10 when missing or
+ *   falsy), weights (falls back to SEARCH_WEIGHTS), project (falls back to "")
+ * @returns at most `limit` results sorted by descending score; each has id,
+ *   title, content, type, project, created_at, created_at_epoch, score,
+ *   source ("hybrid", "vector" or "keyword") and the raw signals object.
+ *   Returns an empty array when neither stage produced a candidate.
426
934
  */
427
935
  async search(db, query, options = {}) {
428
936
  const limit = options.limit || 10;
429
- const results = [];
430
- if (this.chromaManager.isChromaAvailable()) {
937
+ const weights = options.weights || SEARCH_WEIGHTS;
938
+ const targetProject = options.project || "";
939
+ const rawItems = /* @__PURE__ */ new Map(); // candidates keyed by stringified observation id
940
+ if (this.embeddingInitialized) { // the vector stage is skipped entirely when embeddings are not initialized
431
941
  try {
432
- const vectorResults = await this.chromaManager.search(query, {
433
- project: options.project,
434
- limit: Math.ceil(limit / 2)
435
- });
436
- for (const hit of vectorResults) {
437
- results.push({
438
- id: hit.id,
439
- title: hit.metadata.title || "Untitled",
440
- content: hit.content,
441
- type: hit.metadata.type || "unknown",
442
- project: hit.metadata.project || "unknown",
443
- created_at: hit.metadata.created_at || (/* @__PURE__ */ new Date()).toISOString(),
444
- score: 1 - hit.distance,
445
- // Convert distance to similarity score
446
- source: "vector"
942
+ const embeddingService2 = getEmbeddingService();
943
+ const queryEmbedding = await embeddingService2.embed(query);
944
+ if (queryEmbedding) {
945
+ const vectorSearch2 = getVectorSearch();
946
+ const vectorResults = await vectorSearch2.search(db, queryEmbedding, {
947
+ project: options.project,
948
+ limit: limit * 2,
949
+ // Take more results than requested so the ranking has a larger pool
950
+ threshold: 0.3 // NOTE(review): presumably a minimum-similarity cutoff — confirm in vectorSearch.search
447
951
  });
952
+ for (const hit of vectorResults) {
953
+ rawItems.set(String(hit.observationId), {
954
+ id: String(hit.observationId),
955
+ title: hit.title,
956
+ content: hit.text || "",
957
+ type: hit.type,
958
+ project: hit.project,
959
+ created_at: hit.created_at,
960
+ created_at_epoch: hit.created_at_epoch,
961
+ semanticScore: hit.similarity,
962
+ fts5Rank: null, // filled in later if the keyword stage returns the same id
963
+ source: "vector"
964
+ });
965
+ }
966
+ logger.debug("SEARCH", `Vector search: ${vectorResults.length} risultati`);
448
967
  }
449
- logger.debug("SEARCH", `Vector search returned ${vectorResults.length} results`);
450
968
  } catch (error) {
451
- logger.warn("SEARCH", "Vector search failed, using keyword only", {}, error);
969
+ logger.warn("SEARCH", "Ricerca vettoriale fallita, uso solo keyword", {}, error);
452
970
  }
453
971
  }
454
972
  try {
455
- const { searchObservations: searchObservations2 } = await Promise.resolve().then(() => (init_Observations(), Observations_exports));
456
- const keywordResults = searchObservations2(db, query, options.project);
457
- for (const obs of keywordResults.slice(0, Math.ceil(limit / 2))) {
458
- results.push({
459
- id: String(obs.id),
460
- title: obs.title,
461
- content: obs.text || obs.narrative || "",
462
- type: obs.type,
463
- project: obs.project,
464
- created_at: obs.created_at,
465
- score: 0.5,
466
- // Default score for keyword matches
467
- source: "keyword"
468
- });
973
+ const { searchObservationsFTSWithRank: searchObservationsFTSWithRank2 } = await Promise.resolve().then(() => (init_Search(), Search_exports));
974
+ const keywordResults = searchObservationsFTSWithRank2(db, query, {
975
+ project: options.project,
976
+ limit: limit * 2
977
+ });
978
+ for (const obs of keywordResults) {
979
+ const id = String(obs.id);
980
+ const existing = rawItems.get(id);
981
+ if (existing) {
982
+ existing.fts5Rank = obs.fts5_rank; // same id already in the map: attach the keyword rank to it
983
+ existing.source = "vector"; // NOTE(review): this label is superseded below — items with semanticScore > 0 and a rank are reported as "hybrid"
984
+ } else {
985
+ rawItems.set(id, {
986
+ id,
987
+ title: obs.title,
988
+ content: obs.text || obs.narrative || "",
989
+ type: obs.type,
990
+ project: obs.project,
991
+ created_at: obs.created_at,
992
+ created_at_epoch: obs.created_at_epoch,
993
+ semanticScore: 0, // keyword-only hit: no semantic signal
994
+ fts5Rank: obs.fts5_rank,
995
+ source: "keyword"
996
+ });
997
+ }
469
998
  }
470
- logger.debug("SEARCH", `Keyword search returned ${keywordResults.length} results`);
999
+ logger.debug("SEARCH", `Keyword search: ${keywordResults.length} risultati`);
471
1000
  } catch (error) {
472
- logger.error("SEARCH", "Keyword search failed", {}, error);
1001
+ logger.error("SEARCH", "Ricerca keyword fallita", {}, error);
473
1002
  }
474
- const uniqueResults = this.deduplicateAndSort(results, limit);
475
- return uniqueResults;
476
- }
477
- /**
478
- * Remove duplicates and sort by score
479
- */
480
- deduplicateAndSort(results, limit) {
481
- const seen = /* @__PURE__ */ new Set();
482
- const unique = [];
483
- for (const result of results) {
484
- if (!seen.has(result.id)) {
485
- seen.add(result.id);
486
- unique.push(result);
1003
+ if (rawItems.size === 0) return [];
1004
+ const allFTS5Ranks = Array.from(rawItems.values()).filter((item) => item.fts5Rank !== null).map((item) => item.fts5Rank); // every non-null rank, handed to normalizeFTS5Rank for each item
1005
+ const scored = [];
1006
+ for (const item of rawItems.values()) {
1007
+ const signals = {
1008
+ semantic: item.semanticScore,
1009
+ fts5: item.fts5Rank !== null ? normalizeFTS5Rank(item.fts5Rank, allFTS5Ranks) : 0,
1010
+ recency: recencyScore(item.created_at_epoch),
1011
+ projectMatch: targetProject ? projectMatchScore(item.project, targetProject) : 0 // no project requested: signal is 0
1012
+ };
1013
+ const score = computeCompositeScore(signals, weights);
1014
+ const isHybrid = item.semanticScore > 0 && item.fts5Rank !== null; // found by both the vector and the keyword stage
1015
+ const hybridBoost = isHybrid ? 1.15 : 1; // 15% boost for items found by both stages
1016
+ const finalScore = Math.min(1, score * hybridBoost * knowledgeTypeBoost(item.type)); // cap the boosted score at 1
1017
+ scored.push({
1018
+ id: item.id,
1019
+ title: item.title,
1020
+ content: item.content,
1021
+ type: item.type,
1022
+ project: item.project,
1023
+ created_at: item.created_at,
1024
+ created_at_epoch: item.created_at_epoch,
1025
+ score: finalScore,
1026
+ source: isHybrid ? "hybrid" : item.source,
1027
+ signals
1028
+ });
1029
+ }
1030
+ scored.sort((a, b) => b.score - a.score); // highest score first
1031
+ const finalResults = scored.slice(0, limit);
1032
+ if (finalResults.length > 0) {
1033
+ try {
1034
+ const { updateLastAccessed: updateLastAccessed2 } = await Promise.resolve().then(() => (init_Observations(), Observations_exports));
1035
+ const ids = finalResults.map((r) => parseInt(r.id, 10)).filter((id) => id > 0); // keep only ids that parse to a positive number
1036
+ if (ids.length > 0) {
1037
+ updateLastAccessed2(db, ids);
1038
+ }
1039
+ } catch { // any error from the last-accessed update is swallowed; the results are still returned
487
1040
  }
488
1041
  }
489
- unique.sort((a, b) => b.score - a.score);
490
- return unique.slice(0, limit);
1042
+ return finalResults;
491
1043
  }
492
1044
  };
493
1045
  var hybridSearch = null;