kiro-memory 1.6.0 → 1.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,15 +9,260 @@ var __export = (target, all) => {
9
9
  __defProp(target, name, { get: all[name], enumerable: true });
10
10
  };
11
11
 
12
+ // src/services/sqlite/Search.ts
13
+ var Search_exports = {};
14
+ __export(Search_exports, {
15
+ getObservationsByIds: () => getObservationsByIds,
16
+ getProjectStats: () => getProjectStats,
17
+ getStaleObservations: () => getStaleObservations,
18
+ getTimeline: () => getTimeline,
19
+ markObservationsStale: () => markObservationsStale,
20
+ searchObservationsFTS: () => searchObservationsFTS,
21
+ searchObservationsFTSWithRank: () => searchObservationsFTSWithRank,
22
+ searchObservationsLIKE: () => searchObservationsLIKE,
23
+ searchSummariesFiltered: () => searchSummariesFiltered
24
+ });
25
+ import { existsSync as existsSync3, statSync } from "fs";
26
/**
 * Escapes the characters that are special inside a SQL LIKE pattern
 * (`%`, `_` and the backslash itself) by prefixing each with a backslash.
 * Intended for patterns used together with `ESCAPE '\'`.
 *
 * @param {string} input - Raw user text.
 * @returns {string} Text that matches literally inside a LIKE pattern.
 */
function escapeLikePattern(input) {
  const likeSpecials = /[%_\\]/g;
  return input.replace(likeSpecials, (special) => `\\${special}`);
}
29
/**
 * Turns free text into a safe FTS5 MATCH expression.
 *
 * The input is capped at 10 000 characters, double quotes are removed, and
 * each whitespace-separated token (at most 100) is wrapped in double quotes
 * so FTS5 treats it as a literal string rather than query syntax.
 *
 * @param {string} query - Raw search text.
 * @returns {string} Space-separated quoted terms; empty string when no terms remain.
 */
function sanitizeFTS5Query(query) {
  const maxChars = 1e4;
  const maxTerms = 100;
  const bounded = query.length > maxChars ? query.substring(0, maxChars) : query;
  const quotedTerms = [];
  for (const token of bounded.replace(/[""]/g, "").split(/\s+/)) {
    if (token.length === 0) continue;
    if (quotedTerms.length === maxTerms) break;
    quotedTerms.push(`"${token}"`);
  }
  return quotedTerms.join(" ");
}
34
/**
 * Full-text search over observations through the `observations_fts` table.
 *
 * Falls back to `searchObservationsLIKE` when the sanitized query is empty
 * or when anything in the FTS path throws.
 *
 * @param {object} db - Database handle exposing `query(sql).all(...params)`.
 * @param {string} query - Raw search text.
 * @param {object} [filters] - Optional `project`, `type`, `dateStart`, `dateEnd`, `limit` (default 50).
 * @returns {Array<object>} Matching observation rows ordered by FTS rank.
 */
function searchObservationsFTS(db, query, filters = {}) {
  const maxRows = filters.limit || 50;
  try {
    const matchExpr = sanitizeFTS5Query(query);
    if (!matchExpr) return searchObservationsLIKE(db, query, filters);

    // Each optional filter contributes one clause and one bound value, in this order.
    const optionalClauses = [
      [filters.project, " AND o.project = ?"],
      [filters.type, " AND o.type = ?"],
      [filters.dateStart, " AND o.created_at_epoch >= ?"],
      [filters.dateEnd, " AND o.created_at_epoch <= ?"]
    ];
    const bindings = [matchExpr];
    let sql = `
      SELECT o.* FROM observations o
      JOIN observations_fts fts ON o.id = fts.rowid
      WHERE observations_fts MATCH ?
    `;
    for (const [value, clause] of optionalClauses) {
      if (!value) continue;
      sql += clause;
      bindings.push(value);
    }
    sql += " ORDER BY rank LIMIT ?";
    bindings.push(maxRows);
    return db.query(sql).all(...bindings);
  } catch {
    return searchObservationsLIKE(db, query, filters);
  }
}
69
/**
 * Full-text search that also returns the FTS rank of every hit as `fts5_rank`.
 *
 * Unlike `searchObservationsFTS` there is no LIKE fallback: an empty
 * sanitized query or any thrown error yields an empty array.
 *
 * @param {object} db - Database handle exposing `query(sql).all(...params)`.
 * @param {string} query - Raw search text.
 * @param {object} [filters] - Optional `project`, `type`, `dateStart`, `dateEnd`, `limit` (default 50).
 * @returns {Array<object>} Observation rows with an extra `fts5_rank` column, ordered by rank.
 */
function searchObservationsFTSWithRank(db, query, filters = {}) {
  const maxRows = filters.limit || 50;
  try {
    const matchExpr = sanitizeFTS5Query(query);
    if (!matchExpr) return [];

    // Each optional filter contributes one clause and one bound value, in this order.
    const optionalClauses = [
      [filters.project, " AND o.project = ?"],
      [filters.type, " AND o.type = ?"],
      [filters.dateStart, " AND o.created_at_epoch >= ?"],
      [filters.dateEnd, " AND o.created_at_epoch <= ?"]
    ];
    const bindings = [matchExpr];
    let sql = `
      SELECT o.*, rank as fts5_rank FROM observations o
      JOIN observations_fts fts ON o.id = fts.rowid
      WHERE observations_fts MATCH ?
    `;
    for (const [value, clause] of optionalClauses) {
      if (!value) continue;
      sql += clause;
      bindings.push(value);
    }
    sql += " ORDER BY rank LIMIT ?";
    bindings.push(maxRows);
    return db.query(sql).all(...bindings);
  } catch {
    return [];
  }
}
104
/**
 * Substring search over observations using LIKE on `title`, `text`,
 * `narrative` and `concepts`. Wildcards in the query are escaped so the
 * text matches literally.
 *
 * @param {object} db - Database handle exposing `query(sql).all(...params)`.
 * @param {string} query - Raw search text.
 * @param {object} [filters] - Optional `project`, `type`, `dateStart`, `dateEnd`, `limit` (default 50).
 * @returns {Array<object>} Matching rows, newest first.
 */
function searchObservationsLIKE(db, query, filters = {}) {
  const maxRows = filters.limit || 50;
  const likeArg = `%${escapeLikePattern(query)}%`;

  // One bound copy of the pattern per searched column.
  const bindings = Array(4).fill(likeArg);
  let sql = `
    SELECT * FROM observations
    WHERE (title LIKE ? ESCAPE '\\' OR text LIKE ? ESCAPE '\\' OR narrative LIKE ? ESCAPE '\\' OR concepts LIKE ? ESCAPE '\\')
  `;
  const optionalClauses = [
    [filters.project, " AND project = ?"],
    [filters.type, " AND type = ?"],
    [filters.dateStart, " AND created_at_epoch >= ?"],
    [filters.dateEnd, " AND created_at_epoch <= ?"]
  ];
  for (const [value, clause] of optionalClauses) {
    if (!value) continue;
    sql += clause;
    bindings.push(value);
  }
  sql += " ORDER BY created_at_epoch DESC LIMIT ?";
  bindings.push(maxRows);
  return db.query(sql).all(...bindings);
}
133
/**
 * Substring search over summaries using LIKE on `request`, `learned`,
 * `completed`, `notes` and `next_steps`. Wildcards in the query are escaped.
 *
 * @param {object} db - Database handle exposing `query(sql).all(...params)`.
 * @param {string} query - Raw search text.
 * @param {object} [filters] - Optional `project`, `dateStart`, `dateEnd`, `limit` (default 20).
 * @returns {Array<object>} Matching summary rows, newest first.
 */
function searchSummariesFiltered(db, query, filters = {}) {
  const maxRows = filters.limit || 20;
  const likeArg = `%${escapeLikePattern(query)}%`;

  // One bound copy of the pattern per searched column.
  const bindings = Array(5).fill(likeArg);
  let sql = `
    SELECT * FROM summaries
    WHERE (request LIKE ? ESCAPE '\\' OR learned LIKE ? ESCAPE '\\' OR completed LIKE ? ESCAPE '\\' OR notes LIKE ? ESCAPE '\\' OR next_steps LIKE ? ESCAPE '\\')
  `;
  const optionalClauses = [
    [filters.project, " AND project = ?"],
    [filters.dateStart, " AND created_at_epoch >= ?"],
    [filters.dateEnd, " AND created_at_epoch <= ?"]
  ];
  for (const [value, clause] of optionalClauses) {
    if (!value) continue;
    sql += clause;
    bindings.push(value);
  }
  sql += " ORDER BY created_at_epoch DESC LIMIT ?";
  bindings.push(maxRows);
  return db.query(sql).all(...bindings);
}
158
/**
 * Loads observations by primary key.
 *
 * Only positive integer ids are accepted and at most the first 500 valid
 * ids are used; anything else is dropped silently.
 *
 * @param {object} db - Database handle exposing `query(sql).all(...params)`.
 * @param {Array<number>} ids - Candidate observation ids.
 * @returns {Array<object>} Matching rows, newest first; `[]` when no valid id is given.
 */
function getObservationsByIds(db, ids) {
  if (!Array.isArray(ids) || ids.length === 0) return [];

  const maxIds = 500;
  const accepted = [];
  for (const candidate of ids) {
    if (accepted.length === maxIds) break;
    // Number.isInteger is false for every non-number, so no separate typeof check is needed.
    if (Number.isInteger(candidate) && candidate > 0) accepted.push(candidate);
  }
  if (accepted.length === 0) return [];

  const slots = Array(accepted.length).fill("?").join(",");
  return db
    .query(`SELECT * FROM observations WHERE id IN (${slots}) ORDER BY created_at_epoch DESC`)
    .all(...accepted);
}
167
/**
 * Returns the observations surrounding an anchor observation in
 * chronological order: up to `depthBefore` earlier rows, the anchor itself,
 * then up to `depthAfter` later rows.
 *
 * Rows that share the anchor's `created_at_epoch` are ordered by `id`, so
 * observations written in the same millisecond as the anchor are not
 * dropped from the timeline.
 *
 * @param {object} db - Database handle exposing `query(sql).all(...params)`.
 * @param {number} anchorId - Id of the observation to centre on.
 * @param {number} [depthBefore=5] - Maximum number of earlier rows.
 * @param {number} [depthAfter=5] - Maximum number of later rows.
 * @returns {Array<object>} Timeline rows, oldest first; `[]` when the anchor does not exist.
 */
function getTimeline(db, anchorId, depthBefore = 5, depthAfter = 5) {
  const columns = "id, 'observation' as type, title, text as content, project, created_at, created_at_epoch";

  // A single lookup serves both as the existence check and as the anchor row.
  const self = db.query(`
    SELECT ${columns}
    FROM observations WHERE id = ?
  `).all(anchorId);
  if (self.length === 0) return [];

  const anchorEpoch = self[0].created_at_epoch;
  const anchorRowId = self[0].id;

  // Fetched newest-first so LIMIT keeps the rows closest to the anchor, then flipped.
  const before = db.query(`
    SELECT ${columns}
    FROM observations
    WHERE created_at_epoch < ? OR (created_at_epoch = ? AND id < ?)
    ORDER BY created_at_epoch DESC, id DESC
    LIMIT ?
  `).all(anchorEpoch, anchorEpoch, anchorRowId, depthBefore).reverse();

  const after = db.query(`
    SELECT ${columns}
    FROM observations
    WHERE created_at_epoch > ? OR (created_at_epoch = ? AND id > ?)
    ORDER BY created_at_epoch ASC, id ASC
    LIMIT ?
  `).all(anchorEpoch, anchorEpoch, anchorRowId, depthAfter);

  return [...before, ...self, ...after];
}
195
/**
 * Counts the rows belonging to a project in each of the four tables
 * `observations`, `summaries`, `sessions` and `prompts`.
 *
 * @param {object} db - Database handle exposing `query(sql).get(...params)`.
 * @param {string} project - Project name to count for.
 * @returns {{observations: number, summaries: number, sessions: number, prompts: number}}
 *   Row counts; a missing result row counts as 0.
 */
function getProjectStats(db, project) {
  const countIn = (table) => {
    const row = db.query(`SELECT COUNT(*) as count FROM ${table} WHERE project = ?`).get(project);
    return row?.count || 0;
  };
  return {
    observations: countIn("observations"),
    summaries: countIn("summaries"),
    sessions: countIn("sessions"),
    prompts: countIn("prompts")
  };
}
207
/**
 * Finds observations of a project whose referenced files have been modified
 * on disk after the observation was recorded.
 *
 * Looks at the 500 most recent observations that have a non-empty
 * `files_modified` value, which is treated as a comma-separated list of
 * paths. An observation is stale as soon as one of its files has an
 * `mtimeMs` greater than the observation's `created_at_epoch`.
 *
 * @param {object} db - Database handle exposing `query(sql).all(...params)`.
 * @param {string} project - Project name to inspect.
 * @returns {Array<object>} The stale observation rows, newest first.
 */
function getStaleObservations(db, project) {
  const candidates = db.query(`
    SELECT * FROM observations
    WHERE project = ? AND files_modified IS NOT NULL AND files_modified != ''
    ORDER BY created_at_epoch DESC
    LIMIT 500
  `).all(project);

  // A single stat call replaces the former exists-then-stat pair: it halves the
  // syscalls and closes the window in which the file could vanish between the two.
  // Missing or unreadable files cannot prove staleness, so they count as "not modified".
  const modifiedAfter = (filepath, epochMs) => {
    try {
      return statSync(filepath).mtimeMs > epochMs;
    } catch {
      return false;
    }
  };

  return candidates.filter((obs) => {
    if (!obs.files_modified) return false;
    const paths = obs.files_modified.split(",").map((f) => f.trim()).filter(Boolean);
    return paths.some((filepath) => modifiedAfter(filepath, obs.created_at_epoch));
  });
}
236
/**
 * Sets or clears the `is_stale` flag on a batch of observations.
 *
 * Only positive integer ids are accepted and at most the first 500 valid
 * ids are updated; the call is a no-op when none remain.
 *
 * @param {object} db - Database handle exposing `run(sql, params)`.
 * @param {Array<number>} ids - Observation ids to update.
 * @param {*} stale - Truthy stores 1, falsy stores 0.
 * @returns {void}
 */
function markObservationsStale(db, ids, stale) {
  if (!Array.isArray(ids) || ids.length === 0) return;

  const maxIds = 500;
  const accepted = [];
  for (const candidate of ids) {
    if (accepted.length === maxIds) break;
    if (Number.isInteger(candidate) && candidate > 0) accepted.push(candidate);
  }
  if (accepted.length === 0) return;

  const slots = Array(accepted.length).fill("?").join(",");
  const flag = stale ? 1 : 0;
  db.run(`UPDATE observations SET is_stale = ? WHERE id IN (${slots})`, [flag, ...accepted]);
}
246
+ var init_Search = __esm({
247
+ "src/services/sqlite/Search.ts"() {
248
+ "use strict";
249
+ }
250
+ });
251
+
12
252
  // src/services/sqlite/Observations.ts
13
253
  var Observations_exports = {};
14
254
  __export(Observations_exports, {
255
+ consolidateObservations: () => consolidateObservations,
15
256
  createObservation: () => createObservation,
16
257
  deleteObservation: () => deleteObservation,
17
258
  getObservationsByProject: () => getObservationsByProject,
18
259
  getObservationsBySession: () => getObservationsBySession,
19
- searchObservations: () => searchObservations
260
+ searchObservations: () => searchObservations,
261
+ updateLastAccessed: () => updateLastAccessed
20
262
  });
263
/**
 * Escapes the characters that are special inside a SQL LIKE pattern
 * (`%`, `_` and the backslash itself) by prefixing each with a backslash.
 * Intended for patterns used together with `ESCAPE '\'`.
 *
 * @param {string} input - Raw user text.
 * @returns {string} Text that matches literally inside a LIKE pattern.
 */
function escapeLikePattern2(input) {
  const likeSpecials = /[%_\\]/g;
  return input.replace(likeSpecials, (special) => `\\${special}`);
}
21
266
  function createObservation(db, memorySessionId, project, type, title, subtitle, text, narrative, facts, concepts, filesRead, filesModified, promptNumber) {
22
267
  const now = /* @__PURE__ */ new Date();
23
268
  const result = db.run(
@@ -41,12 +286,12 @@ function getObservationsByProject(db, project, limit = 100) {
41
286
  return query.all(project, limit);
42
287
  }
43
288
  function searchObservations(db, searchTerm, project) {
44
- const sql = project ? `SELECT * FROM observations
45
- WHERE project = ? AND (title LIKE ? OR text LIKE ? OR narrative LIKE ?)
46
- ORDER BY created_at_epoch DESC` : `SELECT * FROM observations
47
- WHERE title LIKE ? OR text LIKE ? OR narrative LIKE ?
289
+ const sql = project ? `SELECT * FROM observations
290
+ WHERE project = ? AND (title LIKE ? ESCAPE '\\' OR text LIKE ? ESCAPE '\\' OR narrative LIKE ? ESCAPE '\\')
291
+ ORDER BY created_at_epoch DESC` : `SELECT * FROM observations
292
+ WHERE title LIKE ? ESCAPE '\\' OR text LIKE ? ESCAPE '\\' OR narrative LIKE ? ESCAPE '\\'
48
293
  ORDER BY created_at_epoch DESC`;
49
- const pattern = `%${searchTerm}%`;
294
+ const pattern = `%${escapeLikePattern2(searchTerm)}%`;
50
295
  const query = db.query(sql);
51
296
  if (project) {
52
297
  return query.all(project, pattern, pattern, pattern);
@@ -56,6 +301,65 @@ function searchObservations(db, searchTerm, project) {
56
301
  function deleteObservation(db, id) {
57
302
  db.run("DELETE FROM observations WHERE id = ?", [id]);
58
303
  }
304
/**
 * Stamps `last_accessed_epoch` with the current time (milliseconds) on a
 * batch of observations.
 *
 * Only positive integer ids are accepted and at most the first 500 valid
 * ids are updated; the call is a no-op when none remain.
 *
 * @param {object} db - Database handle exposing `run(sql, params)`.
 * @param {Array<number>} ids - Observation ids that were just accessed.
 * @returns {void}
 */
function updateLastAccessed(db, ids) {
  if (!Array.isArray(ids) || ids.length === 0) return;

  const maxIds = 500;
  const accepted = [];
  for (const candidate of ids) {
    if (accepted.length === maxIds) break;
    if (Number.isInteger(candidate) && candidate > 0) accepted.push(candidate);
  }
  if (accepted.length === 0) return;

  const accessedAt = Date.now();
  const slots = Array(accepted.length).fill("?").join(",");
  db.run(`UPDATE observations SET last_accessed_epoch = ? WHERE id IN (${slots})`, [accessedAt, ...accepted]);
}
315
/**
 * Merges groups of observations of one project that share the same `type`
 * and the same `files_modified` value.
 *
 * In every group the newest observation is kept. Its text becomes the
 * de-duplicated texts of the whole group joined by a `---` separator line
 * (capped at 100 000 characters) and its title is prefixed with the group
 * size. The remaining observations and their embeddings are deleted.
 *
 * @param {object} db - Database handle exposing `query(sql).all(...)` and `run(sql, params)`;
 *   when it also offers `transaction(fn)`, each group is merged atomically.
 * @param {string} project - Project whose observations are consolidated.
 * @param {object} [options]
 * @param {number} [options.minGroupSize=3] - Smallest group that is merged (values below 2 are raised to 2).
 * @param {boolean} [options.dryRun] - When truthy, only count what would happen.
 * @returns {{merged: number, removed: number}} Number of merged groups and of removed observations.
 */
function consolidateObservations(db, project, options = {}) {
  // A group of one has nothing to merge; without this floor a minGroupSize of 1
  // would re-prefix the title of every singleton on each run.
  const minGroupSize = Math.max(2, options.minGroupSize || 3);

  const groups = db.query(`
    SELECT type, files_modified, COUNT(*) as cnt, GROUP_CONCAT(id) as ids
    FROM observations
    WHERE project = ? AND files_modified IS NOT NULL AND files_modified != ''
    GROUP BY type, files_modified
    HAVING cnt >= ?
    ORDER BY cnt DESC
  `).all(project, minGroupSize);
  if (groups.length === 0) return { merged: 0, removed: 0 };

  let totalMerged = 0;
  let totalRemoved = 0;
  for (const group of groups) {
    const obsIds = group.ids.split(",").map(Number);
    const placeholders = obsIds.map(() => "?").join(",");
    // NOTE(review): very large groups bind one parameter per id and could exceed
    // the database's bound-parameter limit; confirm expected group sizes.
    const observations = db.query(
      `SELECT * FROM observations WHERE id IN (${placeholders}) ORDER BY created_at_epoch DESC`
    ).all(...obsIds);
    if (observations.length < minGroupSize) continue;

    if (!options.dryRun) {
      const [keeper, ...others] = observations;
      // A Set keeps first-seen order, so the keeper's text leads and duplicates collapse.
      const uniqueTexts = new Set(
        observations.map((obs) => obs.text).filter(Boolean)
      );
      const consolidatedText = Array.from(uniqueTexts).join("\n---\n").substring(0, 1e5);
      const removeIds = others.map((o) => o.id);
      const removePlaceholders = removeIds.map(() => "?").join(",");

      const applyMerge = () => {
        db.run(
          "UPDATE observations SET text = ?, title = ? WHERE id = ?",
          [consolidatedText, `[consolidato x${observations.length}] ${keeper.title}`, keeper.id]
        );
        // Embeddings go first so the deletes also succeed when the embeddings
        // table references observations through an enforced foreign key.
        db.run(`DELETE FROM observation_embeddings WHERE observation_id IN (${removePlaceholders})`, removeIds);
        db.run(`DELETE FROM observations WHERE id IN (${removePlaceholders})`, removeIds);
        // NOTE(review): the keeper's stored embedding still reflects its old text;
        // consider invalidating it so it gets regenerated.
      };

      // Update + deletes must land together; a failure in between would otherwise
      // leave a re-titled keeper next to the rows it was supposed to replace.
      if (typeof db.transaction === "function") {
        db.transaction(applyMerge)();
      } else {
        applyMerge();
      }
    }

    totalMerged += 1;
    totalRemoved += observations.length - 1;
  }
  return { merged: totalMerged, removed: totalRemoved };
}
59
363
  var init_Observations = __esm({
60
364
  "src/services/sqlite/Observations.ts"() {
61
365
  "use strict";
@@ -416,85 +720,482 @@ function getChromaManager() {
416
720
  return chromaManager;
417
721
  }
418
722
 
723
+ // src/services/search/EmbeddingService.ts
724
/**
 * Lazily-initialised text embedding service.
 *
 * Tries the optional `fastembed` package first, then
 * `@huggingface/transformers`; when neither can be loaded the service stays
 * in a disabled state and every embed call resolves to `null`.
 */
var EmbeddingService = class {
  /** Name of the active provider ("fastembed" | "transformers") or null when none loaded. */
  provider = null;
  /** Provider-specific model handle. */
  model = null;
  /** True once an initialisation attempt has finished (successfully or not). */
  initialized = false;
  /** In-flight initialisation promise shared by concurrent callers, else null. */
  initializing = null;

  /**
   * Initialises the embedding provider once; concurrent callers share the
   * same in-flight attempt.
   *
   * @returns {Promise<boolean>} True when a provider is available.
   */
  async initialize() {
    if (this.initialized) return this.provider !== null;
    if (this.initializing) return this.initializing;
    this.initializing = this._doInitialize();
    try {
      return await this.initializing;
    } finally {
      // Always clear the in-flight marker: if the attempt rejected, keeping it
      // would hand the same rejected promise to every later caller forever.
      this.initializing = null;
    }
  }

  /**
   * Performs the actual provider detection: fastembed, then
   * @huggingface/transformers, then the disabled state.
   *
   * @returns {Promise<boolean>} True when a provider was loaded.
   */
  async _doInitialize() {
    try {
      const fastembed = await import("fastembed");
      // The package may expose its API directly or under a default export.
      const EmbeddingModel = fastembed.EmbeddingModel || fastembed.default?.EmbeddingModel;
      const FlagEmbedding = fastembed.FlagEmbedding || fastembed.default?.FlagEmbedding;
      if (FlagEmbedding && EmbeddingModel) {
        this.model = await FlagEmbedding.init({
          model: EmbeddingModel.BGESmallENV15
        });
        this.provider = "fastembed";
        this.initialized = true;
        logger.info("EMBEDDING", "Inizializzato con fastembed (BGE-small-en-v1.5)");
        return true;
      }
    } catch (error) {
      logger.debug("EMBEDDING", `fastembed non disponibile: ${error}`);
    }
    try {
      const transformers = await import("@huggingface/transformers");
      const pipeline = transformers.pipeline || transformers.default?.pipeline;
      if (pipeline) {
        // NOTE(review): confirm the installed transformers version still honours `quantized`.
        this.model = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2", {
          quantized: true
        });
        this.provider = "transformers";
        this.initialized = true;
        logger.info("EMBEDDING", "Inizializzato con @huggingface/transformers (all-MiniLM-L6-v2)");
        return true;
      }
    } catch (error) {
      logger.debug("EMBEDDING", `@huggingface/transformers non disponibile: ${error}`);
    }
    // Neither provider could be loaded: remember that so we do not retry on every call.
    this.provider = null;
    this.initialized = true;
    logger.warn("EMBEDDING", "Nessun provider embedding disponibile, ricerca semantica disabilitata");
    return false;
  }

  /**
   * Generates the embedding of a single text (only its first 2000 characters).
   *
   * @param {string} text - Text to embed.
   * @returns {Promise<Float32Array|null>} The vector, or null when no provider
   *   is available or embedding fails (failures are logged, not thrown).
   */
  async embed(text) {
    if (!this.initialized) await this.initialize();
    if (!this.provider || !this.model) return null;
    try {
      const truncated = text.substring(0, 2e3);
      if (this.provider === "fastembed") {
        return await this._embedFastembed(truncated);
      } else if (this.provider === "transformers") {
        return await this._embedTransformers(truncated);
      }
    } catch (error) {
      logger.error("EMBEDDING", `Errore generazione embedding: ${error}`);
    }
    return null;
  }

  /**
   * Generates embeddings for several texts, one after the other.
   *
   * @param {Array<string>} texts - Texts to embed.
   * @returns {Promise<Array<Float32Array|null>>} One entry per input, null where embedding failed.
   */
  async embedBatch(texts) {
    if (!this.initialized) await this.initialize();
    if (!this.provider || !this.model) return texts.map(() => null);
    const results = [];
    for (const text of texts) {
      try {
        const embedding = await this.embed(text);
        results.push(embedding);
      } catch {
        results.push(null);
      }
    }
    return results;
  }

  /**
   * Tells whether initialisation has finished and a provider is loaded.
   *
   * @returns {boolean}
   */
  isAvailable() {
    return this.initialized && this.provider !== null;
  }

  /**
   * Name of the active provider.
   *
   * @returns {string|null}
   */
  getProvider() {
    return this.provider;
  }

  /**
   * Dimensionality reported for embedding vectors (fixed at 384).
   *
   * @returns {number}
   */
  getDimensions() {
    return 384;
  }

  // --- Provider-specific helpers ---

  /**
   * Embeds one text through the fastembed model, whose `embed` yields batches.
   *
   * @param {string} text
   * @returns {Promise<Float32Array|null>} First vector of the first non-empty batch, else null.
   */
  async _embedFastembed(text) {
    const embeddings = this.model.embed([text], 1);
    for await (const batch of embeddings) {
      if (batch && batch.length > 0) {
        const vec = batch[0];
        return vec instanceof Float32Array ? vec : new Float32Array(vec);
      }
    }
    return null;
  }

  /**
   * Embeds one text through the transformers feature-extraction pipeline
   * with mean pooling and normalisation.
   *
   * @param {string} text
   * @returns {Promise<Float32Array|null>} The vector, or null when the output has no data.
   */
  async _embedTransformers(text) {
    const output = await this.model(text, {
      pooling: "mean",
      normalize: true
    });
    if (output?.data) {
      return output.data instanceof Float32Array ? output.data : new Float32Array(output.data);
    }
    return null;
  }
};
854
// Module-wide EmbeddingService instance, created on first request.
var embeddingService = null;

/**
 * Returns the shared EmbeddingService, creating it on the first call.
 *
 * @returns {EmbeddingService} The singleton instance.
 */
function getEmbeddingService() {
  embeddingService ??= new EmbeddingService();
  return embeddingService;
}
861
+
862
+ // src/services/search/VectorSearch.ts
863
/**
 * Cosine similarity of two numeric vectors.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} Similarity in [-1, 1]; 0 when the lengths differ or either vector has zero norm.
 */
function cosineSimilarity(a, b) {
  const size = a.length;
  if (size !== b.length) return 0;

  let dot = 0;
  let squaredA = 0;
  let squaredB = 0;
  let idx = 0;
  while (idx < size) {
    const x = a[idx];
    const y = b[idx];
    dot += x * y;
    squaredA += x * x;
    squaredB += y * y;
    idx += 1;
  }

  const scale = Math.sqrt(squaredA) * Math.sqrt(squaredB);
  return scale === 0 ? 0 : dot / scale;
}
877
/**
 * Wraps the bytes of a Float32Array in a Buffer without copying them
 * (the Buffer is a view over the same memory).
 *
 * @param {Float32Array} arr - Vector to expose as bytes.
 * @returns {Buffer} Buffer covering exactly the array's bytes.
 */
function float32ToBuffer(arr) {
  const { buffer, byteOffset, byteLength } = arr;
  return Buffer.from(buffer, byteOffset, byteLength);
}
880
/**
 * Decodes a byte buffer into a Float32Array.
 *
 * The bytes are copied into a fresh ArrayBuffer first, so the result does
 * not depend on the alignment of the source view.
 *
 * @param {Buffer|Uint8Array} buf - Bytes holding 32-bit floats.
 * @returns {Float32Array} Decoded vector.
 */
function bufferToFloat32(buf) {
  const start = buf.byteOffset;
  const end = start + buf.byteLength;
  const copiedBytes = buf.buffer.slice(start, end);
  return new Float32Array(copiedBytes);
}
884
/**
 * Brute-force vector search over the embeddings stored in the
 * `observation_embeddings` table.
 */
var VectorSearch = class {
  /**
   * Semantic search: scores every stored embedding against the query vector
   * with cosine similarity and returns the best matches.
   *
   * @param {object} db - Database handle exposing `query(sql).all(...params)`.
   * @param {Float32Array} queryEmbedding - Embedding of the search text.
   * @param {object} [options]
   * @param {string} [options.project] - Restrict the search to one project.
   * @param {number} [options.limit=10] - Maximum number of hits.
   * @param {number} [options.threshold=0.3] - Minimum similarity; an explicit 0 is honoured.
   * @returns {Promise<Array<object>>} Hits sorted by descending similarity; `[]` on error.
   */
  async search(db, queryEmbedding, options = {}) {
    const limit = options.limit || 10;
    // `??` rather than `||`: a caller asking for threshold 0 must not be bumped to 0.3.
    const threshold = options.threshold ?? 0.3;
    try {
      let sql = `
        SELECT e.observation_id, e.embedding,
               o.title, o.text, o.type, o.project, o.created_at, o.created_at_epoch
        FROM observation_embeddings e
        JOIN observations o ON o.id = e.observation_id
      `;
      const params = [];
      if (options.project) {
        sql += " WHERE o.project = ?";
        params.push(options.project);
      }
      const rows = db.query(sql).all(...params);
      const scored = [];
      let undecodable = 0;
      for (const row of rows) {
        let embedding;
        try {
          embedding = bufferToFloat32(row.embedding);
        } catch {
          // One corrupt or missing blob must not discard every other hit.
          undecodable += 1;
          continue;
        }
        const similarity = cosineSimilarity(queryEmbedding, embedding);
        if (similarity >= threshold) {
          scored.push({
            id: row.observation_id,
            observationId: row.observation_id,
            similarity,
            title: row.title,
            text: row.text,
            type: row.type,
            project: row.project,
            created_at: row.created_at,
            created_at_epoch: row.created_at_epoch
          });
        }
      }
      if (undecodable > 0) {
        logger.warn("VECTOR", `Embedding non decodificabili ignorati: ${undecodable}`);
      }
      scored.sort((a, b) => b.similarity - a.similarity);
      return scored.slice(0, limit);
    } catch (error) {
      logger.error("VECTOR", `Errore ricerca vettoriale: ${error}`);
      return [];
    }
  }

  /**
   * Stores (or replaces) the embedding of an observation. Failures are
   * logged and swallowed.
   *
   * @param {object} db - Database handle exposing `query(sql).run(...params)`.
   * @param {number} observationId - Observation the vector belongs to.
   * @param {Float32Array} embedding - Vector to persist.
   * @param {string} model - Label stored in the `model` column.
   * @returns {Promise<void>}
   */
  async storeEmbedding(db, observationId, embedding, model) {
    try {
      const blob = float32ToBuffer(embedding);
      db.query(`
        INSERT OR REPLACE INTO observation_embeddings
          (observation_id, embedding, model, dimensions, created_at)
        VALUES (?, ?, ?, ?, ?)
      `).run(
        observationId,
        blob,
        model,
        embedding.length,
        (/* @__PURE__ */ new Date()).toISOString()
      );
      logger.debug("VECTOR", `Embedding salvato per osservazione ${observationId}`);
    } catch (error) {
      logger.error("VECTOR", `Errore salvataggio embedding: ${error}`);
    }
  }

  /**
   * Generates embeddings for the newest observations that do not have one yet.
   *
   * @param {object} db - Database handle.
   * @param {number} [batchSize=50] - Maximum number of observations processed in this call.
   * @returns {Promise<number>} Number of embeddings generated; 0 when the embedding service is unavailable.
   */
  async backfillEmbeddings(db, batchSize = 50) {
    const embeddingService2 = getEmbeddingService();
    if (!await embeddingService2.initialize()) {
      logger.warn("VECTOR", "Embedding service non disponibile, backfill saltato");
      return 0;
    }
    const rows = db.query(`
      SELECT o.id, o.title, o.text, o.narrative, o.concepts
      FROM observations o
      LEFT JOIN observation_embeddings e ON e.observation_id = o.id
      WHERE e.observation_id IS NULL
      ORDER BY o.created_at_epoch DESC
      LIMIT ?
    `).all(batchSize);
    if (rows.length === 0) return 0;
    let count = 0;
    // The provider name is what gets recorded in the `model` column.
    const model = embeddingService2.getProvider() || "unknown";
    for (const row of rows) {
      const parts = [row.title];
      if (row.text) parts.push(row.text);
      if (row.narrative) parts.push(row.narrative);
      if (row.concepts) parts.push(row.concepts);
      const fullText = parts.join(" ").substring(0, 2e3);
      const embedding = await embeddingService2.embed(fullText);
      if (embedding) {
        await this.storeEmbedding(db, row.id, embedding, model);
        count++;
      }
    }
    logger.info("VECTOR", `Backfill completato: ${count}/${rows.length} embeddings generati`);
    return count;
  }

  /**
   * Embedding coverage statistics.
   *
   * @param {object} db - Database handle exposing `query(sql).get()`.
   * @returns {{total: number, embedded: number, percentage: number}}
   *   Observation count, embedding count and rounded percentage; all zeros on error.
   */
  getStats(db) {
    try {
      const totalRow = db.query("SELECT COUNT(*) as count FROM observations").get();
      const embeddedRow = db.query("SELECT COUNT(*) as count FROM observation_embeddings").get();
      const total = totalRow?.count || 0;
      const embedded = embeddedRow?.count || 0;
      const percentage = total > 0 ? Math.round(embedded / total * 100) : 0;
      return { total, embedded, percentage };
    } catch {
      return { total: 0, embedded: 0, percentage: 0 };
    }
  }
};
1002
// Module-wide VectorSearch instance, created on first request.
var vectorSearch = null;

/**
 * Returns the shared VectorSearch, creating it on the first call.
 *
 * @returns {VectorSearch} The singleton instance.
 */
function getVectorSearch() {
  vectorSearch ??= new VectorSearch();
  return vectorSearch;
}
1009
+
1010
+ // src/services/search/ScoringEngine.ts
1011
/**
 * Signal weights for the composite score used by explicit searches.
 * All four signals take part.
 */
var SEARCH_WEIGHTS = {
  semantic: 0.4,      // vector similarity
  fts5: 0.3,          // normalised full-text rank
  recency: 0.2,       // age decay
  projectMatch: 0.1   // same-project bonus
};

/**
 * Signal weights for context scoring: the text-relevance signals are
 * switched off, only recency and project match count.
 */
var CONTEXT_WEIGHTS = {
  semantic: 0,
  fts5: 0,
  recency: 0.7,
  projectMatch: 0.3
};
1023
/**
 * Exponential-decay freshness score based on creation time.
 *
 * @param {number} createdAtEpoch - Creation time in epoch milliseconds.
 * @param {number} [halfLifeHours=168] - Age (hours) at which the score halves.
 * @returns {number} 0 for a missing or non-positive timestamp, 1 for a timestamp
 *   that is now or in the future, otherwise a value between 0 and 1.
 */
function recencyScore(createdAtEpoch, halfLifeHours = 168) {
  const hasTimestamp = Boolean(createdAtEpoch) && createdAtEpoch > 0;
  if (!hasTimestamp) return 0;

  const elapsedMs = Date.now() - createdAtEpoch;
  if (elapsedMs <= 0) return 1;

  const msPerHour = 1e3 * 60 * 60;
  const elapsedHours = elapsedMs / msPerHour;
  return Math.exp(-elapsedHours * Math.LN2 / halfLifeHours);
}
1031
/**
 * Maps a raw FTS rank onto [0, 1] relative to the ranks of the whole result
 * set, with the smallest rank in the set mapping to 1 and the largest to 0.
 *
 * @param {number} rank - Rank of the item being scored.
 * @param {Array<number>} allRanks - Ranks of every item in the result set.
 * @returns {number} 0 for an empty set, 1 for a single item or when all ranks
 *   are equal, otherwise the linear position between worst and best.
 */
function normalizeFTS5Rank(rank, allRanks) {
  switch (allRanks.length) {
    case 0:
      return 0;
    case 1:
      return 1;
    default:
      break;
  }
  const lowest = Math.min(...allRanks);
  const highest = Math.max(...allRanks);
  const spread = highest - lowest;
  return lowest === highest ? 1 : (highest - rank) / spread;
}
1039
/**
 * Binary score telling whether an item belongs to the target project
 * (compared case-insensitively).
 *
 * @param {string} itemProject - Project of the item.
 * @param {string} targetProject - Project being searched for.
 * @returns {number} 1 on a match, 0 otherwise or when either name is missing.
 */
function projectMatchScore(itemProject, targetProject) {
  const bothGiven = Boolean(itemProject) && Boolean(targetProject);
  if (!bothGiven) return 0;
  const sameProject = itemProject.toLowerCase() === targetProject.toLowerCase();
  return sameProject ? 1 : 0;
}
1043
/**
 * Weighted sum of the four ranking signals.
 *
 * @param {{semantic: number, fts5: number, recency: number, projectMatch: number}} signals - Signal values.
 * @param {{semantic: number, fts5: number, recency: number, projectMatch: number}} weights - Weight of each signal.
 * @returns {number} The sum of signal × weight over the four signals.
 */
function computeCompositeScore(signals, weights) {
  // Accumulated in the fixed order semantic → fts5 → recency → projectMatch.
  let total = signals.semantic * weights.semantic;
  total += signals.fts5 * weights.fts5;
  total += signals.recency * weights.recency;
  total += signals.projectMatch * weights.projectMatch;
  return total;
}
1046
/**
 * Exponential-decay freshness score based on the last access time.
 *
 * @param {number} lastAccessedEpoch - Last access time in epoch milliseconds.
 * @param {number} [halfLifeHours=48] - Age (hours) at which the score halves.
 * @returns {number} 0 for a missing or non-positive timestamp, 1 for a timestamp
 *   that is now or in the future, otherwise a value between 0 and 1.
 */
function accessRecencyScore(lastAccessedEpoch, halfLifeHours = 48) {
  const hasTimestamp = Boolean(lastAccessedEpoch) && lastAccessedEpoch > 0;
  if (!hasTimestamp) return 0;

  const elapsedMs = Date.now() - lastAccessedEpoch;
  if (elapsedMs <= 0) return 1;

  const msPerHour = 1e3 * 60 * 60;
  const elapsedHours = elapsedMs / msPerHour;
  return Math.exp(-elapsedHours * Math.LN2 / halfLifeHours);
}
1054
/**
 * Score multiplier for stale observations.
 *
 * @param {number} isStale - Stale flag; only the number 1 counts as stale.
 * @returns {number} 0.5 when the flag is exactly 1, otherwise 1.
 */
function stalenessPenalty(isStale) {
  if (isStale === 1) {
    return 0.5;
  }
  return 1;
}
1057
/**
 * Score multipliers for observation types that carry durable knowledge.
 * Types not listed here are not boosted.
 */
var KNOWLEDGE_TYPE_BOOST = {
  constraint: 1.3,
  decision: 1.25,
  heuristic: 1.15,
  rejected: 1.1
};

/**
 * Looks up the score multiplier for an observation type.
 *
 * Only the table's own keys are consulted. A plain property read would also
 * return inherited members for types such as "constructor" or "__proto__",
 * and multiplying a score by those yields NaN.
 *
 * @param {string} type - Observation type.
 * @returns {number} The configured boost, or 1 for any other type.
 */
function knowledgeTypeBoost(type) {
  if (!Object.hasOwn(KNOWLEDGE_TYPE_BOOST, type)) return 1;
  return KNOWLEDGE_TYPE_BOOST[type] ?? 1;
}
1066
/**
 * Rough token estimate for a text: one token per four characters, rounded up.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated token count; 0 for empty or missing text.
 */
function estimateTokens(text) {
  const charsPerToken = 4;
  return text ? Math.ceil(text.length / charsPerToken) : 0;
}
1070
+
419
1071
  // src/services/search/HybridSearch.ts
420
1072
  var HybridSearch = class {
421
- chromaManager;
422
- constructor() {
423
- this.chromaManager = new ChromaManager();
424
- }
1073
+ embeddingInitialized = false;
425
1074
  /**
426
- * Initialize search (connects to ChromaDB if available)
1075
+ * Inizializza il servizio di embedding (lazy, non bloccante)
427
1076
  */
428
1077
  async initialize() {
429
- await this.chromaManager.initialize();
1078
+ try {
1079
+ const embeddingService2 = getEmbeddingService();
1080
+ await embeddingService2.initialize();
1081
+ this.embeddingInitialized = embeddingService2.isAvailable();
1082
+ logger.info("SEARCH", `HybridSearch inizializzato (embedding: ${this.embeddingInitialized ? "attivo" : "disattivato"})`);
1083
+ } catch (error) {
1084
+ logger.warn("SEARCH", "Inizializzazione embedding fallita, uso solo FTS5", {}, error);
1085
+ this.embeddingInitialized = false;
1086
+ }
430
1087
  }
431
1088
  /**
432
- * Perform hybrid search combining vector and keyword results
1089
+ * Ricerca ibrida con scoring a 4 segnali
433
1090
  */
434
1091
  async search(db, query, options = {}) {
435
1092
  const limit = options.limit || 10;
436
- const results = [];
437
- if (this.chromaManager.isChromaAvailable()) {
1093
+ const weights = options.weights || SEARCH_WEIGHTS;
1094
+ const targetProject = options.project || "";
1095
+ const rawItems = /* @__PURE__ */ new Map();
1096
+ if (this.embeddingInitialized) {
438
1097
  try {
439
- const vectorResults = await this.chromaManager.search(query, {
440
- project: options.project,
441
- limit: Math.ceil(limit / 2)
442
- });
443
- for (const hit of vectorResults) {
444
- results.push({
445
- id: hit.id,
446
- title: hit.metadata.title || "Untitled",
447
- content: hit.content,
448
- type: hit.metadata.type || "unknown",
449
- project: hit.metadata.project || "unknown",
450
- created_at: hit.metadata.created_at || (/* @__PURE__ */ new Date()).toISOString(),
451
- score: 1 - hit.distance,
452
- // Convert distance to similarity score
453
- source: "vector"
1098
+ const embeddingService2 = getEmbeddingService();
1099
+ const queryEmbedding = await embeddingService2.embed(query);
1100
+ if (queryEmbedding) {
1101
+ const vectorSearch2 = getVectorSearch();
1102
+ const vectorResults = await vectorSearch2.search(db, queryEmbedding, {
1103
+ project: options.project,
1104
+ limit: limit * 2,
1105
+ // Prendiamo piu risultati per il ranking
1106
+ threshold: 0.3
454
1107
  });
1108
+ for (const hit of vectorResults) {
1109
+ rawItems.set(String(hit.observationId), {
1110
+ id: String(hit.observationId),
1111
+ title: hit.title,
1112
+ content: hit.text || "",
1113
+ type: hit.type,
1114
+ project: hit.project,
1115
+ created_at: hit.created_at,
1116
+ created_at_epoch: hit.created_at_epoch,
1117
+ semanticScore: hit.similarity,
1118
+ fts5Rank: null,
1119
+ source: "vector"
1120
+ });
1121
+ }
1122
+ logger.debug("SEARCH", `Vector search: ${vectorResults.length} risultati`);
455
1123
  }
456
- logger.debug("SEARCH", `Vector search returned ${vectorResults.length} results`);
457
1124
  } catch (error) {
458
- logger.warn("SEARCH", "Vector search failed, using keyword only", {}, error);
1125
+ logger.warn("SEARCH", "Ricerca vettoriale fallita, uso solo keyword", {}, error);
459
1126
  }
460
1127
  }
461
1128
  try {
462
- const { searchObservations: searchObservations2 } = await Promise.resolve().then(() => (init_Observations(), Observations_exports));
463
- const keywordResults = searchObservations2(db, query, options.project);
464
- for (const obs of keywordResults.slice(0, Math.ceil(limit / 2))) {
465
- results.push({
466
- id: String(obs.id),
467
- title: obs.title,
468
- content: obs.text || obs.narrative || "",
469
- type: obs.type,
470
- project: obs.project,
471
- created_at: obs.created_at,
472
- score: 0.5,
473
- // Default score for keyword matches
474
- source: "keyword"
475
- });
1129
+ const { searchObservationsFTSWithRank: searchObservationsFTSWithRank2 } = await Promise.resolve().then(() => (init_Search(), Search_exports));
1130
+ const keywordResults = searchObservationsFTSWithRank2(db, query, {
1131
+ project: options.project,
1132
+ limit: limit * 2
1133
+ });
1134
+ for (const obs of keywordResults) {
1135
+ const id = String(obs.id);
1136
+ const existing = rawItems.get(id);
1137
+ if (existing) {
1138
+ existing.fts5Rank = obs.fts5_rank;
1139
+ existing.source = "vector";
1140
+ } else {
1141
+ rawItems.set(id, {
1142
+ id,
1143
+ title: obs.title,
1144
+ content: obs.text || obs.narrative || "",
1145
+ type: obs.type,
1146
+ project: obs.project,
1147
+ created_at: obs.created_at,
1148
+ created_at_epoch: obs.created_at_epoch,
1149
+ semanticScore: 0,
1150
+ fts5Rank: obs.fts5_rank,
1151
+ source: "keyword"
1152
+ });
1153
+ }
476
1154
  }
477
- logger.debug("SEARCH", `Keyword search returned ${keywordResults.length} results`);
1155
+ logger.debug("SEARCH", `Keyword search: ${keywordResults.length} risultati`);
478
1156
  } catch (error) {
479
- logger.error("SEARCH", "Keyword search failed", {}, error);
1157
+ logger.error("SEARCH", "Ricerca keyword fallita", {}, error);
480
1158
  }
481
- const uniqueResults = this.deduplicateAndSort(results, limit);
482
- return uniqueResults;
483
- }
484
- /**
485
- * Remove duplicates and sort by score
486
- */
487
- deduplicateAndSort(results, limit) {
488
- const seen = /* @__PURE__ */ new Set();
489
- const unique = [];
490
- for (const result of results) {
491
- if (!seen.has(result.id)) {
492
- seen.add(result.id);
493
- unique.push(result);
1159
+ if (rawItems.size === 0) return [];
1160
+ const allFTS5Ranks = Array.from(rawItems.values()).filter((item) => item.fts5Rank !== null).map((item) => item.fts5Rank);
1161
+ const scored = [];
1162
+ for (const item of rawItems.values()) {
1163
+ const signals = {
1164
+ semantic: item.semanticScore,
1165
+ fts5: item.fts5Rank !== null ? normalizeFTS5Rank(item.fts5Rank, allFTS5Ranks) : 0,
1166
+ recency: recencyScore(item.created_at_epoch),
1167
+ projectMatch: targetProject ? projectMatchScore(item.project, targetProject) : 0
1168
+ };
1169
+ const score = computeCompositeScore(signals, weights);
1170
+ const isHybrid = item.semanticScore > 0 && item.fts5Rank !== null;
1171
+ const hybridBoost = isHybrid ? 1.15 : 1;
1172
+ const finalScore = Math.min(1, score * hybridBoost * knowledgeTypeBoost(item.type));
1173
+ scored.push({
1174
+ id: item.id,
1175
+ title: item.title,
1176
+ content: item.content,
1177
+ type: item.type,
1178
+ project: item.project,
1179
+ created_at: item.created_at,
1180
+ created_at_epoch: item.created_at_epoch,
1181
+ score: finalScore,
1182
+ source: isHybrid ? "hybrid" : item.source,
1183
+ signals
1184
+ });
1185
+ }
1186
+ scored.sort((a, b) => b.score - a.score);
1187
+ const finalResults = scored.slice(0, limit);
1188
+ if (finalResults.length > 0) {
1189
+ try {
1190
+ const { updateLastAccessed: updateLastAccessed2 } = await Promise.resolve().then(() => (init_Observations(), Observations_exports));
1191
+ const ids = finalResults.map((r) => parseInt(r.id, 10)).filter((id) => id > 0);
1192
+ if (ids.length > 0) {
1193
+ updateLastAccessed2(db, ids);
1194
+ }
1195
+ } catch {
494
1196
  }
495
1197
  }
496
- unique.sort((a, b) => b.score - a.score);
497
- return unique.slice(0, limit);
1198
+ return finalResults;
498
1199
  }
499
1200
  };
500
1201
  var hybridSearch = null;
@@ -505,8 +1206,19 @@ function getHybridSearch() {
505
1206
  return hybridSearch;
506
1207
  }
507
1208
  export {
1209
+ CONTEXT_WEIGHTS,
508
1210
  ChromaManager,
509
1211
  HybridSearch,
1212
+ KNOWLEDGE_TYPE_BOOST,
1213
+ SEARCH_WEIGHTS,
1214
+ accessRecencyScore,
1215
+ computeCompositeScore,
1216
+ estimateTokens,
510
1217
  getChromaManager,
511
- getHybridSearch
1218
+ getHybridSearch,
1219
+ knowledgeTypeBoost,
1220
+ normalizeFTS5Rank,
1221
+ projectMatchScore,
1222
+ recencyScore,
1223
+ stalenessPenalty
512
1224
  };