kiro-memory 1.6.0 → 1.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +105 -99
- package/package.json +14 -7
- package/plugin/dist/cli/contextkit.js +2661 -497
- package/plugin/dist/hooks/agentSpawn.js +1455 -189
- package/plugin/dist/hooks/kiro-hooks.js +1389 -156
- package/plugin/dist/hooks/postToolUse.js +1451 -174
- package/plugin/dist/hooks/stop.js +1426 -170
- package/plugin/dist/hooks/userPromptSubmit.js +1418 -170
- package/plugin/dist/index.js +1406 -172
- package/plugin/dist/sdk/index.js +1389 -155
- package/plugin/dist/servers/mcp-server.js +203 -2
- package/plugin/dist/services/search/EmbeddingService.js +363 -0
- package/plugin/dist/services/search/HybridSearch.js +703 -151
- package/plugin/dist/services/search/ScoringEngine.js +75 -0
- package/plugin/dist/services/search/VectorSearch.js +512 -0
- package/plugin/dist/services/search/index.js +776 -64
- package/plugin/dist/services/sqlite/Database.js +49 -0
- package/plugin/dist/services/sqlite/Observations.js +70 -6
- package/plugin/dist/services/sqlite/Search.js +92 -8
- package/plugin/dist/services/sqlite/Summaries.js +8 -5
- package/plugin/dist/services/sqlite/index.js +384 -18
- package/plugin/dist/types/worker-types.js +6 -0
- package/plugin/dist/viewer.js +369 -69
- package/plugin/dist/worker-service.js +1496 -148
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import { createRequire } from 'module';const require = createRequire(import.meta.url);
|
|
2
|
+
|
|
3
|
+
// src/services/search/ScoringEngine.ts
|
|
4
|
+
/**
 * Signal weights for composite scoring of search results.
 * Keys match the signal names read by computeCompositeScore; the four
 * weights add up to 1.
 */
var SEARCH_WEIGHTS = {
  // Embedding (cosine) similarity signal.
  semantic: 0.4,
  // Full-text (FTS5) rank signal.
  fts5: 0.3,
  // Age-based decay signal.
  recency: 0.2,
  // Same-project signal.
  projectMatch: 0.1
};
|
|
10
|
+
/**
 * Signal weights for a mode with no query text: the semantic and FTS5
 * signals are zeroed out, leaving only recency and project match
 * (which add up to 1).
 */
var CONTEXT_WEIGHTS = {
  semantic: 0,
  fts5: 0,
  recency: 0.7,
  projectMatch: 0.3
};
|
|
16
|
+
/**
 * Exponential-decay recency score.
 *
 * Returns 1 for an item created "now" (or in the future relative to
 * `nowMs`) and halves every `halfLifeHours` of age.
 *
 * @param {number} createdAtEpoch - Creation time in epoch milliseconds
 *   (it is subtracted from a millisecond clock).
 * @param {number} [halfLifeHours=168] - Age, in hours, at which the score is 0.5.
 * @param {number} [nowMs=Date.now()] - Reference "current" time in epoch
 *   milliseconds; injectable so callers can score a whole result set
 *   against one consistent instant (and so the function is testable).
 * @returns {number} Score in [0, 1]; 0 for a missing/non-positive timestamp
 *   or a non-positive/NaN half-life.
 */
function recencyScore(createdAtEpoch, halfLifeHours = 168, nowMs = Date.now()) {
  if (!createdAtEpoch || createdAtEpoch <= 0) return 0;
  const ageMs = nowMs - createdAtEpoch;
  if (ageMs <= 0) return 1;
  // A zero, negative or NaN half-life would yield 0, a value > 1, or NaN
  // respectively; treat all of them as "fully decayed".
  if (!(halfLifeHours > 0)) return 0;
  const ageHours = ageMs / (1e3 * 60 * 60);
  return Math.exp(-ageHours * Math.LN2 / halfLifeHours);
}
|
|
24
|
+
/**
 * Min-max normalize a rank against the ranks of the whole result set.
 *
 * The smallest rank in `allRanks` maps to 1 and the largest to 0, i.e.
 * lower rank values score higher.
 *
 * @param {number} rank - Rank of the item being scored.
 * @param {number[]} allRanks - Ranks of every item in the result set.
 * @returns {number} Value in [0, 1]. 0 for an empty set; 1 when the set has
 *   a single element or all ranks are equal.
 */
function normalizeFTS5Rank(rank, allRanks) {
  if (allRanks.length === 0) return 0;
  if (allRanks.length === 1) return 1;
  // Scan manually instead of Math.min(...allRanks): spreading a very large
  // array into call arguments throws a RangeError.
  let minRank = Infinity;
  let maxRank = -Infinity;
  for (const value of allRanks) {
    if (value < minRank) minRank = value;
    if (value > maxRank) maxRank = value;
  }
  // No spread between best and worst: every item is equally ranked.
  if (!(maxRank > minRank)) return 1;
  const normalized = (maxRank - rank) / (maxRank - minRank);
  // Clamp so a rank outside the observed range cannot leave [0, 1].
  return Math.min(1, Math.max(0, normalized));
}
|
|
32
|
+
/**
 * Binary project-match signal.
 *
 * @param {string} itemProject - Project name attached to the item.
 * @param {string} targetProject - Project name being searched for.
 * @returns {number} 1 when both names are present and equal ignoring case,
 *   otherwise 0.
 */
function projectMatchScore(itemProject, targetProject) {
  const bothPresent = Boolean(itemProject) && Boolean(targetProject);
  if (!bothPresent) {
    return 0;
  }
  const sameProject = itemProject.toLowerCase() === targetProject.toLowerCase();
  if (sameProject) {
    return 1;
  }
  return 0;
}
|
|
36
|
+
/**
 * Weighted sum of the four ranking signals.
 *
 * @param {{semantic?: number, fts5?: number, recency?: number, projectMatch?: number}} signals
 *   Per-item signal values.
 * @param {{semantic?: number, fts5?: number, recency?: number, projectMatch?: number}} weights
 *   Weight applied to each signal.
 * @returns {number} Sum of signal * weight over the four signals. A signal or
 *   weight that is missing (null/undefined) contributes 0 instead of turning
 *   the whole score into NaN.
 */
function computeCompositeScore(signals, weights) {
  let total = 0;
  // Fixed order keeps the floating-point summation order stable.
  for (const key of ["semantic", "fts5", "recency", "projectMatch"]) {
    total += (signals[key] ?? 0) * (weights[key] ?? 0);
  }
  return total;
}
|
|
39
|
+
/**
 * Exponential-decay score based on when an item was last accessed.
 *
 * Returns 1 for an access at (or after) `nowMs` and halves every
 * `halfLifeHours` since the last access.
 *
 * @param {number} lastAccessedEpoch - Last access time in epoch milliseconds
 *   (it is subtracted from a millisecond clock).
 * @param {number} [halfLifeHours=48] - Hours since access at which the score is 0.5.
 * @param {number} [nowMs=Date.now()] - Reference "current" time in epoch
 *   milliseconds; injectable for consistent batch scoring and testing.
 * @returns {number} Score in [0, 1]; 0 for a missing/non-positive timestamp
 *   or a non-positive/NaN half-life.
 */
function accessRecencyScore(lastAccessedEpoch, halfLifeHours = 48, nowMs = Date.now()) {
  if (!lastAccessedEpoch || lastAccessedEpoch <= 0) return 0;
  const ageMs = nowMs - lastAccessedEpoch;
  if (ageMs <= 0) return 1;
  // A zero, negative or NaN half-life would yield 0, a value > 1, or NaN
  // respectively; treat all of them as "fully decayed".
  if (!(halfLifeHours > 0)) return 0;
  const ageHours = ageMs / (1e3 * 60 * 60);
  return Math.exp(-ageHours * Math.LN2 / halfLifeHours);
}
|
|
47
|
+
/**
 * Score multiplier for stale items.
 *
 * @param {number} isStale - Stale flag; only the exact number 1 counts as stale.
 * @returns {number} 0.5 when `isStale` is exactly 1, otherwise 1 (no penalty).
 */
function stalenessPenalty(isStale) {
  if (isStale === 1) {
    return 0.5;
  }
  return 1;
}
|
|
50
|
+
/**
 * Score multipliers keyed by knowledge type. Types not listed here get a
 * neutral multiplier of 1 from knowledgeTypeBoost.
 */
var KNOWLEDGE_TYPE_BOOST = {
  constraint: 1.3,
  decision: 1.25,
  heuristic: 1.15,
  rejected: 1.1
};

/**
 * Look up the score multiplier for a knowledge type.
 *
 * @param {string} type - Knowledge type name.
 * @returns {number} The configured multiplier, or 1 for any type that is not
 *   an own key of KNOWLEDGE_TYPE_BOOST.
 */
function knowledgeTypeBoost(type) {
  // Own-property check: a plain `table[type]` lookup would resolve inherited
  // names such as "constructor" or "toString" to functions, not numbers.
  if (!Object.hasOwn(KNOWLEDGE_TYPE_BOOST, type)) return 1;
  return KNOWLEDGE_TYPE_BOOST[type] ?? 1;
}
|
|
59
|
+
/**
 * Rough token estimate using a fixed ratio of 4 characters per token.
 *
 * @param {string} text - Text to measure.
 * @returns {number} `ceil(text.length / 4)`, or 0 for empty/missing text.
 */
function estimateTokens(text) {
  return text ? Math.ceil(text.length / 4) : 0;
}
|
|
63
|
+
export {
|
|
64
|
+
CONTEXT_WEIGHTS,
|
|
65
|
+
KNOWLEDGE_TYPE_BOOST,
|
|
66
|
+
SEARCH_WEIGHTS,
|
|
67
|
+
accessRecencyScore,
|
|
68
|
+
computeCompositeScore,
|
|
69
|
+
estimateTokens,
|
|
70
|
+
knowledgeTypeBoost,
|
|
71
|
+
normalizeFTS5Rank,
|
|
72
|
+
projectMatchScore,
|
|
73
|
+
recencyScore,
|
|
74
|
+
stalenessPenalty
|
|
75
|
+
};
|
|
@@ -0,0 +1,512 @@
|
|
|
1
|
+
import { createRequire } from 'module';const require = createRequire(import.meta.url);
|
|
2
|
+
|
|
3
|
+
// src/utils/logger.ts
|
|
4
|
+
import { appendFileSync, existsSync, mkdirSync, readFileSync } from "fs";
|
|
5
|
+
import { join } from "path";
|
|
6
|
+
import { homedir } from "os";
|
|
7
|
+
/**
 * Log severity enum with both directions of lookup:
 * name -> number (LogLevel.DEBUG === 0) and number -> name (LogLevel[0] === "DEBUG").
 * Higher numbers are more severe; SILENT (4) is above every real level.
 */
var LogLevel = /* @__PURE__ */ (() => {
  const table = {};
  ["DEBUG", "INFO", "WARN", "ERROR", "SILENT"].forEach((name, value) => {
    table[name] = value;
    table[value] = name;
  });
  return table;
})();
|
|
15
|
+
// Data directory under the user's home; the logger reads settings.json from it and writes log files to its logs/ subdirectory.
var DEFAULT_DATA_DIR = join(homedir(), ".contextkit");
|
|
16
|
+
/**
 * Extract a short "file:line" location from one V8 stack-trace frame.
 *
 * Handles plain paths, `file://` URLs (the form ES-module frames use) and
 * Windows drive-letter paths, none of which may be split on their own colons.
 *
 * @param {string} frameLine - A single line of `Error#stack`.
 * @returns {string} `"<basename>:<line>"`, or `"unknown"` if the line has no
 *   trailing `:<line>:<column>` position.
 */
function parseCallerLocation(frameLine) {
  const match = /([^\s()]+):(\d+):(\d+)\)?\s*$/.exec(frameLine);
  if (!match) return "unknown";
  const fileName = match[1].split(/[\\/]/).pop();
  return `${fileName}:${match[2]}`;
}

/**
 * Levelled logger that appends to a per-day file under
 * `<DEFAULT_DATA_DIR>/logs`, falling back to stderr when the log file
 * could not be set up.
 *
 * Both the minimum level and the log file path are resolved lazily on the
 * first call that needs them and then cached for the life of the instance.
 */
var Logger = class {
  // Cached minimum level; null until getLevel() has resolved it.
  level = null;
  useColor;
  // Absolute path of the log file, or null when file logging is unavailable.
  logFilePath = null;
  logFileInitialized = false;

  constructor() {
    this.useColor = process.stdout.isTTY ?? false;
  }

  /**
   * Resolve the log file path and create the logs directory (runs once).
   * On failure the error is reported on the console and logging falls back
   * to stderr.
   */
  ensureLogFileInitialized() {
    if (this.logFileInitialized) return;
    this.logFileInitialized = true;
    try {
      const logsDir = join(DEFAULT_DATA_DIR, "logs");
      if (!existsSync(logsDir)) {
        mkdirSync(logsDir, { recursive: true });
      }
      // The file name uses the UTC date (toISOString), fixed at first use.
      const date = (/* @__PURE__ */ new Date()).toISOString().split("T")[0];
      this.logFilePath = join(logsDir, `kiro-memory-${date}.log`);
    } catch (error) {
      console.error("[LOGGER] Failed to initialize log file:", error);
      this.logFilePath = null;
    }
  }

  /**
   * Lazily load the minimum log level from `<DEFAULT_DATA_DIR>/settings.json`.
   * Reads `KIRO_MEMORY_LOG_LEVEL`, then `CONTEXTKIT_LOG_LEVEL`; anything
   * missing, unreadable or unrecognized resolves to INFO.
   *
   * @returns {number} Numeric LogLevel value.
   */
  getLevel() {
    if (this.level === null) {
      try {
        const settingsPath = join(DEFAULT_DATA_DIR, "settings.json");
        if (existsSync(settingsPath)) {
          const settingsData = readFileSync(settingsPath, "utf-8");
          const settings = JSON.parse(settingsData);
          const envLevel = (settings.KIRO_MEMORY_LOG_LEVEL || settings.CONTEXTKIT_LOG_LEVEL || "INFO").toUpperCase();
          // LogLevel also maps numbers back to names ("0" -> "DEBUG"), so
          // only accept the lookup when it produced a numeric level.
          const resolved = LogLevel[envLevel];
          this.level = typeof resolved === "number" ? resolved : 1 /* INFO */;
        } else {
          this.level = 1 /* INFO */;
        }
      } catch (error) {
        // Best effort: a broken settings file must not break logging.
        this.level = 1 /* INFO */;
      }
    }
    return this.level;
  }

  /**
   * Create correlation ID for tracking an observation through the pipeline.
   */
  correlationId(sessionId, observationNum) {
    return `obs-${sessionId}-${observationNum}`;
  }

  /**
   * Create session correlation ID.
   */
  sessionId(sessionId) {
    return `session-${sessionId}`;
  }

  /**
   * Format data for logging as a compact summary instead of a full dump.
   * Never throws: objects that cannot be serialized (circular references,
   * BigInt values) are summarized by their keys.
   *
   * @param {*} data - Value to format.
   * @returns {string} Compact, single-value representation ("" for null/undefined).
   */
  formatData(data) {
    if (data === null || data === void 0) return "";
    if (typeof data === "string") return data;
    if (typeof data === "number") return data.toString();
    if (typeof data === "boolean") return data.toString();
    if (typeof data === "object") {
      if (data instanceof Error) {
        return this.getLevel() === 0 /* DEBUG */ ? `${data.message}\n${data.stack}` : data.message;
      }
      if (Array.isArray(data)) {
        return `[${data.length} items]`;
      }
      const keys = Object.keys(data);
      if (keys.length === 0) return "{}";
      if (keys.length <= 3) {
        try {
          return JSON.stringify(data);
        } catch {
          return `{${keys.length} keys: ${keys.join(", ")}}`;
        }
      }
      return `{${keys.length} keys: ${keys.slice(0, 3).join(", ")}...}`;
    }
    return String(data);
  }

  /**
   * Format timestamp in local timezone (YYYY-MM-DD HH:MM:SS.mmm).
   */
  formatTimestamp(date) {
    const pad = (value, width = 2) => String(value).padStart(width, "0");
    const year = date.getFullYear();
    const month = pad(date.getMonth() + 1);
    const day = pad(date.getDate());
    const hours = pad(date.getHours());
    const minutes = pad(date.getMinutes());
    const seconds = pad(date.getSeconds());
    const ms = pad(date.getMilliseconds(), 3);
    return `${year}-${month}-${day} ${hours}:${minutes}:${seconds}.${ms}`;
  }

  /**
   * Core logging method: builds one log line and appends it to the log file
   * (or writes it to stderr when no log file is available).
   *
   * @param {number} level - Numeric LogLevel of this message.
   * @param {string} component - Short component tag.
   * @param {string} message - Log message.
   * @param {object} [context] - Optional context; `correlationId`/`sessionId`
   *   become a prefix, remaining keys are rendered as `k=v` pairs.
   * @param {*} [data] - Optional payload appended after the message.
   */
  log(level, component, message, context, data) {
    if (level < this.getLevel()) return;
    this.ensureLogFileInitialized();
    const timestamp = this.formatTimestamp(/* @__PURE__ */ new Date());
    const levelStr = LogLevel[level].padEnd(5);
    const componentStr = component.padEnd(6);
    let correlationStr = "";
    if (context?.correlationId) {
      correlationStr = `[${context.correlationId}] `;
    } else if (context?.sessionId) {
      correlationStr = `[session-${context.sessionId}] `;
    }
    let dataStr = "";
    if (data !== void 0 && data !== null) {
      if (data instanceof Error) {
        dataStr = this.getLevel() === 0 /* DEBUG */ ? `\n${data.message}\n${data.stack}` : ` ${data.message}`;
      } else if (this.getLevel() === 0 /* DEBUG */ && typeof data === "object") {
        try {
          dataStr = "\n" + JSON.stringify(data, null, 2);
        } catch {
          // Unserializable payload: fall back to the compact summary so a
          // log call can never throw into its caller.
          dataStr = " " + this.formatData(data);
        }
      } else {
        dataStr = " " + this.formatData(data);
      }
    }
    let contextStr = "";
    if (context) {
      // The ID fields are already shown in the prefix (or deliberately hidden).
      const { sessionId, memorySessionId, correlationId, ...rest } = context;
      if (Object.keys(rest).length > 0) {
        const pairs = Object.entries(rest).map(([k, v]) => `${k}=${v}`);
        contextStr = ` {${pairs.join(", ")}}`;
      }
    }
    const logLine = `[${timestamp}] [${levelStr}] [${componentStr}] ${correlationStr}${message}${contextStr}${dataStr}`;
    if (this.logFilePath) {
      try {
        appendFileSync(this.logFilePath, logLine + "\n", "utf8");
      } catch (error) {
        process.stderr.write(`[LOGGER] Failed to write to log file: ${error}\n`);
      }
    } else {
      process.stderr.write(logLine + "\n");
    }
  }

  // Public logging methods
  debug(component, message, context, data) {
    this.log(0 /* DEBUG */, component, message, context, data);
  }
  info(component, message, context, data) {
    this.log(1 /* INFO */, component, message, context, data);
  }
  warn(component, message, context, data) {
    this.log(2 /* WARN */, component, message, context, data);
  }
  error(component, message, context, data) {
    this.log(3 /* ERROR */, component, message, context, data);
  }

  /**
   * Log data flow: input → processing
   */
  dataIn(component, message, context, data) {
    this.info(component, `\u2192 ${message}`, context, data);
  }

  /**
   * Log data flow: processing → output
   */
  dataOut(component, message, context, data) {
    this.info(component, `\u2190 ${message}`, context, data);
  }

  /**
   * Log successful completion
   */
  success(component, message, context, data) {
    this.info(component, `\u2713 ${message}`, context, data);
  }

  /**
   * Log failure
   */
  failure(component, message, context, data) {
    this.error(component, `\u2717 ${message}`, context, data);
  }

  /**
   * Log timing information
   */
  timing(component, message, durationMs, context) {
    this.info(component, `\u23F1 ${message}`, context, { duration: `${durationMs}ms` });
  }

  /**
   * Happy Path Error - logs (at WARN) when the expected "happy path" fails
   * but a fallback exists, tagging the entry with the caller's file:line.
   *
   * @returns {*} The `fallback` argument, so callers can `return logger.happyPathError(...)`.
   */
  happyPathError(component, message, context, data, fallback = "") {
    const stackLines = (new Error().stack || "").split("\n");
    // Frame 0 is the "Error" header, frame 1 is this method, frame 2 is the caller.
    const location = parseCallerLocation(stackLines[2] || "");
    const enhancedContext = {
      ...context,
      location
    };
    this.warn(component, `[HAPPY-PATH] ${message}`, enhancedContext, data);
    return fallback;
  }
};
|
|
220
|
+
// Module-level Logger instance shared by the embedding and vector-search code below.
var logger = new Logger();
|
|
221
|
+
|
|
222
|
+
// src/services/search/EmbeddingService.ts
|
|
223
|
+
/**
 * Text-embedding service with optional, dynamically imported providers.
 *
 * Tries `fastembed` first, then `@huggingface/transformers`; when neither
 * can be loaded the service stays initialized with no provider and every
 * embed call returns null.
 */
var EmbeddingService = class {
  // "fastembed", "transformers", or null when no provider is available.
  provider = null;
  // Provider-specific model handle.
  model = null;
  initialized = false;
  // In-flight initialization promise, shared by concurrent callers.
  initializing = null;

  /**
   * Initialize the embedding service (idempotent, safe to call concurrently).
   *
   * @returns {Promise<boolean>} true when a provider is available.
   */
  async initialize() {
    if (this.initialized) return this.provider !== null;
    if (this.initializing) return this.initializing;
    this.initializing = this._doInitialize();
    try {
      return await this.initializing;
    } finally {
      // Always drop the in-flight promise: if initialization rejected, a
      // retained rejected promise would make every later call fail forever.
      this.initializing = null;
    }
  }

  /**
   * Attempt each provider in order; failures to load are logged at debug
   * level and treated as "provider not installed".
   *
   * @returns {Promise<boolean>} true when a provider was loaded.
   */
  async _doInitialize() {
    try {
      const fastembed = await import("fastembed");
      const EmbeddingModel = fastembed.EmbeddingModel || fastembed.default?.EmbeddingModel;
      const FlagEmbedding = fastembed.FlagEmbedding || fastembed.default?.FlagEmbedding;
      if (FlagEmbedding && EmbeddingModel) {
        this.model = await FlagEmbedding.init({
          model: EmbeddingModel.BGESmallENV15
        });
        this.provider = "fastembed";
        this.initialized = true;
        logger.info("EMBEDDING", "Inizializzato con fastembed (BGE-small-en-v1.5)");
        return true;
      }
    } catch (error) {
      logger.debug("EMBEDDING", `fastembed non disponibile: ${error}`);
    }
    try {
      const transformers = await import("@huggingface/transformers");
      const pipeline = transformers.pipeline || transformers.default?.pipeline;
      if (pipeline) {
        this.model = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2", {
          quantized: true
        });
        this.provider = "transformers";
        this.initialized = true;
        logger.info("EMBEDDING", "Inizializzato con @huggingface/transformers (all-MiniLM-L6-v2)");
        return true;
      }
    } catch (error) {
      logger.debug("EMBEDDING", `@huggingface/transformers non disponibile: ${error}`);
    }
    // No provider could be loaded: mark as initialized so we do not retry
    // the imports on every call.
    this.provider = null;
    this.initialized = true;
    logger.warn("EMBEDDING", "Nessun provider embedding disponibile, ricerca semantica disabilitata");
    return false;
  }

  /**
   * Generate the embedding for a single text.
   * The text is truncated to its first 2000 characters before embedding.
   *
   * @param {string} text - Text to embed.
   * @returns {Promise<Float32Array|null>} The embedding, or null when no
   *   provider is available or embedding failed (the failure is logged).
   */
  async embed(text) {
    if (!this.initialized) await this.initialize();
    if (!this.provider || !this.model) return null;
    try {
      const truncated = text.substring(0, 2e3);
      if (this.provider === "fastembed") {
        return await this._embedFastembed(truncated);
      } else if (this.provider === "transformers") {
        return await this._embedTransformers(truncated);
      }
    } catch (error) {
      logger.error("EMBEDDING", `Errore generazione embedding: ${error}`);
    }
    return null;
  }

  /**
   * Generate embeddings for several texts, one at a time and in order.
   *
   * @param {string[]} texts - Texts to embed.
   * @returns {Promise<Array<Float32Array|null>>} One entry per input text;
   *   null where embedding was unavailable or failed.
   */
  async embedBatch(texts) {
    if (!this.initialized) await this.initialize();
    if (!this.provider || !this.model) return texts.map(() => null);
    const results = [];
    for (const text of texts) {
      try {
        const embedding = await this.embed(text);
        results.push(embedding);
      } catch {
        // embed() logs its own provider failures; keep positions aligned
        // with the input by recording null for this text.
        results.push(null);
      }
    }
    return results;
  }

  /**
   * Whether the service is initialized with a working provider.
   */
  isAvailable() {
    return this.initialized && this.provider !== null;
  }

  /**
   * Name of the active provider ("fastembed", "transformers") or null.
   */
  getProvider() {
    return this.provider;
  }

  /**
   * Embedding vector dimensions (fixed at 384).
   */
  getDimensions() {
    return 384;
  }

  // --- Provider-specific implementations ---

  /**
   * Embed one text with fastembed; the model's embed() result is consumed
   * as an async iterable of batches and the first vector is returned.
   */
  async _embedFastembed(text) {
    const embeddings = this.model.embed([text], 1);
    for await (const batch of embeddings) {
      if (batch && batch.length > 0) {
        const vec = batch[0];
        return vec instanceof Float32Array ? vec : new Float32Array(vec);
      }
    }
    return null;
  }

  /**
   * Embed one text with the transformers pipeline, requesting mean pooling
   * and normalization.
   */
  async _embedTransformers(text) {
    const output = await this.model(text, {
      pooling: "mean",
      normalize: true
    });
    if (output?.data) {
      return output.data instanceof Float32Array ? output.data : new Float32Array(output.data);
    }
    return null;
  }
};
|
|
353
|
+
// Lazily created, module-wide EmbeddingService instance.
var embeddingService = null;

/**
 * Return the shared EmbeddingService, creating it on first use.
 *
 * @returns {EmbeddingService} The same instance on every call.
 */
function getEmbeddingService() {
  embeddingService ??= new EmbeddingService();
  return embeddingService;
}
|
|
360
|
+
|
|
361
|
+
// src/services/search/VectorSearch.ts
|
|
362
|
+
/**
 * Cosine similarity between two numeric vectors.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} dot(a, b) / (|a| * |b|); 0 when the lengths differ or
 *   either vector has zero magnitude.
 */
function cosineSimilarity(a, b) {
  const size = a.length;
  if (size !== b.length) {
    return 0;
  }
  let dot = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  for (let idx = 0; idx < size; idx += 1) {
    const x = a[idx];
    const y = b[idx];
    dot += x * y;
    sumSquaresA += x * x;
    sumSquaresB += y * y;
  }
  const magnitude = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
  return magnitude === 0 ? 0 : dot / magnitude;
}
|
|
376
|
+
/**
 * Wrap a Float32Array's bytes in a Node Buffer.
 * The Buffer is created over the array's underlying ArrayBuffer (same
 * offset and byte length), so no bytes are copied.
 *
 * @param {Float32Array} arr - Source vector.
 * @returns {Buffer} Buffer covering exactly the array's bytes.
 */
function float32ToBuffer(arr) {
  const { buffer, byteOffset, byteLength } = arr;
  return Buffer.from(buffer, byteOffset, byteLength);
}
|
|
379
|
+
/**
 * Decode a byte buffer into a Float32Array.
 * The buffer's bytes are copied out first (ArrayBuffer#slice), so the
 * result does not depend on the source's offset within its backing store.
 *
 * @param {Buffer|Uint8Array} buf - Raw float32 bytes.
 * @returns {Float32Array} Decoded vector.
 * @throws {RangeError} From the Float32Array constructor when the byte
 *   length is not a multiple of 4.
 */
function bufferToFloat32(buf) {
  const start = buf.byteOffset;
  const end = start + buf.byteLength;
  const bytes = buf.buffer.slice(start, end);
  return new Float32Array(bytes);
}
|
|
383
|
+
/**
 * Brute-force vector search over embeddings stored in the
 * `observation_embeddings` table.
 *
 * `db` is expected to expose `query(sql)` returning a statement with
 * `all(...params)`, `get()` and `run(...params)`.
 */
var VectorSearch = class {
  /**
   * Semantic search: computes cosine similarity between the query embedding
   * and every stored embedding (optionally restricted to one project).
   *
   * @param {object} db - Database handle.
   * @param {Float32Array} queryEmbedding - Embedding of the query.
   * @param {{limit?: number, threshold?: number, project?: string}} [options]
   *   `limit` defaults to 10; `threshold` (minimum similarity) defaults to
   *   0.3 and may be set to 0; `project` filters by exact project name.
   * @returns {Promise<object[]>} Matches sorted by descending similarity;
   *   an empty array on database errors (which are logged).
   */
  async search(db, queryEmbedding, options = {}) {
    const limit = options.limit || 10;
    // `??` rather than `||`: an explicit threshold of 0 is a valid request.
    const threshold = options.threshold ?? 0.3;
    try {
      let sql = `
SELECT e.observation_id, e.embedding,
o.title, o.text, o.type, o.project, o.created_at, o.created_at_epoch
FROM observation_embeddings e
JOIN observations o ON o.id = e.observation_id
`;
      const params = [];
      if (options.project) {
        sql += " WHERE o.project = ?";
        params.push(options.project);
      }
      const rows = db.query(sql).all(...params);
      const scored = [];
      for (const row of rows) {
        let embedding;
        try {
          embedding = bufferToFloat32(row.embedding);
        } catch (error) {
          // One undecodable blob must not abort the search for every
          // other row; skip it and keep going.
          logger.warn("VECTOR", `Embedding non valido per osservazione ${row.observation_id}, riga ignorata: ${error}`);
          continue;
        }
        const similarity = cosineSimilarity(queryEmbedding, embedding);
        if (similarity >= threshold) {
          scored.push({
            id: row.observation_id,
            observationId: row.observation_id,
            similarity,
            title: row.title,
            text: row.text,
            type: row.type,
            project: row.project,
            created_at: row.created_at,
            created_at_epoch: row.created_at_epoch
          });
        }
      }
      scored.sort((a, b) => b.similarity - a.similarity);
      return scored.slice(0, limit);
    } catch (error) {
      logger.error("VECTOR", `Errore ricerca vettoriale: ${error}`);
      return [];
    }
  }

  /**
   * Store (insert or replace) the embedding for an observation.
   *
   * @param {object} db - Database handle.
   * @param {number} observationId - Observation the embedding belongs to.
   * @param {Float32Array} embedding - Vector to store.
   * @param {string} model - Value written to the `model` column.
   * @returns {Promise<boolean>} true when the row was written, false when
   *   the write failed (the error is logged, not thrown).
   */
  async storeEmbedding(db, observationId, embedding, model) {
    try {
      const blob = float32ToBuffer(embedding);
      db.query(`
INSERT OR REPLACE INTO observation_embeddings
(observation_id, embedding, model, dimensions, created_at)
VALUES (?, ?, ?, ?, ?)
`).run(
        observationId,
        blob,
        model,
        embedding.length,
        (/* @__PURE__ */ new Date()).toISOString()
      );
      logger.debug("VECTOR", `Embedding salvato per osservazione ${observationId}`);
      return true;
    } catch (error) {
      logger.error("VECTOR", `Errore salvataggio embedding: ${error}`);
      return false;
    }
  }

  /**
   * Generate embeddings for observations that do not have one yet,
   * newest observations first.
   *
   * @param {object} db - Database handle.
   * @param {number} [batchSize=50] - Maximum observations to process.
   * @returns {Promise<number>} Number of embeddings actually stored.
   */
  async backfillEmbeddings(db, batchSize = 50) {
    const embeddingService2 = getEmbeddingService();
    if (!await embeddingService2.initialize()) {
      logger.warn("VECTOR", "Embedding service non disponibile, backfill saltato");
      return 0;
    }
    const rows = db.query(`
SELECT o.id, o.title, o.text, o.narrative, o.concepts
FROM observations o
LEFT JOIN observation_embeddings e ON e.observation_id = o.id
WHERE e.observation_id IS NULL
ORDER BY o.created_at_epoch DESC
LIMIT ?
`).all(batchSize);
    if (rows.length === 0) return 0;
    let count = 0;
    // The provider name is what gets recorded in the `model` column.
    const model = embeddingService2.getProvider() || "unknown";
    for (const row of rows) {
      const parts = [row.title];
      if (row.text) parts.push(row.text);
      if (row.narrative) parts.push(row.narrative);
      if (row.concepts) parts.push(row.concepts);
      const fullText = parts.join(" ").substring(0, 2e3);
      const embedding = await embeddingService2.embed(fullText);
      // Count only embeddings that were really persisted.
      if (embedding && await this.storeEmbedding(db, row.id, embedding, model)) {
        count++;
      }
    }
    logger.info("VECTOR", `Backfill completato: ${count}/${rows.length} embeddings generati`);
    return count;
  }

  /**
   * Embedding coverage statistics.
   *
   * @param {object} db - Database handle.
   * @returns {{total: number, embedded: number, percentage: number}}
   *   Observation count, embedding count and rounded coverage percentage;
   *   all zeros if the counts cannot be read.
   */
  getStats(db) {
    try {
      const totalRow = db.query("SELECT COUNT(*) as count FROM observations").get();
      const embeddedRow = db.query("SELECT COUNT(*) as count FROM observation_embeddings").get();
      const total = totalRow?.count || 0;
      const embedded = embeddedRow?.count || 0;
      const percentage = total > 0 ? Math.round(embedded / total * 100) : 0;
      return { total, embedded, percentage };
    } catch {
      // Best effort: stats are informational, so report zeros on failure.
      return { total: 0, embedded: 0, percentage: 0 };
    }
  }
};
|
|
501
|
+
// Lazily created, module-wide VectorSearch instance.
var vectorSearch = null;

/**
 * Return the shared VectorSearch, creating it on first use.
 *
 * @returns {VectorSearch} The same instance on every call.
 */
function getVectorSearch() {
  vectorSearch ??= new VectorSearch();
  return vectorSearch;
}
|
|
508
|
+
export {
|
|
509
|
+
VectorSearch,
|
|
510
|
+
cosineSimilarity,
|
|
511
|
+
getVectorSearch
|
|
512
|
+
};
|