@digilogiclabs/platform-core 1.1.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{ConsoleEmail-CYPpn2sR.d.mts → ConsoleEmail-hUDFsKoA.d.mts} +1128 -40
- package/dist/{ConsoleEmail-CYPpn2sR.d.ts → ConsoleEmail-hUDFsKoA.d.ts} +1128 -40
- package/dist/index.d.mts +650 -918
- package/dist/index.d.ts +650 -918
- package/dist/index.js +10205 -5223
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +10188 -5222
- package/dist/index.mjs.map +1 -1
- package/dist/migrate.js +1101 -0
- package/dist/migrate.js.map +1 -0
- package/dist/testing.d.mts +2 -2
- package/dist/testing.d.ts +2 -2
- package/dist/testing.js +1100 -4
- package/dist/testing.js.map +1 -1
- package/dist/testing.mjs +1102 -4
- package/dist/testing.mjs.map +1 -1
- package/package.json +35 -9
package/dist/testing.mjs
CHANGED
|
@@ -1,3 +1,1018 @@
|
|
|
1
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
// Bundler-generated lazy initializer: the wrapped module body runs on the
// first __init() call only; later calls return the cached result.
var __esm = (fn, res) => function __init() {
  return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
};

// src/interfaces/IAI.ts
var MemoryAI;
var init_IAI = __esm({
  "src/interfaces/IAI.ts"() {
    "use strict";
    /**
     * In-memory stand-in for an AI provider, intended for tests.
     *
     * Canned chat/completion responses and embeddings registered through the
     * test helpers are returned as-is; otherwise a mock result is synthesized
     * locally. Every request is appended to an inspectable request log.
     */
    MemoryAI = class {
      /** Model registry used for capability, provider and pricing lookups. */
      models = [];
      /** Canned responses, keyed by the lookup keys built in chat()/complete(). */
      responses = new Map();
      /** Canned embedding vectors, keyed by the exact input text. */
      embeddings = new Map();
      /** Chronological record of the requests received. */
      requestLog = [];

      /**
       * @param {object} [config] Optional settings. Fields read by this class:
       *   `models`, `defaultChatModel`, `defaultCompletionModel`,
       *   `defaultEmbeddingModel`.
       */
      constructor(config = {}) {
        this.config = config;
        this.models = config.models || [
          {
            modelId: "gpt-4",
            provider: "openai",
            capabilities: ["chat", "completion"],
            maxContextTokens: 128000,
            maxOutputTokens: 4096,
            inputCostPer1K: 0.03,
            outputCostPer1K: 0.06,
            supportsStreaming: true,
            supportsTools: true,
            supportsVision: true
          },
          {
            modelId: "claude-3-opus",
            provider: "anthropic",
            capabilities: ["chat"],
            maxContextTokens: 200000,
            maxOutputTokens: 4096,
            inputCostPer1K: 0.015,
            outputCostPer1K: 0.075,
            supportsStreaming: true,
            supportsTools: true,
            supportsVision: true
          },
          {
            modelId: "text-embedding-3-small",
            provider: "openai",
            capabilities: ["embedding"],
            maxContextTokens: 8191,
            maxOutputTokens: 0,
            inputCostPer1K: 0.00002,
            outputCostPer1K: 0,
            supportsStreaming: false,
            supportsTools: false,
            supportsVision: false
          }
        ];
      }

      // ─────────────────────────────────────────────────────────────
      // Test Helpers
      // ─────────────────────────────────────────────────────────────

      /**
       * Registers a canned response. chat() looks up `${model}:${lastMessageContent}`;
       * complete() looks up `completion:${model}:${prompt}`.
       */
      setResponse(key, response) {
        this.responses.set(key, response);
      }

      /** Registers a canned embedding vector for an exact input text. */
      setEmbedding(text, embedding) {
        this.embeddings.set(text, embedding);
      }

      /** Returns a shallow copy of the request log. */
      getRequestLog() {
        return [...this.requestLog];
      }

      /** Empties the request log. */
      clearRequestLog() {
        this.requestLog = [];
      }

      // ─────────────────────────────────────────────────────────────
      // Chat Operations
      // ─────────────────────────────────────────────────────────────

      /**
       * Answers a chat request with a canned response when one is registered
       * for the model and last message, otherwise with a synthesized reply.
       * @param {object} request Must carry a `messages` array; `model` is optional.
       * @returns {Promise<object>} The canned response, or a mock chat response.
       */
      async chat(request) {
        this.requestLog.push({ type: "chat", request, timestamp: new Date() });
        const model = request.model || this.config.defaultChatModel || "gpt-4";
        const lastMessage = request.messages[request.messages.length - 1];
        const key = `${model}:${lastMessage?.content}`;
        if (this.responses.has(key)) {
          return this.responses.get(key);
        }
        const promptTokens = this.estimateTokensSync(this.joinMessageContent(request.messages));
        return {
          id: `chatcmpl-${Date.now()}`,
          model,
          provider: this.resolveProvider(model),
          choices: [
            {
              index: 0,
              message: {
                role: "assistant",
                content: `Mock response to: ${lastMessage?.content || "empty"}`
              },
              finishReason: "stop"
            }
          ],
          usage: this.buildUsage(model, promptTokens, 20),
          created: new Date(),
          finishReason: "stop"
        };
      }

      /**
       * Streams a synthesized reply one word per chunk. The first chunk carries
       * the assistant role and the last one carries `finishReason: "stop"`.
       */
      async *chatStream(request) {
        this.requestLog.push({
          type: "chatStream",
          request,
          timestamp: new Date()
        });
        const model = request.model || this.config.defaultChatModel || "gpt-4";
        const provider = this.resolveProvider(model);
        const lastMessage = request.messages[request.messages.length - 1];
        const responseText = `Mock streaming response to: ${lastMessage?.content || "empty"}`;
        const words = responseText.split(" ");
        for (let i = 0; i < words.length; i++) {
          yield {
            id: `chatcmpl-${Date.now()}`,
            model,
            provider,
            delta: {
              // Re-insert the separator so concatenating deltas rebuilds the text.
              content: (i > 0 ? " " : "") + words[i],
              role: i === 0 ? "assistant" : void 0
            },
            finishReason: i === words.length - 1 ? "stop" : void 0
          };
        }
      }

      /**
       * Drives chatStream(), awaiting `callback` for every chunk, and returns
       * the assembled response including token totals and estimated cost.
       */
      async chatWithCallback(request, callback) {
        let fullContent = "";
        for await (const chunk of this.chatStream(request)) {
          await callback(chunk);
          if (chunk.delta.content) {
            fullContent += chunk.delta.content;
          }
        }
        const model = request.model || this.config.defaultChatModel || "gpt-4";
        const promptTokens = this.estimateTokensSync(this.joinMessageContent(request.messages));
        const completionTokens = this.estimateTokensSync(fullContent);
        return {
          id: `chatcmpl-${Date.now()}`,
          model,
          provider: this.resolveProvider(model),
          choices: [
            {
              index: 0,
              message: { role: "assistant", content: fullContent },
              finishReason: "stop"
            }
          ],
          usage: this.buildUsage(model, promptTokens, completionTokens),
          created: new Date(),
          finishReason: "stop"
        };
      }

      // ─────────────────────────────────────────────────────────────
      // Completion Operations
      // ─────────────────────────────────────────────────────────────

      /**
       * Answers a completion request with a canned response when one is
       * registered for the model and prompt, otherwise with a synthesized one.
       * @param {object} request Must carry a string `prompt`; `model` is optional.
       */
      async complete(request) {
        this.requestLog.push({ type: "complete", request, timestamp: new Date() });
        const model = request.model || this.config.defaultCompletionModel || "gpt-4";
        const key = `completion:${model}:${request.prompt}`;
        if (this.responses.has(key)) {
          return this.responses.get(key);
        }
        return {
          id: `cmpl-${Date.now()}`,
          model,
          provider: this.resolveProvider(model),
          text: `Mock completion of: ${request.prompt.substring(0, 50)}...`,
          usage: this.buildUsage(model, this.estimateTokensSync(request.prompt), 20),
          created: new Date(),
          finishReason: "stop"
        };
      }

      /** Streams a synthesized completion one word per chunk. */
      async *completeStream(request) {
        this.requestLog.push({
          type: "completeStream",
          request,
          timestamp: new Date()
        });
        const model = request.model || this.config.defaultCompletionModel || "gpt-4";
        const provider = this.resolveProvider(model);
        const responseText = `Mock streaming completion of: ${request.prompt.substring(0, 30)}...`;
        const words = responseText.split(" ");
        for (let i = 0; i < words.length; i++) {
          yield {
            id: `cmpl-${Date.now()}`,
            model,
            provider,
            delta: {
              content: (i > 0 ? " " : "") + words[i]
            },
            finishReason: i === words.length - 1 ? "stop" : void 0
          };
        }
      }

      // ─────────────────────────────────────────────────────────────
      // Embedding Operations
      // ─────────────────────────────────────────────────────────────

      /**
       * Embeds one text or an array of texts. Texts with a canned vector get
       * that vector; the rest get a mock vector derived from the text.
       * @param {object} request `input` (string or string[]), optional `model`
       *   and `dimensions` (defaults to 1536).
       */
      async embed(request) {
        this.requestLog.push({ type: "embed", request, timestamp: new Date() });
        const model = request.model || this.config.defaultEmbeddingModel || "text-embedding-3-small";
        const inputs = Array.isArray(request.input) ? request.input : [request.input];
        const dimensions = request.dimensions || 1536;
        const embeddings = inputs.map((text) => {
          if (this.embeddings.has(text)) {
            return this.embeddings.get(text);
          }
          return this.generateMockEmbedding(text, dimensions);
        });
        const promptTokens = inputs.reduce(
          (sum, t) => sum + this.estimateTokensSync(t),
          0
        );
        return {
          id: `emb-${Date.now()}`,
          model,
          provider: this.resolveProvider(model),
          embeddings,
          usage: this.buildUsage(model, promptTokens, 0),
          created: new Date()
        };
      }

      /** Cosine similarity between the embeddings of two texts. */
      async similarity(text1, text2, model) {
        const response = await this.embed({ input: [text1, text2], model });
        const [emb1, emb2] = response.embeddings;
        return this.cosineSimilarity(emb1, emb2);
      }

      // ─────────────────────────────────────────────────────────────
      // Model Management
      // ─────────────────────────────────────────────────────────────

      /** Returns a shallow copy of the model registry. */
      async listModels() {
        return [...this.models];
      }

      /** Returns the registry entry for `modelId`, or null when unknown. */
      async getModel(modelId) {
        return this.models.find((m) => m.modelId === modelId) || null;
      }

      /** True when the model is registered and lists `capability`. */
      async supportsCapability(modelId, capability) {
        const model = await this.getModel(modelId);
        return model?.capabilities.includes(capability) ?? false;
      }

      /** Async wrapper around estimateTokensSync(); `_model` is ignored. */
      async estimateTokens(text, _model) {
        return this.estimateTokensSync(text);
      }

      /**
       * Estimates the cost of a request before sending it. The request kind is
       * detected from its shape (`messages`, `prompt`, otherwise embedding
       * `input`). Chat and completion requests assume 100 output tokens.
       * @returns {Promise<number>} Estimated cost, or 0 for an unknown model.
       */
      async estimateCost(request) {
        let model;
        let inputTokens;
        if ("messages" in request) {
          model = request.model || this.config.defaultChatModel || "gpt-4";
          inputTokens = this.estimateTokensSync(this.joinMessageContent(request.messages));
        } else if ("prompt" in request) {
          model = request.model || this.config.defaultCompletionModel || "gpt-4";
          inputTokens = this.estimateTokensSync(request.prompt);
        } else {
          model = request.model || this.config.defaultEmbeddingModel || "text-embedding-3-small";
          const inputs = Array.isArray(request.input) ? request.input : [request.input];
          inputTokens = inputs.reduce(
            (sum, t) => sum + this.estimateTokensSync(t),
            0
          );
        }
        const modelConfig = await this.getModel(model);
        if (!modelConfig) return 0;
        const estimatedOutputTokens = "messages" in request || "prompt" in request ? 100 : 0;
        return inputTokens / 1e3 * modelConfig.inputCostPer1K + estimatedOutputTokens / 1e3 * modelConfig.outputCostPer1K;
      }

      // ─────────────────────────────────────────────────────────────
      // Health & Status
      // ─────────────────────────────────────────────────────────────

      /** Returns a fixed health report; nothing is actually probed. */
      async healthCheck() {
        return {
          healthy: true,
          providers: {
            openai: { available: true, latencyMs: 50 },
            anthropic: { available: true, latencyMs: 60 },
            google: { available: true, latencyMs: 55 },
            azure: { available: false, error: "Not configured" },
            bedrock: { available: false, error: "Not configured" },
            custom: { available: false, error: "Not configured" }
          }
        };
      }

      // ─────────────────────────────────────────────────────────────
      // Private Helpers
      // ─────────────────────────────────────────────────────────────

      /** Rough token estimate: one token per four characters, rounded up. */
      estimateTokensSync(text) {
        // Missing text counts as empty instead of throwing on `.length`.
        if (text == null) return 0;
        return Math.ceil(text.length / 4);
      }

      /** Joins the `content` of every message with single spaces. */
      joinMessageContent(messages) {
        return messages.map((m) => m.content).join(" ");
      }

      /** Provider of a registered model; "openai" when the model is unknown. */
      resolveProvider(modelId) {
        const registered = this.models.find((m) => m.modelId === modelId);
        return registered?.provider ?? "openai";
      }

      /** Builds a usage record with the total and estimated cost filled in. */
      buildUsage(modelId, promptTokens, completionTokens) {
        const usage = {
          promptTokens,
          completionTokens,
          totalTokens: promptTokens + completionTokens,
          estimatedCostUsd: 0
        };
        usage.estimatedCostUsd = this.calculateCost(modelId, usage);
        return usage;
      }

      /** Prices a usage record with the model's per-1K rates; 0 if unknown. */
      calculateCost(modelId, usage) {
        const model = this.models.find((m) => m.modelId === modelId);
        if (!model) return 0;
        return usage.promptTokens / 1e3 * model.inputCostPer1K + usage.completionTokens / 1e3 * model.outputCostPer1K;
      }

      /**
       * Builds a unit-length vector of `dimensions` numbers derived from a
       * 32-bit string hash of `text`, so equal texts give equal vectors.
       */
      generateMockEmbedding(text, dimensions) {
        let hash = 0;
        for (let i = 0; i < text.length; i++) {
          // `| 0` keeps the running hash in signed 32-bit range.
          hash = ((hash << 5) - hash + text.charCodeAt(i)) | 0;
        }
        const embedding = [];
        for (let i = 0; i < dimensions; i++) {
          embedding.push(Math.sin(hash + i * 31) * 0.5);
        }
        const magnitude = Math.sqrt(embedding.reduce((sum, v) => sum + v * v, 0));
        // An all-zero vector cannot be normalized; return it rather than NaNs.
        if (magnitude === 0) return embedding;
        return embedding.map((v) => v / magnitude);
      }

      /**
       * Cosine similarity of two vectors, iterating over the length of `a`.
       * Returns 0 when either vector has zero norm.
       */
      cosineSimilarity(a, b) {
        let dotProduct = 0;
        let normA = 0;
        let normB = 0;
        for (let i = 0; i < a.length; i++) {
          dotProduct += a[i] * b[i];
          normA += a[i] * a[i];
          normB += b[i] * b[i];
        }
        const denominator = Math.sqrt(normA) * Math.sqrt(normB);
        return denominator === 0 ? 0 : dotProduct / denominator;
      }
    };
  }
});
|
|
347
|
+
|
|
348
|
+
// src/interfaces/IRAG.ts
|
|
349
|
+
var ChunkingPresets, MemoryRAG;
|
|
350
|
+
var init_IRAG = __esm({
|
|
351
|
+
"src/interfaces/IRAG.ts"() {
|
|
352
|
+
"use strict";
|
|
353
|
+
// Named chunking configurations. `default` is the base layer that
// createCollection() spreads into every collection's chunkingConfig.
ChunkingPresets = {
  // Recursive splitting on progressively finer separators.
  default: { strategy: "recursive", chunkSize: 512, chunkOverlap: 50, minChunkSize: 100, separators: ["\n\n", "\n", ". ", " "] },
  // Sentence strategy with the smallest chunk size of the presets.
  small: { strategy: "sentence", chunkSize: 256, chunkOverlap: 25, minChunkSize: 50 },
  // Paragraph strategy with the largest chunk size of the presets.
  large: { strategy: "paragraph", chunkSize: 1024, chunkOverlap: 100, minChunkSize: 200 },
  // Recursive splitting that tries class/function/def boundaries first.
  code: { strategy: "recursive", chunkSize: 1000, chunkOverlap: 100, separators: ["\n\nclass ", "\n\nfunction ", "\n\ndef ", "\n\n", "\n"] }
};
|
|
380
|
+
MemoryRAG = class {
|
|
381
|
+
constructor(config = {}) {
|
|
382
|
+
this.config = config;
|
|
383
|
+
}
|
|
384
|
+
collections = /* @__PURE__ */ new Map();
|
|
385
|
+
documents = /* @__PURE__ */ new Map();
|
|
386
|
+
chunks = /* @__PURE__ */ new Map();
|
|
387
|
+
pipelines = /* @__PURE__ */ new Map();
|
|
388
|
+
embeddings = /* @__PURE__ */ new Map();
|
|
389
|
+
// ─────────────────────────────────────────────────────────────
|
|
390
|
+
// Collection Management
|
|
391
|
+
// ─────────────────────────────────────────────────────────────
|
|
392
|
+
async createCollection(options) {
|
|
393
|
+
const now = /* @__PURE__ */ new Date();
|
|
394
|
+
const collection = {
|
|
395
|
+
name: options.name,
|
|
396
|
+
description: options.description,
|
|
397
|
+
embeddingModel: options.embeddingModel || this.config.defaultEmbeddingModel || "text-embedding-3-small",
|
|
398
|
+
dimensions: options.dimensions || 1536,
|
|
399
|
+
distanceMetric: options.distanceMetric || "cosine",
|
|
400
|
+
chunkingConfig: {
|
|
401
|
+
...ChunkingPresets.default,
|
|
402
|
+
...this.config.defaultChunkingConfig,
|
|
403
|
+
...options.chunkingConfig
|
|
404
|
+
},
|
|
405
|
+
documentCount: 0,
|
|
406
|
+
chunkCount: 0,
|
|
407
|
+
totalTokens: 0,
|
|
408
|
+
createdAt: now,
|
|
409
|
+
updatedAt: now
|
|
410
|
+
};
|
|
411
|
+
this.collections.set(options.name, collection);
|
|
412
|
+
return collection;
|
|
413
|
+
}
|
|
414
|
+
async getCollection(name) {
|
|
415
|
+
return this.collections.get(name) || null;
|
|
416
|
+
}
|
|
417
|
+
async listCollections(tenantId) {
|
|
418
|
+
const collections = Array.from(this.collections.values());
|
|
419
|
+
return collections;
|
|
420
|
+
}
|
|
421
|
+
async deleteCollection(name) {
|
|
422
|
+
for (const [id, doc] of this.documents) {
|
|
423
|
+
if (doc.collection === name) {
|
|
424
|
+
for (const [chunkId, chunk] of this.chunks) {
|
|
425
|
+
if (chunk.documentId === id) {
|
|
426
|
+
this.chunks.delete(chunkId);
|
|
427
|
+
}
|
|
428
|
+
}
|
|
429
|
+
this.documents.delete(id);
|
|
430
|
+
}
|
|
431
|
+
}
|
|
432
|
+
this.collections.delete(name);
|
|
433
|
+
}
|
|
434
|
+
async getCollectionStats(name) {
|
|
435
|
+
const collection = await this.getCollection(name);
|
|
436
|
+
if (!collection) {
|
|
437
|
+
throw new Error(`Collection not found: ${name}`);
|
|
438
|
+
}
|
|
439
|
+
const docs = Array.from(this.documents.values()).filter(
|
|
440
|
+
(d) => d.collection === name
|
|
441
|
+
);
|
|
442
|
+
const docChunks = Array.from(this.chunks.values()).filter(
|
|
443
|
+
(c) => c.collection === name
|
|
444
|
+
);
|
|
445
|
+
const totalTokens = docChunks.reduce((sum, c) => sum + c.tokenCount, 0);
|
|
446
|
+
return {
|
|
447
|
+
documentCount: docs.length,
|
|
448
|
+
chunkCount: docChunks.length,
|
|
449
|
+
totalTokens,
|
|
450
|
+
averageChunkSize: docChunks.length > 0 ? totalTokens / docChunks.length : 0,
|
|
451
|
+
storageBytes: docChunks.reduce((sum, c) => sum + c.content.length, 0)
|
|
452
|
+
};
|
|
453
|
+
}
|
|
454
|
+
// ─────────────────────────────────────────────────────────────
|
|
455
|
+
// Document Management
|
|
456
|
+
// ─────────────────────────────────────────────────────────────
|
|
457
|
+
async ingest(collection, documents, options) {
|
|
458
|
+
const startTime = Date.now();
|
|
459
|
+
const results = [];
|
|
460
|
+
for (const doc of documents) {
|
|
461
|
+
const result = await this.ingestOne(
|
|
462
|
+
collection,
|
|
463
|
+
doc,
|
|
464
|
+
options
|
|
465
|
+
);
|
|
466
|
+
results.push(result);
|
|
467
|
+
}
|
|
468
|
+
return {
|
|
469
|
+
total: documents.length,
|
|
470
|
+
successful: results.filter((r) => r.status === "indexed").length,
|
|
471
|
+
failed: results.filter((r) => r.status === "failed").length,
|
|
472
|
+
results,
|
|
473
|
+
totalProcessingTimeMs: Date.now() - startTime
|
|
474
|
+
};
|
|
475
|
+
}
|
|
476
|
+
async ingestOne(collection, document, options) {
|
|
477
|
+
const startTime = Date.now();
|
|
478
|
+
const docId = `doc_${Date.now()}_${Math.random().toString(36).substring(7)}`;
|
|
479
|
+
const now = /* @__PURE__ */ new Date();
|
|
480
|
+
try {
|
|
481
|
+
const col = await this.getCollection(collection);
|
|
482
|
+
if (!col) {
|
|
483
|
+
throw new Error(`Collection not found: ${collection}`);
|
|
484
|
+
}
|
|
485
|
+
const doc = {
|
|
486
|
+
...document,
|
|
487
|
+
id: docId,
|
|
488
|
+
collection,
|
|
489
|
+
status: "processing",
|
|
490
|
+
tenantId: options?.tenantId || document.tenantId,
|
|
491
|
+
metadata: { ...document.metadata, ...options?.metadata },
|
|
492
|
+
createdAt: now,
|
|
493
|
+
updatedAt: now
|
|
494
|
+
};
|
|
495
|
+
this.documents.set(docId, doc);
|
|
496
|
+
const chunkingConfig = { ...col.chunkingConfig, ...options?.chunking };
|
|
497
|
+
const docChunks = this.chunkDocument(doc, chunkingConfig);
|
|
498
|
+
if (options?.generateEmbeddings !== false) {
|
|
499
|
+
for (const chunk of docChunks) {
|
|
500
|
+
chunk.embedding = await this.generateMockEmbedding(
|
|
501
|
+
chunk.content,
|
|
502
|
+
col.dimensions
|
|
503
|
+
);
|
|
504
|
+
this.embeddings.set(chunk.id, chunk.embedding);
|
|
505
|
+
}
|
|
506
|
+
}
|
|
507
|
+
for (const chunk of docChunks) {
|
|
508
|
+
this.chunks.set(chunk.id, chunk);
|
|
509
|
+
}
|
|
510
|
+
doc.status = "indexed";
|
|
511
|
+
doc.chunkCount = docChunks.length;
|
|
512
|
+
doc.tokenCount = docChunks.reduce((sum, c) => sum + c.tokenCount, 0);
|
|
513
|
+
this.documents.set(docId, doc);
|
|
514
|
+
col.documentCount++;
|
|
515
|
+
col.chunkCount += docChunks.length;
|
|
516
|
+
col.totalTokens += doc.tokenCount;
|
|
517
|
+
col.updatedAt = /* @__PURE__ */ new Date();
|
|
518
|
+
return {
|
|
519
|
+
documentId: docId,
|
|
520
|
+
status: "indexed",
|
|
521
|
+
chunkCount: doc.chunkCount,
|
|
522
|
+
tokenCount: doc.tokenCount,
|
|
523
|
+
processingTimeMs: Date.now() - startTime
|
|
524
|
+
};
|
|
525
|
+
} catch (error) {
|
|
526
|
+
const doc = this.documents.get(docId);
|
|
527
|
+
if (doc) {
|
|
528
|
+
doc.status = "failed";
|
|
529
|
+
doc.error = error instanceof Error ? error.message : "Unknown error";
|
|
530
|
+
}
|
|
531
|
+
return {
|
|
532
|
+
documentId: docId,
|
|
533
|
+
status: "failed",
|
|
534
|
+
chunkCount: 0,
|
|
535
|
+
tokenCount: 0,
|
|
536
|
+
error: error instanceof Error ? error.message : "Unknown error",
|
|
537
|
+
processingTimeMs: Date.now() - startTime
|
|
538
|
+
};
|
|
539
|
+
}
|
|
540
|
+
}
|
|
541
|
+
async getDocument(documentId) {
|
|
542
|
+
return this.documents.get(documentId) || null;
|
|
543
|
+
}
|
|
544
|
+
async listDocuments(collection, options) {
|
|
545
|
+
let docs = Array.from(this.documents.values()).filter(
|
|
546
|
+
(d) => d.collection === collection
|
|
547
|
+
);
|
|
548
|
+
if (options?.tenantId) {
|
|
549
|
+
docs = docs.filter((d) => d.tenantId === options.tenantId);
|
|
550
|
+
}
|
|
551
|
+
if (options?.status) {
|
|
552
|
+
docs = docs.filter((d) => d.status === options.status);
|
|
553
|
+
}
|
|
554
|
+
const total = docs.length;
|
|
555
|
+
const offset = options?.offset || 0;
|
|
556
|
+
const limit = options?.limit || 50;
|
|
557
|
+
return {
|
|
558
|
+
documents: docs.slice(offset, offset + limit),
|
|
559
|
+
total
|
|
560
|
+
};
|
|
561
|
+
}
|
|
562
|
+
async deleteDocument(documentId) {
|
|
563
|
+
const doc = await this.getDocument(documentId);
|
|
564
|
+
if (doc) {
|
|
565
|
+
for (const [chunkId, chunk] of this.chunks) {
|
|
566
|
+
if (chunk.documentId === documentId) {
|
|
567
|
+
this.chunks.delete(chunkId);
|
|
568
|
+
this.embeddings.delete(chunkId);
|
|
569
|
+
}
|
|
570
|
+
}
|
|
571
|
+
const collection = await this.getCollection(doc.collection);
|
|
572
|
+
if (collection) {
|
|
573
|
+
collection.documentCount--;
|
|
574
|
+
collection.chunkCount -= doc.chunkCount || 0;
|
|
575
|
+
collection.totalTokens -= doc.tokenCount || 0;
|
|
576
|
+
}
|
|
577
|
+
this.documents.delete(documentId);
|
|
578
|
+
}
|
|
579
|
+
}
|
|
580
|
+
async reprocessDocument(documentId, options) {
|
|
581
|
+
const doc = await this.getDocument(documentId);
|
|
582
|
+
if (!doc) {
|
|
583
|
+
throw new Error(`Document not found: ${documentId}`);
|
|
584
|
+
}
|
|
585
|
+
for (const [chunkId, chunk] of this.chunks) {
|
|
586
|
+
if (chunk.documentId === documentId) {
|
|
587
|
+
this.chunks.delete(chunkId);
|
|
588
|
+
this.embeddings.delete(chunkId);
|
|
589
|
+
}
|
|
590
|
+
}
|
|
591
|
+
return this.ingestOne(
|
|
592
|
+
doc.collection,
|
|
593
|
+
{
|
|
594
|
+
source: doc.source,
|
|
595
|
+
type: doc.type,
|
|
596
|
+
content: doc.content,
|
|
597
|
+
title: doc.title,
|
|
598
|
+
metadata: doc.metadata,
|
|
599
|
+
tenantId: doc.tenantId
|
|
600
|
+
},
|
|
601
|
+
options
|
|
602
|
+
);
|
|
603
|
+
}
|
|
604
|
+
// ─────────────────────────────────────────────────────────────
|
|
605
|
+
// Chunk Management
|
|
606
|
+
// ─────────────────────────────────────────────────────────────
|
|
607
|
+
async getChunks(documentId) {
|
|
608
|
+
return Array.from(this.chunks.values()).filter((c) => c.documentId === documentId).sort((a, b) => a.index - b.index);
|
|
609
|
+
}
|
|
610
|
+
async getChunk(chunkId) {
|
|
611
|
+
return this.chunks.get(chunkId) || null;
|
|
612
|
+
}
|
|
613
|
+
async updateChunkMetadata(chunkId, metadata) {
|
|
614
|
+
const chunk = await this.getChunk(chunkId);
|
|
615
|
+
if (!chunk) {
|
|
616
|
+
throw new Error(`Chunk not found: ${chunkId}`);
|
|
617
|
+
}
|
|
618
|
+
chunk.metadata = { ...chunk.metadata, ...metadata };
|
|
619
|
+
this.chunks.set(chunkId, chunk);
|
|
620
|
+
return chunk;
|
|
621
|
+
}
|
|
622
|
+
// ─────────────────────────────────────────────────────────────
|
|
623
|
+
// Search & Retrieval
|
|
624
|
+
// ─────────────────────────────────────────────────────────────
|
|
625
|
+
async search(query) {
|
|
626
|
+
const startTime = Date.now();
|
|
627
|
+
const mode = query.mode || this.config.defaultSearchMode || "vector";
|
|
628
|
+
const limit = query.limit || this.config.defaultLimit || 10;
|
|
629
|
+
const queryEmbedding = await this.generateMockEmbedding(query.query, 1536);
|
|
630
|
+
let chunks = Array.from(this.chunks.values()).filter(
|
|
631
|
+
(c) => c.collection === query.collection
|
|
632
|
+
);
|
|
633
|
+
if (query.tenantId) {
|
|
634
|
+
chunks = chunks.filter((c) => c.tenantId === query.tenantId);
|
|
635
|
+
}
|
|
636
|
+
if (query.filters) {
|
|
637
|
+
chunks = chunks.filter(
|
|
638
|
+
(c) => this.matchesFilters(c.metadata, query.filters)
|
|
639
|
+
);
|
|
640
|
+
}
|
|
641
|
+
let results = chunks.map((chunk) => {
|
|
642
|
+
const embedding = this.embeddings.get(chunk.id) || [];
|
|
643
|
+
const vectorScore = embedding.length > 0 ? this.cosineSimilarity(queryEmbedding, embedding) : 0;
|
|
644
|
+
const keywordScore = this.keywordScore(query.query, chunk.content);
|
|
645
|
+
let score;
|
|
646
|
+
switch (mode) {
|
|
647
|
+
case "vector":
|
|
648
|
+
score = vectorScore;
|
|
649
|
+
break;
|
|
650
|
+
case "keyword":
|
|
651
|
+
score = keywordScore;
|
|
652
|
+
break;
|
|
653
|
+
case "hybrid":
|
|
654
|
+
score = 0.7 * vectorScore + 0.3 * keywordScore;
|
|
655
|
+
break;
|
|
656
|
+
default:
|
|
657
|
+
score = vectorScore;
|
|
658
|
+
}
|
|
659
|
+
return { chunk, score };
|
|
660
|
+
});
|
|
661
|
+
if (query.minScore) {
|
|
662
|
+
results = results.filter((r) => r.score >= query.minScore);
|
|
663
|
+
}
|
|
664
|
+
results.sort((a, b) => b.score - a.score);
|
|
665
|
+
if (query.rerank) {
|
|
666
|
+
const candidates = results.slice(0, query.rerankCandidates || limit * 3);
|
|
667
|
+
results = candidates.sort((a, b) => {
|
|
668
|
+
const aRelevance = this.keywordScore(query.query, a.chunk.content);
|
|
669
|
+
const bRelevance = this.keywordScore(query.query, b.chunk.content);
|
|
670
|
+
return b.score + bRelevance - (a.score + aRelevance);
|
|
671
|
+
});
|
|
672
|
+
}
|
|
673
|
+
results = results.slice(0, limit);
|
|
674
|
+
if (query.includeDocumentContent) {
|
|
675
|
+
for (const result of results) {
|
|
676
|
+
result.document = await this.getDocument(result.chunk.documentId) || void 0;
|
|
677
|
+
}
|
|
678
|
+
}
|
|
679
|
+
for (const result of results) {
|
|
680
|
+
result.highlights = this.generateHighlights(
|
|
681
|
+
query.query,
|
|
682
|
+
result.chunk.content
|
|
683
|
+
);
|
|
684
|
+
}
|
|
685
|
+
return {
|
|
686
|
+
results,
|
|
687
|
+
query: query.query,
|
|
688
|
+
totalMatches: results.length,
|
|
689
|
+
searchTimeMs: Date.now() - startTime,
|
|
690
|
+
mode
|
|
691
|
+
};
|
|
692
|
+
}
|
|
693
|
+
async findSimilar(chunkId, options) {
|
|
694
|
+
const chunk = await this.getChunk(chunkId);
|
|
695
|
+
if (!chunk) {
|
|
696
|
+
throw new Error(`Chunk not found: ${chunkId}`);
|
|
697
|
+
}
|
|
698
|
+
const embedding = this.embeddings.get(chunkId);
|
|
699
|
+
if (!embedding) {
|
|
700
|
+
return [];
|
|
701
|
+
}
|
|
702
|
+
const collection = options?.collection || chunk.collection;
|
|
703
|
+
let chunks = Array.from(this.chunks.values()).filter(
|
|
704
|
+
(c) => c.collection === collection && c.id !== chunkId
|
|
705
|
+
);
|
|
706
|
+
const results = chunks.map((c) => {
|
|
707
|
+
const otherEmbedding = this.embeddings.get(c.id) || [];
|
|
708
|
+
const score = otherEmbedding.length > 0 ? this.cosineSimilarity(embedding, otherEmbedding) : 0;
|
|
709
|
+
return { chunk: c, score };
|
|
710
|
+
});
|
|
711
|
+
let filteredResults = results;
|
|
712
|
+
if (options?.minScore) {
|
|
713
|
+
filteredResults = results.filter((r) => r.score >= options.minScore);
|
|
714
|
+
}
|
|
715
|
+
filteredResults.sort((a, b) => b.score - a.score);
|
|
716
|
+
return filteredResults.slice(0, options?.limit || 10);
|
|
717
|
+
}
|
|
718
|
+
async multiSearch(queries) {
|
|
719
|
+
return Promise.all(queries.map((q) => this.search(q)));
|
|
720
|
+
}
|
|
721
|
+
// ─────────────────────────────────────────────────────────────
|
|
722
|
+
// Context Assembly
|
|
723
|
+
// ─────────────────────────────────────────────────────────────
|
|
724
|
+
async assembleContext(results, config) {
|
|
725
|
+
const resultArray = "results" in results ? results.results : results;
|
|
726
|
+
const maxTokens = config?.maxTokens || 4e3;
|
|
727
|
+
const chunkTemplate = config?.chunkTemplate || "{{content}}";
|
|
728
|
+
const contextTemplate = config?.contextTemplate || "{{chunks}}";
|
|
729
|
+
let chunks = [];
|
|
730
|
+
let totalTokens = 0;
|
|
731
|
+
const sources = [];
|
|
732
|
+
const seenDocs = /* @__PURE__ */ new Set();
|
|
733
|
+
let processedResults = resultArray;
|
|
734
|
+
if (config?.deduplicate) {
|
|
735
|
+
const threshold = config.dedupeThreshold || 0.9;
|
|
736
|
+
processedResults = this.deduplicateResults(resultArray, threshold);
|
|
737
|
+
}
|
|
738
|
+
if (config?.sortBy) {
|
|
739
|
+
processedResults = [...processedResults].sort((a, b) => {
|
|
740
|
+
switch (config.sortBy) {
|
|
741
|
+
case "score":
|
|
742
|
+
return b.score - a.score;
|
|
743
|
+
case "document":
|
|
744
|
+
return a.chunk.documentId.localeCompare(b.chunk.documentId);
|
|
745
|
+
case "position":
|
|
746
|
+
return a.chunk.index - b.chunk.index;
|
|
747
|
+
default:
|
|
748
|
+
return 0;
|
|
749
|
+
}
|
|
750
|
+
});
|
|
751
|
+
}
|
|
752
|
+
let truncated = false;
|
|
753
|
+
for (const result of processedResults) {
|
|
754
|
+
if (totalTokens + result.chunk.tokenCount > maxTokens) {
|
|
755
|
+
truncated = true;
|
|
756
|
+
break;
|
|
757
|
+
}
|
|
758
|
+
chunks.push(result.chunk);
|
|
759
|
+
totalTokens += result.chunk.tokenCount;
|
|
760
|
+
if (!seenDocs.has(result.chunk.documentId)) {
|
|
761
|
+
seenDocs.add(result.chunk.documentId);
|
|
762
|
+
const doc = await this.getDocument(result.chunk.documentId);
|
|
763
|
+
sources.push({
|
|
764
|
+
documentId: result.chunk.documentId,
|
|
765
|
+
title: doc?.title,
|
|
766
|
+
source: doc?.source || ""
|
|
767
|
+
});
|
|
768
|
+
}
|
|
769
|
+
}
|
|
770
|
+
const formattedChunks = chunks.map((chunk, i) => {
|
|
771
|
+
let formatted = chunkTemplate.replace("{{content}}", chunk.content).replace("{{index}}", String(i + 1)).replace("{{documentId}}", chunk.documentId);
|
|
772
|
+
if (config?.includeCitations) {
|
|
773
|
+
formatted = `[${i + 1}] ${formatted}`;
|
|
774
|
+
}
|
|
775
|
+
return formatted;
|
|
776
|
+
});
|
|
777
|
+
let context = contextTemplate.replace(
|
|
778
|
+
"{{chunks}}",
|
|
779
|
+
formattedChunks.join("\n\n")
|
|
780
|
+
);
|
|
781
|
+
if (config?.includeCitations && sources.length > 0) {
|
|
782
|
+
const citations = sources.map((s, i) => `[${i + 1}] ${s.title || s.source}`).join("\n");
|
|
783
|
+
context += `
|
|
784
|
+
|
|
785
|
+
Sources:
|
|
786
|
+
${citations}`;
|
|
787
|
+
}
|
|
788
|
+
return {
|
|
789
|
+
context,
|
|
790
|
+
chunks,
|
|
791
|
+
tokenCount: totalTokens,
|
|
792
|
+
sources,
|
|
793
|
+
truncated
|
|
794
|
+
};
|
|
795
|
+
}
|
|
796
|
+
async queryWithContext(query, contextConfig) {
|
|
797
|
+
const searchResponse = await this.search(query);
|
|
798
|
+
const context = await this.assembleContext(searchResponse, contextConfig);
|
|
799
|
+
return { searchResponse, context };
|
|
800
|
+
}
|
|
801
|
+
// ─────────────────────────────────────────────────────────────
|
|
802
|
+
// Embedding Management
|
|
803
|
+
// ─────────────────────────────────────────────────────────────
|
|
804
|
+
async embed(texts, model) {
|
|
805
|
+
const textArray = Array.isArray(texts) ? texts : [texts];
|
|
806
|
+
return Promise.all(
|
|
807
|
+
textArray.map((text) => this.generateMockEmbedding(text, 1536))
|
|
808
|
+
);
|
|
809
|
+
}
|
|
810
|
+
async reembed(collection, model, batchSize) {
|
|
811
|
+
const col = await this.getCollection(collection);
|
|
812
|
+
if (!col) {
|
|
813
|
+
throw new Error(`Collection not found: ${collection}`);
|
|
814
|
+
}
|
|
815
|
+
let updated = 0;
|
|
816
|
+
let errors = 0;
|
|
817
|
+
const chunks = Array.from(this.chunks.values()).filter(
|
|
818
|
+
(c) => c.collection === collection
|
|
819
|
+
);
|
|
820
|
+
for (const chunk of chunks) {
|
|
821
|
+
try {
|
|
822
|
+
const embedding = await this.generateMockEmbedding(
|
|
823
|
+
chunk.content,
|
|
824
|
+
col.dimensions
|
|
825
|
+
);
|
|
826
|
+
chunk.embedding = embedding;
|
|
827
|
+
this.embeddings.set(chunk.id, embedding);
|
|
828
|
+
updated++;
|
|
829
|
+
} catch {
|
|
830
|
+
errors++;
|
|
831
|
+
}
|
|
832
|
+
}
|
|
833
|
+
return { updated, errors };
|
|
834
|
+
}
|
|
835
|
+
// ─────────────────────────────────────────────────────────────
|
|
836
|
+
// Pipeline Management
|
|
837
|
+
// ─────────────────────────────────────────────────────────────
|
|
838
|
+
async createPipeline(pipeline) {
|
|
839
|
+
const id = `pipeline_${Date.now()}`;
|
|
840
|
+
const now = /* @__PURE__ */ new Date();
|
|
841
|
+
const newPipeline = {
|
|
842
|
+
...pipeline,
|
|
843
|
+
id,
|
|
844
|
+
createdAt: now,
|
|
845
|
+
updatedAt: now
|
|
846
|
+
};
|
|
847
|
+
this.pipelines.set(id, newPipeline);
|
|
848
|
+
return newPipeline;
|
|
849
|
+
}
|
|
850
|
+
async getPipeline(pipelineId) {
|
|
851
|
+
return this.pipelines.get(pipelineId) || null;
|
|
852
|
+
}
|
|
853
|
+
async runPipeline(pipelineId, documentIds) {
|
|
854
|
+
const pipeline = await this.getPipeline(pipelineId);
|
|
855
|
+
if (!pipeline) {
|
|
856
|
+
throw new Error(`Pipeline not found: ${pipelineId}`);
|
|
857
|
+
}
|
|
858
|
+
const results = [];
|
|
859
|
+
for (const docId of documentIds) {
|
|
860
|
+
const result = await this.reprocessDocument(docId);
|
|
861
|
+
results.push(result);
|
|
862
|
+
}
|
|
863
|
+
return {
|
|
864
|
+
total: documentIds.length,
|
|
865
|
+
successful: results.filter((r) => r.status === "indexed").length,
|
|
866
|
+
failed: results.filter((r) => r.status === "failed").length,
|
|
867
|
+
results,
|
|
868
|
+
totalProcessingTimeMs: results.reduce(
|
|
869
|
+
(sum, r) => sum + r.processingTimeMs,
|
|
870
|
+
0
|
|
871
|
+
)
|
|
872
|
+
};
|
|
873
|
+
}
|
|
874
|
+
// ─────────────────────────────────────────────────────────────
|
|
875
|
+
// Private Helpers
|
|
876
|
+
// ─────────────────────────────────────────────────────────────
|
|
877
|
+
chunkDocument(doc, config) {
|
|
878
|
+
const chunks = [];
|
|
879
|
+
const content = doc.content;
|
|
880
|
+
const chunkSize = config.chunkSize;
|
|
881
|
+
const overlap = config.chunkOverlap;
|
|
882
|
+
let startOffset = 0;
|
|
883
|
+
let index = 0;
|
|
884
|
+
while (startOffset < content.length) {
|
|
885
|
+
const endOffset = Math.min(startOffset + chunkSize * 4, content.length);
|
|
886
|
+
const chunkContent = content.slice(startOffset, endOffset);
|
|
887
|
+
chunks.push({
|
|
888
|
+
id: `chunk_${doc.id}_${index}`,
|
|
889
|
+
documentId: doc.id,
|
|
890
|
+
index,
|
|
891
|
+
content: chunkContent,
|
|
892
|
+
metadata: config.includeMetadata ? doc.metadata : {},
|
|
893
|
+
startOffset,
|
|
894
|
+
endOffset,
|
|
895
|
+
tokenCount: Math.ceil(chunkContent.length / 4),
|
|
896
|
+
collection: doc.collection,
|
|
897
|
+
tenantId: doc.tenantId
|
|
898
|
+
});
|
|
899
|
+
startOffset = endOffset - overlap * 4;
|
|
900
|
+
index++;
|
|
901
|
+
}
|
|
902
|
+
return chunks;
|
|
903
|
+
}
|
|
904
|
+
async generateMockEmbedding(text, dimensions) {
|
|
905
|
+
const embedding = [];
|
|
906
|
+
let hash = 0;
|
|
907
|
+
for (let i = 0; i < text.length; i++) {
|
|
908
|
+
hash = (hash << 5) - hash + text.charCodeAt(i);
|
|
909
|
+
hash = hash & hash;
|
|
910
|
+
}
|
|
911
|
+
for (let i = 0; i < dimensions; i++) {
|
|
912
|
+
const seed = hash + i * 31;
|
|
913
|
+
embedding.push(Math.sin(seed) * 0.5);
|
|
914
|
+
}
|
|
915
|
+
const magnitude = Math.sqrt(embedding.reduce((sum, v) => sum + v * v, 0));
|
|
916
|
+
return embedding.map((v) => v / magnitude);
|
|
917
|
+
}
|
|
918
|
+
cosineSimilarity(a, b) {
|
|
919
|
+
if (a.length !== b.length) return 0;
|
|
920
|
+
let dotProduct = 0;
|
|
921
|
+
let normA = 0;
|
|
922
|
+
let normB = 0;
|
|
923
|
+
for (let i = 0; i < a.length; i++) {
|
|
924
|
+
dotProduct += a[i] * b[i];
|
|
925
|
+
normA += a[i] * a[i];
|
|
926
|
+
normB += b[i] * b[i];
|
|
927
|
+
}
|
|
928
|
+
const denominator = Math.sqrt(normA) * Math.sqrt(normB);
|
|
929
|
+
return denominator > 0 ? dotProduct / denominator : 0;
|
|
930
|
+
}
|
|
931
|
+
keywordScore(query, content) {
|
|
932
|
+
const queryWords = query.toLowerCase().split(/\s+/);
|
|
933
|
+
const contentLower = content.toLowerCase();
|
|
934
|
+
let matches = 0;
|
|
935
|
+
for (const word of queryWords) {
|
|
936
|
+
if (contentLower.includes(word)) {
|
|
937
|
+
matches++;
|
|
938
|
+
}
|
|
939
|
+
}
|
|
940
|
+
return queryWords.length > 0 ? matches / queryWords.length : 0;
|
|
941
|
+
}
|
|
942
|
+
matchesFilters(metadata, filters) {
|
|
943
|
+
for (const filter of filters) {
|
|
944
|
+
const value = metadata[filter.field];
|
|
945
|
+
switch (filter.operator) {
|
|
946
|
+
case "eq":
|
|
947
|
+
if (value !== filter.value) return false;
|
|
948
|
+
break;
|
|
949
|
+
case "ne":
|
|
950
|
+
if (value === filter.value) return false;
|
|
951
|
+
break;
|
|
952
|
+
case "gt":
|
|
953
|
+
if (typeof value !== "number" || typeof filter.value !== "number" || value <= filter.value)
|
|
954
|
+
return false;
|
|
955
|
+
break;
|
|
956
|
+
case "gte":
|
|
957
|
+
if (typeof value !== "number" || typeof filter.value !== "number" || value < filter.value)
|
|
958
|
+
return false;
|
|
959
|
+
break;
|
|
960
|
+
case "lt":
|
|
961
|
+
if (typeof value !== "number" || typeof filter.value !== "number" || value >= filter.value)
|
|
962
|
+
return false;
|
|
963
|
+
break;
|
|
964
|
+
case "lte":
|
|
965
|
+
if (typeof value !== "number" || typeof filter.value !== "number" || value > filter.value)
|
|
966
|
+
return false;
|
|
967
|
+
break;
|
|
968
|
+
case "in":
|
|
969
|
+
if (!Array.isArray(filter.value) || !filter.value.includes(value))
|
|
970
|
+
return false;
|
|
971
|
+
break;
|
|
972
|
+
case "nin":
|
|
973
|
+
if (!Array.isArray(filter.value) || filter.value.includes(value))
|
|
974
|
+
return false;
|
|
975
|
+
break;
|
|
976
|
+
case "contains":
|
|
977
|
+
if (typeof value !== "string" || typeof filter.value !== "string" || !value.includes(filter.value))
|
|
978
|
+
return false;
|
|
979
|
+
break;
|
|
980
|
+
}
|
|
981
|
+
}
|
|
982
|
+
return true;
|
|
983
|
+
}
|
|
984
|
+
generateHighlights(query, content) {
|
|
985
|
+
const words = query.toLowerCase().split(/\s+/);
|
|
986
|
+
const highlights = [];
|
|
987
|
+
for (const word of words) {
|
|
988
|
+
const index = content.toLowerCase().indexOf(word);
|
|
989
|
+
if (index !== -1) {
|
|
990
|
+
const start = Math.max(0, index - 30);
|
|
991
|
+
const end = Math.min(content.length, index + word.length + 30);
|
|
992
|
+
highlights.push(`...${content.slice(start, end)}...`);
|
|
993
|
+
}
|
|
994
|
+
}
|
|
995
|
+
return highlights.slice(0, 3);
|
|
996
|
+
}
|
|
997
|
+
deduplicateResults(results, threshold) {
|
|
998
|
+
const deduplicated = [];
|
|
999
|
+
for (const result of results) {
|
|
1000
|
+
const isDuplicate = deduplicated.some((r) => {
|
|
1001
|
+
const embedding1 = this.embeddings.get(result.chunk.id);
|
|
1002
|
+
const embedding2 = this.embeddings.get(r.chunk.id);
|
|
1003
|
+
if (!embedding1 || !embedding2) return false;
|
|
1004
|
+
return this.cosineSimilarity(embedding1, embedding2) > threshold;
|
|
1005
|
+
});
|
|
1006
|
+
if (!isDuplicate) {
|
|
1007
|
+
deduplicated.push(result);
|
|
1008
|
+
}
|
|
1009
|
+
}
|
|
1010
|
+
return deduplicated;
|
|
1011
|
+
}
|
|
1012
|
+
};
|
|
1013
|
+
}
|
|
1014
|
+
});
|
|
1015
|
+
|
|
1
1016
|
// src/adapters/memory/MemoryDatabase.ts
|
|
2
1017
|
var MemoryDatabase = class {
|
|
3
1018
|
tables = /* @__PURE__ */ new Map();
|
|
@@ -1259,6 +2274,8 @@ var EmailProviderSchema = z.enum(["memory", "console", "smtp", "resend"]);
|
|
|
1259
2274
|
// Accepted identifiers for the queue provider setting.
var QueueProviderSchema = z.enum(["memory", "bullmq"]);
|
|
1260
2275
|
// Accepted identifiers for the tracing provider setting.
var TracingProviderSchema = z.enum(["noop", "memory", "otlp"]);
|
|
1261
2276
|
// Accepted log level names.
var LogLevelSchema = z.enum(["debug", "info", "warn", "error"]);
|
|
2277
|
+
// Accepted identifiers for the AI provider setting; also used for the RAG embedding provider.
var AIProviderSchema = z.enum(["memory", "openai", "anthropic", "google"]);
|
|
2278
|
+
// Accepted identifiers for the RAG (vector store) provider setting.
var RAGProviderSchema = z.enum(["memory", "pinecone", "weaviate"]);
|
|
1262
2279
|
var DatabaseConfigSchema = z.object({
|
|
1263
2280
|
provider: DatabaseProviderSchema.default("memory"),
|
|
1264
2281
|
url: z.string().optional().describe("PostgreSQL connection URL"),
|
|
@@ -1371,6 +2388,51 @@ var QueueConfigSchema = z.object({
|
|
|
1371
2388
|
message: "BullMQ requires redisUrl"
|
|
1372
2389
|
}
|
|
1373
2390
|
);
|
|
2391
|
+
// Validation schema for the `ai` configuration section.
// Cross-field rule: when AI is enabled with any provider other than
// "memory", an API key must be present.
var AIConfigSchema = z.object({
  enabled: z.boolean().default(false).describe("Enable AI capabilities"),
  provider: AIProviderSchema.default("memory"),
  apiKey: z.string().optional().describe("API key for the AI provider"),
  model: z.string().optional().describe("Default model to use"),
  maxTokens: z.number().int().min(1).max(2e5).default(4096).describe("Default max tokens"),
  temperature: z.number().min(0).max(2).default(0.7).describe("Default temperature"),
  timeout: z.number().int().min(1e3).max(3e5).default(6e4).describe("Request timeout in ms"),
  baseUrl: z.string().url().optional().describe("Custom base URL for API")
}).refine(
  (data) => !data.enabled || data.provider === "memory" || Boolean(data.apiKey),
  { message: "Production AI providers require an API key" }
);
|
|
2411
|
+
// Validation schema for the `rag` configuration section.
// Cross-field rule (only when RAG is enabled): "pinecone" needs both an API
// key and an index name, "weaviate" needs a host; other providers need
// nothing extra.
var RAGConfigSchema = z.object({
  enabled: z.boolean().default(false).describe("Enable RAG capabilities"),
  provider: RAGProviderSchema.default("memory"),
  apiKey: z.string().optional().describe("API key for the RAG provider"),
  environment: z.string().optional().describe("Pinecone environment"),
  indexName: z.string().optional().describe("Pinecone index name or Weaviate class"),
  namespace: z.string().optional().describe("Default namespace"),
  host: z.string().url().optional().describe("Weaviate host URL"),
  embeddingProvider: AIProviderSchema.default("memory").describe("Provider for generating embeddings"),
  embeddingApiKey: z.string().optional().describe("API key for embedding provider"),
  embeddingModel: z.string().optional().describe("Model for generating embeddings")
}).refine(
  (data) => {
    if (!data.enabled) {
      return true;
    }
    switch (data.provider) {
      case "pinecone":
        return Boolean(data.apiKey && data.indexName);
      case "weaviate":
        return Boolean(data.host);
      default:
        return true;
    }
  },
  { message: "Pinecone requires apiKey and indexName; Weaviate requires host" }
);
|
|
1374
2436
|
var RetryConfigSchema = z.object({
|
|
1375
2437
|
enabled: z.boolean().default(true).describe("Enable retry for failed operations"),
|
|
1376
2438
|
maxAttempts: z.number().int().min(1).max(10).default(3).describe("Maximum retry attempts"),
|
|
@@ -1457,6 +2519,9 @@ var PlatformConfigSchema = z.object({
|
|
|
1457
2519
|
storage: StorageConfigSchema.default({ provider: "memory" }),
|
|
1458
2520
|
email: EmailConfigSchema.default({ provider: "memory" }),
|
|
1459
2521
|
queue: QueueConfigSchema.default({ provider: "memory" }),
|
|
2522
|
+
// AI configurations
|
|
2523
|
+
ai: AIConfigSchema.default({ enabled: false }),
|
|
2524
|
+
rag: RAGConfigSchema.default({ enabled: false }),
|
|
1460
2525
|
// Resilience configuration
|
|
1461
2526
|
resilience: ResilienceConfigSchema.default({}),
|
|
1462
2527
|
// Observability configuration
|
|
@@ -1512,6 +2577,28 @@ function loadConfig() {
|
|
|
1512
2577
|
concurrency: process.env.QUEUE_CONCURRENCY ? parseInt(process.env.QUEUE_CONCURRENCY) : void 0,
|
|
1513
2578
|
maxRetries: process.env.QUEUE_MAX_RETRIES ? parseInt(process.env.QUEUE_MAX_RETRIES) : void 0
|
|
1514
2579
|
},
|
|
2580
|
+
ai: {
|
|
2581
|
+
enabled: process.env.AI_ENABLED === "true",
|
|
2582
|
+
provider: process.env.AI_PROVIDER || "memory",
|
|
2583
|
+
apiKey: process.env.OPENAI_API_KEY || process.env.ANTHROPIC_API_KEY || process.env.GOOGLE_AI_API_KEY,
|
|
2584
|
+
model: process.env.AI_MODEL,
|
|
2585
|
+
maxTokens: process.env.AI_MAX_TOKENS ? parseInt(process.env.AI_MAX_TOKENS) : void 0,
|
|
2586
|
+
temperature: process.env.AI_TEMPERATURE ? parseFloat(process.env.AI_TEMPERATURE) : void 0,
|
|
2587
|
+
timeout: process.env.AI_TIMEOUT ? parseInt(process.env.AI_TIMEOUT) : void 0,
|
|
2588
|
+
baseUrl: process.env.AI_BASE_URL
|
|
2589
|
+
},
|
|
2590
|
+
rag: {
|
|
2591
|
+
enabled: process.env.RAG_ENABLED === "true",
|
|
2592
|
+
provider: process.env.RAG_PROVIDER || "memory",
|
|
2593
|
+
apiKey: process.env.PINECONE_API_KEY,
|
|
2594
|
+
environment: process.env.PINECONE_ENVIRONMENT,
|
|
2595
|
+
indexName: process.env.PINECONE_INDEX || process.env.RAG_INDEX_NAME,
|
|
2596
|
+
namespace: process.env.RAG_NAMESPACE,
|
|
2597
|
+
host: process.env.WEAVIATE_HOST,
|
|
2598
|
+
embeddingProvider: process.env.EMBEDDING_PROVIDER || "memory",
|
|
2599
|
+
embeddingApiKey: process.env.EMBEDDING_API_KEY || process.env.OPENAI_API_KEY,
|
|
2600
|
+
embeddingModel: process.env.EMBEDDING_MODEL
|
|
2601
|
+
},
|
|
1515
2602
|
resilience: {
|
|
1516
2603
|
retry: {
|
|
1517
2604
|
enabled: process.env.RESILIENCE_RETRY_ENABLED !== "false",
|
|
@@ -2079,6 +3166,8 @@ var NoopMetrics = class {
|
|
|
2079
3166
|
};
|
|
2080
3167
|
|
|
2081
3168
|
// src/factory.ts
|
|
3169
|
+
init_IAI();
|
|
3170
|
+
init_IRAG();
|
|
2082
3171
|
function createLogger(config) {
|
|
2083
3172
|
if (!config.observability.logging) {
|
|
2084
3173
|
return new NoopLogger();
|
|
@@ -2097,7 +3186,7 @@ function createMetrics(config) {
|
|
|
2097
3186
|
}
|
|
2098
3187
|
function createPlatform(config) {
|
|
2099
3188
|
const finalConfig = config ? deepMerge(loadConfig(), config) : loadConfig();
|
|
2100
|
-
const hasProductionAdapters = finalConfig.database.provider !== "memory" || finalConfig.cache.provider !== "memory" || finalConfig.storage.provider !== "memory" || finalConfig.email.provider !== "memory" && finalConfig.email.provider !== "console" || finalConfig.observability.tracing.provider === "otlp";
|
|
3189
|
+
const hasProductionAdapters = finalConfig.database.provider !== "memory" || finalConfig.cache.provider !== "memory" || finalConfig.storage.provider !== "memory" || finalConfig.email.provider !== "memory" && finalConfig.email.provider !== "console" || finalConfig.observability.tracing.provider === "otlp" || finalConfig.ai.enabled && finalConfig.ai.provider !== "memory" || finalConfig.rag.enabled && finalConfig.rag.provider !== "memory";
|
|
2101
3190
|
if (hasProductionAdapters) {
|
|
2102
3191
|
console.warn(
|
|
2103
3192
|
"createPlatform() is synchronous and cannot initialize production adapters. Use createPlatformAsync() for production adapters, or use memory/console adapters."
|
|
@@ -2111,10 +3200,12 @@ function createPlatform(config) {
|
|
|
2111
3200
|
const logger = createLogger(finalConfig);
|
|
2112
3201
|
const metrics = createMetrics(finalConfig);
|
|
2113
3202
|
const tracing = finalConfig.observability.tracing.provider === "memory" ? new MemoryTracing() : new NoopTracing();
|
|
2114
|
-
|
|
3203
|
+
const ai = finalConfig.ai.enabled ? new MemoryAI() : null;
|
|
3204
|
+
const rag = finalConfig.rag.enabled ? new MemoryRAG() : null;
|
|
3205
|
+
return createPlatformFromAdapters(db, cache, storage, email, queue, logger, metrics, tracing, ai, rag);
|
|
2115
3206
|
}
|
|
2116
|
-
function createPlatformFromAdapters(db, cache, storage, email, queue, logger, metrics, tracing) {
|
|
2117
|
-
|
|
3207
|
+
function createPlatformFromAdapters(db, cache, storage, email, queue, logger, metrics, tracing, ai, rag) {
|
|
3208
|
+
const platform = {
|
|
2118
3209
|
db,
|
|
2119
3210
|
cache,
|
|
2120
3211
|
storage,
|
|
@@ -2149,6 +3240,13 @@ function createPlatformFromAdapters(db, cache, storage, email, queue, logger, me
|
|
|
2149
3240
|
await Promise.all([db.close(), cache.close(), queue.close(), tracing.close()]);
|
|
2150
3241
|
}
|
|
2151
3242
|
};
|
|
3243
|
+
if (ai) {
|
|
3244
|
+
platform.ai = ai;
|
|
3245
|
+
}
|
|
3246
|
+
if (rag) {
|
|
3247
|
+
platform.rag = rag;
|
|
3248
|
+
}
|
|
3249
|
+
return platform;
|
|
2152
3250
|
}
|
|
2153
3251
|
function deepMerge(target, source) {
|
|
2154
3252
|
const result = { ...target };
|