@nexo-labs/payload-typesense 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +185 -0
- package/dist/index.d.mts +27716 -0
- package/dist/index.d.mts.map +1 -0
- package/dist/index.mjs +3720 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +160 -0
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,3720 @@
|
|
|
1
|
+
import OpenAI from "openai";
|
|
2
|
+
import { GoogleGenerativeAI, TaskType } from "@google/generative-ai";
|
|
3
|
+
import Typesense from "typesense";
|
|
4
|
+
import { z } from "zod";
|
|
5
|
+
import { MarkdownTextSplitter, RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
|
|
6
|
+
|
|
7
|
+
//#region src/core/di/container.ts
|
|
8
|
+
/**
 * Minimal dependency-injection container.
 *
 * Two registration modes are supported:
 *  - `singleton(token, instance)` stores a ready-made value returned as-is.
 *  - `register(token, factory)` stores a factory invoked on every resolve.
 * Singletons take precedence over factories when both exist for a token.
 */
var DIContainer = class {
  services = /* @__PURE__ */ new Map();
  factories = /* @__PURE__ */ new Map();

  /** Register a factory that produces a fresh value each time the token is resolved. */
  register(token, factory) {
    this.factories.set(token, factory);
  }

  /** Register a pre-built instance for the token. */
  singleton(token, instance) {
    this.services.set(token, instance);
  }

  /**
   * Resolve a token to a value.
   * @throws {Error} when the token has neither a singleton nor a usable factory.
   */
  resolve(token) {
    if (this.services.has(token)) return this.services.get(token);
    if (this.factories.has(token)) {
      const make = this.factories.get(token);
      if (make) return make();
    }
    throw new Error(`Service not found: ${token.toString()}`);
  }

  /** Whether anything (singleton or factory) is registered under the token. */
  has(token) {
    return this.services.has(token) || this.factories.has(token);
  }

  /**
   * Clears all registered services and factories.
   * Useful for testing.
   */
  clear() {
    this.services.clear();
    this.factories.clear();
  }
};
|
|
37
|
+
|
|
38
|
+
//#endregion
|
|
39
|
+
//#region src/core/di/tokens.ts
|
|
40
|
+
/**
 * DI tokens used to register and resolve the plugin's services.
 * `Symbol.for` is used so repeated bundles share the same registry keys.
 */
const TOKENS = {
  // core
  CONFIG: Symbol.for("Config"),
  LOGGER: Symbol.for("Logger"),
  TYPESENSE_CLIENT: Symbol.for("TypesenseClient"),
  // embedding
  EMBEDDING_PROVIDER: Symbol.for("EmbeddingProvider"),
  EMBEDDING_SERVICE: Symbol.for("EmbeddingService"),
  // features
  SEARCH_SERVICE: Symbol.for("SearchService"),
  SYNC_SERVICE: Symbol.for("SyncService"),
  RAG_SERVICE: Symbol.for("RAGService")
};
|
|
50
|
+
|
|
51
|
+
//#endregion
|
|
52
|
+
//#region src/core/logging/logger.ts
|
|
53
|
+
/** Numeric severity per level; higher numbers are more severe, `silent` disables all. */
const LOG_LEVELS = {
  debug: 0,
  info: 1,
  warn: 2,
  error: 3,
  silent: 4
};

/**
 * Lightweight console-backed logger with level filtering, an enable switch,
 * and a configurable message prefix. Context objects are JSON-serialized
 * and appended to the message.
 */
var Logger = class {
  level;
  prefix;
  enabled;

  constructor(config = {}) {
    this.level = config.level || "info";
    this.prefix = config.prefix || "[payload-typesense]";
    this.enabled = config.enabled !== false;
  }

  /**
   * Update logger configuration; only fields present on `config` are applied.
   */
  configure(config) {
    if (config.level !== undefined) this.level = config.level;
    if (config.prefix !== undefined) this.prefix = config.prefix;
    if (config.enabled !== undefined) this.enabled = config.enabled;
  }

  /**
   * Check if a log level should be output.
   */
  shouldLog(level) {
    return this.enabled && LOG_LEVELS[level] >= LOG_LEVELS[this.level];
  }

  /**
   * Format log message with prefix and optional JSON-serialized context.
   */
  formatMessage(message, context) {
    const base = `${this.prefix} ${message}`;
    if (!context || Object.keys(context).length === 0) return base;
    return `${base} ${JSON.stringify(context)}`;
  }

  /**
   * Debug level logging - detailed information for debugging.
   */
  debug(message, context) {
    if (!this.shouldLog("debug")) return;
    console.debug(this.formatMessage(message, context));
  }

  /**
   * Info level logging - general informational messages.
   */
  info(message, context) {
    if (!this.shouldLog("info")) return;
    console.log(this.formatMessage(message, context));
  }

  /**
   * Warning level logging - warning messages.
   */
  warn(message, context) {
    if (!this.shouldLog("warn")) return;
    console.warn(this.formatMessage(message, context));
  }

  /**
   * Error level logging. Error instances are expanded into
   * { message, stack, name }; anything else is stringified.
   */
  error(message, error, context) {
    if (!this.shouldLog("error")) return;
    const details = error instanceof Error
      ? { message: error.message, stack: error.stack, name: error.name }
      : String(error);
    console.error(this.formatMessage(message, { ...context, error: details }));
  }

  /**
   * Get current log level.
   */
  getLevel() {
    return this.level;
  }

  /**
   * Check if logger is enabled.
   */
  isEnabled() {
    return this.enabled;
  }
};
|
|
138
|
+
/** Shared module-level logger instance backing the `logger` facade below. */
let defaultLogger = new Logger();
/**
 * Configure the default logger.
 */
const configureLogger = (config) => {
  defaultLogger.configure(config);
};
/**
 * Create a new logger instance with custom configuration.
 */
const createLogger = (config) => new Logger(config);
/**
 * Convenience facade that forwards every call to the shared default logger,
 * so callers never hold a direct reference to the mutable instance.
 */
const logger = {
  debug: (message, context) => defaultLogger.debug(message, context),
  info: (message, context) => defaultLogger.info(message, context),
  warn: (message, context) => defaultLogger.warn(message, context),
  error: (message, error, context) => defaultLogger.error(message, error, context),
  configure: configureLogger,
  getLevel: () => defaultLogger.getLevel(),
  isEnabled: () => defaultLogger.isEnabled()
};
|
|
160
|
+
|
|
161
|
+
//#endregion
|
|
162
|
+
//#region src/core/config/constants.ts
|
|
163
|
+
/**
 * Constants for payload-typesense plugin.
 * Centralizes all magic numbers and configuration defaults.
 */
/**
 * Default dimensions for OpenAI text-embedding-3-large model.
 */
const DEFAULT_EMBEDDING_DIMENSIONS = 3072;
/**
 * Default OpenAI embedding model.
 */
const DEFAULT_EMBEDDING_MODEL = "text-embedding-3-large";
/**
 * Default Gemini embedding model.
 */
const DEFAULT_GEMINI_EMBEDDING_MODEL = "gemini-embedding-001";
/**
 * Default chunk size for text splitting (in characters).
 */
const DEFAULT_CHUNK_SIZE = 1000;
/**
 * Default overlap for text splitting (in characters).
 */
const DEFAULT_OVERLAP = 200;
/**
 * Default overlap between chunks (in characters).
 */
const DEFAULT_CHUNK_OVERLAP = 200;
/**
 * Default alpha value for hybrid search (0 = pure semantic, 1 = pure keyword).
 */
const DEFAULT_HYBRID_SEARCH_ALPHA = 0.5;
/**
 * Default number of search results to return.
 */
const DEFAULT_SEARCH_LIMIT = 10;
/**
 * Default TTL for cache entries (in milliseconds) - 5 minutes.
 */
const DEFAULT_CACHE_TTL_MS = 300 * 1000;
/**
 * Default maximum tokens for RAG responses.
 */
const DEFAULT_RAG_MAX_TOKENS = 1000;
/**
 * Default number of search results to use for RAG context.
 */
const DEFAULT_RAG_CONTEXT_LIMIT = 5;
/**
 * Default session TTL (in seconds) - 30 minutes.
 */
const DEFAULT_SESSION_TTL_SEC = 1800;
/**
 * Default OpenAI model for RAG chat.
 */
const DEFAULT_RAG_LLM_MODEL = "gpt-4o-mini";
/**
 * Minimum required text length for embedding generation.
 */
const MIN_EMBEDDING_TEXT_LENGTH = 1;
/**
 * Error codes for structured error handling.
 * Ranges: 1xxx config, 2xxx Typesense, 3xxx embedding, 4xxx RAG,
 * 5xxx chunking, 9xxx generic.
 */
const ErrorCodes = {
  INVALID_CONFIG: "ERR_1001",
  MISSING_API_KEY: "ERR_1002",
  INVALID_EMBEDDING_CONFIG: "ERR_1003",
  INVALID_RAG_CONFIG: "ERR_1004",
  TYPESENSE_CONNECTION_FAILED: "ERR_2001",
  TYPESENSE_COLLECTION_NOT_FOUND: "ERR_2002",
  TYPESENSE_SEARCH_FAILED: "ERR_2003",
  TYPESENSE_SYNC_FAILED: "ERR_2004",
  TYPESENSE_DELETE_FAILED: "ERR_2005",
  EMBEDDING_GENERATION_FAILED: "ERR_3001",
  INVALID_EMBEDDING_DIMENSIONS: "ERR_3002",
  OPENAI_API_ERROR: "ERR_3003",
  RAG_SEARCH_FAILED: "ERR_4001",
  RAG_SESSION_NOT_FOUND: "ERR_4002",
  RAG_CONVERSATION_FAILED: "ERR_4003",
  RAG_TOKEN_LIMIT_EXCEEDED: "ERR_4004",
  CHUNKING_FAILED: "ERR_5001",
  INVALID_CHUNK_SIZE: "ERR_5002",
  UNKNOWN_ERROR: "ERR_9001",
  VALIDATION_ERROR: "ERR_9002"
};
|
|
248
|
+
|
|
249
|
+
//#endregion
|
|
250
|
+
//#region src/features/embedding/providers/openai-provider.ts
|
|
251
|
+
/**
 * Embedding provider backed by the OpenAI embeddings API.
 * Resolves to null on empty input or API failure instead of throwing,
 * so callers can treat missing embeddings as a soft failure.
 */
var OpenAIEmbeddingProvider = class {
  client;
  model;
  dimensions;

  /**
   * @param config - { apiKey, model?, dimensions? }
   * @param log - logger used for error reporting
   * @throws {Error} when `config.apiKey` is missing
   */
  constructor(config, log) {
    this.logger = log;
    if (!config.apiKey) throw new Error("OpenAI API key is required");
    this.client = new OpenAI({ apiKey: config.apiKey });
    this.model = config.model || DEFAULT_EMBEDDING_MODEL;
    this.dimensions = config.dimensions || DEFAULT_EMBEDDING_DIMENSIONS;
  }

  /** Embed one text; returns { embedding, usage } or null. */
  async generateEmbedding(text) {
    if (!text || text.trim().length < MIN_EMBEDDING_TEXT_LENGTH) return null;
    try {
      const response = await this.client.embeddings.create({
        model: this.model,
        input: text.trim(),
        dimensions: this.dimensions
      });
      const embedding = response.data[0]?.embedding;
      if (!embedding) return null;
      const usage = {
        promptTokens: response.usage?.prompt_tokens || 0,
        totalTokens: response.usage?.total_tokens || 0
      };
      return { embedding, usage };
    } catch (error) {
      this.logger.error("OpenAI embedding generation failed", error, { model: this.model });
      return null;
    }
  }

  /** Embed many texts in a single API call; empty entries are filtered out first. */
  async generateBatchEmbeddings(texts) {
    const validTexts = texts.filter((t) => t && t.trim().length >= MIN_EMBEDDING_TEXT_LENGTH);
    if (validTexts.length === 0) return null;
    try {
      const response = await this.client.embeddings.create({
        model: this.model,
        input: validTexts.map((t) => t.trim()),
        dimensions: this.dimensions
      });
      return {
        embeddings: response.data.map((d) => d.embedding),
        usage: {
          promptTokens: response.usage?.prompt_tokens || 0,
          totalTokens: response.usage?.total_tokens || 0
        }
      };
    } catch (error) {
      this.logger.error("OpenAI batch embedding generation failed", error, {
        model: this.model,
        count: texts.length
      });
      return null;
    }
  }
};
|
|
309
|
+
|
|
310
|
+
//#endregion
|
|
311
|
+
//#region src/features/embedding/providers/gemini-provider.ts
|
|
312
|
+
/**
 * Embedding provider backed by the Google Gemini API.
 * The Gemini SDK embeds one content at a time and does not report token
 * usage, so batches run sequentially and usage is estimated (~4 chars/token).
 */
var GeminiEmbeddingProvider = class {
  client;
  model;

  /**
   * @param config - { apiKey, model? }
   * @param log - logger used for error reporting
   * @throws {Error} when `config.apiKey` is missing
   */
  constructor(config, log) {
    this.logger = log;
    if (!config.apiKey) throw new Error("Gemini API key is required");
    this.client = new GoogleGenerativeAI(config.apiKey);
    this.model = config.model || DEFAULT_GEMINI_EMBEDDING_MODEL;
  }

  /** Embed one text; returns { embedding, usage } (usage estimated) or null. */
  async generateEmbedding(text) {
    if (!text || text.trim().length < MIN_EMBEDDING_TEXT_LENGTH) return null;
    try {
      const result = await this.client.getGenerativeModel({ model: this.model }).embedContent({
        content: {
          role: "user",
          parts: [{ text: text.trim() }]
        },
        taskType: TaskType.RETRIEVAL_DOCUMENT
      });
      const embedding = result.embedding.values;
      // Gemini does not return token counts; approximate at ~4 characters/token.
      const estimatedTokens = Math.ceil(text.length / 4);
      return {
        embedding,
        usage: {
          promptTokens: estimatedTokens,
          totalTokens: estimatedTokens
        }
      };
    } catch (error) {
      this.logger.error("Gemini embedding generation failed", error, { model: this.model });
      return null;
    }
  }

  /** Embed many texts sequentially (the SDK has no batch endpoint); or null. */
  async generateBatchEmbeddings(texts) {
    const validTexts = texts.filter((t) => t && t.trim().length >= MIN_EMBEDDING_TEXT_LENGTH);
    if (validTexts.length === 0) return null;
    try {
      const model = this.client.getGenerativeModel({ model: this.model });
      const embeddings = [];
      let totalTokens = 0;
      for (const text of validTexts) {
        const result = await model.embedContent({
          content: {
            role: "user",
            parts: [{ text: text.trim() }]
          },
          taskType: TaskType.RETRIEVAL_DOCUMENT
        });
        embeddings.push(result.embedding.values);
        totalTokens += Math.ceil(text.length / 4);
      }
      return {
        embeddings,
        usage: {
          promptTokens: totalTokens,
          totalTokens
        }
      };
    } catch (error) {
      this.logger.error("Gemini batch embedding generation failed", error, {
        model: this.model,
        count: texts.length
      });
      return null;
    }
  }
};
|
|
378
|
+
|
|
379
|
+
//#endregion
|
|
380
|
+
//#region src/features/embedding/embedding-service.ts
|
|
381
|
+
/**
 * Thin service wrapper over an embedding provider: strips the usage
 * metadata and exposes raw vectors to the rest of the plugin.
 */
var EmbeddingServiceImpl = class {
  /**
   * @param provider - object with generateEmbedding / generateBatchEmbeddings
   * @param log - logger instance
   * @param config - embedding configuration ({ dimensions?, ... })
   */
  constructor(provider, log, config) {
    this.provider = provider;
    this.logger = log;
    this.config = config;
  }

  /** Resolve one text to its raw embedding vector, or null on failure. */
  async getEmbedding(text) {
    const result = await this.provider.generateEmbedding(text);
    return result ? result.embedding : null;
  }

  /** Resolve many texts to their embedding vectors, or null on failure. */
  async getEmbeddingsBatch(texts) {
    const result = await this.provider.generateBatchEmbeddings(texts);
    return result ? result.embeddings : null;
  }

  /** Configured vector dimensionality (defaults to the OpenAI large-model size). */
  getDimensions() {
    return this.config.dimensions || DEFAULT_EMBEDDING_DIMENSIONS;
  }
};
|
|
401
|
+
|
|
402
|
+
//#endregion
|
|
403
|
+
//#region src/core/di/setup.ts
|
|
404
|
+
/**
 * Build and wire the DI container for the plugin.
 *
 * Registers the config, a logger, the embedding provider selected by
 * `config.features.embedding.type` ("gemini" or OpenAI by default), and a
 * factory for the embedding service.
 *
 * @param config - plugin configuration; `features.embedding` is required
 * @returns the populated DIContainer
 * @throws {Error} when `config.features.embedding` is missing
 */
const setupContainer = (config) => {
  const container = new DIContainer();
  container.singleton(TOKENS.CONFIG, config);
  const log = new Logger({
    enabled: true,
    prefix: "[payload-typesense]"
  });
  container.singleton(TOKENS.LOGGER, log);
  const embeddingConfig = config.features.embedding;
  if (!embeddingConfig) throw new Error("Embedding configuration missing");
  let provider;
  if (embeddingConfig.type === "gemini") provider = new GeminiEmbeddingProvider(embeddingConfig, log);
  else provider = new OpenAIEmbeddingProvider(embeddingConfig, log);
  container.singleton(TOKENS.EMBEDDING_PROVIDER, provider);
  container.register(TOKENS.EMBEDDING_SERVICE, () => new EmbeddingServiceImpl(provider, log, embeddingConfig));
  // SECURITY FIX: the previous code logged the whole embedding config object,
  // which includes the provider API key. Log only non-sensitive fields.
  log.debug("Embedding service registered", {
    provider: embeddingConfig.type || "openai",
    model: embeddingConfig.model
  });
  return container;
};
|
|
422
|
+
|
|
423
|
+
//#endregion
|
|
424
|
+
//#region src/core/client/typesense-client.ts
|
|
425
|
+
/**
 * Create a Typesense client from the plugin's connection config.
 * The connection timeout defaults to 2 seconds when not specified.
 */
const createTypesenseClient = (typesenseConfig) => {
  const { apiKey, nodes, connectionTimeoutSeconds } = typesenseConfig;
  return new Typesense.Client({
    apiKey,
    connectionTimeoutSeconds: connectionTimeoutSeconds || 2,
    nodes
  });
};
|
|
432
|
+
/**
 * Probe the Typesense health endpoint.
 * @param client - a Typesense client
 * @returns true when the health check succeeds, false on any error
 */
const testTypesenseConnection = async (client) => {
  try {
    await client.health.retrieve();
    return true;
  } catch {
    return false;
  }
};
|
|
440
|
+
|
|
441
|
+
//#endregion
|
|
442
|
+
//#region src/features/embedding/embeddings.ts
|
|
443
|
+
// Module-level client caches. Clients are created lazily and re-created
// whenever the effective API key changes (explicit argument wins over env).
let openaiClient = null;
let currentOpenAIApiKey = null;
let geminiClient = null;
let currentGeminiApiKey = null;

/** Lazily create (and cache) an OpenAI client; returns null without a key. */
const getOpenAIClient = (apiKey) => {
  const key = apiKey || process.env.OPENAI_API_KEY;
  if (!key) return null;
  const stale = !openaiClient || currentOpenAIApiKey !== key;
  if (stale) {
    openaiClient = new OpenAI({ apiKey: key });
    currentOpenAIApiKey = key;
  }
  return openaiClient;
};

/** Lazily create (and cache) a Gemini client; returns null without a key. */
const getGeminiClient = (apiKey) => {
  const key = apiKey || process.env.GOOGLE_API_KEY;
  if (!key) return null;
  const stale = !geminiClient || currentGeminiApiKey !== key;
  if (stale) {
    geminiClient = new GoogleGenerativeAI(key);
    currentGeminiApiKey = key;
  }
  return geminiClient;
};
|
|
465
|
+
/**
 * Generates an embedding for the given text using OpenAI or Gemini API.
 * @param text - The text to generate an embedding for
 * @param config - Optional embedding configuration (provider, model, dimensions, apiKey)
 * @returns The embedding vector as an array of numbers, or null if generation fails
 */
const generateEmbedding = async (text, config) => {
  if (!text || text.trim().length < MIN_EMBEDDING_TEXT_LENGTH) {
    logger.debug("Skipping embedding generation for empty or invalid text");
    return null;
  }
  const providerType = config?.type || "openai";
  return providerType === "gemini"
    ? generateGeminiEmbedding(text, config)
    : generateOpenAIEmbedding(text, config);
};
|
|
479
|
+
/**
 * Generates an embedding using the OpenAI API.
 * Returns null when the client is unconfigured, the response is malformed,
 * the vector length does not match the requested dimensions, or the call fails.
 */
const generateOpenAIEmbedding = async (text, config) => {
  const client = getOpenAIClient(config?.apiKey);
  if (!client) {
    logger.debug("OpenAI API key not configured, skipping embedding generation");
    return null;
  }
  const model = config?.model || process.env.OPENAI_EMBEDDING_MODEL || DEFAULT_EMBEDDING_MODEL;
  const dimensions = config?.dimensions || DEFAULT_EMBEDDING_DIMENSIONS;
  try {
    logger.debug("Generating OpenAI embedding", { model, dimensions, textLength: text.length });
    const response = await client.embeddings.create({
      model,
      input: text.trim(),
      dimensions
    });
    const embedding = response.data[0]?.embedding;
    logger.debug("OpenAI embedding generated", { embeddingLength: embedding?.length });
    if (!embedding || !Array.isArray(embedding) || embedding.length !== dimensions) {
      logger.warn("Generated embedding has invalid dimensions", {
        expected: dimensions,
        received: embedding?.length
      });
      return null;
    }
    return embedding;
  } catch (error) {
    logger.error("Failed to generate OpenAI embedding", error, {
      textLength: text.length,
      model: config?.model
    });
    return null;
  }
};
|
|
518
|
+
/**
 * Generates an embedding using the Google Gemini API.
 * Returns null when the client is unconfigured, the vector length does not
 * match the expected dimensions, or the call fails.
 */
const generateGeminiEmbedding = async (text, config) => {
  const client = getGeminiClient(config?.apiKey);
  if (!client) {
    logger.debug("Google API key not configured, skipping embedding generation");
    return null;
  }
  const model = config?.model || DEFAULT_GEMINI_EMBEDDING_MODEL;
  const dimensions = config?.dimensions || DEFAULT_EMBEDDING_DIMENSIONS;
  try {
    logger.debug("Generating Gemini embedding", { model, dimensions, textLength: text.length });
    const result = await client.getGenerativeModel({ model }).embedContent({
      content: {
        role: "user",
        parts: [{ text: text.trim() }]
      },
      taskType: TaskType.RETRIEVAL_DOCUMENT
    });
    const embedding = result.embedding.values;
    logger.debug("Gemini embedding generated", { embeddingLength: embedding?.length });
    if (!embedding || !Array.isArray(embedding) || embedding.length !== dimensions) {
      logger.warn("Generated embedding has invalid dimensions", {
        expected: dimensions,
        received: embedding?.length
      });
      return null;
    }
    return embedding;
  } catch (error) {
    logger.error("Failed to generate Gemini embedding", error, {
      textLength: text.length,
      model: config?.model
    });
    return null;
  }
};
|
|
559
|
+
/**
 * Generate embedding with usage tracking.
 *
 * Returns both the embedding and usage information (tokens used).
 *
 * @param text - The text to generate an embedding for
 * @param config - Optional embedding configuration
 * @returns Embedding with usage information, or null if generation fails
 */
const generateEmbeddingWithUsage = async (text, config) => {
  if (!text || text.trim().length < MIN_EMBEDDING_TEXT_LENGTH) {
    logger.debug("Skipping embedding generation for empty or invalid text");
    return null;
  }
  const providerType = config?.type || "openai";
  return providerType === "gemini"
    ? generateGeminiEmbeddingWithUsage(text, config)
    : generateOpenAIEmbeddingWithUsage(text, config);
};
|
|
576
|
+
/**
 * Generate an OpenAI embedding with usage tracking.
 * Returns { embedding, usage } or null on misconfiguration/failure.
 */
const generateOpenAIEmbeddingWithUsage = async (text, config) => {
  const client = getOpenAIClient(config?.apiKey);
  if (!client) {
    logger.debug("OpenAI API key not configured, skipping embedding generation");
    return null;
  }
  const model = config?.model || process.env.OPENAI_EMBEDDING_MODEL || DEFAULT_EMBEDDING_MODEL;
  const dimensions = config?.dimensions || DEFAULT_EMBEDDING_DIMENSIONS;
  try {
    logger.debug("Generating OpenAI embedding with usage tracking", { model, dimensions });
    const response = await client.embeddings.create({
      model,
      input: text.trim(),
      dimensions
    });
    const embedding = response.data[0]?.embedding;
    if (!embedding || !Array.isArray(embedding) || embedding.length !== dimensions) {
      logger.warn("Generated embedding has invalid dimensions", {
        expected: dimensions,
        received: embedding?.length
      });
      return null;
    }
    return {
      embedding,
      usage: {
        promptTokens: response.usage?.prompt_tokens || 0,
        totalTokens: response.usage?.total_tokens || 0
      }
    };
  } catch (error) {
    logger.error("Failed to generate OpenAI embedding with usage", error, {
      textLength: text.length,
      model: config?.model
    });
    return null;
  }
};
|
|
620
|
+
/**
 * Generate a Gemini embedding with usage tracking.
 * Note: Gemini doesn't provide token usage, so it is estimated
 * at roughly 4 characters per token.
 */
const generateGeminiEmbeddingWithUsage = async (text, config) => {
  const embedding = await generateGeminiEmbedding(text, config);
  if (!embedding) return null;
  const estimatedTokens = Math.ceil(text.length / 4);
  return {
    embedding,
    usage: {
      promptTokens: estimatedTokens,
      totalTokens: estimatedTokens
    }
  };
};
|
|
636
|
+
/**
 * Generate embeddings for multiple texts with usage tracking (batch).
 *
 * @param texts - Array of texts to generate embeddings for
 * @param config - Optional embedding configuration
 * @returns Embeddings with total usage information, or null if generation fails
 */
const generateEmbeddingsBatchWithUsage = async (texts, config) => {
  if (!texts || texts.length === 0) {
    logger.debug("No texts provided for batch embedding generation");
    return null;
  }
  const validTexts = texts.filter((t) => t && t.trim().length >= MIN_EMBEDDING_TEXT_LENGTH);
  if (validTexts.length === 0) {
    logger.debug("No valid texts after filtering for batch embedding generation");
    return null;
  }
  const providerType = config?.type || "openai";
  return providerType === "gemini"
    ? generateGeminiBatchEmbeddingsWithUsage(validTexts, config)
    : generateOpenAIBatchEmbeddingsWithUsage(validTexts, config);
};
|
|
656
|
+
/**
 * Generate OpenAI batch embeddings with usage tracking.
 * All inputs are embedded in a single API call; if any returned vector has
 * the wrong dimensionality the whole batch is rejected (returns null).
 */
const generateOpenAIBatchEmbeddingsWithUsage = async (validTexts, config) => {
  const client = getOpenAIClient(config?.apiKey);
  if (!client) {
    logger.debug("OpenAI API key not configured, skipping batch embedding generation");
    return null;
  }
  const model = config?.model || process.env.OPENAI_EMBEDDING_MODEL || DEFAULT_EMBEDDING_MODEL;
  const dimensions = config?.dimensions || DEFAULT_EMBEDDING_DIMENSIONS;
  try {
    logger.debug("Generating OpenAI batch embeddings with usage tracking", {
      model,
      dimensions,
      batchSize: validTexts.length
    });
    const response = await client.embeddings.create({
      model,
      input: validTexts.map((t) => t.trim()),
      dimensions
    });
    const embeddings = response.data.map((item) => item.embedding);
    const allValid = embeddings.every((emb) => Array.isArray(emb) && emb.length === dimensions);
    if (!allValid) {
      logger.warn("Some generated embeddings have invalid dimensions", {
        expected: dimensions,
        batchSize: embeddings.length
      });
      return null;
    }
    logger.info("OpenAI batch embeddings generated successfully", {
      count: embeddings.length,
      totalTokens: response.usage?.total_tokens || 0
    });
    return {
      embeddings,
      usage: {
        promptTokens: response.usage?.prompt_tokens || 0,
        totalTokens: response.usage?.total_tokens || 0
      }
    };
  } catch (error) {
    logger.error("Failed to generate OpenAI batch embeddings with usage", error, {
      batchSize: validTexts.length,
      model: config?.model
    });
    return null;
  }
};
|
|
705
|
+
/**
 * Generate Gemini batch embeddings with usage tracking.
 * Note: the Gemini API handles one text at a time, so texts are embedded
 * sequentially; token usage is estimated at ~4 characters per token.
 */
const generateGeminiBatchEmbeddingsWithUsage = async (validTexts, config) => {
  const client = getGeminiClient(config?.apiKey);
  if (!client) {
    logger.debug("Google API key not configured, skipping batch embedding generation");
    return null;
  }
  const model = config?.model || DEFAULT_GEMINI_EMBEDDING_MODEL;
  const dimensions = config?.dimensions || DEFAULT_EMBEDDING_DIMENSIONS;
  try {
    logger.debug("Generating Gemini batch embeddings with usage tracking", {
      model,
      dimensions,
      batchSize: validTexts.length
    });
    const embeddingModel = client.getGenerativeModel({ model });
    const embeddings = [];
    let totalEstimatedTokens = 0;
    for (const text of validTexts) {
      const result = await embeddingModel.embedContent({
        content: {
          role: "user",
          parts: [{ text: text.trim() }]
        },
        taskType: TaskType.RETRIEVAL_DOCUMENT
      });
      embeddings.push(result.embedding.values);
      totalEstimatedTokens += Math.ceil(text.length / 4);
    }
    const allValid = embeddings.every((emb) => Array.isArray(emb) && emb.length === dimensions);
    if (!allValid) {
      logger.warn("Some generated embeddings have invalid dimensions", {
        expected: dimensions,
        batchSize: embeddings.length
      });
      return null;
    }
    logger.info("Gemini batch embeddings generated successfully", {
      count: embeddings.length,
      estimatedTokens: totalEstimatedTokens
    });
    return {
      embeddings,
      usage: {
        promptTokens: totalEstimatedTokens,
        totalTokens: totalEstimatedTokens
      }
    };
  } catch (error) {
    logger.error("Failed to generate Gemini batch embeddings with usage", error, {
      batchSize: validTexts.length,
      model: config?.model
    });
    return null;
  }
};
|
|
763
|
+
|
|
764
|
+
//#endregion
|
|
765
|
+
//#region src/features/rag/query-builder.ts
|
|
766
|
+
/**
 * Build the Typesense conversational search URL with all necessary parameters.
 *
 * @param config - Query configuration
 * @param config.userMessage - The user's message/query
 * @param config.chatId - Optional conversation ID for follow-up questions
 * @param conversationModelId - The conversation model ID in Typesense
 * @param typesenseConfig - Typesense connection config (first node is used)
 * @returns URL for the Typesense multi_search endpoint with conversation parameters
 */
function buildConversationalUrl(config, conversationModelId, typesenseConfig) {
  const [node] = typesenseConfig.nodes;
  const protocol = node.protocol || "http";
  const url = new URL(`${protocol}://${node.host}:${node.port}/multi_search`);
  url.searchParams.set("q", config.userMessage);
  url.searchParams.set("conversation", "true");
  url.searchParams.set("conversation_model_id", conversationModelId);
  if (config.chatId) url.searchParams.set("conversation_id", config.chatId);
  url.searchParams.set("conversation_stream", "true");
  return url;
}
|
|
786
|
+
/**
 * Build multi-search requests for Typesense with hybrid search configuration.
 *
 * Produces one search request per target collection, each combining a keyword
 * query over `chunk_text,title,headers` with a vector query on `embedding`.
 *
 * @param config - Query configuration including embedding, collections, and filters
 * @param config.searchCollections - Collections to search
 * @param config.queryEmbedding - Query embedding vector (array of numbers)
 * @param config.selectedDocuments - Optional parent document IDs to restrict the search to
 * @param config.kResults - Number of nearest neighbours per collection (default 10)
 * @param config.advancedConfig - Optional advanced search tuning (typos, prefix, stemming…)
 * @returns Array of search requests for the Typesense multi_search endpoint
 */
function buildMultiSearchRequests(config) {
	const { searchCollections, queryEmbedding, selectedDocuments, kResults = 10, advancedConfig = {} } = config;
	// These pieces are identical for every collection, so compute them once.
	const vectorQuery = `embedding:([${queryEmbedding.join(",")}], k:${kResults})`;
	const advancedParams = buildAdvancedSearchParams(advancedConfig);
	return searchCollections.map((collection) => {
		const search = {
			collection,
			query_by: "chunk_text,title,headers",
			vector_query: vectorQuery,
			// Never ship the raw embedding back to the caller.
			exclude_fields: "embedding",
			...advancedParams
		};
		// Restrict to explicitly selected parent documents when provided.
		if (selectedDocuments && selectedDocuments.length > 0) {
			search.filter_by = `parent_doc_id:[${selectedDocuments.map((id) => `"${id}"`).join(",")}]`;
		}
		return search;
	});
}
|
|
806
|
+
/**
 * Build advanced search parameters from config.
 *
 * Translates the camelCase options of the plugin config into the snake_case
 * parameter names Typesense expects, emitting only options that were set.
 *
 * @param config - Advanced search configuration (all fields optional)
 * @returns Object with the subset of advanced search parameters that were configured
 */
function buildAdvancedSearchParams(config) {
	// camelCase option name -> Typesense snake_case parameter name.
	const optionMap = [
		["typoTokensThreshold", "typo_tokens_threshold"],
		["numTypos", "num_typos"],
		["prefix", "prefix"],
		["dropTokensThreshold", "drop_tokens_threshold"],
		["enableStemming", "enable_stemming"]
	];
	const params = {};
	for (const [option, paramName] of optionMap) {
		// `undefined` means "not configured"; other falsy values (0, false) are valid.
		if (config[option] !== undefined) {
			params[paramName] = config[option];
		}
	}
	return params;
}
|
|
821
|
+
/**
 * Build the complete Typesense request body for multi-search.
 *
 * Thin wrapper that packages the per-collection search requests into the
 * `{ searches: [...] }` envelope required by the multi_search endpoint.
 *
 * @param config - Query configuration (see buildMultiSearchRequests)
 * @returns Request body for the Typesense multi_search endpoint
 */
function buildMultiSearchRequestBody(config) {
	const searches = buildMultiSearchRequests(config);
	return { searches };
}
|
|
830
|
+
/**
 * Build hybrid search parameters for combining semantic and keyword search.
 *
 * @param alpha - Weight between semantic (1.0) and keyword (0.0) search; default 0.9
 * @param rerankMatches - Whether to rerank hybrid search results; default true
 * @param queryFields - Fields to use for keyword search; default "chunk_text,title"
 * @returns Object with hybrid search parameters in Typesense naming
 */
function buildHybridSearchParams(alpha = 0.9, rerankMatches = true, queryFields = "chunk_text,title") {
	const hybridParams = {
		alpha,
		rerank_hybrid_matches: rerankMatches,
		query_fields: queryFields
	};
	return hybridParams;
}
|
|
845
|
+
|
|
846
|
+
//#endregion
|
|
847
|
+
//#region src/features/rag/stream-handler.ts
|
|
848
|
+
/**
 * Parse a single SSE event from a Typesense conversation stream.
 *
 * Recognises both the flat (`conversation_id`, `message`) and the nested
 * (`conversation.conversation_id`, `conversation.answer`) payload shapes.
 *
 * @param line - Raw SSE event line (expected to start with "data: ")
 * @returns Parsed conversation event, `{ raw: "[DONE]" }` for the terminator,
 *          or null when the line is not an SSE data line / not valid JSON
 */
function parseConversationEvent(line) {
	const prefix = "data: ";
	if (!line.startsWith(prefix)) return null;
	const payload = line.slice(prefix.length);
	if (payload === "[DONE]") return { raw: "[DONE]" };
	try {
		const parsed = JSON.parse(payload);
		const event = { raw: parsed };
		// Flat shape wins over the nested conversation object.
		const conversationId = parsed.conversation_id || parsed.conversation?.conversation_id;
		if (conversationId) event.conversationId = conversationId;
		// `message` may legitimately be an empty string, so test for presence,
		// not truthiness; the nested `answer` is only used as a fallback.
		if (parsed.message !== undefined) event.message = parsed.message;
		else if (parsed.conversation?.answer) event.message = parsed.conversation.answer;
		if (parsed.results) event.results = parsed.results;
		return event;
	} catch (err) {
		logger.error("Error parsing SSE data from conversation stream", err);
		return null;
	}
}
|
|
872
|
+
/**
 * Extract sources from Typesense search results.
 *
 * Flattens every hit of every multi-search result into a chunk-source record
 * with display metadata and a 200-character excerpt. The full chunk text is
 * intentionally not included (`content` is always empty).
 *
 * @param results - Typesense multi-search results array
 * @param documentTypeResolver - Optional function mapping a collection name to a document type;
 *                               falls back to getDefaultDocumentType
 * @returns Array of chunk sources with metadata
 */
function extractSourcesFromResults(results, documentTypeResolver) {
	// `||` short-circuits, so the default resolver is only touched when no
	// custom resolver is supplied.
	const resolveType = documentTypeResolver || getDefaultDocumentType;
	const sources = [];
	for (const result of results) {
		if (!result.hits) continue;
		const collectionName = result.request_params?.collection_name || "";
		for (const hit of result.hits) {
			const doc = hit.document;
			const text = doc.chunk_text || "";
			sources.push({
				id: doc.id || "",
				title: doc.title || "Sin título",
				slug: doc.slug || "",
				type: resolveType(collectionName),
				chunkIndex: doc.chunk_index ?? 0,
				// Prefer the vector distance; fall back to keyword text_match.
				relevanceScore: hit.vector_distance || hit.text_match || 0,
				content: "",
				excerpt: text.substring(0, 200) + (text.length > 200 ? "..." : "")
			});
		}
	}
	return sources;
}
|
|
901
|
+
/**
 * Build context text from results (useful for token estimation).
 *
 * Concatenates the `chunk_text` of every hit across all results, one chunk
 * per line (each chunk is newline-terminated, including empty ones).
 *
 * @param results - Typesense multi-search results array
 * @returns Combined context text from all chunks
 */
function buildContextText(results) {
	const chunks = [];
	for (const result of results) {
		for (const hit of result.hits || []) {
			chunks.push(hit.document.chunk_text || "");
		}
	}
	return chunks.map((chunk) => chunk + "\n").join("");
}
|
|
915
|
+
/**
 * Process a Typesense conversation stream.
 *
 * Reads the SSE body to completion, accumulating the assistant message,
 * capturing the first conversation id seen, and collecting sources/context
 * from the first event that carries search results.
 *
 * @param response - Fetch Response with SSE stream
 * @param onEvent - Optional callback invoked for each parsed event
 * @param documentTypeResolver - Optional function to resolve document type
 * @returns Processing result with full message, conversation ID, sources, and context text
 */
async function processConversationStream(response, onEvent, documentTypeResolver) {
	const reader = response.body.getReader();
	const decoder = new TextDecoder();
	let pending = "";
	let fullMessage = "";
	let conversationId = null;
	let sources = [];
	let contextText = "";
	let hasCollectedSources = false;
	// Fold a single parsed SSE line into the accumulated state.
	const consumeLine = (line) => {
		const event = parseConversationEvent(line);
		if (!event) return;
		if (onEvent) onEvent(event);
		// Only the first conversation id encountered is kept.
		if (!conversationId && event.conversationId) conversationId = event.conversationId;
		// Sources/context come from the first event carrying results.
		if (!hasCollectedSources && event.results) {
			sources = extractSourcesFromResults(event.results, documentTypeResolver);
			contextText = buildContextText(event.results);
			hasCollectedSources = true;
		}
		if (event.message) fullMessage += event.message;
	};
	for (;;) {
		const { done, value } = await reader.read();
		if (done) break;
		pending += decoder.decode(value, { stream: true });
		// Keep the trailing partial line in the buffer for the next chunk.
		const lines = pending.split("\n");
		pending = lines.pop() || "";
		for (const line of lines) consumeLine(line);
	}
	return {
		fullMessage,
		conversationId,
		sources,
		contextText
	};
}
|
|
958
|
+
/**
 * Create a ReadableStream that forwards SSE events.
 *
 * Re-emits each complete line of the upstream SSE body (newline-terminated),
 * invoking `onData` with the parsed event before forwarding. Cancelling the
 * returned stream cancels the upstream reader.
 *
 * @param response - Fetch Response with SSE stream
 * @param onData - Optional callback for processing each parsed event before forwarding
 * @returns ReadableStream of encoded SSE lines
 */
function createSSEForwardStream(response, onData) {
	const reader = response.body.getReader();
	const decoder = new TextDecoder();
	const encoder = new TextEncoder();
	let pending = "";
	// Parse + forward a single complete line.
	const forwardLine = (controller, line) => {
		const event = parseConversationEvent(line);
		if (event && onData) onData(event);
		// Empty lines (SSE event separators) are dropped on re-emit.
		if (line) controller.enqueue(encoder.encode(`${line}\n`));
	};
	return new ReadableStream({
		async start(controller) {
			for (;;) {
				const { done, value } = await reader.read();
				if (done) {
					controller.close();
					return;
				}
				pending += decoder.decode(value, { stream: true });
				// Retain the trailing partial line for the next chunk.
				const lines = pending.split("\n");
				pending = lines.pop() || "";
				for (const line of lines) forwardLine(controller, line);
			}
		},
		cancel() {
			reader.cancel();
		}
	});
}
|
|
993
|
+
/**
 * Default document type resolver based on collection name.
 *
 * Returns the first matching marker substring found in the collection name;
 * match order is significant ("article" beats "book", etc.).
 *
 * @param collectionName - Name of the Typesense collection
 * @returns Document type string ("article" | "book" | "post" | "page" | "document")
 */
function getDefaultDocumentType(collectionName) {
	const markers = ["article", "book", "post", "page"];
	for (const marker of markers) {
		if (collectionName.includes(marker)) return marker;
	}
	return "document";
}
|
|
1006
|
+
|
|
1007
|
+
//#endregion
|
|
1008
|
+
//#region src/features/rag/setup.ts
|
|
1009
|
+
/**
 * Ensure the conversation history collection exists.
 *
 * Checks for the collection first; on a 404 it creates the collection with
 * the fixed conversation-history schema. Any other retrieval error, or a
 * creation failure, is logged and reported as `false`.
 *
 * @param client - Typesense client
 * @param collectionName - Name of the conversation history collection (default "conversation_history")
 * @returns true if the collection exists or was created successfully
 */
async function ensureConversationCollection(client, collectionName = "conversation_history") {
	try {
		await client.collections(collectionName).retrieve();
		logger.info("Conversation collection already exists", { collection: collectionName });
		return true;
	} catch (error) {
		// Anything other than "not found" is an unexpected failure.
		if (error?.httpStatus !== 404) {
			logger.error("Error checking conversation collection", error, { collection: collectionName });
			return false;
		}
		logger.info("Creating conversation collection", { collection: collectionName });
		const schema = {
			name: collectionName,
			fields: [
				{ name: "conversation_id", type: "string" },
				{ name: "model_id", type: "string" },
				{ name: "timestamp", type: "int32" },
				{ name: "role", type: "string" },
				{ name: "message", type: "string" }
			]
		};
		try {
			await client.collections().create(schema);
			logger.info("Conversation collection created successfully", { collection: collectionName });
			return true;
		} catch (createError) {
			logger.error("Failed to create conversation collection", createError, { collection: collectionName });
			return false;
		}
	}
}
|
|
1061
|
+
/**
 * Get default RAG configuration values.
 *
 * @returns Default RAG configuration covering hybrid search weights,
 *          HNSW index parameters, and advanced keyword-search tuning
 */
function getDefaultRAGConfig() {
	// Weighting between semantic (alpha -> 1) and keyword (alpha -> 0) search.
	const hybrid = {
		alpha: 0.9,
		rerankMatches: true,
		queryFields: "chunk_text,title"
	};
	// HNSW vector-index construction/query parameters.
	const hnsw = {
		efConstruction: 200,
		M: 16,
		ef: 100,
		maxConnections: 64,
		distanceMetric: "cosine"
	};
	// Keyword-search tolerance settings.
	const advanced = {
		typoTokensThreshold: 1,
		numTypos: 2,
		prefix: true,
		dropTokensThreshold: 1,
		enableStemming: true
	};
	return { hybrid, hnsw, advanced };
}
|
|
1089
|
+
/**
 * Merge user RAG config with defaults.
 *
 * Performs a per-section shallow merge: user-supplied keys override the
 * defaults within each of the `hybrid`, `hnsw`, and `advanced` sections.
 *
 * @param userConfig - User-provided RAG configuration (may be undefined)
 * @returns Merged configuration with defaults
 */
function mergeRAGConfigWithDefaults(userConfig) {
	const defaults = getDefaultRAGConfig();
	if (!userConfig) return defaults;
	const merged = {};
	for (const section of ["hybrid", "hnsw", "advanced"]) {
		merged[section] = {
			...defaults[section],
			...userConfig[section]
		};
	}
	return merged;
}
|
|
1113
|
+
|
|
1114
|
+
//#endregion
|
|
1115
|
+
//#region src/features/rag/handlers/rag-search-handler.ts
|
|
1116
|
+
/**
 * Execute a RAG conversational search.
 *
 * Complete flow against Typesense:
 * 1. Builds the conversational URL
 * 2. Builds the multi-search request body
 * 3. POSTs the request with the Typesense API key
 * 4. Returns the raw response plus streaming metadata
 *
 * @param typesenseConfig - Typesense connection configuration
 * @param searchConfig - RAG search configuration (modelId, collections, kResults, advanced tuning)
 * @param request - Chat request parameters (userMessage, queryEmbedding, selectedDocuments, chatId)
 * @returns Promise resolving to { response, isStreaming, sources }
 * @throws Error when Typesense responds with a non-OK status
 */
async function executeRAGSearch(typesenseConfig, searchConfig, request) {
	const url = buildConversationalUrl(request, searchConfig.modelId, typesenseConfig);
	const body = buildMultiSearchRequestBody({
		userMessage: request.userMessage,
		queryEmbedding: request.queryEmbedding,
		selectedDocuments: request.selectedDocuments,
		chatId: request.chatId,
		searchCollections: searchConfig.searchCollections,
		kResults: searchConfig.kResults || 10,
		advancedConfig: searchConfig.advancedConfig
	});
	const response = await fetch(url.toString(), {
		method: "POST",
		headers: {
			"Content-Type": "application/json",
			"X-TYPESENSE-API-KEY": typesenseConfig.apiKey
		},
		body: JSON.stringify(body)
	});
	if (!response.ok) {
		const errorText = await response.text();
		throw new Error(`Typesense search failed: ${errorText}`);
	}
	// SSE responses indicate a streamed conversational answer.
	const contentType = response.headers.get("content-type");
	return {
		response,
		isStreaming: contentType?.includes("text/event-stream") || false,
		sources: []
	};
}
|
|
1159
|
+
|
|
1160
|
+
//#endregion
|
|
1161
|
+
//#region src/features/rag/handlers/chunk-fetch-handler.ts
|
|
1162
|
+
/**
 * Fetch a chunk document by ID from Typesense.
 *
 * @param client - Typesense client instance
 * @param config - Chunk fetch configuration
 * @param config.chunkId - Document ID of the chunk
 * @param config.collectionName - Collection to fetch from
 * @param config.validCollections - Optional allow-list of collection names
 * @returns Promise with chunk data (id, chunk_text, title, slug, chunk_index, collection)
 * @throws Error if the collection is not allowed, the chunk is missing/empty, or the lookup fails
 */
async function fetchChunkById(client, config) {
	const { chunkId, collectionName, validCollections } = config;
	// Guard against lookups outside the configured allow-list.
	if (validCollections && !validCollections.includes(collectionName)) {
		throw new Error(`Invalid collection: ${collectionName}. Must be one of: ${validCollections.join(", ")}`);
	}
	try {
		const document = await client.collections(collectionName).documents(chunkId).retrieve();
		const text = document.chunk_text || "";
		if (!text) throw new Error("Chunk contains no text");
		return {
			id: document.id,
			chunk_text: text,
			title: document.title,
			slug: document.slug,
			chunk_index: document.chunk_index,
			collection: collectionName
		};
	} catch (error) {
		// Translate a Typesense 404 into a friendlier error; rethrow the rest.
		const isNotFound = error && typeof error === "object" && "httpStatus" in error && error.httpStatus === 404;
		if (isNotFound) throw new Error(`Chunk not found: ${chunkId}`);
		throw error;
	}
}
|
|
1190
|
+
|
|
1191
|
+
//#endregion
|
|
1192
|
+
//#region src/features/rag/handlers/session-handlers.ts
|
|
1193
|
+
/**
 * Get the active chat session for a user.
 *
 * Finds the most recently active session that is still marked "active" and
 * whose last activity falls within the configured window.
 *
 * @param payload - Payload CMS instance
 * @param userId - User ID
 * @param config - Session configuration
 * @param config.collectionName - Sessions collection (default "chat-sessions")
 * @param config.activeSessionWindow - Window in milliseconds (default 24h)
 * @returns Promise with the session document or null when none qualifies
 */
async function getActiveSession(payload, userId, config = {}) {
	const collectionName = config.collectionName || "chat-sessions";
	// Default window: 1440 minutes = 24 hours, expressed in milliseconds.
	const windowMs = config.activeSessionWindow || 1440 * 60 * 1e3;
	const cutoff = new Date(Date.now() - windowMs);
	const { docs } = await payload.find({
		collection: collectionName,
		where: { and: [
			{ user: { equals: userId } },
			{ status: { equals: "active" } },
			{ last_activity: { greater_than: cutoff.toISOString() } }
		] },
		sort: "-last_activity",
		limit: 1
	});
	return docs.length ? docs[0] : null;
}
|
|
1218
|
+
/**
 * Get a session by conversation ID.
 *
 * Looks up the session owned by `userId` with the given Typesense
 * conversation id.
 *
 * @param payload - Payload CMS instance
 * @param userId - User ID
 * @param conversationId - Conversation ID
 * @param config - Session configuration
 * @param config.collectionName - Sessions collection (default "chat-sessions")
 * @returns Promise with the session document or null when not found
 */
async function getSessionByConversationId(payload, userId, conversationId, config = {}) {
	const collectionName = config.collectionName || "chat-sessions";
	const { docs } = await payload.find({
		collection: collectionName,
		where: { and: [{ conversation_id: { equals: conversationId } }, { user: { equals: userId } }] },
		limit: 1
	});
	return docs.length ? docs[0] : null;
}
|
|
1237
|
+
/**
 * Close a chat session.
 *
 * Marks the user's session for the given conversation as "closed" and stamps
 * `closed_at`; returns a summary of the session as it was before closing.
 *
 * @param payload - Payload CMS instance
 * @param userId - User ID
 * @param conversationId - Conversation ID
 * @param config - Session configuration
 * @param config.collectionName - Sessions collection (default "chat-sessions")
 * @returns Promise with the closed-session summary, or null when no matching session exists
 */
async function closeSession(payload, userId, conversationId, config = {}) {
	const collectionName = config.collectionName || "chat-sessions";
	const found = await payload.find({
		collection: collectionName,
		where: { and: [{ conversation_id: { equals: conversationId } }, { user: { equals: userId } }] },
		limit: 1
	});
	const session = found.docs[0];
	if (!session) return null;
	await payload.update({
		collection: collectionName,
		where: { conversation_id: { equals: conversationId } },
		data: {
			status: "closed",
			closed_at: new Date().toISOString()
		}
	});
	return {
		conversation_id: session.conversation_id,
		messages: session.messages || [],
		status: "closed",
		total_tokens: session.total_tokens,
		total_cost: session.total_cost,
		last_activity: session.last_activity
	};
}
|
|
1273
|
+
|
|
1274
|
+
//#endregion
|
|
1275
|
+
//#region src/features/rag/utils/sse-utils.ts
|
|
1276
|
+
/**
 * Helper to create an SSE event string.
 *
 * Serialises the event as JSON and wraps it in the standard
 * `data: ...\n\n` SSE framing.
 *
 * @param event - SSE event object
 * @returns Formatted SSE event string
 */
function formatSSEEvent(event) {
	const json = JSON.stringify(event);
	return `data: ${json}\n\n`;
}
|
|
1285
|
+
/**
 * Helper to send an SSE event through a stream controller.
 *
 * @param controller - ReadableStreamDefaultController
 * @param encoder - TextEncoder instance
 * @param event - SSE event to send
 */
function sendSSEEvent(controller, encoder, event) {
	controller.enqueue(encoder.encode(formatSSEEvent(event)));
}
|
|
1296
|
+
|
|
1297
|
+
//#endregion
|
|
1298
|
+
//#region src/features/rag/chat-session-repository.ts
|
|
1299
|
+
/**
 * Save or update a chat session in PayloadCMS.
 *
 * Appends the user/assistant message pair (with source metadata) to an
 * existing session for the conversation, or creates a new session when none
 * exists. Failures are logged and swallowed on purpose: persisting the
 * session must never break the chat response itself.
 *
 * @param payload - Payload CMS instance
 * @param userId - User ID
 * @param conversationId - Conversation ID from Typesense
 * @param userMessage - User's message
 * @param assistantMessage - Assistant's response
 * @param sources - Source chunks used for the response
 * @param spending - Token spending entries
 * @param collectionName - Collection name for sessions (default 'chat-sessions')
 */
async function saveChatSession(payload, userId, conversationId, userMessage, assistantMessage, sources, spending, collectionName = "chat-sessions") {
	try {
		const existing = await payload.find({
			collection: collectionName,
			where: { conversation_id: { equals: conversationId } },
			limit: 1
		});
		const userEntry = {
			role: "user",
			content: userMessage,
			timestamp: new Date().toISOString()
		};
		const assistantEntry = {
			role: "assistant",
			content: assistantMessage,
			timestamp: new Date().toISOString(),
			// Persist only the lightweight source metadata, not chunk content.
			sources: sources.map(({ id, title, type, chunkIndex, slug }) => ({
				id,
				title,
				type,
				chunk_index: chunkIndex,
				slug
			}))
		};
		const current = existing.docs[0];
		if (current) await updateExistingSession(payload, current, userEntry, assistantEntry, spending, collectionName);
		else await createNewSession(payload, userId, conversationId, userEntry, assistantEntry, spending, collectionName);
	} catch (error) {
		// Deliberate best-effort: log and continue.
		logger.error("Error saving chat session", error, {
			conversationId,
			userId
		});
	}
}
|
|
1344
|
+
/**
 * Update an existing chat session.
 *
 * Appends the new message pair and spending entries to the stored session and
 * bumps the running token/cost totals, last-activity timestamp, and status.
 */
async function updateExistingSession(payload, session, newUserMessage, newAssistantMessage, spending, collectionName) {
	const addedTokens = spending.reduce((sum, entry) => sum + entry.tokens.total, 0);
	const addedCost = spending.reduce((sum, entry) => sum + (entry.cost_usd || 0), 0);
	const totalTokens = (session.total_tokens || 0) + addedTokens;
	const totalCost = (session.total_cost || 0) + addedCost;
	await payload.update({
		collection: collectionName,
		id: session.id,
		data: {
			messages: [...(session.messages || []), newUserMessage, newAssistantMessage],
			spending: [...(session.spending || []), ...spending],
			total_tokens: totalTokens,
			total_cost: totalCost,
			last_activity: new Date().toISOString(),
			// Re-activate in case the session had been closed.
			status: "active"
		}
	});
	logger.info("Chat session updated successfully", {
		sessionId: session.id,
		conversationId: session.conversation_id,
		totalTokens,
		totalCost
	});
}
|
|
1377
|
+
/**
 * Create a new chat session.
 *
 * Persists the first message pair together with its spending entries and
 * the initial token/cost totals.
 */
async function createNewSession(payload, userId, conversationId, newUserMessage, newAssistantMessage, spending, collectionName) {
	// Single pass over spending to derive both totals.
	const { totalTokens, totalCost } = spending.reduce(
		(acc, entry) => ({
			totalTokens: acc.totalTokens + entry.tokens.total,
			totalCost: acc.totalCost + (entry.cost_usd || 0)
		}),
		{ totalTokens: 0, totalCost: 0 }
	);
	await payload.create({
		collection: collectionName,
		data: {
			user: userId,
			conversation_id: conversationId,
			status: "active",
			messages: [newUserMessage, newAssistantMessage],
			spending,
			total_tokens: totalTokens,
			total_cost: totalCost,
			last_activity: new Date().toISOString()
		}
	});
	logger.info("New chat session created successfully", {
		conversationId,
		userId,
		totalTokens,
		totalCost
	});
}
|
|
1403
|
+
|
|
1404
|
+
//#endregion
|
|
1405
|
+
//#region src/features/rag/api/types.ts
|
|
1406
|
+
/**
 * Helper to create a JSON response.
 *
 * Serialises `data` and forces the JSON content type while still letting the
 * caller override/extend headers and other ResponseInit fields.
 *
 * @param data - Value to serialise as the response body
 * @param init - Optional ResponseInit; `init.headers` entries win over the default content type
 * @returns Response with a JSON body
 */
function jsonResponse(data, init) {
	const headers = {
		"Content-Type": "application/json",
		...init?.headers
	};
	return new Response(JSON.stringify(data), { ...init, headers });
}
|
|
1418
|
+
|
|
1419
|
+
//#endregion
|
|
1420
|
+
//#region src/features/rag/api/chat/handlers/embedding-handler.ts
|
|
1421
|
+
/**
 * Generate the query embedding for a chat message and record its token spend.
 *
 * Instantiates the configured embedding provider (Gemini when
 * `embeddingConfig.type === "gemini"`, OpenAI otherwise), generates the
 * embedding, and — when a `createEmbeddingSpending` factory is supplied —
 * appends a spending entry for the tokens consumed.
 *
 * Fix: the original constructed `new EmbeddingServiceImpl(provider,
 * serviceLogger, embeddingConfig)` and immediately discarded the instance;
 * that dead instantiation has been removed (the provider is called directly,
 * exactly as before).
 *
 * @param userMessage - Text to embed
 * @param config - Handler config carrying `embeddingConfig` and optional `createEmbeddingSpending`
 * @param spendingEntries - Mutable array that receives the embedding spending entry
 * @returns The embedding vector
 * @throws Error when the embedding config is missing or generation fails
 */
async function generateEmbeddingWithTracking(userMessage, config, spendingEntries) {
	logger.debug("Generating embeddings for semantic search");
	const embeddingConfig = config.embeddingConfig;
	if (!embeddingConfig) throw new Error("Embedding configuration missing");
	const { type: providerType, apiKey, model, dimensions } = embeddingConfig;
	const serviceLogger = new Logger({
		enabled: true,
		prefix: "[rag-embedding]"
	});
	// Gemini is opt-in; every other (or missing) type falls back to OpenAI.
	const provider = providerType === "gemini"
		? new GeminiEmbeddingProvider({
			type: "gemini",
			apiKey,
			model,
			dimensions
		}, serviceLogger)
		: new OpenAIEmbeddingProvider({
			type: "openai",
			apiKey,
			model,
			dimensions
		}, serviceLogger);
	const resultWithUsage = await provider.generateEmbedding(userMessage);
	if (!resultWithUsage) throw new Error("Failed to generate embedding");
	const modelUsed = model || DEFAULT_EMBEDDING_MODEL;
	if (config.createEmbeddingSpending) {
		const embeddingSpending = config.createEmbeddingSpending(modelUsed, resultWithUsage.usage.totalTokens);
		spendingEntries.push(embeddingSpending);
		logger.info("Embedding generated successfully", {
			model: modelUsed,
			totalTokens: resultWithUsage.usage.totalTokens,
			costUsd: embeddingSpending.cost_usd
		});
	}
	return resultWithUsage.embedding;
}
|
|
1464
|
+
|
|
1465
|
+
//#endregion
|
|
1466
|
+
//#region src/features/rag/api/chat/handlers/session-handler.ts
|
|
1467
|
+
/**
 * Save the chat session when persistence is configured.
 *
 * No-op unless both a conversation id and a `saveChatSession` callback are
 * available.
 */
async function saveChatSessionIfNeeded(config, payload, userId, conversationId, userMessage, assistantMessage, sources, spendingEntries) {
	// Nothing to persist without a conversation id…
	if (!conversationId) return;
	// …or without a configured persistence hook.
	if (!config.saveChatSession) return;
	await config.saveChatSession(payload, userId, conversationId, userMessage, assistantMessage, sources, spendingEntries, config.collectionName);
	logger.info("Chat session saved to PayloadCMS", { conversationId });
}
|
|
1475
|
+
|
|
1476
|
+
//#endregion
|
|
1477
|
+
//#region src/features/rag/api/chat/handlers/token-limit-handler.ts
|
|
1478
|
+
/**
 * Check per-user token limits before processing a chat request.
 *
 * Estimates the total token budget for the request (prompt plus a ~10x
 * allowance for the generated response) and asks the configured limit checker
 * whether the user may proceed.
 *
 * Fix: the original invoked the user-supplied `estimateTokensFromText`
 * callback twice on the same message; the estimate is now computed once and
 * reused (same resulting value: estimate + estimate * 10).
 *
 * @param config - Handler config with optional `estimateTokensFromText` and `checkTokenLimit`
 * @param payload - Payload CMS instance
 * @param userId - User ID
 * @param userEmail - User email (logging only)
 * @param userMessage - The incoming chat message
 * @returns A 429 JSON Response when the limit is exceeded, otherwise null
 */
async function checkTokenLimitsIfNeeded(config, payload, userId, userEmail, userMessage) {
	// Limit checking is entirely optional.
	if (!config.estimateTokensFromText || !config.checkTokenLimit) return null;
	const promptTokens = config.estimateTokensFromText(userMessage);
	// Budget the response at roughly 10x the prompt estimate.
	const estimatedTotalTokens = promptTokens + promptTokens * 10;
	const limitCheck = await config.checkTokenLimit(payload, userId, estimatedTotalTokens);
	if (!limitCheck.allowed) {
		logger.warn("Token limit exceeded for user", {
			userId,
			limit: limitCheck.limit,
			used: limitCheck.used,
			remaining: limitCheck.remaining
		});
		return jsonResponse({
			error: "Has alcanzado tu límite diario de tokens.",
			limit_info: {
				limit: limitCheck.limit,
				used: limitCheck.used,
				remaining: limitCheck.remaining,
				reset_at: limitCheck.reset_at
			}
		}, { status: 429 });
	}
	logger.info("Chat request started with token limit check passed", {
		userId,
		userEmail,
		limit: limitCheck.limit,
		used: limitCheck.used,
		remaining: limitCheck.remaining
	});
	return null;
}
|
|
1511
|
+
|
|
1512
|
+
//#endregion
|
|
1513
|
+
//#region src/features/rag/api/chat/handlers/usage-stats-handler.ts
|
|
1514
|
+
/**
 * Calculate total usage from spending entries.
 *
 * Sums token counts and USD costs across all entries and logs the totals.
 *
 * @param spendingEntries - Spending entries with `tokens.total` and optional `cost_usd`
 * @returns { totalTokens, totalCostUSD }
 */
function calculateTotalUsage(spendingEntries) {
	let totalTokens = 0;
	let totalCostUSD = 0;
	for (const entry of spendingEntries) {
		totalTokens += entry.tokens.total;
		totalCostUSD += entry.cost_usd || 0;
	}
	logger.info("Total token usage calculated", {
		totalTokens,
		totalCostUsd: totalCostUSD
	});
	return {
		totalTokens,
		totalCostUSD
	};
}
|
|
1529
|
+
/**
 * Emits a `usage` SSE event with this request's spend plus the user's daily
 * quota snapshot. Silently does nothing when `config.getUserUsageStats` is
 * not configured.
 *
 * @param {object} config - must optionally expose `getUserUsageStats(payload, userId)`
 * @param {*} payload - PayloadCMS instance forwarded to the callback
 * @param {string} userId
 * @param {number} totalTokens - tokens consumed by this request
 * @param {number} totalCostUSD - cost of this request in USD
 * @param {(event: {type: string, data: unknown}) => void} sendEvent - SSE emitter
 */
async function sendUsageStatsIfNeeded(config, payload, userId, totalTokens, totalCostUSD, sendEvent) {
  if (!config.getUserUsageStats) return;
  const stats = await config.getUserUsageStats(payload, userId);
  const usagePayload = {
    tokens_used: totalTokens,
    cost_usd: totalCostUSD,
    daily_limit: stats.limit,
    daily_used: stats.used,
    daily_remaining: stats.remaining,
    reset_at: stats.reset_at
  };
  sendEvent({
    type: "usage",
    data: usagePayload
  });
}
|
|
1547
|
+
|
|
1548
|
+
//#endregion
|
|
1549
|
+
//#region src/features/rag/api/chat/validators/request-validator.ts
|
|
1550
|
+
/**
 * Validates an incoming chat request: permission check, presence of
 * url/user/body, and a non-empty string `message`.
 *
 * @returns {Promise<{success: false, error: Response} |
 *   {success: true, userId: *, userEmail: string, payload: *, userMessage: string, body: *}>}
 *   Either a ready-to-return error Response or the extracted request data.
 */
async function validateChatRequest(request, config) {
  // Small helper: wrap a Spanish/English error message in a failed result.
  const reject = (message, status) => ({
    success: false,
    error: jsonResponse({ error: message }, { status })
  });
  if (!await config.checkPermissions(request)) {
    return reject("No tienes permisos para acceder a esta sesión.", 403);
  }
  if (!request.url || !request.user) {
    return reject("URL not found", 400);
  }
  const { id: userId, email } = request.user;
  const payload = await config.getPayload();
  const body = await request.json?.();
  if (!body) {
    return reject("Body not found", 400);
  }
  const rawMessage = body.message;
  if (typeof rawMessage !== "string" || rawMessage.trim() === "") {
    return reject("Se requiere un mensaje.", 400);
  }
  return {
    success: true,
    userId,
    userEmail: email || "",
    payload,
    userMessage: rawMessage.trim(),
    body
  };
}
|
|
1583
|
+
|
|
1584
|
+
//#endregion
|
|
1585
|
+
//#region src/features/rag/api/chat/route.ts
|
|
1586
|
+
/**
 * Create a parameterizable POST handler for chat endpoint.
 *
 * Flow: validate request → resolve agent/search config → enforce token limits
 * → run RAG search and stream the answer back to the client as SSE.
 *
 * @param config - injected dependencies: `checkPermissions`, `getPayload`,
 *   `typesense`, `rag` (agents + advanced settings), streaming handlers, and
 *   optional token-limit / usage / persistence callbacks.
 * @returns an async Payload endpoint handler producing a `text/event-stream` Response.
 */
function createChatPOSTHandler(config) {
  return async function POST(request) {
    try {
      const validated = await validateChatRequest(request, config);
      if (!validated.success) return validated.error;
      const { userId, userEmail, payload, userMessage, body } = validated;
      let searchConfig;
      const agentSlug = body.agentSlug;
      // Agent resolution: an explicit slug must match a configured agent;
      // otherwise fall back to the first configured agent as the default.
      if (agentSlug && config.rag?.agents) {
        const agent = config.rag.agents.find((a) => a.slug === agentSlug);
        if (!agent) return new Response(JSON.stringify({ error: `Agent not found: ${agentSlug}` }), { status: 404 });
        searchConfig = {
          modelId: agent.slug,
          searchCollections: agent.searchCollections,
          kResults: agent.kResults,
          advancedConfig: config.rag.advanced
        };
      } else if (config.rag?.agents && config.rag.agents.length > 0) {
        const agent = config.rag.agents[0];
        if (!agent) throw new Error("Default agent not found");
        searchConfig = {
          modelId: agent.slug,
          searchCollections: agent.searchCollections,
          kResults: agent.kResults,
          advancedConfig: config.rag.advanced
        };
      } else return new Response(JSON.stringify({ error: "No RAG configuration available" }), { status: 500 });
      // Quota gate: returns a ready 429 Response when the user is over budget.
      const tokenLimitError = await checkTokenLimitsIfNeeded(config, payload, userId, userEmail, userMessage);
      if (tokenLimitError) return tokenLimitError;
      logger.info("Processing chat message", {
        userId,
        chatId: body.chatId || "new",
        agentSlug: agentSlug || "default",
        modelId: searchConfig.modelId,
        isFollowUp: !!body.chatId,
        hasSelectedDocuments: !!body.selectedDocuments,
        messageLength: userMessage.length
      });
      const encoder = new TextEncoder();
      // All work happens inside the stream's start() so the client receives
      // SSE events (tokens, sources, usage, errors) as they are produced.
      const stream = new ReadableStream({ async start(controller) {
        const spendingEntries = [];
        let fullAssistantMessage = "";
        let conversationIdCapture = null;
        let sourcesCapture = [];
        try {
          const sendEvent = (event) => sendSSEEvent(controller, encoder, event);
          // Embedding cost is tracked by pushing into spendingEntries.
          const queryEmbedding = await generateEmbeddingWithTracking(userMessage, config, spendingEntries);
          const searchResult = await executeRAGSearch(config.typesense, searchConfig, {
            userMessage,
            queryEmbedding,
            chatId: body.chatId,
            selectedDocuments: body.selectedDocuments
          });
          // Prefer the streaming path; fall back to parsing the JSON body once.
          const streamResult = searchResult.isStreaming && searchResult.response.body ? await config.handleStreamingResponse(searchResult.response, controller, encoder) : await config.handleNonStreamingResponse(await searchResult.response.json(), controller, encoder);
          fullAssistantMessage = streamResult.fullAssistantMessage;
          conversationIdCapture = streamResult.conversationId;
          sourcesCapture = streamResult.sources;
          spendingEntries.push(streamResult.llmSpending);
          const { totalTokens: totalTokensUsed, totalCostUSD } = calculateTotalUsage(spendingEntries);
          await sendUsageStatsIfNeeded(config, payload, userId, totalTokensUsed, totalCostUSD, sendEvent);
          await saveChatSessionIfNeeded(config, payload, userId, conversationIdCapture, userMessage, fullAssistantMessage, sourcesCapture, spendingEntries);
          logger.info("Chat request completed successfully", {
            userId,
            conversationId: conversationIdCapture,
            totalTokens: totalTokensUsed
          });
          controller.close();
        } catch (error) {
          // Stream-level failure: report via an SSE "error" event (headers are
          // already sent, so an HTTP error status is no longer possible).
          logger.error("Fatal error in chat stream", error, {
            userId,
            chatId: body.chatId
          });
          sendSSEEvent(controller, encoder, {
            type: "error",
            data: { error: error instanceof Error ? error.message : "Error desconocido" }
          });
          controller.close();
        }
      } });
      return new Response(stream, { headers: {
        "Content-Type": "text/event-stream",
        "Cache-Control": "no-cache",
        Connection: "keep-alive"
      } });
    } catch (error) {
      // Pre-stream failure: a normal JSON 500 response is still possible here.
      logger.error("Error in chat API endpoint", error, { userId: request.user?.id });
      return new Response(JSON.stringify({
        error: "Error al procesar tu mensaje. Por favor, inténtalo de nuevo.",
        details: error instanceof Error ? error.message : "Error desconocido"
      }), {
        status: 500,
        headers: { "Content-Type": "application/json" }
      });
    }
  };
}
|
|
1685
|
+
|
|
1686
|
+
//#endregion
|
|
1687
|
+
//#region src/features/rag/stream-handlers/utils.ts
|
|
1688
|
+
/**
|
|
1689
|
+
* Stream handler utilities
|
|
1690
|
+
*
|
|
1691
|
+
* Shared utility functions for stream handlers
|
|
1692
|
+
*/
|
|
1693
|
+
/**
 * Resolve document type from collection name.
 *
 * Maps the two known chunk collections to their document kinds; anything
 * else is treated as a generic "document".
 *
 * @param {string} collectionName
 * @returns {"article" | "book" | "document"}
 */
function resolveDocumentType$1(collectionName) {
  switch (collectionName) {
    case "article_web_chunk":
      return "article";
    case "book_chunk":
      return "book";
    default:
      return "document";
  }
}
|
|
1701
|
+
/**
 * Estimate tokens from text (simple word-based estimation: ~1.3 tokens per
 * whitespace-separated word). More accurate implementations can be provided
 * via callbacks.
 *
 * Fix: an empty or whitespace-only string previously produced `[""]` from
 * `split(/\s+/)` (length 1) and was estimated at 2 tokens; it now correctly
 * estimates 0.
 *
 * @param {string} text
 * @returns {number} estimated token count (non-negative integer)
 */
function estimateTokensFromText(text) {
  const trimmed = text.trim();
  if (trimmed === "") return 0;
  const words = trimmed.split(/\s+/).length;
  return Math.ceil(words * 1.3);
}
|
|
1709
|
+
|
|
1710
|
+
//#endregion
|
|
1711
|
+
//#region src/features/rag/stream-handlers/streaming-handler.ts
|
|
1712
|
+
/**
 * Streaming response handler
 *
 * Handles streaming responses from Typesense conversational search.
 */
/**
 * Default implementation for handling streaming responses.
 *
 * Reads the Typesense SSE stream line-by-line, forwards token / source /
 * conversation-id / done events to the client, and accumulates the full
 * answer and context so an LLM cost entry can be estimated afterwards.
 *
 * @param response - the upstream fetch Response with a readable body
 * @param controller - ReadableStream controller for the client-facing stream
 * @param encoder - shared TextEncoder for SSE framing
 * @returns {{fullAssistantMessage, conversationId, sources, llmSpending}}
 * @throws {Error} when `response.body` is null
 */
async function defaultHandleStreamingResponse(response, controller, encoder) {
  logger.debug("Starting streaming response handling");
  if (!response.body) throw new Error("Response body is null");
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  let sources = [];
  let hasCollectedSources = false;
  let conversationId = null;
  let contextText = "";
  let fullAssistantMessage = "";
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) {
        logger.debug("Streaming response completed");
        break;
      }
      // Chunks may split lines arbitrarily: keep the trailing partial line in
      // `buffer` and only process complete lines.
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split("\n");
      buffer = lines.pop() || "";
      for (const line of lines) {
        const event = parseConversationEvent(line);
        if (!event) continue;
        // Upstream end-of-stream sentinel → forward a "done" SSE event.
        if (event.raw === "[DONE]") {
          sendSSEEvent(controller, encoder, {
            type: "done",
            data: ""
          });
          continue;
        }
        // Capture the conversation id from the first event that carries one.
        if (!conversationId && event.conversationId) {
          conversationId = event.conversationId;
          logger.debug("Conversation ID captured", { conversationId });
          sendSSEEvent(controller, encoder, {
            type: "conversation_id",
            data: conversationId
          });
        }
        // Sources arrive once per stream; collect them only the first time.
        if (!hasCollectedSources && event.results) {
          sources = extractSourcesFromResults(event.results, resolveDocumentType$1);
          contextText = buildContextText(event.results);
          if (sources.length > 0) sendSSEEvent(controller, encoder, {
            type: "sources",
            data: sources
          });
          hasCollectedSources = true;
        }
        // Incremental answer tokens: accumulate and forward.
        if (event.message) {
          fullAssistantMessage += event.message;
          sendSSEEvent(controller, encoder, {
            type: "token",
            data: event.message
          });
        }
      }
    }
  } finally {
    // Always release the reader lock, even when parsing throws mid-stream.
    reader.releaseLock();
  }
  // Approximate LLM cost from estimated context (input) and answer (output)
  // tokens. Rates are 15e-8/6e-7 USD per token — these appear to match
  // gpt-4o-mini's $0.15/M input and $0.60/M output pricing; confirm if the
  // upstream model changes.
  const llmInputTokens = estimateTokensFromText(contextText);
  const llmOutputTokens = estimateTokensFromText(fullAssistantMessage);
  const llmSpending = {
    service: "openai_llm",
    model: "gpt-4o-mini",
    tokens: {
      input: llmInputTokens,
      output: llmOutputTokens,
      total: llmInputTokens + llmOutputTokens
    },
    cost_usd: llmInputTokens * 15e-8 + llmOutputTokens * 6e-7,
    timestamp: (/* @__PURE__ */ new Date()).toISOString()
  };
  logger.info("LLM cost calculated", {
    inputTokens: llmInputTokens,
    outputTokens: llmOutputTokens,
    totalTokens: llmSpending.tokens.total,
    costUsd: llmSpending.cost_usd
  });
  return {
    fullAssistantMessage,
    conversationId,
    sources,
    llmSpending
  };
}
|
|
1806
|
+
|
|
1807
|
+
//#endregion
|
|
1808
|
+
//#region src/features/rag/stream-handlers/non-streaming-handler.ts
|
|
1809
|
+
/**
 * Non-streaming response handler
 *
 * Handles non-streaming (regular JSON) responses from Typesense conversational search.
 */
/**
 * Default implementation for handling non-streaming responses.
 *
 * Emits the already-complete answer as a sequence of word "token" SSE events
 * (simulating streaming for the client), followed by conversation_id,
 * sources, and a final "done" event; then estimates the LLM spend.
 *
 * @param data - parsed JSON body from Typesense (shape varies by endpoint/version)
 * @param controller - ReadableStream controller for the client-facing stream
 * @param encoder - shared TextEncoder for SSE framing
 * @returns {{fullAssistantMessage, conversationId, sources, llmSpending}}
 */
async function defaultHandleNonStreamingResponse(data, controller, encoder) {
  logger.debug("Using non-streaming fallback for response handling");
  const typedData = data;
  // The conversation id may live under conversation.conversation_id or at the
  // top level, depending on the upstream response shape.
  let conversationId = null;
  if (typedData.conversation?.conversation_id) conversationId = typedData.conversation.conversation_id;
  else if (typedData.conversation_id) conversationId = typedData.conversation_id;
  // Same for the answer text: conversation.answer, response, or message.
  let fullAnswer = "";
  if (typedData.conversation?.answer) fullAnswer = typedData.conversation.answer;
  else if (typedData.response || typedData.message) fullAnswer = typedData.response || typedData.message || "";
  const sources = extractSourcesFromResults(typedData.results || [], resolveDocumentType$1);
  const contextText = buildContextText(typedData.results || []);
  // Re-emit the full answer word by word so clients see the same event shape
  // as the true streaming path (space is prepended to every word but the first).
  if (fullAnswer) {
    const words = fullAnswer.split(" ");
    for (let i = 0; i < words.length; i++) {
      const token = i === 0 ? words[i] : " " + words[i];
      if (token) sendSSEEvent(controller, encoder, {
        type: "token",
        data: token
      });
    }
  }
  if (conversationId) sendSSEEvent(controller, encoder, {
    type: "conversation_id",
    data: conversationId
  });
  if (sources.length > 0) sendSSEEvent(controller, encoder, {
    type: "sources",
    data: sources
  });
  sendSSEEvent(controller, encoder, {
    type: "done",
    data: ""
  });
  // Cost estimation mirrors the streaming handler: 15e-8/6e-7 USD per token —
  // these appear to match gpt-4o-mini's $0.15/M input and $0.60/M output
  // pricing; confirm if the upstream model changes.
  const llmInputTokens = estimateTokensFromText(contextText);
  const llmOutputTokens = estimateTokensFromText(fullAnswer);
  const llmSpending = {
    service: "openai_llm",
    model: "gpt-4o-mini",
    tokens: {
      input: llmInputTokens,
      output: llmOutputTokens,
      total: llmInputTokens + llmOutputTokens
    },
    cost_usd: llmInputTokens * 15e-8 + llmOutputTokens * 6e-7,
    timestamp: (/* @__PURE__ */ new Date()).toISOString()
  };
  return {
    fullAssistantMessage: fullAnswer,
    conversationId,
    sources,
    llmSpending
  };
}
|
|
1870
|
+
|
|
1871
|
+
//#endregion
|
|
1872
|
+
//#region src/features/rag/api/chat/session/route.ts
|
|
1873
|
+
/**
 * Create a parameterizable GET handler for session endpoint
 *
 * Query params:
 * - ?active=true → Get the most recent active session
 * - ?conversationId=xxx → Get a specific session by conversation ID
 *
 * NOTE(review): `config.sessionConfig` is forwarded to the session helpers but
 * is not supplied at the registration site — confirm the helpers tolerate
 * `undefined` here.
 */
function createSessionGETHandler(config) {
  return async function GET(request) {
    try {
      const allowed = await config.checkPermissions(request);
      if (!allowed) return jsonResponse({ error: "No tienes permisos para acceder a esta sesión." }, { status: 403 });
      const userId = request.user?.id;
      if (!request.url || !userId) return jsonResponse({ error: "URL not found" }, { status: 400 });
      const { searchParams } = new URL(request.url);
      const wantsActive = searchParams.get("active") === "true";
      const conversationId = searchParams.get("conversationId");
      const payload = await config.getPayload();
      // Mode 1: most recent active session for this user.
      if (wantsActive) {
        const activeSession = await getActiveSession(payload, userId, config.sessionConfig);
        return activeSession
          ? jsonResponse(activeSession)
          : jsonResponse({ error: "No hay sesión activa." }, { status: 404 });
      }
      // Mode 2: lookup by explicit conversation id.
      if (!conversationId) return jsonResponse({ error: "Se requiere conversationId o active=true." }, { status: 400 });
      const found = await getSessionByConversationId(payload, userId, conversationId, config.sessionConfig);
      return found
        ? jsonResponse(found)
        : jsonResponse({ error: "Sesión de chat no encontrada." }, { status: 404 });
    } catch (error) {
      logger.error("Error retrieving chat session", error, { userId: request.user?.id });
      return jsonResponse({
        error: "Error al recuperar la sesión.",
        details: error instanceof Error ? error.message : "Error desconocido"
      }, { status: 500 });
    }
  };
}
|
|
1908
|
+
/**
 * Create a parameterizable DELETE handler for session endpoint
 *
 * DELETE /api/chat/session?conversationId=xxx
 * Close a chat session
 */
function createSessionDELETEHandler(config) {
  return async function DELETE(request) {
    // Small helper for uniform error responses.
    const deny = (message, status) => jsonResponse({ error: message }, { status });
    try {
      if (!await config.checkPermissions(request)) return deny("No tienes permisos para acceder a esta sesión.", 403);
      const userId = request.user?.id;
      if (!request.url || !userId) return deny("URL not found", 400);
      const conversationId = new URL(request.url).searchParams.get("conversationId");
      if (!conversationId) return deny("Se requiere un conversationId válido.", 400);
      const payload = await config.getPayload();
      logger.info("Closing chat session", {
        conversationId,
        userId
      });
      const closed = await closeSession(payload, userId, conversationId, config.sessionConfig);
      if (!closed) return deny("Sesión de chat no encontrada o no tienes permisos.", 404);
      logger.info("Chat session closed successfully", {
        conversationId,
        totalTokens: closed.total_tokens,
        totalCost: closed.total_cost
      });
      return jsonResponse({
        success: true,
        message: "Sesión cerrada correctamente",
        session: {
          conversation_id: conversationId,
          status: "closed",
          total_tokens: closed.total_tokens,
          total_cost: closed.total_cost
        }
      });
    } catch (error) {
      logger.error("Error closing chat session", error, {
        conversationId: request.url ? new URL(request.url).searchParams.get("conversationId") : void 0,
        userId: request.user?.id
      });
      return jsonResponse({
        error: "Error al cerrar la sesión. Por favor, inténtalo de nuevo.",
        details: error instanceof Error ? error.message : "Error desconocido"
      }, { status: 500 });
    }
  };
}
|
|
1957
|
+
|
|
1958
|
+
//#endregion
|
|
1959
|
+
//#region src/features/rag/api/chunks/[id]/route.ts
|
|
1960
|
+
/**
 * Create a parameterizable GET handler for chunks endpoint
 *
 * GET /api/chat/chunks/[id]?collection=article_web_chunk
 * Fetch the full chunk text from Typesense by document ID
 */
function createChunksGETHandler(config) {
  return async function GET(request) {
    try {
      if (!await config.checkPermissions(request)) return jsonResponse({ error: "No tienes permisos para acceder a este chunk." }, { status: 403 });
      if (!request.url || !request.user) return jsonResponse({ error: "URL not found" }, { status: 400 });
      const chunkId = request.routeParams?.id;
      const collectionName = new URL(request.url).searchParams.get("collection");
      if (!chunkId) return jsonResponse({ error: "Se requiere el ID del chunk" }, { status: 400 });
      if (!collectionName) return jsonResponse({
        error: "Se requiere el parámetro collection",
        collections: config.validCollections
      }, { status: 400 });
      const client = createTypesenseClient(config.typesense);
      const chunk = await fetchChunkById(client, {
        chunkId,
        collectionName,
        validCollections: config.validCollections
      });
      return jsonResponse(chunk);
    } catch (error) {
      logger.error("Error fetching chunk", error, {
        chunkId: request.routeParams?.id,
        collection: request.url ? new URL(request.url).searchParams.get("collection") : void 0
      });
      // Map known helper errors onto specific HTTP statuses.
      const message = error instanceof Error ? error.message : "";
      if (message.includes("Invalid collection")) return jsonResponse({
        error: message,
        collections: config.validCollections
      }, { status: 400 });
      if (message.includes("not found")) return jsonResponse({ error: "Chunk no encontrado" }, { status: 404 });
      return jsonResponse({
        error: "Error al obtener el chunk",
        details: error instanceof Error ? error.message : "Error desconocido"
      }, { status: 500 });
    }
  };
}
|
|
2002
|
+
|
|
2003
|
+
//#endregion
|
|
2004
|
+
//#region src/features/rag/api/chat/agents/route.ts
|
|
2005
|
+
/**
 * Creates a GET handler listing the configured RAG agents (slug + display name).
 *
 * Fix: the registration site supplies `config.checkPermissions`, but the
 * handler previously ignored it (and ignored the request entirely), exposing
 * the agent list without any permission check. It is now enforced when
 * provided; handlers without a callback keep the old open behavior.
 *
 * @param config - { ragConfig, checkPermissions? }
 * @returns async Payload endpoint handler returning { agents: [{slug, name}] }
 */
function createAgentsGETHandler(config) {
  return async function GET(request) {
    try {
      if (config.checkPermissions && !await config.checkPermissions(request)) {
        return jsonResponse({ error: "No tienes permisos para acceder a los agentes." }, { status: 403 });
      }
      return jsonResponse({ agents: (config.ragConfig?.agents || []).map((agent) => ({
        slug: agent.slug,
        name: agent.name || agent.slug
      })) }, { status: 200 });
    } catch (_error) {
      return jsonResponse({ error: "Internal Server Error" }, { status: 500 });
    }
  };
}
|
|
2017
|
+
|
|
2018
|
+
//#endregion
|
|
2019
|
+
//#region src/features/rag/create-rag-payload-handlers.ts
|
|
2020
|
+
/**
 * Creates Payload handlers for RAG endpoints.
 *
 * Returns an empty list unless the RAG feature is enabled and its callbacks
 * are configured; otherwise registers the chat, session, chunk, and agent
 * endpoints with their injected dependencies.
 *
 * @param pluginOptions - full plugin options (typesense, features.rag, features.embedding)
 * @returns {Array<{path: string, method: string, handler: Function}>}
 */
function createRAGPayloadHandlers(pluginOptions) {
  const ragConfig = pluginOptions.features.rag;
  if (!ragConfig?.enabled || !ragConfig.callbacks) return [];
  const callbacksConfig = ragConfig.callbacks;
  // Valid chunk collections = union of every agent's search collections.
  const validCollections = Array.from(new Set(ragConfig.agents?.flatMap((agent) => agent.searchCollections) || []));
  const sessionDeps = {
    getPayload: callbacksConfig.getPayload,
    checkPermissions: callbacksConfig.checkPermissions
  };
  const chatHandler = createChatPOSTHandler({
    collectionName: "chat-sessions",
    checkPermissions: callbacksConfig.checkPermissions,
    typesense: pluginOptions.typesense,
    rag: ragConfig,
    getPayload: callbacksConfig.getPayload,
    checkTokenLimit: callbacksConfig.checkTokenLimit,
    getUserUsageStats: callbacksConfig.getUserUsageStats,
    saveChatSession: callbacksConfig.saveChatSession,
    handleStreamingResponse: defaultHandleStreamingResponse,
    handleNonStreamingResponse: defaultHandleNonStreamingResponse,
    createEmbeddingSpending: callbacksConfig.createEmbeddingSpending,
    estimateTokensFromText: callbacksConfig.estimateTokensFromText,
    embeddingConfig: pluginOptions.features.embedding
  });
  return [
    {
      path: "/chat",
      method: "post",
      handler: chatHandler
    },
    {
      path: "/chat/session",
      method: "get",
      handler: createSessionGETHandler(sessionDeps)
    },
    {
      path: "/chat/session",
      method: "delete",
      handler: createSessionDELETEHandler(sessionDeps)
    },
    {
      path: "/chat/chunks/:id",
      method: "get",
      handler: createChunksGETHandler({
        typesense: pluginOptions.typesense,
        checkPermissions: callbacksConfig.checkPermissions,
        validCollections
      })
    },
    {
      path: "/chat/agents",
      method: "get",
      handler: createAgentsGETHandler({
        ragConfig,
        checkPermissions: callbacksConfig.checkPermissions
      })
    }
  ];
}
|
|
2084
|
+
|
|
2085
|
+
//#endregion
|
|
2086
|
+
//#region src/features/search/handlers/collections-handler.ts
|
|
2087
|
+
/**
 * Creates a handler for listing available search collections.
 *
 * For each configured Payload collection, the first enabled table config
 * determines the display name, facet fields (`facet: true`), and searchable
 * fields (`index !== false`). Chunked collections additionally expose the
 * synthetic `chunk_text` (indexed) and `headers` (facetable) fields.
 *
 * Fix: the non-chunked branch previously read `firstEnabledConfig.fields`
 * without the `|| []` fallback the chunked branch has, so an enabled config
 * with no `fields` crashed the subsequent `.filter()` calls (caught and
 * masked as a blanket 500). Both branches now default to an empty list.
 *
 * @param pluginOptions - plugin options whose `collections` map is inspected
 * @returns handler producing `{ categorized: false, collections: [...] }`
 */
const createCollectionsHandler = (pluginOptions) => {
  return () => {
    try {
      const collections = [];
      for (const [slug, tableConfigs] of Object.entries(pluginOptions.collections || {})) {
        if (!Array.isArray(tableConfigs)) continue;
        const firstEnabledConfig = tableConfigs.find((config) => config.enabled);
        if (!firstEnabledConfig) continue;
        let fields = [];
        if (firstEnabledConfig.mode === "chunked") {
          fields = [
            ...firstEnabledConfig.fields || [],
            {
              name: "chunk_text",
              index: true
            },
            {
              name: "headers",
              facet: true
            }
          ];
        } else {
          // Fix: mirror the chunked branch's fallback for configs without fields.
          fields = firstEnabledConfig.fields || [];
        }
        const facetFields = fields.filter((f) => f.facet).map((f) => f.name);
        const searchFields = fields.filter((f) => f.index !== false).map((f) => f.name);
        collections.push({
          slug,
          displayName: firstEnabledConfig.displayName || slug.charAt(0).toUpperCase() + slug.slice(1),
          facetFields,
          searchFields
        });
      }
      return Response.json({
        categorized: false,
        collections
      });
    } catch (_error) {
      return Response.json({ error: "Failed to get collections" }, { status: 500 });
    }
  };
};
|
|
2129
|
+
|
|
2130
|
+
//#endregion
|
|
2131
|
+
//#region src/core/utils/naming.ts
|
|
2132
|
+
/**
 * Generates the Typesense collection name based on the configuration.
 *
 * Priority:
 * 1. Explicit `tableName` if provided (any non-nullish value, including "").
 * 2. `collectionSlug` (fallback).
 *
 * @param collectionSlug The slug of the Payload collection
 * @param tableConfig The configuration for the specific table
 * @returns The generated Typesense collection name
 */
const getTypesenseCollectionName = (collectionSlug, tableConfig) => {
  const { tableName } = tableConfig;
  // `!= null` matches both null and undefined, mirroring `??` semantics.
  return tableName != null ? tableName : collectionSlug;
};
|
|
2146
|
+
|
|
2147
|
+
//#endregion
|
|
2148
|
+
//#region src/shared/cache/cache.ts
|
|
2149
|
+
var SearchCache = class {
  // key → { data, timestamp, ttl } entries.
  cache = /* @__PURE__ */ new Map();
  defaultTTL;
  maxSize;
  /**
   * @param {{ttl?: number, maxSize?: number}} [options] - TTL in ms (default
   *   5 minutes) and maximum entry count (default 1000).
   */
  constructor(options = {}) {
    this.defaultTTL = options.ttl || 300 * 1e3;
    this.maxSize = options.maxSize || 1e3;
  }
  /**
   * Generate cache key from search parameters. Params are serialized with
   * sorted keys so equivalent queries share a key regardless of object order.
   */
  generateKey(query, collection, params) {
    const base = `${collection || "universal"}:${query}`;
    if (!params) return base;
    const serialized = Object.keys(params)
      .sort()
      .map((key) => `${key}=${params[key]}`)
      .join("&");
    return `${base}:${serialized}`;
  }
  /**
   * Clear expired entries.
   */
  cleanup() {
    const now = Date.now();
    for (const [key, entry] of this.cache.entries()) {
      if (now - entry.timestamp > entry.ttl) this.cache.delete(key);
    }
  }
  /**
   * Clear cache entries matching pattern (substring match); clears everything
   * when no pattern is given.
   */
  clear(pattern) {
    if (!pattern) {
      this.cache.clear();
      return;
    }
    const matching = [...this.cache.keys()].filter((key) => key.includes(pattern));
    for (const key of matching) this.cache.delete(key);
  }
  /**
   * Get cached search result; returns null on miss or expiry (expired
   * entries are evicted lazily here).
   */
  get(query, collection, params) {
    const key = this.generateKey(query, collection || "", params);
    const entry = this.cache.get(key);
    if (!entry) return null;
    const age = Date.now() - entry.timestamp;
    if (age > entry.ttl) {
      this.cache.delete(key);
      return null;
    }
    return entry.data;
  }
  /**
   * Get cache statistics.
   */
  getStats() {
    return {
      maxSize: this.maxSize,
      size: this.cache.size
    };
  }
  /**
   * Check if cache has valid entry.
   */
  has(query, collection, params) {
    return this.get(query, collection, params) !== null;
  }
  /**
   * Set cached search result. When full, evicts the oldest-inserted entry
   * (FIFO — Map preserves insertion order).
   */
  set(query, data, collection, params, ttl) {
    const key = this.generateKey(query, collection || "", params);
    if (this.cache.size >= this.maxSize) {
      const oldestKey = this.cache.keys().next().value;
      if (oldestKey) this.cache.delete(oldestKey);
    }
    this.cache.set(key, {
      data,
      timestamp: Date.now(),
      ttl: ttl || this.defaultTTL
    });
  }
};
|
|
2226
|
+
/**
 * Shared module-level cache for search results (1000 entries, 5-minute TTL).
 */
const searchCache = new SearchCache({
  maxSize: 1e3,
  ttl: 300 * 1e3
});
// Periodically evict expired entries so stale results don't accumulate.
const searchCacheCleanupTimer = setInterval(() => {
  searchCache.cleanup();
}, 600 * 1e3);
// Fix: without unref, this library-level timer keeps any Node.js process
// alive indefinitely. `unref` is Node-only, so call it optionally to stay
// compatible with browser/edge runtimes where setInterval returns a number.
searchCacheCleanupTimer.unref?.();
|
|
2233
|
+
|
|
2234
|
+
//#endregion
|
|
2235
|
+
//#region src/features/search/constants.ts
|
|
2236
|
+
/**
 * Default values for vector search parameters
 *
 * K is set high because:
 * - Documents are split into chunks (avg 5-10 chunks per doc)
 * - To get 20 unique documents, we need K = 20 docs × 7 chunks/doc = 140
 * - Higher K = better coverage but slightly slower (still fast with good indexing)
 */
const DEFAULT_K = 150;
/**
 * Default page of paginated search results (pages are 1-indexed).
 */
const DEFAULT_PAGE = 1;
/**
 * Default number of hits returned per page.
 */
const DEFAULT_PER_PAGE = 20;
/**
 * Default hybrid-search blending weight — presumably the vector-vs-keyword
 * ratio forwarded to Typesense (0.7 favoring vector similarity); confirm
 * against where search parameters are assembled.
 */
const DEFAULT_ALPHA = .7;
/**
 * Default search field names when not specified
 */
const DEFAULT_SEARCH_FIELDS = ["title", "content"];
/**
 * Default snippet threshold for search results
 */
const DEFAULT_SNIPPET_THRESHOLD = 30;
/**
 * Default typo tokens threshold
 */
const DEFAULT_TYPO_TOKENS_THRESHOLD = 1;
/**
 * Default number of typos allowed (0 = exact matching only)
 */
const DEFAULT_NUM_TYPOS = 0;
|
|
2264
|
+
|
|
2265
|
+
//#endregion
|
|
2266
|
+
//#region src/features/search/results/process-traditional-results.ts
|
|
2267
|
+
/**
 * Shapes a raw Typesense search response from one collection into the
 * plugin's per-collection result format, tagging every hit with the
 * collection name, display name, and a default icon.
 */
const processSingleCollectionTraditionalResults = (results, collectionName, config) => {
  const displayName = config?.displayName || collectionName;
  const icon = "📄";
  const taggedHits = (results.hits ?? []).map((hit) => ({
    ...hit,
    collection: collectionName,
    displayName,
    icon,
    document: hit.document || {}
  }));
  return {
    collection: collectionName,
    displayName,
    icon,
    found: results.found,
    hits: taggedHits
  };
};
|
|
2285
|
+
/**
 * Merges per-collection traditional search results into a single response,
 * ranking the combined hits by descending text_match score and trimming
 * to per_page entries.
 */
const combineTraditionalResults = (results, options) => {
  const { page, per_page, query } = options;
  let totalFound = 0;
  const combinedHits = [];
  for (const result of results) {
    totalFound += result.found || 0;
    if (result.hits) combinedHits.push(...result.hits);
  }
  combinedHits.sort((left, right) => (right.text_match || 0) - (left.text_match || 0));
  const collections = results.map(({ collection, displayName, error, found, icon }) => ({
    collection,
    displayName,
    error,
    found: found || 0,
    icon
  }));
  return {
    collections,
    found: totalFound,
    hits: combinedHits.slice(0, per_page),
    page,
    request_params: { per_page, query },
    search_cutoff: false,
    search_time_ms: 0
  };
};
|
|
2312
|
+
|
|
2313
|
+
//#endregion
|
|
2314
|
+
//#region src/features/search/traditional/build-params.ts
|
|
2315
|
+
/**
 * Builds the Typesense query object for a traditional (keyword) search
 * against a single collection. The same comma-joined field list is used
 * for both query_by and highlight_full_fields.
 */
const buildTraditionalSearchParams = (query, options) => {
  const { page, per_page, searchFields = DEFAULT_SEARCH_FIELDS, sort_by, exclude_fields } = options;
  const fieldList = searchFields.join(",");
  return {
    highlight_full_fields: fieldList,
    num_typos: DEFAULT_NUM_TYPOS,
    page,
    per_page,
    q: query,
    query_by: fieldList,
    snippet_threshold: DEFAULT_SNIPPET_THRESHOLD,
    typo_tokens_threshold: DEFAULT_TYPO_TOKENS_THRESHOLD,
    exclude_fields,
    sort_by
  };
};
|
|
2333
|
+
|
|
2334
|
+
//#endregion
|
|
2335
|
+
//#region src/features/search/traditional/search-collection.ts
|
|
2336
|
+
/**
 * Runs a traditional keyword search against one collection.
 *
 * Search fields come from, in order of precedence: an explicit
 * options.searchFields override, or the indexed string fields of the
 * collection config (plus chunk_text for chunked collections). Unless
 * options.skipChunkFilter is set, chunk documents are filtered out when
 * the live collection schema declares an is_chunk field. Any failure is
 * returned as an empty result carrying an error message, never thrown.
 */
const searchTraditionalCollection = async (typesenseClient, collectionName, config, options) => {
  try {
    const buildOptions = {
      page: options.page,
      per_page: options.per_page
    };
    if (options.searchFields) {
      buildOptions.searchFields = options.searchFields;
    } else if (config) {
      const baseFields = config.mode === "chunked"
        ? [...config.fields || [], { name: "chunk_text", index: true, type: "string" }]
        : config.fields;
      const stringFields = baseFields
        .filter((field) => field.index !== false && (field.type === "string" || field.type === "string[]"))
        .map((field) => field.name);
      if (stringFields.length > 0) buildOptions.searchFields = stringFields;
    }
    if (options.sort_by) buildOptions.sort_by = options.sort_by;
    if (options.exclude_fields) buildOptions.exclude_fields = options.exclude_fields;
    const searchParameters = buildTraditionalSearchParams(options.query, buildOptions);
    if (!options.skipChunkFilter) {
      try {
        const schema = await typesenseClient.collections(collectionName).retrieve();
        const fieldNames = schema.fields?.map((field) => field.name) || [];
        // Chunked collections store chunk documents alongside parents;
        // keyword search should only surface the parent documents.
        if (fieldNames.includes("is_chunk")) searchParameters.filter_by = "!is_chunk:true";
      } catch (schemaError) {
        // Best effort: if the schema cannot be read, search without the chunk filter.
      }
    }
    const rawResults = await typesenseClient.collections(collectionName).documents().search(searchParameters);
    return processSingleCollectionTraditionalResults(rawResults, collectionName, config);
  } catch (error) {
    return {
      collection: collectionName,
      displayName: config?.displayName || collectionName,
      error: error instanceof Error ? error.message : "Unknown error",
      found: 0,
      hits: [],
      icon: "📄"
    };
  }
};
|
|
2375
|
+
|
|
2376
|
+
//#endregion
|
|
2377
|
+
//#region src/features/search/handlers/executors/traditional-multi-collection-search.ts
|
|
2378
|
+
/**
 * Runs a traditional keyword search over every enabled collection in
 * parallel, merges the results, and caches the combined response under the
 * "universal" cache scope.
 *
 * An explicit options.query_by overrides the per-collection search fields;
 * otherwise indexed string fields are derived from each collection config
 * (plus chunk_text for chunked collections). A failure while preparing any
 * single collection's search rejects the whole operation.
 */
const performTraditionalMultiCollectionSearch = async (typesenseClient, enabledCollections, query, options) => {
  logger.info("Performing traditional multi-collection search", {
    query,
    collections: enabledCollections.map(([name]) => name)
  });
  const searchFieldsOverride = options.query_by
    ? options.query_by.split(",").map((field) => field.trim())
    : void 0;
  // Derives { searchFields } (or {}) from a collection config.
  const resolveConfigFields = (config) => {
    if (!config) return {};
    const baseFields = config.mode === "chunked"
      ? [...config.fields || [], { name: "chunk_text", index: true, type: "string" }]
      : config.fields;
    const names = baseFields
      .filter((field) => field.index !== false && (field.type === "string" || field.type === "string[]"))
      .map((field) => field.name);
    return names.length > 0 ? { searchFields: names } : {};
  };
  const searchPromises = enabledCollections.map(async ([collectionName, config]) => {
    try {
      return await searchTraditionalCollection(typesenseClient, collectionName, config, {
        query,
        page: options.page,
        per_page: options.per_page,
        ...searchFieldsOverride ? { searchFields: searchFieldsOverride } : resolveConfigFields(config),
        ...options.sort_by && { sort_by: options.sort_by },
        ...options.exclude_fields && { exclude_fields: options.exclude_fields }
      });
    } catch (error) {
      logger.error("Error searching collection", error, {
        collection: collectionName,
        query
      });
      throw error;
    }
  });
  const collectionResults = await Promise.all(searchPromises);
  const fallbackResult = combineTraditionalResults(collectionResults, {
    page: options.page,
    per_page: options.per_page,
    query
  });
  searchCache.set(query, fallbackResult, "universal", options);
  return fallbackResult;
};
|
|
2421
|
+
|
|
2422
|
+
//#endregion
|
|
2423
|
+
//#region src/features/search/results/process-vector-results.ts
|
|
2424
|
+
/**
 * Normalizes a Typesense multi-search response from several collections
 * into one combined vector-search result, ranked by ascending vector
 * distance (closest semantic match first) and trimmed to per_page hits.
 * Each hit's document gains a short `hint` preview taken from chunk_text
 * or description when available.
 */
const processVectorSearchResults = (multiSearchResults, enabledCollections, options) => {
  const { per_page = DEFAULT_PER_PAGE, page = DEFAULT_PAGE, k, query, vector } = options;
  const perCollection = [];
  (multiSearchResults.results || []).forEach((result, index) => {
    const entry = enabledCollections[index];
    if (!entry) return;
    const [collectionName, config] = entry;
    const displayName = config?.displayName || collectionName;
    const hits = (result.hits || []).map((hit) => {
      const doc = hit.document || {};
      // Prefer chunk text, then description, as the preview snippet.
      let hint = doc.hint;
      if (doc.chunk_text) hint = String(doc.chunk_text).substring(0, 300) + "...";
      else if (doc.description) hint = String(doc.description).substring(0, 300) + "...";
      return {
        ...hit,
        collection: collectionName,
        displayName,
        icon: "📄",
        document: {
          ...doc,
          hint,
          ...doc.chunk_text ? { chunk_text: doc.chunk_text } : {}
        },
        vector_distance: hit.vector_distance,
        text_match: hit.text_match
      };
    });
    perCollection.push({
      collection: collectionName,
      displayName,
      icon: "📄",
      found: result.found || 0,
      error: result.error || void 0,
      hits
    });
  });
  const combinedHits = perCollection.flatMap((result) => result.hits);
  const totalFound = perCollection.reduce((sum, result) => sum + result.found, 0);
  // Smaller vector distance = closer match; hits without a distance sort last.
  combinedHits.sort((a, b) => (a.vector_distance ?? Infinity) - (b.vector_distance ?? Infinity));
  return {
    collections: perCollection.map((r) => ({
      collection: r.collection,
      displayName: r.displayName,
      error: r.error,
      found: r.found || 0,
      icon: r.icon
    })),
    found: totalFound,
    hits: combinedHits.slice(0, per_page),
    page,
    request_params: {
      k,
      per_page,
      query: query || null,
      vector: vector ? "provided" : null
    },
    search_cutoff: false,
    search_time_ms: 0
  };
};
|
|
2483
|
+
|
|
2484
|
+
//#endregion
|
|
2485
|
+
//#region src/features/search/vector/build-params.ts
|
|
2486
|
+
/**
 * Builds the Typesense query object for a vector (or hybrid) search.
 * Pure vector searches use q:"*" with a k-nearest vector_query; hybrid
 * searches (hybrid flag + a keyword query) add query_by fields and an
 * alpha weight to the vector_query. The embedding field itself is always
 * excluded from the response payload.
 */
const buildVectorSearchParams = (searchVector, options) => {
  const { query, k = DEFAULT_K, hybrid = false, alpha = DEFAULT_ALPHA, page = DEFAULT_PAGE, per_page = DEFAULT_PER_PAGE, filter_by, sort_by, searchFields } = options;
  const vectorLiteral = searchVector.join(",");
  const searchParams = {
    q: "*",
    vector_query: `embedding:([${vectorLiteral}], k:${k})`,
    per_page,
    page,
    exclude_fields: "embedding"
  };
  const isHybrid = hybrid && query;
  if (isHybrid) {
    searchParams.q = query;
    searchParams.query_by = searchFields?.join(",") || DEFAULT_SEARCH_FIELDS.join(",");
    searchParams.vector_query = `embedding:([${vectorLiteral}], k:${k}, alpha:${alpha})`;
  }
  if (filter_by) searchParams.filter_by = filter_by;
  if (sort_by) searchParams.sort_by = sort_by;
  return searchParams;
};
|
|
2507
|
+
|
|
2508
|
+
//#endregion
|
|
2509
|
+
//#region src/features/search/vector/build-multi-collection-params.ts
|
|
2510
|
+
/**
 * Builds one vector-search request per enabled collection for a Typesense
 * multi_search call. Per-collection search fields are derived from the
 * collection config (indexed string fields, plus chunk_text for chunked
 * collections). filter_by is carried through as _filter_by on each entry.
 */
const buildMultiCollectionVectorSearchParams = (searchVector, enabledCollections, options) => {
  const { query, k, hybrid, alpha, page, per_page, filter_by, sort_by } = options;
  // Derives the searchable string-field names from a collection config.
  const deriveSearchFields = (config) => {
    if (!config) return void 0;
    const baseFields = config.mode === "chunked"
      ? [...config.fields || [], { name: "chunk_text", index: true, type: "string" }]
      : config.fields;
    const names = baseFields
      .filter((field) => field.index !== false && (field.type === "string" || field.type === "string[]"))
      .map((field) => field.name);
    return names.length > 0 ? names : void 0;
  };
  return enabledCollections.map(([collectionName, config]) => {
    const searchFields = deriveSearchFields(config);
    // Only forward explicitly-supplied options so the defaults in
    // buildVectorSearchParams still apply.
    const perCollectionOptions = {
      ...query !== void 0 && { query },
      ...k !== void 0 && { k },
      ...hybrid !== void 0 && { hybrid },
      ...alpha !== void 0 && { alpha },
      ...page !== void 0 && { page },
      ...per_page !== void 0 && { per_page },
      ...sort_by !== void 0 && { sort_by },
      ...searchFields !== void 0 && { searchFields }
    };
    return {
      collection: collectionName,
      ...buildVectorSearchParams(searchVector, perCollectionOptions),
      _filter_by: filter_by
    };
  });
};
|
|
2544
|
+
|
|
2545
|
+
//#endregion
|
|
2546
|
+
//#region src/features/search/vector/generate-vector.ts
|
|
2547
|
+
/**
 * Resolves the embedding to search with: returns the caller-provided
 * vector when it is a non-empty array, otherwise generates one from the
 * query text. Returns null when neither yields a usable vector.
 */
const generateOrGetVector = async (query, vector, embeddingConfig) => {
  const hasProvidedVector = Array.isArray(vector) && vector.length > 0;
  if (hasProvidedVector) return vector;
  if (!query) return null;
  const searchVector = await generateEmbedding(query, embeddingConfig);
  return searchVector && searchVector.length > 0 ? searchVector : null;
};
|
|
2559
|
+
|
|
2560
|
+
//#endregion
|
|
2561
|
+
//#region src/features/search/services/search-service.ts
|
|
2562
|
+
/**
 * Orchestrates multi-collection search with caching and graceful fallback.
 *
 * Flow: cache lookup → semantic (hybrid vector) search → traditional
 * keyword search, falling back whenever no embedding can be produced,
 * "simple" mode is requested, or the vector search throws.
 */
var SearchService = class {
  constructor(typesenseClient, pluginOptions) {
    this.typesenseClient = typesenseClient;
    this.pluginOptions = pluginOptions;
  }
  /**
   * Runs a search for `query` over the given [tableName, config] pairs.
   * Successful vector results are cached; cache hits short-circuit.
   */
  async performSearch(query, targetCollections, options) {
    // Key encodes query, serialized options and the target table names.
    const cacheKey = `search:${query}:${JSON.stringify(options)}:${targetCollections.map((c) => c[0]).join(",")}`;
    const cachedResult = searchCache.get(query, cacheKey, options);
    if (cachedResult) return cachedResult;
    // "simple" mode explicitly requests keyword-only search.
    if ((options.mode || "semantic") === "simple") return this.performTraditionalSearch(query, targetCollections, options);
    const searchVector = await generateOrGetVector(query, void 0, this.pluginOptions.features.embedding);
    if (!searchVector) return this.performTraditionalSearch(query, targetCollections, options);
    try {
      const results = await this.executeVectorSearch(query, searchVector, targetCollections, options);
      searchCache.set(query, results, cacheKey, options);
      return results;
    } catch (error) {
      logger.error("Vector search failed, falling back to traditional", error);
      return this.performTraditionalSearch(query, targetCollections, options);
    }
  }
  /** Keyword-only search across the target collections. */
  async performTraditionalSearch(query, targetCollections, options) {
    return performTraditionalMultiCollectionSearch(this.typesenseClient, targetCollections, query, options);
  }
  /**
   * Hybrid vector search via Typesense multi_search. Returns an empty
   * result shape when there are no collections to search.
   */
  async executeVectorSearch(query, searchVector, targetCollections, options) {
    const searches = buildMultiCollectionVectorSearchParams(searchVector, targetCollections, {
      query,
      // Caps k at 30 here despite DEFAULT_K = 150 — presumably to bound
      // multi-search latency; NOTE(review): confirm this cap is intended.
      k: Math.min(30, DEFAULT_K),
      hybrid: true,
      alpha: DEFAULT_ALPHA,
      page: options.page,
      per_page: options.per_page,
      ...options.sort_by !== void 0 && { sort_by: options.sort_by }
    });
    if (searches.length === 0) return {
      collections: [],
      found: 0,
      hits: [],
      page: options.page,
      request_params: {
        per_page: options.per_page,
        query
      },
      search_cutoff: false,
      search_time_ms: 0
    };
    return processVectorSearchResults(await this.typesenseClient.multiSearch.perform({ searches }), targetCollections, {
      per_page: options.per_page,
      page: options.page,
      k: DEFAULT_K,
      query
    });
  }
};
|
|
2616
|
+
|
|
2617
|
+
//#endregion
|
|
2618
|
+
//#region src/features/search/handlers/utils/document-transformer.ts
|
|
2619
|
+
/**
 * Infers a coarse document type from a collection name: "article" or
 * "book" when the name contains those substrings, otherwise "document".
 */
function resolveDocumentType(collectionName) {
  const keywordTypes = [["article", "article"], ["book", "book"]];
  for (const [keyword, type] of keywordTypes) {
    if (collectionName.includes(keyword)) return type;
  }
  return "document";
}
|
|
2627
|
+
/**
 * Reduces a full search response to a minimal document list
 * (id, title, slug, type, collection) for lightweight consumers.
 * Missing fields fall back to empty strings ("Sin título" for title).
 */
function transformToSimpleFormat(data) {
  if (!data || !data.hits) return { documents: [] };
  const documents = data.hits.map((hit) => {
    const doc = hit.document || {};
    const rawCollection = hit.collection || doc.collection;
    const collection = typeof rawCollection === "string" ? rawCollection : "";
    return {
      id: doc.id || "",
      title: doc.title || "Sin título",
      slug: doc.slug || "",
      type: resolveDocumentType(collection),
      collection
    };
  });
  return { documents };
}
|
|
2645
|
+
|
|
2646
|
+
//#endregion
|
|
2647
|
+
//#region src/features/search/handlers/utils/target-resolver.ts
|
|
2648
|
+
/**
 * Resolves which Typesense tables a search request may target, based on
 * the plugin's enabled collection configs and the optional
 * features.search.defaults.tables allow-list.
 */
var TargetCollectionResolver = class {
  // Precomputed allow-list of searchable table names (set in constructor).
  allowedTableNames;
  constructor(pluginOptions) {
    this.pluginOptions = pluginOptions;
    this.allowedTableNames = this.getAllowedTableNames(pluginOptions);
  }
  /**
   * Computes the allow-list of table names from enabled collection configs.
   * With no explicit allow-list configured, every enabled table is allowed;
   * otherwise only the intersection with the configured names survives.
   */
  getAllowedTableNames(pluginOptions) {
    const configuredAllowed = pluginOptions.features.search?.defaults?.tables || [];
    const allowedTableNames = /* @__PURE__ */ new Set();
    // NOTE(review): allTableNames is populated but never read — likely
    // leftover from an earlier implementation; confirm before removing.
    const allTableNames = /* @__PURE__ */ new Set();
    for (const [collectionSlug, tableConfigs] of Object.entries(pluginOptions.collections || {})) if (Array.isArray(tableConfigs)) for (const tableConfig of tableConfigs) {
      if (!tableConfig.enabled) continue;
      const tableName = getTypesenseCollectionName(collectionSlug, tableConfig);
      allTableNames.add(tableName);
      if (configuredAllowed.length === 0) {
        // No allow-list configured: every enabled table is searchable.
        allowedTableNames.add(tableName);
        continue;
      }
      if (configuredAllowed.includes(tableName)) allowedTableNames.add(tableName);
    }
    return Array.from(allowedTableNames);
  }
  /**
   * Resolves target table names based on request parameters.
   * Handles both multi-collection (array) and single-collection (slug) requests.
   * Enforces strict validation against allowed tables.
   */
  resolveTargetTables(collectionNameSlug, requestedCollections) {
    if (!collectionNameSlug) {
      // No slug: either the explicitly requested subset (filtered against
      // the allow-list) or every allowed table.
      if (requestedCollections && requestedCollections.length > 0) return requestedCollections.filter((c) => this.allowedTableNames.includes(c));
      return this.allowedTableNames;
    }
    const targetTables = [];
    const tableConfigs = this.pluginOptions.collections?.[collectionNameSlug] || [];
    if (Array.isArray(tableConfigs)) {
      for (const config of tableConfigs) if (config.enabled) {
        const tableName = getTypesenseCollectionName(collectionNameSlug, config);
        if (this.allowedTableNames.includes(tableName)) targetTables.push(tableName);
      }
    }
    return targetTables;
  }
};
|
|
2691
|
+
|
|
2692
|
+
//#endregion
|
|
2693
|
+
//#region src/features/search/handlers/utils/config-mapper.ts
|
|
2694
|
+
/**
 * Maps resolved Typesense table names back to their full search
 * configuration entries ([tableName, config] pairs) from plugin options.
 */
var SearchConfigMapper = class {
  constructor(pluginOptions) {
    this.pluginOptions = pluginOptions;
  }
  /**
   * Maps a list of table names to their full configuration objects.
   * Essential for the search service which needs config details (fields, weights, etc.)
   */
  mapTablesToConfigs(targetTableNames) {
    const searchConfigs = [];
    const allCollections = Object.entries(this.pluginOptions.collections || {});
    for (const [slug, configs] of allCollections) {
      if (!Array.isArray(configs)) continue;
      const enabledConfigs = configs.filter((config) => config.enabled);
      for (const config of enabledConfigs) {
        const tableName = getTypesenseCollectionName(slug, config);
        if (targetTableNames.includes(tableName)) searchConfigs.push([tableName, config]);
      }
    }
    return searchConfigs;
  }
};
|
|
2715
|
+
|
|
2716
|
+
//#endregion
|
|
2717
|
+
//#region src/core/config/config-validation.ts
|
|
2718
|
+
/**
|
|
2719
|
+
* Configuration validation using Zod schemas
|
|
2720
|
+
*/
|
|
2721
|
+
/**
 * Formats validation error messages as a numbered, newline-separated string.
 */
function getValidationErrors(errors) {
  const numbered = errors.map((error, index) => `${index + 1}. ${error}`);
  return numbered.join("\n");
}
|
|
2727
|
+
/**
 * Zod schema for incoming search parameters.
 * Bounds enforced: page >= 1, 1 <= per_page <= 250, 0 <= num_typos <= 4,
 * 0 <= snippet_threshold <= 100, typo_tokens_threshold >= 1.
 * `q` is the only required field; numeric fields default when omitted.
 */
const SearchParamsSchema = z.object({
  facets: z.array(z.string()).optional(),
  filters: z.record(z.string(), z.any()).optional(),
  highlight_fields: z.array(z.string()).optional(),
  num_typos: z.number().int().min(0).max(4).optional().default(0),
  page: z.number().int().min(1).optional().default(1),
  per_page: z.number().int().min(1).max(250).optional().default(10),
  q: z.string().min(1, "Query parameter \"q\" is required"),
  snippet_threshold: z.number().int().min(0).max(100).optional().default(30),
  sort_by: z.string().optional(),
  typo_tokens_threshold: z.number().int().min(1).optional().default(1)
});
|
|
2742
|
+
/**
 * Validates raw search parameters against SearchParamsSchema.
 * Returns { success: true, data } on success, or { success: false, errors }
 * with human-readable "path: message" strings on failure.
 */
function validateSearchParams(params) {
  try {
    const data = SearchParamsSchema.parse(params);
    return { data, success: true };
  } catch (error) {
    if (error instanceof z.ZodError) {
      const errors = error.issues.map((issue) => {
        const prefix = issue.path.length > 0 ? `${issue.path.join(".")}: ` : "";
        return `${prefix}${issue.message}`;
      });
      return { errors, success: false };
    }
    return {
      errors: ["Invalid search parameters format"],
      success: false
    };
  }
}
|
|
2764
|
+
|
|
2765
|
+
//#endregion
|
|
2766
|
+
//#region src/features/search/utils/extract-collection-name.ts
|
|
2767
|
+
/**
 * Extracts the collection name from the URL path segment following
 * "/search", falling back to request.params.collectionName when no URL
 * string is present. Always returns string values (empty when absent).
 */
const extractCollectionName = (request) => {
  if (request.url && typeof request.url === "string") {
    const segments = new URL(request.url).pathname.split("/");
    const searchIndex = segments.indexOf("search");
    const candidate = searchIndex !== -1 ? segments[searchIndex + 1] : void 0;
    if (candidate) {
      return {
        collectionName: candidate,
        collectionNameStr: String(candidate)
      };
    }
    return { collectionName: "", collectionNameStr: "" };
  }
  const fromParams = String(request.params?.collectionName || "");
  return { collectionName: fromParams, collectionNameStr: fromParams };
};
|
|
2793
|
+
|
|
2794
|
+
//#endregion
|
|
2795
|
+
//#region src/features/search/utils/extract-search-params.ts
|
|
2796
|
+
/**
 * Parses and lightly validates search parameters from a request query
 * object. Numeric parsing failures are reported via an `errors` array
 * rather than thrown; optional keys appear on the result only when
 * actually supplied.
 */
const extractSearchParams = (query) => {
  const q = String(query?.q || "");
  const page = query?.page ? parseInt(String(query.page), 10) : 1;
  const per_page = query?.per_page ? parseInt(String(query.per_page), 10) : 10;
  const sort_by = query?.sort_by;
  const mode = query?.mode;
  const collectionParam = query?.collection;
  let collections;
  if (collectionParam) {
    collections = Array.isArray(collectionParam)
      ? collectionParam.map((c) => String(c))
      : [String(collectionParam)];
  }
  const exclude_fields = query?.exclude_fields;
  const query_by = query?.query_by;
  const simpleParam = query?.simple;
  const simple = simpleParam === "true" || simpleParam === true || simpleParam === "1";
  const errors = [];
  if (isNaN(page) || page < 1) errors.push("Invalid page parameter");
  if (isNaN(per_page) || per_page < 1 || per_page > 250) errors.push("Invalid per_page parameter");
  const result = { q, page, per_page };
  if (sort_by) result.sort_by = sort_by;
  if (mode) result.mode = mode;
  if (collections && collections.length > 0) result.collections = collections;
  if (exclude_fields) result.exclude_fields = exclude_fields;
  if (query_by) result.query_by = query_by;
  if (simple) result.simple = simple;
  if (errors.length > 0) result.errors = errors;
  return result;
};
|
|
2830
|
+
|
|
2831
|
+
//#endregion
|
|
2832
|
+
//#region src/features/search/handlers/validators/search-request-validator.ts
|
|
2833
|
+
/**
 * Validates a search request end to end: extracts the collection name and
 * query parameters, then checks them against the Zod schema. Returns the
 * parsed values on success, or a ready-to-send 400 Response on failure.
 */
function validateSearchRequest(request) {
  const { collectionName, collectionNameStr } = extractCollectionName(request);
  const searchParams = extractSearchParams(request.query);
  if (searchParams.errors?.length) {
    return {
      success: false,
      error: Response.json({ error: searchParams.errors[0] }, { status: 400 })
    };
  }
  const validation = validateSearchParams({
    page: searchParams.page,
    per_page: searchParams.per_page,
    q: searchParams.q,
    sort_by: searchParams.sort_by
  });
  if (!validation.success) {
    return {
      success: false,
      error: Response.json({
        details: getValidationErrors(validation.errors || []),
        error: "Invalid search parameters"
      }, { status: 400 })
    };
  }
  return {
    success: true,
    collectionName,
    collectionNameStr,
    searchParams
  };
}
|
|
2864
|
+
|
|
2865
|
+
//#endregion
|
|
2866
|
+
//#region src/features/search/handlers/search-handler.ts
|
|
2867
|
+
/**
 * Type guard: true when `result` is a non-null object whose `hits`
 * property is an array (the minimal shape of a search response).
 */
function isValidSearchResponse(result) {
  if (typeof result !== "object" || result === null) return false;
  return "hits" in result && Array.isArray(result.hits);
}
|
|
2873
|
+
/**
 * Creates the HTTP handler for search requests.
 *
 * Pipeline: validate request → resolve allowed target tables → map them to
 * their configs → run the search → serialize (optionally in the simplified
 * format). Failures become JSON error responses: 400 for bad input,
 * 403 for disallowed collections, 500 for unexpected errors.
 */
const createSearchHandler = (typesenseClient, pluginOptions) => {
  // Built once per handler and shared across all requests it serves.
  const searchService = new SearchService(typesenseClient, pluginOptions);
  const targetResolver = new TargetCollectionResolver(pluginOptions);
  const configMapper = new SearchConfigMapper(pluginOptions);
  return async (request) => {
    try {
      const validated = validateSearchRequest(request);
      if (!validated.success) return validated.error;
      const { collectionName, searchParams } = validated;
      const targetCollections = targetResolver.resolveTargetTables(collectionName, searchParams.collections);
      if (targetCollections.length === 0) {
        // Distinguish "requested collections all rejected" from
        // "nothing enabled/allowed at all".
        if (!collectionName && searchParams.collections && searchParams.collections.length > 0) return Response.json({ error: "None of the requested collections are allowed" }, { status: 403 });
        return Response.json({ error: "Collection not allowed or not enabled" }, { status: 403 });
      }
      if (!searchParams.q || searchParams.q.trim() === "") return Response.json({ error: "Query parameter \"q\" is required" }, { status: 400 });
      const searchConfigs = configMapper.mapTablesToConfigs(targetCollections);
      const searchResult = await searchService.performSearch(searchParams.q, searchConfigs, {
        filters: {},
        page: searchParams.page,
        per_page: searchParams.per_page,
        sort_by: searchParams.sort_by,
        mode: searchParams.mode,
        exclude_fields: searchParams.exclude_fields,
        query_by: searchParams.query_by
      });
      // The service may short-circuit with an already-built Response.
      if (searchResult instanceof Response) return searchResult;
      // ?simple=true returns the reduced document-list shape.
      if (searchParams.simple && isValidSearchResponse(searchResult)) return Response.json(transformToSimpleFormat(searchResult));
      return Response.json(searchResult);
    } catch (error) {
      return Response.json({
        details: error instanceof Error ? error.message : "Unknown error",
        error: "Search handler failed"
      }, { status: 500 });
    }
  };
};
|
|
2912
|
+
|
|
2913
|
+
//#endregion
|
|
2914
|
+
//#region src/features/search/create-search-endpoints.ts
|
|
2915
|
+
/**
 * Declares the plugin's search endpoints: the collections listing plus
 * both the single-collection and global search routes (all GET).
 */
const createSearchEndpoints = (typesenseClient, pluginOptions) => {
  const endpoints = [
    {
      handler: createCollectionsHandler(pluginOptions),
      method: "get",
      path: "/search/collections"
    },
    {
      handler: createSearchHandler(typesenseClient, pluginOptions),
      method: "get",
      path: "/search/:collectionName"
    },
    {
      handler: createSearchHandler(typesenseClient, pluginOptions),
      method: "get",
      path: "/search"
    }
  ];
  return endpoints;
};
|
|
2934
|
+
|
|
2935
|
+
//#endregion
|
|
2936
|
+
//#region src/shared/schema/collection-schemas.ts
|
|
2937
|
+
/**
 * Base Typesense fields every collection schema includes:
 * string id/slug plus int64 createdAt/updatedAt timestamps.
 */
const getBaseFields = () => {
  const stringFields = ["id", "slug"].map((name) => ({ name, type: "string" }));
  const timestampFields = ["createdAt", "updatedAt"].map((name) => ({ name, type: "int64" }));
  return [...stringFields, ...timestampFields];
};
|
|
2958
|
+
/**
 * Creates embedding field definition.
 * @param optional - Whether the embedding field is optional
 * @param dimensions - Number of dimensions for the embedding vector (default: 1536)
 */
const getEmbeddingField = (optional = true, dimensions = DEFAULT_EMBEDDING_DIMENSIONS) => {
  const field = {
    name: "embedding",
    type: "float[]",
    num_dim: dimensions
  };
  if (optional) field.optional = true;
  return field;
};
|
|
2969
|
+
/**
 * Maps plugin FieldMapping entries to Typesense field schema objects.
 *
 * The pseudo-type "auto" is normalized to "string"; `facet`, `index` and
 * `optional` are carried through verbatim (including `undefined`).
 *
 * @param fields - Array of FieldMapping configuration objects
 * @returns Array of Typesense field schemas
 */
const mapFieldMappingsToSchema = (fields) => {
	return fields.map((mapping) => {
		const resolvedType = mapping.type === "auto" ? "string" : mapping.type;
		return {
			name: mapping.name,
			type: resolvedType,
			facet: mapping.facet,
			index: mapping.index,
			optional: mapping.optional
		};
	});
};
|
|
2981
|
+
/**
 * Gets chunk-specific fields for chunk collections.
 *
 * `parent_doc_id` links each chunk back to its Payload document;
 * `headers` is a facetable, optional hierarchy of markdown headings.
 *
 * @returns Array of Typesense field schemas (fresh array per call)
 */
const getChunkFields = () => {
	const fields = [
		["parent_doc_id", "string"],
		["chunk_index", "int32"],
		["chunk_text", "string"],
		["is_chunk", "bool"]
	].map(([name, type]) => ({ name, type }));
	fields.push({
		name: "headers",
		type: "string[]",
		facet: true,
		optional: true
	});
	return fields;
};
|
|
3008
|
+
/**
 * Creates a complete schema for a chunk collection.
 *
 * Field order: base fields (minus any name already claimed by user or chunk
 * fields), then chunk fields, then user-mapped fields, then a REQUIRED
 * embedding field sized to `embeddingDimensions`.
 *
 * @param collectionSlug - Name to give the Typesense collection
 * @param tableConfig - Table config; `fields` (optional) are user mappings
 * @param embeddingDimensions - Embedding vector size
 * @returns Typesense collection schema object
 */
const getChunkCollectionSchema = (collectionSlug, tableConfig, embeddingDimensions = DEFAULT_EMBEDDING_DIMENSIONS) => {
	const userFields = tableConfig.fields ? mapFieldMappingsToSchema(tableConfig.fields) : [];
	const chunkFields = getChunkFields();
	// Base fields are dropped when a user or chunk field reuses their name.
	const claimedNames = new Set([...userFields, ...chunkFields].map((field) => field.name));
	const baseFields = getBaseFields().filter((field) => !claimedNames.has(field.name));
	return {
		name: collectionSlug,
		fields: [
			...baseFields,
			...chunkFields,
			...userFields,
			getEmbeddingField(false, embeddingDimensions)
		]
	};
};
|
|
3024
|
+
/**
 * Creates a complete schema for a full document collection.
 *
 * Field order: base fields (minus names overridden by user mappings), then
 * user-mapped fields, then an OPTIONAL embedding field sized to
 * `embeddingDimensions`.
 *
 * @param collectionSlug - Name to give the Typesense collection
 * @param tableConfig - Table config; `fields` are required user mappings
 * @param embeddingDimensions - Embedding vector size
 * @returns Typesense collection schema object
 */
const getFullDocumentCollectionSchema = (collectionSlug, tableConfig, embeddingDimensions = DEFAULT_EMBEDDING_DIMENSIONS) => {
	const userFields = mapFieldMappingsToSchema(tableConfig.fields);
	const claimedNames = new Set(userFields.map((field) => field.name));
	return {
		name: collectionSlug,
		fields: [
			...getBaseFields().filter((field) => !claimedNames.has(field.name)),
			...userFields,
			getEmbeddingField(true, embeddingDimensions)
		]
	};
};
|
|
3039
|
+
|
|
3040
|
+
//#endregion
|
|
3041
|
+
//#region src/shared/schema/field-mapper.ts
/**
 * Extracts a value from a document using dot-notation path.
 *
 * Returns `undefined` when the root is not an object or when any path
 * segment is missing; arrays are traversable via numeric segments.
 *
 * @param obj - Root object to read from
 * @param path - Dot-separated property path (e.g. "a.b.c")
 * @returns The value at the path, or undefined
 */
const getValueByPath = (obj, path) => {
	if (!obj || typeof obj !== "object") return undefined;
	let current = obj;
	for (const segment of path.split(".")) {
		const traversable = current && typeof current === "object" && segment in current;
		current = traversable ? current[segment] : undefined;
	}
	return current;
};
|
|
3051
|
+
/**
 * Maps a Payload document to a Typesense document based on field configuration.
 *
 * Per field (in order):
 *  - read the value via `payloadField` (dot path) or the field name;
 *  - if a `transform` is configured it wins outright (awaited, no coercion);
 *  - otherwise null/undefined values are skipped for optional fields, or
 *    replaced by a type default ("" / [] / false / 0);
 *  - then values are coerced to the declared type (objects → JSON for
 *    strings, scalars wrapped for string[], Boolean() for bool).
 *
 * @param doc - Source Payload document
 * @param fields - FieldMapping configuration array
 * @returns Promise resolving to the mapped plain object
 */
const mapPayloadDocumentToTypesense = async (doc, fields) => {
	// Type-appropriate fallback for required fields with no value;
	// returns undefined for types that have no defined default.
	const defaultFor = (type) => {
		if (type === "string") return "";
		if (type === "string[]") return [];
		if (type === "bool") return false;
		if (type.startsWith("int") || type === "float") return 0;
		return undefined;
	};
	const result = {};
	for (const field of fields) {
		let value = getValueByPath(doc, field.payloadField || field.name);
		if (field.transform) {
			// Custom transforms bypass all defaulting and coercion.
			value = await field.transform(value);
		} else {
			if (value === undefined || value === null) {
				if (field.optional) continue;
				const fallback = defaultFor(field.type);
				if (fallback !== undefined) value = fallback;
			}
			if (field.type === "string" && typeof value !== "string") {
				value = typeof value === "object" && value !== null ? JSON.stringify(value) : String(value);
			} else if (field.type === "string[]" && !Array.isArray(value)) {
				value = [String(value)];
			} else if (field.type === "bool") {
				value = Boolean(value);
			}
		}
		result[field.name] = value;
	}
	return result;
};
|
|
3076
|
+
|
|
3077
|
+
//#endregion
|
|
3078
|
+
//#region src/features/embedding/chunking/strategies/markdown-based/markdown-chunker.ts
/**
 * Markdown chunking strategy using LangChain's MarkdownTextSplitter.
 * Splits markdown text respecting markdown structure and preserves header metadata.
 */
/**
 * Extracts markdown headers and their positions from text.
 *
 * Matches ATX headings (`#` through `######` followed by whitespace) on a
 * per-line basis and records depth, trimmed title, and character offset.
 *
 * @param text - Full markdown source
 * @returns Array of { level, text, position } records in document order
 */
const extractHeaders = (text) => {
	const headers = [];
	for (const match of text.matchAll(/^(#{1,6})\s+(.+)$/gm)) {
		headers.push({
			level: match[1]?.length ?? 0,
			text: match[2]?.trim() ?? "",
			position: match.index
		});
	}
	return headers;
};
|
|
3097
|
+
/**
 * Finds the headers that apply to a given chunk based on its content.
 *
 * Locates the chunk in the full text by searching for its first 50
 * characters, then replays every header at or before that offset through a
 * 6-slot hierarchy (entering a level clears all deeper levels), producing
 * `Header N` metadata keys compatible with LangChain splitter output.
 *
 * @param chunkContent - Text of the chunk to locate
 * @param allHeaders - Header records from extractHeaders()
 * @param fullText - The complete source document
 * @returns Metadata object (empty when the chunk cannot be located or no
 *   header precedes it)
 */
const findChunkHeaders = (chunkContent, allHeaders, fullText) => {
	const probe = chunkContent.substring(0, Math.min(50, chunkContent.length));
	const chunkPosition = fullText.indexOf(probe);
	if (chunkPosition === -1) return {};
	const preceding = allHeaders.filter((header) => header.position <= chunkPosition);
	if (preceding.length === 0) return {};
	// Slot h.level-1 holds the active header at that depth; entering a level
	// invalidates everything deeper.
	const hierarchy = new Array(6).fill(null);
	for (const header of preceding) {
		hierarchy[header.level - 1] = header;
		hierarchy.fill(null, header.level, 6);
	}
	const metadata = {};
	hierarchy.forEach((header, depth) => {
		if (header) metadata[`Header ${depth + 1}`] = header.text;
	});
	return metadata;
};
|
|
3114
|
+
/**
 * Chunks markdown text using LangChain's MarkdownTextSplitter.
 *
 * Respects markdown structure and attaches `Header N` metadata for each
 * chunk (resolved by locating the chunk inside the original text).
 * `startIndex` is always 0 and `endIndex` is the chunk length — they do NOT
 * reflect positions in the source document.
 *
 * @param text - Markdown source to split
 * @param options - { maxChunkSize?, overlap? }
 * @returns Promise resolving to an array of chunk records
 */
const chunkMarkdown = async (text, options = {}) => {
	const { maxChunkSize = DEFAULT_CHUNK_SIZE, overlap = DEFAULT_OVERLAP } = options;
	if (!text || text.trim().length === 0) return [];
	const headers = extractHeaders(text);
	const splitter = new MarkdownTextSplitter({
		chunkSize: maxChunkSize,
		chunkOverlap: overlap
	});
	const documents = await splitter.createDocuments([text]);
	return documents.map((document, index) => {
		const headerMetadata = findChunkHeaders(document.pageContent, headers, text);
		const hasMetadata = Object.keys(headerMetadata).length > 0;
		return {
			text: document.pageContent,
			index,
			startIndex: 0,
			endIndex: document.pageContent.length,
			metadata: hasMetadata ? headerMetadata : undefined
		};
	});
};
|
|
3136
|
+
|
|
3137
|
+
//#endregion
|
|
3138
|
+
//#region src/features/embedding/chunking/index.ts
/**
 * Text chunking module - provides utilities for splitting text into optimal chunks
 *
 * Available strategies:
 * - Simple: Uses LangChain's RecursiveCharacterTextSplitter
 * - Markdown-based: Uses LangChain's MarkdownTextSplitter for markdown documents
 *
 * Future strategies can be added in ./strategies/
 */
/**
 * Splits text into chunks using LangChain's RecursiveCharacterTextSplitter.
 *
 * Fast paths: empty/whitespace input → []; input that already fits in one
 * chunk → a single trimmed chunk (endIndex is the UNTRIMMED length).
 *
 * @param text - Text to split
 * @param options - { maxChunkSize?, overlap? }
 * @returns Promise resolving to an array of chunk records
 */
const chunkText = async (text, options = {}) => {
	const { maxChunkSize = DEFAULT_CHUNK_SIZE, overlap = DEFAULT_OVERLAP } = options;
	if (!text || text.trim().length === 0) return [];
	if (text.length <= maxChunkSize) {
		return [{
			text: text.trim(),
			index: 0,
			startIndex: 0,
			endIndex: text.length
		}];
	}
	const splitter = new RecursiveCharacterTextSplitter({
		chunkSize: maxChunkSize,
		chunkOverlap: overlap
	});
	const pieces = await splitter.createDocuments([text]);
	return pieces.map((piece, index) => ({
		text: piece.pageContent,
		index,
		startIndex: 0,
		endIndex: piece.pageContent.length
	}));
};
|
|
3171
|
+
|
|
3172
|
+
//#endregion
|
|
3173
|
+
//#region src/core/utils/header-utils.ts
/**
 * Builds a hierarchical path array from markdown header metadata.
 *
 * Keys of the form "Header N" are sorted by N ascending; each non-empty
 * value extends the running path, and every step emits the cumulative
 * " > "-joined path.
 *
 * Fixes over the previous revision: `parseInt` is now called with an
 * explicit radix 10 (bare parseInt can misparse leading-zero input), and
 * the never-reassigned path accumulator is `const`.
 *
 * @param metadata - The metadata object from LangChain's MarkdownHeaderTextSplitter
 * @returns An array of header paths showing the hierarchy
 *
 * @example
 * // Input: { 'Header 1': 'Introduction', 'Header 2': 'Getting Started', 'Header 3': 'Installation' }
 * // Output: ['Introduction', 'Introduction > Getting Started', 'Introduction > Getting Started > Installation']
 */
const buildHeaderHierarchy = (metadata) => {
	if (!metadata || Object.keys(metadata).length === 0) return [];
	const headerLevel = (key) => Number.parseInt(key.replace("Header ", ""), 10);
	const headerKeys = Object.keys(metadata)
		.filter((key) => key.startsWith("Header "))
		.sort((a, b) => headerLevel(a) - headerLevel(b));
	const headers = [];
	const currentPath = [];
	for (const headerKey of headerKeys) {
		const headerValue = metadata[headerKey];
		if (!headerValue) continue;
		currentPath.push(headerValue);
		headers.push(currentPath.join(" > "));
	}
	return headers;
};
|
|
3199
|
+
|
|
3200
|
+
//#endregion
|
|
3201
|
+
//#region src/core/utils/chunk-format-utils.ts
/**
 * Utilities for formatting chunk text with header metadata.
 */
/**
 * Separator used between chunk content and header metadata.
 */
const CHUNK_HEADER_SEPARATOR = ".________________________________________.";
/**
 * Formats chunk text with header metadata at the end.
 *
 * @param content - The chunk content
 * @param headers - Hierarchical array of headers (e.g., ['Introduction', 'Introduction > Getting Started'])
 * @returns Formatted chunk text with content + separator + key-value metadata;
 *   content is returned unchanged when there are no headers
 *
 * @example
 * const formatted = formatChunkWithHeaders(
 *   'To install the package...',
 *   ['Introduction', 'Introduction > Getting Started', 'Introduction > Getting Started > Installation']
 * );
 * // Result:
 * // To install the package...
 * // ._________________________________________.
 * // section: Installation | path: Introduction > Getting Started > Installation
 */
const formatChunkWithHeaders = (content, headers) => {
	if (!headers || headers.length === 0) return content;
	// The last entry is the deepest (full) path; the section is its leaf.
	const fullPath = headers[headers.length - 1];
	const section = (fullPath && fullPath.split(" > ").pop()) || fullPath || "";
	return `${content}\n${CHUNK_HEADER_SEPARATOR}\nsection: ${section} | path: ${fullPath}`;
};
/**
 * Parses chunk text to extract header metadata and content separately.
 *
 * @param formattedChunk - The formatted chunk text
 * @returns Object with separated `metadata` ({ section, path }) and `content`;
 *   when no separator or no parseable metadata is present, returns the
 *   original text as `content` with no `metadata` key
 *
 * @example
 * const parsed = parseChunkText('Content here\n._______.\nsection: Installation | path: Intro > Installation');
 * parsed.metadata.section; // "Installation"
 * parsed.content;          // "Content here"
 */
const parseChunkText = (formattedChunk) => {
	if (!formattedChunk.includes(CHUNK_HEADER_SEPARATOR)) return { content: formattedChunk };
	const [contentPart, ...metadataParts] = formattedChunk.split(CHUNK_HEADER_SEPARATOR);
	const content = contentPart ? contentPart.trim() : "";
	const metadataLine = metadataParts.join(CHUNK_HEADER_SEPARATOR).trim();
	// Optional catch binding (the previous revision declared an unused
	// `error` parameter); kept as a safety net so malformed metadata can
	// never break callers — they just get the raw text back.
	try {
		const metadata = {
			section: "",
			path: ""
		};
		for (const pair of metadataLine.split(" | ")) {
			// Values may themselves contain ": ", so only the first split
			// token is the key; the rest is rejoined.
			const [key, ...valueParts] = pair.split(": ");
			const value = valueParts.join(": ").trim();
			if (key?.trim() === "section") metadata.section = value;
			else if (key?.trim() === "path") metadata.path = value;
		}
		if (metadata.section || metadata.path) return {
			metadata,
			content
		};
		return { content: formattedChunk };
	} catch {
		return { content: formattedChunk };
	}
};
/**
 * Extracts only the content from a formatted chunk (removes header metadata).
 *
 * @param formattedChunk - The formatted chunk text
 * @returns Just the content without header metadata
 */
const extractContentOnly = (formattedChunk) => {
	return parseChunkText(formattedChunk).content;
};
/**
 * Extracts only the header metadata from a formatted chunk.
 *
 * @param formattedChunk - The formatted chunk text
 * @returns Header metadata ({ section, path }) or undefined if not present
 */
const extractHeaderMetadata = (formattedChunk) => {
	return parseChunkText(formattedChunk).metadata;
};
|
|
3286
|
+
|
|
3287
|
+
//#endregion
|
|
3288
|
+
//#region src/features/sync/strategies/chunked-syncer.ts
/**
 * Sync strategy used when tableConfig.mode === "chunked": splits a Payload
 * document's source text into chunks and upserts each chunk as its own
 * Typesense document. Every chunk carries `parent_doc_id` so all chunks of
 * one Payload document can be found (and deleted) together.
 */
var ChunkedSyncer = class {
	/**
	 * @param client - Typesense client
	 * @param collectionSlug - Payload collection slug (stored; not read by this class)
	 * @param tableName - Target Typesense collection name
	 * @param config - Table config (sourceFields, fields, chunking settings)
	 * @param embeddingService - Optional; when present each chunk is embedded
	 */
	constructor(client, collectionSlug, tableName, config, embeddingService) {
		this.client = client;
		this.collectionSlug = collectionSlug;
		this.tableName = tableName;
		this.config = config;
		this.embeddingService = embeddingService;
	}
	/**
	 * Chunks the document's source text and upserts one Typesense document
	 * per chunk. On "update", existing chunks for this parent are deleted
	 * first so stale chunks from a longer previous version don't linger.
	 *
	 * @param doc - Payload document (id, slug, timestamps, source fields)
	 * @param operation - "create" | "update" (delete-first only on "update")
	 */
	async sync(doc, operation) {
		logger.debug(`Syncing document ${doc.id} to table ${this.tableName} (Chunked Mode)`);
		const sourceText = await this.extractSourceText(doc);
		if (!sourceText) {
			logger.warn(`No source text found for document ${doc.id}`);
			return;
		}
		const chunks = await this.generateChunks(sourceText);
		// User-configured field mappings are applied once and copied onto every chunk.
		const fields = this.config.fields ? await mapPayloadDocumentToTypesense(doc, this.config.fields) : {};
		fields.slug = doc.slug || "";
		// NOTE(review): when publishedAt is absent this sets the key to
		// undefined (dropped on JSON serialization) — confirm intended.
		fields.publishedAt = doc.publishedAt ? new Date(doc.publishedAt).getTime() : void 0;
		if (operation === "update") await this.client.collections(this.tableName).documents().delete({ filter_by: `parent_doc_id:${doc.id}` });
		for (const chunk of chunks) {
			// Header hierarchy (markdown strategy only) is appended to the
			// chunk text so it participates in embeddings and search.
			const headers = buildHeaderHierarchy(chunk.metadata);
			const formattedText = formatChunkWithHeaders(chunk.text, headers);
			let embedding = [];
			if (this.embeddingService) {
				const result = await this.embeddingService.getEmbedding(formattedText);
				if (result) embedding = result;
			}
			const chunkDoc = {
				// Deterministic chunk id: re-syncing overwrites instead of duplicating.
				id: `${doc.id}_chunk_${chunk.index}`,
				parent_doc_id: String(doc.id),
				chunk_index: chunk.index,
				chunk_text: formattedText,
				is_chunk: true,
				headers,
				embedding,
				createdAt: new Date(doc.createdAt).getTime(),
				updatedAt: new Date(doc.updatedAt).getTime(),
				...fields
			};
			await this.client.collections(this.tableName).documents().upsert(chunkDoc);
		}
		logger.info(`Synced ${chunks.length} chunks for document ${doc.id} to ${this.tableName}`);
	}
	/**
	 * Concatenates the configured sourceFields into the text to be chunked,
	 * joined by blank lines.
	 *
	 * NOTE(review): a field WITHOUT a transform contributes only when its
	 * value is a rich-text-like object (has a "root" key); plain string
	 * fields are silently skipped here, unlike DocumentSyncer which pushes
	 * them — confirm whether that asymmetry is intentional.
	 */
	async extractSourceText(doc) {
		const textParts = [];
		for (const sourceField of this.config.sourceFields) {
			const fieldName = typeof sourceField === "string" ? sourceField : sourceField.field;
			const transform = typeof sourceField === "string" ? void 0 : sourceField.transform;
			const val = doc[fieldName];
			if (transform) {
				let transformedVal = await transform(val);
				textParts.push(String(transformedVal || ""));
			} else if (typeof val === "object" && val !== null && "root" in val) {
				let transformedVal = JSON.stringify(val);
				textParts.push(String(transformedVal || ""));
			}
		}
		return textParts.join("\n\n");
	}
	/**
	 * Dispatches to the configured chunking strategy ("markdown" uses the
	 * markdown-aware splitter; anything else falls back to plain chunking).
	 */
	async generateChunks(text) {
		const { strategy, size, overlap } = this.config.chunking;
		const options = {
			maxChunkSize: size,
			overlap
		};
		if (strategy === "markdown") return await chunkMarkdown(text, options);
		else return await chunkText(text, options);
	}
};
|
|
3359
|
+
|
|
3360
|
+
//#endregion
|
|
3361
|
+
//#region src/features/sync/strategies/document-syncer.ts
/**
 * Sync strategy used when a table is NOT in chunked mode: maps the whole
 * Payload document to a single Typesense document (optionally with one
 * embedding computed from the configured sourceFields) and upserts it.
 */
var DocumentSyncer = class {
	/**
	 * @param client - Typesense client
	 * @param collectionSlug - Payload collection slug (stored; not read by this class)
	 * @param tableName - Target Typesense collection name
	 * @param config - Table config (fields, optional sourceFields)
	 * @param embeddingService - Optional; embedding is only generated when both
	 *   sourceFields and this service are present
	 */
	constructor(client, collectionSlug, tableName, config, embeddingService) {
		this.client = client;
		this.collectionSlug = collectionSlug;
		this.tableName = tableName;
		this.config = config;
		this.embeddingService = embeddingService;
	}
	/**
	 * Maps the document, fills in base fields (id/slug/timestamps as epoch
	 * millis), optionally attaches an embedding, and upserts the result.
	 *
	 * @param doc - Payload document
	 * @param operation - Accepted for interface parity with ChunkedSyncer; unused here
	 */
	async sync(doc, operation) {
		logger.debug(`Syncing document ${doc.id} to table ${this.tableName} (Document Mode)`);
		const typesenseDoc = await mapPayloadDocumentToTypesense(doc, this.config.fields);
		// Base fields overwrite any identically-named mapped fields.
		typesenseDoc.id = String(doc.id);
		typesenseDoc.slug = doc.slug || "";
		typesenseDoc.createdAt = new Date(doc.createdAt).getTime();
		typesenseDoc.updatedAt = new Date(doc.updatedAt).getTime();
		if (doc.publishedAt) typesenseDoc.publishedAt = new Date(doc.publishedAt).getTime();
		if (this.config.sourceFields && this.embeddingService) {
			const sourceText = await this.extractSourceText(doc);
			if (sourceText) {
				const embedding = await this.embeddingService.getEmbedding(sourceText);
				if (embedding) typesenseDoc.embedding = embedding;
			}
		}
		await this.client.collections(this.tableName).documents().upsert(typesenseDoc);
		logger.info(`Synced document ${doc.id} to ${this.tableName}`);
	}
	/**
	 * Extract and transform source fields for embedding generation.
	 *
	 * Each configured source field contributes one part (transform result if
	 * configured, JSON for rich-text-like objects with a "root" key, the raw
	 * value otherwise; falsy values become ""), joined by blank lines.
	 */
	async extractSourceText(doc) {
		if (!this.config.sourceFields) return "";
		const textParts = [];
		for (const sourceField of this.config.sourceFields) {
			let fieldName;
			let transform;
			if (typeof sourceField === "string") fieldName = sourceField;
			else {
				fieldName = sourceField.field;
				transform = sourceField.transform;
			}
			let val = doc[fieldName];
			if (transform) val = await transform(val);
			else if (typeof val === "object" && val !== null && "root" in val) val = JSON.stringify(val);
			textParts.push(String(val || ""));
		}
		return textParts.join("\n\n");
	}
};
|
|
3410
|
+
|
|
3411
|
+
//#endregion
|
|
3412
|
+
//#region src/features/sync/document-sync.ts
/**
 * Syncs a Payload document to Typesense.
 * Uses Strategy pattern to handle both chunked and full document approaches.
 *
 * Never throws: sync failures are logged and swallowed so a Typesense
 * outage cannot break the Payload save operation that triggered the hook.
 *
 * @param typesenseClient - Typesense client
 * @param collectionSlug - Payload collection slug
 * @param doc - The document that changed
 * @param operation - "create" | "update" (forwarded to the strategy)
 * @param tableConfig - Per-table config; `mode === "chunked"` selects ChunkedSyncer
 * @param embeddingService - Optional embedding service passed to the strategy
 */
const syncDocumentToTypesense = async (typesenseClient, collectionSlug, doc, operation, tableConfig, embeddingService) => {
	try {
		// Explicit tableName wins over the derived name.
		const tableName = tableConfig.tableName || getTypesenseCollectionName(collectionSlug, tableConfig);
		logger.debug("Syncing document to Typesense", {
			documentId: doc.id,
			collection: collectionSlug,
			tableName,
			operation,
			mode: tableConfig.mode
		});
		if (tableConfig.mode === "chunked") await new ChunkedSyncer(typesenseClient, collectionSlug, tableName, tableConfig, embeddingService).sync(doc, operation);
		else await new DocumentSyncer(typesenseClient, collectionSlug, tableName, tableConfig, embeddingService).sync(doc, operation);
		logger.info("Document synced successfully to Typesense", {
			documentId: doc.id,
			collection: collectionSlug,
			operation
		});
	} catch (error) {
		// Deliberate best-effort: log (flagging schema-validation failures
		// separately) and return instead of rethrowing into the CMS hook.
		const isValidationError = (error instanceof Error ? error.message : String(error)).toLowerCase().includes("validation");
		logger.error(`Failed to sync document to Typesense`, error, {
			documentId: doc.id,
			collection: collectionSlug,
			operation,
			isValidationError
		});
	}
};
|
|
3444
|
+
|
|
3445
|
+
//#endregion
|
|
3446
|
+
//#region src/features/sync/document-delete.ts
/**
 * Deletes a document from Typesense.
 * Handles both direct document deletion and chunk deletion: if no document
 * exists under the id (404), it falls back to deleting all chunks whose
 * `parent_doc_id` matches — covering collections synced in chunked mode.
 *
 * Never throws: any remaining failure is logged and swallowed so deletes
 * in Payload cannot be blocked by Typesense errors.
 *
 * @param typesenseClient - Typesense client
 * @param collectionSlug - Payload collection slug
 * @param docId - Id of the deleted Payload document
 * @param tableConfig - Table config used to derive the collection name
 */
const deleteDocumentFromTypesense = async (typesenseClient, collectionSlug, docId, tableConfig) => {
	try {
		const tableName = getTypesenseCollectionName(collectionSlug, tableConfig);
		logger.debug("Attempting to delete document from Typesense", {
			documentId: docId,
			collection: collectionSlug,
			tableName
		});
		try {
			// First try: the id maps directly to a full-document record.
			await typesenseClient.collections(tableName).documents(docId).delete();
			logger.info("Document deleted from Typesense", {
				documentId: docId,
				tableName
			});
		} catch (docDeleteError) {
			if (docDeleteError.httpStatus === 404) {
				// No document with that id — chunked mode stores records as
				// `${id}_chunk_N`, so delete by parent_doc_id filter instead.
				logger.debug("Document not found, attempting to delete chunks", {
					documentId: docId,
					tableName
				});
				try {
					await typesenseClient.collections(tableName).documents().delete({ filter_by: `parent_doc_id:${docId}` });
					logger.info("All chunks deleted for document", {
						documentId: docId,
						tableName
					});
				} catch (chunkDeleteError) {
					// 404 here just means nothing to clean up; anything else is a real failure.
					if (chunkDeleteError.httpStatus !== 404) logger.error("Failed to delete chunks for document", chunkDeleteError, {
						documentId: docId,
						tableName
					});
					else logger.debug("No chunks found to delete", { documentId: docId });
				}
			} else throw docDeleteError;
		}
	} catch (error) {
		const tableName = getTypesenseCollectionName(collectionSlug, tableConfig);
		logger.error("Failed to delete document from Typesense", error, {
			documentId: docId,
			collection: collectionSlug,
			tableName
		});
	}
};
|
|
3495
|
+
|
|
3496
|
+
//#endregion
|
|
3497
|
+
//#region src/features/sync/schema-manager.ts
/**
 * Keeps Typesense collection schemas in line with the plugin configuration:
 * creates missing collections and appends newly-configured fields to
 * existing ones (fields are never removed or retyped).
 */
var SchemaManager = class {
	/**
	 * @param client - Typesense client
	 * @param config - Full plugin options (collections map + features)
	 */
	constructor(client, config) {
		this.client = client;
		this.config = config;
	}
	/**
	 * Synchronizes all configured collections with Typesense.
	 * No-op when no collections are configured; disabled table configs are skipped.
	 */
	async syncCollections() {
		if (!this.config.collections) return;
		logger.info("Starting schema synchronization...");
		const embeddingDimensions = this.getEmbeddingDimensions();
		for (const [collectionSlug, tableConfigs] of Object.entries(this.config.collections)) {
			if (!tableConfigs) continue;
			for (const tableConfig of tableConfigs) {
				if (!tableConfig.enabled) continue;
				await this.syncTable(collectionSlug, tableConfig, embeddingDimensions);
			}
		}
		logger.info("Schema synchronization completed.");
	}
	/**
	 * Syncs a single table configuration: builds the target schema for the
	 * table's mode, then updates the existing collection or creates it on 404.
	 * Non-404 retrieval errors are rethrown to the caller.
	 */
	async syncTable(collectionSlug, tableConfig, embeddingDimensions) {
		const tableName = getTypesenseCollectionName(collectionSlug, tableConfig);
		let targetSchema;
		if (tableConfig.mode === "chunked") targetSchema = getChunkCollectionSchema(tableName, tableConfig, embeddingDimensions);
		else targetSchema = getFullDocumentCollectionSchema(tableName, tableConfig, embeddingDimensions);
		try {
			const collection = await this.client.collections(tableName).retrieve();
			await this.updateCollectionSchema(tableName, collection, targetSchema);
		} catch (error) {
			if (error?.httpStatus === 404) {
				logger.info(`Creating collection: ${tableName}`);
				await this.client.collections().create(targetSchema);
			} else {
				logger.error(`Error checking collection ${tableName}`, error);
				throw error;
			}
		}
	}
	/**
	 * Adds any target-schema fields missing from the live collection.
	 * `id` is excluded (implicit in Typesense); update failures are logged
	 * but not rethrown so one bad field can't abort the whole sync.
	 */
	async updateCollectionSchema(tableName, currentSchema, targetSchema) {
		if (!currentSchema || !currentSchema.fields) return;
		const currentFields = new Set(currentSchema.fields.map((f) => f.name));
		const newFields = targetSchema.fields?.filter((f) => !currentFields.has(f.name) && f.name !== "id") || [];
		if (newFields.length > 0) {
			logger.info(`Updating collection ${tableName} with ${newFields.length} new fields`, { fields: newFields.map((f) => f.name) });
			try {
				await this.client.collections(tableName).update({ fields: newFields });
			} catch (error) {
				logger.error(`Failed to update collection ${tableName}`, error);
			}
		}
	}
	/**
	 * Resolves the embedding vector size.
	 *
	 * Bug fix: the previous revision's guard had an EMPTY body
	 * (`if (this.config.features.embedding?.dimensions) {}`), so a
	 * configured dimension count was silently ignored and the default was
	 * always returned. It is now returned when present; the features object
	 * itself is also optional-chained for robustness.
	 *
	 * @returns Configured dimensions, or DEFAULT_EMBEDDING_DIMENSIONS
	 */
	getEmbeddingDimensions() {
		const configured = this.config.features?.embedding?.dimensions;
		if (configured) return configured;
		return DEFAULT_EMBEDDING_DIMENSIONS;
	}
};
|
|
3558
|
+
|
|
3559
|
+
//#endregion
|
|
3560
|
+
//#region src/features/rag/agent-manager.ts
/**
 * Registers the plugin's configured RAG agents as Typesense conversation
 * models. Talks to the `/conversations/models` HTTP API directly via fetch
 * (create with POST, fall back to PUT on 409 conflict).
 */
var AgentManager = class {
	/**
	 * @param client - Typesense client (its configuration supplies node URL and API key)
	 * @param config - Full plugin options; agents live under features.rag.agents
	 */
	constructor(client, config) {
		this.client = client;
		this.config = config;
	}
	/**
	 * Synchronizes all configured RAG agents with Typesense.
	 * No-op unless the RAG feature is enabled and at least one agent exists.
	 * Ensures each distinct history collection exists before syncing models;
	 * agents and collections are processed sequentially.
	 */
	async syncAgents() {
		if (!this.config.features.rag?.enabled) return;
		const agents = this.config.features.rag.agents || [];
		if (agents.length === 0) return;
		logger.info(`Starting synchronization of ${agents.length} RAG agents...`);
		// De-duplicate history collections shared between agents.
		const historyCollections = new Set(agents.map((a) => a.historyCollection || "conversation_history"));
		for (const collectionName of historyCollections) await ensureConversationCollection(this.client, collectionName);
		for (const agent of agents) await this.syncAgentModel(agent);
		logger.info("Agent synchronization completed.");
	}
	/**
	 * Builds the Typesense conversation-model payload for one agent and
	 * upserts it. Returns true on success, false on any failure (never throws).
	 */
	async syncAgentModel(agent) {
		try {
			const modelConfig = {
				id: agent.slug,
				model_name: agent.llmModel,
				system_prompt: agent.systemPrompt,
				api_key: agent.apiKey,
				history_collection: agent.historyCollection || "conversation_history",
				// Fallback defaults: 64 KiB context, 24 h TTL, top-5 retrieval.
				max_bytes: agent.maxContextBytes || 65536,
				ttl: agent.ttl || 86400,
				k_results: agent.kResults || 5
			};
			return await this.upsertConversationModel(modelConfig);
		} catch (error) {
			logger.error(`Failed to sync agent ${agent.slug}`, error);
			return false;
		}
	}
	/**
	 * Creates the conversation model via POST; on 409 (already exists)
	 * retries as a PUT update. Only the FIRST configured Typesense node is
	 * used to build the base URL. Returns true on success, false on invalid
	 * client configuration, HTTP failure, or network error.
	 */
	async upsertConversationModel(modelConfig) {
		const configuration = this.client.configuration;
		if (!configuration || !configuration.nodes || configuration.nodes.length === 0) {
			logger.error("Invalid Typesense client configuration");
			return false;
		}
		const node = configuration.nodes[0];
		const typesenseApiKey = configuration.apiKey;
		const baseUrl = `${node.protocol}://${node.host}:${node.port}`;
		try {
			const createResponse = await fetch(`${baseUrl}/conversations/models`, {
				method: "POST",
				headers: {
					"Content-Type": "application/json",
					"X-TYPESENSE-API-KEY": typesenseApiKey || ""
				},
				body: JSON.stringify(modelConfig)
			});
			if (createResponse.ok) {
				logger.info(`Agent model created: ${modelConfig.id}`);
				return true;
			}
			if (createResponse.status === 409) {
				// Conflict: a model with this id already exists — update it in place.
				logger.debug(`Agent model ${modelConfig.id} exists, updating...`);
				const updateResponse = await fetch(`${baseUrl}/conversations/models/${modelConfig.id}`, {
					method: "PUT",
					headers: {
						"Content-Type": "application/json",
						"X-TYPESENSE-API-KEY": typesenseApiKey || ""
					},
					body: JSON.stringify(modelConfig)
				});
				if (updateResponse.ok) {
					logger.info(`Agent model updated: ${modelConfig.id}`);
					return true;
				} else {
					const err$1 = await updateResponse.text();
					logger.error(`Failed to update agent ${modelConfig.id}: ${err$1}`);
					return false;
				}
			}
			const err = await createResponse.text();
			logger.error(`Failed to create agent ${modelConfig.id}: ${err}`);
			return false;
		} catch (networkError) {
			logger.error("Network error syncing agent model", networkError);
			return false;
		}
	}
};
|
|
3647
|
+
|
|
3648
|
+
//#endregion
|
|
3649
|
+
//#region src/plugin/main-plugin.ts
|
|
3650
|
+
/**
 * Typesense Search Plugin for Payload CMS
 *
 * Provides full-text search and vector search capabilities using Typesense,
 * with optional RAG (Retrieval Augmented Generation) support.
 *
 * @param pluginOptions - Configuration options for the plugin
 * @returns Payload config modifier function
 */
const typesenseSearch = (pluginOptions) => (config) => {
	const container = setupContainer(pluginOptions);
	const typesenseClient = createTypesenseClient(pluginOptions.typesense);

	// Embedding service is optional: resolve it only when the DI container
	// has one registered (i.e. vector search / embeddings were configured).
	let embeddingService;
	if (container.has(TOKENS.EMBEDDING_SERVICE)) {
		embeddingService = container.resolve(TOKENS.EMBEDDING_SERVICE);
	}

	// Register REST endpoints: search always, RAG handlers only when enabled.
	// `features?.` guards against a missing features object (the original
	// dereferenced `pluginOptions.features` unconditionally and would throw).
	const searchEndpoints = createSearchEndpoints(typesenseClient, pluginOptions);
	const ragEndpoints = pluginOptions.features?.rag?.enabled ? createRAGPayloadHandlers(pluginOptions) : [];
	config.endpoints = [
		...(config.endpoints || []),
		...searchEndpoints,
		...ragEndpoints
	];
	logger.debug("Search and RAG endpoints registered", {
		searchEndpointsCount: searchEndpoints.length,
		ragEndpointsCount: ragEndpoints.length
	});

	// Attach afterChange/afterDelete sync hooks to every Payload collection
	// that maps to at least one enabled Typesense table, unless autoSync is
	// explicitly turned off.
	const autoSyncEnabled = pluginOptions.features?.sync?.enabled && pluginOptions.features.sync.autoSync !== false;
	if (autoSyncEnabled && pluginOptions.collections) {
		config.collections = (config.collections || []).map((collection) => {
			const tableConfigs = pluginOptions.collections?.[collection.slug];
			// Only wrap collections that have a valid mapping with >=1 enabled table.
			if (!Array.isArray(tableConfigs) || !tableConfigs.some((tableConfig) => tableConfig.enabled)) {
				return collection;
			}
			logger.debug("Registering sync hooks for collection", {
				collection: collection.slug,
				tableCount: tableConfigs.length
			});
			return {
				...collection,
				hooks: {
					...collection.hooks,
					// Preserve any hooks the host config already declared, then
					// append ours. `tableConfigs` is a validated array here, so the
					// original's per-invocation Array.isArray re-check was dead code.
					afterChange: [...(collection.hooks?.afterChange || []), async ({ doc, operation, req: _req }) => {
						for (const tableConfig of tableConfigs) {
							if (tableConfig.enabled) {
								await syncDocumentToTypesense(typesenseClient, collection.slug, doc, operation, tableConfig, embeddingService);
							}
						}
					}],
					afterDelete: [...(collection.hooks?.afterDelete || []), async ({ doc, req: _req }) => {
						for (const tableConfig of tableConfigs) {
							if (tableConfig.enabled) {
								await deleteDocumentFromTypesense(typesenseClient, collection.slug, doc.id, tableConfig);
							}
						}
					}]
				}
			};
		});
	}

	// Chain onto any onInit the host config already had, then create/update
	// Typesense collections (and RAG agents when enabled). Initialization
	// failures are logged rather than thrown so Payload can still boot.
	const incomingOnInit = config.onInit;
	config.onInit = async (payload) => {
		if (incomingOnInit) await incomingOnInit(payload);
		try {
			logger.info("Initializing Typesense collections...");
			await new SchemaManager(typesenseClient, pluginOptions).syncCollections();
			if (pluginOptions.features?.rag?.enabled) {
				logger.info("Initializing RAG agents...");
				await new AgentManager(typesenseClient, pluginOptions).syncAgents();
			}
		} catch (error) {
			logger.error("Error initializing Typesense resources", error);
		}
	};
	return config;
};
|
|
3717
|
+
|
|
3718
|
+
//#endregion
|
|
3719
|
+
export { CHUNK_HEADER_SEPARATOR, DEFAULT_CACHE_TTL_MS, DEFAULT_CHUNK_OVERLAP, DEFAULT_CHUNK_SIZE, DEFAULT_EMBEDDING_DIMENSIONS, DEFAULT_EMBEDDING_MODEL, DEFAULT_HYBRID_SEARCH_ALPHA, DEFAULT_RAG_CONTEXT_LIMIT, DEFAULT_RAG_LLM_MODEL, DEFAULT_RAG_MAX_TOKENS, DEFAULT_SEARCH_LIMIT, DEFAULT_SESSION_TTL_SEC, ErrorCodes, buildContextText, buildConversationalUrl, buildHybridSearchParams, buildMultiSearchRequestBody, buildMultiSearchRequests, closeSession, configureLogger, createLogger, createSSEForwardStream, createTypesenseClient, ensureConversationCollection, executeRAGSearch, extractContentOnly, extractHeaderMetadata, extractSourcesFromResults, fetchChunkById, formatChunkWithHeaders, formatSSEEvent, generateEmbedding, generateEmbeddingWithUsage, generateEmbeddingsBatchWithUsage, getActiveSession, getDefaultRAGConfig, getSessionByConversationId, jsonResponse, logger, mergeRAGConfigWithDefaults, parseChunkText, parseConversationEvent, processConversationStream, saveChatSession, sendSSEEvent, testTypesenseConnection, typesenseSearch };
|
|
3720
|
+
//# sourceMappingURL=index.mjs.map
|