@nexo-labs/payload-typesense 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs ADDED
@@ -0,0 +1,3720 @@
1
+ import OpenAI from "openai";
2
+ import { GoogleGenerativeAI, TaskType } from "@google/generative-ai";
3
+ import Typesense from "typesense";
4
+ import { z } from "zod";
5
+ import { MarkdownTextSplitter, RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
6
+
7
+ //#region src/core/di/container.ts
8
+ var DIContainer = class {
9
+ services = /* @__PURE__ */ new Map();
10
+ factories = /* @__PURE__ */ new Map();
11
+ register(token, factory) {
12
+ this.factories.set(token, factory);
13
+ }
14
+ singleton(token, instance) {
15
+ this.services.set(token, instance);
16
+ }
17
+ resolve(token) {
18
+ if (this.services.has(token)) return this.services.get(token);
19
+ if (this.factories.has(token)) {
20
+ const factory = this.factories.get(token);
21
+ if (factory) return factory();
22
+ }
23
+ throw new Error(`Service not found: ${token.toString()}`);
24
+ }
25
+ has(token) {
26
+ return this.services.has(token) || this.factories.has(token);
27
+ }
28
+ /**
29
+ * Clears all registered services and factories.
30
+ * Useful for testing.
31
+ */
32
+ clear() {
33
+ this.services.clear();
34
+ this.factories.clear();
35
+ }
36
+ };
37
+
38
+ //#endregion
39
//#region src/core/di/tokens.ts
/**
 * Dependency-injection tokens for the plugin's services.
 * Symbol.for() uses the global symbol registry, so resolving the same
 * string anywhere in the process yields the identical token.
 */
// NOTE(review): SEARCH_SERVICE / SYNC_SERVICE / RAG_SERVICE are declared here,
// but their registration is not visible in this chunk — presumably wired elsewhere.
const TOKENS = {
	CONFIG: Symbol.for("Config"),
	LOGGER: Symbol.for("Logger"),
	TYPESENSE_CLIENT: Symbol.for("TypesenseClient"),
	EMBEDDING_PROVIDER: Symbol.for("EmbeddingProvider"),
	EMBEDDING_SERVICE: Symbol.for("EmbeddingService"),
	SEARCH_SERVICE: Symbol.for("SearchService"),
	SYNC_SERVICE: Symbol.for("SyncService"),
	RAG_SERVICE: Symbol.for("RAGService")
};
50
+
51
+ //#endregion
52
+ //#region src/core/logging/logger.ts
53
+ const LOG_LEVELS = {
54
+ debug: 0,
55
+ info: 1,
56
+ warn: 2,
57
+ error: 3,
58
+ silent: 4
59
+ };
60
+ var Logger = class {
61
+ level;
62
+ prefix;
63
+ enabled;
64
+ constructor(config = {}) {
65
+ this.level = config.level || "info";
66
+ this.prefix = config.prefix || "[payload-typesense]";
67
+ this.enabled = config.enabled !== false;
68
+ }
69
+ /**
70
+ * Update logger configuration
71
+ */
72
+ configure(config) {
73
+ if (config.level !== void 0) this.level = config.level;
74
+ if (config.prefix !== void 0) this.prefix = config.prefix;
75
+ if (config.enabled !== void 0) this.enabled = config.enabled;
76
+ }
77
+ /**
78
+ * Check if a log level should be output
79
+ */
80
+ shouldLog(level) {
81
+ if (!this.enabled) return false;
82
+ return LOG_LEVELS[level] >= LOG_LEVELS[this.level];
83
+ }
84
+ /**
85
+ * Format log message with context
86
+ */
87
+ formatMessage(message, context) {
88
+ if (!context || Object.keys(context).length === 0) return `${this.prefix} ${message}`;
89
+ return `${this.prefix} ${message} ${JSON.stringify(context)}`;
90
+ }
91
+ /**
92
+ * Debug level logging - detailed information for debugging
93
+ */
94
+ debug(message, context) {
95
+ if (this.shouldLog("debug")) console.debug(this.formatMessage(message, context));
96
+ }
97
+ /**
98
+ * Info level logging - general informational messages
99
+ */
100
+ info(message, context) {
101
+ if (this.shouldLog("info")) console.log(this.formatMessage(message, context));
102
+ }
103
+ /**
104
+ * Warning level logging - warning messages
105
+ */
106
+ warn(message, context) {
107
+ if (this.shouldLog("warn")) console.warn(this.formatMessage(message, context));
108
+ }
109
+ /**
110
+ * Error level logging - error messages
111
+ */
112
+ error(message, error, context) {
113
+ if (this.shouldLog("error")) {
114
+ const errorContext = {
115
+ ...context,
116
+ error: error instanceof Error ? {
117
+ message: error.message,
118
+ stack: error.stack,
119
+ name: error.name
120
+ } : String(error)
121
+ };
122
+ console.error(this.formatMessage(message, errorContext));
123
+ }
124
+ }
125
+ /**
126
+ * Get current log level
127
+ */
128
+ getLevel() {
129
+ return this.level;
130
+ }
131
+ /**
132
+ * Check if logger is enabled
133
+ */
134
+ isEnabled() {
135
+ return this.enabled;
136
+ }
137
+ };
138
+ let defaultLogger = new Logger();
139
+ /**
140
+ * Configure the default logger
141
+ */
142
+ const configureLogger = (config) => {
143
+ defaultLogger.configure(config);
144
+ };
145
+ /**
146
+ * Create a new logger instance with custom configuration
147
+ */
148
+ const createLogger = (config) => {
149
+ return new Logger(config);
150
+ };
151
+ const logger = {
152
+ debug: (message, context) => defaultLogger.debug(message, context),
153
+ info: (message, context) => defaultLogger.info(message, context),
154
+ warn: (message, context) => defaultLogger.warn(message, context),
155
+ error: (message, error, context) => defaultLogger.error(message, error, context),
156
+ configure: configureLogger,
157
+ getLevel: () => defaultLogger.getLevel(),
158
+ isEnabled: () => defaultLogger.isEnabled()
159
+ };
160
+
161
+ //#endregion
162
//#region src/core/config/constants.ts
/**
 * Constants for payload-typesense plugin
 * Centralizes all magic numbers and configuration defaults
 */
/**
 * Default dimensions for OpenAI text-embedding-3-large model
 */
const DEFAULT_EMBEDDING_DIMENSIONS = 3072;
/**
 * Default OpenAI embedding model
 */
const DEFAULT_EMBEDDING_MODEL = "text-embedding-3-large";
/**
 * Default Gemini embedding model
 */
const DEFAULT_GEMINI_EMBEDDING_MODEL = "gemini-embedding-001";
/**
 * Default chunk size for text splitting (in characters): 1000
 */
const DEFAULT_CHUNK_SIZE = 1e3;
/**
 * Default overlap for text splitting (in characters)
 */
// NOTE(review): DEFAULT_OVERLAP and DEFAULT_CHUNK_OVERLAP are both 200 and
// appear to describe the same concept — candidates for consolidation.
const DEFAULT_OVERLAP = 200;
/**
 * Default overlap between chunks (in characters)
 */
const DEFAULT_CHUNK_OVERLAP = 200;
/**
 * Default alpha value for hybrid search (0 = pure semantic, 1 = pure keyword)
 */
const DEFAULT_HYBRID_SEARCH_ALPHA = .5;
/**
 * Default number of search results to return
 */
const DEFAULT_SEARCH_LIMIT = 10;
/**
 * Default TTL for cache entries (in milliseconds) - 300 s = 5 minutes
 */
const DEFAULT_CACHE_TTL_MS = 300 * 1e3;
/**
 * Default maximum tokens for RAG responses
 */
const DEFAULT_RAG_MAX_TOKENS = 1e3;
/**
 * Default number of search results to use for RAG context
 */
const DEFAULT_RAG_CONTEXT_LIMIT = 5;
/**
 * Default session TTL (in seconds) - 30 minutes
 */
const DEFAULT_SESSION_TTL_SEC = 1800;
/**
 * Default OpenAI model for RAG chat
 */
const DEFAULT_RAG_LLM_MODEL = "gpt-4o-mini";
/**
 * Minimum required text length (in characters) for embedding generation
 */
const MIN_EMBEDDING_TEXT_LENGTH = 1;
/**
 * Error codes for structured error handling, grouped by subsystem:
 * 1xxx configuration, 2xxx Typesense, 3xxx embeddings, 4xxx RAG,
 * 5xxx chunking, 9xxx generic.
 */
const ErrorCodes = {
	INVALID_CONFIG: "ERR_1001",
	MISSING_API_KEY: "ERR_1002",
	INVALID_EMBEDDING_CONFIG: "ERR_1003",
	INVALID_RAG_CONFIG: "ERR_1004",
	TYPESENSE_CONNECTION_FAILED: "ERR_2001",
	TYPESENSE_COLLECTION_NOT_FOUND: "ERR_2002",
	TYPESENSE_SEARCH_FAILED: "ERR_2003",
	TYPESENSE_SYNC_FAILED: "ERR_2004",
	TYPESENSE_DELETE_FAILED: "ERR_2005",
	EMBEDDING_GENERATION_FAILED: "ERR_3001",
	INVALID_EMBEDDING_DIMENSIONS: "ERR_3002",
	OPENAI_API_ERROR: "ERR_3003",
	RAG_SEARCH_FAILED: "ERR_4001",
	RAG_SESSION_NOT_FOUND: "ERR_4002",
	RAG_CONVERSATION_FAILED: "ERR_4003",
	RAG_TOKEN_LIMIT_EXCEEDED: "ERR_4004",
	CHUNKING_FAILED: "ERR_5001",
	INVALID_CHUNK_SIZE: "ERR_5002",
	UNKNOWN_ERROR: "ERR_9001",
	VALIDATION_ERROR: "ERR_9002"
};
248
+
249
+ //#endregion
250
+ //#region src/features/embedding/providers/openai-provider.ts
251
+ var OpenAIEmbeddingProvider = class {
252
+ client;
253
+ model;
254
+ dimensions;
255
+ constructor(config, logger$1) {
256
+ this.logger = logger$1;
257
+ if (!config.apiKey) throw new Error("OpenAI API key is required");
258
+ this.client = new OpenAI({ apiKey: config.apiKey });
259
+ this.model = config.model || DEFAULT_EMBEDDING_MODEL;
260
+ this.dimensions = config.dimensions || DEFAULT_EMBEDDING_DIMENSIONS;
261
+ }
262
+ async generateEmbedding(text) {
263
+ if (!text || text.trim().length < MIN_EMBEDDING_TEXT_LENGTH) return null;
264
+ try {
265
+ const response = await this.client.embeddings.create({
266
+ model: this.model,
267
+ input: text.trim(),
268
+ dimensions: this.dimensions
269
+ });
270
+ const embedding = response.data[0]?.embedding;
271
+ if (!embedding) return null;
272
+ return {
273
+ embedding,
274
+ usage: {
275
+ promptTokens: response.usage?.prompt_tokens || 0,
276
+ totalTokens: response.usage?.total_tokens || 0
277
+ }
278
+ };
279
+ } catch (error) {
280
+ this.logger.error("OpenAI embedding generation failed", error, { model: this.model });
281
+ return null;
282
+ }
283
+ }
284
+ async generateBatchEmbeddings(texts) {
285
+ const validTexts = texts.filter((t) => t && t.trim().length >= MIN_EMBEDDING_TEXT_LENGTH);
286
+ if (validTexts.length === 0) return null;
287
+ try {
288
+ const response = await this.client.embeddings.create({
289
+ model: this.model,
290
+ input: validTexts.map((t) => t.trim()),
291
+ dimensions: this.dimensions
292
+ });
293
+ return {
294
+ embeddings: response.data.map((d) => d.embedding),
295
+ usage: {
296
+ promptTokens: response.usage?.prompt_tokens || 0,
297
+ totalTokens: response.usage?.total_tokens || 0
298
+ }
299
+ };
300
+ } catch (error) {
301
+ this.logger.error("OpenAI batch embedding generation failed", error, {
302
+ model: this.model,
303
+ count: texts.length
304
+ });
305
+ return null;
306
+ }
307
+ }
308
+ };
309
+
310
+ //#endregion
311
+ //#region src/features/embedding/providers/gemini-provider.ts
312
+ var GeminiEmbeddingProvider = class {
313
+ client;
314
+ model;
315
+ constructor(config, logger$1) {
316
+ this.logger = logger$1;
317
+ if (!config.apiKey) throw new Error("Gemini API key is required");
318
+ this.client = new GoogleGenerativeAI(config.apiKey);
319
+ this.model = config.model || DEFAULT_GEMINI_EMBEDDING_MODEL;
320
+ }
321
+ async generateEmbedding(text) {
322
+ if (!text || text.trim().length < MIN_EMBEDDING_TEXT_LENGTH) return null;
323
+ try {
324
+ const embedding = (await this.client.getGenerativeModel({ model: this.model }).embedContent({
325
+ content: {
326
+ role: "user",
327
+ parts: [{ text: text.trim() }]
328
+ },
329
+ taskType: TaskType.RETRIEVAL_DOCUMENT
330
+ })).embedding.values;
331
+ const estimatedTokens = Math.ceil(text.length / 4);
332
+ return {
333
+ embedding,
334
+ usage: {
335
+ promptTokens: estimatedTokens,
336
+ totalTokens: estimatedTokens
337
+ }
338
+ };
339
+ } catch (error) {
340
+ this.logger.error("Gemini embedding generation failed", error, { model: this.model });
341
+ return null;
342
+ }
343
+ }
344
+ async generateBatchEmbeddings(texts) {
345
+ const validTexts = texts.filter((t) => t && t.trim().length >= MIN_EMBEDDING_TEXT_LENGTH);
346
+ if (validTexts.length === 0) return null;
347
+ try {
348
+ const model = this.client.getGenerativeModel({ model: this.model });
349
+ const embeddings = [];
350
+ let totalTokens = 0;
351
+ for (const text of validTexts) {
352
+ const result = await model.embedContent({
353
+ content: {
354
+ role: "user",
355
+ parts: [{ text: text.trim() }]
356
+ },
357
+ taskType: TaskType.RETRIEVAL_DOCUMENT
358
+ });
359
+ embeddings.push(result.embedding.values);
360
+ totalTokens += Math.ceil(text.length / 4);
361
+ }
362
+ return {
363
+ embeddings,
364
+ usage: {
365
+ promptTokens: totalTokens,
366
+ totalTokens
367
+ }
368
+ };
369
+ } catch (error) {
370
+ this.logger.error("Gemini batch embedding generation failed", error, {
371
+ model: this.model,
372
+ count: texts.length
373
+ });
374
+ return null;
375
+ }
376
+ }
377
+ };
378
+
379
+ //#endregion
380
+ //#region src/features/embedding/embedding-service.ts
381
+ var EmbeddingServiceImpl = class {
382
+ constructor(provider, logger$1, config) {
383
+ this.provider = provider;
384
+ this.logger = logger$1;
385
+ this.config = config;
386
+ }
387
+ async getEmbedding(text) {
388
+ const result = await this.provider.generateEmbedding(text);
389
+ if (!result) return null;
390
+ return result.embedding;
391
+ }
392
+ async getEmbeddingsBatch(texts) {
393
+ const result = await this.provider.generateBatchEmbeddings(texts);
394
+ if (!result) return null;
395
+ return result.embeddings;
396
+ }
397
+ getDimensions() {
398
+ return this.config.dimensions || DEFAULT_EMBEDDING_DIMENSIONS;
399
+ }
400
+ };
401
+
402
+ //#endregion
403
//#region src/core/di/setup.ts
/**
 * Build and wire the DI container: config, logger, embedding provider
 * (OpenAI by default, Gemini when `type === "gemini"`) and a transient
 * embedding-service factory.
 *
 * @param config - Plugin configuration; `config.features.embedding` is required.
 * @returns The populated DIContainer.
 * @throws Error when the embedding configuration is missing, or when the
 *         selected provider's constructor rejects a missing API key.
 */
const setupContainer = (config) => {
	const container = new DIContainer();
	container.singleton(TOKENS.CONFIG, config);
	const logger$1 = new Logger({
		enabled: true,
		prefix: "[payload-typesense]"
	});
	container.singleton(TOKENS.LOGGER, logger$1);
	const embeddingConfig = config.features.embedding;
	if (!embeddingConfig) throw new Error("Embedding configuration missing");
	let provider;
	if (embeddingConfig.type === "gemini") provider = new GeminiEmbeddingProvider(embeddingConfig, logger$1);
	else provider = new OpenAIEmbeddingProvider(embeddingConfig, logger$1);
	container.singleton(TOKENS.EMBEDDING_PROVIDER, provider);
	container.register(TOKENS.EMBEDDING_SERVICE, () => new EmbeddingServiceImpl(provider, logger$1, embeddingConfig));
	// Log only non-secret fields: the previous code passed the whole embedding
	// config object as context, and Logger JSON-stringifies context — which
	// would serialize the API key into the log output.
	logger$1.debug("Embedding service registered", {
		provider: embeddingConfig.type || "openai",
		model: embeddingConfig.model
	});
	return container;
};
422
+
423
+ //#endregion
424
+ //#region src/core/client/typesense-client.ts
425
+ const createTypesenseClient = (typesenseConfig) => {
426
+ return new Typesense.Client({
427
+ apiKey: typesenseConfig.apiKey,
428
+ connectionTimeoutSeconds: typesenseConfig.connectionTimeoutSeconds || 2,
429
+ nodes: typesenseConfig.nodes
430
+ });
431
+ };
432
+ const testTypesenseConnection = async (client) => {
433
+ try {
434
+ await client.health.retrieve();
435
+ return true;
436
+ } catch (_error) {
437
+ return false;
438
+ }
439
+ };
440
+
441
+ //#endregion
442
+ //#region src/features/embedding/embeddings.ts
443
+ let openaiClient = null;
444
+ let currentOpenAIApiKey = null;
445
+ let geminiClient = null;
446
+ let currentGeminiApiKey = null;
447
+ const getOpenAIClient = (apiKey) => {
448
+ const key = apiKey || process.env.OPENAI_API_KEY;
449
+ if (!key) return null;
450
+ if (!openaiClient || currentOpenAIApiKey !== key) {
451
+ openaiClient = new OpenAI({ apiKey: key });
452
+ currentOpenAIApiKey = key;
453
+ }
454
+ return openaiClient;
455
+ };
456
+ const getGeminiClient = (apiKey) => {
457
+ const key = apiKey || process.env.GOOGLE_API_KEY;
458
+ if (!key) return null;
459
+ if (!geminiClient || currentGeminiApiKey !== key) {
460
+ geminiClient = new GoogleGenerativeAI(key);
461
+ currentGeminiApiKey = key;
462
+ }
463
+ return geminiClient;
464
+ };
465
/**
 * Generate an embedding for `text` using the configured provider.
 * Dispatches to Gemini when `config.type === "gemini"`, otherwise OpenAI.
 *
 * @param text - Text to embed; empty/whitespace input short-circuits to null.
 * @param config - Optional embedding configuration (type, model, dimensions, apiKey).
 * @returns The embedding vector, or null when generation is skipped or fails.
 */
const generateEmbedding = async (text, config) => {
	if (!text || text.trim().length < MIN_EMBEDDING_TEXT_LENGTH) {
		logger.debug("Skipping embedding generation for empty or invalid text");
		return null;
	}
	const useGemini = (config?.type || "openai") === "gemini";
	return useGemini ? generateGeminiEmbedding(text, config) : generateOpenAIEmbedding(text, config);
};
479
/**
 * Generate an embedding via the OpenAI API.
 * Returns null when no API key is configured, when the API call fails,
 * or when the returned vector does not match the expected dimensions.
 */
const generateOpenAIEmbedding = async (text, config) => {
	const client = getOpenAIClient(config?.apiKey);
	if (!client) {
		logger.debug("OpenAI API key not configured, skipping embedding generation");
		return null;
	}
	const model = config?.model || process.env.OPENAI_EMBEDDING_MODEL || DEFAULT_EMBEDDING_MODEL;
	const dimensions = config?.dimensions || DEFAULT_EMBEDDING_DIMENSIONS;
	try {
		logger.debug("Generating OpenAI embedding", {
			model,
			dimensions,
			textLength: text.length
		});
		const response = await client.embeddings.create({
			model,
			input: text.trim(),
			dimensions
		});
		const embedding = response.data[0]?.embedding;
		logger.debug("OpenAI embedding generated", { embeddingLength: embedding?.length });
		// Reject missing vectors or a size mismatch with the requested dimensions.
		if (!Array.isArray(embedding) || embedding.length !== dimensions) {
			logger.warn("Generated embedding has invalid dimensions", {
				expected: dimensions,
				received: embedding?.length
			});
			return null;
		}
		return embedding;
	} catch (error) {
		logger.error("Failed to generate OpenAI embedding", error, {
			textLength: text.length,
			model: config?.model
		});
		return null;
	}
};
518
/**
 * Generate an embedding via the Google Gemini API.
 * Returns null when no API key is configured, on API failure, or when the
 * returned vector length differs from the configured dimensions.
 */
const generateGeminiEmbedding = async (text, config) => {
	const client = getGeminiClient(config?.apiKey);
	if (!client) {
		logger.debug("Google API key not configured, skipping embedding generation");
		return null;
	}
	const model = config?.model || DEFAULT_GEMINI_EMBEDDING_MODEL;
	// NOTE(review): falls back to DEFAULT_EMBEDDING_DIMENSIONS (the OpenAI
	// constant, 3072) — confirm this matches the Gemini model's output size.
	const dimensions = config?.dimensions || DEFAULT_EMBEDDING_DIMENSIONS;
	try {
		logger.debug("Generating Gemini embedding", {
			model,
			dimensions,
			textLength: text.length
		});
		const result = await client.getGenerativeModel({ model }).embedContent({
			content: {
				role: "user",
				parts: [{ text: text.trim() }]
			},
			taskType: TaskType.RETRIEVAL_DOCUMENT
		});
		const embedding = result.embedding.values;
		logger.debug("Gemini embedding generated", { embeddingLength: embedding?.length });
		if (!Array.isArray(embedding) || embedding.length !== dimensions) {
			logger.warn("Generated embedding has invalid dimensions", {
				expected: dimensions,
				received: embedding?.length
			});
			return null;
		}
		return embedding;
	} catch (error) {
		logger.error("Failed to generate Gemini embedding", error, {
			textLength: text.length,
			model: config?.model
		});
		return null;
	}
};
559
/**
 * Generate an embedding together with token-usage information.
 * Dispatches by provider type exactly like generateEmbedding.
 *
 * @param text - The text to generate an embedding for.
 * @param config - Optional embedding configuration.
 * @returns { embedding, usage } or null when skipped/failed.
 */
const generateEmbeddingWithUsage = async (text, config) => {
	if (!text || text.trim().length < MIN_EMBEDDING_TEXT_LENGTH) {
		logger.debug("Skipping embedding generation for empty or invalid text");
		return null;
	}
	return (config?.type || "openai") === "gemini"
		? generateGeminiEmbeddingWithUsage(text, config)
		: generateOpenAIEmbeddingWithUsage(text, config);
};
576
/**
 * Generate an OpenAI embedding and return it with exact token usage as
 * reported by the API. Returns null when no key is configured, the call
 * fails, or the vector has unexpected dimensions.
 */
const generateOpenAIEmbeddingWithUsage = async (text, config) => {
	const client = getOpenAIClient(config?.apiKey);
	if (!client) {
		logger.debug("OpenAI API key not configured, skipping embedding generation");
		return null;
	}
	const model = config?.model || process.env.OPENAI_EMBEDDING_MODEL || DEFAULT_EMBEDDING_MODEL;
	const dimensions = config?.dimensions || DEFAULT_EMBEDDING_DIMENSIONS;
	try {
		logger.debug("Generating OpenAI embedding with usage tracking", {
			model,
			dimensions
		});
		const response = await client.embeddings.create({
			model,
			input: text.trim(),
			dimensions
		});
		const embedding = response.data[0]?.embedding;
		if (!Array.isArray(embedding) || embedding.length !== dimensions) {
			logger.warn("Generated embedding has invalid dimensions", {
				expected: dimensions,
				received: embedding?.length
			});
			return null;
		}
		return {
			embedding,
			usage: {
				promptTokens: response.usage?.prompt_tokens || 0,
				totalTokens: response.usage?.total_tokens || 0
			}
		};
	} catch (error) {
		logger.error("Failed to generate OpenAI embedding with usage", error, {
			textLength: text.length,
			model: config?.model
		});
		return null;
	}
};
620
/**
 * Gemini embedding plus estimated usage. Gemini reports no token counts,
 * so usage is approximated at ~4 characters per token.
 */
const generateGeminiEmbeddingWithUsage = async (text, config) => {
	const embedding = await generateGeminiEmbedding(text, config);
	if (!embedding) return null;
	const tokens = Math.ceil(text.length / 4);
	return {
		embedding,
		usage: {
			promptTokens: tokens,
			totalTokens: tokens
		}
	};
};
636
/**
 * Batch-generate embeddings with usage tracking. Empty/whitespace texts
 * are filtered out before dispatching to the configured provider.
 *
 * @param texts - Array of texts to generate embeddings for.
 * @param config - Optional embedding configuration.
 * @returns Embeddings with total usage information, or null if nothing usable.
 */
const generateEmbeddingsBatchWithUsage = async (texts, config) => {
	if (!texts || texts.length === 0) {
		logger.debug("No texts provided for batch embedding generation");
		return null;
	}
	const validTexts = texts.filter((t) => t && t.trim().length >= MIN_EMBEDDING_TEXT_LENGTH);
	if (validTexts.length === 0) {
		logger.debug("No valid texts after filtering for batch embedding generation");
		return null;
	}
	return (config?.type || "openai") === "gemini"
		? generateGeminiBatchEmbeddingsWithUsage(validTexts, config)
		: generateOpenAIBatchEmbeddingsWithUsage(validTexts, config);
};
656
/**
 * Batch OpenAI embeddings with exact usage from the API. All texts go out
 * in a single embeddings.create call; null on failure or when any returned
 * vector has unexpected dimensions.
 */
const generateOpenAIBatchEmbeddingsWithUsage = async (validTexts, config) => {
	const client = getOpenAIClient(config?.apiKey);
	if (!client) {
		logger.debug("OpenAI API key not configured, skipping batch embedding generation");
		return null;
	}
	const model = config?.model || process.env.OPENAI_EMBEDDING_MODEL || DEFAULT_EMBEDDING_MODEL;
	const dimensions = config?.dimensions || DEFAULT_EMBEDDING_DIMENSIONS;
	try {
		logger.debug("Generating OpenAI batch embeddings with usage tracking", {
			model,
			dimensions,
			batchSize: validTexts.length
		});
		const response = await client.embeddings.create({
			model,
			input: validTexts.map((t) => t.trim()),
			dimensions
		});
		const embeddings = response.data.map((item) => item.embedding);
		const allValid = embeddings.every((emb) => Array.isArray(emb) && emb.length === dimensions);
		if (!allValid) {
			logger.warn("Some generated embeddings have invalid dimensions", {
				expected: dimensions,
				batchSize: embeddings.length
			});
			return null;
		}
		logger.info("OpenAI batch embeddings generated successfully", {
			count: embeddings.length,
			totalTokens: response.usage?.total_tokens || 0
		});
		return {
			embeddings,
			usage: {
				promptTokens: response.usage?.prompt_tokens || 0,
				totalTokens: response.usage?.total_tokens || 0
			}
		};
	} catch (error) {
		logger.error("Failed to generate OpenAI batch embeddings with usage", error, {
			batchSize: validTexts.length,
			model: config?.model
		});
		return null;
	}
};
705
/**
 * Batch Gemini embeddings with estimated usage. The Gemini SDK embeds one
 * text per request, so texts are processed sequentially; tokens are
 * estimated at ~4 characters each.
 */
const generateGeminiBatchEmbeddingsWithUsage = async (validTexts, config) => {
	const client = getGeminiClient(config?.apiKey);
	if (!client) {
		logger.debug("Google API key not configured, skipping batch embedding generation");
		return null;
	}
	const model = config?.model || DEFAULT_GEMINI_EMBEDDING_MODEL;
	const dimensions = config?.dimensions || DEFAULT_EMBEDDING_DIMENSIONS;
	try {
		logger.debug("Generating Gemini batch embeddings with usage tracking", {
			model,
			dimensions,
			batchSize: validTexts.length
		});
		const embeddingModel = client.getGenerativeModel({ model });
		const embeddings = [];
		let totalEstimatedTokens = 0;
		for (const text of validTexts) {
			const { embedding } = await embeddingModel.embedContent({
				content: {
					role: "user",
					parts: [{ text: text.trim() }]
				},
				taskType: TaskType.RETRIEVAL_DOCUMENT
			});
			embeddings.push(embedding.values);
			totalEstimatedTokens += Math.ceil(text.length / 4);
		}
		if (!embeddings.every((emb) => Array.isArray(emb) && emb.length === dimensions)) {
			logger.warn("Some generated embeddings have invalid dimensions", {
				expected: dimensions,
				batchSize: embeddings.length
			});
			return null;
		}
		logger.info("Gemini batch embeddings generated successfully", {
			count: embeddings.length,
			estimatedTokens: totalEstimatedTokens
		});
		return {
			embeddings,
			usage: {
				promptTokens: totalEstimatedTokens,
				totalTokens: totalEstimatedTokens
			}
		};
	} catch (error) {
		logger.error("Failed to generate Gemini batch embeddings with usage", error, {
			batchSize: validTexts.length,
			model: config?.model
		});
		return null;
	}
};
763
+
764
+ //#endregion
765
+ //#region src/features/rag/query-builder.ts
766
+ /**
767
+ * Build the Typesense conversational search URL with all necessary parameters
768
+ *
769
+ * @param config - Query configuration
770
+ * @param config.userMessage - The user's message/query
771
+ * @param config.chatId - Optional conversation ID for follow-up questions
772
+ * @param conversationModelId - The conversation model ID in Typesense
773
+ * @param typesenseConfig - Typesense connection config
774
+ * @returns URL for the Typesense multi_search endpoint with conversation parameters
775
+ */
776
+ function buildConversationalUrl(config, conversationModelId, typesenseConfig) {
777
+ const protocol = typesenseConfig.nodes[0].protocol || "http";
778
+ const typesenseUrl = new URL(`${protocol}://${typesenseConfig.nodes[0].host}:${typesenseConfig.nodes[0].port}/multi_search`);
779
+ typesenseUrl.searchParams.set("q", config.userMessage);
780
+ typesenseUrl.searchParams.set("conversation", "true");
781
+ typesenseUrl.searchParams.set("conversation_model_id", conversationModelId);
782
+ if (config.chatId) typesenseUrl.searchParams.set("conversation_id", config.chatId);
783
+ typesenseUrl.searchParams.set("conversation_stream", "true");
784
+ return typesenseUrl;
785
+ }
786
+ /**
787
+ * Build multi-search requests for Typesense with hybrid search configuration
788
+ *
789
+ * @param config - Query configuration including embedding, collections, and filters
790
+ * @returns Array of search requests for Typesense multi_search
791
+ */
792
+ function buildMultiSearchRequests(config) {
793
+ const { searchCollections, queryEmbedding, selectedDocuments, kResults = 10, advancedConfig = {} } = config;
794
+ return searchCollections.map((collection) => {
795
+ const request = {
796
+ collection,
797
+ query_by: "chunk_text,title,headers",
798
+ vector_query: `embedding:([${queryEmbedding.join(",")}], k:${kResults})`,
799
+ exclude_fields: "embedding",
800
+ ...buildAdvancedSearchParams(advancedConfig)
801
+ };
802
+ if (selectedDocuments && selectedDocuments.length > 0) request.filter_by = `parent_doc_id:[${selectedDocuments.map((id) => `"${id}"`).join(",")}]`;
803
+ return request;
804
+ });
805
+ }
806
+ /**
807
+ * Build advanced search parameters from config
808
+ *
809
+ * @param config - Advanced search configuration
810
+ * @returns Object with advanced search parameters
811
+ */
812
+ function buildAdvancedSearchParams(config) {
813
+ const params = {};
814
+ if (config.typoTokensThreshold !== void 0) params.typo_tokens_threshold = config.typoTokensThreshold;
815
+ if (config.numTypos !== void 0) params.num_typos = config.numTypos;
816
+ if (config.prefix !== void 0) params.prefix = config.prefix;
817
+ if (config.dropTokensThreshold !== void 0) params.drop_tokens_threshold = config.dropTokensThreshold;
818
+ if (config.enableStemming !== void 0) params.enable_stemming = config.enableStemming;
819
+ return params;
820
+ }
821
/**
 * Wrap the per-collection search requests in the body shape expected by
 * the Typesense multi_search endpoint.
 *
 * @param config - Query configuration.
 * @returns Request body: { searches: [...] }.
 */
function buildMultiSearchRequestBody(config) {
	const searches = buildMultiSearchRequests(config);
	return { searches };
}
830
+ /**
831
+ * Build hybrid search parameters for combining semantic and keyword search
832
+ *
833
+ * @param alpha - Weight between semantic (1.0) and keyword (0.0) search
834
+ * @param rerankMatches - Whether to rerank hybrid search results
835
+ * @param queryFields - Fields to use for keyword search
836
+ * @returns Object with hybrid search parameters
837
+ */
838
+ function buildHybridSearchParams(alpha = .9, rerankMatches = true, queryFields = "chunk_text,title") {
839
+ return {
840
+ alpha,
841
+ rerank_hybrid_matches: rerankMatches,
842
+ query_fields: queryFields
843
+ };
844
+ }
845
+
846
+ //#endregion
847
+ //#region src/features/rag/stream-handler.ts
848
+ /**
849
+ * Parse a single SSE event from Typesense conversation stream
850
+ *
851
+ * @param line - Raw SSE event line
852
+ * @returns Parsed conversation event or null if not parseable
853
+ */
854
+ function parseConversationEvent(line) {
855
+ if (!line.startsWith("data: ")) return null;
856
+ const data = line.slice(6);
857
+ if (data === "[DONE]") return { raw: "[DONE]" };
858
+ try {
859
+ const parsed = JSON.parse(data);
860
+ const event = { raw: parsed };
861
+ if (parsed.conversation_id) event.conversationId = parsed.conversation_id;
862
+ else if (parsed.conversation?.conversation_id) event.conversationId = parsed.conversation.conversation_id;
863
+ if (parsed.message !== void 0) event.message = parsed.message;
864
+ else if (parsed.conversation?.answer) event.message = parsed.conversation.answer;
865
+ if (parsed.results) event.results = parsed.results;
866
+ return event;
867
+ } catch (e) {
868
+ logger.error("Error parsing SSE data from conversation stream", e);
869
+ return null;
870
+ }
871
+ }
872
/**
 * Extract source chunk metadata from Typesense multi-search results.
 *
 * @param results - Typesense multi-search results array
 * @param documentTypeResolver - Optional function to resolve document type from collection name
 * @returns Array of chunk sources with metadata (content left empty; excerpt truncated to 200 chars)
 */
function extractSourcesFromResults(results, documentTypeResolver) {
	const allSources = [];
	for (const result of results) {
		if (!result.hits) continue;
		for (const hit of result.hits) {
			const doc = hit.document;
			// Use ?? (not ||): a legitimate score of 0 — e.g. an exact vector
			// match with distance 0 — must not fall through to the next field.
			const score = hit.vector_distance ?? hit.text_match ?? 0;
			const collectionName = result.request_params?.collection_name || "";
			const type = documentTypeResolver ? documentTypeResolver(collectionName) : getDefaultDocumentType(collectionName);
			const fullContent = doc.chunk_text || "";
			allSources.push({
				id: doc.id || "",
				title: doc.title || "Sin título",
				slug: doc.slug || "",
				type,
				chunkIndex: doc.chunk_index ?? 0,
				relevanceScore: score,
				content: "",
				excerpt: fullContent.substring(0, 200) + (fullContent.length > 200 ? "..." : "")
			});
		}
	}
	return allSources;
}
901
/**
 * Build context text from search results (useful for token estimation).
 *
 * @param results - Typesense multi-search results array
 * @returns All chunk texts concatenated, each followed by a newline
 */
function buildContextText(results) {
	const pieces = [];
	for (const result of results) {
		if (!result.hits) continue;
		for (const hit of result.hits) {
			pieces.push((hit.document.chunk_text || "") + "\n");
		}
	}
	return pieces.join("");
}
915
/**
 * Process a Typesense conversation SSE stream to completion.
 *
 * Accumulates the assistant message, captures the first conversation id and
 * the first batch of search results (sources + context text), and invokes
 * `onEvent` for every parsed event.
 *
 * @param response - Fetch Response with SSE stream body
 * @param onEvent - Optional callback for each parsed event
 * @param documentTypeResolver - Optional function to resolve document type
 * @returns Processing result with full message, conversation id, sources, and context text
 */
async function processConversationStream(response, onEvent, documentTypeResolver) {
	const reader = response.body.getReader();
	const decoder = new TextDecoder();
	let buffer = "";
	let sources = [];
	let hasCollectedSources = false;
	let conversationId = null;
	let contextText = "";
	let fullMessage = "";
	// Shared per-line handling for both the main loop and the final flush.
	const handleLine = (line) => {
		const event = parseConversationEvent(line);
		if (!event) return;
		if (onEvent) onEvent(event);
		if (!conversationId && event.conversationId) conversationId = event.conversationId;
		if (!hasCollectedSources && event.results) {
			sources = extractSourcesFromResults(event.results, documentTypeResolver);
			contextText = buildContextText(event.results);
			hasCollectedSources = true;
		}
		if (event.message) fullMessage += event.message;
	};
	while (true) {
		const { done, value } = await reader.read();
		if (done) break;
		buffer += decoder.decode(value, { stream: true });
		const lines = buffer.split("\n");
		// Keep the last (possibly partial) line in the buffer for the next chunk.
		buffer = lines.pop() || "";
		for (const line of lines) handleLine(line);
	}
	// Fix: flush the decoder and process any final event that was not
	// newline-terminated — previously it was silently dropped.
	buffer += decoder.decode();
	if (buffer) handleLine(buffer);
	return {
		fullMessage,
		conversationId,
		sources,
		contextText
	};
}
958
/**
 * Create a ReadableStream that forwards SSE events from an upstream response.
 *
 * Each complete line is parsed (and handed to `onData` when provided) before
 * being re-emitted verbatim with a trailing newline.
 *
 * @param response - Fetch Response with SSE stream body
 * @param onData - Optional callback invoked with each parsed event before forwarding
 * @returns ReadableStream of encoded SSE lines
 */
function createSSEForwardStream(response, onData) {
	const reader = response.body.getReader();
	const decoder = new TextDecoder();
	const encoder = new TextEncoder();
	let buffer = "";
	// Parse, notify, and forward one complete line.
	const forwardLine = (line, controller) => {
		const event = parseConversationEvent(line);
		if (event && onData) onData(event);
		if (line) controller.enqueue(encoder.encode(line + "\n"));
	};
	return new ReadableStream({
		async start(controller) {
			while (true) {
				const { done, value } = await reader.read();
				if (done) {
					// Fix: flush the decoder and forward a final line that was
					// not newline-terminated — previously it was silently dropped.
					buffer += decoder.decode();
					if (buffer) forwardLine(buffer, controller);
					controller.close();
					break;
				}
				buffer += decoder.decode(value, { stream: true });
				const lines = buffer.split("\n");
				// Keep the trailing partial line for the next chunk.
				buffer = lines.pop() || "";
				for (const line of lines) forwardLine(line, controller);
			}
		},
		cancel() {
			reader.cancel();
		}
	});
}
993
/**
 * Default document type resolver based on substrings of the collection name.
 *
 * @param collectionName - Name of the Typesense collection
 * @returns Document type string ("document" when no marker matches)
 */
function getDefaultDocumentType(collectionName) {
	// Order matters: first matching marker wins.
	const markers = [
		["article", "article"],
		["book", "book"],
		["post", "post"],
		["page", "page"]
	];
	for (const [needle, type] of markers) {
		if (collectionName.includes(needle)) return type;
	}
	return "document";
}
1006
+
1007
+ //#endregion
1008
+ //#region src/features/rag/setup.ts
1009
/**
 * Ensure the conversation history collection exists in Typesense.
 *
 * Retrieves the collection first; on a 404 it attempts to create it with the
 * fixed conversation-history schema. Any other error is logged and reported
 * as failure.
 *
 * @param client - Typesense client
 * @param collectionName - Name of the conversation history collection
 * @returns true if the collection exists or was created successfully
 */
async function ensureConversationCollection(client, collectionName = "conversation_history") {
	try {
		await client.collections(collectionName).retrieve();
		logger.info("Conversation collection already exists", { collection: collectionName });
		return true;
	} catch (error) {
		if (error?.httpStatus !== 404) {
			logger.error("Error checking conversation collection", error, { collection: collectionName });
			return false;
		}
		logger.info("Creating conversation collection", { collection: collectionName });
		// Schema expected by Typesense conversation history storage.
		const schema = {
			name: collectionName,
			fields: [
				{ name: "conversation_id", type: "string" },
				{ name: "model_id", type: "string" },
				{ name: "timestamp", type: "int32" },
				{ name: "role", type: "string" },
				{ name: "message", type: "string" }
			]
		};
		try {
			await client.collections().create(schema);
			logger.info("Conversation collection created successfully", { collection: collectionName });
			return true;
		} catch (createError) {
			logger.error("Failed to create conversation collection", createError, { collection: collectionName });
			return false;
		}
	}
}
1061
/**
 * Get the default RAG configuration values.
 *
 * @returns Default configuration with `hybrid`, `hnsw`, and `advanced` sections
 */
function getDefaultRAGConfig() {
	// Hybrid search weighting: heavily favors the vector score.
	const hybrid = {
		alpha: 0.9,
		rerankMatches: true,
		queryFields: "chunk_text,title"
	};
	// HNSW index parameters.
	const hnsw = {
		efConstruction: 200,
		M: 16,
		ef: 100,
		maxConnections: 64,
		distanceMetric: "cosine"
	};
	// Text-matching behavior.
	const advanced = {
		typoTokensThreshold: 1,
		numTypos: 2,
		prefix: true,
		dropTokensThreshold: 1,
		enableStemming: true
	};
	return { hybrid, hnsw, advanced };
}
1089
/**
 * Merge a user-provided RAG config with the defaults, section by section.
 *
 * @param userConfig - User-provided RAG configuration (may be undefined)
 * @returns Merged configuration where user values override defaults per section
 */
function mergeRAGConfigWithDefaults(userConfig) {
	const defaults = getDefaultRAGConfig();
	if (!userConfig) return defaults;
	const merged = {};
	for (const section of ["hybrid", "hnsw", "advanced"]) {
		merged[section] = {
			...defaults[section],
			...userConfig[section]
		};
	}
	return merged;
}
1113
+
1114
+ //#endregion
1115
+ //#region src/features/rag/handlers/rag-search-handler.ts
1116
/**
 * Execute a RAG conversational search against Typesense.
 *
 * Builds the conversational URL and multi-search request body, POSTs them,
 * and reports whether the response is an SSE stream.
 *
 * @param typesenseConfig - Typesense connection configuration (host + API key)
 * @param searchConfig - RAG search configuration (model, collections, k, advanced)
 * @param request - Chat request parameters
 * @returns Promise with `{ response, isStreaming, sources }` (sources filled later by the stream handler)
 * @throws Error when Typesense responds with a non-OK status
 */
async function executeRAGSearch(typesenseConfig, searchConfig, request) {
	const url = buildConversationalUrl(request, searchConfig.modelId, typesenseConfig);
	const requestBody = buildMultiSearchRequestBody({
		userMessage: request.userMessage,
		queryEmbedding: request.queryEmbedding,
		selectedDocuments: request.selectedDocuments,
		chatId: request.chatId,
		searchCollections: searchConfig.searchCollections,
		kResults: searchConfig.kResults || 10,
		advancedConfig: searchConfig.advancedConfig
	});
	const headers = {
		"Content-Type": "application/json",
		"X-TYPESENSE-API-KEY": typesenseConfig.apiKey
	};
	const response = await fetch(url.toString(), {
		method: "POST",
		headers,
		body: JSON.stringify(requestBody)
	});
	if (!response.ok) {
		const errorText = await response.text();
		throw new Error(`Typesense search failed: ${errorText}`);
	}
	const contentType = response.headers.get("content-type");
	return {
		response,
		isStreaming: contentType?.includes("text/event-stream") || false,
		sources: []
	};
}
1159
+
1160
+ //#endregion
1161
+ //#region src/features/rag/handlers/chunk-fetch-handler.ts
1162
/**
 * Fetch a chunk document by ID from Typesense.
 *
 * @param client - Typesense client instance
 * @param config - `{ chunkId, collectionName, validCollections? }`
 * @returns Promise with the chunk data
 * @throws Error if the collection is invalid, the chunk is missing (404), or the chunk has no text
 */
async function fetchChunkById(client, config) {
	const { chunkId, collectionName, validCollections } = config;
	if (validCollections && !validCollections.includes(collectionName)) {
		throw new Error(`Invalid collection: ${collectionName}. Must be one of: ${validCollections.join(", ")}`);
	}
	let document;
	try {
		document = await client.collections(collectionName).documents(chunkId).retrieve();
	} catch (error) {
		// Translate a Typesense 404 into a clearer error; rethrow anything else.
		const isNotFound = error && typeof error === "object" && "httpStatus" in error && error.httpStatus === 404;
		if (isNotFound) throw new Error(`Chunk not found: ${chunkId}`);
		throw error;
	}
	const text = document.chunk_text || "";
	if (!text) throw new Error("Chunk contains no text");
	return {
		id: document.id,
		chunk_text: text,
		title: document.title,
		slug: document.slug,
		chunk_index: document.chunk_index,
		collection: collectionName
	};
}
1190
+
1191
+ //#endregion
1192
+ //#region src/features/rag/handlers/session-handlers.ts
1193
/**
 * Get the most recently active chat session for a user.
 *
 * @param payload - Payload CMS instance
 * @param userId - User ID
 * @param config - `{ collectionName?, activeSessionWindow? }` (window in ms; default 24h)
 * @returns Promise with the session document or null when none is active
 */
async function getActiveSession(payload, userId, config = {}) {
	const collectionName = config.collectionName || "chat-sessions";
	// Default window: 1440 minutes (24 hours).
	const windowMs = config.activeSessionWindow || 1440 * 60 * 1e3;
	const cutoff = new Date(Date.now() - windowMs).toISOString();
	const result = await payload.find({
		collection: collectionName,
		where: { and: [
			{ user: { equals: userId } },
			{ status: { equals: "active" } },
			{ last_activity: { greater_than: cutoff } }
		] },
		sort: "-last_activity",
		limit: 1
	});
	if (!result.docs.length) return null;
	return result.docs[0];
}
1218
/**
 * Get a chat session by conversation ID, scoped to the owning user.
 *
 * @param payload - Payload CMS instance
 * @param userId - User ID
 * @param conversationId - Conversation ID
 * @param config - `{ collectionName? }` (default "chat-sessions")
 * @returns Promise with the session document or null when not found
 */
async function getSessionByConversationId(payload, userId, conversationId, config = {}) {
	const result = await payload.find({
		collection: config.collectionName || "chat-sessions",
		where: { and: [
			{ conversation_id: { equals: conversationId } },
			{ user: { equals: userId } }
		] },
		limit: 1
	});
	if (!result.docs.length) return null;
	return result.docs[0];
}
1237
/**
 * Close a chat session and return a summary of its final state.
 *
 * @param payload - Payload CMS instance
 * @param userId - User ID
 * @param conversationId - Conversation ID
 * @param config - `{ collectionName? }` (default "chat-sessions")
 * @returns Promise with the closed-session summary, or null when the session does not exist
 */
async function closeSession(payload, userId, conversationId, config = {}) {
	const collectionName = config.collectionName || "chat-sessions";
	const found = await payload.find({
		collection: collectionName,
		where: { and: [
			{ conversation_id: { equals: conversationId } },
			{ user: { equals: userId } }
		] },
		limit: 1
	});
	const session = found.docs.length ? found.docs[0] : null;
	if (!session) return null;
	await payload.update({
		collection: collectionName,
		where: { conversation_id: { equals: conversationId } },
		data: {
			status: "closed",
			closed_at: new Date().toISOString()
		}
	});
	return {
		conversation_id: session.conversation_id,
		messages: session.messages || [],
		status: "closed",
		total_tokens: session.total_tokens,
		total_cost: session.total_cost,
		last_activity: session.last_activity
	};
}
1273
+
1274
+ //#endregion
1275
+ //#region src/features/rag/utils/sse-utils.ts
1276
/**
 * Serialize an event object as a single SSE "data:" frame.
 *
 * @param event - SSE event object
 * @returns Formatted SSE event string terminated by a blank line
 */
function formatSSEEvent(event) {
	const serialized = JSON.stringify(event);
	return "data: " + serialized + "\n\n";
}
1285
/**
 * Encode and enqueue an SSE event through a stream controller.
 *
 * @param controller - ReadableStreamDefaultController receiving the bytes
 * @param encoder - TextEncoder instance
 * @param event - SSE event to send
 */
function sendSSEEvent(controller, encoder, event) {
	controller.enqueue(encoder.encode(formatSSEEvent(event)));
}
1296
+
1297
+ //#endregion
1298
+ //#region src/features/rag/chat-session-repository.ts
1299
/**
 * Save or update a chat session in PayloadCMS.
 *
 * Looks up the session by conversation id, appends the new user/assistant
 * message pair, and either updates the existing session or creates a new one.
 * Failures are logged and swallowed deliberately — persistence must never
 * break the chat response.
 *
 * @param payload - Payload CMS instance
 * @param userId - User ID
 * @param conversationId - Conversation ID from Typesense
 * @param userMessage - User's message
 * @param assistantMessage - Assistant's response
 * @param sources - Source chunks used for the response
 * @param spending - Token spending entries
 * @param collectionName - Collection name for sessions (default: 'chat-sessions')
 */
async function saveChatSession(payload, userId, conversationId, userMessage, assistantMessage, sources, spending, collectionName = "chat-sessions") {
	try {
		const existing = await payload.find({
			collection: collectionName,
			where: { conversation_id: { equals: conversationId } },
			limit: 1
		});
		const userEntry = {
			role: "user",
			content: userMessage,
			timestamp: new Date().toISOString()
		};
		const assistantEntry = {
			role: "assistant",
			content: assistantMessage,
			timestamp: new Date().toISOString(),
			sources: sources.map((source) => ({
				id: source.id,
				title: source.title,
				type: source.type,
				chunk_index: source.chunkIndex,
				slug: source.slug
			}))
		};
		const current = existing.docs.length > 0 ? existing.docs[0] : null;
		if (current) {
			await updateExistingSession(payload, current, userEntry, assistantEntry, spending, collectionName);
		} else {
			await createNewSession(payload, userId, conversationId, userEntry, assistantEntry, spending, collectionName);
		}
	} catch (error) {
		logger.error("Error saving chat session", error, {
			conversationId,
			userId
		});
	}
}
1344
/**
 * Append messages and spending to an existing chat session and update totals.
 */
async function updateExistingSession(payload, session, newUserMessage, newAssistantMessage, spending, collectionName) {
	const messages = [
		...(session.messages || []),
		newUserMessage,
		newAssistantMessage
	];
	const allSpending = [...(session.spending || []), ...spending];
	const addedTokens = spending.reduce((sum, entry) => sum + entry.tokens.total, 0);
	const addedCost = spending.reduce((sum, entry) => sum + (entry.cost_usd || 0), 0);
	const totalTokens = (session.total_tokens || 0) + addedTokens;
	const totalCost = (session.total_cost || 0) + addedCost;
	await payload.update({
		collection: collectionName,
		id: session.id,
		data: {
			messages,
			spending: allSpending,
			total_tokens: totalTokens,
			total_cost: totalCost,
			last_activity: new Date().toISOString(),
			status: "active"
		}
	});
	logger.info("Chat session updated successfully", {
		sessionId: session.id,
		conversationId: session.conversation_id,
		totalTokens,
		totalCost
	});
}
1377
/**
 * Create a new chat session document with its first message pair and totals.
 */
async function createNewSession(payload, userId, conversationId, newUserMessage, newAssistantMessage, spending, collectionName) {
	let totalTokens = 0;
	let totalCost = 0;
	for (const entry of spending) {
		totalTokens += entry.tokens.total;
		totalCost += entry.cost_usd || 0;
	}
	await payload.create({
		collection: collectionName,
		data: {
			user: userId,
			conversation_id: conversationId,
			status: "active",
			messages: [newUserMessage, newAssistantMessage],
			spending,
			total_tokens: totalTokens,
			total_cost: totalCost,
			last_activity: new Date().toISOString()
		}
	});
	logger.info("New chat session created successfully", {
		conversationId,
		userId,
		totalTokens,
		totalCost
	});
}
1403
+
1404
+ //#endregion
1405
+ //#region src/features/rag/api/types.ts
1406
/**
 * Helper to create a JSON Response, merging any caller-supplied init/headers.
 *
 * @param data - Value to JSON-serialize into the body
 * @param init - Optional ResponseInit; its headers are merged under Content-Type
 * @returns Response with a JSON body and Content-Type header
 */
function jsonResponse(data, init) {
	const headers = {
		"Content-Type": "application/json",
		...init?.headers
	};
	return new Response(JSON.stringify(data), { ...init, headers });
}
1418
+
1419
+ //#endregion
1420
+ //#region src/features/rag/api/chat/handlers/embedding-handler.ts
1421
/**
 * Generate a query embedding for `userMessage` and record its token usage.
 *
 * Selects the provider from `config.embeddingConfig.type`: "gemini" uses
 * GeminiEmbeddingProvider; anything else falls back to OpenAIEmbeddingProvider.
 * When `config.createEmbeddingSpending` is provided, a spending entry is
 * appended to `spendingEntries` (the array is mutated in place).
 *
 * @param userMessage - Text to embed
 * @param config - Handler config; must contain `embeddingConfig` ({ type, apiKey, model, dimensions })
 * @param spendingEntries - Accumulator array for spending entries (mutated)
 * @returns The embedding vector from the provider result
 * @throws Error when `embeddingConfig` is missing or the provider returns no result
 */
async function generateEmbeddingWithTracking(userMessage, config, spendingEntries) {
	logger.debug("Generating embeddings for semantic search");
	const embeddingConfig = config.embeddingConfig;
	if (!embeddingConfig) throw new Error("Embedding configuration missing");
	let provider;
	const providerType = embeddingConfig.type;
	const apiKey = embeddingConfig.apiKey;
	const model = embeddingConfig.model;
	const dimensions = embeddingConfig.dimensions;
	// Dedicated logger so provider output is attributable to this path.
	const serviceLogger = new Logger({
		enabled: true,
		prefix: "[rag-embedding]"
	});
	if (providerType === "gemini") provider = new GeminiEmbeddingProvider({
		type: "gemini",
		apiKey,
		model,
		dimensions
	}, serviceLogger);
	else provider = new OpenAIEmbeddingProvider({
		type: "openai",
		apiKey,
		model,
		dimensions
	}, serviceLogger);
	// NOTE(review): this instance is constructed but never used or retained —
	// looks like dead code unless the constructor has side effects; confirm.
	new EmbeddingServiceImpl(provider, serviceLogger, embeddingConfig);
	const resultWithUsage = await provider.generateEmbedding(userMessage);
	if (!resultWithUsage) throw new Error("Failed to generate embedding");
	const modelUsed = model || DEFAULT_EMBEDDING_MODEL;
	if (config.createEmbeddingSpending) {
		// Track embedding cost only when the host app supplies a pricing hook.
		const embeddingSpending = config.createEmbeddingSpending(modelUsed, resultWithUsage.usage.totalTokens);
		spendingEntries.push(embeddingSpending);
		logger.info("Embedding generated successfully", {
			model: modelUsed,
			totalTokens: resultWithUsage.usage.totalTokens,
			costUsd: embeddingSpending.cost_usd
		});
	}
	return resultWithUsage.embedding;
}
1464
+
1465
+ //#endregion
1466
+ //#region src/features/rag/api/chat/handlers/session-handler.ts
1467
/**
 * Persist the chat session when both a conversation id and a save hook exist;
 * otherwise do nothing.
 */
async function saveChatSessionIfNeeded(config, payload, userId, conversationId, userMessage, assistantMessage, sources, spendingEntries) {
	const canSave = Boolean(conversationId) && Boolean(config.saveChatSession);
	if (!canSave) return;
	await config.saveChatSession(payload, userId, conversationId, userMessage, assistantMessage, sources, spendingEntries, config.collectionName);
	logger.info("Chat session saved to PayloadCMS", { conversationId });
}
1475
+
1476
+ //#endregion
1477
+ //#region src/features/rag/api/chat/handlers/token-limit-handler.ts
1478
/**
 * Check the user's daily token limit before processing a request.
 *
 * Skips the check entirely (returns null) when the host app does not supply
 * both `estimateTokensFromText` and `checkTokenLimit`. On an exceeded limit,
 * returns a 429 JSON Response; otherwise logs the remaining quota and
 * returns null.
 */
async function checkTokenLimitsIfNeeded(config, payload, userId, userEmail, userMessage) {
	if (!config.estimateTokensFromText || !config.checkTokenLimit) return null;
	// Reserve headroom: the response is budgeted at 10x the prompt estimate.
	const estimatedTotalTokens = config.estimateTokensFromText(userMessage) + config.estimateTokensFromText(userMessage) * 10;
	const limitCheck = await config.checkTokenLimit(payload, userId, estimatedTotalTokens);
	if (!limitCheck.allowed) {
		logger.warn("Token limit exceeded for user", {
			userId,
			limit: limitCheck.limit,
			used: limitCheck.used,
			remaining: limitCheck.remaining
		});
		const limitInfo = {
			limit: limitCheck.limit,
			used: limitCheck.used,
			remaining: limitCheck.remaining,
			reset_at: limitCheck.reset_at
		};
		return jsonResponse({
			error: "Has alcanzado tu límite diario de tokens.",
			limit_info: limitInfo
		}, { status: 429 });
	}
	logger.info("Chat request started with token limit check passed", {
		userId,
		userEmail,
		limit: limitCheck.limit,
		used: limitCheck.used,
		remaining: limitCheck.remaining
	});
	return null;
}
1511
+
1512
+ //#endregion
1513
+ //#region src/features/rag/api/chat/handlers/usage-stats-handler.ts
1514
/**
 * Sum tokens and USD cost across all spending entries.
 *
 * @param spendingEntries - Spending entries collected during the request
 * @returns `{ totalTokens, totalCostUSD }`
 */
function calculateTotalUsage(spendingEntries) {
	let totalTokens = 0;
	let totalCostUSD = 0;
	for (const entry of spendingEntries) {
		totalTokens += entry.tokens.total;
		totalCostUSD += entry.cost_usd || 0;
	}
	logger.info("Total token usage calculated", {
		totalTokens,
		totalCostUsd: totalCostUSD
	});
	return {
		totalTokens,
		totalCostUSD
	};
}
1529
/**
 * Send a "usage" SSE event with the user's daily statistics, when the host
 * app supplies a `getUserUsageStats` hook; otherwise do nothing.
 */
async function sendUsageStatsIfNeeded(config, payload, userId, totalTokens, totalCostUSD, sendEvent) {
	if (!config.getUserUsageStats) return;
	const stats = await config.getUserUsageStats(payload, userId);
	const data = {
		tokens_used: totalTokens,
		cost_usd: totalCostUSD,
		daily_limit: stats.limit,
		daily_used: stats.used,
		daily_remaining: stats.remaining,
		reset_at: stats.reset_at
	};
	sendEvent({ type: "usage", data });
}
1547
+
1548
+ //#endregion
1549
+ //#region src/features/rag/api/chat/validators/request-validator.ts
1550
/**
 * Validate an incoming chat request and extract the data the handler needs.
 *
 * Checks permissions, request shape, and the presence of a non-empty string
 * message. Returns `{ success: false, error: Response }` on any failure, or
 * `{ success: true, userId, userEmail, payload, userMessage, body }` on success.
 */
async function validateChatRequest(request, config) {
	// Uniform failure shape: a JSON error Response wrapped for the caller.
	const deny = (message, status) => ({
		success: false,
		error: jsonResponse({ error: message }, { status })
	});
	if (!await config.checkPermissions(request)) return deny("No tienes permisos para acceder a esta sesión.", 403);
	if (!request.url || !request.user) return deny("URL not found", 400);
	const { id: userId, email } = request.user;
	const payload = await config.getPayload();
	const body = await request.json?.();
	if (!body) return deny("Body not found", 400);
	const message = body.message;
	if (typeof message !== "string" || message.trim() === "") return deny("Se requiere un mensaje.", 400);
	return {
		success: true,
		userId,
		userEmail: email || "",
		payload,
		userMessage: message.trim(),
		body
	};
}
1583
+
1584
+ //#endregion
1585
+ //#region src/features/rag/api/chat/route.ts
1586
/**
 * Create a parameterizable POST handler for the chat endpoint.
 *
 * The returned handler: validates the request, resolves the agent (explicit
 * `agentSlug` or first configured agent), enforces token limits, then streams
 * the RAG conversation back to the client as SSE while tracking spending and
 * persisting the session.
 *
 * @param config - Host-app configuration (typesense, rag agents, hooks)
 * @returns Async POST handler producing an SSE Response (or a JSON error Response)
 */
function createChatPOSTHandler(config) {
	return async function POST(request) {
		try {
			const validated = await validateChatRequest(request, config);
			if (!validated.success) return validated.error;
			const { userId, userEmail, payload, userMessage, body } = validated;
			let searchConfig;
			const agentSlug = body.agentSlug;
			// Resolve the agent: an explicit slug must match a configured agent;
			// otherwise fall back to the first configured agent.
			if (agentSlug && config.rag?.agents) {
				const agent = config.rag.agents.find((a) => a.slug === agentSlug);
				if (!agent) return new Response(JSON.stringify({ error: `Agent not found: ${agentSlug}` }), { status: 404 });
				searchConfig = {
					modelId: agent.slug,
					searchCollections: agent.searchCollections,
					kResults: agent.kResults,
					advancedConfig: config.rag.advanced
				};
			} else if (config.rag?.agents && config.rag.agents.length > 0) {
				const agent = config.rag.agents[0];
				if (!agent) throw new Error("Default agent not found");
				searchConfig = {
					modelId: agent.slug,
					searchCollections: agent.searchCollections,
					kResults: agent.kResults,
					advancedConfig: config.rag.advanced
				};
			} else return new Response(JSON.stringify({ error: "No RAG configuration available" }), { status: 500 });
			// Optional daily-quota gate; returns a ready 429 Response when exceeded.
			const tokenLimitError = await checkTokenLimitsIfNeeded(config, payload, userId, userEmail, userMessage);
			if (tokenLimitError) return tokenLimitError;
			logger.info("Processing chat message", {
				userId,
				chatId: body.chatId || "new",
				agentSlug: agentSlug || "default",
				modelId: searchConfig.modelId,
				isFollowUp: !!body.chatId,
				hasSelectedDocuments: !!body.selectedDocuments,
				messageLength: userMessage.length
			});
			const encoder = new TextEncoder();
			// All chat work happens inside this SSE stream: embed, search,
			// forward tokens, then report usage and persist the session.
			const stream = new ReadableStream({ async start(controller) {
				const spendingEntries = [];
				let fullAssistantMessage = "";
				let conversationIdCapture = null;
				let sourcesCapture = [];
				try {
					const sendEvent = (event) => sendSSEEvent(controller, encoder, event);
					const queryEmbedding = await generateEmbeddingWithTracking(userMessage, config, spendingEntries);
					const searchResult = await executeRAGSearch(config.typesense, searchConfig, {
						userMessage,
						queryEmbedding,
						chatId: body.chatId,
						selectedDocuments: body.selectedDocuments
					});
					// Pick the streaming path only when Typesense actually returned SSE.
					const streamResult = searchResult.isStreaming && searchResult.response.body ? await config.handleStreamingResponse(searchResult.response, controller, encoder) : await config.handleNonStreamingResponse(await searchResult.response.json(), controller, encoder);
					fullAssistantMessage = streamResult.fullAssistantMessage;
					conversationIdCapture = streamResult.conversationId;
					sourcesCapture = streamResult.sources;
					spendingEntries.push(streamResult.llmSpending);
					const { totalTokens: totalTokensUsed, totalCostUSD } = calculateTotalUsage(spendingEntries);
					await sendUsageStatsIfNeeded(config, payload, userId, totalTokensUsed, totalCostUSD, sendEvent);
					await saveChatSessionIfNeeded(config, payload, userId, conversationIdCapture, userMessage, fullAssistantMessage, sourcesCapture, spendingEntries);
					logger.info("Chat request completed successfully", {
						userId,
						conversationId: conversationIdCapture,
						totalTokens: totalTokensUsed
					});
					controller.close();
				} catch (error) {
					// Stream already started: report the error as an SSE event,
					// then close — headers cannot be changed at this point.
					logger.error("Fatal error in chat stream", error, {
						userId,
						chatId: body.chatId
					});
					sendSSEEvent(controller, encoder, {
						type: "error",
						data: { error: error instanceof Error ? error.message : "Error desconocido" }
					});
					controller.close();
				}
			} });
			return new Response(stream, { headers: {
				"Content-Type": "text/event-stream",
				"Cache-Control": "no-cache",
				Connection: "keep-alive"
			} });
		} catch (error) {
			// Failure before the stream started: a plain JSON 500 is still possible.
			logger.error("Error in chat API endpoint", error, { userId: request.user?.id });
			return new Response(JSON.stringify({
				error: "Error al procesar tu mensaje. Por favor, inténtalo de nuevo.",
				details: error instanceof Error ? error.message : "Error desconocido"
			}), {
				status: 500,
				headers: { "Content-Type": "application/json" }
			});
		}
	};
}
1685
+
1686
+ //#endregion
1687
+ //#region src/features/rag/stream-handlers/utils.ts
1688
+ /**
1689
+ * Stream handler utilities
1690
+ *
1691
+ * Shared utility functions for stream handlers
1692
+ */
1693
/**
 * Resolve a document type from an exact collection name.
 *
 * @param collectionName - Typesense collection name
 * @returns "article", "book", or the fallback "document"
 */
function resolveDocumentType$1(collectionName) {
	switch (collectionName) {
		case "article_web_chunk":
			return "article";
		case "book_chunk":
			return "book";
		default:
			return "document";
	}
}
1701
/**
 * Estimate tokens from text with a simple word-count heuristic (~1.3 tokens
 * per whitespace-separated word). More accurate implementations can be
 * provided via callbacks.
 *
 * @param text - Input text
 * @returns Estimated token count (rounded up)
 */
function estimateTokensFromText(text) {
	const TOKENS_PER_WORD = 1.3;
	const words = text.trim().split(/\s+/);
	return Math.ceil(words.length * TOKENS_PER_WORD);
}
1709
+
1710
+ //#endregion
1711
+ //#region src/features/rag/stream-handlers/streaming-handler.ts
1712
+ /**
1713
+ * Streaming response handler
1714
+ *
1715
+ * Handles streaming responses from Typesense conversational search
1716
+ */
1717
/**
 * Default implementation for handling streaming responses.
 *
 * Reads the Typesense SSE stream line by line, forwarding typed SSE events to
 * the client ("done", "conversation_id", "sources", "token") and accumulating
 * the full assistant message. After the stream ends, estimates LLM token
 * spending from the collected context and message text.
 *
 * NOTE(review): the spending entry hard-codes model "gpt-4o-mini" and its
 * per-token prices (1.5e-7 input / 6e-7 output USD) regardless of the model
 * actually configured — confirm this matches the deployment.
 *
 * @param response - Fetch Response with an SSE body
 * @param controller - ReadableStreamDefaultController for the client-facing stream
 * @param encoder - TextEncoder used to emit SSE frames
 * @returns `{ fullAssistantMessage, conversationId, sources, llmSpending }`
 * @throws Error when the response has no body
 */
async function defaultHandleStreamingResponse(response, controller, encoder) {
	logger.debug("Starting streaming response handling");
	if (!response.body) throw new Error("Response body is null");
	const reader = response.body.getReader();
	const decoder = new TextDecoder();
	let buffer = "";
	let sources = [];
	let hasCollectedSources = false;
	let conversationId = null;
	let contextText = "";
	let fullAssistantMessage = "";
	try {
		while (true) {
			const { done, value } = await reader.read();
			if (done) {
				logger.debug("Streaming response completed");
				break;
			}
			buffer += decoder.decode(value, { stream: true });
			const lines = buffer.split("\n");
			// Keep the trailing partial line for the next chunk.
			buffer = lines.pop() || "";
			for (const line of lines) {
				const event = parseConversationEvent(line);
				if (!event) continue;
				// End-of-stream sentinel: tell the client we are done.
				if (event.raw === "[DONE]") {
					sendSSEEvent(controller, encoder, {
						type: "done",
						data: ""
					});
					continue;
				}
				// Forward the conversation id once, on first sight.
				if (!conversationId && event.conversationId) {
					conversationId = event.conversationId;
					logger.debug("Conversation ID captured", { conversationId });
					sendSSEEvent(controller, encoder, {
						type: "conversation_id",
						data: conversationId
					});
				}
				// Collect sources/context only from the first results batch.
				if (!hasCollectedSources && event.results) {
					sources = extractSourcesFromResults(event.results, resolveDocumentType$1);
					contextText = buildContextText(event.results);
					if (sources.length > 0) sendSSEEvent(controller, encoder, {
						type: "sources",
						data: sources
					});
					hasCollectedSources = true;
				}
				// Forward each message fragment as a "token" event.
				if (event.message) {
					fullAssistantMessage += event.message;
					sendSSEEvent(controller, encoder, {
						type: "token",
						data: event.message
					});
				}
			}
		}
	} finally {
		reader.releaseLock();
	}
	// Heuristic spending estimate: context text approximates the LLM input,
	// the assembled assistant message approximates the output.
	const llmInputTokens = estimateTokensFromText(contextText);
	const llmOutputTokens = estimateTokensFromText(fullAssistantMessage);
	const llmSpending = {
		service: "openai_llm",
		model: "gpt-4o-mini",
		tokens: {
			input: llmInputTokens,
			output: llmOutputTokens,
			total: llmInputTokens + llmOutputTokens
		},
		cost_usd: llmInputTokens * 15e-8 + llmOutputTokens * 6e-7,
		timestamp: (/* @__PURE__ */ new Date()).toISOString()
	};
	logger.info("LLM cost calculated", {
		inputTokens: llmInputTokens,
		outputTokens: llmOutputTokens,
		totalTokens: llmSpending.tokens.total,
		costUsd: llmSpending.cost_usd
	});
	return {
		fullAssistantMessage,
		conversationId,
		sources,
		llmSpending
	};
}
1806
+
1807
+ //#endregion
1808
//#region src/features/rag/stream-handlers/non-streaming-handler.ts
/**
 * Non-streaming response handler
 *
 * Handles non-streaming (regular JSON) responses from Typesense conversational search
 */
/**
 * Default implementation for handling non-streaming responses.
 *
 * Replays a complete JSON answer over the SSE channel (word-by-word "token"
 * events, then conversation id, sources, and a final "done" event) so the
 * client can consume streaming and non-streaming responses uniformly.
 *
 * @param data - parsed JSON body from the conversational search call
 * @param controller - stream controller handed to sendSSEEvent
 * @param encoder - TextEncoder used when writing SSE events
 * @returns { fullAssistantMessage, conversationId, sources, llmSpending }
 */
async function defaultHandleNonStreamingResponse(data, controller, encoder) {
	logger.debug("Using non-streaming fallback for response handling");
	const typedData = data;
	// The conversation id may live under conversation.conversation_id or at top level.
	let conversationId = null;
	if (typedData.conversation?.conversation_id) conversationId = typedData.conversation.conversation_id;
	else if (typedData.conversation_id) conversationId = typedData.conversation_id;
	// Same for the answer text: conversation.answer, response, or message.
	let fullAnswer = "";
	if (typedData.conversation?.answer) fullAnswer = typedData.conversation.answer;
	else if (typedData.response || typedData.message) fullAnswer = typedData.response || typedData.message || "";
	const sources = extractSourcesFromResults(typedData.results || [], resolveDocumentType$1);
	const contextText = buildContextText(typedData.results || []);
	// Emit the answer word by word to mimic a token stream; a leading space
	// is prepended to every token after the first to preserve spacing.
	if (fullAnswer) {
		const words = fullAnswer.split(" ");
		for (let i = 0; i < words.length; i++) {
			const token = i === 0 ? words[i] : " " + words[i];
			if (token) sendSSEEvent(controller, encoder, {
				type: "token",
				data: token
			});
		}
	}
	if (conversationId) sendSSEEvent(controller, encoder, {
		type: "conversation_id",
		data: conversationId
	});
	if (sources.length > 0) sendSSEEvent(controller, encoder, {
		type: "sources",
		data: sources
	});
	sendSSEEvent(controller, encoder, {
		type: "done",
		data: ""
	});
	// Cost estimate: input = retrieved context, output = answer.
	// NOTE(review): model name and per-token rates are hard-coded
	// (gpt-4o-mini) — confirm they match the configured model.
	const llmInputTokens = estimateTokensFromText(contextText);
	const llmOutputTokens = estimateTokensFromText(fullAnswer);
	const llmSpending = {
		service: "openai_llm",
		model: "gpt-4o-mini",
		tokens: {
			input: llmInputTokens,
			output: llmOutputTokens,
			total: llmInputTokens + llmOutputTokens
		},
		cost_usd: llmInputTokens * 15e-8 + llmOutputTokens * 6e-7,
		timestamp: (/* @__PURE__ */ new Date()).toISOString()
	};
	return {
		fullAssistantMessage: fullAnswer,
		conversationId,
		sources,
		llmSpending
	};
}
1870
+
1871
+ //#endregion
1872
//#region src/features/rag/api/chat/session/route.ts
/**
 * Create a parameterizable GET handler for session endpoint
 *
 * Query params:
 * - ?active=true → Get the most recent active session
 * - ?conversationId=xxx → Get a specific session by conversation ID
 */
function createSessionGETHandler(config) {
	return async function GET(request) {
		try {
			// Authorization first: reject with 403 before any session lookup.
			if (!await config.checkPermissions(request)) return jsonResponse({ error: "No tienes permisos para acceder a esta sesión." }, { status: 403 });
			const userId = request.user?.id;
			// Both a request URL and an authenticated user are required.
			if (!request.url || !userId) return jsonResponse({ error: "URL not found" }, { status: 400 });
			const { searchParams } = new URL(request.url);
			const isActive = searchParams.get("active") === "true";
			const conversationId = searchParams.get("conversationId");
			const payload = await config.getPayload();
			// Mode 1: most recent active session for this user.
			if (isActive) {
				const session$1 = await getActiveSession(payload, userId, config.sessionConfig);
				if (!session$1) return jsonResponse({ error: "No hay sesión activa." }, { status: 404 });
				return jsonResponse(session$1);
			}
			// Mode 2: explicit lookup by conversation id.
			if (!conversationId) return jsonResponse({ error: "Se requiere conversationId o active=true." }, { status: 400 });
			const session = await getSessionByConversationId(payload, userId, conversationId, config.sessionConfig);
			if (!session) return jsonResponse({ error: "Sesión de chat no encontrada." }, { status: 404 });
			return jsonResponse(session);
		} catch (error) {
			logger.error("Error retrieving chat session", error, { userId: request.user?.id });
			return jsonResponse({
				error: "Error al recuperar la sesión.",
				details: error instanceof Error ? error.message : "Error desconocido"
			}, { status: 500 });
		}
	};
}
1908
/**
 * Create a parameterizable DELETE handler for session endpoint
 *
 * DELETE /api/chat/session?conversationId=xxx
 * Close a chat session
 */
function createSessionDELETEHandler(config) {
	return async function DELETE(request) {
		try {
			// Authorization first: reject with 403 before touching the session store.
			if (!await config.checkPermissions(request)) return jsonResponse({ error: "No tienes permisos para acceder a esta sesión." }, { status: 403 });
			const userId = request.user?.id;
			if (!request.url || !userId) return jsonResponse({ error: "URL not found" }, { status: 400 });
			const { searchParams } = new URL(request.url);
			const conversationId = searchParams.get("conversationId");
			if (!conversationId) return jsonResponse({ error: "Se requiere un conversationId válido." }, { status: 400 });
			const payload = await config.getPayload();
			logger.info("Closing chat session", {
				conversationId,
				userId
			});
			// closeSession returns a falsy value when the session does not exist
			// or is not owned by this user — both map to 404.
			const session = await closeSession(payload, userId, conversationId, config.sessionConfig);
			if (!session) return jsonResponse({ error: "Sesión de chat no encontrada o no tienes permisos." }, { status: 404 });
			logger.info("Chat session closed successfully", {
				conversationId,
				totalTokens: session.total_tokens,
				totalCost: session.total_cost
			});
			return jsonResponse({
				success: true,
				message: "Sesión cerrada correctamente",
				session: {
					conversation_id: conversationId,
					status: "closed",
					total_tokens: session.total_tokens,
					total_cost: session.total_cost
				}
			});
		} catch (error) {
			// Re-parse the conversation id for logging; the URL may be absent.
			logger.error("Error closing chat session", error, {
				conversationId: request.url ? new URL(request.url).searchParams.get("conversationId") : void 0,
				userId: request.user?.id
			});
			return jsonResponse({
				error: "Error al cerrar la sesión. Por favor, inténtalo de nuevo.",
				details: error instanceof Error ? error.message : "Error desconocido"
			}, { status: 500 });
		}
	};
}
1957
+
1958
+ //#endregion
1959
//#region src/features/rag/api/chunks/[id]/route.ts
/**
 * Create a parameterizable GET handler for chunks endpoint
 *
 * GET /api/chat/chunks/[id]?collection=article_web_chunk
 * Fetch the full chunk text from Typesense by document ID
 */
function createChunksGETHandler(config) {
	return async function GET(request) {
		try {
			if (!await config.checkPermissions(request)) return jsonResponse({ error: "No tienes permisos para acceder a este chunk." }, { status: 403 });
			if (!request.url || !request.user) return jsonResponse({ error: "URL not found" }, { status: 400 });
			const id = request.routeParams?.id;
			const collectionName = new URL(request.url).searchParams.get("collection");
			if (!id) return jsonResponse({ error: "Se requiere el ID del chunk" }, { status: 400 });
			// The collection param is mandatory; the allowed values are echoed back.
			if (!collectionName) return jsonResponse({
				error: "Se requiere el parámetro collection",
				collections: config.validCollections
			}, { status: 400 });
			// fetchChunkById validates collectionName against validCollections
			// and throws on invalid/missing chunks (mapped below).
			return jsonResponse(await fetchChunkById(createTypesenseClient(config.typesense), {
				chunkId: id,
				collectionName,
				validCollections: config.validCollections
			}));
		} catch (error) {
			logger.error("Error fetching chunk", error, {
				chunkId: request.routeParams?.id,
				collection: request.url ? new URL(request.url).searchParams.get("collection") : void 0
			});
			// Map known failure messages to 400/404; everything else is a 500.
			if (error instanceof Error) {
				if (error.message.includes("Invalid collection")) return jsonResponse({
					error: error.message,
					collections: config.validCollections
				}, { status: 400 });
				if (error.message.includes("not found")) return jsonResponse({ error: "Chunk no encontrado" }, { status: 404 });
			}
			return jsonResponse({
				error: "Error al obtener el chunk",
				details: error instanceof Error ? error.message : "Error desconocido"
			}, { status: 500 });
		}
	};
}
2002
+
2003
+ //#endregion
2004
//#region src/features/rag/api/chat/agents/route.ts
/**
 * Builds the GET handler that lists the configured RAG agents.
 * Each agent is reduced to its public shape { slug, name }, where the
 * name falls back to the slug when not set.
 */
function createAgentsGETHandler(config) {
	return async function GET() {
		try {
			const configured = config.ragConfig?.agents || [];
			const agents = configured.map((agent) => ({
				slug: agent.slug,
				name: agent.name || agent.slug
			}));
			return jsonResponse({ agents }, { status: 200 });
		} catch (error) {
			return jsonResponse({ error: "Internal Server Error" }, { status: 500 });
		}
	};
}
2017
+
2018
+ //#endregion
2019
//#region src/features/rag/create-rag-payload-handlers.ts
/**
 * Creates Payload handlers for RAG endpoints
 *
 * Returns an empty endpoint list unless the RAG feature is enabled AND its
 * callback bundle is configured. Otherwise registers:
 *   POST   /chat            — conversational endpoint (streaming + fallback)
 *   GET    /chat/session    — fetch the active or a specific session
 *   DELETE /chat/session    — close a session
 *   GET    /chat/chunks/:id — fetch a chunk's full text
 *   GET    /chat/agents     — list configured agents
 */
function createRAGPayloadHandlers(pluginOptions) {
	const endpoints = [];
	// RAG is opt-in: both the feature flag and the callbacks are required.
	if (!pluginOptions.features.rag?.enabled || !pluginOptions.features.rag.callbacks) return endpoints;
	const ragConfig = pluginOptions.features.rag;
	const callbacksConfig = ragConfig.callbacks;
	// Chunk lookups are restricted to the deduplicated union of every
	// agent's search collections.
	const agentCollections = ragConfig.agents?.flatMap((agent) => agent.searchCollections) || [];
	const validCollections = Array.from(new Set(agentCollections));
	// POST /chat — wires user callbacks plus the default stream handlers.
	endpoints.push({
		path: "/chat",
		method: "post",
		handler: createChatPOSTHandler({
			collectionName: "chat-sessions",
			checkPermissions: callbacksConfig.checkPermissions,
			typesense: pluginOptions.typesense,
			rag: ragConfig,
			getPayload: callbacksConfig.getPayload,
			checkTokenLimit: callbacksConfig.checkTokenLimit,
			getUserUsageStats: callbacksConfig.getUserUsageStats,
			saveChatSession: callbacksConfig.saveChatSession,
			handleStreamingResponse: defaultHandleStreamingResponse,
			handleNonStreamingResponse: defaultHandleNonStreamingResponse,
			createEmbeddingSpending: callbacksConfig.createEmbeddingSpending,
			estimateTokensFromText: callbacksConfig.estimateTokensFromText,
			embeddingConfig: pluginOptions.features.embedding
		})
	});
	// GET /chat/session — active or by conversation id.
	endpoints.push({
		path: "/chat/session",
		method: "get",
		handler: createSessionGETHandler({
			getPayload: callbacksConfig.getPayload,
			checkPermissions: callbacksConfig.checkPermissions
		})
	});
	// DELETE /chat/session — close a session.
	endpoints.push({
		path: "/chat/session",
		method: "delete",
		handler: createSessionDELETEHandler({
			getPayload: callbacksConfig.getPayload,
			checkPermissions: callbacksConfig.checkPermissions
		})
	});
	// GET /chat/chunks/:id — chunk text lookup, limited to validCollections.
	endpoints.push({
		path: "/chat/chunks/:id",
		method: "get",
		handler: createChunksGETHandler({
			typesense: pluginOptions.typesense,
			checkPermissions: callbacksConfig.checkPermissions,
			validCollections
		})
	});
	// GET /chat/agents — public listing of configured agents.
	endpoints.push({
		path: "/chat/agents",
		method: "get",
		handler: createAgentsGETHandler({
			ragConfig,
			checkPermissions: callbacksConfig.checkPermissions
		})
	});
	return endpoints;
}
2084
+
2085
+ //#endregion
2086
//#region src/features/search/handlers/collections-handler.ts
/**
 * Creates a handler for listing available search collections
 *
 * For each collection slug, reports the display name plus which fields are
 * facetable and searchable, derived from the first enabled table config.
 */
const createCollectionsHandler = (pluginOptions) => {
	return () => {
		try {
			const collections = [];
			for (const [slug, tableConfigs] of Object.entries(pluginOptions.collections || {})) if (Array.isArray(tableConfigs)) {
				// Only the first enabled config per slug drives the metadata.
				const firstEnabledConfig = tableConfigs.find((config) => config.enabled);
				if (firstEnabledConfig) {
					let fields = [];
					// Chunked collections get implicit chunk_text (searchable)
					// and headers (facetable) fields on top of the configured ones.
					if (firstEnabledConfig.mode === "chunked") fields = [
						...firstEnabledConfig.fields || [],
						{
							name: "chunk_text",
							index: true
						},
						{
							name: "headers",
							facet: true
						}
					];
					else fields = firstEnabledConfig.fields;
					const facetFields = fields.filter((f) => f.facet).map((f) => f.name);
					// `index` defaults to searchable: only an explicit
					// `index: false` excludes a field.
					const searchFields = fields.filter((f) => f.index !== false).map((f) => f.name);
					collections.push({
						slug,
						displayName: firstEnabledConfig.displayName || slug.charAt(0).toUpperCase() + slug.slice(1),
						facetFields,
						searchFields
					});
				}
			}
			return Response.json({
				categorized: false,
				collections
			});
		} catch (_error) {
			return Response.json({ error: "Failed to get collections" }, { status: 500 });
		}
	};
};
2129
+
2130
+ //#endregion
2131
//#region src/core/utils/naming.ts
/**
 * Generates the Typesense collection name based on the configuration.
 *
 * Priority:
 * 1. Explicit `tableName` if provided.
 * 2. `collectionSlug` (fallback).
 *
 * @param collectionSlug The slug of the Payload collection
 * @param tableConfig The configuration for the specific table
 * @returns The generated Typesense collection name
 */
const getTypesenseCollectionName = (collectionSlug, tableConfig) => {
	const explicitName = tableConfig.tableName;
	// `!= null` mirrors `??`: only null/undefined fall back to the slug.
	return explicitName != null ? explicitName : collectionSlug;
};
2146
+
2147
+ //#endregion
2148
//#region src/shared/cache/cache.ts
/**
 * In-memory TTL cache for search results with a bounded size.
 * Entries expire after their TTL; when the cache is full, the oldest
 * entry (by insertion order) is evicted before inserting a new one.
 */
var SearchCache = class {
	cache = /* @__PURE__ */ new Map();
	defaultTTL;
	maxSize;
	constructor(options = {}) {
		// Defaults: 5-minute TTL, 1000 entries.
		this.defaultTTL = options.ttl || 300 * 1e3;
		this.maxSize = options.maxSize || 1e3;
	}
	/**
	 * Generate cache key from search parameters
	 */
	generateKey(query, collection, params) {
		const scope = collection || "universal";
		const baseKey = `${scope}:${query}`;
		if (!params) return baseKey;
		const paramKey = Object.keys(params)
			.sort()
			.map((key) => `${key}=${params[key]}`)
			.join("&");
		return `${baseKey}:${paramKey}`;
	}
	/**
	 * Clear expired entries
	 */
	cleanup() {
		const now = Date.now();
		for (const [key, entry] of this.cache.entries()) {
			const expired = now - entry.timestamp > entry.ttl;
			if (expired) this.cache.delete(key);
		}
	}
	/**
	 * Clear cache entries whose key contains the pattern;
	 * clears everything when no pattern is given.
	 */
	clear(pattern) {
		if (!pattern) {
			this.cache.clear();
			return;
		}
		const matching = [...this.cache.keys()].filter((key) => key.includes(pattern));
		for (const key of matching) this.cache.delete(key);
	}
	/**
	 * Get cached search result, or null when absent/expired.
	 * Expired entries are deleted lazily on read.
	 */
	get(query, collection, params) {
		const key = this.generateKey(query, collection || "", params);
		const entry = this.cache.get(key);
		if (entry === undefined) return null;
		const expired = Date.now() - entry.timestamp > entry.ttl;
		if (expired) {
			this.cache.delete(key);
			return null;
		}
		return entry.data;
	}
	/**
	 * Get cache statistics
	 */
	getStats() {
		return {
			maxSize: this.maxSize,
			size: this.cache.size
		};
	}
	/**
	 * Check if cache has a valid (non-expired) entry
	 */
	has(query, collection, params) {
		return this.get(query, collection, params) !== null;
	}
	/**
	 * Set cached search result; evicts the oldest entry when at capacity.
	 */
	set(query, data, collection, params, ttl) {
		const key = this.generateKey(query, collection || "", params);
		if (this.cache.size >= this.maxSize) {
			const oldestKey = this.cache.keys().next().value;
			if (oldestKey) this.cache.delete(oldestKey);
		}
		const entry = {
			data,
			timestamp: Date.now(),
			ttl: ttl || this.defaultTTL
		};
		this.cache.set(key, entry);
	}
};
2226
// Module-level shared cache instance: 1000 entries, 5-minute TTL.
const searchCache = new SearchCache({
	maxSize: 1e3,
	ttl: 300 * 1e3
});
// Periodic sweep of expired entries every 10 minutes. `unref()` lets the
// Node process exit while this interval is still scheduled (previously the
// live timer handle kept short-lived CLI/serverless invocations alive);
// the typeof guard keeps non-Node runtimes (where setInterval returns a
// number) working unchanged.
const searchCacheCleanupTimer = setInterval(() => {
	searchCache.cleanup();
}, 600 * 1e3);
if (typeof searchCacheCleanupTimer?.unref === "function") searchCacheCleanupTimer.unref();
2233
+
2234
+ //#endregion
2235
//#region src/features/search/constants.ts
/**
 * Default values for vector search parameters
 *
 * K is set high because:
 * - Documents are split into chunks (avg 5-10 chunks per doc)
 * - To get 20 unique documents, we need K = 20 docs × 7 chunks/doc = 140
 * - Higher K = better coverage but slightly slower (still fast with good indexing)
 */
const DEFAULT_K = 150;
// Default pagination when the caller does not specify page/per_page.
const DEFAULT_PAGE = 1;
const DEFAULT_PER_PAGE = 20;
// Hybrid-search weighting factor passed as `alpha` in the vector_query.
// NOTE(review): presumably weights the vector score against the keyword
// score in rank fusion — confirm against the Typesense hybrid-search docs.
const DEFAULT_ALPHA = .7;
/**
 * Default search field names when not specified
 */
const DEFAULT_SEARCH_FIELDS = ["title", "content"];
/**
 * Default snippet threshold for search results
 */
const DEFAULT_SNIPPET_THRESHOLD = 30;
/**
 * Default typo tokens threshold
 */
const DEFAULT_TYPO_TOKENS_THRESHOLD = 1;
/**
 * Default number of typos allowed (0 = exact token matching only)
 */
const DEFAULT_NUM_TYPOS = 0;
2264
+
2265
+ //#endregion
2266
//#region src/features/search/results/process-traditional-results.ts
/**
 * Processes traditional search results from a single collection.
 * Decorates the result set and every hit with collection metadata
 * (collection slug, display name, icon) and guarantees `document`
 * and `hits` are never undefined.
 */
const processSingleCollectionTraditionalResults = (results, collectionName, config) => {
	const displayName = config?.displayName || collectionName;
	const icon = "📄";
	const decoratedHits = (results.hits || []).map((hit) => ({
		...hit,
		collection: collectionName,
		displayName,
		icon,
		document: hit.document || {}
	}));
	return {
		collection: collectionName,
		displayName,
		icon,
		found: results.found,
		hits: decoratedHits
	};
};
2285
/**
 * Combines traditional search results from multiple collections.
 * Hits are merged, ranked by text_match descending, and truncated to
 * per_page; totals and per-collection summaries are reported alongside.
 */
const combineTraditionalResults = (results, options) => {
	const { page, per_page, query } = options;
	// Merge every collection's hits into one ranked list.
	const allHits = [];
	for (const result of results) allHits.push(...result.hits || []);
	allHits.sort((left, right) => (right.text_match || 0) - (left.text_match || 0));
	let totalFound = 0;
	for (const result of results) totalFound += result.found || 0;
	const collectionSummaries = results.map((r) => ({
		collection: r.collection,
		displayName: r.displayName,
		error: r.error,
		found: r.found || 0,
		icon: r.icon
	}));
	return {
		collections: collectionSummaries,
		found: totalFound,
		hits: allHits.slice(0, per_page),
		page,
		request_params: {
			per_page,
			query
		},
		search_cutoff: false,
		search_time_ms: 0
	};
};
2312
+
2313
+ //#endregion
2314
//#region src/features/search/traditional/build-params.ts
/**
 * Builds traditional search parameters for a single collection.
 * The same field list drives both query_by and full-field highlighting.
 */
const buildTraditionalSearchParams = (query, options) => {
	const { page, per_page, searchFields = DEFAULT_SEARCH_FIELDS, sort_by, exclude_fields } = options;
	const joinedFields = searchFields.join(",");
	// NOTE: exclude_fields/sort_by keys are present even when undefined,
	// matching the original shape handed to the Typesense client.
	return {
		highlight_full_fields: joinedFields,
		num_typos: DEFAULT_NUM_TYPOS,
		page,
		per_page,
		q: query,
		query_by: joinedFields,
		snippet_threshold: DEFAULT_SNIPPET_THRESHOLD,
		typo_tokens_threshold: DEFAULT_TYPO_TOKENS_THRESHOLD,
		exclude_fields,
		sort_by
	};
};
2333
+
2334
+ //#endregion
2335
//#region src/features/search/traditional/search-collection.ts
/**
 * Performs a traditional search on a single collection
 *
 * Derives searchable string fields from the table config when the caller
 * does not provide them, optionally filters out chunk documents, and never
 * throws: failures are returned as an empty result with `error` set.
 */
const searchTraditionalCollection = async (typesenseClient, collectionName, config, options) => {
	try {
		const buildOptions = {
			page: options.page,
			per_page: options.per_page
		};
		// Explicit searchFields win; otherwise derive them from the config.
		if (options.searchFields) buildOptions.searchFields = options.searchFields;
		else if (config) {
			let fields = [];
			// Chunked mode adds the implicit chunk_text string field.
			if (config.mode === "chunked") fields = [...config.fields || [], {
				name: "chunk_text",
				index: true,
				type: "string"
			}];
			else fields = config.fields;
			// Only indexed string/string[] fields can be query_by targets.
			const searchFields = fields.filter((f) => f.index !== false && (f.type === "string" || f.type === "string[]")).map((f) => f.name);
			if (searchFields.length > 0) buildOptions.searchFields = searchFields;
		}
		if (options.sort_by) buildOptions.sort_by = options.sort_by;
		if (options.exclude_fields) buildOptions.exclude_fields = options.exclude_fields;
		const searchParameters = buildTraditionalSearchParams(options.query, buildOptions);
		// Best effort: when the live schema has an is_chunk field, exclude
		// chunk documents so only parent docs are returned. Schema retrieval
		// errors are deliberately swallowed (the search proceeds unfiltered).
		if (!options.skipChunkFilter) try {
			if (((await typesenseClient.collections(collectionName).retrieve()).fields?.map((f) => f.name) || []).includes("is_chunk")) searchParameters.filter_by = "!is_chunk:true";
		} catch (schemaError) {}
		return processSingleCollectionTraditionalResults(await typesenseClient.collections(collectionName).documents().search(searchParameters), collectionName, config);
	} catch (error) {
		// Per-collection failure is non-fatal: surface it in the result envelope.
		return {
			collection: collectionName,
			displayName: config?.displayName || collectionName,
			error: error instanceof Error ? error.message : "Unknown error",
			found: 0,
			hits: [],
			icon: "📄"
		};
	}
};
2375
+
2376
+ //#endregion
2377
//#region src/features/search/handlers/executors/traditional-multi-collection-search.ts
/**
 * Runs a traditional (keyword) search across every enabled collection in
 * parallel, merges the per-collection results, and caches the combined
 * result under the "universal" cache scope.
 */
const performTraditionalMultiCollectionSearch = async (typesenseClient, enabledCollections, query, options) => {
	logger.info("Performing traditional multi-collection search", {
		query,
		collections: enabledCollections.map(([name]) => name)
	});
	// A caller-supplied comma-separated query_by overrides per-config
	// field derivation for every collection.
	const searchFieldsOverride = options.query_by ? options.query_by.split(",").map((f) => f.trim()) : void 0;
	const searchPromises = enabledCollections.map(async ([collectionName, config]) => {
		try {
			return await searchTraditionalCollection(typesenseClient, collectionName, config, {
				query,
				page: options.page,
				per_page: options.per_page,
				// Without an override, derive indexed string fields from the
				// config (chunked mode adds the implicit chunk_text field).
				...searchFieldsOverride ? { searchFields: searchFieldsOverride } : (() => {
					if (!config) return {};
					let fields = [];
					if (config.mode === "chunked") fields = [...config.fields || [], {
						name: "chunk_text",
						index: true,
						type: "string"
					}];
					else fields = config.fields;
					const searchFields = fields.filter((f) => f.index !== false && (f.type === "string" || f.type === "string[]")).map((f) => f.name);
					return searchFields.length > 0 ? { searchFields } : {};
				})(),
				...options.sort_by && { sort_by: options.sort_by },
				...options.exclude_fields && { exclude_fields: options.exclude_fields }
			});
		} catch (error) {
			// NOTE(review): searchTraditionalCollection catches its own errors
			// and returns an error envelope, so this rethrow path appears
			// unreachable — kept for defensive logging.
			logger.error("Error searching collection", error, {
				collection: collectionName,
				query
			});
			throw error;
		}
	});
	const fallbackResult = combineTraditionalResults(await Promise.all(searchPromises), {
		page: options.page,
		per_page: options.per_page,
		query
	});
	searchCache.set(query, fallbackResult, "universal", options);
	return fallbackResult;
};
2421
+
2422
+ //#endregion
2423
//#region src/features/search/results/process-vector-results.ts
/**
 * Processes and combines vector search results from multiple collections
 *
 * Pairs each multi_search result with its collection config positionally
 * (results arrive index-aligned with enabledCollections), decorates every
 * hit with collection metadata and a short `hint` preview, then merges all
 * hits sorted by ascending vector distance (closest first).
 */
const processVectorSearchResults = (multiSearchResults, enabledCollections, options) => {
	const { per_page = DEFAULT_PER_PAGE, page = DEFAULT_PAGE, k, query, vector } = options;
	// Results without a matching collection entry are dropped.
	const results = (multiSearchResults.results?.map((result, index) => {
		if (!enabledCollections[index]) return null;
		const [collectionName, config] = enabledCollections[index];
		return {
			collection: collectionName,
			displayName: config?.displayName || collectionName,
			icon: "📄",
			found: result.found || 0,
			error: result.error || void 0,
			hits: result.hits?.map((hit) => {
				const doc = hit.document || {};
				// Preview text: prefer chunk_text, then description (both
				// truncated to 300 chars), then any pre-existing hint.
				const hint = doc.chunk_text ? String(doc.chunk_text).substring(0, 300) + "..." : doc.description ? String(doc.description).substring(0, 300) + "..." : doc.hint;
				return {
					...hit,
					collection: collectionName,
					displayName: config?.displayName || collectionName,
					icon: "📄",
					document: {
						...doc,
						hint,
						...doc.chunk_text ? { chunk_text: doc.chunk_text } : {}
					},
					vector_distance: hit.vector_distance,
					text_match: hit.text_match
				};
			}) || []
		};
	}) || []).filter((r) => r !== null);
	const combinedHits = results.flatMap((result) => result.hits);
	const totalFound = results.reduce((sum, result) => sum + result.found, 0);
	// Lower vector distance = more similar; hits without a distance sort last.
	combinedHits.sort((a, b) => {
		return (a.vector_distance ?? Infinity) - (b.vector_distance ?? Infinity);
	});
	return {
		collections: results.map((r) => ({
			collection: r.collection,
			displayName: r.displayName,
			error: r.error,
			found: r.found || 0,
			icon: r.icon
		})),
		found: totalFound,
		hits: combinedHits.slice(0, per_page),
		page,
		request_params: {
			k,
			per_page,
			query: query || null,
			vector: vector ? "provided" : null
		},
		search_cutoff: false,
		search_time_ms: 0
	};
};
2483
+
2484
+ //#endregion
2485
//#region src/features/search/vector/build-params.ts
/**
 * Builds vector search parameters for a single collection.
 * Defaults to pure k-NN vector search (match-all query); hybrid mode
 * blends keyword relevance into the vector query via alpha.
 */
const buildVectorSearchParams = (searchVector, options) => {
	const { query, k = DEFAULT_K, hybrid = false, alpha = DEFAULT_ALPHA, page = DEFAULT_PAGE, per_page = DEFAULT_PER_PAGE, filter_by, sort_by, searchFields } = options;
	const vectorLiteral = `[${searchVector.join(",")}]`;
	// Pure vector search: wildcard query plus a k-NN vector_query; the raw
	// embedding is always excluded from returned documents.
	const searchParams = {
		q: "*",
		vector_query: `embedding:(${vectorLiteral}, k:${k})`,
		per_page,
		page,
		exclude_fields: "embedding"
	};
	// Hybrid mode requires an actual text query; alpha weights the fusion.
	if (hybrid && query) {
		searchParams.q = query;
		searchParams.query_by = searchFields?.join(",") || DEFAULT_SEARCH_FIELDS.join(",");
		searchParams.vector_query = `embedding:(${vectorLiteral}, k:${k}, alpha:${alpha})`;
	}
	if (filter_by) searchParams.filter_by = filter_by;
	if (sort_by) searchParams.sort_by = sort_by;
	return searchParams;
};
2507
+
2508
+ //#endregion
2509
//#region src/features/search/vector/build-multi-collection-params.ts
/**
 * Builds multi-collection vector search parameters
 *
 * For each enabled collection, derives searchable string fields from its
 * config and produces one entry for a Typesense multi_search request.
 * The caller-supplied filter rides along under `_filter_by` — NOTE(review):
 * the underscore prefix suggests it is consumed/stripped by the multi-search
 * executor rather than sent to Typesense as-is; confirm downstream.
 */
const buildMultiCollectionVectorSearchParams = (searchVector, enabledCollections, options) => {
	const { query, k, hybrid, alpha, page, per_page, filter_by, sort_by } = options;
	return enabledCollections.map(([collectionName, config]) => {
		let searchFields;
		// Derive indexed string/string[] fields for hybrid query_by
		// (chunked mode adds the implicit chunk_text field).
		if (config) {
			let fields = [];
			if (config.mode === "chunked") fields = [...config.fields || [], {
				name: "chunk_text",
				index: true,
				type: "string"
			}];
			else fields = config.fields;
			const extracted = fields.filter((f) => f.index !== false && (f.type === "string" || f.type === "string[]")).map((f) => f.name);
			if (extracted.length > 0) searchFields = extracted;
		}
		return {
			collection: collectionName,
			// Only defined options are forwarded so buildVectorSearchParams'
			// own defaults still apply for anything omitted.
			...buildVectorSearchParams(searchVector, {
				...query !== void 0 && { query },
				...k !== void 0 && { k },
				...hybrid !== void 0 && { hybrid },
				...alpha !== void 0 && { alpha },
				...page !== void 0 && { page },
				...per_page !== void 0 && { per_page },
				...sort_by !== void 0 && { sort_by },
				...searchFields !== void 0 && { searchFields }
			}),
			_filter_by: filter_by
		};
	});
};
2544
+
2545
+ //#endregion
2546
//#region src/features/search/vector/generate-vector.ts
/**
 * Generates or retrieves a search vector from query text or provided vector.
 * A caller-supplied non-empty array wins; otherwise the query text is
 * embedded. Returns null when neither yields a usable vector.
 */
const generateOrGetVector = async (query, vector, embeddingConfig) => {
	// A caller-supplied, non-empty array vector is used as-is.
	if (Array.isArray(vector) && vector.length > 0) return vector;
	// Nothing to embed without query text.
	if (!query) return null;
	const searchVector = await generateEmbedding(query, embeddingConfig);
	const embeddingFailed = !searchVector || searchVector.length === 0;
	return embeddingFailed ? null : searchVector;
};
2559
+
2560
+ //#endregion
2561
//#region src/features/search/services/search-service.ts
/**
 * High-level search orchestrator.
 *
 * Order of attempts: cached result → traditional search (mode "simple" or
 * no embedding available) → hybrid vector multi-search, falling back to
 * traditional search when the vector search throws.
 */
var SearchService = class {
	constructor(typesenseClient, pluginOptions) {
		this.typesenseClient = typesenseClient;
		this.pluginOptions = pluginOptions;
	}
	/**
	 * Run a search across the target collections.
	 * NOTE(review): `cacheKey` already encodes options and the collection
	 * list, and is then passed as the cache "collection" scope together
	 * with `options` — so options end up in the key twice. Redundant but
	 * consistent between get and set, hence harmless.
	 */
	async performSearch(query, targetCollections, options) {
		const cacheKey = `search:${query}:${JSON.stringify(options)}:${targetCollections.map((c) => c[0]).join(",")}`;
		const cachedResult = searchCache.get(query, cacheKey, options);
		if (cachedResult) return cachedResult;
		// Mode defaults to "semantic"; "simple" skips embeddings entirely.
		if ((options.mode || "semantic") === "simple") return this.performTraditionalSearch(query, targetCollections, options);
		const searchVector = await generateOrGetVector(query, void 0, this.pluginOptions.features.embedding);
		// No usable embedding → keyword fallback (not cached under this key).
		if (!searchVector) return this.performTraditionalSearch(query, targetCollections, options);
		try {
			const results = await this.executeVectorSearch(query, searchVector, targetCollections, options);
			searchCache.set(query, results, cacheKey, options);
			return results;
		} catch (error) {
			logger.error("Vector search failed, falling back to traditional", error);
			return this.performTraditionalSearch(query, targetCollections, options);
		}
	}
	// Keyword-only fallback across all target collections.
	async performTraditionalSearch(query, targetCollections, options) {
		return performTraditionalMultiCollectionSearch(this.typesenseClient, targetCollections, query, options);
	}
	/**
	 * Hybrid (alpha-weighted) vector search via one multi_search request.
	 * NOTE(review): K is capped at 30 here despite DEFAULT_K being 150,
	 * while the result processor is told k: DEFAULT_K — presumably a
	 * latency trade-off; confirm the intended K.
	 */
	async executeVectorSearch(query, searchVector, targetCollections, options) {
		const searches = buildMultiCollectionVectorSearchParams(searchVector, targetCollections, {
			query,
			k: Math.min(30, DEFAULT_K),
			hybrid: true,
			alpha: DEFAULT_ALPHA,
			page: options.page,
			per_page: options.per_page,
			...options.sort_by !== void 0 && { sort_by: options.sort_by }
		});
		// No collections → empty, well-formed result envelope.
		if (searches.length === 0) return {
			collections: [],
			found: 0,
			hits: [],
			page: options.page,
			request_params: {
				per_page: options.per_page,
				query
			},
			search_cutoff: false,
			search_time_ms: 0
		};
		return processVectorSearchResults(await this.typesenseClient.multiSearch.perform({ searches }), targetCollections, {
			per_page: options.per_page,
			page: options.page,
			k: DEFAULT_K,
			query
		});
	}
};
2616
+
2617
+ //#endregion
2618
+ //#region src/features/search/handlers/utils/document-transformer.ts
2619
/**
 * Derives a simplified document "type" from a Typesense collection name.
 * Names containing "article" or "book" map to those types (checked in
 * that order); anything else falls back to "document".
 */
function resolveDocumentType(collectionName) {
	for (const marker of ["article", "book"]) {
		if (collectionName.includes(marker)) return marker;
	}
	return "document";
}
/**
 * Flattens a raw search response into `{ documents: [...] }`, keeping only
 * id/title/slug plus a derived type and the source collection name.
 * Missing fields fall back to safe defaults; a response without hits
 * yields an empty document list.
 */
function transformToSimpleFormat(data) {
	if (!data || !data.hits) return { documents: [] };
	const documents = data.hits.map((hit) => {
		const source = hit.document || {};
		const rawCollection = hit.collection || source.collection;
		const collection = typeof rawCollection === "string" ? rawCollection : "";
		return {
			id: source.id || "",
			title: source.title || "Sin título",
			slug: source.slug || "",
			type: resolveDocumentType(collection),
			collection
		};
	});
	return { documents };
}
2645
+
2646
+ //#endregion
2647
+ //#region src/features/search/handlers/utils/target-resolver.ts
2648
/**
 * Resolves which Typesense tables a search request is allowed to hit.
 *
 * The allowed set is computed once at construction from the plugin config:
 * every enabled table is allowed unless `features.search.defaults.tables`
 * names an explicit whitelist.
 *
 * Fix: removed the `allTableNames` Set that was populated but never read
 * (dead code in the original implementation).
 */
var TargetCollectionResolver = class {
	allowedTableNames;
	constructor(pluginOptions) {
		this.pluginOptions = pluginOptions;
		this.allowedTableNames = this.getAllowedTableNames(pluginOptions);
	}
	/**
	 * Computes the list of allowed table names from the per-collection
	 * table configs. Disabled tables are skipped; an empty configured
	 * whitelist means "allow every enabled table".
	 */
	getAllowedTableNames(pluginOptions) {
		const configuredAllowed = pluginOptions.features.search?.defaults?.tables || [];
		const allowedTableNames = /* @__PURE__ */ new Set();
		for (const [collectionSlug, tableConfigs] of Object.entries(pluginOptions.collections || {})) {
			if (!Array.isArray(tableConfigs)) continue;
			for (const tableConfig of tableConfigs) {
				if (!tableConfig.enabled) continue;
				const tableName = getTypesenseCollectionName(collectionSlug, tableConfig);
				if (configuredAllowed.length === 0 || configuredAllowed.includes(tableName)) allowedTableNames.add(tableName);
			}
		}
		return Array.from(allowedTableNames);
	}
	/**
	 * Resolves target table names based on request parameters.
	 * Handles both multi-collection (array) and single-collection (slug)
	 * requests and enforces strict validation against the allowed tables.
	 *
	 * @param collectionNameSlug - Payload collection slug from the URL path,
	 *   or empty/undefined for a cross-collection search.
	 * @param requestedCollections - Optional explicit table names from the query.
	 * @returns Allowed table names; empty when nothing is permitted.
	 */
	resolveTargetTables(collectionNameSlug, requestedCollections) {
		if (!collectionNameSlug) {
			// Cross-collection request: honor explicit list (filtered), else all allowed.
			if (requestedCollections && requestedCollections.length > 0) return requestedCollections.filter((c) => this.allowedTableNames.includes(c));
			return this.allowedTableNames;
		}
		const targetTables = [];
		const tableConfigs = this.pluginOptions.collections?.[collectionNameSlug] || [];
		if (Array.isArray(tableConfigs)) {
			for (const config of tableConfigs) if (config.enabled) {
				const tableName = getTypesenseCollectionName(collectionNameSlug, config);
				if (this.allowedTableNames.includes(tableName)) targetTables.push(tableName);
			}
		}
		return targetTables;
	}
};
2691
+
2692
+ //#endregion
2693
+ //#region src/features/search/handlers/utils/config-mapper.ts
2694
/**
 * Maps resolved table names back to their full table configuration objects,
 * which the search service needs for fields, weights, etc.
 */
var SearchConfigMapper = class {
	constructor(pluginOptions) {
		this.pluginOptions = pluginOptions;
	}
	/**
	 * Returns `[tableName, config]` pairs for every enabled table whose
	 * generated name appears in `targetTableNames`. Non-array collection
	 * entries are ignored.
	 */
	mapTablesToConfigs(targetTableNames) {
		const wanted = new Set(targetTableNames);
		const searchConfigs = [];
		const collections = this.pluginOptions.collections || {};
		for (const [slug, configs] of Object.entries(collections)) {
			if (!Array.isArray(configs)) continue;
			for (const config of configs) {
				if (!config.enabled) continue;
				const tableName = getTypesenseCollectionName(slug, config);
				if (wanted.has(tableName)) searchConfigs.push([tableName, config]);
			}
		}
		return searchConfigs;
	}
};
2715
+
2716
+ //#endregion
2717
+ //#region src/core/config/config-validation.ts
2718
+ /**
2719
+ * Configuration validation using Zod schemas
2720
+ */
2721
/**
 * Formats a list of validation error messages as a numbered,
 * newline-separated string suitable for API error responses.
 */
function getValidationErrors(errors) {
	const numbered = errors.map((error, index) => `${index + 1}. ${error}`);
	return numbered.join("\n");
}
2727
/**
 * Zod schema for validating incoming search parameters.
 * Applies API defaults (page 1, 10 results per page, 0 typos allowed) and
 * enforces bounds: per_page 1-250, num_typos 0-4, snippet_threshold 0-100.
 * `q` is the only required field and must be non-empty.
 */
const SearchParamsSchema = z.object({
	facets: z.array(z.string()).optional(),
	filters: z.record(z.string(), z.any()).optional(),
	highlight_fields: z.array(z.string()).optional(),
	num_typos: z.number().int().min(0).max(4).optional().default(0),
	page: z.number().int().min(1).optional().default(1),
	per_page: z.number().int().min(1).max(250).optional().default(10),
	q: z.string().min(1, "Query parameter \"q\" is required"),
	snippet_threshold: z.number().int().min(0).max(100).optional().default(30),
	sort_by: z.string().optional(),
	typo_tokens_threshold: z.number().int().min(1).optional().default(1)
});
2742
/**
 * Validates raw search parameters against `SearchParamsSchema`.
 * Returns `{ success: true, data }` with defaults applied, or
 * `{ success: false, errors }` where each error is a human-readable
 * "path: message" string.
 */
function validateSearchParams(params) {
	try {
		const data = SearchParamsSchema.parse(params);
		return { data, success: true };
	} catch (error) {
		if (error instanceof z.ZodError) {
			const errors = error.issues.map((issue) => {
				const prefix = issue.path.length > 0 ? `${issue.path.join(".")}: ` : "";
				return `${prefix}${issue.message}`;
			});
			return { errors, success: false };
		}
		// Non-Zod failures get a generic message rather than leaking details.
		return {
			errors: ["Invalid search parameters format"],
			success: false
		};
	}
}
2764
+
2765
+ //#endregion
2766
+ //#region src/features/search/utils/extract-collection-name.ts
2767
/**
 * Extracts the target collection name from a request.
 * Prefers the URL path (segment following "search"), falling back to the
 * `collectionName` route param. Both fields of the result default to "".
 */
const extractCollectionName = (request) => {
	let collectionName;
	let collectionNameStr;
	if (request.url && typeof request.url === "string") {
		const segments = new URL(request.url).pathname.split("/");
		const searchIndex = segments.indexOf("search");
		const candidate = searchIndex !== -1 ? segments[searchIndex + 1] : void 0;
		if (candidate) {
			collectionName = candidate;
			collectionNameStr = String(candidate);
		} else {
			collectionName = "";
			collectionNameStr = "";
		}
	} else {
		collectionName = String(request.params?.collectionName || "");
		collectionNameStr = collectionName;
	}
	return { collectionName, collectionNameStr };
};
2793
+
2794
+ //#endregion
2795
+ //#region src/features/search/utils/extract-search-params.ts
2796
/**
 * Extracts and normalizes search parameters from a request query object.
 * Numeric params are parsed with defaults (page=1, per_page=10); values
 * out of range are reported via `result.errors` (the raw parsed numbers
 * are still returned). Optional params are only attached when present.
 */
const extractSearchParams = (query) => {
	const q = String(query?.q || "");
	const pageParam = query?.page;
	const perPageParam = query?.per_page;
	const page = pageParam ? parseInt(String(pageParam), 10) : 1;
	const per_page = perPageParam ? parseInt(String(perPageParam), 10) : 10;
	// Accept boolean true plus the common string spellings for the flag.
	const simpleParam = query?.simple;
	const simple = simpleParam === "true" || simpleParam === true || simpleParam === "1";
	// `collection` may arrive as a single value or an array; normalize to string[].
	const collectionParam = query?.collection;
	let collections;
	if (collectionParam) collections = Array.isArray(collectionParam) ? collectionParam.map((c) => String(c)) : [String(collectionParam)];
	const errors = [];
	if (isNaN(page) || page < 1) errors.push("Invalid page parameter");
	if (isNaN(per_page) || per_page < 1 || per_page > 250) errors.push("Invalid per_page parameter");
	const result = { q, page, per_page };
	if (query?.sort_by) result.sort_by = query.sort_by;
	if (query?.mode) result.mode = query.mode;
	if (collections && collections.length > 0) result.collections = collections;
	if (query?.exclude_fields) result.exclude_fields = query.exclude_fields;
	if (query?.query_by) result.query_by = query.query_by;
	if (simple) result.simple = simple;
	if (errors.length > 0) result.errors = errors;
	return result;
};
2830
+
2831
+ //#endregion
2832
+ //#region src/features/search/handlers/validators/search-request-validator.ts
2833
/**
 * Validates a search request end-to-end: extracts the collection name and
 * query params, then runs schema validation on the core fields.
 * Returns `{ success: true, ... }` with the parsed pieces, or
 * `{ success: false, error }` carrying a ready-made 400 Response.
 */
function validateSearchRequest(request) {
	const { collectionName, collectionNameStr } = extractCollectionName(request);
	const searchParams = extractSearchParams(request.query);
	// Cheap structural errors (bad page/per_page) short-circuit first.
	if (searchParams.errors && searchParams.errors.length > 0) {
		return {
			success: false,
			error: Response.json({ error: searchParams.errors[0] }, { status: 400 })
		};
	}
	const validation = validateSearchParams({
		page: searchParams.page,
		per_page: searchParams.per_page,
		q: searchParams.q,
		sort_by: searchParams.sort_by
	});
	if (!validation.success) {
		return {
			success: false,
			error: Response.json({
				details: getValidationErrors(validation.errors || []),
				error: "Invalid search parameters"
			}, { status: 400 })
		};
	}
	return {
		success: true,
		collectionName,
		collectionNameStr,
		searchParams
	};
}
2864
+
2865
+ //#endregion
2866
+ //#region src/features/search/handlers/search-handler.ts
2867
/**
 * Type guard: a valid search response is a non-null object whose
 * `hits` property exists and is an array.
 */
function isValidSearchResponse(result) {
	if (result === null || typeof result !== "object") return false;
	return "hits" in result && Array.isArray(result.hits);
}
2873
/**
 * Creates the GET /search request handler.
 *
 * Pipeline: validate request -> resolve allowed target tables -> map tables
 * to their configs -> run SearchService -> optionally reduce the response
 * to the simplified document list (`?simple=true`).
 *
 * Responses: 400 for invalid params or missing `q`, 403 when no requested
 * collection is allowed/enabled, 500 for unexpected failures.
 */
const createSearchHandler = (typesenseClient, pluginOptions) => {
	// Built once per handler; each endpoint registration gets its own set.
	const searchService = new SearchService(typesenseClient, pluginOptions);
	const targetResolver = new TargetCollectionResolver(pluginOptions);
	const configMapper = new SearchConfigMapper(pluginOptions);
	return async (request) => {
		try {
			const validated = validateSearchRequest(request);
			if (!validated.success) return validated.error;
			const { collectionName, searchParams } = validated;
			const targetCollections = targetResolver.resolveTargetTables(collectionName, searchParams.collections);
			if (targetCollections.length === 0) {
				// Distinguish "explicitly requested collections all rejected"
				// from "path collection unknown/disabled" — both are 403.
				if (!collectionName && searchParams.collections && searchParams.collections.length > 0) return Response.json({ error: "None of the requested collections are allowed" }, { status: 403 });
				return Response.json({ error: "Collection not allowed or not enabled" }, { status: 403 });
			}
			// Defensive re-check: schema validation should already reject empty q.
			if (!searchParams.q || searchParams.q.trim() === "") return Response.json({ error: "Query parameter \"q\" is required" }, { status: 400 });
			const searchConfigs = configMapper.mapTablesToConfigs(targetCollections);
			const searchResult = await searchService.performSearch(searchParams.q, searchConfigs, {
				filters: {},
				page: searchParams.page,
				per_page: searchParams.per_page,
				sort_by: searchParams.sort_by,
				mode: searchParams.mode,
				exclude_fields: searchParams.exclude_fields,
				query_by: searchParams.query_by
			});
			// The service may have produced an error Response itself; pass it through.
			if (searchResult instanceof Response) return searchResult;
			if (searchParams.simple && isValidSearchResponse(searchResult)) return Response.json(transformToSimpleFormat(searchResult));
			return Response.json(searchResult);
		} catch (error) {
			return Response.json({
				details: error instanceof Error ? error.message : "Unknown error",
				error: "Search handler failed"
			}, { status: 500 });
		}
	};
};
2912
+
2913
+ //#endregion
2914
+ //#region src/features/search/create-search-endpoints.ts
2915
/**
 * Builds the GET endpoints exposed by the search feature:
 * `/search/collections` (list), `/search/:collectionName` (scoped search)
 * and `/search` (cross-collection search).
 */
const createSearchEndpoints = (typesenseClient, pluginOptions) => {
	const endpoints = [];
	endpoints.push({
		handler: createCollectionsHandler(pluginOptions),
		method: "get",
		path: "/search/collections"
	});
	// Each search route gets its own handler instance, as before.
	for (const path of ["/search/:collectionName", "/search"]) {
		endpoints.push({
			handler: createSearchHandler(typesenseClient, pluginOptions),
			method: "get",
			path
		});
	}
	return endpoints;
};
2934
+
2935
+ //#endregion
2936
+ //#region src/shared/schema/collection-schemas.ts
2937
/**
 * Base Typesense fields shared by every generated collection schema:
 * string `id`/`slug` plus int64 `createdAt`/`updatedAt` timestamps.
 */
const getBaseFields = () => {
	const stringFields = ["id", "slug"].map((name) => ({ name, type: "string" }));
	const timestampFields = ["createdAt", "updatedAt"].map((name) => ({ name, type: "int64" }));
	return [...stringFields, ...timestampFields];
};
2958
/**
 * Creates the embedding vector field definition.
 * @param optional - Whether the field is marked optional in the schema.
 * @param dimensions - Vector dimensionality (defaults to the plugin-wide
 *   DEFAULT_EMBEDDING_DIMENSIONS constant).
 */
const getEmbeddingField = (optional = true, dimensions = DEFAULT_EMBEDDING_DIMENSIONS) => {
	const field = {
		name: "embedding",
		type: "float[]",
		num_dim: dimensions
	};
	// The `optional` key is only present when true, matching schema expectations.
	if (optional) field.optional = true;
	return field;
};
2969
/**
 * Converts plugin FieldMapping entries into Typesense field schemas.
 * The "auto" type is normalized to "string"; facet/index/optional flags
 * are carried over as-is (possibly undefined).
 */
const mapFieldMappingsToSchema = (fields) => {
	return fields.map(({ name, type, facet, index, optional }) => ({
		name,
		type: type === "auto" ? "string" : type,
		facet,
		index,
		optional
	}));
};
2981
/**
 * Chunk-specific fields for chunked collections: parent linkage, ordering,
 * the chunk body, a chunk marker flag, and an optional faceted `headers`
 * hierarchy.
 */
const getChunkFields = () => {
	const plainFields = [
		["parent_doc_id", "string"],
		["chunk_index", "int32"],
		["chunk_text", "string"],
		["is_chunk", "bool"]
	].map(([name, type]) => ({ name, type }));
	return [...plainFields, {
		name: "headers",
		type: "string[]",
		facet: true,
		optional: true
	}];
};
3008
/**
 * Builds the complete Typesense schema for a chunk collection:
 * base fields (minus any shadowed by chunk or user fields), chunk fields,
 * user-configured fields, and a required embedding vector.
 */
const getChunkCollectionSchema = (collectionSlug, tableConfig, embeddingDimensions = DEFAULT_EMBEDDING_DIMENSIONS) => {
	const userFields = tableConfig.fields ? mapFieldMappingsToSchema(tableConfig.fields) : [];
	const chunkFields = getChunkFields();
	// Base fields are dropped when a chunk or user field claims the same name.
	const reserved = new Set([...userFields, ...chunkFields].map((f) => f.name));
	const baseFields = getBaseFields().filter((f) => !reserved.has(f.name));
	return {
		name: collectionSlug,
		fields: [
			...baseFields,
			...chunkFields,
			...userFields,
			// Embedding is mandatory (optional=false) for chunk collections.
			getEmbeddingField(false, embeddingDimensions)
		]
	};
};
3024
/**
 * Builds the complete Typesense schema for a full-document collection:
 * base fields (minus any shadowed by user fields), the user-configured
 * fields, and an optional embedding vector.
 */
const getFullDocumentCollectionSchema = (collectionSlug, tableConfig, embeddingDimensions = DEFAULT_EMBEDDING_DIMENSIONS) => {
	const mappedFields = mapFieldMappingsToSchema(tableConfig.fields);
	const reserved = new Set(mappedFields.map((f) => f.name));
	const baseFields = getBaseFields().filter((f) => !reserved.has(f.name));
	return {
		name: collectionSlug,
		fields: [
			...baseFields,
			...mappedFields,
			// Embedding is optional for full-document collections.
			getEmbeddingField(true, embeddingDimensions)
		]
	};
};
3039
+
3040
+ //#endregion
3041
+ //#region src/shared/schema/field-mapper.ts
3042
/**
 * Resolves a dot-notation path (e.g. "author.name") against an object.
 * Returns undefined when the root is not an object or any path segment
 * is missing or not traversable.
 */
const getValueByPath = (obj, path) => {
	if (!obj || typeof obj !== "object") return void 0;
	let current = obj;
	for (const segment of path.split(".")) {
		if (current && typeof current === "object" && segment in current) current = current[segment];
		else return void 0;
	}
	return current;
};
/**
 * Maps a Payload document onto a flat Typesense document according to the
 * configured field mappings.
 *
 * Per field: the value is read via `payloadField` (falling back to `name`);
 * a custom `transform` takes full control when present (no defaulting or
 * coercion). Otherwise missing values are defaulted by type — optional
 * fields are omitted entirely — and the value is coerced to the declared
 * Typesense type.
 */
const mapPayloadDocumentToTypesense = async (doc, fields) => {
	// Default value for a missing non-optional field; unknown types keep
	// their null/undefined value, matching the original behavior.
	const defaultFor = (type, current) => {
		if (type === "string") return "";
		if (type === "string[]") return [];
		if (type === "bool") return false;
		if (type.startsWith("int") || type === "float") return 0;
		return current;
	};
	// Coerce a present value to the declared type. Objects destined for a
	// string field are JSON-stringified rather than "[object Object]"-ed.
	const coerce = (type, value) => {
		if (type === "string" && typeof value !== "string") return typeof value === "object" && value !== null ? JSON.stringify(value) : String(value);
		if (type === "string[]" && !Array.isArray(value)) return [String(value)];
		if (type === "bool") return Boolean(value);
		return value;
	};
	const result = {};
	for (const field of fields) {
		const raw = getValueByPath(doc, field.payloadField || field.name);
		if (field.transform) {
			result[field.name] = await field.transform(raw);
			continue;
		}
		let value = raw;
		if (value === void 0 || value === null) {
			if (field.optional) continue;
			value = defaultFor(field.type, value);
		}
		result[field.name] = coerce(field.type, value);
	}
	return result;
};
3076
+
3077
+ //#endregion
3078
+ //#region src/features/embedding/chunking/strategies/markdown-based/markdown-chunker.ts
3079
+ /**
3080
+ * Markdown chunking strategy using LangChain's MarkdownTextSplitter
3081
+ * Splits markdown text respecting markdown structure and preserves header metadata
3082
+ */
3083
/**
 * Extracts markdown ATX headers (levels 1-6) and their character positions
 * from the given text.
 */
const extractHeaders = (text) => {
	const headerRegex = /^(#{1,6})\s+(.+)$/gm;
	const headers = [];
	for (const match of text.matchAll(headerRegex)) {
		headers.push({
			level: match[1]?.length ?? 0,
			text: match[2]?.trim() ?? "",
			position: match.index
		});
	}
	return headers;
};
3097
/**
 * Determines which headers apply to a chunk by locating the chunk's first
 * 50 characters inside the full text and replaying all headers up to that
 * position. Returns `{ "Header N": text }` metadata, or an empty object
 * when the chunk cannot be located or no headers precede it.
 */
const findChunkHeaders = (chunk, allHeaders, fullText) => {
	const probe = chunk.substring(0, Math.min(50, chunk.length));
	const chunkPosition = fullText.indexOf(probe);
	if (chunkPosition === -1) return {};
	const preceding = allHeaders.filter((h) => h.position <= chunkPosition);
	if (preceding.length === 0) return {};
	// Replay headers in order; seeing a header clears all deeper levels so
	// only the hierarchy active at the chunk's position survives.
	const active = Array(6).fill(null);
	for (const header of preceding) {
		active[header.level - 1] = header;
		for (let depth = header.level; depth < 6; depth++) active[depth] = null;
	}
	const metadata = {};
	active.forEach((header, i) => {
		if (header) metadata[`Header ${i + 1}`] = header.text;
	});
	return metadata;
};
3114
/**
 * Chunks markdown text with LangChain's MarkdownTextSplitter, preserving
 * markdown structure, and attaches header metadata (via findChunkHeaders)
 * to each resulting chunk. Empty/whitespace-only input yields no chunks.
 */
const chunkMarkdown = async (text, options = {}) => {
	const { maxChunkSize = DEFAULT_CHUNK_SIZE, overlap = DEFAULT_OVERLAP } = options;
	if (!text || text.trim().length === 0) return [];
	const headers = extractHeaders(text);
	const splitter = new MarkdownTextSplitter({
		chunkSize: maxChunkSize,
		chunkOverlap: overlap
	});
	const docs = await splitter.createDocuments([text]);
	return docs.map((doc, index) => {
		const metadata = findChunkHeaders(doc.pageContent, headers, text);
		return {
			text: doc.pageContent,
			index,
			startIndex: 0,
			endIndex: doc.pageContent.length,
			// Omit metadata (key present but undefined) when no headers matched.
			metadata: Object.keys(metadata).length > 0 ? metadata : void 0
		};
	});
};
3136
+
3137
+ //#endregion
3138
+ //#region src/features/embedding/chunking/index.ts
3139
+ /**
3140
+ * Text chunking module - provides utilities for splitting text into optimal chunks
3141
+ *
3142
+ * Available strategies:
3143
+ * - Simple: Uses LangChain's RecursiveCharacterTextSplitter
3144
+ * - Markdown-based: Uses LangChain's MarkdownTextSplitter for markdown documents
3145
+ *
3146
+ * Future strategies can be added in ./strategies/
3147
+ */
3148
/**
 * Splits text into chunks using LangChain's RecursiveCharacterTextSplitter.
 * Main entry point for simple text chunking.
 *
 * Short texts (<= maxChunkSize) are returned as a single trimmed chunk
 * without invoking the splitter; empty/whitespace input yields [].
 *
 * Fix: the single-chunk branch previously reported `endIndex: text.length`
 * (the untrimmed length) while returning the trimmed text — inconsistent
 * with the splitter branch where endIndex always equals the chunk text's
 * length. It now reports the trimmed length.
 */
const chunkText = async (text, options = {}) => {
	const { maxChunkSize = DEFAULT_CHUNK_SIZE, overlap = DEFAULT_OVERLAP } = options;
	if (!text || text.trim().length === 0) return [];
	if (text.length <= maxChunkSize) {
		const trimmed = text.trim();
		return [{
			text: trimmed,
			index: 0,
			startIndex: 0,
			endIndex: trimmed.length
		}];
	}
	return (await new RecursiveCharacterTextSplitter({
		chunkSize: maxChunkSize,
		chunkOverlap: overlap
	}).createDocuments([text])).map((chunk, index) => ({
		text: chunk.pageContent,
		index,
		startIndex: 0,
		endIndex: chunk.pageContent.length
	}));
};
3171
+
3172
+ //#endregion
3173
+ //#region src/core/utils/header-utils.ts
3174
+ /**
3175
+ * Builds a hierarchical path array from markdown header metadata.
3176
+ *
3177
+ * @param metadata - The metadata object from LangChain's MarkdownHeaderTextSplitter
3178
+ * @returns An array of header paths showing the hierarchy
3179
+ *
3180
+ * @example
3181
+ * // Input: { 'Header 1': 'Introduction', 'Header 2': 'Getting Started', 'Header 3': 'Installation' }
3182
+ * // Output: ['Introduction', 'Introduction > Getting Started', 'Introduction > Getting Started > Installation']
3183
+ */
3184
+ const buildHeaderHierarchy = (metadata) => {
3185
+ if (!metadata || Object.keys(metadata).length === 0) return [];
3186
+ const headers = [];
3187
+ const headerLevels = Object.keys(metadata).filter((key) => key.startsWith("Header ")).sort((a, b) => {
3188
+ return parseInt(a.replace("Header ", "")) - parseInt(b.replace("Header ", ""));
3189
+ });
3190
+ let currentPath = [];
3191
+ for (const headerKey of headerLevels) {
3192
+ const headerValue = metadata[headerKey];
3193
+ if (!headerValue) continue;
3194
+ currentPath.push(headerValue);
3195
+ headers.push(currentPath.join(" > "));
3196
+ }
3197
+ return headers;
3198
+ };
3199
+
3200
+ //#endregion
3201
+ //#region src/core/utils/chunk-format-utils.ts
3202
+ /**
3203
+ * Utilities for formatting chunk text with header metadata
3204
+ */
3205
/**
 * Separator line placed between a chunk's content and its appended header
 * metadata. Byte-exact: parseChunkText splits on this same string.
 */
const CHUNK_HEADER_SEPARATOR = ".________________________________________.";
/**
 * Appends header metadata to a chunk's content.
 *
 * @param content - The chunk content.
 * @param headers - Hierarchical header paths; the last entry is the full
 *   path (e.g. 'Introduction > Getting Started > Installation').
 * @returns `content` unchanged when there are no headers; otherwise
 *   content + separator + `section: <leaf> | path: <full path>`.
 */
const formatChunkWithHeaders = (content, headers) => {
	if (!headers || headers.length === 0) return content;
	const fullPath = headers[headers.length - 1];
	// The section is the last segment of the full path; fall back to the
	// path itself, then to the empty string.
	const section = (fullPath && fullPath.split(" > ").pop()) || fullPath || "";
	const metadataLine = `section: ${section} | path: ${fullPath}`;
	return `${content}\n${CHUNK_HEADER_SEPARATOR}\n${metadataLine}`;
};
3231
/**
 * Splits a formatted chunk back into its content and header metadata.
 *
 * When the separator is absent, or the metadata line yields neither a
 * section nor a path, the whole input is returned untouched as `content`.
 * The metadata line has the form `section: X | path: A > B` and values may
 * themselves contain ": " (re-joined during parsing).
 */
const parseChunkText = (formatted) => {
	if (!formatted.includes(CHUNK_HEADER_SEPARATOR)) return { content: formatted };
	const [contentPart, ...rest] = formatted.split(CHUNK_HEADER_SEPARATOR);
	const content = contentPart ? contentPart.trim() : "";
	// Re-join in case the separator string somehow occurs in the metadata.
	const metadataLine = rest.join(CHUNK_HEADER_SEPARATOR).trim();
	try {
		const metadata = { section: "", path: "" };
		for (const pair of metadataLine.split(" | ")) {
			const [rawKey, ...valueParts] = pair.split(": ");
			const key = rawKey?.trim();
			const value = valueParts.join(": ").trim();
			if (key === "section") metadata.section = value;
			else if (key === "path") metadata.path = value;
		}
		if (metadata.section || metadata.path) return { metadata, content };
		return { content: formatted };
	} catch (error) {
		// Malformed metadata degrades to returning the raw input as content.
		return { content: formatted };
	}
};
3268
/**
 * Strips appended header metadata from a formatted chunk, returning only
 * the content portion.
 */
const extractContentOnly = (formattedChunk) => parseChunkText(formattedChunk).content;
3277
/**
 * Returns only the header metadata of a formatted chunk, or undefined when
 * the chunk carries no metadata.
 */
const extractHeaderMetadata = (formattedChunk) => parseChunkText(formattedChunk).metadata;
3286
+
3287
+ //#endregion
3288
+ //#region src/features/sync/strategies/chunked-syncer.ts
3289
/**
 * Syncs one Payload document into a chunk-per-row Typesense collection:
 * source text is extracted, split into chunks, each chunk optionally
 * embedded and then upserted individually.
 *
 * NOTE(review): relies on module-level `logger`, `mapPayloadDocumentToTypesense`,
 * `buildHeaderHierarchy`, `formatChunkWithHeaders`, `chunkMarkdown` and
 * `chunkText` defined elsewhere in this bundle.
 */
var ChunkedSyncer = class {
	constructor(client, collectionSlug, tableName, config, embeddingService) {
		this.client = client;
		this.collectionSlug = collectionSlug;
		this.tableName = tableName;
		this.config = config;
		this.embeddingService = embeddingService;
	}
	/**
	 * Full sync of `doc`. On "update", all previously stored chunks for the
	 * document are deleted first so a shorter document leaves no orphans;
	 * fresh chunks are then upserted one at a time.
	 */
	async sync(doc, operation) {
		logger.debug(`Syncing document ${doc.id} to table ${this.tableName} (Chunked Mode)`);
		const sourceText = await this.extractSourceText(doc);
		if (!sourceText) {
			logger.warn(`No source text found for document ${doc.id}`);
			return;
		}
		const chunks = await this.generateChunks(sourceText);
		// Shared (non-chunk) fields copied onto every chunk row.
		const fields = this.config.fields ? await mapPayloadDocumentToTypesense(doc, this.config.fields) : {};
		fields.slug = doc.slug || "";
		fields.publishedAt = doc.publishedAt ? new Date(doc.publishedAt).getTime() : void 0;
		if (operation === "update") await this.client.collections(this.tableName).documents().delete({ filter_by: `parent_doc_id:${doc.id}` });
		for (const chunk of chunks) {
			const headers = buildHeaderHierarchy(chunk.metadata);
			const formattedText = formatChunkWithHeaders(chunk.text, headers);
			// Embedding is best-effort: missing service or a null result
			// leaves an empty vector on the chunk.
			let embedding = [];
			if (this.embeddingService) {
				const result = await this.embeddingService.getEmbedding(formattedText);
				if (result) embedding = result;
			}
			const chunkDoc = {
				// Deterministic id so re-syncs upsert rather than duplicate.
				id: `${doc.id}_chunk_${chunk.index}`,
				parent_doc_id: String(doc.id),
				chunk_index: chunk.index,
				chunk_text: formattedText,
				is_chunk: true,
				headers,
				embedding,
				createdAt: new Date(doc.createdAt).getTime(),
				updatedAt: new Date(doc.updatedAt).getTime(),
				...fields
			};
			await this.client.collections(this.tableName).documents().upsert(chunkDoc);
		}
		logger.info(`Synced ${chunks.length} chunks for document ${doc.id} to ${this.tableName}`);
	}
	/**
	 * Concatenates the configured source fields into one text blob
	 * (joined by blank lines). Fields with a transform use its result;
	 * untransformed values are included only when they are objects with a
	 * `root` key — presumably Payload rich-text — in which case they are
	 * JSON-stringified. Plain untransformed strings are skipped
	 * (NOTE(review): possibly intentional — verify against plugin docs).
	 */
	async extractSourceText(doc) {
		const textParts = [];
		for (const sourceField of this.config.sourceFields) {
			const fieldName = typeof sourceField === "string" ? sourceField : sourceField.field;
			const transform = typeof sourceField === "string" ? void 0 : sourceField.transform;
			const val = doc[fieldName];
			if (transform) {
				let transformedVal = await transform(val);
				textParts.push(String(transformedVal || ""));
			} else if (typeof val === "object" && val !== null && "root" in val) {
				let transformedVal = JSON.stringify(val);
				textParts.push(String(transformedVal || ""));
			}
		}
		return textParts.join("\n\n");
	}
	/** Splits text using the configured strategy: "markdown" or simple. */
	async generateChunks(text) {
		const { strategy, size, overlap } = this.config.chunking;
		const options = {
			maxChunkSize: size,
			overlap
		};
		if (strategy === "markdown") return await chunkMarkdown(text, options);
		else return await chunkText(text, options);
	}
};
3359
+
3360
+ //#endregion
3361
+ //#region src/features/sync/strategies/document-syncer.ts
3362
/**
 * Syncs one Payload document into Typesense as a single row (full-document
 * mode), optionally attaching an embedding computed from the configured
 * source fields.
 *
 * NOTE(review): relies on module-level `logger` and
 * `mapPayloadDocumentToTypesense` defined elsewhere in this bundle.
 */
var DocumentSyncer = class {
	constructor(client, collectionSlug, tableName, config, embeddingService) {
		this.client = client;
		this.collectionSlug = collectionSlug;
		this.tableName = tableName;
		this.config = config;
		this.embeddingService = embeddingService;
	}
	/**
	 * Maps the document through the configured field mappings, stamps the
	 * standard id/slug/timestamp fields, optionally computes an embedding,
	 * and upserts the result.
	 */
	async sync(doc, operation) {
		logger.debug(`Syncing document ${doc.id} to table ${this.tableName} (Document Mode)`);
		const typesenseDoc = await mapPayloadDocumentToTypesense(doc, this.config.fields);
		// Standard fields always win over mapped values.
		typesenseDoc.id = String(doc.id);
		typesenseDoc.slug = doc.slug || "";
		typesenseDoc.createdAt = new Date(doc.createdAt).getTime();
		typesenseDoc.updatedAt = new Date(doc.updatedAt).getTime();
		if (doc.publishedAt) typesenseDoc.publishedAt = new Date(doc.publishedAt).getTime();
		// Embedding is best-effort and only attempted when both source fields
		// and an embedding service are configured.
		if (this.config.sourceFields && this.embeddingService) {
			const sourceText = await this.extractSourceText(doc);
			if (sourceText) {
				const embedding = await this.embeddingService.getEmbedding(sourceText);
				if (embedding) typesenseDoc.embedding = embedding;
			}
		}
		await this.client.collections(this.tableName).documents().upsert(typesenseDoc);
		logger.info(`Synced document ${doc.id} to ${this.tableName}`);
	}
	/**
	 * Extract and transform source fields for embedding generation.
	 * Each configured field contributes its (optionally transformed) value;
	 * objects with a `root` key — presumably Payload rich-text — are
	 * JSON-stringified. Parts are joined with blank lines.
	 */
	async extractSourceText(doc) {
		if (!this.config.sourceFields) return "";
		const textParts = [];
		for (const sourceField of this.config.sourceFields) {
			let fieldName;
			let transform;
			if (typeof sourceField === "string") fieldName = sourceField;
			else {
				fieldName = sourceField.field;
				transform = sourceField.transform;
			}
			let val = doc[fieldName];
			if (transform) val = await transform(val);
			else if (typeof val === "object" && val !== null && "root" in val) val = JSON.stringify(val);
			textParts.push(String(val || ""));
		}
		return textParts.join("\n\n");
	}
};
3410
+
3411
+ //#endregion
3412
+ //#region src/features/sync/document-sync.ts
3413
/**
 * Syncs a Payload document to Typesense.
 *
 * Uses the Strategy pattern: dispatches to ChunkedSyncer (one record per
 * chunk) or DocumentSyncer (one record per document) based on
 * `tableConfig.mode`. Failures are logged — including whether the error looks
 * like a Typesense validation error — but never rethrown, so a search-index
 * outage cannot break the Payload save operation.
 */
const syncDocumentToTypesense = async (typesenseClient, collectionSlug, doc, operation, tableConfig, embeddingService) => {
	try {
		// Explicit tableName wins over the derived collection name.
		const tableName = tableConfig.tableName || getTypesenseCollectionName(collectionSlug, tableConfig);
		logger.debug("Syncing document to Typesense", {
			documentId: doc.id,
			collection: collectionSlug,
			tableName,
			operation,
			mode: tableConfig.mode
		});
		const SyncerStrategy = tableConfig.mode === "chunked" ? ChunkedSyncer : DocumentSyncer;
		const syncer = new SyncerStrategy(typesenseClient, collectionSlug, tableName, tableConfig, embeddingService);
		await syncer.sync(doc, operation);
		logger.info("Document synced successfully to Typesense", {
			documentId: doc.id,
			collection: collectionSlug,
			operation
		});
	} catch (error) {
		const message = error instanceof Error ? error.message : String(error);
		const isValidationError = message.toLowerCase().includes("validation");
		logger.error("Failed to sync document to Typesense", error, {
			documentId: doc.id,
			collection: collectionSlug,
			operation,
			isValidationError
		});
	}
};
3444
+
3445
+ //#endregion
3446
+ //#region src/features/sync/document-delete.ts
3447
/**
 * Deletes a document from Typesense.
 *
 * First attempts to delete the record whose id equals the Payload document id
 * (full-document mode). If Typesense answers 404, falls back to deleting all
 * chunk records whose `parent_doc_id` matches (chunked mode). Every failure is
 * logged and swallowed so deletion problems never break the Payload request.
 */
const deleteDocumentFromTypesense = async (typesenseClient, collectionSlug, docId, tableConfig) => {
	const tableName = getTypesenseCollectionName(collectionSlug, tableConfig);
	try {
		logger.debug("Attempting to delete document from Typesense", {
			documentId: docId,
			collection: collectionSlug,
			tableName
		});
		try {
			await typesenseClient.collections(tableName).documents(docId).delete();
			logger.info("Document deleted from Typesense", {
				documentId: docId,
				tableName
			});
			return;
		} catch (docDeleteError) {
			// Anything other than "not found" is unexpected — handled by the outer catch.
			if (docDeleteError.httpStatus !== 404) throw docDeleteError;
			logger.debug("Document not found, attempting to delete chunks", {
				documentId: docId,
				tableName
			});
		}
		// 404 on the direct delete: the document may have been stored as chunks.
		try {
			await typesenseClient.collections(tableName).documents().delete({ filter_by: `parent_doc_id:${docId}` });
			logger.info("All chunks deleted for document", {
				documentId: docId,
				tableName
			});
		} catch (chunkDeleteError) {
			if (chunkDeleteError.httpStatus === 404) logger.debug("No chunks found to delete", { documentId: docId });
			else logger.error("Failed to delete chunks for document", chunkDeleteError, {
				documentId: docId,
				tableName
			});
		}
	} catch (error) {
		logger.error("Failed to delete document from Typesense", error, {
			documentId: docId,
			collection: collectionSlug,
			tableName
		});
	}
};
3495
+
3496
+ //#endregion
3497
+ //#region src/features/sync/schema-manager.ts
3498
/**
 * Manages Typesense collection schemas for all configured Payload collections:
 * creates missing collections and additively updates existing ones.
 */
var SchemaManager = class {
	/**
	 * @param client - Typesense client used for collection CRUD.
	 * @param config - Plugin options (collections map plus feature flags).
	 */
	constructor(client, config) {
		this.client = client;
		this.config = config;
	}
	/**
	 * Synchronizes all configured collections with Typesense.
	 * Iterates every enabled table config of every configured Payload
	 * collection; no-op when no collections are configured.
	 */
	async syncCollections() {
		if (!this.config.collections) return;
		logger.info("Starting schema synchronization...");
		const embeddingDimensions = this.getEmbeddingDimensions();
		for (const [collectionSlug, tableConfigs] of Object.entries(this.config.collections)) {
			if (!tableConfigs) continue;
			for (const tableConfig of tableConfigs) {
				if (!tableConfig.enabled) continue;
				await this.syncTable(collectionSlug, tableConfig, embeddingDimensions);
			}
		}
		logger.info("Schema synchronization completed.");
	}
	/**
	 * Syncs a single table configuration: updates the schema when the
	 * collection exists, creates it on a 404, rethrows any other error.
	 */
	async syncTable(collectionSlug, tableConfig, embeddingDimensions) {
		const tableName = getTypesenseCollectionName(collectionSlug, tableConfig);
		// Chunked mode stores one record per chunk; otherwise one record per document.
		let targetSchema;
		if (tableConfig.mode === "chunked") targetSchema = getChunkCollectionSchema(tableName, tableConfig, embeddingDimensions);
		else targetSchema = getFullDocumentCollectionSchema(tableName, tableConfig, embeddingDimensions);
		try {
			const collection = await this.client.collections(tableName).retrieve();
			await this.updateCollectionSchema(tableName, collection, targetSchema);
		} catch (error) {
			if (error?.httpStatus === 404) {
				logger.info(`Creating collection: ${tableName}`);
				await this.client.collections().create(targetSchema);
			} else {
				logger.error(`Error checking collection ${tableName}`, error);
				throw error;
			}
		}
	}
	/**
	 * Adds fields present in targetSchema but missing from the live collection.
	 * Additive only: existing fields are never dropped or altered, and "id" is
	 * skipped (implicit in Typesense). Update failures are logged, not rethrown.
	 */
	async updateCollectionSchema(tableName, currentSchema, targetSchema) {
		if (!currentSchema || !currentSchema.fields) return;
		const currentFields = new Set(currentSchema.fields.map((f) => f.name));
		const newFields = targetSchema.fields?.filter((f) => !currentFields.has(f.name) && f.name !== "id") || [];
		if (newFields.length > 0) {
			logger.info(`Updating collection ${tableName} with ${newFields.length} new fields`, { fields: newFields.map((f) => f.name) });
			try {
				await this.client.collections(tableName).update({ fields: newFields });
			} catch (error) {
				logger.error(`Failed to update collection ${tableName}`, error);
			}
		}
	}
	/**
	 * Resolves the embedding vector dimensionality.
	 *
	 * Bug fix: the configured `features.embedding.dimensions` value was
	 * previously tested but never returned (the if-body was empty), so custom
	 * dimensions were silently ignored and the default was always used.
	 */
	getEmbeddingDimensions() {
		if (this.config.features.embedding?.dimensions) return this.config.features.embedding.dimensions;
		return DEFAULT_EMBEDDING_DIMENSIONS;
	}
};
3558
+
3559
+ //#endregion
3560
+ //#region src/features/rag/agent-manager.ts
3561
/**
 * Registers RAG conversation agents as Typesense conversation models.
 *
 * Models are managed through Typesense's raw HTTP API
 * (`/conversations/models`) rather than the client library; the endpoint URL
 * and API key are derived from the client's own configuration.
 */
var AgentManager = class {
	// client: Typesense client — used for collection setup and to derive the HTTP endpoint.
	// config: full plugin options; agents live under config.features.rag.agents.
	constructor(client, config) {
		this.client = client;
		this.config = config;
	}
	/**
	 * Synchronizes all configured RAG agents with Typesense.
	 *
	 * No-op when RAG is disabled or no agents are configured. Every distinct
	 * history collection (default "conversation_history") is ensured to exist
	 * before the agent models are upserted, one agent at a time.
	 */
	async syncAgents() {
		if (!this.config.features.rag?.enabled) return;
		const agents = this.config.features.rag.agents || [];
		if (agents.length === 0) return;
		logger.info(`Starting synchronization of ${agents.length} RAG agents...`);
		// Deduplicate history collections so each is created at most once.
		const historyCollections = new Set(agents.map((a) => a.historyCollection || "conversation_history"));
		for (const collectionName of historyCollections) await ensureConversationCollection(this.client, collectionName);
		for (const agent of agents) await this.syncAgentModel(agent);
		logger.info("Agent synchronization completed.");
	}
	/**
	 * Maps one agent definition to a Typesense conversation-model payload and
	 * upserts it. Returns true on success, false on any failure (errors are
	 * logged, never thrown).
	 */
	async syncAgentModel(agent) {
		try {
			// Defaults: 64 KiB context, 24 h TTL, top-5 retrieval results.
			const modelConfig = {
				id: agent.slug,
				model_name: agent.llmModel,
				system_prompt: agent.systemPrompt,
				api_key: agent.apiKey,
				history_collection: agent.historyCollection || "conversation_history",
				max_bytes: agent.maxContextBytes || 65536,
				ttl: agent.ttl || 86400,
				k_results: agent.kResults || 5
			};
			return await this.upsertConversationModel(modelConfig);
		} catch (error) {
			logger.error(`Failed to sync agent ${agent.slug}`, error);
			return false;
		}
	}
	/**
	 * Create-or-update a conversation model via the Typesense HTTP API.
	 *
	 * Tries POST first; a 409 (model id already exists) triggers a PUT update.
	 * Returns true when either request succeeds, false on any error response,
	 * invalid client configuration, or network failure.
	 */
	async upsertConversationModel(modelConfig) {
		const configuration = this.client.configuration;
		if (!configuration || !configuration.nodes || configuration.nodes.length === 0) {
			logger.error("Invalid Typesense client configuration");
			return false;
		}
		// Only the first configured node is targeted for the raw HTTP calls.
		const node = configuration.nodes[0];
		const typesenseApiKey = configuration.apiKey;
		const baseUrl = `${node.protocol}://${node.host}:${node.port}`;
		try {
			const createResponse = await fetch(`${baseUrl}/conversations/models`, {
				method: "POST",
				headers: {
					"Content-Type": "application/json",
					"X-TYPESENSE-API-KEY": typesenseApiKey || ""
				},
				body: JSON.stringify(modelConfig)
			});
			if (createResponse.ok) {
				logger.info(`Agent model created: ${modelConfig.id}`);
				return true;
			}
			// 409 means a model with this id already exists — switch to update.
			if (createResponse.status === 409) {
				logger.debug(`Agent model ${modelConfig.id} exists, updating...`);
				const updateResponse = await fetch(`${baseUrl}/conversations/models/${modelConfig.id}`, {
					method: "PUT",
					headers: {
						"Content-Type": "application/json",
						"X-TYPESENSE-API-KEY": typesenseApiKey || ""
					},
					body: JSON.stringify(modelConfig)
				});
				if (updateResponse.ok) {
					logger.info(`Agent model updated: ${modelConfig.id}`);
					return true;
				} else {
					const err$1 = await updateResponse.text();
					logger.error(`Failed to update agent ${modelConfig.id}: ${err$1}`);
					return false;
				}
			}
			// Any other non-ok status: surface the response body in the log.
			const err = await createResponse.text();
			logger.error(`Failed to create agent ${modelConfig.id}: ${err}`);
			return false;
		} catch (networkError) {
			logger.error("Network error syncing agent model", networkError);
			return false;
		}
	}
};
3647
+
3648
+ //#endregion
3649
+ //#region src/plugin/main-plugin.ts
3650
/**
 * Typesense Search Plugin for Payload CMS
 *
 * Provides full-text search and vector search capabilities using Typesense,
 * with optional RAG (Retrieval Augmented Generation) support.
 *
 * Wires three things into the Payload config: search/RAG endpoints, per-
 * collection afterChange/afterDelete sync hooks (when auto-sync is enabled),
 * and an onInit step that provisions Typesense collections and RAG agents.
 *
 * @param pluginOptions - Configuration options for the plugin
 * @returns Payload config modifier function
 */
const typesenseSearch = (pluginOptions) => (config) => {
	const container = setupContainer(pluginOptions);
	const typesenseClient = createTypesenseClient(pluginOptions.typesense);
	// Embedding is optional — resolved from the DI container only when registered.
	let embeddingService;
	if (container.has(TOKENS.EMBEDDING_SERVICE)) embeddingService = container.resolve(TOKENS.EMBEDDING_SERVICE);
	const searchEndpoints = createSearchEndpoints(typesenseClient, pluginOptions);
	const ragEndpoints = pluginOptions.features.rag?.enabled ? createRAGPayloadHandlers(pluginOptions) : [];
	// Append plugin endpoints after any endpoints the app already defines.
	config.endpoints = [
		...config.endpoints || [],
		...searchEndpoints,
		...ragEndpoints
	];
	logger.debug("Search and RAG endpoints registered", {
		searchEndpointsCount: searchEndpoints.length,
		ragEndpointsCount: ragEndpoints.length
	});
	// Register sync hooks only when sync is enabled, autoSync is not explicitly
	// disabled, and at least one table config for the collection is enabled.
	if (pluginOptions.features.sync?.enabled && pluginOptions.features.sync.autoSync !== false && pluginOptions.collections) config.collections = (config.collections || []).map((collection) => {
		const tableConfigs = pluginOptions.collections?.[collection.slug];
		if (tableConfigs && Array.isArray(tableConfigs) && tableConfigs.some((tableConfig) => tableConfig.enabled)) {
			logger.debug("Registering sync hooks for collection", {
				collection: collection.slug,
				tableCount: tableConfigs?.length || 0
			});
			// Existing hooks are preserved; the plugin's hooks run after them.
			return {
				...collection,
				hooks: {
					...collection.hooks,
					afterChange: [...collection.hooks?.afterChange || [], async ({ doc, operation, req: _req }) => {
						if (tableConfigs && Array.isArray(tableConfigs)) {
							for (const tableConfig of tableConfigs) if (tableConfig.enabled) await syncDocumentToTypesense(typesenseClient, collection.slug, doc, operation, tableConfig, embeddingService);
						}
					}],
					afterDelete: [...collection.hooks?.afterDelete || [], async ({ doc, req: _req }) => {
						if (tableConfigs && Array.isArray(tableConfigs)) {
							for (const tableConfig of tableConfigs) if (tableConfig.enabled) await deleteDocumentFromTypesense(typesenseClient, collection.slug, doc.id, tableConfig);
						}
					}]
				}
			};
		}
		return collection;
	});
	// Chain onto any onInit the app already defined, running it first.
	const incomingOnInit = config.onInit;
	config.onInit = async (payload) => {
		if (incomingOnInit) await incomingOnInit(payload);
		try {
			logger.info("Initializing Typesense collections...");
			await new SchemaManager(typesenseClient, pluginOptions).syncCollections();
			if (pluginOptions.features.rag?.enabled) {
				logger.info("Initializing RAG agents...");
				await new AgentManager(typesenseClient, pluginOptions).syncAgents();
			}
		} catch (error) {
			// Initialization failures are logged, not thrown, so the app still boots.
			logger.error("Error initializing Typesense resources", error);
		}
	};
	return config;
};
3717
+
3718
+ //#endregion
3719
+ export { CHUNK_HEADER_SEPARATOR, DEFAULT_CACHE_TTL_MS, DEFAULT_CHUNK_OVERLAP, DEFAULT_CHUNK_SIZE, DEFAULT_EMBEDDING_DIMENSIONS, DEFAULT_EMBEDDING_MODEL, DEFAULT_HYBRID_SEARCH_ALPHA, DEFAULT_RAG_CONTEXT_LIMIT, DEFAULT_RAG_LLM_MODEL, DEFAULT_RAG_MAX_TOKENS, DEFAULT_SEARCH_LIMIT, DEFAULT_SESSION_TTL_SEC, ErrorCodes, buildContextText, buildConversationalUrl, buildHybridSearchParams, buildMultiSearchRequestBody, buildMultiSearchRequests, closeSession, configureLogger, createLogger, createSSEForwardStream, createTypesenseClient, ensureConversationCollection, executeRAGSearch, extractContentOnly, extractHeaderMetadata, extractSourcesFromResults, fetchChunkById, formatChunkWithHeaders, formatSSEEvent, generateEmbedding, generateEmbeddingWithUsage, generateEmbeddingsBatchWithUsage, getActiveSession, getDefaultRAGConfig, getSessionByConversationId, jsonResponse, logger, mergeRAGConfigWithDefaults, parseChunkText, parseConversationEvent, processConversationStream, saveChatSession, sendSSEEvent, testTypesenseConnection, typesenseSearch };
3720
+ //# sourceMappingURL=index.mjs.map