@tiledesk/tiledesk-server 2.13.49 → 2.13.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/routes/kb.js CHANGED
@@ -1,24 +1,23 @@
1
1
  var express = require('express');
2
- var { Namespace, KB, Engine } = require('../models/kb_setting');
3
- // var { KB } = require('../models/kb_setting');
4
- // var { Engine } = require('../models/kb_setting')
5
2
  var router = express.Router();
6
3
  var winston = require('../config/winston');
7
4
  var multer = require('multer')
8
5
  var upload = multer()
9
- const aiService = require('../services/aiService');
10
6
  const JobManager = require('../utils/jobs-worker-queue-manager/JobManagerV2');
11
- const { Scheduler } = require('../services/Scheduler');
12
7
  var configGlobal = require('../config/global');
13
- const Sitemapper = require('sitemapper');
14
8
  var mongoose = require('mongoose');
9
+ var parsecsv = require("fast-csv");
10
+
11
+ var { Namespace, KB } = require('../models/kb_setting');
15
12
  const faq = require('../models/faq');
16
13
  const faq_kb = require('../models/faq_kb');
17
- let Integration = require('../models/integrations');
18
- var parsecsv = require("fast-csv");
19
14
 
20
15
  const { MODELS_MULTIPLIER } = require('../utils/aiUtils');
21
16
  const { kbTypes } = require('../models/kbConstants');
17
+ const Sitemapper = require('sitemapper');
18
+
19
+ const aiService = require('../services/aiService');
20
+ const aiManager = require('../services/aiManager');
22
21
  const integrationService = require('../services/integrationService');
23
22
 
24
23
  const AMQP_MANAGER_URL = process.env.AMQP_MANAGER_URL;
@@ -68,7 +67,6 @@ jobManagerHybrid.connectAndStartPublisher((status, error) => {
68
67
  }
69
68
  })
70
69
 
71
-
72
70
  let default_preview_settings = {
73
71
  model: 'gpt-4o',
74
72
  max_tokens: 256,
@@ -76,38 +74,25 @@ let default_preview_settings = {
76
74
  top_k: 4,
77
75
  alpha: 0.5,
78
76
  context: null
79
- //context: "You are an awesome AI Assistant."
80
- }
81
- let default_engine = {
82
- name: "pinecone",
83
- type: process.env.PINECONE_TYPE || "pod",
84
- apikey: "",
85
- vector_size: 1536,
86
- index_name: process.env.PINECONE_INDEX
87
- }
88
- let default_engine_hybrid = {
89
- name: "pinecone",
90
- type: process.env.PINECONE_TYPE_HYBRID || "serverless",
91
- apikey: "",
92
- vector_size: 1536,
93
- index_name: process.env.PINECONE_INDEX_HYBRID
94
77
  }
95
78
 
96
- //let default_context = "Answer if and ONLY if the answer is contained in the context provided. If the answer is not contained in the context provided ALWAYS answer with <NOANS>\n{context}"
97
- //let default_context = "You are an helpful assistant for question-answering tasks.\nUse ONLY the following pieces of retrieved context to answer the question.\nIf you don't know the answer, just say that you don't know.\nIf none of the retrieved context answer the question, add this word to the end <NOANS>\n\n{context}";
79
+ const default_engine = require('../config/kb/engine');
80
+ const default_engine_hybrid = require('../config/kb/engine.hybrid');
81
+ const default_embedding = require('../config/kb/embedding');
82
+
98
83
  let contexts = {
99
- "gpt-3.5-turbo": process.env.GPT_3_5_CONTEXT || "You are an helpful assistant for question-answering tasks.\nUse ONLY the pieces of retrieved context delimited by #### and the chat history to answer the question.\nIf you don't know the answer, just say: \"I don't know<NOANS>\"\n\n####{context}####",
100
- "gpt-4": process.env.GPT_4_CONTEXT || "You are an helpful assistant for question-answering tasks.\nUse ONLY the pieces of retrieved context delimited by #### and the chat history to answer the question.\nIf you don't know the answer, just say that you don't know.\nIf and only if none of the retrieved context is useful for your task, add this word to the end <NOANS>\n\n####{context}####",
101
- "gpt-4-turbo-preview": process.env.GPT_4T_CONTEXT || "You are an helpful assistant for question-answering tasks.\nUse ONLY the pieces of retrieved context delimited by #### and the chat history to answer the question.\nIf you don't know the answer, just say that you don't know.\nIf and only if none of the retrieved context is useful for your task, add this word to the end <NOANS>\n\n####{context}####",
102
- "gpt-4o": process.env.GPT_4O_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, return <NOANS>\n\n==Retrieved context start==\n{context}\n==Retrieved context end==",
103
- "gpt-4o-mini": process.env.GPT_4O_MINI_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, return <NOANS>\n\n==Retrieved context start==\n{context}\n==Retrieved context end==",
104
- "gpt-4.1": process.env.GPT_4_1_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, append <NOANS> at the end of the answer\n\n==Retrieved context start==\n{context}\n==Retrieved context end==",
105
- "gpt-4.1-mini": process.env.GPT_4_1_MINI_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, append <NOANS> at the end of the answer\n\n==Retrieved context start==\n{context}\n==Retrieved context end==",
106
- "gpt-4.1-nano": process.env.GPT_4_1_NANO_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, append <NOANS> at the end of the answer\n\n==Retrieved context start==\n{context}\n==Retrieved context end==",
107
- "gpt-5": process.env.GPT_5_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, append <NOANS> at the end of the answer\n\n==Retrieved context start==\n{context}\n==Retrieved context end==",
108
- "gpt-5-mini": process.env.GPT_5_MINI_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, append <NOANS> at the end of the answer\n\n==Retrieved context start==\n{context}\n==Retrieved context end==",
109
- "gpt-5-nano": process.env.GPT_5_NANO_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, append <NOANS> at the end of the answer\n\n==Retrieved context start==\n{context}\n==Retrieved context end==",
110
- "general": process.env.GENERAL_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, append <NOANS> at the end of the answer\n\n==Retrieved context start==\n{context}\n==Retrieved context end=="
84
+ "gpt-3.5-turbo": process.env.GPT_3_5_CONTEXT || "You are an helpful assistant for question-answering tasks.\nUse ONLY the pieces of retrieved context delimited by #### and the chat history to answer the question.\nIf you don't know the answer, just say: \"I don't know<NOANS>\"\n\n####{context}####",
85
+ "gpt-4": process.env.GPT_4_CONTEXT || "You are an helpful assistant for question-answering tasks.\nUse ONLY the pieces of retrieved context delimited by #### and the chat history to answer the question.\nIf you don't know the answer, just say that you don't know.\nIf and only if none of the retrieved context is useful for your task, add this word to the end <NOANS>\n\n####{context}####",
86
+ "gpt-4-turbo-preview": process.env.GPT_4T_CONTEXT || "You are an helpful assistant for question-answering tasks.\nUse ONLY the pieces of retrieved context delimited by #### and the chat history to answer the question.\nIf you don't know the answer, just say that you don't know.\nIf and only if none of the retrieved context is useful for your task, add this word to the end <NOANS>\n\n####{context}####",
87
+ "gpt-4o": process.env.GPT_4O_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, return <NOANS>\n\n==Retrieved context start==\n{context}\n==Retrieved context end==",
88
+ "gpt-4o-mini": process.env.GPT_4O_MINI_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, return <NOANS>\n\n==Retrieved context start==\n{context}\n==Retrieved context end==",
89
+ "gpt-4.1": process.env.GPT_4_1_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, append <NOANS> at the end of the answer\n\n==Retrieved context start==\n{context}\n==Retrieved context end==",
90
+ "gpt-4.1-mini": process.env.GPT_4_1_MINI_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, append <NOANS> at the end of the answer\n\n==Retrieved context start==\n{context}\n==Retrieved context end==",
91
+ "gpt-4.1-nano": process.env.GPT_4_1_NANO_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, append <NOANS> at the end of the answer\n\n==Retrieved context start==\n{context}\n==Retrieved context end==",
92
+ "gpt-5": process.env.GPT_5_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, append <NOANS> at the end of the answer\n\n==Retrieved context start==\n{context}\n==Retrieved context end==",
93
+ "gpt-5-mini": process.env.GPT_5_MINI_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, append <NOANS> at the end of the answer\n\n==Retrieved context start==\n{context}\n==Retrieved context end==",
94
+ "gpt-5-nano": process.env.GPT_5_NANO_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, append <NOANS> at the end of the answer\n\n==Retrieved context start==\n{context}\n==Retrieved context end==",
95
+ "general": process.env.GENERAL_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, append <NOANS> at the end of the answer\n\n==Retrieved context start==\n{context}\n==Retrieved context end=="
111
96
  }
112
97
 
113
98
  /**
@@ -120,20 +105,80 @@ router.post('/scrape/single', async (req, res) => {
120
105
  let project_id = req.projectid;
121
106
 
122
107
  let data = req.body;
123
- winston.debug("/scrape/single data: ", data);
108
+ winston.debug("/scrape/single data: ", data)
109
+
110
+ let namespace;
111
+ try {
112
+ namespace = await aiManager.checkNamespace(project_id, data.namespace);
113
+ } catch (err) {
114
+ let errorCode = err?.errorCode ?? 500;
115
+ return res.status(errorCode).send({ success: false, error: err.error });
116
+ }
117
+
118
+ if (data.type === "sitemap") {
124
119
 
125
- let namespaces = await Namespace.find({ id_project: project_id }).catch((err) => {
126
- winston.error("find namespaces error: ", err)
127
- res.status(500).send({ success: false, error: err })
128
- })
120
+ const urls = await aiManager.fetchSitemap(data.source).catch((err) => {
121
+ winston.error("Error fetching sitemap: ", err);
122
+ return res.status(500).send({ success: false, error: err });
123
+ })
129
124
 
130
- if (!namespaces || namespaces.length == 0) {
131
- let alert = "No namespace found for the selected project " + project_id + ". Cannot add content to a non-existent namespace."
132
- winston.warn(alert);
133
- res.status(403).send(alert);
134
- }
125
+ if (urls.length === 0) {
126
+ return res.status(400).send({ success: false, error: "No url found on sitemap" });
127
+ }
135
128
 
136
- let namespaceIds = namespaces.map(namespace => namespace.id);
129
+ let sitemapKb;
130
+ try {
131
+ sitemapKb = await KB.findById(data.id);
132
+ } catch (err) {
133
+ winston.error("Error finding sitemap content with id " + data.id);
134
+ return res.status(500).send({ success: false, error: "Error finding sitemap content with id " + data.id });
135
+ }
136
+
137
+ if (!sitemapKb) {
138
+ return res.status(404).send({ success: false, error: "Content not found with id " + data.id });
139
+ }
140
+
141
+ let existingKbs;
142
+ try {
143
+ existingKbs = await KB.find({ id_project: project_id, namespace: data.namespace, sitemap_origin_id: data.id }).lean().exec();
144
+ } catch(err) {
145
+ winston.error("Error finding existing contents: ", err);
146
+ return res.status(500).send({ success: false, error: "Error finding existing sitemap contents" });
147
+ }
148
+
149
+ const result = await aiManager.foundSitemapChanges(existingKbs, urls).catch((err) => {
150
+ winston.error("Error finding sitemap differecens ", err);
151
+ return res.status(400).send({ success: false, error: "Error finding sitemap differecens" });
152
+ })
153
+
154
+ if (!result) return; // esco qui
155
+
156
+ const { addedUrls, removedIds } = result;
157
+
158
+ if (removedIds.length > 0) {
159
+ const idsSet = new Set(removedIds);
160
+ const kbsToDelete = existingKbs.filter(obj => idsSet.has(obj._id));
161
+
162
+ aiManager.removeMultipleContents(namespace, kbsToDelete).catch((err) => {
163
+ winston.error("Error deleting multiple contents: ", err);
164
+ })
165
+ }
166
+
167
+ if (addedUrls.length > 0) {
168
+ const options = {
169
+ sitemap_origin_id: sitemapKb._id,
170
+ sitemap_origin: sitemapKb.source,
171
+ scrape_type: sitemapKb.scrape_type,
172
+ scrape_options: sitemapKb.scrape_options,
173
+ refresh_rate: sitemapKb.refresh_rate
174
+ }
175
+ aiManager.addMultipleUrls(namespace, addedUrls, options).catch((err) => {
176
+ winston.error("(webhook) error adding multiple urls contents: ", err);
177
+ })
178
+ }
179
+
180
+ return res.status(200).send({ success: true, message: "Content queued for reindexing", added_urls: addedUrls.length, removed_url: removedIds.length });
181
+ }
137
182
 
138
183
  KB.findById(data.id, (err, kb) => {
139
184
  if (err) {
@@ -146,10 +191,6 @@ router.post('/scrape/single', async (req, res) => {
146
191
  }
147
192
  else {
148
193
 
149
- if (!namespaceIds.includes(kb.namespace)) {
150
- return res.status(403).send({ success: false, error: "Not allowed. The namespace does not belong to the current project." })
151
- }
152
-
153
194
  let json = {
154
195
  id: kb._id,
155
196
  type: kb.type,
@@ -174,16 +215,20 @@ router.post('/scrape/single', async (req, res) => {
174
215
  }
175
216
  }
176
217
 
177
- let ns = namespaces.find(n => n.id === kb.namespace);
178
- json.engine = ns.engine || default_engine;
218
+ json.engine = namespace.engine || default_engine;
219
+ json.embedding = namespace.embedding || default_embedding;
179
220
 
180
- if (ns.hybrid === true) {
221
+ if (namespace.hybrid === true) {
181
222
  json.hybrid = true;
182
223
  }
183
224
 
184
225
  winston.verbose("/scrape/single json: ", json);
185
226
 
186
- startScrape(json).then((response) => {
227
+ if (process.env.NODE_ENV === "test") {
228
+ res.status(200).send({ success: true, message: "Skip indexing in test environment", data: json })
229
+ }
230
+
231
+ aiManager.startScrape(json).then((response) => {
187
232
  winston.verbose("startScrape response: ", response);
188
233
  res.status(200).send(response);
189
234
  }).catch((err) => {
@@ -211,24 +256,15 @@ router.post('/scrape/status', async (req, res) => {
211
256
  returnObject = true;
212
257
  }
213
258
 
214
- let namespaces = await Namespace.find({ id_project: project_id }).catch((err) => {
215
- winston.error("find namespaces error: ", err)
216
- res.status(500).send({ success: false, error: err })
217
- })
218
-
219
- if (!namespaces || namespaces.length == 0) {
220
- let alert = "No namespace found for the selected project " + project_id + ". Cannot add content to a non-existent namespace."
221
- winston.warn(alert);
222
- res.status(403).send(alert);
223
- }
224
-
225
- let namespaceIds = namespaces.map(namespace => namespace.id);
226
- if (!namespaceIds.includes(data.namespace)) {
227
- return res.status(403).send({ success: false, error: "Not allowed. The namespace does not belong to the current project." })
259
+ let namespace;
260
+ try {
261
+ namespace = await aiManager.checkNamespace(project_id, namespace_id);
262
+ } catch (err) {
263
+ let errorCode = err?.errorCode ?? 500;
264
+ return res.status(errorCode).send({ success: false, error: err.error });
228
265
  }
229
266
 
230
- let ns = namespaces.find(n => n.id === data.namespace);
231
- data.engine = ns.engine || default_engine;
267
+ data.engine = namespace.engine || default_engine;
232
268
 
233
269
  aiService.scrapeStatus(data).then(async (response) => {
234
270
 
@@ -238,7 +274,7 @@ router.post('/scrape/status', async (req, res) => {
238
274
 
239
275
  if (response.data.status_code) {
240
276
  // update.status = response.data.status_code;
241
- update.status = await statusConverter(response.data.status_code)
277
+ update.status = await aiManager.statusConverter(response.data.status_code)
242
278
 
243
279
  }
244
280
 
@@ -270,74 +306,33 @@ router.post('/scrape/status', async (req, res) => {
270
306
  })
271
307
 
272
308
  router.post('/qa', async (req, res) => {
273
-
274
- let project_id = req.projectid;
309
+ let id_project = req.projectid;
275
310
  let publicKey = false;
276
311
  let data = req.body;
277
- let ollama_integration;
278
- let vllm_integration;
279
312
 
280
- let namespaces = await Namespace.find({ id_project: project_id }).catch((err) => {
281
- winston.error("find namespaces error: ", err)
282
- res.status(500).send({ success: false, error: err })
283
- })
284
-
285
- if (!namespaces || namespaces.length == 0) {
286
- let alert = "No namespace found for the selected project " + project_id + ". Cannot add content to a non-existent namespace."
287
- winston.warn(alert);
288
- res.status(403).send(alert);
289
- }
290
-
291
- let namespaceIds = namespaces.map(namespace => namespace.id);
292
- if (!namespaceIds.includes(data.namespace)) {
293
- return res.status(403).send({ success: false, error: "Not allowed. The namespace does not belong to the current project." })
313
+ let namespace;
314
+ try {
315
+ namespace = await aiManager.checkNamespace(id_project, data.namespace);
316
+ } catch (err) {
317
+ let errorCode = err?.errorCode ?? 500;
318
+ return res.status(errorCode).send({ success: false, error: err.error });
294
319
  }
295
-
296
320
  winston.debug("/qa data: ", data);
297
321
 
298
- if (!data.llm) {
299
- data.llm = "openai";
322
+ let model;
323
+ try {
324
+ model = await aiManager.resolveLLMConfig(id_project, data.llm, data.model);
325
+ } catch (err) {
326
+ let errorCode = err?.code ?? 500;
327
+ return res.status(errorCode).send({ success: false, error: err.error });
300
328
  }
301
329
 
302
- if (data.llm === 'ollama') {
303
- data.gptkey = process.env.GPTKEY;
304
- try {
305
- ollama_integration = await integrationService.getIntegration(project_id, 'ollama');
306
- } catch (err) {
307
- let error_code = err.code || 500;
308
- let error_message = err.error || `Unable to get integration for ${data.llm}`;
309
- return res.status(error_code).send({ success: false, error: error_message });
310
- }
330
+ if (!model.api_key && model.provider === 'openai') {
331
+ model.api_key = process.env.GPTKEY;
332
+ publicKey = true;
311
333
  }
312
- else if (data.llm === 'vllm') {
313
- data.gptkey = process.env.GPTKEY;
314
- try {
315
- vllm_integration = await integrationService.getIntegration(project_id, 'vllm')
316
- } catch (err) {
317
- let error_code = err.code || 500;
318
- let error_message = err.error || `Unable to get integration for ${data.llm}`;
319
- return res.status(error_code).send({ success: false, error: error_message });
320
- }
321
- } else {
322
- try {
323
- let key = await integrationService.getKeyFromIntegration(project_id, data.llm);
324
- if (!key) {
325
- if (data.llm === 'openai') {
326
- data.gptkey = process.env.GPTKEY;
327
- publicKey = true;
328
- } else {
329
- return res.status(404).send({ success: false, error: `Invalid or empty key provided for ${data.llm}` });
330
- }
331
- } else {
332
- data.gptkey = key;
333
- }
334
334
 
335
- } catch (err) {
336
- let error_code = err.code || 500;
337
- let error_message = err.error || `Unable to get integration for ${data.llm}`;
338
- return res.status(error_code).send({ success: false, error: error_message });
339
- }
340
- }
335
+ data.model = model;
341
336
 
342
337
  let obj = { createdAt: new Date() };
343
338
 
@@ -351,7 +346,7 @@ router.post('/qa', async (req, res) => {
351
346
 
352
347
  // Check if "Advanced Mode" is active. In such case the default_context must be not appended
353
348
  if (!data.advancedPrompt) {
354
- const contextTemplate = contexts[data.model] || contexts["general"];
349
+ const contextTemplate = contexts[data.model.name] || contexts["general"];
355
350
  if (data.system_context) {
356
351
  data.system_context = data.system_context + " \n" + contextTemplate;
357
352
  } else {
@@ -359,10 +354,11 @@ router.post('/qa', async (req, res) => {
359
354
  }
360
355
  }
361
356
 
362
- let ns = namespaces.find(n => n.id === data.namespace);
363
- data.engine = ns.engine || default_engine;
357
+ data.engine = namespace.engine || default_engine;
358
+ data.embedding = namespace.embedding || default_embedding;
359
+ data.embedding.api_key = process.env.EMBEDDING_API_KEY || process.env.GPTKEY;
364
360
 
365
- if (ns.hybrid === true) {
361
+ if (namespace.hybrid === true) {
366
362
  data.search_type = 'hybrid';
367
363
 
368
364
  if (data.reranking === true) {
@@ -371,39 +367,15 @@ router.post('/qa', async (req, res) => {
371
367
  }
372
368
  }
373
369
 
374
- if (data.llm === 'ollama') {
375
- if (!ollama_integration.value.url) {
376
- return res.status(422).send({ success: false, error: "Server url for ollama is empty or invalid"})
377
- }
378
- data.model = {
379
- name: data.model,
380
- url: ollama_integration.value.url,
381
- provider: 'ollama'
382
- }
383
- data.stream = false;
384
- }
385
-
386
- if (data.llm === 'vllm') {
387
- if (!vllm_integration.value.url) {
388
- return res.status(422).send({ success: false, error: "Server url for vllm is empty or invalid"})
389
- }
390
- data.model = {
391
- name: data.model,
392
- url: vllm_integration.value.url,
393
- provider: 'vllm'
394
- }
395
- data.stream = false;
396
- }
397
-
370
+ data.stream = false;
371
+ data.debug = true;
398
372
  delete data.advancedPrompt;
399
373
  winston.verbose("ask data: ", data);
400
-
374
+
401
375
  if (process.env.NODE_ENV === 'test') {
402
376
  return res.status(200).send({ success: true, message: "Question skipped in test environment", data: data });
403
377
  }
404
378
 
405
- data.debug = true;
406
-
407
379
  aiService.askNamespace(data).then((resp) => {
408
380
  winston.debug("qa resp: ", resp.data);
409
381
  let answer = resp.data;
@@ -435,33 +407,185 @@ router.post('/qa', async (req, res) => {
435
407
  }
436
408
 
437
409
  })
410
+
438
411
  })
439
412
 
413
+ // router.post('/qa', async (req, res) => {
414
+
415
+ // let project_id = req.projectid;
416
+ // let publicKey = false;
417
+ // let data = req.body;
418
+ // let ollama_integration;
419
+ // let vllm_integration;
420
+
421
+ // let namespace;
422
+ // try {
423
+ // namespace = await aiManager.checkNamespace(project_id, data.namespace);
424
+ // } catch (err) {
425
+ // let errorCode = err?.errorCode ?? 500;
426
+ // return res.status(errorCode).send({ success: false, error: err.error });
427
+ // }
428
+ // winston.debug("/qa data: ", data);
429
+
430
+ // if (!data.llm) {
431
+ // data.llm = "openai";
432
+ // }
433
+
434
+ // if (data.llm === 'ollama') {
435
+ // data.gptkey = process.env.GPTKEY;
436
+ // try {
437
+ // ollama_integration = await integrationService.getIntegration(project_id, 'ollama');
438
+ // } catch (err) {
439
+ // let error_code = err.code || 500;
440
+ // let error_message = err.error || `Unable to get integration for ${data.llm}`;
441
+ // return res.status(error_code).send({ success: false, error: error_message });
442
+ // }
443
+ // }
444
+ // else if (data.llm === 'vllm') {
445
+ // data.gptkey = process.env.GPTKEY;
446
+ // try {
447
+ // vllm_integration = await integrationService.getIntegration(project_id, 'vllm')
448
+ // } catch (err) {
449
+ // let error_code = err.code || 500;
450
+ // let error_message = err.error || `Unable to get integration for ${data.llm}`;
451
+ // return res.status(error_code).send({ success: false, error: error_message });
452
+ // }
453
+ // } else {
454
+ // try {
455
+ // let key = await integrationService.getKeyFromIntegration(project_id, data.llm);
456
+
457
+ // if (!key) {
458
+ // if (data.llm === 'openai') {
459
+ // data.gptkey = process.env.GPTKEY;
460
+ // publicKey = true;
461
+ // } else {
462
+ // return res.status(404).send({ success: false, error: `Invalid or empty key provided for ${data.llm}` });
463
+ // }
464
+ // } else {
465
+ // data.gptkey = key;
466
+ // }
467
+
468
+ // } catch (err) {
469
+ // let error_code = err.code || 500;
470
+ // let error_message = err.error || `Unable to get integration for ${data.llm}`;
471
+ // return res.status(error_code).send({ success: false, error: error_message });
472
+ // }
473
+ // }
474
+
475
+ // let obj = { createdAt: new Date() };
476
+
477
+ // let quoteManager = req.app.get('quote_manager');
478
+ // if (publicKey === true) {
479
+ // let isAvailable = await quoteManager.checkQuote(req.project, obj, 'tokens');
480
+ // if (isAvailable === false) {
481
+ // return res.status(403).send({ success: false, message: "Tokens quota exceeded", error_code: 13001})
482
+ // }
483
+ // }
484
+
485
+ // // Check if "Advanced Mode" is active. In that case the default_context must not be appended
486
+ // if (!data.advancedPrompt) {
487
+ // const contextTemplate = contexts[data.model] || contexts["general"];
488
+ // if (data.system_context) {
489
+ // data.system_context = data.system_context + " \n" + contextTemplate;
490
+ // } else {
491
+ // data.system_context = contextTemplate;
492
+ // }
493
+ // }
494
+
495
+ // data.engine = namespace.engine || default_engine;
496
+ // data.embedding = namespace.embedding || default_embedding;
497
+ // data.embedding.api_key = process.env.EMBEDDING_API_KEY || process.env.GPTKEY;
498
+
499
+ // if (namespace.hybrid === true) {
500
+ // data.search_type = 'hybrid';
501
+
502
+ // if (data.reranking === true) {
503
+ // data.reranking_multiplier = 3;
504
+ // data.reranker_model = "cross-encoder/ms-marco-MiniLM-L-6-v2";
505
+ // }
506
+ // }
507
+
508
+ // if (data.llm === 'ollama') {
509
+ // if (!ollama_integration.value.url) {
510
+ // return res.status(422).send({ success: false, error: "Server url for ollama is empty or invalid"})
511
+ // }
512
+ // data.model = {
513
+ // name: data.model,
514
+ // url: ollama_integration.value.url,
515
+ // provider: 'ollama'
516
+ // }
517
+ // data.stream = false;
518
+ // }
519
+
520
+ // if (data.llm === 'vllm') {
521
+ // if (!vllm_integration.value.url) {
522
+ // return res.status(422).send({ success: false, error: "Server url for vllm is empty or invalid"})
523
+ // }
524
+ // data.model = {
525
+ // name: data.model,
526
+ // url: vllm_integration.value.url,
527
+ // provider: 'vllm'
528
+ // }
529
+ // data.stream = false;
530
+ // }
531
+
532
+ // delete data.advancedPrompt;
533
+ // winston.verbose("ask data: ", data);
534
+
535
+ // if (process.env.NODE_ENV === 'test') {
536
+ // return res.status(200).send({ success: true, message: "Question skipped in test environment", data: data });
537
+ // }
538
+
539
+ // data.debug = true;
540
+
541
+ // aiService.askNamespace(data).then((resp) => {
542
+ // winston.debug("qa resp: ", resp.data);
543
+ // let answer = resp.data;
544
+
545
+ // if (publicKey === true) {
546
+ // let multiplier = MODELS_MULTIPLIER[data.model];
547
+ // if (!multiplier) {
548
+ // multiplier = 1;
549
+ // winston.info("No multiplier found for AI model")
550
+ // }
551
+ // obj.multiplier = multiplier;
552
+ // obj.tokens = answer.prompt_token_size;
553
+
554
+ // let incremented_key = quoteManager.incrementTokenCount(req.project, obj);
555
+ // winston.verbose("incremented_key: ", incremented_key);
556
+ // }
557
+
558
+ // return res.status(200).send(answer);
559
+
560
+ // }).catch((err) => {
561
+ // winston.error("qa err: ", err);
562
+ // winston.error("qa err.response: ", err.response);
563
+ // if (err.response && err.response.status) {
564
+ // let status = err.response.status;
565
+ // res.status(status).send({ success: false, statusText: err.response.statusText, error: err.response.data.detail });
566
+ // }
567
+ // else {
568
+ // res.status(500).send({ success: false, error: err });
569
+ // }
570
+
571
+ // })
572
+ // })
573
+
440
574
  router.delete('/delete', async (req, res) => {
441
575
 
442
576
  let project_id = req.projectid;
443
577
  let data = req.body;
444
578
  winston.debug("/delete data: ", data);
445
579
 
446
- let namespaces = await Namespace.find({ id_project: project_id }).catch((err) => {
447
- winston.error("find namespaces error: ", err)
448
- res.status(500).send({ success: false, error: err })
449
- })
450
-
451
- if (!namespaces || namespaces.length == 0) {
452
- let alert = "No namespace found for the selected project " + project_id + ". Cannot add content to a non-existent namespace."
453
- winston.warn(alert);
454
- res.status(403).send(alert);
580
+ let namespace;
581
+ try {
582
+ namespace = await aiManager.checkNamespace(project_id, namespace_id);
583
+ } catch (err) {
584
+ let errorCode = err?.errorCode ?? 500;
585
+ return res.status(errorCode).send({ success: false, error: err.error });
455
586
  }
456
587
 
457
- let namespaceIds = namespaces.map(namespace => namespace.id);
458
-
459
- if (!namespaceIds.includes(data.namespace)) {
460
- return res.status(403).send({ success: false, error: "Not allowed. The namespace does not belong to the current project." })
461
- }
462
-
463
- let ns = namespaces.find(n => n.id === data.namespace);
464
- data.engine = ns.engine || default_engine;
588
+ data.engine = namespace.engine || default_engine;
465
589
 
466
590
  aiService.deleteIndex(data).then((resp) => {
467
591
  winston.debug("delete resp: ", resp.data);
@@ -480,25 +604,15 @@ router.delete('/deleteall', async (req, res) => {
480
604
  let data = req.body;
481
605
  winston.debug('/delete all data: ', data);
482
606
 
483
- let namespaces = await Namespace.find({ id_project: project_id }).catch((err) => {
484
- winston.error("find namespaces error: ", err)
485
- res.status(500).send({ success: false, error: err })
486
- })
487
-
488
- if (!namespaces || namespaces.length == 0) {
489
- let alert = "No namespace found for the selected project " + project_id + ". Cannot add content to a non-existent namespace."
490
- winston.warn(alert);
491
- res.status(403).send(alert);
492
- }
493
-
494
- let namespaceIds = namespaces.map(namespace => namespace.id);
495
-
496
- if (!namespaceIds.includes(data.namespace)) {
497
- return res.status(403).send({ success: false, error: "Not allowed. The namespace does not belong to the current project." })
607
+ let namespace;
608
+ try {
609
+ namespace = await aiManager.checkNamespace(project_id, namespace_id);
610
+ } catch (err) {
611
+ let errorCode = err?.errorCode ?? 500;
612
+ return res.status(errorCode).send({ success: false, error: err.error });
498
613
  }
499
614
 
500
- let ns = namespaces.find(n => n.id === data.namespace);
501
- data.engine = ns.engine || default_engine;
615
+ data.engine = namespace.engine || default_engine;
502
616
 
503
617
  winston.verbose("/deleteall data: ", data);
504
618
 
@@ -545,7 +659,8 @@ router.get('/namespace/all', async (req, res) => {
545
659
  name: "Default",
546
660
  preview_settings: default_preview_settings,
547
661
  default: true,
548
- engine: default_engine
662
+ engine: default_engine,
663
+ embedding: default_embedding
549
664
  })
550
665
 
551
666
  new_namespace.save((err, savedNamespace) => {
@@ -591,14 +706,12 @@ router.get('/namespace/:id/chunks/:content_id', async (req, res) => {
591
706
  let namespace_id = req.params.id;
592
707
  let content_id = req.params.content_id;
593
708
 
594
- let namespaces = await Namespace.find({ id_project: project_id }).catch((err) => {
595
- winston.error("find namespaces error: ", err)
596
- return res.status(500).send({ success: false, error: err })
597
- })
598
-
599
- let namespaceIds = namespaces.map(namespace => namespace.id);
600
- if (!namespaceIds.includes(namespace_id)) {
601
- return res.status(403).send({ success: false, error: "Not allowed. The namespace does not belong to the current project." })
709
+ let namespace;
710
+ try {
711
+ namespace = await aiManager.checkNamespace(project_id, namespace_id);
712
+ } catch (err) {
713
+ let errorCode = err?.errorCode ?? 500;
714
+ return res.status(errorCode).send({ success: false, error: err.error });
602
715
  }
603
716
 
604
717
  let content = await KB.find({ id_project: project_id, namespace: namespace_id, _id: content_id }).catch((err) => {
@@ -610,9 +723,7 @@ router.get('/namespace/:id/chunks/:content_id', async (req, res) => {
610
723
  return res.status(403).send({ success: false, error: "Not allowed. The content does not belong to the current namespace." })
611
724
  }
612
725
 
613
- let ns = namespaces.find(n => n.id === namespace_id);
614
- let engine = ns.engine || default_engine;
615
- delete engine._id;
726
+ let engine = namespace.engine || default_engine;
616
727
 
617
728
  if (process.env.NODE_ENV === 'test') {
618
729
  return res.status(200).send({ success: true, message: "Get chunks skipped in test environment"});
@@ -625,8 +736,8 @@ router.get('/namespace/:id/chunks/:content_id', async (req, res) => {
625
736
  return res.status(200).send(chunks);
626
737
 
627
738
  }).catch((err) => {
628
- console.error("error getting content chunks err.response: ", err.response)
629
- console.error("error getting content chunks err.data: ", err.data)
739
+ winston.error("error getting content chunks err.response: ", err.response)
740
+ winston.error("error getting content chunks err.data: ", err.data)
630
741
  return res.status(500).send({ success: false, error: err });
631
742
  })
632
743
 
@@ -639,14 +750,12 @@ router.get('/namespace/:id/chatbots', async (req, res) => {
639
750
 
640
751
  let chatbotsArray = [];
641
752
 
642
- let namespaces = await Namespace.find({ id_project: project_id }).catch((err) => {
643
- winston.error("find namespaces error: ", err)
644
- res.status(500).send({ success: false, error: err })
645
- })
646
-
647
- let namespaceIds = namespaces.map(namespace => namespace.id);
648
- if (!namespaceIds.includes(namespace_id)) {
649
- return res.status(403).send({ success: false, error: "Not allowed. The namespace does not belong to the current project." })
753
+ let namespace;
754
+ try {
755
+ namespace = await aiManager.checkNamespace(project_id, namespace_id);
756
+ } catch (err) {
757
+ let errorCode = err?.errorCode ?? 500;
758
+ return res.status(errorCode).send({ success: false, error: err.error });
650
759
  }
651
760
 
652
761
  let intents = await faq.find({ id_project: project_id, 'actions.namespace': namespace_id }).catch((err) => {
@@ -699,14 +808,12 @@ router.get('/namespace/export/:id', async (req, res) => {
699
808
 
700
809
  query.type = { $in: [ kbTypes.URL, kbTypes.TEXT, kbTypes.FAQ ] };
701
810
 
702
- let namespace = await Namespace.findOne({ id: namespace_id}).catch((err) => {
703
- winston.error("Error getting namepsace for export ", err);
704
- return res.status(500).send({ success: false, error: "Unable to get namespace with id " + namespace_id })
705
- })
706
-
707
- if (!namespace) {
708
- winston.warn("No namespace found with id ", namespace_id);
709
- return res.status(404).send({ success: false, error: "No namespace found with id " + namespace_id })
811
+ let namespace;
812
+ try {
813
+ namespace = await aiManager.checkNamespace(project_id, namespace_id);
814
+ } catch (err) {
815
+ let errorCode = err?.errorCode ?? 500;
816
+ return res.status(errorCode).send({ success: false, error: err.error });
710
817
  }
711
818
 
712
819
  let name = namespace.name;
@@ -718,7 +825,7 @@ router.get('/namespace/export/:id', async (req, res) => {
718
825
  })
719
826
 
720
827
  try {
721
- let filename = await generateFilename(name);
828
+ let filename = await aiManager.generateFilename(name);
722
829
  let json = {
723
830
  name: name,
724
831
  preview_settings: preview_settings,
@@ -765,7 +872,8 @@ router.post('/namespace', async (req, res) => {
765
872
  name: body.name,
766
873
  hybrid: hybrid,
767
874
  preview_settings: default_preview_settings,
768
- engine: engine
875
+ engine: engine,
876
+ embedding: default_embedding
769
877
  })
770
878
 
771
879
  let namespaces = await Namespace.find({ id_project: project_id }).catch((err) => {
@@ -779,7 +887,6 @@ router.post('/namespace', async (req, res) => {
779
887
  let quoteManager = req.app.get('quote_manager');
780
888
  let limits = await quoteManager.getPlanLimits(req.project);
781
889
  let ns_limit = limits.namespace;
782
- //console.log("Limit of namespaces for current plan " + ns_limit);
783
890
 
784
891
  if (namespaces.length >= ns_limit) {
785
892
  return res.status(403).send({ success: false, error: "Maximum number of resources reached for the current plan", plan_limit: ns_limit });
@@ -940,7 +1047,7 @@ router.post('/namespace/import/:id', upload.single('uploadFile'), async (req, re
940
1047
  winston.verbose("resources to be sent to worker: ", resources);
941
1048
 
942
1049
  if (process.env.NODE_ENV !== "test") {
943
- scheduleScrape(resources, hybrid);
1050
+ aiManager.scheduleScrape(resources, hybrid);
944
1051
  }
945
1052
 
946
1053
  res.status(200).send({ success: true, message: "Contents imported successfully" });
@@ -972,7 +1079,6 @@ router.post('/namespace/import/:id', upload.single('uploadFile'), async (req, re
972
1079
 
973
1080
  })
974
1081
 
975
-
976
1082
  router.put('/namespace/:id', async (req, res) => {
977
1083
 
978
1084
  let namespace_id = req.params.id;
@@ -1228,6 +1334,10 @@ router.get('/:kb_id', async (req, res) => {
1228
1334
  return res.status(500).send({ success: false, error: err });
1229
1335
  }
1230
1336
 
1337
+ if (!kb) {
1338
+ return res.status(404).send({ success: false, error: "Content not found with id " + kb_id });
1339
+ }
1340
+
1231
1341
  return res.status(200).send(kb);
1232
1342
  })
1233
1343
  })
@@ -1236,45 +1346,28 @@ router.post('/', async (req, res) => {
1236
1346
 
1237
1347
  let project_id = req.projectid;
1238
1348
  let body = req.body;
1349
+ let namespace_id = body.namespace;
1239
1350
 
1240
1351
  if (!body.namespace) {
1241
1352
  return res.status(400).send({ success: false, error: "parameter 'namespace' is not defined" });
1242
1353
  }
1243
1354
 
1244
- let namespaces = await Namespace.find({ id_project: project_id }).catch((err) => {
1245
- winston.error("find namespaces error: ", err)
1246
- res.status(500).send({ success: false, error: err })
1247
- })
1248
-
1249
- if (!namespaces || namespaces.length == 0) {
1250
- let alert = "No namespace found for the selected project " + project_id + ". Cannot add content to a non-existent namespace."
1251
- winston.warn(alert);
1252
- res.status(403).send(alert);
1253
- }
1254
-
1255
- let namespaceIds = namespaces.map(namespace => namespace.id);
1256
-
1257
- if (!namespaceIds.includes(body.namespace)) {
1258
- return res.status(403).send({ success: false, error: "Not allowed. The namespace does not belong to the current project." })
1355
+ let namespace;
1356
+ try {
1357
+ namespace = await aiManager.checkNamespace(project_id, namespace_id);
1358
+ } catch (err) {
1359
+ let errorCode = err?.errorCode ?? 500;
1360
+ return res.status(errorCode).send({ success: false, error: err.error });
1259
1361
  }
1260
1362
 
1261
1363
  let quoteManager = req.app.get('quote_manager');
1262
- let limits = await quoteManager.getPlanLimits(req.project);
1263
- let kbs_limit = limits.kbs;
1264
- winston.verbose("Limit of kbs for current plan: " + kbs_limit);
1265
-
1266
- let kbs_count = await KB.countDocuments({ id_project: project_id }).exec();
1267
- winston.verbose("Kbs count: " + kbs_count);
1268
-
1269
- if (kbs_count >= kbs_limit) {
1270
- return res.status(403).send({ success: false, error: "Maximum number of resources reached for the current plan", plan_limit: kbs_limit })
1364
+ try {
1365
+ await aiManager.checkQuotaAvailability(quoteManager, req.project, 1)
1366
+ } catch(err) {
1367
+ let errorCode = err?.errorCode ?? 500;
1368
+ return res.status(errorCode).send({ success: false, error: err.error, plan_limit: err.plan_limit })
1271
1369
  }
1272
1370
 
1273
- let total_count = kbs_count + 1;
1274
- if (total_count > kbs_limit) {
1275
- return res.status(403).send({ success: false, error: "Cannot exceed the number of resources in the current plan", plan_limit: kbs_limit })
1276
- }
1277
-
1278
1371
  let new_kb = {
1279
1372
  id_project: project_id,
1280
1373
  name: body.name,
@@ -1294,7 +1387,7 @@ router.post('/', async (req, res) => {
1294
1387
  new_kb.refresh_rate = body.refresh_rate;
1295
1388
  if (!body.scrape_type || body.scrape_type === 2) {
1296
1389
  new_kb.scrape_type = 2;
1297
- new_kb.scrape_options = await setDefaultScrapeOptions();
1390
+ new_kb.scrape_options = await aiManager.setDefaultScrapeOptions();
1298
1391
  } else {
1299
1392
  new_kb.scrape_type = body.scrape_type;
1300
1393
  new_kb.scrape_options = body.scrape_options;
@@ -1303,19 +1396,18 @@ router.post('/', async (req, res) => {
1303
1396
 
1304
1397
  winston.debug("adding kb: ", new_kb);
1305
1398
 
1306
- KB.findOneAndUpdate({ id_project: project_id, type: 'url', source: new_kb.source }, new_kb, { upsert: true, new: true, rawResult: true }, async (err, raw) => {
1399
+ KB.findOneAndUpdate({ id_project: project_id, type: 'url', source: new_kb.source }, new_kb, { upsert: true, new: true, rawResult: true }, async (err, raw_content) => {
1307
1400
  if (err) {
1308
1401
  winston.error("findOneAndUpdate with upsert error: ", err);
1309
1402
  res.status(500).send({ success: false, error: err });
1310
1403
  }
1311
1404
  else {
1312
1405
 
1313
- delete raw.ok;
1314
- delete raw.$clusterTime;
1315
- delete raw.operationTime;
1316
- res.status(200).send(raw);
1317
-
1318
- let saved_kb = raw.value;
1406
+ delete raw_content.ok;
1407
+ delete raw_content.$clusterTime;
1408
+ delete raw_content.operationTime;
1409
+
1410
+ let saved_kb = raw_content.value;
1319
1411
  let webhook = apiUrl + '/webhook/kb/status?token=' + KB_WEBHOOK_TOKEN;
1320
1412
 
1321
1413
  let json = {
@@ -1337,17 +1429,20 @@ router.post('/', async (req, res) => {
1337
1429
  if (saved_kb.scrape_options) {
1338
1430
  json.parameters_scrape_type_4 = saved_kb.scrape_options;
1339
1431
  }
1340
- let ns = namespaces.find(n => n.id === body.namespace);
1341
- json.engine = ns.engine || default_engine;
1342
- json.hybrid = ns.hybrid;
1343
-
1432
+ json.engine = namespace.engine || default_engine;
1433
+ json.embedding = namespace.embedding || default_embedding;
1434
+ json.hybrid = namespace.hybrid;
1435
+
1344
1436
  let resources = [];
1345
1437
 
1346
1438
  resources.push(json);
1347
1439
 
1348
- if (process.env.NODE_ENV !== 'test') {
1349
- scheduleScrape(resources, ns.hybrid);
1440
+ if (process.env.NODE_ENV === 'test') {
1441
+ return res.status(200).send({ success: true, message: "Schedule scrape skipped in test environment", data: raw_content, schedule_json: json });
1350
1442
  }
1443
+
1444
+ aiManager.scheduleScrape(resources, ns.hybrid);
1445
+ return res.status(200).send(raw_content);
1351
1446
 
1352
1447
  }
1353
1448
  })
@@ -1365,47 +1460,32 @@ router.post('/multi', upload.single('uploadFile'), async (req, res) => {
1365
1460
  }
1366
1461
 
1367
1462
  let project_id = req.projectid;
1368
- let scrape_type = req.body.scrape_type;
1369
- let scrape_options = req.body.scrape_options;
1370
1463
  let refresh_rate = req.body.refresh_rate;
1464
+ let scrape_type = req.body.scrape_type ?? 2;
1465
+ let scrape_options = req.body.scrape_options;
1466
+ if (scrape_type === 2 && scrape_options == null) {
1467
+ scrape_options = aiManager.setDefaultScrapeOptions();
1468
+ }
1371
1469
 
1372
1470
  let namespace_id = req.query.namespace;
1373
1471
  if (!namespace_id) {
1374
1472
  return res.status(400).send({ success: false, error: "queryParam 'namespace' is not defined" })
1375
1473
  }
1376
1474
 
1377
- let namespaces = await Namespace.find({ id_project: project_id }).catch((err) => {
1378
- winston.error("find namespaces error: ", err)
1379
- res.status(500).send({ success: false, error: err })
1380
- })
1381
-
1382
- if (!namespaces || namespaces.length == 0) {
1383
- let alert = "No namespace found for the selected project " + project_id + ". Cannot add content to a non-existent namespace."
1384
- winston.warn(alert);
1385
- res.status(403).send({ success: false, error: alert });
1386
- }
1387
-
1388
- let namespaceIds = namespaces.map(namespace => namespace.id);
1389
-
1390
- if (!namespaceIds.includes(namespace_id)) {
1391
- return res.status(403).send({ success: false, error: "Not allowed. The namespace does not belong to the current project." })
1475
+ let namespace;
1476
+ try {
1477
+ namespace = await aiManager.checkNamespace(project_id, namespace_id);
1478
+ } catch (err) {
1479
+ let errorCode = err?.errorCode ?? 500;
1480
+ return res.status(errorCode).send({ success: false, error: err.error });
1392
1481
  }
1393
1482
 
1394
1483
  let quoteManager = req.app.get('quote_manager');
1395
- let limits = await quoteManager.getPlanLimits(req.project);
1396
- let kbs_limit = limits.kbs;
1397
- winston.verbose("Limit of kbs for current plan: " + kbs_limit);
1398
-
1399
- let kbs_count = await KB.countDocuments({ id_project: project_id }).exec();
1400
- winston.verbose("Kbs count: " + kbs_count);
1401
-
1402
- if (kbs_count >= kbs_limit) {
1403
- return res.status(403).send({ success: false, error: "Maximum number of resources reached for the current plan", plan_limit: kbs_limit })
1404
- }
1405
-
1406
- let total_count = kbs_count + list.length;
1407
- if (total_count > kbs_limit) {
1408
- return res.status(403).send({ success: false, error: "Cannot exceed the number of resources in the current plan", plan_limit: kbs_limit })
1484
+ try {
1485
+ await aiManager.checkQuotaAvailability(quoteManager, req.project, list.length)
1486
+ } catch(err) {
1487
+ let errorCode = err?.errorCode ?? 500;
1488
+ return res.status(errorCode).send({ success: false, error: err.error, plan_limit: err.plan_limit })
1409
1489
  }
1410
1490
 
1411
1491
  if (list.length > 300) {
@@ -1413,81 +1493,27 @@ router.post('/multi', upload.single('uploadFile'), async (req, res) => {
1413
1493
  return res.status(403).send({ success: false, error: "Too many urls. Can't index more than 300 urls at a time." })
1414
1494
  }
1415
1495
 
1416
- let webhook = apiUrl + '/webhook/kb/status?token=' + KB_WEBHOOK_TOKEN;
1417
-
1418
- let kbs = [];
1419
- list.forEach( async (url) => {
1420
- let kb = {
1421
- id_project: project_id,
1422
- name: url,
1423
- source: url,
1424
- type: 'url',
1425
- content: "",
1426
- namespace: namespace_id,
1427
- status: -1,
1428
- scrape_type: scrape_type,
1429
- refresh_rate: refresh_rate
1430
- }
1431
-
1432
- if (!kb.scrape_type) {
1433
- scrape_type = 2;
1434
- }
1435
-
1436
- if (scrape_type == 2) {
1437
- kb.scrape_options = {
1438
- tags_to_extract: ["body"],
1439
- unwanted_tags: [],
1440
- unwanted_classnames: []
1441
- }
1442
- } else {
1443
- kb.scrape_options = scrape_options;
1444
- }
1445
- // if (scrape_type === 2) {
1446
- // kb.scrape_options = await setDefaultScrapeOptions();
1447
- // } else {
1448
- // kb.scrape_options = await setCustomScrapeOptions(scrape_options);
1449
- // }
1450
- kbs.push(kb)
1451
- })
1452
-
1453
- let operations = kbs.map(doc => {
1454
- return {
1455
- updateOne: {
1456
- filter: { id_project: doc.id_project, type: 'url', source: doc.source, namespace: namespace_id },
1457
- update: doc,
1458
- upsert: true,
1459
- returnOriginal: false
1460
- }
1461
- }
1462
- })
1463
-
1464
- saveBulk(operations, kbs, project_id, namespace_id).then((result) => {
1465
-
1466
- let ns = namespaces.find(n => n.id === namespace_id);
1467
- let engine = ns.engine || default_engine;
1468
- let hybrid = ns.hybrid;
1469
-
1470
- let resources = result.map(({ name, status, __v, createdAt, updatedAt, id_project, ...keepAttrs }) => keepAttrs)
1471
- resources = resources.map(({ _id, scrape_options, ...rest }) => {
1472
- return { id: _id, webhook: webhook, parameters_scrape_type_4: scrape_options, engine: engine, hybrid: hybrid, ...rest}
1473
- });
1474
- winston.verbose("resources to be sent to worker: ", resources);
1475
-
1476
- if (process.env.NODE_ENV !== 'test') {
1477
- scheduleScrape(resources, hybrid);
1478
- }
1479
- res.status(200).send(result);
1496
+ const options = {
1497
+ scrape_type: scrape_type,
1498
+ scrape_options: scrape_options,
1499
+ refresh_rate: refresh_rate
1500
+ }
1480
1501
 
1481
- }).catch((err) => {
1482
- winston.error("Unable to save kbs in bulk ", err)
1483
- res.status(500).send(err);
1484
- })
1502
+ let result;
1503
+ try {
1504
+ result = await aiManager.addMultipleUrls(namespace, list, options);
1505
+ return res.status(200).send(result);
1506
+ } catch (err) {
1507
+ winston.error("addMultipleUrls error: ", err)
1508
+ return res.status(500).send({ success: false, error: "Unable to add multiple urls due to an error." });
1509
+ }
1485
1510
 
1486
1511
  })
1487
1512
 
1488
1513
  router.post('/csv', upload.single('uploadFile'), async (req, res) => {
1489
1514
 
1490
1515
  let project_id = req.projectid;
1516
+ let namespace_id = req.query.namespace;
1491
1517
 
1492
1518
  let csv = req.file.buffer.toString('utf8');
1493
1519
  winston.debug("csv: ", csv);
@@ -1495,38 +1521,12 @@ router.post('/csv', upload.single('uploadFile'), async (req, res) => {
1495
1521
  let delimiter = req.body.delimiter || ";";
1496
1522
  winston.debug("delimiter: ", delimiter);
1497
1523
 
1498
- let namespace_id = req.query.namespace;
1499
- if (!namespace_id) {
1500
- return res.status(400).send({ success: false, error: "queryParam 'namespace' is not defined" })
1501
- }
1502
-
1503
- let namespaces = await Namespace.find({ id_project: project_id }).catch((err) => {
1504
- winston.error("find namespaces error: ", err)
1505
- res.status(500).send({ success: false, error: err })
1506
- })
1507
-
1508
- if (!namespaces || namespaces.length == 0) {
1509
- let alert = "No namespace found for the selected project " + project_id + ". Cannot add content to a non-existent namespace."
1510
- winston.warn(alert);
1511
- res.status(403).send({ success: false, error: alert });
1512
- }
1513
-
1514
- let namespaceIds = namespaces.map(namespace => namespace.id);
1515
-
1516
- if (!namespaceIds.includes(namespace_id)) {
1517
- return res.status(403).send({ success: false, error: "Not allowed. The namespace does not belong to the current project." })
1518
- }
1519
-
1520
- let quoteManager = req.app.get('quote_manager');
1521
- let limits = await quoteManager.getPlanLimits(req.project);
1522
- let kbs_limit = limits.kbs;
1523
- winston.verbose("Limit of kbs for current plan: " + kbs_limit);
1524
-
1525
- let kbs_count = await KB.countDocuments({ id_project: project_id }).exec();
1526
- winston.verbose("Kbs count: " + kbs_count);
1527
-
1528
- if (kbs_count >= kbs_limit) {
1529
- return res.status(403).send({ success: false, error: "Maximum number of resources reached for the current plan", plan_limit: kbs_limit })
1524
+ let namespace;
1525
+ try {
1526
+ namespace = await aiManager.checkNamespace(project_id, namespace_id);
1527
+ } catch (err) {
1528
+ let errorCode = err?.errorCode ?? 500;
1529
+ return res.status(errorCode).send({ success: false, error: err.error });
1530
1530
  }
1531
1531
 
1532
1532
  let webhook = apiUrl + '/webhook/kb/status?token=' + KB_WEBHOOK_TOKEN;
@@ -1549,16 +1549,15 @@ router.post('/csv', upload.single('uploadFile'), async (req, res) => {
1549
1549
  status: -1
1550
1550
  })
1551
1551
  })
1552
- .on("end", () => {
1552
+ .on("end", async () => {
1553
1553
  winston.debug("kbs after CSV parsing: ", kbs);
1554
1554
 
1555
- let total_count = kbs_count + kbs.length;
1556
- if (total_count >= kbs_limit) {
1557
- return res.status(403).send({ success: false, error: "Cannot exceed the number of resources in the current plan", plan_limit: kbs_limit })
1558
- }
1559
-
1560
- if (kbs.length > 300) {
1561
- return res.status(403).send({ success: false, error: "Too many faqs. Can't index more than 300 urls at a time." })
1555
+ try {
1556
+ let quoteManager = req.app.get('quote_manager');
1557
+ await aiManager.checkQuotaAvailability(quoteManager, req.project, kbs.length)
1558
+ } catch(err) {
1559
+ let errorCode = err?.errorCode ?? 500;
1560
+ return res.status(errorCode).send({ success: false, error: err.error, plan_limit: err.plan_limit })
1562
1561
  }
1563
1562
 
1564
1563
  let operations = kbs.map(doc => {
@@ -1572,21 +1571,25 @@ router.post('/csv', upload.single('uploadFile'), async (req, res) => {
1572
1571
  }
1573
1572
  })
1574
1573
 
1575
- saveBulk(operations, kbs, project_id, namespace_id).then((result) => {
1574
+ aiManager.saveBulk(operations, kbs, project_id, namespace_id).then((result) => {
1576
1575
 
1577
- let ns = namespaces.find(n => n.id === namespace_id);
1578
- let engine = ns.engine || default_engine;
1579
- let hybrid = ns.hybrid;
1576
+ let engine = namespace.engine || default_engine;
1577
+ let embedding = namespace.embedding || default_embedding;
1578
+ let hybrid = namespace.hybrid;
1580
1579
 
1581
1580
  let resources = result.map(({ name, status, __v, createdAt, updatedAt, id_project, ...keepAttrs }) => keepAttrs)
1582
1581
  resources = resources.map(({ _id, ...rest}) => {
1583
- return { id: _id, webhooh: webhook, engine: engine, ...rest };
1582
+ return { id: _id, webhook: webhook, embedding: embedding, engine: engine, ...rest };
1584
1583
  })
1585
1584
  winston.verbose("resources to be sent to worker: ", resources);
1586
- if (process.env.NODE_ENV !== 'test') {
1587
- scheduleScrape(resources, hybrid);
1585
+
1586
+ if (process.env.NODE_ENV === 'test') {
1587
+ return res.status(200).send({ success: true, message: "Schedule scrape skipped in test environment", data: result, schedule_json: resources });
1588
1588
  }
1589
- res.status(200).send(result);
1589
+
1590
+ aiManager.scheduleScrape(resources, hybrid);
1591
+ return res.status(200).send(result);
1592
+
1590
1593
  }).catch((err) => {
1591
1594
  winston.error("Unabled to saved kbs in bulk " + err);
1592
1595
  res.status(500).send(err);
@@ -1607,7 +1610,7 @@ router.post('/sitemap', async (req, res) => {
1607
1610
  const sitemap = new Sitemapper({
1608
1611
  url: sitemap_url,
1609
1612
  timeout: 15000,
1610
- debug: true
1613
+ debug: false
1611
1614
  });
1612
1615
 
1613
1616
  sitemap.fetch().then((data) => {
@@ -1621,6 +1624,114 @@ router.post('/sitemap', async (req, res) => {
1621
1624
 
1622
1625
  })
1623
1626
 
1627
/**
 * POST /sitemap/import?namespace=<namespace_id>
 *
 * Fetches the sitemap located at `req.body.source`, upserts a KB document of
 * type 'sitemap' describing it, then passes every URL found in the sitemap to
 * `aiManager.addMultipleUrls` together with the sitemap's scrape settings.
 *
 * Body fields read: `type` (must be "sitemap"), `source`, `refresh_rate`,
 * `scrape_type` (defaults to 2 when null/undefined) and `scrape_options`.
 *
 * Responses:
 *   200 - the value returned by `aiManager.addMultipleUrls`, with the saved
 *         sitemap content appended to it
 *   400 - 'namespace' query param missing, 'source' missing, or the sitemap
 *         contains no url
 *   403 - body `type` is not "sitemap"
 *   500 - sitemap fetch, content save or url import failed; a failing
 *         `checkNamespace` answers with `err.errorCode` (500 when absent)
 */
router.post('/sitemap/import', async (req, res) => {

  let project_id = req.projectid;
  let namespace_id = req.query.namespace;
  let content = req.body;

  // Optional chaining: a missing body is rejected like any other non-sitemap payload
  // instead of throwing inside the async handler.
  if (content?.type !== "sitemap") {
    return res.status(403).send({success: false, error: "Endpoint available for sitemap type only." });
  }

  if (!namespace_id) {
    return res.status(400).send({ success: false, error: "queryParam 'namespace' is not defined" })
  }

  let namespace;
  try {
    namespace = await aiManager.checkNamespace(project_id, namespace_id);
  } catch (err) {
    let errorCode = err?.errorCode ?? 500;
    return res.status(errorCode).send({ success: false, error: err.error });
  }

  let sitemap_url = content.source;
  if (!sitemap_url) {
    // Without a source there is nothing to fetch: fail fast with a client error.
    return res.status(400).send({ success: false, error: "Missing 'source' parameter" });
  }

  // NOTE(review): the plan-limit (kbs quota) check is not performed in this handler;
  // presumably it is enforced elsewhere (e.g. by aiManager) - confirm.

  const sitemap = new Sitemapper({
    url: sitemap_url,
    timeout: 15000,
    debug: false
  });

  // try/catch instead of `.catch()` on the awaited promise: returning from a `.catch()`
  // callback only resolves the awaited value, so the handler would keep running and
  // attempt to send a second response after the 500 had already been sent.
  let data;
  try {
    data = await sitemap.fetch();
  } catch (err) {
    winston.error("Error fetching sitemap: ", err);
    return res.status(500).send({ success: false, error: err });
  }

  if (data?.errors && data.errors.length > 0) {
    winston.error("An error occurred during sitemap fetch: ", data.errors[0])
    return res.status(500).send({ success: false, error: "Unable to fecth sitemap due to an error: " + data.errors[0].message})
  }

  const urls = Array.isArray(data?.sites) ? data.sites : [];
  if (urls.length === 0) {
    return res.status(400).send({ success: false, error: "No url found on sitemap" });
  }

  let refresh_rate = content.refresh_rate;
  let scrape_type = content.scrape_type ?? 2;
  let scrape_options = content.scrape_options;
  if (scrape_type === 2 && scrape_options == null) {
    // `await` is a no-op on a plain object and unwraps the value if the helper is async.
    scrape_options = await aiManager.setDefaultScrapeOptions();
  }

  let sitemap_content = {
    id_project: project_id,
    name: sitemap_url,
    source: sitemap_url,
    type: 'sitemap',
    content: "",
    namespace: namespace_id,
    scrape_type: scrape_type,
    scrape_options: scrape_options,
    refresh_rate: refresh_rate
  }

  // Upsert keyed on project, type, source and namespace, so importing the same
  // sitemap again updates the existing document instead of creating a new one.
  let saved_content;
  try {
    saved_content = await KB.findOneAndUpdate({ id_project: project_id, type: 'sitemap', source: sitemap_url, namespace: namespace_id }, sitemap_content, { upsert: true, new: true }).lean().exec();
  } catch (err) {
    winston.error("Error saving content: ", err);
    return res.status(500).send({ success: false, error: err });
  }

  // Settings handed to addMultipleUrls along with the urls; they reference the saved sitemap document.
  const options = {
    sitemap_origin_id: saved_content._id,
    sitemap_origin: saved_content.source,
    scrape_type: saved_content.scrape_type,
    scrape_options: saved_content.scrape_options,
    refresh_rate: saved_content.refresh_rate
  }

  let result;
  try {
    result = await aiManager.addMultipleUrls(namespace, urls, options);
    if (process.env.NODE_ENV === 'test') {
      // In the test environment the saved content is appended to `result.result`;
      // presumably addMultipleUrls returns a wrapper object there - confirm against aiManager.
      result.result.push(saved_content);
      return res.status(200).send(result);
    }
    result.push(saved_content);
    return res.status(200).send(result);
  } catch (err) {
    // Log the cause: the response carries only a generic message.
    winston.error("Error adding multiple urls from sitemap: ", err);
    return res.status(500).send({ success: false, error: "Unable to add multiple urls from sitemap due to an error." });
  }

})
1734
+
1624
1735
  router.put('/:kb_id', async (req, res) => {
1625
1736
 
1626
1737
  let kb_id = req.params.kb_id;
@@ -1670,24 +1781,27 @@ router.delete('/:kb_id', async (req, res) => {
1670
1781
  winston.error("Unable to delete kb. Kb not found...")
1671
1782
  return res.status(404).send({ success: false, error: "Content not found" })
1672
1783
  }
1673
-
1784
+
1785
+ let namespace_id = kb.namespace ?? project_id;
1786
+
1787
+ let namespace;
1788
+ try {
1789
+ namespace = await aiManager.checkNamespace(project_id, namespace_id);
1790
+ } catch (err) {
1791
+ let errorCode = err?.errorCode ?? 500;
1792
+ return res.status(errorCode).send({ success: false, error: err.error });
1793
+ }
1794
+
1674
1795
  let data = {
1675
1796
  id: kb_id,
1676
- namespace: kb.namespace
1797
+ namespace: namespace_id
1677
1798
  }
1678
1799
 
1679
1800
  if (!data.namespace) {
1680
1801
  data.namespace = project_id;
1681
1802
  }
1682
-
1683
- let namespaces = await Namespace.find({ id_project: project_id }).catch((err) => {
1684
- winston.error("find namespaces error: ", err)
1685
- res.status(500).send({ success: false, error: err })
1686
- })
1687
-
1688
- let ns = namespaces.find(n => n.id === data.namespace);
1689
- data.engine = ns.engine || default_engine;
1690
-
1803
+
1804
+ data.engine = namespace.engine || default_engine;
1691
1805
  winston.verbose("/:delete_id data: ", data);
1692
1806
 
1693
1807
  aiService.deleteIndex(data).then((resp) => {
@@ -1733,196 +1847,4 @@ router.delete('/:kb_id', async (req, res) => {
1733
1847
  */
1734
1848
 
1735
1849
 
1736
- //----------------------------------------
1737
-
1738
-
1739
- /**
1740
- * ****************************************
1741
- * Utils Methods Section - Start
1742
- * ****************************************
1743
- */
1744
-
1745
- async function saveBulk(operations, kbs, project_id, namespace) {
1746
-
1747
- return new Promise((resolve, reject) => {
1748
- KB.bulkWrite(operations, { ordered: false }).then((result) => {
1749
- winston.verbose("bulkWrite operations result: ", result);
1750
-
1751
- KB.find({ id_project: project_id, namespace: namespace, source: { $in: kbs.map(kb => kb.source) } }).lean().then((documents) => {
1752
- winston.debug("documents: ", documents);
1753
- resolve(documents)
1754
- }).catch((err) => {
1755
- winston.error("Error finding documents ", err)
1756
- reject(err);
1757
- })
1758
-
1759
- }).catch((err) => {
1760
- reject(err);
1761
- })
1762
- })
1763
-
1764
- }
1765
-
1766
- async function statusConverter(status) {
1767
- return new Promise((resolve) => {
1768
-
1769
- let td_status;
1770
- switch (status) {
1771
- case 0:
1772
- td_status = -1;
1773
- break;
1774
- case 2:
1775
- td_status = 200;
1776
- break;
1777
- case 3:
1778
- td_status = 300;
1779
- break;
1780
- case 4:
1781
- td_status = 400;
1782
- break;
1783
- default:
1784
- td_status = -1
1785
- }
1786
- resolve(td_status);
1787
- })
1788
- }
1789
-
1790
- async function updateStatus(id, status) {
1791
- return new Promise((resolve) => {
1792
-
1793
- KB.findByIdAndUpdate(id, { status: status }, { new: true }, (err, updatedKb) => {
1794
- if (err) {
1795
- resolve(false)
1796
- } else if (!updatedKb) {
1797
- winston.verbose("Unable to update status. Data source not found.")
1798
- resolve(false)
1799
- } else {
1800
- winston.debug("updatedKb: ", updatedKb)
1801
- resolve(true);
1802
- }
1803
- })
1804
- })
1805
- }
1806
-
1807
- async function scheduleScrape(resources, hybrid) {
1808
-
1809
- let scheduler;
1810
- if (hybrid) {
1811
- scheduler = new Scheduler({ jobManager: jobManagerHybrid });
1812
- } else {
1813
- scheduler = new Scheduler({ jobManager: jobManager });
1814
- }
1815
-
1816
- if (!scheduler) {
1817
- winston.error("ScheduleScrape JobManager is not defined");
1818
- return false;
1819
- }
1820
-
1821
- resources.forEach(r => {
1822
- winston.debug("Schedule job with following data: ", r);
1823
- scheduler.trainSchedule(r, async (err, result) => {
1824
- let error_code = 100;
1825
- if (err) {
1826
- winston.error("Scheduling error: ", err);
1827
- error_code = 400;
1828
- } else {
1829
- winston.verbose("Scheduling result: ", result);
1830
- }
1831
- await updateStatus(r.id, error_code);
1832
- });
1833
- })
1834
-
1835
- return true;
1836
- }
1837
-
1838
- async function startScrape(data) {
1839
-
1840
- if (!data.gptkey) {
1841
- let gptkey = process.env.GPTKEY;
1842
- if (!gptkey) {
1843
- return { error: "GPT apikey undefined" }
1844
- }
1845
- data.gptkey = gptkey;
1846
- }
1847
-
1848
-
1849
- let status_updated = await updateStatus(data.id, 200);
1850
- winston.verbose("status of kb " + data.id + " updated: " + status_updated);
1851
-
1852
- return new Promise((resolve, reject) => {
1853
- aiService.singleScrape(data).then(async (resp) => {
1854
- winston.debug("singleScrape resp: ", resp.data);
1855
- let status_updated = await updateStatus(data.id, 300);
1856
- winston.verbose("status of kb " + data.id + " updated: " + status_updated);
1857
- resolve(resp.data);
1858
- }).catch( async (err) => {
1859
- winston.error("singleScrape err: ", err);
1860
- let status_updated = await updateStatus(data.id, 400);
1861
- winston.verbose("status of kb " + data.id + " updated: " + status_updated);
1862
- reject(err);
1863
- })
1864
- })
1865
- }
1866
-
1867
- async function getKeyFromIntegrations(project_id) {
1868
-
1869
- return new Promise( async (resolve) => {
1870
-
1871
- let integration = await Integration.findOne({ id_project: project_id, name: 'openai' }).catch((err) => {
1872
- winston.error("Unable to find openai integration for the current project " + project_id);
1873
- resolve(null);
1874
- })
1875
- if (integration && integration.value && integration.value.apikey) {
1876
- resolve(integration.value.apikey);
1877
- } else {
1878
- resolve(null);
1879
- }
1880
- })
1881
- }
1882
-
1883
- async function setDefaultScrapeOptions() {
1884
- return {
1885
- tags_to_extract: ["body"],
1886
- unwanted_tags: [],
1887
- unwanted_classnames: []
1888
- }
1889
- }
1890
-
1891
- async function setCustomScrapeOptions(options) {
1892
- if (!options) {
1893
- options = await setDefaultScrapeOptions();
1894
- } else {
1895
- if (!options.tags_to_extract || options.tags_to_extract.length == 0) {
1896
- options.tags_to_extract = ["body"];
1897
- }
1898
- if (!options.unwanted_tags) {
1899
- options.unwanted_tags = [];
1900
- }
1901
- if (!options.unwanted_classnames) {
1902
- options.unwanted_classnames = [];
1903
- }
1904
- }
1905
- }
1906
-
1907
- async function generateFilename(name) {
1908
- return name
1909
- .toLowerCase()
1910
- .trim()
1911
- .normalize("NFD") // Normalize characters with accents
1912
- .replace(/[\u0300-\u036f]/g, "") // Removes diacritics (e.g. à becomes a)
1913
- .replace(/[^a-z0-9\s-_]/g, "") // Remove special characters
1914
- .replace(/\s+/g, "-") // Replaces spaces with dashes
1915
- .replace(/_/g, "-")
1916
- .replace(/-+/g, "-"); // Removes consecutive hyphens
1917
- }
1918
-
1919
-
1920
- /**
1921
- * ****************************************
1922
- * Utils Methods Section - End
1923
- * ****************************************
1924
- */
1925
-
1926
-
1927
-
1928
1850
  module.exports = router;