@tiledesk/tiledesk-server 2.13.48 → 2.13.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +9 -1
- package/app.js +5 -3
- package/config/kb/embedding.js +7 -0
- package/config/kb/engine.hybrid.js +10 -0
- package/config/kb/engine.js +10 -0
- package/jobs.js +4 -1
- package/models/kb_setting.js +60 -15
- package/models/profile.js +54 -5
- package/models/request.js +1 -1
- package/package.json +4 -4
- package/pubmodules/apps/listener.js +2 -1
- package/routes/kb.js +548 -626
- package/routes/webhook.js +86 -38
- package/services/aiManager.js +464 -0
- package/services/aiService.js +4 -2
- package/test/kbRoute.js +956 -910
package/routes/kb.js
CHANGED
|
@@ -1,24 +1,23 @@
|
|
|
1
1
|
var express = require('express');
|
|
2
|
-
var { Namespace, KB, Engine } = require('../models/kb_setting');
|
|
3
|
-
// var { KB } = require('../models/kb_setting');
|
|
4
|
-
// var { Engine } = require('../models/kb_setting')
|
|
5
2
|
var router = express.Router();
|
|
6
3
|
var winston = require('../config/winston');
|
|
7
4
|
var multer = require('multer')
|
|
8
5
|
var upload = multer()
|
|
9
|
-
const aiService = require('../services/aiService');
|
|
10
6
|
const JobManager = require('../utils/jobs-worker-queue-manager/JobManagerV2');
|
|
11
|
-
const { Scheduler } = require('../services/Scheduler');
|
|
12
7
|
var configGlobal = require('../config/global');
|
|
13
|
-
const Sitemapper = require('sitemapper');
|
|
14
8
|
var mongoose = require('mongoose');
|
|
9
|
+
var parsecsv = require("fast-csv");
|
|
10
|
+
|
|
11
|
+
var { Namespace, KB } = require('../models/kb_setting');
|
|
15
12
|
const faq = require('../models/faq');
|
|
16
13
|
const faq_kb = require('../models/faq_kb');
|
|
17
|
-
let Integration = require('../models/integrations');
|
|
18
|
-
var parsecsv = require("fast-csv");
|
|
19
14
|
|
|
20
15
|
const { MODELS_MULTIPLIER } = require('../utils/aiUtils');
|
|
21
16
|
const { kbTypes } = require('../models/kbConstants');
|
|
17
|
+
const Sitemapper = require('sitemapper');
|
|
18
|
+
|
|
19
|
+
const aiService = require('../services/aiService');
|
|
20
|
+
const aiManager = require('../services/aiManager');
|
|
22
21
|
const integrationService = require('../services/integrationService');
|
|
23
22
|
|
|
24
23
|
const AMQP_MANAGER_URL = process.env.AMQP_MANAGER_URL;
|
|
@@ -68,7 +67,6 @@ jobManagerHybrid.connectAndStartPublisher((status, error) => {
|
|
|
68
67
|
}
|
|
69
68
|
})
|
|
70
69
|
|
|
71
|
-
|
|
72
70
|
let default_preview_settings = {
|
|
73
71
|
model: 'gpt-4o',
|
|
74
72
|
max_tokens: 256,
|
|
@@ -76,38 +74,25 @@ let default_preview_settings = {
|
|
|
76
74
|
top_k: 4,
|
|
77
75
|
alpha: 0.5,
|
|
78
76
|
context: null
|
|
79
|
-
//context: "You are an awesome AI Assistant."
|
|
80
|
-
}
|
|
81
|
-
let default_engine = {
|
|
82
|
-
name: "pinecone",
|
|
83
|
-
type: process.env.PINECONE_TYPE || "pod",
|
|
84
|
-
apikey: "",
|
|
85
|
-
vector_size: 1536,
|
|
86
|
-
index_name: process.env.PINECONE_INDEX
|
|
87
|
-
}
|
|
88
|
-
let default_engine_hybrid = {
|
|
89
|
-
name: "pinecone",
|
|
90
|
-
type: process.env.PINECONE_TYPE_HYBRID || "serverless",
|
|
91
|
-
apikey: "",
|
|
92
|
-
vector_size: 1536,
|
|
93
|
-
index_name: process.env.PINECONE_INDEX_HYBRID
|
|
94
77
|
}
|
|
95
78
|
|
|
96
|
-
|
|
97
|
-
|
|
79
|
+
const default_engine = require('../config/kb/engine');
|
|
80
|
+
const default_engine_hybrid = require('../config/kb/engine.hybrid');
|
|
81
|
+
const default_embedding = require('../config/kb/embedding');
|
|
82
|
+
|
|
98
83
|
let contexts = {
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
84
|
+
"gpt-3.5-turbo": process.env.GPT_3_5_CONTEXT || "You are an helpful assistant for question-answering tasks.\nUse ONLY the pieces of retrieved context delimited by #### and the chat history to answer the question.\nIf you don't know the answer, just say: \"I don't know<NOANS>\"\n\n####{context}####",
|
|
85
|
+
"gpt-4": process.env.GPT_4_CONTEXT || "You are an helpful assistant for question-answering tasks.\nUse ONLY the pieces of retrieved context delimited by #### and the chat history to answer the question.\nIf you don't know the answer, just say that you don't know.\nIf and only if none of the retrieved context is useful for your task, add this word to the end <NOANS>\n\n####{context}####",
|
|
86
|
+
"gpt-4-turbo-preview": process.env.GPT_4T_CONTEXT || "You are an helpful assistant for question-answering tasks.\nUse ONLY the pieces of retrieved context delimited by #### and the chat history to answer the question.\nIf you don't know the answer, just say that you don't know.\nIf and only if none of the retrieved context is useful for your task, add this word to the end <NOANS>\n\n####{context}####",
|
|
87
|
+
"gpt-4o": process.env.GPT_4O_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, return <NOANS>\n\n==Retrieved context start==\n{context}\n==Retrieved context end==",
|
|
88
|
+
"gpt-4o-mini": process.env.GPT_4O_MINI_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, return <NOANS>\n\n==Retrieved context start==\n{context}\n==Retrieved context end==",
|
|
89
|
+
"gpt-4.1": process.env.GPT_4_1_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, append <NOANS> at the end of the answer\n\n==Retrieved context start==\n{context}\n==Retrieved context end==",
|
|
90
|
+
"gpt-4.1-mini": process.env.GPT_4_1_MINI_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, append <NOANS> at the end of the answer\n\n==Retrieved context start==\n{context}\n==Retrieved context end==",
|
|
91
|
+
"gpt-4.1-nano": process.env.GPT_4_1_NANO_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, append <NOANS> at the end of the answer\n\n==Retrieved context start==\n{context}\n==Retrieved context end==",
|
|
92
|
+
"gpt-5": process.env.GPT_5_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, append <NOANS> at the end of the answer\n\n==Retrieved context start==\n{context}\n==Retrieved context end==",
|
|
93
|
+
"gpt-5-mini": process.env.GPT_5_MINI_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, append <NOANS> at the end of the answer\n\n==Retrieved context start==\n{context}\n==Retrieved context end==",
|
|
94
|
+
"gpt-5-nano": process.env.GPT_5_NANO_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, append <NOANS> at the end of the answer\n\n==Retrieved context start==\n{context}\n==Retrieved context end==",
|
|
95
|
+
"general": process.env.GENERAL_CONTEXT || "You are an helpful assistant for question-answering tasks. Follow these steps carefully:\n1. Answer in the same language of the user question, regardless of the retrieved context language\n2. Use ONLY the pieces of the retrieved context and the chat history to answer the question.\n3. If the retrieved context does not contain sufficient information to generate an accurate and informative answer, append <NOANS> at the end of the answer\n\n==Retrieved context start==\n{context}\n==Retrieved context end=="
|
|
111
96
|
}
|
|
112
97
|
|
|
113
98
|
/**
|
|
@@ -120,20 +105,80 @@ router.post('/scrape/single', async (req, res) => {
|
|
|
120
105
|
let project_id = req.projectid;
|
|
121
106
|
|
|
122
107
|
let data = req.body;
|
|
123
|
-
winston.debug("/scrape/single data: ", data)
|
|
108
|
+
winston.debug("/scrape/single data: ", data)
|
|
109
|
+
|
|
110
|
+
let namespace;
|
|
111
|
+
try {
|
|
112
|
+
namespace = await aiManager.checkNamespace(project_id, data.namespace);
|
|
113
|
+
} catch (err) {
|
|
114
|
+
let errorCode = err?.errorCode ?? 500;
|
|
115
|
+
return res.status(errorCode).send({ success: false, error: err.error });
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
if (data.type === "sitemap") {
|
|
124
119
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
120
|
+
const urls = await aiManager.fetchSitemap(data.source).catch((err) => {
|
|
121
|
+
winston.error("Error fetching sitemap: ", err);
|
|
122
|
+
return res.status(500).send({ success: false, error: err });
|
|
123
|
+
})
|
|
129
124
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
res.status(403).send(alert);
|
|
134
|
-
}
|
|
125
|
+
if (urls.length === 0) {
|
|
126
|
+
return res.status(400).send({ success: false, error: "No url found on sitemap" });
|
|
127
|
+
}
|
|
135
128
|
|
|
136
|
-
|
|
129
|
+
let sitemapKb;
|
|
130
|
+
try {
|
|
131
|
+
sitemapKb = await KB.findById(data.id);
|
|
132
|
+
} catch (err) {
|
|
133
|
+
winston.error("Error finding sitemap content with id " + data.id);
|
|
134
|
+
return res.status(500).send({ success: false, error: "Error finding sitemap content with id " + data.id });
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
if (!sitemapKb) {
|
|
138
|
+
return res.status(404).send({ success: false, error: "Content not found with id " + data.id });
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
let existingKbs;
|
|
142
|
+
try {
|
|
143
|
+
existingKbs = await KB.find({ id_project: project_id, namespace: data.namespace, sitemap_origin_id: data.id }).lean().exec();
|
|
144
|
+
} catch(err) {
|
|
145
|
+
winston.error("Error finding existing contents: ", err);
|
|
146
|
+
return res.status(500).send({ success: false, error: "Error finding existing sitemap contents" });
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
const result = await aiManager.foundSitemapChanges(existingKbs, urls).catch((err) => {
|
|
150
|
+
winston.error("Error finding sitemap differecens ", err);
|
|
151
|
+
return res.status(400).send({ success: false, error: "Error finding sitemap differecens" });
|
|
152
|
+
})
|
|
153
|
+
|
|
154
|
+
if (!result) return; // esco qui
|
|
155
|
+
|
|
156
|
+
const { addedUrls, removedIds } = result;
|
|
157
|
+
|
|
158
|
+
if (removedIds.length > 0) {
|
|
159
|
+
const idsSet = new Set(removedIds);
|
|
160
|
+
const kbsToDelete = existingKbs.filter(obj => idsSet.has(obj._id));
|
|
161
|
+
|
|
162
|
+
aiManager.removeMultipleContents(namespace, kbsToDelete).catch((err) => {
|
|
163
|
+
winston.error("Error deleting multiple contents: ", err);
|
|
164
|
+
})
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
if (addedUrls.length > 0) {
|
|
168
|
+
const options = {
|
|
169
|
+
sitemap_origin_id: sitemapKb._id,
|
|
170
|
+
sitemap_origin: sitemapKb.source,
|
|
171
|
+
scrape_type: sitemapKb.scrape_type,
|
|
172
|
+
scrape_options: sitemapKb.scrape_options,
|
|
173
|
+
refresh_rate: sitemapKb.refresh_rate
|
|
174
|
+
}
|
|
175
|
+
aiManager.addMultipleUrls(namespace, addedUrls, options).catch((err) => {
|
|
176
|
+
winston.error("(webhook) error adding multiple urls contents: ", err);
|
|
177
|
+
})
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
return res.status(200).send({ success: true, message: "Content queued for reindexing", added_urls: addedUrls.length, removed_url: removedIds.length });
|
|
181
|
+
}
|
|
137
182
|
|
|
138
183
|
KB.findById(data.id, (err, kb) => {
|
|
139
184
|
if (err) {
|
|
@@ -146,10 +191,6 @@ router.post('/scrape/single', async (req, res) => {
|
|
|
146
191
|
}
|
|
147
192
|
else {
|
|
148
193
|
|
|
149
|
-
if (!namespaceIds.includes(kb.namespace)) {
|
|
150
|
-
return res.status(403).send({ success: false, error: "Not allowed. The namespace does not belong to the current project." })
|
|
151
|
-
}
|
|
152
|
-
|
|
153
194
|
let json = {
|
|
154
195
|
id: kb._id,
|
|
155
196
|
type: kb.type,
|
|
@@ -174,16 +215,20 @@ router.post('/scrape/single', async (req, res) => {
|
|
|
174
215
|
}
|
|
175
216
|
}
|
|
176
217
|
|
|
177
|
-
|
|
178
|
-
json.
|
|
218
|
+
json.engine = namespace.engine || default_engine;
|
|
219
|
+
json.embedding = namespace.embedding || default_embedding;
|
|
179
220
|
|
|
180
|
-
if (
|
|
221
|
+
if (namespace.hybrid === true) {
|
|
181
222
|
json.hybrid = true;
|
|
182
223
|
}
|
|
183
224
|
|
|
184
225
|
winston.verbose("/scrape/single json: ", json);
|
|
185
226
|
|
|
186
|
-
|
|
227
|
+
if (process.env.NODE_ENV === "test") {
|
|
228
|
+
res.status(200).send({ success: true, message: "Skip indexing in test environment", data: json })
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
aiManager.startScrape(json).then((response) => {
|
|
187
232
|
winston.verbose("startScrape response: ", response);
|
|
188
233
|
res.status(200).send(response);
|
|
189
234
|
}).catch((err) => {
|
|
@@ -211,24 +256,15 @@ router.post('/scrape/status', async (req, res) => {
|
|
|
211
256
|
returnObject = true;
|
|
212
257
|
}
|
|
213
258
|
|
|
214
|
-
let
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
})
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
let alert = "No namespace found for the selected project " + project_id + ". Cannot add content to a non-existent namespace."
|
|
221
|
-
winston.warn(alert);
|
|
222
|
-
res.status(403).send(alert);
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
let namespaceIds = namespaces.map(namespace => namespace.id);
|
|
226
|
-
if (!namespaceIds.includes(data.namespace)) {
|
|
227
|
-
return res.status(403).send({ success: false, error: "Not allowed. The namespace does not belong to the current project." })
|
|
259
|
+
let namespace;
|
|
260
|
+
try {
|
|
261
|
+
namespace = await aiManager.checkNamespace(project_id, namespace_id);
|
|
262
|
+
} catch (err) {
|
|
263
|
+
let errorCode = err?.errorCode ?? 500;
|
|
264
|
+
return res.status(errorCode).send({ success: false, error: err.error });
|
|
228
265
|
}
|
|
229
266
|
|
|
230
|
-
|
|
231
|
-
data.engine = ns.engine || default_engine;
|
|
267
|
+
data.engine = namespace.engine || default_engine;
|
|
232
268
|
|
|
233
269
|
aiService.scrapeStatus(data).then(async (response) => {
|
|
234
270
|
|
|
@@ -238,7 +274,7 @@ router.post('/scrape/status', async (req, res) => {
|
|
|
238
274
|
|
|
239
275
|
if (response.data.status_code) {
|
|
240
276
|
// update.status = response.data.status_code;
|
|
241
|
-
update.status = await statusConverter(response.data.status_code)
|
|
277
|
+
update.status = await aiManager.statusConverter(response.data.status_code)
|
|
242
278
|
|
|
243
279
|
}
|
|
244
280
|
|
|
@@ -270,74 +306,33 @@ router.post('/scrape/status', async (req, res) => {
|
|
|
270
306
|
})
|
|
271
307
|
|
|
272
308
|
router.post('/qa', async (req, res) => {
|
|
273
|
-
|
|
274
|
-
let project_id = req.projectid;
|
|
309
|
+
let id_project = req.projectid;
|
|
275
310
|
let publicKey = false;
|
|
276
311
|
let data = req.body;
|
|
277
|
-
let ollama_integration;
|
|
278
|
-
let vllm_integration;
|
|
279
312
|
|
|
280
|
-
let
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
})
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
let alert = "No namespace found for the selected project " + project_id + ". Cannot add content to a non-existent namespace."
|
|
287
|
-
winston.warn(alert);
|
|
288
|
-
res.status(403).send(alert);
|
|
289
|
-
}
|
|
290
|
-
|
|
291
|
-
let namespaceIds = namespaces.map(namespace => namespace.id);
|
|
292
|
-
if (!namespaceIds.includes(data.namespace)) {
|
|
293
|
-
return res.status(403).send({ success: false, error: "Not allowed. The namespace does not belong to the current project." })
|
|
313
|
+
let namespace;
|
|
314
|
+
try {
|
|
315
|
+
namespace = await aiManager.checkNamespace(id_project, data.namespace);
|
|
316
|
+
} catch (err) {
|
|
317
|
+
let errorCode = err?.errorCode ?? 500;
|
|
318
|
+
return res.status(errorCode).send({ success: false, error: err.error });
|
|
294
319
|
}
|
|
295
|
-
|
|
296
320
|
winston.debug("/qa data: ", data);
|
|
297
321
|
|
|
298
|
-
|
|
299
|
-
|
|
322
|
+
let model;
|
|
323
|
+
try {
|
|
324
|
+
model = await aiManager.resolveLLMConfig(id_project, data.llm, data.model);
|
|
325
|
+
} catch (err) {
|
|
326
|
+
let errorCode = err?.code ?? 500;
|
|
327
|
+
return res.status(errorCode).send({ success: false, error: err.error });
|
|
300
328
|
}
|
|
301
329
|
|
|
302
|
-
if (
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
ollama_integration = await integrationService.getIntegration(project_id, 'ollama');
|
|
306
|
-
} catch (err) {
|
|
307
|
-
let error_code = err.code || 500;
|
|
308
|
-
let error_message = err.error || `Unable to get integration for ${data.llm}`;
|
|
309
|
-
return res.status(error_code).send({ success: false, error: error_message });
|
|
310
|
-
}
|
|
330
|
+
if (!model.api_key && model.provider === 'openai') {
|
|
331
|
+
model.api_key = process.env.GPTKEY;
|
|
332
|
+
publicKey = true;
|
|
311
333
|
}
|
|
312
|
-
else if (data.llm === 'vllm') {
|
|
313
|
-
data.gptkey = process.env.GPTKEY;
|
|
314
|
-
try {
|
|
315
|
-
vllm_integration = await integrationService.getIntegration(project_id, 'vllm')
|
|
316
|
-
} catch (err) {
|
|
317
|
-
let error_code = err.code || 500;
|
|
318
|
-
let error_message = err.error || `Unable to get integration for ${data.llm}`;
|
|
319
|
-
return res.status(error_code).send({ success: false, error: error_message });
|
|
320
|
-
}
|
|
321
|
-
} else {
|
|
322
|
-
try {
|
|
323
|
-
let key = await integrationService.getKeyFromIntegration(project_id, data.llm);
|
|
324
|
-
if (!key) {
|
|
325
|
-
if (data.llm === 'openai') {
|
|
326
|
-
data.gptkey = process.env.GPTKEY;
|
|
327
|
-
publicKey = true;
|
|
328
|
-
} else {
|
|
329
|
-
return res.status(404).send({ success: false, error: `Invalid or empty key provided for ${data.llm}` });
|
|
330
|
-
}
|
|
331
|
-
} else {
|
|
332
|
-
data.gptkey = key;
|
|
333
|
-
}
|
|
334
334
|
|
|
335
|
-
|
|
336
|
-
let error_code = err.code || 500;
|
|
337
|
-
let error_message = err.error || `Unable to get integration for ${data.llm}`;
|
|
338
|
-
return res.status(error_code).send({ success: false, error: error_message });
|
|
339
|
-
}
|
|
340
|
-
}
|
|
335
|
+
data.model = model;
|
|
341
336
|
|
|
342
337
|
let obj = { createdAt: new Date() };
|
|
343
338
|
|
|
@@ -351,7 +346,7 @@ router.post('/qa', async (req, res) => {
|
|
|
351
346
|
|
|
352
347
|
// Check if "Advanced Mode" is active. In such case the default_context must be not appended
|
|
353
348
|
if (!data.advancedPrompt) {
|
|
354
|
-
const contextTemplate = contexts[data.model] || contexts["general"];
|
|
349
|
+
const contextTemplate = contexts[data.model.name] || contexts["general"];
|
|
355
350
|
if (data.system_context) {
|
|
356
351
|
data.system_context = data.system_context + " \n" + contextTemplate;
|
|
357
352
|
} else {
|
|
@@ -359,10 +354,11 @@ router.post('/qa', async (req, res) => {
|
|
|
359
354
|
}
|
|
360
355
|
}
|
|
361
356
|
|
|
362
|
-
|
|
363
|
-
data.
|
|
357
|
+
data.engine = namespace.engine || default_engine;
|
|
358
|
+
data.embedding = namespace.embedding || default_embedding;
|
|
359
|
+
data.embedding.api_key = process.env.EMBEDDING_API_KEY || process.env.GPTKEY;
|
|
364
360
|
|
|
365
|
-
if (
|
|
361
|
+
if (namespace.hybrid === true) {
|
|
366
362
|
data.search_type = 'hybrid';
|
|
367
363
|
|
|
368
364
|
if (data.reranking === true) {
|
|
@@ -371,39 +367,15 @@ router.post('/qa', async (req, res) => {
|
|
|
371
367
|
}
|
|
372
368
|
}
|
|
373
369
|
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
return res.status(422).send({ success: false, error: "Server url for ollama is empty or invalid"})
|
|
377
|
-
}
|
|
378
|
-
data.model = {
|
|
379
|
-
name: data.model,
|
|
380
|
-
url: ollama_integration.value.url,
|
|
381
|
-
provider: 'ollama'
|
|
382
|
-
}
|
|
383
|
-
data.stream = false;
|
|
384
|
-
}
|
|
385
|
-
|
|
386
|
-
if (data.llm === 'vllm') {
|
|
387
|
-
if (!vllm_integration.value.url) {
|
|
388
|
-
return res.status(422).send({ success: false, error: "Server url for vllm is empty or invalid"})
|
|
389
|
-
}
|
|
390
|
-
data.model = {
|
|
391
|
-
name: data.model,
|
|
392
|
-
url: vllm_integration.value.url,
|
|
393
|
-
provider: 'vllm'
|
|
394
|
-
}
|
|
395
|
-
data.stream = false;
|
|
396
|
-
}
|
|
397
|
-
|
|
370
|
+
data.stream = false;
|
|
371
|
+
data.debug = true;
|
|
398
372
|
delete data.advancedPrompt;
|
|
399
373
|
winston.verbose("ask data: ", data);
|
|
400
|
-
|
|
374
|
+
|
|
401
375
|
if (process.env.NODE_ENV === 'test') {
|
|
402
376
|
return res.status(200).send({ success: true, message: "Question skipped in test environment", data: data });
|
|
403
377
|
}
|
|
404
378
|
|
|
405
|
-
data.debug = true;
|
|
406
|
-
|
|
407
379
|
aiService.askNamespace(data).then((resp) => {
|
|
408
380
|
winston.debug("qa resp: ", resp.data);
|
|
409
381
|
let answer = resp.data;
|
|
@@ -435,33 +407,185 @@ router.post('/qa', async (req, res) => {
|
|
|
435
407
|
}
|
|
436
408
|
|
|
437
409
|
})
|
|
410
|
+
|
|
438
411
|
})
|
|
439
412
|
|
|
413
|
+
// router.post('/qa', async (req, res) => {
|
|
414
|
+
|
|
415
|
+
// let project_id = req.projectid;
|
|
416
|
+
// let publicKey = false;
|
|
417
|
+
// let data = req.body;
|
|
418
|
+
// let ollama_integration;
|
|
419
|
+
// let vllm_integration;
|
|
420
|
+
|
|
421
|
+
// let namespace;
|
|
422
|
+
// try {
|
|
423
|
+
// namespace = await aiManager.checkNamespace(project_id, data.namespace);
|
|
424
|
+
// } catch (err) {
|
|
425
|
+
// let errorCode = err?.errorCode ?? 500;
|
|
426
|
+
// return res.status(errorCode).send({ success: false, error: err.error });
|
|
427
|
+
// }
|
|
428
|
+
// winston.debug("/qa data: ", data);
|
|
429
|
+
|
|
430
|
+
// if (!data.llm) {
|
|
431
|
+
// data.llm = "openai";
|
|
432
|
+
// }
|
|
433
|
+
|
|
434
|
+
// if (data.llm === 'ollama') {
|
|
435
|
+
// data.gptkey = process.env.GPTKEY;
|
|
436
|
+
// try {
|
|
437
|
+
// ollama_integration = await integrationService.getIntegration(project_id, 'ollama');
|
|
438
|
+
// } catch (err) {
|
|
439
|
+
// let error_code = err.code || 500;
|
|
440
|
+
// let error_message = err.error || `Unable to get integration for ${data.llm}`;
|
|
441
|
+
// return res.status(error_code).send({ success: false, error: error_message });
|
|
442
|
+
// }
|
|
443
|
+
// }
|
|
444
|
+
// else if (data.llm === 'vllm') {
|
|
445
|
+
// data.gptkey = process.env.GPTKEY;
|
|
446
|
+
// try {
|
|
447
|
+
// vllm_integration = await integrationService.getIntegration(project_id, 'vllm')
|
|
448
|
+
// } catch (err) {
|
|
449
|
+
// let error_code = err.code || 500;
|
|
450
|
+
// let error_message = err.error || `Unable to get integration for ${data.llm}`;
|
|
451
|
+
// return res.status(error_code).send({ success: false, error: error_message });
|
|
452
|
+
// }
|
|
453
|
+
// } else {
|
|
454
|
+
// try {
|
|
455
|
+
// let key = await integrationService.getKeyFromIntegration(project_id, data.llm);
|
|
456
|
+
|
|
457
|
+
// if (!key) {
|
|
458
|
+
// if (data.llm === 'openai') {
|
|
459
|
+
// data.gptkey = process.env.GPTKEY;
|
|
460
|
+
// publicKey = true;
|
|
461
|
+
// } else {
|
|
462
|
+
// return res.status(404).send({ success: false, error: `Invalid or empty key provided for ${data.llm}` });
|
|
463
|
+
// }
|
|
464
|
+
// } else {
|
|
465
|
+
// data.gptkey = key;
|
|
466
|
+
// }
|
|
467
|
+
|
|
468
|
+
// } catch (err) {
|
|
469
|
+
// let error_code = err.code || 500;
|
|
470
|
+
// let error_message = err.error || `Unable to get integration for ${data.llm}`;
|
|
471
|
+
// return res.status(error_code).send({ success: false, error: error_message });
|
|
472
|
+
// }
|
|
473
|
+
// }
|
|
474
|
+
|
|
475
|
+
// let obj = { createdAt: new Date() };
|
|
476
|
+
|
|
477
|
+
// let quoteManager = req.app.get('quote_manager');
|
|
478
|
+
// if (publicKey === true) {
|
|
479
|
+
// let isAvailable = await quoteManager.checkQuote(req.project, obj, 'tokens');
|
|
480
|
+
// if (isAvailable === false) {
|
|
481
|
+
// return res.status(403).send({ success: false, message: "Tokens quota exceeded", error_code: 13001})
|
|
482
|
+
// }
|
|
483
|
+
// }
|
|
484
|
+
|
|
485
|
+
// // Check if "Advanced Mode" is active. In such case the default_context must be not appended
|
|
486
|
+
// if (!data.advancedPrompt) {
|
|
487
|
+
// const contextTemplate = contexts[data.model] || contexts["general"];
|
|
488
|
+
// if (data.system_context) {
|
|
489
|
+
// data.system_context = data.system_context + " \n" + contextTemplate;
|
|
490
|
+
// } else {
|
|
491
|
+
// data.system_context = contextTemplate;
|
|
492
|
+
// }
|
|
493
|
+
// }
|
|
494
|
+
|
|
495
|
+
// data.engine = namespace.engine || default_engine;
|
|
496
|
+
// data.embedding = namespace.embedding || default_embedding;
|
|
497
|
+
// data.embedding.api_key = process.env.EMBEDDING_API_KEY || process.env.GPTKEY;
|
|
498
|
+
|
|
499
|
+
// if (namespace.hybrid === true) {
|
|
500
|
+
// data.search_type = 'hybrid';
|
|
501
|
+
|
|
502
|
+
// if (data.reranking === true) {
|
|
503
|
+
// data.reranking_multiplier = 3;
|
|
504
|
+
// data.reranker_model = "cross-encoder/ms-marco-MiniLM-L-6-v2";
|
|
505
|
+
// }
|
|
506
|
+
// }
|
|
507
|
+
|
|
508
|
+
// if (data.llm === 'ollama') {
|
|
509
|
+
// if (!ollama_integration.value.url) {
|
|
510
|
+
// return res.status(422).send({ success: false, error: "Server url for ollama is empty or invalid"})
|
|
511
|
+
// }
|
|
512
|
+
// data.model = {
|
|
513
|
+
// name: data.model,
|
|
514
|
+
// url: ollama_integration.value.url,
|
|
515
|
+
// provider: 'ollama'
|
|
516
|
+
// }
|
|
517
|
+
// data.stream = false;
|
|
518
|
+
// }
|
|
519
|
+
|
|
520
|
+
// if (data.llm === 'vllm') {
|
|
521
|
+
// if (!vllm_integration.value.url) {
|
|
522
|
+
// return res.status(422).send({ success: false, error: "Server url for vllm is empty or invalid"})
|
|
523
|
+
// }
|
|
524
|
+
// data.model = {
|
|
525
|
+
// name: data.model,
|
|
526
|
+
// url: vllm_integration.value.url,
|
|
527
|
+
// provider: 'vllm'
|
|
528
|
+
// }
|
|
529
|
+
// data.stream = false;
|
|
530
|
+
// }
|
|
531
|
+
|
|
532
|
+
// delete data.advancedPrompt;
|
|
533
|
+
// winston.verbose("ask data: ", data);
|
|
534
|
+
|
|
535
|
+
// if (process.env.NODE_ENV === 'test') {
|
|
536
|
+
// return res.status(200).send({ success: true, message: "Question skipped in test environment", data: data });
|
|
537
|
+
// }
|
|
538
|
+
|
|
539
|
+
// data.debug = true;
|
|
540
|
+
|
|
541
|
+
// aiService.askNamespace(data).then((resp) => {
|
|
542
|
+
// winston.debug("qa resp: ", resp.data);
|
|
543
|
+
// let answer = resp.data;
|
|
544
|
+
|
|
545
|
+
// if (publicKey === true) {
|
|
546
|
+
// let multiplier = MODELS_MULTIPLIER[data.model];
|
|
547
|
+
// if (!multiplier) {
|
|
548
|
+
// multiplier = 1;
|
|
549
|
+
// winston.info("No multiplier found for AI model")
|
|
550
|
+
// }
|
|
551
|
+
// obj.multiplier = multiplier;
|
|
552
|
+
// obj.tokens = answer.prompt_token_size;
|
|
553
|
+
|
|
554
|
+
// let incremented_key = quoteManager.incrementTokenCount(req.project, obj);
|
|
555
|
+
// winston.verbose("incremented_key: ", incremented_key);
|
|
556
|
+
// }
|
|
557
|
+
|
|
558
|
+
// return res.status(200).send(answer);
|
|
559
|
+
|
|
560
|
+
// }).catch((err) => {
|
|
561
|
+
// winston.error("qa err: ", err);
|
|
562
|
+
// winston.error("qa err.response: ", err.response);
|
|
563
|
+
// if (err.response && err.response.status) {
|
|
564
|
+
// let status = err.response.status;
|
|
565
|
+
// res.status(status).send({ success: false, statusText: err.response.statusText, error: err.response.data.detail });
|
|
566
|
+
// }
|
|
567
|
+
// else {
|
|
568
|
+
// res.status(500).send({ success: false, error: err });
|
|
569
|
+
// }
|
|
570
|
+
|
|
571
|
+
// })
|
|
572
|
+
// })
|
|
573
|
+
|
|
440
574
|
router.delete('/delete', async (req, res) => {
|
|
441
575
|
|
|
442
576
|
let project_id = req.projectid;
|
|
443
577
|
let data = req.body;
|
|
444
578
|
winston.debug("/delete data: ", data);
|
|
445
579
|
|
|
446
|
-
let
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
})
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
let alert = "No namespace found for the selected project " + project_id + ". Cannot add content to a non-existent namespace."
|
|
453
|
-
winston.warn(alert);
|
|
454
|
-
res.status(403).send(alert);
|
|
580
|
+
let namespace;
|
|
581
|
+
try {
|
|
582
|
+
namespace = await aiManager.checkNamespace(project_id, namespace_id);
|
|
583
|
+
} catch (err) {
|
|
584
|
+
let errorCode = err?.errorCode ?? 500;
|
|
585
|
+
return res.status(errorCode).send({ success: false, error: err.error });
|
|
455
586
|
}
|
|
456
587
|
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
if (!namespaceIds.includes(data.namespace)) {
|
|
460
|
-
return res.status(403).send({ success: false, error: "Not allowed. The namespace does not belong to the current project." })
|
|
461
|
-
}
|
|
462
|
-
|
|
463
|
-
let ns = namespaces.find(n => n.id === data.namespace);
|
|
464
|
-
data.engine = ns.engine || default_engine;
|
|
588
|
+
data.engine = namespace.engine || default_engine;
|
|
465
589
|
|
|
466
590
|
aiService.deleteIndex(data).then((resp) => {
|
|
467
591
|
winston.debug("delete resp: ", resp.data);
|
|
@@ -480,25 +604,15 @@ router.delete('/deleteall', async (req, res) => {
|
|
|
480
604
|
let data = req.body;
|
|
481
605
|
winston.debug('/delete all data: ', data);
|
|
482
606
|
|
|
483
|
-
let
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
})
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
let alert = "No namespace found for the selected project " + project_id + ". Cannot add content to a non-existent namespace."
|
|
490
|
-
winston.warn(alert);
|
|
491
|
-
res.status(403).send(alert);
|
|
492
|
-
}
|
|
493
|
-
|
|
494
|
-
let namespaceIds = namespaces.map(namespace => namespace.id);
|
|
495
|
-
|
|
496
|
-
if (!namespaceIds.includes(data.namespace)) {
|
|
497
|
-
return res.status(403).send({ success: false, error: "Not allowed. The namespace does not belong to the current project." })
|
|
607
|
+
let namespace;
|
|
608
|
+
try {
|
|
609
|
+
namespace = await aiManager.checkNamespace(project_id, namespace_id);
|
|
610
|
+
} catch (err) {
|
|
611
|
+
let errorCode = err?.errorCode ?? 500;
|
|
612
|
+
return res.status(errorCode).send({ success: false, error: err.error });
|
|
498
613
|
}
|
|
499
614
|
|
|
500
|
-
|
|
501
|
-
data.engine = ns.engine || default_engine;
|
|
615
|
+
data.engine = namespace.engine || default_engine;
|
|
502
616
|
|
|
503
617
|
winston.verbose("/deleteall data: ", data);
|
|
504
618
|
|
|
@@ -545,7 +659,8 @@ router.get('/namespace/all', async (req, res) => {
|
|
|
545
659
|
name: "Default",
|
|
546
660
|
preview_settings: default_preview_settings,
|
|
547
661
|
default: true,
|
|
548
|
-
engine: default_engine
|
|
662
|
+
engine: default_engine,
|
|
663
|
+
embedding: default_embedding
|
|
549
664
|
})
|
|
550
665
|
|
|
551
666
|
new_namespace.save((err, savedNamespace) => {
|
|
@@ -591,14 +706,12 @@ router.get('/namespace/:id/chunks/:content_id', async (req, res) => {
|
|
|
591
706
|
let namespace_id = req.params.id;
|
|
592
707
|
let content_id = req.params.content_id;
|
|
593
708
|
|
|
594
|
-
let
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
})
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
if (!namespaceIds.includes(namespace_id)) {
|
|
601
|
-
return res.status(403).send({ success: false, error: "Not allowed. The namespace does not belong to the current project." })
|
|
709
|
+
let namespace;
|
|
710
|
+
try {
|
|
711
|
+
namespace = await aiManager.checkNamespace(project_id, namespace_id);
|
|
712
|
+
} catch (err) {
|
|
713
|
+
let errorCode = err?.errorCode ?? 500;
|
|
714
|
+
return res.status(errorCode).send({ success: false, error: err.error });
|
|
602
715
|
}
|
|
603
716
|
|
|
604
717
|
let content = await KB.find({ id_project: project_id, namespace: namespace_id, _id: content_id }).catch((err) => {
|
|
@@ -610,9 +723,7 @@ router.get('/namespace/:id/chunks/:content_id', async (req, res) => {
|
|
|
610
723
|
return res.status(403).send({ success: false, error: "Not allowed. The content does not belong to the current namespace." })
|
|
611
724
|
}
|
|
612
725
|
|
|
613
|
-
let
|
|
614
|
-
let engine = ns.engine || default_engine;
|
|
615
|
-
delete engine._id;
|
|
726
|
+
let engine = namespace.engine || default_engine;
|
|
616
727
|
|
|
617
728
|
if (process.env.NODE_ENV === 'test') {
|
|
618
729
|
return res.status(200).send({ success: true, message: "Get chunks skipped in test environment"});
|
|
@@ -625,8 +736,8 @@ router.get('/namespace/:id/chunks/:content_id', async (req, res) => {
|
|
|
625
736
|
return res.status(200).send(chunks);
|
|
626
737
|
|
|
627
738
|
}).catch((err) => {
|
|
628
|
-
|
|
629
|
-
|
|
739
|
+
winston.error("error getting content chunks err.response: ", err.response)
|
|
740
|
+
winston.error("error getting content chunks err.data: ", err.data)
|
|
630
741
|
return res.status(500).send({ success: false, error: err });
|
|
631
742
|
})
|
|
632
743
|
|
|
@@ -639,14 +750,12 @@ router.get('/namespace/:id/chatbots', async (req, res) => {
|
|
|
639
750
|
|
|
640
751
|
let chatbotsArray = [];
|
|
641
752
|
|
|
642
|
-
let
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
})
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
if (!namespaceIds.includes(namespace_id)) {
|
|
649
|
-
return res.status(403).send({ success: false, error: "Not allowed. The namespace does not belong to the current project." })
|
|
753
|
+
let namespace;
|
|
754
|
+
try {
|
|
755
|
+
namespace = await aiManager.checkNamespace(project_id, namespace_id);
|
|
756
|
+
} catch (err) {
|
|
757
|
+
let errorCode = err?.errorCode ?? 500;
|
|
758
|
+
return res.status(errorCode).send({ success: false, error: err.error });
|
|
650
759
|
}
|
|
651
760
|
|
|
652
761
|
let intents = await faq.find({ id_project: project_id, 'actions.namespace': namespace_id }).catch((err) => {
|
|
@@ -699,14 +808,12 @@ router.get('/namespace/export/:id', async (req, res) => {
|
|
|
699
808
|
|
|
700
809
|
query.type = { $in: [ kbTypes.URL, kbTypes.TEXT, kbTypes.FAQ ] };
|
|
701
810
|
|
|
702
|
-
let namespace
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
})
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
winston.warn("No namespace found with id ", namespace_id);
|
|
709
|
-
return res.status(404).send({ success: false, error: "No namespace found with id " + namespace_id })
|
|
811
|
+
let namespace;
|
|
812
|
+
try {
|
|
813
|
+
namespace = await aiManager.checkNamespace(project_id, namespace_id);
|
|
814
|
+
} catch (err) {
|
|
815
|
+
let errorCode = err?.errorCode ?? 500;
|
|
816
|
+
return res.status(errorCode).send({ success: false, error: err.error });
|
|
710
817
|
}
|
|
711
818
|
|
|
712
819
|
let name = namespace.name;
|
|
@@ -718,7 +825,7 @@ router.get('/namespace/export/:id', async (req, res) => {
|
|
|
718
825
|
})
|
|
719
826
|
|
|
720
827
|
try {
|
|
721
|
-
let filename = await generateFilename(name);
|
|
828
|
+
let filename = await aiManager.generateFilename(name);
|
|
722
829
|
let json = {
|
|
723
830
|
name: name,
|
|
724
831
|
preview_settings: preview_settings,
|
|
@@ -765,7 +872,8 @@ router.post('/namespace', async (req, res) => {
|
|
|
765
872
|
name: body.name,
|
|
766
873
|
hybrid: hybrid,
|
|
767
874
|
preview_settings: default_preview_settings,
|
|
768
|
-
engine: engine
|
|
875
|
+
engine: engine,
|
|
876
|
+
embedding: default_embedding
|
|
769
877
|
})
|
|
770
878
|
|
|
771
879
|
let namespaces = await Namespace.find({ id_project: project_id }).catch((err) => {
|
|
@@ -779,7 +887,6 @@ router.post('/namespace', async (req, res) => {
|
|
|
779
887
|
let quoteManager = req.app.get('quote_manager');
|
|
780
888
|
let limits = await quoteManager.getPlanLimits(req.project);
|
|
781
889
|
let ns_limit = limits.namespace;
|
|
782
|
-
//console.log("Limit of namespaces for current plan " + ns_limit);
|
|
783
890
|
|
|
784
891
|
if (namespaces.length >= ns_limit) {
|
|
785
892
|
return res.status(403).send({ success: false, error: "Maximum number of resources reached for the current plan", plan_limit: ns_limit });
|
|
@@ -940,7 +1047,7 @@ router.post('/namespace/import/:id', upload.single('uploadFile'), async (req, re
|
|
|
940
1047
|
winston.verbose("resources to be sent to worker: ", resources);
|
|
941
1048
|
|
|
942
1049
|
if (process.env.NODE_ENV !== "test") {
|
|
943
|
-
scheduleScrape(resources, hybrid);
|
|
1050
|
+
aiManager.scheduleScrape(resources, hybrid);
|
|
944
1051
|
}
|
|
945
1052
|
|
|
946
1053
|
res.status(200).send({ success: true, message: "Contents imported successfully" });
|
|
@@ -972,7 +1079,6 @@ router.post('/namespace/import/:id', upload.single('uploadFile'), async (req, re
|
|
|
972
1079
|
|
|
973
1080
|
})
|
|
974
1081
|
|
|
975
|
-
|
|
976
1082
|
router.put('/namespace/:id', async (req, res) => {
|
|
977
1083
|
|
|
978
1084
|
let namespace_id = req.params.id;
|
|
@@ -1228,6 +1334,10 @@ router.get('/:kb_id', async (req, res) => {
|
|
|
1228
1334
|
return res.status(500).send({ success: false, error: err });
|
|
1229
1335
|
}
|
|
1230
1336
|
|
|
1337
|
+
if (!kb) {
|
|
1338
|
+
return res.status(404).send({ success: false, error: "Content not found with id " + kb_id });
|
|
1339
|
+
}
|
|
1340
|
+
|
|
1231
1341
|
return res.status(200).send(kb);
|
|
1232
1342
|
})
|
|
1233
1343
|
})
|
|
@@ -1236,45 +1346,28 @@ router.post('/', async (req, res) => {
|
|
|
1236
1346
|
|
|
1237
1347
|
let project_id = req.projectid;
|
|
1238
1348
|
let body = req.body;
|
|
1349
|
+
let namespace_id = body.namespace;
|
|
1239
1350
|
|
|
1240
1351
|
if (!body.namespace) {
|
|
1241
1352
|
return res.status(400).send({ success: false, error: "parameter 'namespace' is not defined" });
|
|
1242
1353
|
}
|
|
1243
1354
|
|
|
1244
|
-
let
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
})
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
let alert = "No namespace found for the selected project " + project_id + ". Cannot add content to a non-existent namespace."
|
|
1251
|
-
winston.warn(alert);
|
|
1252
|
-
res.status(403).send(alert);
|
|
1253
|
-
}
|
|
1254
|
-
|
|
1255
|
-
let namespaceIds = namespaces.map(namespace => namespace.id);
|
|
1256
|
-
|
|
1257
|
-
if (!namespaceIds.includes(body.namespace)) {
|
|
1258
|
-
return res.status(403).send({ success: false, error: "Not allowed. The namespace does not belong to the current project." })
|
|
1355
|
+
let namespace;
|
|
1356
|
+
try {
|
|
1357
|
+
namespace = await aiManager.checkNamespace(project_id, namespace_id);
|
|
1358
|
+
} catch (err) {
|
|
1359
|
+
let errorCode = err?.errorCode ?? 500;
|
|
1360
|
+
return res.status(errorCode).send({ success: false, error: err.error });
|
|
1259
1361
|
}
|
|
1260
1362
|
|
|
1261
1363
|
let quoteManager = req.app.get('quote_manager');
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
winston.verbose("Kbs count: " + kbs_count);
|
|
1268
|
-
|
|
1269
|
-
if (kbs_count >= kbs_limit) {
|
|
1270
|
-
return res.status(403).send({ success: false, error: "Maximum number of resources reached for the current plan", plan_limit: kbs_limit })
|
|
1364
|
+
try {
|
|
1365
|
+
await aiManager.checkQuotaAvailability(quoteManager, req.project, 1)
|
|
1366
|
+
} catch(err) {
|
|
1367
|
+
let errorCode = err?.errorCode ?? 500;
|
|
1368
|
+
return res.status(errorCode).send({ success: false, error: err.error, plan_limit: err.plan_limit })
|
|
1271
1369
|
}
|
|
1272
1370
|
|
|
1273
|
-
let total_count = kbs_count + 1;
|
|
1274
|
-
if (total_count > kbs_limit) {
|
|
1275
|
-
return res.status(403).send({ success: false, error: "Cannot exceed the number of resources in the current plan", plan_limit: kbs_limit })
|
|
1276
|
-
}
|
|
1277
|
-
|
|
1278
1371
|
let new_kb = {
|
|
1279
1372
|
id_project: project_id,
|
|
1280
1373
|
name: body.name,
|
|
@@ -1294,7 +1387,7 @@ router.post('/', async (req, res) => {
|
|
|
1294
1387
|
new_kb.refresh_rate = body.refresh_rate;
|
|
1295
1388
|
if (!body.scrape_type || body.scrape_type === 2) {
|
|
1296
1389
|
new_kb.scrape_type = 2;
|
|
1297
|
-
new_kb.scrape_options = await setDefaultScrapeOptions();
|
|
1390
|
+
new_kb.scrape_options = await aiManager.setDefaultScrapeOptions();
|
|
1298
1391
|
} else {
|
|
1299
1392
|
new_kb.scrape_type = body.scrape_type;
|
|
1300
1393
|
new_kb.scrape_options = body.scrape_options;
|
|
@@ -1303,19 +1396,18 @@ router.post('/', async (req, res) => {
|
|
|
1303
1396
|
|
|
1304
1397
|
winston.debug("adding kb: ", new_kb);
|
|
1305
1398
|
|
|
1306
|
-
KB.findOneAndUpdate({ id_project: project_id, type: 'url', source: new_kb.source }, new_kb, { upsert: true, new: true, rawResult: true }, async (err,
|
|
1399
|
+
KB.findOneAndUpdate({ id_project: project_id, type: 'url', source: new_kb.source }, new_kb, { upsert: true, new: true, rawResult: true }, async (err, raw_content) => {
|
|
1307
1400
|
if (err) {
|
|
1308
1401
|
winston.error("findOneAndUpdate with upsert error: ", err);
|
|
1309
1402
|
res.status(500).send({ success: false, error: err });
|
|
1310
1403
|
}
|
|
1311
1404
|
else {
|
|
1312
1405
|
|
|
1313
|
-
delete
|
|
1314
|
-
delete
|
|
1315
|
-
delete
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
let saved_kb = raw.value;
|
|
1406
|
+
delete raw_content.ok;
|
|
1407
|
+
delete raw_content.$clusterTime;
|
|
1408
|
+
delete raw_content.operationTime;
|
|
1409
|
+
|
|
1410
|
+
let saved_kb = raw_content.value;
|
|
1319
1411
|
let webhook = apiUrl + '/webhook/kb/status?token=' + KB_WEBHOOK_TOKEN;
|
|
1320
1412
|
|
|
1321
1413
|
let json = {
|
|
@@ -1337,17 +1429,20 @@ router.post('/', async (req, res) => {
|
|
|
1337
1429
|
if (saved_kb.scrape_options) {
|
|
1338
1430
|
json.parameters_scrape_type_4 = saved_kb.scrape_options;
|
|
1339
1431
|
}
|
|
1340
|
-
|
|
1341
|
-
json.
|
|
1342
|
-
json.hybrid =
|
|
1343
|
-
|
|
1432
|
+
json.engine = namespace.engine || default_engine;
|
|
1433
|
+
json.embedding = namespace.embedding || default_embedding;
|
|
1434
|
+
json.hybrid = namespace.hybrid;
|
|
1435
|
+
|
|
1344
1436
|
let resources = [];
|
|
1345
1437
|
|
|
1346
1438
|
resources.push(json);
|
|
1347
1439
|
|
|
1348
|
-
if (process.env.NODE_ENV
|
|
1349
|
-
|
|
1440
|
+
if (process.env.NODE_ENV === 'test') {
|
|
1441
|
+
return res.status(200).send({ success: true, message: "Schedule scrape skipped in test environment", data: raw_content, schedule_json: json });
|
|
1350
1442
|
}
|
|
1443
|
+
|
|
1444
|
+
aiManager.scheduleScrape(resources, ns.hybrid);
|
|
1445
|
+
return res.status(200).send(raw_content);
|
|
1351
1446
|
|
|
1352
1447
|
}
|
|
1353
1448
|
})
|
|
@@ -1365,47 +1460,32 @@ router.post('/multi', upload.single('uploadFile'), async (req, res) => {
|
|
|
1365
1460
|
}
|
|
1366
1461
|
|
|
1367
1462
|
let project_id = req.projectid;
|
|
1368
|
-
let scrape_type = req.body.scrape_type;
|
|
1369
|
-
let scrape_options = req.body.scrape_options;
|
|
1370
1463
|
let refresh_rate = req.body.refresh_rate;
|
|
1464
|
+
let scrape_type = req.body.scrape_type ?? 2;
|
|
1465
|
+
let scrape_options = req.body.scrape_options;
|
|
1466
|
+
if (scrape_type === 2 && scrape_options == null) {
|
|
1467
|
+
scrape_options = aiManager.setDefaultScrapeOptions();
|
|
1468
|
+
}
|
|
1371
1469
|
|
|
1372
1470
|
let namespace_id = req.query.namespace;
|
|
1373
1471
|
if (!namespace_id) {
|
|
1374
1472
|
return res.status(400).send({ success: false, error: "queryParam 'namespace' is not defined" })
|
|
1375
1473
|
}
|
|
1376
1474
|
|
|
1377
|
-
let
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
})
|
|
1381
|
-
|
|
1382
|
-
|
|
1383
|
-
let alert = "No namespace found for the selected project " + project_id + ". Cannot add content to a non-existent namespace."
|
|
1384
|
-
winston.warn(alert);
|
|
1385
|
-
res.status(403).send({ success: false, error: alert });
|
|
1386
|
-
}
|
|
1387
|
-
|
|
1388
|
-
let namespaceIds = namespaces.map(namespace => namespace.id);
|
|
1389
|
-
|
|
1390
|
-
if (!namespaceIds.includes(namespace_id)) {
|
|
1391
|
-
return res.status(403).send({ success: false, error: "Not allowed. The namespace does not belong to the current project." })
|
|
1475
|
+
let namespace;
|
|
1476
|
+
try {
|
|
1477
|
+
namespace = await aiManager.checkNamespace(project_id, namespace_id);
|
|
1478
|
+
} catch (err) {
|
|
1479
|
+
let errorCode = err?.errorCode ?? 500;
|
|
1480
|
+
return res.status(errorCode).send({ success: false, error: err.error });
|
|
1392
1481
|
}
|
|
1393
1482
|
|
|
1394
1483
|
let quoteManager = req.app.get('quote_manager');
|
|
1395
|
-
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
winston.verbose("Kbs count: " + kbs_count);
|
|
1401
|
-
|
|
1402
|
-
if (kbs_count >= kbs_limit) {
|
|
1403
|
-
return res.status(403).send({ success: false, error: "Maximum number of resources reached for the current plan", plan_limit: kbs_limit })
|
|
1404
|
-
}
|
|
1405
|
-
|
|
1406
|
-
let total_count = kbs_count + list.length;
|
|
1407
|
-
if (total_count > kbs_limit) {
|
|
1408
|
-
return res.status(403).send({ success: false, error: "Cannot exceed the number of resources in the current plan", plan_limit: kbs_limit })
|
|
1484
|
+
try {
|
|
1485
|
+
await aiManager.checkQuotaAvailability(quoteManager, req.project, list.length)
|
|
1486
|
+
} catch(err) {
|
|
1487
|
+
let errorCode = err?.errorCode ?? 500;
|
|
1488
|
+
return res.status(errorCode).send({ success: false, error: err.error, plan_limit: err.plan_limit })
|
|
1409
1489
|
}
|
|
1410
1490
|
|
|
1411
1491
|
if (list.length > 300) {
|
|
@@ -1413,81 +1493,27 @@ router.post('/multi', upload.single('uploadFile'), async (req, res) => {
|
|
|
1413
1493
|
return res.status(403).send({ success: false, error: "Too many urls. Can't index more than 300 urls at a time." })
|
|
1414
1494
|
}
|
|
1415
1495
|
|
|
1416
|
-
|
|
1417
|
-
|
|
1418
|
-
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
id_project: project_id,
|
|
1422
|
-
name: url,
|
|
1423
|
-
source: url,
|
|
1424
|
-
type: 'url',
|
|
1425
|
-
content: "",
|
|
1426
|
-
namespace: namespace_id,
|
|
1427
|
-
status: -1,
|
|
1428
|
-
scrape_type: scrape_type,
|
|
1429
|
-
refresh_rate: refresh_rate
|
|
1430
|
-
}
|
|
1431
|
-
|
|
1432
|
-
if (!kb.scrape_type) {
|
|
1433
|
-
scrape_type = 2;
|
|
1434
|
-
}
|
|
1435
|
-
|
|
1436
|
-
if (scrape_type == 2) {
|
|
1437
|
-
kb.scrape_options = {
|
|
1438
|
-
tags_to_extract: ["body"],
|
|
1439
|
-
unwanted_tags: [],
|
|
1440
|
-
unwanted_classnames: []
|
|
1441
|
-
}
|
|
1442
|
-
} else {
|
|
1443
|
-
kb.scrape_options = scrape_options;
|
|
1444
|
-
}
|
|
1445
|
-
// if (scrape_type === 2) {
|
|
1446
|
-
// kb.scrape_options = await setDefaultScrapeOptions();
|
|
1447
|
-
// } else {
|
|
1448
|
-
// kb.scrape_options = await setCustomScrapeOptions(scrape_options);
|
|
1449
|
-
// }
|
|
1450
|
-
kbs.push(kb)
|
|
1451
|
-
})
|
|
1452
|
-
|
|
1453
|
-
let operations = kbs.map(doc => {
|
|
1454
|
-
return {
|
|
1455
|
-
updateOne: {
|
|
1456
|
-
filter: { id_project: doc.id_project, type: 'url', source: doc.source, namespace: namespace_id },
|
|
1457
|
-
update: doc,
|
|
1458
|
-
upsert: true,
|
|
1459
|
-
returnOriginal: false
|
|
1460
|
-
}
|
|
1461
|
-
}
|
|
1462
|
-
})
|
|
1463
|
-
|
|
1464
|
-
saveBulk(operations, kbs, project_id, namespace_id).then((result) => {
|
|
1465
|
-
|
|
1466
|
-
let ns = namespaces.find(n => n.id === namespace_id);
|
|
1467
|
-
let engine = ns.engine || default_engine;
|
|
1468
|
-
let hybrid = ns.hybrid;
|
|
1469
|
-
|
|
1470
|
-
let resources = result.map(({ name, status, __v, createdAt, updatedAt, id_project, ...keepAttrs }) => keepAttrs)
|
|
1471
|
-
resources = resources.map(({ _id, scrape_options, ...rest }) => {
|
|
1472
|
-
return { id: _id, webhook: webhook, parameters_scrape_type_4: scrape_options, engine: engine, hybrid: hybrid, ...rest}
|
|
1473
|
-
});
|
|
1474
|
-
winston.verbose("resources to be sent to worker: ", resources);
|
|
1475
|
-
|
|
1476
|
-
if (process.env.NODE_ENV !== 'test') {
|
|
1477
|
-
scheduleScrape(resources, hybrid);
|
|
1478
|
-
}
|
|
1479
|
-
res.status(200).send(result);
|
|
1496
|
+
const options = {
|
|
1497
|
+
scrape_type: scrape_type,
|
|
1498
|
+
scrape_options: scrape_options,
|
|
1499
|
+
refresh_rate: refresh_rate
|
|
1500
|
+
}
|
|
1480
1501
|
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1502
|
+
let result;
|
|
1503
|
+
try {
|
|
1504
|
+
result = await aiManager.addMultipleUrls(namespace, list, options);
|
|
1505
|
+
return res.status(200).send(result);
|
|
1506
|
+
} catch (err) {
|
|
1507
|
+
winston.error("addMultipleUrls error: ", err)
|
|
1508
|
+
return res.status(500).send({ success: false, error: "Unable to add multiple urls due to an error." });
|
|
1509
|
+
}
|
|
1485
1510
|
|
|
1486
1511
|
})
|
|
1487
1512
|
|
|
1488
1513
|
router.post('/csv', upload.single('uploadFile'), async (req, res) => {
|
|
1489
1514
|
|
|
1490
1515
|
let project_id = req.projectid;
|
|
1516
|
+
let namespace_id = req.query.namespace;
|
|
1491
1517
|
|
|
1492
1518
|
let csv = req.file.buffer.toString('utf8');
|
|
1493
1519
|
winston.debug("csv: ", csv);
|
|
@@ -1495,38 +1521,12 @@ router.post('/csv', upload.single('uploadFile'), async (req, res) => {
|
|
|
1495
1521
|
let delimiter = req.body.delimiter || ";";
|
|
1496
1522
|
winston.debug("delimiter: ", delimiter);
|
|
1497
1523
|
|
|
1498
|
-
let
|
|
1499
|
-
|
|
1500
|
-
|
|
1501
|
-
}
|
|
1502
|
-
|
|
1503
|
-
|
|
1504
|
-
winston.error("find namespaces error: ", err)
|
|
1505
|
-
res.status(500).send({ success: false, error: err })
|
|
1506
|
-
})
|
|
1507
|
-
|
|
1508
|
-
if (!namespaces || namespaces.length == 0) {
|
|
1509
|
-
let alert = "No namespace found for the selected project " + project_id + ". Cannot add content to a non-existent namespace."
|
|
1510
|
-
winston.warn(alert);
|
|
1511
|
-
res.status(403).send({ success: false, error: alert });
|
|
1512
|
-
}
|
|
1513
|
-
|
|
1514
|
-
let namespaceIds = namespaces.map(namespace => namespace.id);
|
|
1515
|
-
|
|
1516
|
-
if (!namespaceIds.includes(namespace_id)) {
|
|
1517
|
-
return res.status(403).send({ success: false, error: "Not allowed. The namespace does not belong to the current project." })
|
|
1518
|
-
}
|
|
1519
|
-
|
|
1520
|
-
let quoteManager = req.app.get('quote_manager');
|
|
1521
|
-
let limits = await quoteManager.getPlanLimits(req.project);
|
|
1522
|
-
let kbs_limit = limits.kbs;
|
|
1523
|
-
winston.verbose("Limit of kbs for current plan: " + kbs_limit);
|
|
1524
|
-
|
|
1525
|
-
let kbs_count = await KB.countDocuments({ id_project: project_id }).exec();
|
|
1526
|
-
winston.verbose("Kbs count: " + kbs_count);
|
|
1527
|
-
|
|
1528
|
-
if (kbs_count >= kbs_limit) {
|
|
1529
|
-
return res.status(403).send({ success: false, error: "Maximum number of resources reached for the current plan", plan_limit: kbs_limit })
|
|
1524
|
+
let namespace;
|
|
1525
|
+
try {
|
|
1526
|
+
namespace = await aiManager.checkNamespace(project_id, namespace_id);
|
|
1527
|
+
} catch (err) {
|
|
1528
|
+
let errorCode = err?.errorCode ?? 500;
|
|
1529
|
+
return res.status(errorCode).send({ success: false, error: err.error });
|
|
1530
1530
|
}
|
|
1531
1531
|
|
|
1532
1532
|
let webhook = apiUrl + '/webhook/kb/status?token=' + KB_WEBHOOK_TOKEN;
|
|
@@ -1549,16 +1549,15 @@ router.post('/csv', upload.single('uploadFile'), async (req, res) => {
|
|
|
1549
1549
|
status: -1
|
|
1550
1550
|
})
|
|
1551
1551
|
})
|
|
1552
|
-
.on("end", () => {
|
|
1552
|
+
.on("end", async () => {
|
|
1553
1553
|
winston.debug("kbs after CSV parsing: ", kbs);
|
|
1554
1554
|
|
|
1555
|
-
|
|
1556
|
-
|
|
1557
|
-
|
|
1558
|
-
}
|
|
1559
|
-
|
|
1560
|
-
|
|
1561
|
-
return res.status(403).send({ success: false, error: "Too many faqs. Can't index more than 300 urls at a time." })
|
|
1555
|
+
try {
|
|
1556
|
+
let quoteManager = req.app.get('quote_manager');
|
|
1557
|
+
await aiManager.checkQuotaAvailability(quoteManager, req.project, kbs.length)
|
|
1558
|
+
} catch(err) {
|
|
1559
|
+
let errorCode = err?.errorCode ?? 500;
|
|
1560
|
+
return res.status(errorCode).send({ success: false, error: err.error, plan_limit: err.plan_limit })
|
|
1562
1561
|
}
|
|
1563
1562
|
|
|
1564
1563
|
let operations = kbs.map(doc => {
|
|
@@ -1572,21 +1571,25 @@ router.post('/csv', upload.single('uploadFile'), async (req, res) => {
|
|
|
1572
1571
|
}
|
|
1573
1572
|
})
|
|
1574
1573
|
|
|
1575
|
-
saveBulk(operations, kbs, project_id, namespace_id).then((result) => {
|
|
1574
|
+
aiManager.saveBulk(operations, kbs, project_id, namespace_id).then((result) => {
|
|
1576
1575
|
|
|
1577
|
-
let
|
|
1578
|
-
let
|
|
1579
|
-
let hybrid =
|
|
1576
|
+
let engine = namespace.engine || default_engine;
|
|
1577
|
+
let embedding = namespace.embedding || default_embedding;
|
|
1578
|
+
let hybrid = namespace.hybrid;
|
|
1580
1579
|
|
|
1581
1580
|
let resources = result.map(({ name, status, __v, createdAt, updatedAt, id_project, ...keepAttrs }) => keepAttrs)
|
|
1582
1581
|
resources = resources.map(({ _id, ...rest}) => {
|
|
1583
|
-
return { id: _id,
|
|
1582
|
+
return { id: _id, webhook: webhook, embedding: embedding, engine: engine, ...rest };
|
|
1584
1583
|
})
|
|
1585
1584
|
winston.verbose("resources to be sent to worker: ", resources);
|
|
1586
|
-
|
|
1587
|
-
|
|
1585
|
+
|
|
1586
|
+
if (process.env.NODE_ENV === 'test') {
|
|
1587
|
+
return res.status(200).send({ success: true, message: "Schedule scrape skipped in test environment", data: result, schedule_json: resources });
|
|
1588
1588
|
}
|
|
1589
|
-
|
|
1589
|
+
|
|
1590
|
+
aiManager.scheduleScrape(resources, hybrid);
|
|
1591
|
+
return res.status(200).send(result);
|
|
1592
|
+
|
|
1590
1593
|
}).catch((err) => {
|
|
1591
1594
|
winston.error("Unabled to saved kbs in bulk " + err);
|
|
1592
1595
|
res.status(500).send(err);
|
|
@@ -1607,7 +1610,7 @@ router.post('/sitemap', async (req, res) => {
|
|
|
1607
1610
|
const sitemap = new Sitemapper({
|
|
1608
1611
|
url: sitemap_url,
|
|
1609
1612
|
timeout: 15000,
|
|
1610
|
-
debug:
|
|
1613
|
+
debug: false
|
|
1611
1614
|
});
|
|
1612
1615
|
|
|
1613
1616
|
sitemap.fetch().then((data) => {
|
|
@@ -1621,6 +1624,114 @@ router.post('/sitemap', async (req, res) => {
|
|
|
1621
1624
|
|
|
1622
1625
|
})
|
|
1623
1626
|
|
|
1627
|
+
router.post('/sitemap/import', async (req, res) => {
|
|
1628
|
+
|
|
1629
|
+
let project_id = req.projectid;
|
|
1630
|
+
let namespace_id = req.query.namespace;
|
|
1631
|
+
let content = req.body;
|
|
1632
|
+
|
|
1633
|
+
if (content.type !== "sitemap") {
|
|
1634
|
+
return res.status(403).send({success: false, error: "Endpoint available for sitemap type only." });
|
|
1635
|
+
}
|
|
1636
|
+
|
|
1637
|
+
if (!namespace_id) {
|
|
1638
|
+
return res.status(400).send({ success: false, error: "queryParam 'namespace' is not defined" })
|
|
1639
|
+
}
|
|
1640
|
+
|
|
1641
|
+
let namespace;
|
|
1642
|
+
try {
|
|
1643
|
+
namespace = await aiManager.checkNamespace(project_id, namespace_id);
|
|
1644
|
+
} catch (err) {
|
|
1645
|
+
let errorCode = err?.errorCode ?? 500;
|
|
1646
|
+
return res.status(errorCode).send({ success: false, error: err.error });
|
|
1647
|
+
}
|
|
1648
|
+
|
|
1649
|
+
let sitemap_url = req.body.source;
|
|
1650
|
+
|
|
1651
|
+
// let quoteManager = req.app.get('quote_manager');
|
|
1652
|
+
// let limits = await quoteManager.getPlanLimits(req.project);
|
|
1653
|
+
// let kbs_limit = limits.kbs;
|
|
1654
|
+
// winston.verbose("Limit of kbs for current plan: " + kbs_limit);
|
|
1655
|
+
|
|
1656
|
+
// let kbs_count = await KB.countDocuments({ id_project: project_id }).exec();
|
|
1657
|
+
// winston.verbose("Kbs count: " + kbs_count);
|
|
1658
|
+
|
|
1659
|
+
const sitemap = new Sitemapper({
|
|
1660
|
+
url: sitemap_url,
|
|
1661
|
+
timeout: 15000,
|
|
1662
|
+
debug: false
|
|
1663
|
+
});
|
|
1664
|
+
|
|
1665
|
+
const data = await sitemap.fetch().catch((err) => {
|
|
1666
|
+
winston.error("Error fetching sitemap: ", err);
|
|
1667
|
+
return res.status(500).send({ success: false, error: err });
|
|
1668
|
+
})
|
|
1669
|
+
|
|
1670
|
+
if (data.errors && data.errors.length > 0) {
|
|
1671
|
+
winston.error("An error occurred during sitemap fetch: ", data.errors[0])
|
|
1672
|
+
return res.status(500).send({ success: false, error: "Unable to fecth sitemap due to an error: " + data.errors[0].message})
|
|
1673
|
+
}
|
|
1674
|
+
|
|
1675
|
+
const urls = Array.isArray(data.sites) ? data.sites : [];
|
|
1676
|
+
if (urls.length === 0) {
|
|
1677
|
+
return res.status(400).send({ success: false, error: "No url found on sitemap" });
|
|
1678
|
+
}
|
|
1679
|
+
|
|
1680
|
+
// let total_count = kbs_count + 1 + urls.length;
|
|
1681
|
+
// if (total_count > kbs_limit) {
|
|
1682
|
+
// return res.status(403).send({ success: false, error: "Cannot exceed the number of resources in the current plan", plan_limit: kbs_limit })
|
|
1683
|
+
// }
|
|
1684
|
+
|
|
1685
|
+
let refresh_rate = req.body.refresh_rate;
|
|
1686
|
+
let scrape_type = req.body.scrape_type ?? 2;
|
|
1687
|
+
let scrape_options = req.body.scrape_options;
|
|
1688
|
+
if (scrape_type === 2 && scrape_options == null) {
|
|
1689
|
+
scrape_options = aiManager.setDefaultScrapeOptions();
|
|
1690
|
+
}
|
|
1691
|
+
|
|
1692
|
+
let sitemap_content = {
|
|
1693
|
+
id_project: project_id,
|
|
1694
|
+
name: sitemap_url,
|
|
1695
|
+
source: sitemap_url,
|
|
1696
|
+
type: 'sitemap',
|
|
1697
|
+
content: "",
|
|
1698
|
+
namespace: namespace_id,
|
|
1699
|
+
scrape_type: scrape_type,
|
|
1700
|
+
scrape_options: scrape_options,
|
|
1701
|
+
refresh_rate: refresh_rate
|
|
1702
|
+
}
|
|
1703
|
+
|
|
1704
|
+
let saved_content;
|
|
1705
|
+
try {
|
|
1706
|
+
saved_content = await KB.findOneAndUpdate({ id_project: project_id, type: 'sitemap', source: sitemap_url, namespace: namespace_id }, sitemap_content, { upsert: true, new: true }).lean().exec();
|
|
1707
|
+
} catch (err) {
|
|
1708
|
+
winston.error("Error saving content: ", err);
|
|
1709
|
+
return res.status(500).send({ success: false, error: err });
|
|
1710
|
+
}
|
|
1711
|
+
|
|
1712
|
+
const options = {
|
|
1713
|
+
sitemap_origin_id: saved_content._id,
|
|
1714
|
+
sitemap_origin: saved_content.source,
|
|
1715
|
+
scrape_type: saved_content.scrape_type,
|
|
1716
|
+
scrape_options: saved_content.scrape_options,
|
|
1717
|
+
refresh_rate: saved_content.refresh_rate
|
|
1718
|
+
}
|
|
1719
|
+
|
|
1720
|
+
let result;
|
|
1721
|
+
try {
|
|
1722
|
+
result = await aiManager.addMultipleUrls(namespace, urls, options);
|
|
1723
|
+
if (process.env.NODE_ENV === 'test') {
|
|
1724
|
+
result.result.push(saved_content);
|
|
1725
|
+
return res.status(200).send(result);
|
|
1726
|
+
}
|
|
1727
|
+
result.push(saved_content);
|
|
1728
|
+
return res.status(200).send(result);
|
|
1729
|
+
} catch (err) {
|
|
1730
|
+
return res.status(500).send({ success: false, error: "Unable to add multiple urls from sitemap due to an error." });
|
|
1731
|
+
}
|
|
1732
|
+
|
|
1733
|
+
})
|
|
1734
|
+
|
|
1624
1735
|
router.put('/:kb_id', async (req, res) => {
|
|
1625
1736
|
|
|
1626
1737
|
let kb_id = req.params.kb_id;
|
|
@@ -1670,24 +1781,27 @@ router.delete('/:kb_id', async (req, res) => {
|
|
|
1670
1781
|
winston.error("Unable to delete kb. Kb not found...")
|
|
1671
1782
|
return res.status(404).send({ success: false, error: "Content not found" })
|
|
1672
1783
|
}
|
|
1673
|
-
|
|
1784
|
+
|
|
1785
|
+
let namespace_id = kb.namespace ?? project_id;
|
|
1786
|
+
|
|
1787
|
+
let namespace;
|
|
1788
|
+
try {
|
|
1789
|
+
namespace = await aiManager.checkNamespace(project_id, namespace_id);
|
|
1790
|
+
} catch (err) {
|
|
1791
|
+
let errorCode = err?.errorCode ?? 500;
|
|
1792
|
+
return res.status(errorCode).send({ success: false, error: err.error });
|
|
1793
|
+
}
|
|
1794
|
+
|
|
1674
1795
|
let data = {
|
|
1675
1796
|
id: kb_id,
|
|
1676
|
-
namespace:
|
|
1797
|
+
namespace: namespace_id
|
|
1677
1798
|
}
|
|
1678
1799
|
|
|
1679
1800
|
if (!data.namespace) {
|
|
1680
1801
|
data.namespace = project_id;
|
|
1681
1802
|
}
|
|
1682
|
-
|
|
1683
|
-
|
|
1684
|
-
winston.error("find namespaces error: ", err)
|
|
1685
|
-
res.status(500).send({ success: false, error: err })
|
|
1686
|
-
})
|
|
1687
|
-
|
|
1688
|
-
let ns = namespaces.find(n => n.id === data.namespace);
|
|
1689
|
-
data.engine = ns.engine || default_engine;
|
|
1690
|
-
|
|
1803
|
+
|
|
1804
|
+
data.engine = namespace.engine || default_engine;
|
|
1691
1805
|
winston.verbose("/:delete_id data: ", data);
|
|
1692
1806
|
|
|
1693
1807
|
aiService.deleteIndex(data).then((resp) => {
|
|
@@ -1733,196 +1847,4 @@ router.delete('/:kb_id', async (req, res) => {
|
|
|
1733
1847
|
*/
|
|
1734
1848
|
|
|
1735
1849
|
|
|
1736
|
-
//----------------------------------------
|
|
1737
|
-
|
|
1738
|
-
|
|
1739
|
-
/**
|
|
1740
|
-
* ****************************************
|
|
1741
|
-
* Utils Methods Section - Start
|
|
1742
|
-
* ****************************************
|
|
1743
|
-
*/
|
|
1744
|
-
|
|
1745
|
-
async function saveBulk(operations, kbs, project_id, namespace) {
|
|
1746
|
-
|
|
1747
|
-
return new Promise((resolve, reject) => {
|
|
1748
|
-
KB.bulkWrite(operations, { ordered: false }).then((result) => {
|
|
1749
|
-
winston.verbose("bulkWrite operations result: ", result);
|
|
1750
|
-
|
|
1751
|
-
KB.find({ id_project: project_id, namespace: namespace, source: { $in: kbs.map(kb => kb.source) } }).lean().then((documents) => {
|
|
1752
|
-
winston.debug("documents: ", documents);
|
|
1753
|
-
resolve(documents)
|
|
1754
|
-
}).catch((err) => {
|
|
1755
|
-
winston.error("Error finding documents ", err)
|
|
1756
|
-
reject(err);
|
|
1757
|
-
})
|
|
1758
|
-
|
|
1759
|
-
}).catch((err) => {
|
|
1760
|
-
reject(err);
|
|
1761
|
-
})
|
|
1762
|
-
})
|
|
1763
|
-
|
|
1764
|
-
}
|
|
1765
|
-
|
|
1766
|
-
async function statusConverter(status) {
|
|
1767
|
-
return new Promise((resolve) => {
|
|
1768
|
-
|
|
1769
|
-
let td_status;
|
|
1770
|
-
switch (status) {
|
|
1771
|
-
case 0:
|
|
1772
|
-
td_status = -1;
|
|
1773
|
-
break;
|
|
1774
|
-
case 2:
|
|
1775
|
-
td_status = 200;
|
|
1776
|
-
break;
|
|
1777
|
-
case 3:
|
|
1778
|
-
td_status = 300;
|
|
1779
|
-
break;
|
|
1780
|
-
case 4:
|
|
1781
|
-
td_status = 400;
|
|
1782
|
-
break;
|
|
1783
|
-
default:
|
|
1784
|
-
td_status = -1
|
|
1785
|
-
}
|
|
1786
|
-
resolve(td_status);
|
|
1787
|
-
})
|
|
1788
|
-
}
|
|
1789
|
-
|
|
1790
|
-
async function updateStatus(id, status) {
|
|
1791
|
-
return new Promise((resolve) => {
|
|
1792
|
-
|
|
1793
|
-
KB.findByIdAndUpdate(id, { status: status }, { new: true }, (err, updatedKb) => {
|
|
1794
|
-
if (err) {
|
|
1795
|
-
resolve(false)
|
|
1796
|
-
} else if (!updatedKb) {
|
|
1797
|
-
winston.verbose("Unable to update status. Data source not found.")
|
|
1798
|
-
resolve(false)
|
|
1799
|
-
} else {
|
|
1800
|
-
winston.debug("updatedKb: ", updatedKb)
|
|
1801
|
-
resolve(true);
|
|
1802
|
-
}
|
|
1803
|
-
})
|
|
1804
|
-
})
|
|
1805
|
-
}
|
|
1806
|
-
|
|
1807
|
-
async function scheduleScrape(resources, hybrid) {
|
|
1808
|
-
|
|
1809
|
-
let scheduler;
|
|
1810
|
-
if (hybrid) {
|
|
1811
|
-
scheduler = new Scheduler({ jobManager: jobManagerHybrid });
|
|
1812
|
-
} else {
|
|
1813
|
-
scheduler = new Scheduler({ jobManager: jobManager });
|
|
1814
|
-
}
|
|
1815
|
-
|
|
1816
|
-
if (!scheduler) {
|
|
1817
|
-
winston.error("ScheduleScrape JobManager is not defined");
|
|
1818
|
-
return false;
|
|
1819
|
-
}
|
|
1820
|
-
|
|
1821
|
-
resources.forEach(r => {
|
|
1822
|
-
winston.debug("Schedule job with following data: ", r);
|
|
1823
|
-
scheduler.trainSchedule(r, async (err, result) => {
|
|
1824
|
-
let error_code = 100;
|
|
1825
|
-
if (err) {
|
|
1826
|
-
winston.error("Scheduling error: ", err);
|
|
1827
|
-
error_code = 400;
|
|
1828
|
-
} else {
|
|
1829
|
-
winston.verbose("Scheduling result: ", result);
|
|
1830
|
-
}
|
|
1831
|
-
await updateStatus(r.id, error_code);
|
|
1832
|
-
});
|
|
1833
|
-
})
|
|
1834
|
-
|
|
1835
|
-
return true;
|
|
1836
|
-
}
|
|
1837
|
-
|
|
1838
|
-
async function startScrape(data) {
|
|
1839
|
-
|
|
1840
|
-
if (!data.gptkey) {
|
|
1841
|
-
let gptkey = process.env.GPTKEY;
|
|
1842
|
-
if (!gptkey) {
|
|
1843
|
-
return { error: "GPT apikey undefined" }
|
|
1844
|
-
}
|
|
1845
|
-
data.gptkey = gptkey;
|
|
1846
|
-
}
|
|
1847
|
-
|
|
1848
|
-
|
|
1849
|
-
let status_updated = await updateStatus(data.id, 200);
|
|
1850
|
-
winston.verbose("status of kb " + data.id + " updated: " + status_updated);
|
|
1851
|
-
|
|
1852
|
-
return new Promise((resolve, reject) => {
|
|
1853
|
-
aiService.singleScrape(data).then(async (resp) => {
|
|
1854
|
-
winston.debug("singleScrape resp: ", resp.data);
|
|
1855
|
-
let status_updated = await updateStatus(data.id, 300);
|
|
1856
|
-
winston.verbose("status of kb " + data.id + " updated: " + status_updated);
|
|
1857
|
-
resolve(resp.data);
|
|
1858
|
-
}).catch( async (err) => {
|
|
1859
|
-
winston.error("singleScrape err: ", err);
|
|
1860
|
-
let status_updated = await updateStatus(data.id, 400);
|
|
1861
|
-
winston.verbose("status of kb " + data.id + " updated: " + status_updated);
|
|
1862
|
-
reject(err);
|
|
1863
|
-
})
|
|
1864
|
-
})
|
|
1865
|
-
}
|
|
1866
|
-
|
|
1867
|
-
async function getKeyFromIntegrations(project_id) {
|
|
1868
|
-
|
|
1869
|
-
return new Promise( async (resolve) => {
|
|
1870
|
-
|
|
1871
|
-
let integration = await Integration.findOne({ id_project: project_id, name: 'openai' }).catch((err) => {
|
|
1872
|
-
winston.error("Unable to find openai integration for the current project " + project_id);
|
|
1873
|
-
resolve(null);
|
|
1874
|
-
})
|
|
1875
|
-
if (integration && integration.value && integration.value.apikey) {
|
|
1876
|
-
resolve(integration.value.apikey);
|
|
1877
|
-
} else {
|
|
1878
|
-
resolve(null);
|
|
1879
|
-
}
|
|
1880
|
-
})
|
|
1881
|
-
}
|
|
1882
|
-
|
|
1883
|
-
async function setDefaultScrapeOptions() {
|
|
1884
|
-
return {
|
|
1885
|
-
tags_to_extract: ["body"],
|
|
1886
|
-
unwanted_tags: [],
|
|
1887
|
-
unwanted_classnames: []
|
|
1888
|
-
}
|
|
1889
|
-
}
|
|
1890
|
-
|
|
1891
|
-
async function setCustomScrapeOptions(options) {
|
|
1892
|
-
if (!options) {
|
|
1893
|
-
options = await setDefaultScrapeOptions();
|
|
1894
|
-
} else {
|
|
1895
|
-
if (!options.tags_to_extract || options.tags_to_extract.length == 0) {
|
|
1896
|
-
options.tags_to_extract = ["body"];
|
|
1897
|
-
}
|
|
1898
|
-
if (!options.unwanted_tags) {
|
|
1899
|
-
options.unwanted_tags = [];
|
|
1900
|
-
}
|
|
1901
|
-
if (!options.unwanted_classnames) {
|
|
1902
|
-
options.unwanted_classnames = [];
|
|
1903
|
-
}
|
|
1904
|
-
}
|
|
1905
|
-
}
|
|
1906
|
-
|
|
1907
|
-
async function generateFilename(name) {
|
|
1908
|
-
return name
|
|
1909
|
-
.toLowerCase()
|
|
1910
|
-
.trim()
|
|
1911
|
-
.normalize("NFD") // Normalize characters with accents
|
|
1912
|
-
.replace(/[\u0300-\u036f]/g, "") // Removes diacritics (e.g. à becomes a)
|
|
1913
|
-
.replace(/[^a-z0-9\s-_]/g, "") // Remove special characters
|
|
1914
|
-
.replace(/\s+/g, "-") // Replaces spaces with dashes
|
|
1915
|
-
.replace(/_/g, "-")
|
|
1916
|
-
.replace(/-+/g, "-"); // Removes consecutive hyphens
|
|
1917
|
-
}
|
|
1918
|
-
|
|
1919
|
-
|
|
1920
|
-
/**
|
|
1921
|
-
* ****************************************
|
|
1922
|
-
* Utils Methods Section - End
|
|
1923
|
-
* ****************************************
|
|
1924
|
-
*/
|
|
1925
|
-
|
|
1926
|
-
|
|
1927
|
-
|
|
1928
1850
|
module.exports = router;
|