@tiledesk/tiledesk-server 2.13.49 → 2.13.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -0
- package/app.js +5 -3
- package/config/kb/embedding.js +7 -0
- package/config/kb/engine.hybrid.js +10 -0
- package/config/kb/engine.js +10 -0
- package/jobs.js +4 -1
- package/models/kb_setting.js +60 -15
- package/models/profile.js +54 -5
- package/models/request.js +1 -1
- package/package.json +4 -4
- package/pubmodules/apps/listener.js +2 -1
- package/routes/kb.js +548 -626
- package/routes/webhook.js +86 -38
- package/services/aiManager.js +464 -0
- package/services/aiService.js +4 -2
- package/test/kbRoute.js +956 -910
package/routes/webhook.js
CHANGED
|
@@ -1,17 +1,13 @@
|
|
|
1
1
|
var express = require('express');
|
|
2
2
|
var router = express.Router();
|
|
3
|
-
const uuidv4 = require('uuid/v4');
|
|
4
3
|
var { KB, Namespace } = require('../models/kb_setting');
|
|
5
4
|
var winston = require('../config/winston');
|
|
6
5
|
const JobManager = require('../utils/jobs-worker-queue-manager/JobManagerV2');
|
|
7
|
-
const { Scheduler } = require('../services/Scheduler');
|
|
8
6
|
const { AiReindexService } = require('../services/aiReindexService');
|
|
9
7
|
const { Webhook } = require('../models/webhook');
|
|
10
|
-
const httpUtil = require('../utils/httpUtil');
|
|
11
|
-
var jwt = require('jsonwebtoken');
|
|
12
|
-
const Faq_kb = require('../models/faq_kb');
|
|
13
8
|
const webhookService = require('../services/webhookService');
|
|
14
9
|
const errorCodes = require('../errorCodes');
|
|
10
|
+
const aiManager = require('../services/aiManager');
|
|
15
11
|
var ObjectId = require('mongoose').Types.ObjectId;
|
|
16
12
|
|
|
17
13
|
const port = process.env.PORT || '3000';
|
|
@@ -41,12 +37,20 @@ jobManager.connectAndStartPublisher((status, error) => {
|
|
|
41
37
|
|
|
42
38
|
let default_engine = {
|
|
43
39
|
name: "pinecone",
|
|
44
|
-
type: process.env.PINECONE_TYPE,
|
|
40
|
+
type: process.env.PINECONE_TYPE || "pod",
|
|
45
41
|
apikey: "",
|
|
46
42
|
vector_size: 1536,
|
|
47
43
|
index_name: process.env.PINECONE_INDEX
|
|
48
44
|
}
|
|
49
45
|
|
|
46
|
+
let default_engine_hybrid = {
|
|
47
|
+
name: "pinecone",
|
|
48
|
+
type: process.env.PINECONE_TYPE_HYBRID || "serverless",
|
|
49
|
+
apikey: "",
|
|
50
|
+
vector_size: 1536,
|
|
51
|
+
index_name: process.env.PINECONE_INDEX_HYBRID
|
|
52
|
+
}
|
|
53
|
+
|
|
50
54
|
router.post('/kb/reindex', async (req, res) => {
|
|
51
55
|
|
|
52
56
|
winston.verbose("/kb/reindex webhook called")
|
|
@@ -69,6 +73,15 @@ router.post('/kb/reindex', async (req, res) => {
|
|
|
69
73
|
return res.status(500).send({ success: false, error: "Error getting content with id " + content_id });
|
|
70
74
|
})
|
|
71
75
|
|
|
76
|
+
const namespace_id = kb.namespace;
|
|
77
|
+
let namespace;
|
|
78
|
+
try {
|
|
79
|
+
namespace = await aiManager.checkNamespace(kb.id_project, namespace_id);
|
|
80
|
+
} catch (err) {
|
|
81
|
+
let errorCode = err?.errorCode ?? 500;
|
|
82
|
+
return res.status(errorCode).send({ success: false, error: err.error });
|
|
83
|
+
}
|
|
84
|
+
|
|
72
85
|
if (!kb) {
|
|
73
86
|
winston.warn("(webhook) Kb content not found with id " + content_id + ". Deleting scheduler...");
|
|
74
87
|
|
|
@@ -86,6 +99,58 @@ router.post('/kb/reindex', async (req, res) => {
|
|
|
86
99
|
return;
|
|
87
100
|
}, 10000);
|
|
88
101
|
|
|
102
|
+
} else if (kb.type === 'sitemap') {
|
|
103
|
+
|
|
104
|
+
const urls = await aiManager.fetchSitemap(kb.source).catch((err) => {
|
|
105
|
+
winston.error("(webhook) Error fetching sitemap: ", err);
|
|
106
|
+
return res.status(500).send({ success: false, error: err });
|
|
107
|
+
})
|
|
108
|
+
|
|
109
|
+
if (urls.length === 0) {
|
|
110
|
+
return res.status(400).send({ success: false, error: "No url found on sitemap" });
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
let existingKbs;
|
|
114
|
+
try {
|
|
115
|
+
existingKbs = await KB.find({ id_project: kb.id_project, namespace: namespace_id, sitemap_origin_id: content_id}).lean().exec();
|
|
116
|
+
} catch(err) {
|
|
117
|
+
winston.error("(webhook) Error finding existing contents: ", err);
|
|
118
|
+
return res.status(500).send({ success: false, error: "Error finding existing sitemap contents" });
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
const result = await aiManager.foundSitemapChanges(existingKbs, urls).catch((err) => {
|
|
122
|
+
winston.error("(webhook) error finding sitemap differecens ", err);
|
|
123
|
+
return res.status(400).send({ success: false, error: "Error finding sitemap differecens" });
|
|
124
|
+
})
|
|
125
|
+
|
|
126
|
+
if (!result) return; // esco qui
|
|
127
|
+
|
|
128
|
+
const { addedUrls, removedIds } = result;
|
|
129
|
+
|
|
130
|
+
if (removedIds.length > 0) {
|
|
131
|
+
const idsSet = new Set(removedIds);
|
|
132
|
+
const kbsToDelete = existingKbs.filter(obj => idsSet.has(obj._id));
|
|
133
|
+
|
|
134
|
+
aiManager.removeMultipleContents(namespace, kbsToDelete).catch((err) => {
|
|
135
|
+
winston.error("(webhook) error deleting multiple contents: ", err);
|
|
136
|
+
})
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
if (addedUrls.length > 0) {
|
|
140
|
+
const options = {
|
|
141
|
+
sitemap_origin_id: kb.id,
|
|
142
|
+
sitemap_origin: kb.source,
|
|
143
|
+
scrape_type: kb.scrape_type,
|
|
144
|
+
scrape_options: kb.scrape_options,
|
|
145
|
+
refresh_rate: kb.refresh_rate
|
|
146
|
+
}
|
|
147
|
+
aiManager.addMultipleUrls(namespace, addedUrls, options).catch((err) => {
|
|
148
|
+
winston.error("(webhook) error adding multiple urls contents: ", err);
|
|
149
|
+
})
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
res.status(200).send({ success: true, message: "Content queued for reindexing" });
|
|
153
|
+
|
|
89
154
|
} else {
|
|
90
155
|
|
|
91
156
|
let json = {
|
|
@@ -120,13 +185,13 @@ router.post('/kb/reindex', async (req, res) => {
|
|
|
120
185
|
return res.status(500).send({ success: false, error: err })
|
|
121
186
|
}
|
|
122
187
|
|
|
123
|
-
json.engine = namespace.engine || default_engine;
|
|
124
|
-
|
|
188
|
+
json.engine = namespace.engine || (namespace.hybrid ? default_engine_hybrid : default_engine);
|
|
189
|
+
|
|
125
190
|
let resources = [];
|
|
126
191
|
resources.push(json);
|
|
127
192
|
|
|
128
193
|
if (process.env.NODE_ENV !== 'test') {
|
|
129
|
-
scheduleScrape(resources);
|
|
194
|
+
aiManager.scheduleScrape(resources, namespace.hybrid);
|
|
130
195
|
}
|
|
131
196
|
|
|
132
197
|
res.status(200).send({ success: true, message: "Content queued for reindexing" });
|
|
@@ -273,37 +338,20 @@ router.all('/:webhook_id/dev', async (req, res) => {
|
|
|
273
338
|
|
|
274
339
|
})
|
|
275
340
|
|
|
276
|
-
async function scheduleScrape(resources) {
|
|
277
341
|
|
|
278
|
-
|
|
342
|
+
// async function generateChatbotToken(chatbot) {
|
|
343
|
+
// let signOptions = {
|
|
344
|
+
// issuer: 'https://tiledesk.com',
|
|
345
|
+
// subject: 'bot',
|
|
346
|
+
// audience: 'https://tiledesk.com/bots/' + chatbot._id,
|
|
347
|
+
// jwtid: uuidv4()
|
|
348
|
+
// };
|
|
279
349
|
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
scheduler.trainSchedule(r, async (err, result) => {
|
|
283
|
-
if (err) {
|
|
284
|
-
winston.error("Scheduling error: ", err);
|
|
285
|
-
} else {
|
|
286
|
-
winston.verbose("Scheduling result: ", result);
|
|
287
|
-
}
|
|
288
|
-
});
|
|
289
|
-
})
|
|
350
|
+
// let botPayload = chatbot.toObject();
|
|
351
|
+
// let botSecret = botPayload.secret;
|
|
290
352
|
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
async function generateChatbotToken(chatbot) {
|
|
295
|
-
let signOptions = {
|
|
296
|
-
issuer: 'https://tiledesk.com',
|
|
297
|
-
subject: 'bot',
|
|
298
|
-
audience: 'https://tiledesk.com/bots/' + chatbot._id,
|
|
299
|
-
jwtid: uuidv4()
|
|
300
|
-
};
|
|
301
|
-
|
|
302
|
-
let botPayload = chatbot.toObject();
|
|
303
|
-
let botSecret = botPayload.secret;
|
|
304
|
-
|
|
305
|
-
var bot_token = jwt.sign(botPayload, botSecret, signOptions);
|
|
306
|
-
return bot_token;
|
|
307
|
-
}
|
|
353
|
+
// var bot_token = jwt.sign(botPayload, botSecret, signOptions);
|
|
354
|
+
// return bot_token;
|
|
355
|
+
// }
|
|
308
356
|
|
|
309
357
|
module.exports = router;
|
|
@@ -0,0 +1,464 @@
|
|
|
1
|
+
const { Namespace, KB, Engine } = require('../models/kb_setting');
|
|
2
|
+
const Integrations = require("../models/integrations");
|
|
3
|
+
const aiService = require("./aiService");
|
|
4
|
+
const { Scheduler } = require("./Scheduler");
|
|
5
|
+
const { default: Sitemapper } = require('sitemapper');
|
|
6
|
+
const winston = require('../config/winston');
|
|
7
|
+
const configGlobal = require('../config/global');
|
|
8
|
+
const JobManager = require('../utils/jobs-worker-queue-manager/JobManagerV2');
|
|
9
|
+
|
|
10
|
+
// Constants
|
|
11
|
+
const apiUrl = process.env.API_URL || configGlobal.apiUrl;
|
|
12
|
+
const KB_WEBHOOK_TOKEN = process.env.KB_WEBHOOK_TOKEN || 'kbcustomtoken';
|
|
13
|
+
const AMQP_MANAGER_URL = process.env.AMQP_MANAGER_URL || 'amqp://localhost';
|
|
14
|
+
const JOB_TOPIC_EXCHANGE = process.env.JOB_TOPIC_EXCHANGE_TRAIN || 'tiledesk-trainer';
|
|
15
|
+
const JOB_TOPIC_EXCHANGE_HYBRID = process.env.JOB_TOPIC_EXCHANGE_TRAIN_HYBRID || 'tiledesk-trainer-hybrid';
|
|
16
|
+
|
|
17
|
+
// Default engine configuration
|
|
18
|
+
const default_engine = require('../config/kb/engine');
|
|
19
|
+
const default_engine_hybrid = require('../config/kb/engine.hybrid');
|
|
20
|
+
const default_embedding = require('../config/kb/embedding');
|
|
21
|
+
const integrationService = require('./integrationService');
|
|
22
|
+
|
|
23
|
+
// Job managers
// One publisher per training exchange: the standard one and the hybrid one.
// Each manager uses its exchange name both as topic and as exchange.
let jobManager = new JobManager(AMQP_MANAGER_URL, {
  debug: false,
  topic: JOB_TOPIC_EXCHANGE,
  exchange: JOB_TOPIC_EXCHANGE
});

let jobManagerHybrid = new JobManager(AMQP_MANAGER_URL, {
  debug: false,
  topic: JOB_TOPIC_EXCHANGE_HYBRID,
  exchange: JOB_TOPIC_EXCHANGE_HYBRID
});

/**
 * Connects a job manager as publisher and logs the outcome.
 *
 * @param {object} manager - JobManager instance to connect.
 * @param {string} label - Name used in the log lines to tell the managers apart.
 */
function connectPublisher(manager, label) {
  manager.connectAndStartPublisher((status, error) => {
    if (!error) {
      winston.info("aiManager " + label + " - ConnectPublisher done with status: ", status);
      return;
    }
    winston.error("aiManager " + label + " connectAndStartPublisher error: ", error);
  });
}

// Connect job managers (same order as before: standard first, then hybrid)
connectPublisher(jobManager, "jobManager");
connectPublisher(jobManagerHybrid, "jobManagerHybrid");
|
|
52
|
+
|
|
53
|
+
/**
 * Helper service for knowledge-base contents: bulk creation of url contents,
 * namespace and quota checks, sitemap diffing, scrape scheduling and status
 * bookkeeping on KB documents.
 *
 * Error contract: methods that reject with a domain error use plain objects
 * (`{ errorCode, error }` or `{ code, error }`) because callers forward those
 * fields straight into HTTP responses. That shape is kept on purpose.
 */
class AiManager {

  constructor() { }

  /**
   * Upserts one 'url' content per given url inside the namespace and schedules
   * a scrape job for each resulting document.
   *
   * @param {object} namespace - Namespace document (reads id, id_project, hybrid, engine, embedding).
   * @param {string[]} urls - Urls to add.
   * @param {object} [options] - sitemap_origin_id, sitemap_origin, scrape_type, scrape_options, refresh_rate.
   * @returns {Promise<object[]|{result: object[], schedule_json: object[]}>} The saved documents;
   *   when NODE_ENV is 'test' nothing is scheduled and the job payloads are returned too.
   */
  async addMultipleUrls(namespace, urls, options = {}) {
    const kbs = urls.map((url) => ({
      id_project: namespace.id_project,
      name: url,
      source: url,
      type: 'url',
      content: "",
      namespace: namespace.id,
      status: -1,
      sitemap_origin_id: options.sitemap_origin_id,
      sitemap_origin: options.sitemap_origin,
      scrape_type: options.scrape_type,
      scrape_options: options.scrape_options,
      refresh_rate: options.refresh_rate
    }));

    const operations = kbs.map((doc) => ({
      updateOne: {
        filter: { id_project: doc.id_project, type: 'url', source: doc.source, namespace: namespace.id },
        update: doc,
        upsert: true,
        returnOriginal: false
      }
    }));

    let result;
    try {
      result = await this.saveBulk(operations, kbs, namespace.id_project, namespace.id);
    } catch (err) {
      winston.error("Error save contents in bulk: ", err);
      throw err;
    }

    const hybrid = namespace.hybrid;
    // Same fallback rule used by the reindex webhook: hybrid namespaces default to the hybrid engine.
    const engine = namespace.engine || (hybrid ? default_engine_hybrid : default_engine);
    // Copy the module-level default so the api key is never written into the shared config object.
    const embedding = namespace.embedding || { ...default_embedding };
    embedding.api_key = process.env.EMBEDDING_API_KEY || process.env.GPTKEY;
    const webhook = apiUrl + '/webhook/kb/status?token=' + KB_WEBHOOK_TOKEN;

    // Drop bookkeeping fields and rename _id / scrape_options for the job payload.
    // `...rest` stays last so fields stored on the document keep precedence, as before.
    const resources = result.map(({ name, status, __v, createdAt, updatedAt, id_project, _id, scrape_options, ...rest }) => {
      return { id: _id, webhook: webhook, parameters_scrape_type_4: scrape_options, embedding: embedding, engine: engine, hybrid: hybrid, ...rest };
    });

    winston.verbose("resources to be sent to worker: ", resources);

    if (process.env.NODE_ENV === 'test') {
      return { result, schedule_json: resources };
    }

    await this.scheduleScrape(resources, hybrid);
    return result;
  }

  /**
   * Loads a namespace and verifies it belongs to the given project.
   *
   * @param {string} id_project - Expected owner project id.
   * @param {string} namespace_id - Namespace id to look up.
   * @returns {Promise<object>} The namespace document.
   * @throws {{errorCode: number, error: string}} 404 when missing, 403 when owned by another project;
   *   query errors are rethrown unchanged.
   */
  async checkNamespace(id_project, namespace_id) {
    let namespace;
    try {
      namespace = await Namespace.findOne({ id: namespace_id });
    } catch (err) {
      winston.error("Error getting namespace ", err);
      throw err;
    }

    if (!namespace) {
      winston.warn("Namespace not found with id " + namespace_id);
      throw { errorCode: 404, error: "Namespace not found with id " + namespace_id };
    }

    if (namespace.id_project !== id_project) {
      winston.warn("Namespace not belonging to project " + id_project);
      throw { errorCode: 403, error: "Namespace not belonging to project " + id_project };
    }

    return namespace;
  }

  /**
   * Builds the LLM configuration for a provider from the project's integrations.
   *
   * @param {string} id_project - Project id.
   * @param {string} [provider='openai'] - Provider name; 'ollama' and 'vllm' also need a server url.
   * @param {string} model - Model name, returned as `name`.
   * @returns {Promise<object>} `{ provider, name, api_key }`, plus `url` for ollama/vllm.
   * @throws {{code: *, error: *}} Normalised error; falls back to `message` for native errors.
   */
  async resolveLLMConfig(id_project, provider = 'openai', model) {
    try {
      if (provider === 'ollama' || provider === 'vllm') {
        const integration = await integrationService.getIntegration(id_project, provider);

        if (!integration?.value?.url) {
          throw { code: 422, error: `Server url for ${provider} is empty or invalid` };
        }

        return {
          provider,
          name: model,
          url: integration.value.url,
          api_key: integration.value.apikey || ""
        };
      }

      const key = await integrationService.getKeyFromIntegration(id_project, provider);
      return {
        provider,
        name: model,
        api_key: key
      };
    } catch (err) {
      throw { code: err?.code, error: err?.error ?? err?.message };
    }
  }

  /**
   * Checks that adding `ncontents` contents stays within the plan limit of the project.
   *
   * @param {object} quoteManager - Must expose `getPlanLimits(project)` resolving to `{ kbs }`.
   * @param {object} project - Project document (reads `_id`).
   * @param {number} ncontents - Number of contents about to be added.
   * @returns {Promise<boolean>} `true` when there is room.
   * @throws {{errorCode: number, error: string, plan_limit: number}} 403 when the limit is
   *   reached or would be exceeded; lookup errors are rethrown unchanged.
   */
  async checkQuotaAvailability(quoteManager, project, ncontents) {
    const limits = await quoteManager.getPlanLimits(project);
    const kbs_limit = limits.kbs;
    winston.verbose("Limit of kbs for current plan: " + kbs_limit);

    const kbs_count = await KB.countDocuments({ id_project: project._id }).exec();
    winston.verbose("Kbs count: " + kbs_count);

    if (kbs_count >= kbs_limit) {
      throw { errorCode: 403, error: "Maximum number of resources reached for the current plan", plan_limit: kbs_limit };
    }

    if (kbs_count + ncontents > kbs_limit) {
      throw { errorCode: 403, error: "Cannot exceed the number of resources in the current plan", plan_limit: kbs_limit };
    }

    return true;
  }

  /**
   * Downloads a sitemap and returns the urls it lists.
   *
   * @param {string} sitemapUrl - Sitemap location.
   * @returns {Promise<string[]>} Listed urls, or an empty array when none are reported.
   * @throws Fetch errors, or the first entry of the `errors` list reported by the fetch result.
   */
  async fetchSitemap(sitemapUrl) {
    const sitemap = new Sitemapper({
      url: sitemapUrl,
      timeout: 15000,
      debug: false
    });

    const data = await sitemap.fetch();

    if (data?.errors?.length > 0) {
      throw data.errors[0];
    }

    return Array.isArray(data?.sites) ? data.sites : [];
  }

  /**
   * Compares the contents already generated from a sitemap with its current urls.
   *
   * @param {object[]} existingKbs - Existing contents (reads `source` and `_id`).
   * @param {string[]} urls - Urls currently listed by the sitemap.
   * @returns {Promise<{addedUrls: string[], removedIds: Array}>} Urls without a content yet,
   *   and ids of contents whose source is no longer listed.
   */
  async foundSitemapChanges(existingKbs, urls) {
    // Sets give constant-time membership checks instead of scanning the arrays for every item.
    const knownSources = new Set(existingKbs.map((doc) => doc.source));
    const listedUrls = new Set(urls);

    const addedUrls = urls.filter((url) => !knownSources.has(url));
    const removedIds = existingKbs
      .filter((doc) => !listedUrls.has(doc.source))
      .map((doc) => doc._id);

    return { addedUrls, removedIds };
  }

  /**
   * Turns a display name into a lowercase, dash-separated file name.
   *
   * @param {string} name - Original name.
   * @returns {Promise<string>} Normalised name.
   */
  async generateFilename(name) {
    return name
      .toLowerCase()
      .trim()
      .normalize("NFD") // Split accented characters into base letter + combining mark
      .replace(/[\u0300-\u036f]/g, "") // Drop the combining marks (e.g. à becomes a)
      .replace(/[^a-z0-9\s_-]/g, "") // Keep only letters, digits, whitespace, underscore and dash
      .replace(/\s+/g, "-") // Whitespace runs become a dash
      .replace(/_/g, "-")
      .replace(/-+/g, "-"); // Collapse consecutive dashes
  }

  /**
   * Reads the OpenAI api key from the project's integrations.
   *
   * @param {string} project_id - Project id.
   * @returns {Promise<string|null>} The api key, or null when missing or when the lookup fails.
   */
  async getKeyFromIntegrations(project_id) {
    try {
      const integration = await Integrations.findOne({ id_project: project_id, name: 'openai' });
      return integration?.value?.apikey || null;
    } catch (err) {
      winston.error("Unable to find openai integration for the current project " + project_id, err);
      return null;
    }
  }

  /**
   * Removes contents from the index and from the database.
   *
   * A content whose index deletion is not confirmed is removed from the database
   * only when its status is -1 or 400. Failures are logged per content and never
   * reject: as before, the method always resolves `true`, but now only after every
   * deletion attempt has settled.
   *
   * @param {object} namespace - Namespace document (reads engine, hybrid).
   * @param {object[]} kbs - Contents to remove (reads `_id` and `namespace`).
   * @returns {Promise<boolean>} Always `true`.
   */
  async removeMultipleContents(namespace, kbs) {
    const engine = namespace.engine || (namespace.hybrid ? default_engine_hybrid : default_engine);

    const outcomes = await Promise.allSettled(kbs.map(async (kb) => {
      const resp = await aiService.deleteIndex({
        id: kb._id,
        namespace: kb.namespace,
        engine: engine
      });
      winston.debug("delete content response: ", resp);

      if (resp?.data?.success === true) {
        await KB.findByIdAndDelete(kb._id).exec();
      } else {
        await KB.findOneAndDelete({ _id: kb._id, status: { $in: [-1, 400] } }).exec();
      }
    }));

    outcomes.forEach((outcome) => {
      if (outcome.status === 'rejected') {
        winston.error("Delete kb error: ", outcome.reason);
      }
    });

    return true;
  }

  /**
   * Runs the bulk upsert and reads back the affected documents.
   *
   * @param {object[]} operations - bulkWrite operations.
   * @param {object[]} kbs - Contents being saved (reads `source`).
   * @param {string} project_id - Project id used to read the documents back.
   * @param {string} namespace - Namespace id used to read the documents back.
   * @returns {Promise<object[]>} Lean documents matching the saved sources.
   */
  async saveBulk(operations, kbs, project_id, namespace) {
    const result = await KB.bulkWrite(operations, { ordered: false });
    winston.verbose("bulkWrite operations result: ", result);

    try {
      const sources = kbs.map((kb) => kb.source);
      const documents = await KB.find({ id_project: project_id, namespace: namespace, source: { $in: sources } }).lean();
      winston.debug("documents: ", documents);
      return documents;
    } catch (err) {
      winston.error("Error finding documents ", err);
      throw err;
    }
  }

  /**
   * @returns {{tags_to_extract: string[], unwanted_tags: string[], unwanted_classnames: string[]}}
   *   Default scrape options: extract the body tag, exclude nothing.
   */
  setDefaultScrapeOptions() {
    return {
      tags_to_extract: ["body"],
      unwanted_tags: [],
      unwanted_classnames: []
    };
  }

  /**
   * Publishes one training job per resource and records the outcome on the content:
   * status 100 when the job was published, 400 when publishing failed.
   *
   * @param {object[]} resources - Job payloads (reads `id`).
   * @param {boolean} hybrid - Selects the hybrid job manager when truthy.
   * @returns {Promise<boolean>} `false` when the selected job manager is missing, `true` otherwise.
   */
  async scheduleScrape(resources, hybrid) {
    const selectedJobManager = hybrid ? jobManagerHybrid : jobManager;

    // Check the job manager itself: a freshly constructed Scheduler is never falsy.
    if (!selectedJobManager) {
      winston.error("ScheduleScrape JobManager is not defined");
      return false;
    }

    const scheduler = new Scheduler({ jobManager: selectedJobManager });

    resources.forEach((r) => {
      winston.debug("Schedule job with following data: ", r);
      scheduler.trainSchedule(r, async (err, result) => {
        let error_code = 100;
        if (err) {
          winston.error("Scheduling error: ", err);
          error_code = 400;
        } else {
          winston.verbose("Scheduling result: ", result);
        }
        await this.updateStatus(r.id, error_code);
      });
    });

    return true;
  }

  /**
   * Scrapes a single content right away, moving its status to 200 before the call,
   * 300 on success and 400 (with the error message) on failure.
   *
   * Note: when `data.gptkey` is missing it is filled in on the passed object from GPTKEY.
   *
   * @param {object} data - Scrape payload (reads `id`, `gptkey`).
   * @returns {Promise<object>} The scrape response body, or `{ error }` when no api key is available.
   * @throws The scrape error, after the failure has been recorded on the content.
   */
  async startScrape(data) {
    if (!data.gptkey) {
      const gptkey = process.env.GPTKEY;
      if (!gptkey) {
        return { error: "GPT apikey undefined" };
      }
      data.gptkey = gptkey;
    }

    let status_updated = await this.updateStatus(data.id, 200);
    winston.verbose("status of kb " + data.id + " updated: " + status_updated);

    try {
      const resp = await aiService.singleScrape(data);
      winston.debug("singleScrape resp: ", resp.data);
      status_updated = await this.updateStatus(data.id, 300);
      winston.verbose("status of kb " + data.id + " updated: " + status_updated);
      return resp.data;
    } catch (err) {
      winston.error("singleScrape err: ", err);
      const error_message = err.response?.data?.error || "An unexpected error occurred";
      status_updated = await this.updateStatus(data.id, 400, error_message);
      winston.verbose("status of kb " + data.id + " updated: " + status_updated);
      throw err;
    }
  }

  /**
   * Maps a status code in the 0/2/3/4 scheme to the -1/200/300/400 scheme stored on contents.
   *
   * @param {number} status - Source status.
   * @returns {Promise<number>} Mapped status; any unknown value maps to -1.
   */
  async statusConverter(status) {
    switch (status) {
      case 2:
        return 200;
      case 3:
        return 300;
      case 4:
        return 400;
      case 0:
      default:
        return -1;
    }
  }

  /**
   * Stores a new status (and optionally the last error) on a content.
   *
   * @param {*} id - Content id.
   * @param {number} status - New status.
   * @param {string} [error] - Error message saved as `last_error.message`.
   * @returns {Promise<boolean>} `true` when updated; `false` when the content is missing
   *   or the update fails. Never rejects.
   */
  async updateStatus(id, status, error) {
    const update = {
      status: status,
      last_refresh: new Date()
    };

    if (error) {
      update.last_error = {
        timestamp: Date.now(),
        message: error
      };
    }

    try {
      const updatedKb = await KB.findByIdAndUpdate(id, update, { new: true }).exec();
      if (!updatedKb) {
        winston.verbose("Unable to update status. Data source not found.");
        return false;
      }
      winston.debug("updatedKb: ", updatedKb);
      return true;
    } catch (err) {
      winston.error("Unable to update status of kb " + id, err);
      return false;
    }
  }

}
|
|
461
|
+
|
|
462
|
+
const aiManager = new AiManager();
|
|
463
|
+
|
|
464
|
+
module.exports = aiManager;
|
package/services/aiService.js
CHANGED
|
@@ -205,7 +205,7 @@ class AiService {
|
|
|
205
205
|
base_url = kb_endpoint_qa_gpu;
|
|
206
206
|
}
|
|
207
207
|
winston.debug("[OPENAI SERVICE] kb endpoint: " + base_url);
|
|
208
|
-
|
|
208
|
+
console.log("aa1")
|
|
209
209
|
return new Promise((resolve, reject) => {
|
|
210
210
|
|
|
211
211
|
axios({
|
|
@@ -216,8 +216,10 @@ class AiService {
|
|
|
216
216
|
data: data,
|
|
217
217
|
method: 'POST'
|
|
218
218
|
}).then((resbody) => {
|
|
219
|
+
console.log("aa2")
|
|
219
220
|
resolve(resbody);
|
|
220
221
|
}).catch((err) => {
|
|
222
|
+
console.log("aa3")
|
|
221
223
|
reject(err);
|
|
222
224
|
})
|
|
223
225
|
|
|
@@ -291,6 +293,6 @@ class AiService {
|
|
|
291
293
|
|
|
292
294
|
}
|
|
293
295
|
|
|
294
|
-
|
|
296
|
+
const aiService = new AiService();
|
|
295
297
|
|
|
296
298
|
module.exports = aiService;
|