plugin-knowledge-base 1.1.1 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/client/13f004eeaba90fe8.js +10 -0
- package/dist/client/3864360fe4f42855.js +10 -0
- package/dist/client/index.js +1 -1
- package/dist/externalVersion.js +10 -10
- package/dist/server/actions/add-document.js +21 -3
- package/dist/server/collections/ai-knowledge-bases.js +17 -2
- package/dist/server/features/vector-store-provider-impl.js +11 -11
- package/dist/server/index.js +8 -0
- package/dist/server/pipeline/simple-embeddings.js +36 -4
- package/dist/server/pipeline/vectorization.js +120 -57
- package/dist/server/plugin.js +202 -17
- package/dist/server/providers/external-rag.js +133 -0
- package/dist/server/request-context.js +2 -1
- package/dist/server/resources/ai-knowledge-base-documents.js +38 -25
- package/dist/server/resources/ai-knowledge-base.js +33 -23
- package/dist/server/resources/ai-vector-databases.js +2 -2
- package/dist/server/resources/ai-vector-stores.js +6 -11
- package/dist/server/services/docpixie-extractor.js +139 -0
- package/package.json +9 -7
- package/README.md +0 -35
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* This file is part of the NocoBase (R) project.
|
|
3
|
+
* Copyright (c) 2020-2024 NocoBase Co., Ltd.
|
|
4
|
+
* Authors: NocoBase Team.
|
|
5
|
+
*
|
|
6
|
+
* This project is dual-licensed under AGPL-3.0 and NocoBase Commercial License.
|
|
7
|
+
* For more information, please refer to: https://www.nocobase.com/agreement.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
var __defProp = Object.defineProperty;
|
|
11
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
12
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
13
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
14
|
+
var __export = (target, all) => {
|
|
15
|
+
for (var name in all)
|
|
16
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
17
|
+
};
|
|
18
|
+
var __copyProps = (to, from, except, desc) => {
|
|
19
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
20
|
+
for (let key of __getOwnPropNames(from))
|
|
21
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
22
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
23
|
+
}
|
|
24
|
+
return to;
|
|
25
|
+
};
|
|
26
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
27
|
+
var external_rag_exports = {};
|
|
28
|
+
__export(external_rag_exports, {
|
|
29
|
+
EXTERNAL_HTTP_RAG_PROVIDER: () => EXTERNAL_HTTP_RAG_PROVIDER,
|
|
30
|
+
EXTERNAL_RAG_KB_TYPE: () => EXTERNAL_RAG_KB_TYPE,
|
|
31
|
+
externalHttpRagStrategy: () => externalHttpRagStrategy
|
|
32
|
+
});
|
|
33
|
+
module.exports = __toCommonJS(external_rag_exports);
|
|
34
|
+
const RAG_REQUEST_TIMEOUT_MS = 3e4;
|
|
35
|
+
/**
 * Validate a configured RAG endpoint URL before the server sends a request to it.
 *
 * Rejects URLs that are not http(s) and URLs whose literal host points back at
 * this machine, at a cloud metadata service, or at a private / link-local
 * network (basic SSRF protection). Only the host written in the URL is
 * inspected: a public DNS name that resolves to an internal address is NOT
 * detected here.
 *
 * @param {string} url - Endpoint URL taken from the knowledge base options.
 * @returns {void}
 * @throws {Error} When the URL cannot be parsed, uses another scheme, or
 *   targets a blocked host.
 */
function validateExternalUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    throw new Error(`Invalid RAG API URL: "${url}"`);
  }
  if (!["http:", "https:"].includes(parsed.protocol)) {
    throw new Error(`RAG API URL must use http or https: "${url}"`);
  }
  // Drop the trailing root-label dot so "localhost." is treated like "localhost".
  let hostname = parsed.hostname.toLowerCase().replace(/\.+$/, "");
  // URL#hostname keeps the square brackets around IPv6 literals.
  const ipv6 = hostname.startsWith("[") && hostname.endsWith("]") ? hostname.slice(1, -1) : "";
  // IPv4-mapped IPv6 literals are serialised as "::ffff:7f00:1"; turn them back
  // into a dotted quad so the IPv4 rules below apply to them as well.
  const mapped = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(ipv6);
  if (mapped) {
    const high = parseInt(mapped[1], 16);
    const low = parseInt(mapped[2], 16);
    hostname = `${high >> 8}.${high & 255}.${low >> 8}.${low & 255}`;
  }
  // Strict dotted-quad match: used for the whole-range loopback checks so that
  // DNS names which merely start with digits are not affected by them.
  const ipv4 = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(hostname);
  const firstOctet = ipv4 ? parseInt(ipv4[1], 10) : -1;
  const localhostNames = ["localhost", "127.0.0.1", "0.0.0.0", "::1", "[::1]", "[::]"];
  if (
    localhostNames.includes(hostname) ||
    hostname.endsWith(".localhost") ||
    firstOctet === 127 || // all of 127.0.0.0/8 is loopback, not just 127.0.0.1
    firstOctet === 0 // 0.0.0.0/8 ("this host")
  ) {
    throw new Error(`RAG API URL cannot point to localhost: "${url}"`);
  }
  if (hostname === "169.254.169.254" || hostname.startsWith("169.254.") || hostname === "metadata.google.internal" || hostname === "metadata.internal") {
    throw new Error(`RAG API URL cannot point to cloud metadata service: "${url}"`);
  }
  // These prefix checks are kept exactly as before; note they also reject DNS
  // names that happen to begin with the same characters.
  if (hostname.startsWith("10.") || hostname.startsWith("192.168.")) {
    throw new Error(`RAG API URL cannot point to private network: "${url}"`);
  }
  const match172 = /^172\.(\d+)\./.exec(hostname);
  if (match172) {
    const second = parseInt(match172[1], 10);
    if (second >= 16 && second <= 31) {
      throw new Error(`RAG API URL cannot point to private network: "${url}"`);
    }
  }
  // IPv6 unique-local (fc00::/7) and link-local (fe80::/10) literals.
  if (/^f[cd][0-9a-f]{2}:/.test(ipv6) || /^fe[89ab][0-9a-f]:/.test(ipv6)) {
    throw new Error(`RAG API URL cannot point to private network: "${url}"`);
  }
}
|
|
64
|
+
/**
 * Retrieval strategy that delegates the search to an external HTTP RAG service.
 *
 * Sends `POST <kb.options.ragApiUrl>` with a JSON body
 * `{ query, topK, scoreThreshold, namespace?, filter? }` and, when
 * `kb.options.ragApiKey` is set, an `Authorization: Bearer …` header. The
 * service must answer with JSON containing a `results` array; each entry is
 * mapped to `{ content, score, metadata, id }` and entries scoring below the
 * effective threshold are dropped.
 *
 * `kb.options.ragTopK` / `kb.options.ragScoreThreshold` override the caller's
 * values when they are usable numbers.
 *
 * @param {string} query - Search text forwarded to the service.
 * @param {object} kb - Knowledge base record; `options`, `name` and `id` are read.
 * @param {object} [searchOptions] - Caller-side search settings.
 * @param {number} [searchOptions.topK=5] - Maximum number of results requested.
 * @param {number} [searchOptions.scoreThreshold=0] - Minimum score to keep.
 * @param {*} [searchOptions.filter] - Forwarded verbatim when truthy.
 * @returns {Promise<Array<{content: *, score: number, metadata: object, id: *}>>}
 * @throws {Error} When the URL is missing or rejected, the request times out,
 *   the service answers with a non-2xx status, or the payload has no
 *   `results` array. Invalid JSON surfaces as the parser's own error.
 */
const externalHttpRagStrategy = async (query, kb, { topK = 5, scoreThreshold = 0, filter } = {}) => {
  const opts = kb.options ?? {};
  const apiUrl = opts.ragApiUrl ?? "";
  const apiKey = opts.ragApiKey ?? "";
  const namespace = opts.ragNamespace;
  // Ignore overrides that are missing or do not parse to a usable number;
  // NaN would otherwise be serialised as null or silently filter out everything.
  const configuredTopK = Number(opts.ragTopK);
  const effectiveTopK = Number.isFinite(configuredTopK) && configuredTopK > 0 ? configuredTopK : topK;
  const configuredThreshold = opts.ragScoreThreshold != null ? Number(opts.ragScoreThreshold) : NaN;
  const effectiveThreshold = Number.isFinite(configuredThreshold) ? configuredThreshold : scoreThreshold;
  if (!apiUrl) {
    throw new Error(
      `Knowledge base "${kb.name ?? kb.id}" (EXTERNAL_RAG) is missing options.ragApiUrl`
    );
  }
  validateExternalUrl(apiUrl);
  const body = {
    query,
    topK: effectiveTopK,
    scoreThreshold: effectiveThreshold,
    ...namespace ? { namespace } : {},
    ...filter ? { filter } : {}
  };
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), RAG_REQUEST_TIMEOUT_MS);
  let data;
  try {
    // NOTE(review): fetch follows redirects by default, so an allowed host can
    // still redirect to a blocked one; consider `redirect: "error"` if the
    // configured services never rely on redirects.
    const response = await fetch(apiUrl, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        ...apiKey ? { Authorization: `Bearer ${apiKey}` } : {}
      },
      body: JSON.stringify(body),
      signal: controller.signal
    });
    if (!response.ok) {
      const errorText = await response.text().catch(() => "");
      throw new Error(
        `External RAG API "${apiUrl}" returned HTTP ${response.status}${errorText ? ": " + errorText.slice(0, 200) : ""}`
      );
    }
    // The body is read inside the try so the timeout also covers a stalled
    // response stream, not just the wait for the headers.
    data = await response.json();
  } catch (err) {
    if (err && err.name === "AbortError") {
      throw new Error(
        `External RAG API "${apiUrl}" timed out after ${RAG_REQUEST_TIMEOUT_MS / 1e3}s`
      );
    }
    throw err;
  } finally {
    clearTimeout(timeoutId);
  }
  if (data == null || !Array.isArray(data.results)) {
    throw new Error(
      `External RAG API "${apiUrl}" response is missing a "results" array`
    );
  }
  return data.results
    .filter((r) => r != null && (r.score ?? 0) >= effectiveThreshold)
    .map((r) => ({
      content: r.content,
      score: r.score ?? 0,
      metadata: r.metadata ?? {},
      id: r.id
    }));
};
|
|
126
|
+
const EXTERNAL_RAG_KB_TYPE = "EXTERNAL_RAG";
|
|
127
|
+
const EXTERNAL_HTTP_RAG_PROVIDER = "external-http";
|
|
128
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
129
|
+
0 && (module.exports = {
|
|
130
|
+
EXTERNAL_HTTP_RAG_PROVIDER,
|
|
131
|
+
EXTERNAL_RAG_KB_TYPE,
|
|
132
|
+
externalHttpRagStrategy
|
|
133
|
+
});
|
|
@@ -43,7 +43,8 @@ function getCurrentUserRoles() {
|
|
|
43
43
|
return ((_a = requestContext.getStore()) == null ? void 0 : _a.userRoles) ?? [];
|
|
44
44
|
}
|
|
45
45
|
/**
 * Run `fn` inside a request-context store whose `userId` is the given one.
 * Every other field of the store that is active at call time is carried over.
 *
 * @param {*} userId - User id to expose through the request context.
 * @param {Function} fn - Callback executed inside the new store.
 * @returns {*} Whatever `requestContext.run` returns for `fn`.
 */
function runWithUserId(userId, fn) {
  const inherited = requestContext.getStore();
  const store = Object.assign({}, inherited ?? {}, { userId });
  return requestContext.run(store, fn);
}
|
|
48
49
|
var request_context_default = requestContext;
|
|
49
50
|
// Annotate the CommonJS export names for ESM import in node:
|
|
@@ -7,9 +7,11 @@
|
|
|
7
7
|
* For more information, please refer to: https://www.nocobase.com/agreement.
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
|
+
var __create = Object.create;
|
|
10
11
|
var __defProp = Object.defineProperty;
|
|
11
12
|
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
12
13
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
14
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
13
15
|
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
14
16
|
var __export = (target, all) => {
|
|
15
17
|
for (var name in all)
|
|
@@ -23,12 +25,28 @@ var __copyProps = (to, from, except, desc) => {
|
|
|
23
25
|
}
|
|
24
26
|
return to;
|
|
25
27
|
};
|
|
28
|
+
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
29
|
+
// If the importer is in node compatibility mode or this is not an ESM
|
|
30
|
+
// file that has been converted to a CommonJS file using a Babel-
|
|
31
|
+
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
32
|
+
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
33
|
+
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
34
|
+
mod
|
|
35
|
+
));
|
|
26
36
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
27
37
|
var ai_knowledge_base_documents_exports = {};
|
|
28
38
|
__export(ai_knowledge_base_documents_exports, {
|
|
29
39
|
default: () => ai_knowledge_base_documents_default
|
|
30
40
|
});
|
|
31
41
|
module.exports = __toCommonJS(ai_knowledge_base_documents_exports);
|
|
42
|
+
var import_plugin = __toESM(require("../plugin"));
|
|
43
|
+
/**
 * Look up this plugin's instance through the application's plugin manager.
 *
 * @param {object} ctx - Request context; `ctx.app.pm.get` is called.
 * @returns {*} The value returned by the plugin manager, or `null` when the
 *   lookup throws.
 */
function getPlugin(ctx) {
  let plugin = null;
  try {
    plugin = ctx.app.pm.get(import_plugin.default);
  } catch {
    // Any lookup failure is reported to the caller as "no plugin" (null).
  }
  return plugin;
}
|
|
32
50
|
async function checkKBAccess(ctx, knowledgeBaseId) {
|
|
33
51
|
var _a, _b, _c, _d;
|
|
34
52
|
const userId = (_b = (_a = ctx.auth) == null ? void 0 : _a.user) == null ? void 0 : _b.id;
|
|
@@ -104,6 +122,10 @@ var ai_knowledge_base_documents_default = {
|
|
|
104
122
|
const kb = await kbRepo.findOne({ filter: { id: values.knowledgeBaseId } });
|
|
105
123
|
if (kb) {
|
|
106
124
|
const kbData = kb.toJSON();
|
|
125
|
+
if (kbData.type === "EXTERNAL_RAG") {
|
|
126
|
+
ctx.throw(400, "Cannot upload documents to an external RAG knowledge base");
|
|
127
|
+
return;
|
|
128
|
+
}
|
|
107
129
|
if (kbData.accessLevel === "BASIC" && kbData.ownerId !== userId) {
|
|
108
130
|
ctx.throw(403, "Only the owner can upload documents to a personal knowledge base");
|
|
109
131
|
return;
|
|
@@ -123,27 +145,19 @@ var ai_knowledge_base_documents_default = {
|
|
|
123
145
|
}
|
|
124
146
|
values.uploadedById = userId;
|
|
125
147
|
const doc = await repo.create({ values });
|
|
126
|
-
const
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
if (
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
if (setClauses.length > 0) {
|
|
138
|
-
bindValues.push(doc.get("id"));
|
|
139
|
-
await ctx.db.sequelize.query(
|
|
140
|
-
`UPDATE "aiKnowledgeBaseDocuments" SET ${setClauses.join(", ")} WHERE id = $${paramIdx}`,
|
|
141
|
-
{ bind: bindValues }
|
|
142
|
-
);
|
|
143
|
-
if (values.knowledgeBaseId) doc.set("knowledgeBaseId", values.knowledgeBaseId);
|
|
144
|
-
if (values.fileId) doc.set("fileId", values.fileId);
|
|
148
|
+
const fkUpdates = {};
|
|
149
|
+
if (values.knowledgeBaseId) fkUpdates.knowledgeBaseId = values.knowledgeBaseId;
|
|
150
|
+
if (values.fileId) fkUpdates.fileId = values.fileId;
|
|
151
|
+
if (Object.keys(fkUpdates).length > 0) {
|
|
152
|
+
await repo.update({
|
|
153
|
+
filterByTk: doc.get("id"),
|
|
154
|
+
values: fkUpdates
|
|
155
|
+
});
|
|
156
|
+
for (const [k, v] of Object.entries(fkUpdates)) {
|
|
157
|
+
doc.set(k, v);
|
|
158
|
+
}
|
|
145
159
|
}
|
|
146
|
-
const plugin = ctx
|
|
160
|
+
const plugin = getPlugin(ctx);
|
|
147
161
|
if (plugin == null ? void 0 : plugin.vectorizationPipeline) {
|
|
148
162
|
plugin.vectorizationPipeline.processDocument(doc.id).catch((err) => {
|
|
149
163
|
ctx.logger.error(`Vectorization failed for document ${doc.id}:`, err);
|
|
@@ -185,11 +199,10 @@ var ai_knowledge_base_documents_default = {
|
|
|
185
199
|
await next();
|
|
186
200
|
},
|
|
187
201
|
async reprocess(ctx, next) {
|
|
188
|
-
var _a, _b
|
|
202
|
+
var _a, _b;
|
|
189
203
|
const { filterByTk } = ctx.action.params;
|
|
190
204
|
const repo = ctx.db.getRepository("aiKnowledgeBaseDocuments");
|
|
191
|
-
const
|
|
192
|
-
const roles = ((_c = ctx.state) == null ? void 0 : _c.currentRoles) ?? [];
|
|
205
|
+
const roles = ((_a = ctx.state) == null ? void 0 : _a.currentRoles) ?? [];
|
|
193
206
|
const isAdmin = roles.includes("root") || roles.includes("admin");
|
|
194
207
|
if (!isAdmin) {
|
|
195
208
|
const doc = await repo.findOne({ filterByTk });
|
|
@@ -202,7 +215,7 @@ var ai_knowledge_base_documents_default = {
|
|
|
202
215
|
return;
|
|
203
216
|
}
|
|
204
217
|
if ((kbData == null ? void 0 : kbData.accessLevel) === "SHARED") {
|
|
205
|
-
const canUpload = (
|
|
218
|
+
const canUpload = (_b = kbData.uploadRoles) == null ? void 0 : _b.some((r) => roles.includes(r));
|
|
206
219
|
if (!canUpload) {
|
|
207
220
|
ctx.throw(403, "You do not have permission to reprocess this document");
|
|
208
221
|
return;
|
|
@@ -215,7 +228,7 @@ var ai_knowledge_base_documents_default = {
|
|
|
215
228
|
filterByTk,
|
|
216
229
|
values: { status: "pending", error: null, chunkCount: 0 }
|
|
217
230
|
});
|
|
218
|
-
const plugin = ctx
|
|
231
|
+
const plugin = getPlugin(ctx);
|
|
219
232
|
if (plugin == null ? void 0 : plugin.vectorizationPipeline) {
|
|
220
233
|
plugin.vectorizationPipeline.processDocument(filterByTk).catch((err) => {
|
|
221
234
|
ctx.logger.error(`Re-vectorization failed for document ${filterByTk}:`, err);
|
|
@@ -7,9 +7,11 @@
|
|
|
7
7
|
* For more information, please refer to: https://www.nocobase.com/agreement.
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
|
+
var __create = Object.create;
|
|
10
11
|
var __defProp = Object.defineProperty;
|
|
11
12
|
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
12
13
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
14
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
13
15
|
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
14
16
|
var __export = (target, all) => {
|
|
15
17
|
for (var name in all)
|
|
@@ -23,12 +25,24 @@ var __copyProps = (to, from, except, desc) => {
|
|
|
23
25
|
}
|
|
24
26
|
return to;
|
|
25
27
|
};
|
|
28
|
+
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
29
|
+
// If the importer is in node compatibility mode or this is not an ESM
|
|
30
|
+
// file that has been converted to a CommonJS file using a Babel-
|
|
31
|
+
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
32
|
+
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
33
|
+
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
34
|
+
mod
|
|
35
|
+
));
|
|
26
36
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
27
37
|
var ai_knowledge_base_exports = {};
|
|
28
38
|
__export(ai_knowledge_base_exports, {
|
|
29
39
|
default: () => ai_knowledge_base_default
|
|
30
40
|
});
|
|
31
41
|
module.exports = __toCommonJS(ai_knowledge_base_exports);
|
|
42
|
+
var import_plugin = __toESM(require("../plugin"));
|
|
43
|
+
/**
 * Look up this plugin's instance through the application's plugin manager.
 * Errors raised by the lookup propagate to the caller.
 *
 * @param {object} ctx - Request context; `ctx.app.pm.get` is called.
 * @returns {*} The value returned by the plugin manager.
 */
function getPlugin(ctx) {
  const { pm } = ctx.app;
  return pm.get(import_plugin.default);
}
|
|
32
46
|
async function checkKBPermission(ctx, filterByTk, action) {
|
|
33
47
|
var _a, _b;
|
|
34
48
|
const repo = ctx.db.getRepository("aiKnowledgeBases");
|
|
@@ -89,17 +103,19 @@ var ai_knowledge_base_default = {
|
|
|
89
103
|
filterByTk,
|
|
90
104
|
appends: ["vectorStore", "documents"]
|
|
91
105
|
});
|
|
92
|
-
if (record) {
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
106
|
+
if (!record) {
|
|
107
|
+
ctx.throw(404, "Knowledge base not found");
|
|
108
|
+
return;
|
|
109
|
+
}
|
|
110
|
+
const data = record.toJSON();
|
|
111
|
+
const userId = (_b = (_a = ctx.auth) == null ? void 0 : _a.user) == null ? void 0 : _b.id;
|
|
112
|
+
const roles = ctx.state.currentRoles ?? [];
|
|
113
|
+
const isAdmin = roles.includes("root") || roles.includes("admin");
|
|
114
|
+
if (!isAdmin) {
|
|
115
|
+
const hasAccess = data.accessLevel === "PUBLIC" || data.accessLevel === "BASIC" && data.ownerId === userId || data.accessLevel === "SHARED" && ((_c = data.allowedRoles) == null ? void 0 : _c.some((r) => roles.includes(r)));
|
|
116
|
+
if (!hasAccess) {
|
|
117
|
+
ctx.throw(403, "Access denied");
|
|
118
|
+
return;
|
|
103
119
|
}
|
|
104
120
|
}
|
|
105
121
|
ctx.body = record;
|
|
@@ -120,13 +136,14 @@ var ai_knowledge_base_default = {
|
|
|
120
136
|
if (values.accessLevel === "BASIC") {
|
|
121
137
|
values.ownerId = userId;
|
|
122
138
|
}
|
|
123
|
-
|
|
139
|
+
const record = await repo.create({ values });
|
|
124
140
|
if (values.vectorStoreId) {
|
|
125
|
-
await
|
|
126
|
-
|
|
127
|
-
{
|
|
128
|
-
);
|
|
141
|
+
await repo.update({
|
|
142
|
+
filterByTk: record.get("id"),
|
|
143
|
+
values: { vectorStoreId: values.vectorStoreId }
|
|
144
|
+
});
|
|
129
145
|
}
|
|
146
|
+
ctx.body = record;
|
|
130
147
|
await next();
|
|
131
148
|
},
|
|
132
149
|
async update(ctx, next) {
|
|
@@ -143,15 +160,8 @@ var ai_knowledge_base_default = {
|
|
|
143
160
|
filterByTk,
|
|
144
161
|
values
|
|
145
162
|
});
|
|
146
|
-
if (values.vectorStoreId) {
|
|
147
|
-
await ctx.db.sequelize.query(
|
|
148
|
-
`UPDATE "aiKnowledgeBases" SET "vectorStoreId" = $1 WHERE id = $2`,
|
|
149
|
-
{ bind: [values.vectorStoreId, filterByTk] }
|
|
150
|
-
);
|
|
151
|
-
}
|
|
152
163
|
await next();
|
|
153
164
|
},
|
|
154
|
-
// Fix #3: destroy with permission checks
|
|
155
165
|
async destroy(ctx, next) {
|
|
156
166
|
const { filterByTk } = ctx.action.params;
|
|
157
167
|
const repo = ctx.db.getRepository("aiKnowledgeBases");
|
|
@@ -29,6 +29,7 @@ __export(ai_vector_databases_exports, {
|
|
|
29
29
|
default: () => ai_vector_databases_default
|
|
30
30
|
});
|
|
31
31
|
module.exports = __toCommonJS(ai_vector_databases_exports);
|
|
32
|
+
var import_pg = require("pg");
|
|
32
33
|
var ai_vector_databases_default = {
|
|
33
34
|
name: "aiVectorDatabase",
|
|
34
35
|
actions: {
|
|
@@ -85,8 +86,7 @@ var ai_vector_databases_default = {
|
|
|
85
86
|
return;
|
|
86
87
|
}
|
|
87
88
|
try {
|
|
88
|
-
const
|
|
89
|
-
const client = new Client({
|
|
89
|
+
const client = new import_pg.Client({
|
|
90
90
|
host: connectParams.host,
|
|
91
91
|
port: connectParams.port || 5432,
|
|
92
92
|
user: connectParams.username,
|
|
@@ -58,13 +58,14 @@ var ai_vector_stores_default = {
|
|
|
58
58
|
const rawValues = ctx.action.params.values || {};
|
|
59
59
|
const values = rawValues.values || rawValues;
|
|
60
60
|
const repo = ctx.db.getRepository("aiVectorStores");
|
|
61
|
-
|
|
61
|
+
const record = await repo.create({ values });
|
|
62
62
|
if (values.vectorDatabaseId) {
|
|
63
|
-
await
|
|
64
|
-
|
|
65
|
-
{
|
|
66
|
-
);
|
|
63
|
+
await repo.update({
|
|
64
|
+
filterByTk: record.get("id"),
|
|
65
|
+
values: { vectorDatabaseId: values.vectorDatabaseId }
|
|
66
|
+
});
|
|
67
67
|
}
|
|
68
|
+
ctx.body = record;
|
|
68
69
|
await next();
|
|
69
70
|
},
|
|
70
71
|
async update(ctx, next) {
|
|
@@ -76,12 +77,6 @@ var ai_vector_stores_default = {
|
|
|
76
77
|
filterByTk,
|
|
77
78
|
values
|
|
78
79
|
});
|
|
79
|
-
if (values.vectorDatabaseId) {
|
|
80
|
-
await ctx.db.sequelize.query(
|
|
81
|
-
`UPDATE "aiVectorStores" SET "vectorDatabaseId" = $1 WHERE id = $2`,
|
|
82
|
-
{ bind: [values.vectorDatabaseId, filterByTk] }
|
|
83
|
-
);
|
|
84
|
-
}
|
|
85
80
|
await next();
|
|
86
81
|
},
|
|
87
82
|
async destroy(ctx, next) {
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* This file is part of the NocoBase (R) project.
|
|
3
|
+
* Copyright (c) 2020-2024 NocoBase Co., Ltd.
|
|
4
|
+
* Authors: NocoBase Team.
|
|
5
|
+
*
|
|
6
|
+
* This project is dual-licensed under AGPL-3.0 and NocoBase Commercial License.
|
|
7
|
+
* For more information, please refer to: https://www.nocobase.com/agreement.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
var __defProp = Object.defineProperty;
|
|
11
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
12
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
13
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
14
|
+
var __export = (target, all) => {
|
|
15
|
+
for (var name in all)
|
|
16
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
17
|
+
};
|
|
18
|
+
var __copyProps = (to, from, except, desc) => {
|
|
19
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
20
|
+
for (let key of __getOwnPropNames(from))
|
|
21
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
22
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
23
|
+
}
|
|
24
|
+
return to;
|
|
25
|
+
};
|
|
26
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
27
|
+
var docpixie_extractor_exports = {};
|
|
28
|
+
__export(docpixie_extractor_exports, {
|
|
29
|
+
DocPixieExtractor: () => DocPixieExtractor
|
|
30
|
+
});
|
|
31
|
+
module.exports = __toCommonJS(docpixie_extractor_exports);
|
|
32
|
+
var import_os = require("os");
|
|
33
|
+
var import_path = require("path");
|
|
34
|
+
var import_promises = require("fs/promises");
|
|
35
|
+
var import_fs = require("fs");
|
|
36
|
+
/**
 * Bridge to an optional DocPixie plugin: runs document extraction through the
 * plugin's service and reads the resulting page texts back from the database.
 */
class DocPixieExtractor {
  /**
   * @param {object} db - Database handle; `getRepository(name)` is used.
   * @param {Function} getDocpixiePlugin - Returns the DocPixie plugin instance,
   *   or a nullish value when it is not available.
   */
  constructor(db, getDocpixiePlugin) {
    this.db = db;
    this.getDocpixiePlugin = getDocpixiePlugin;
  }
  /**
   * Whether the DocPixie plugin is present and its service reports ready.
   *
   * @returns {boolean}
   */
  isAvailable() {
    var _a;
    const plugin = this.getDocpixiePlugin();
    return !!((_a = plugin == null ? void 0 : plugin.service) == null ? void 0 : _a.isReady());
  }
  /**
   * Extract text from a local file path.
   * Awaits `service.processDocument()` and then collects the page texts.
   * Returns null on any failure so callers can fall through to the next extractor.
   *
   * @param {string} filePath - Path of the file to process.
   * @param {string} filename - Name passed to DocPixie as the document name.
   * @param {*} userId - User id passed through to DocPixie.
   * @returns {Promise<{text: string, documentId: *} | null>}
   */
  async extractFromPath(filePath, filename, userId) {
    var _a;
    const plugin = this.getDocpixiePlugin();
    if (!((_a = plugin == null ? void 0 : plugin.service) == null ? void 0 : _a.isReady())) return null;
    try {
      const documentId = await plugin.service.processDocument(filePath, {
        name: filename,
        userId
      });
      const text = await this.getPageTexts(documentId);
      return { text, documentId };
    } catch {
      // Deliberate best-effort: a failed extraction is reported as "no result".
      return null;
    }
  }
  /**
   * Extract text from an in-memory buffer (for files already downloaded, e.g. from S3).
   * Writes the buffer into a freshly created private temp directory, runs
   * DocPixie extraction on it, and removes the directory before returning.
   *
   * @param {Buffer|Uint8Array|string} buffer - File contents.
   * @param {string} filename - Name passed to DocPixie as the document name.
   * @param {string} extname - File extension, with or without a leading dot.
   * @param {*} userId - User id passed through to DocPixie.
   * @returns {Promise<{text: string, documentId: *} | null>}
   */
  async extractFromBuffer(buffer, filename, extname, userId) {
    var _a;
    const plugin = this.getDocpixiePlugin();
    if (!((_a = plugin == null ? void 0 : plugin.service) == null ? void 0 : _a.isReady())) return null;
    // Keep only letters, digits and dots so a hostile extension such as
    // "../../x" cannot steer the temp file outside its directory.
    const safeExt = String(extname ?? "").replace(/[^A-Za-z0-9.]/g, "").replace(/^\.+/, "");
    const ext = safeExt ? `.${safeExt}` : "";
    // mkdtemp creates a uniquely named directory, which avoids the predictable
    // Date.now()/Math.random() file name in the shared temp directory.
    const tempDir = await (0, import_promises.mkdtemp)(
      (0, import_path.join)((0, import_os.tmpdir)(), "kb-docpixie-")
    );
    const tempPath = (0, import_path.join)(tempDir, `document${ext}`);
    try {
      await (0, import_promises.writeFile)(tempPath, buffer);
      return await this.extractFromPath(tempPath, filename, userId);
    } finally {
      // Await the cleanup so the file is gone when the caller resumes; a failed
      // cleanup must not mask the extraction result.
      await (0, import_promises.rm)(tempDir, { recursive: true, force: true }).catch(() => {
      });
    }
  }
  /**
   * Concatenate all page texts for a DocPixie document, ordered by page number.
   * Queries the docpixie_pages collection directly; pages without
   * `structuredText` are skipped.
   *
   * @param {*} documentId - DocPixie document id.
   * @returns {Promise<string>} Page texts joined by a blank line.
   */
  async getPageTexts(documentId) {
    const pageRepo = this.db.getRepository("docpixie_pages");
    const pages = await pageRepo.find({
      filter: { documentId },
      sort: ["pageNumber"]
    });
    return pages.map((p) => p.get("structuredText") || "").filter(Boolean).join("\n\n");
  }
  /**
   * Build one string with the page text (or, failing that, the summary) of the
   * given DocPixie documents, each wrapped in a `<docpixie_document>` tag.
   *
   * Only the first `maxDocs` ids are considered. Documents that are missing,
   * not in status "ready", empty, or that fail to load are skipped.
   *
   * @param {Array} documentIds - DocPixie document ids.
   * @param {number} [maxDocs=3] - Maximum number of ids to look at.
   * @returns {Promise<string>} Formatted blocks joined by a blank line, or "".
   */
  async buildDeepContext(documentIds, maxDocs = 3) {
    if (!documentIds || !documentIds.length) return "";
    const docRepo = this.db.getRepository("docpixie_documents");
    const ids = documentIds.slice(0, maxDocs);
    const parts = [];
    for (const id of ids) {
      try {
        const docRecord = await docRepo.findOne({ filter: { id } });
        if (!docRecord) continue;
        const status = docRecord.get("status");
        const name = docRecord.get("name");
        const summary = docRecord.get("summary");
        if (status !== "ready") continue;
        const pageText = await this.getPageTexts(id);
        if (!pageText && !summary) continue;
        const content = pageText || summary || "";
        parts.push(
          `<docpixie_document id="${id}" name="${name}">
${content}
</docpixie_document>`
        );
      } catch {
        // Best-effort: one unreadable document must not drop the others.
      }
    }
    return parts.join("\n\n");
  }
}
|
|
136
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
137
|
+
0 && (module.exports = {
|
|
138
|
+
DocPixieExtractor
|
|
139
|
+
});
|
package/package.json
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
"description": "Provides Knowledge Base management, Vector Store, Vector Database (PGVector), and RAG retrieval capabilities for AI Employees.",
|
|
7
7
|
"description.vi-VN": "Cung cấp quản lý Cơ sở tri thức, Vector Store, Vector Database (PGVector), và khả năng truy xuất RAG cho nhân viên AI.",
|
|
8
8
|
"description.zh-CN": "为 AI 员工提供知识库管理、向量存储、向量数据库 (PGVector) 和 RAG 检索功能。",
|
|
9
|
-
"version": "1.1.
|
|
9
|
+
"version": "1.1.4",
|
|
10
10
|
"license": "Apache-2.0",
|
|
11
11
|
"main": "./dist/server/index.js",
|
|
12
12
|
"files": [
|
|
@@ -18,18 +18,20 @@
|
|
|
18
18
|
"README.md"
|
|
19
19
|
],
|
|
20
20
|
"dependencies": {
|
|
21
|
-
"pg": "^8.13.0"
|
|
21
|
+
"pg": "^8.13.0",
|
|
22
|
+
"@langchain/community": "^1.1.0",
|
|
23
|
+
"@langchain/core": "^1.1.24",
|
|
24
|
+
"@langchain/textsplitters": "^0.1.0"
|
|
22
25
|
},
|
|
23
26
|
"peerDependencies": {
|
|
24
27
|
"@nocobase/client": "2.x",
|
|
25
28
|
"@nocobase/server": "2.x",
|
|
26
29
|
"@nocobase/database": "2.x",
|
|
27
30
|
"@nocobase/plugin-ai": "2.x",
|
|
28
|
-
"@nocobase/plugin-file-manager": "2.x"
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
"@
|
|
32
|
-
"@langchain/textsplitters": "^0.1.0"
|
|
31
|
+
"@nocobase/plugin-file-manager": "2.x"
|
|
32
|
+
},
|
|
33
|
+
"devDependencies": {
|
|
34
|
+
"@nocobase/test": "2.x"
|
|
33
35
|
},
|
|
34
36
|
"keywords": [
|
|
35
37
|
"AI",
|
package/README.md
DELETED
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
# plugin-knowledge-base
|
|
2
|
-
|
|
3
|
-
AI Knowledge Base plugin for NocoBase. Provides RAG (Retrieval Augmented Generation) capabilities for AI Employees.
|
|
4
|
-
|
|
5
|
-
## Features
|
|
6
|
-
|
|
7
|
-
- **Knowledge Base Management** — Create and manage knowledge bases with document upload
|
|
8
|
-
- **Access Control (ACL)** — 4 access levels: Private, Basic, Shared (role-based), Public
|
|
9
|
-
- **Shared mode**: `allowedRoles` (read/search) and `uploadRoles` (upload documents) per KB
|
|
10
|
-
- Permission enforcement on all operations: list, create, destroy, reprocess, RAG search
|
|
11
|
-
- **Vector Store** — Bind Embedding models with Vector Databases
|
|
12
|
-
- **Vector Database** — PGVector connection management with connection testing
|
|
13
|
-
- **Document Processing** — Automatic document parsing, chunking, and vectorization
|
|
14
|
-
- File upload (.pdf, .txt, .md, .doc, .docx, .ppt, .csv, .json)
|
|
15
|
-
- Paste text documents directly
|
|
16
|
-
- Auto-delete source file option after successful embedding
|
|
17
|
-
- **RAG Retrieval** — Semantic search for AI Employee conversations via Work Context
|
|
18
|
-
- **KB Management Popup in Chat** — Users can browse, upload, and select KBs directly from AI chat
|
|
19
|
-
- Left sidebar with KB list and access level badges
|
|
20
|
-
- Document table with status, chunk count, and actions
|
|
21
|
-
- File upload and drag-and-drop support
|
|
22
|
-
- "Select for Chat" toggle for RAG context
|
|
23
|
-
|
|
24
|
-
## Requirements
|
|
25
|
-
|
|
26
|
-
- NocoBase 2.x
|
|
27
|
-
- Plugin AI (`@nocobase/plugin-ai`) enabled
|
|
28
|
-
- PostgreSQL with `pgvector` extension (for PGVector provider)
|
|
29
|
-
|
|
30
|
-
## Installation
|
|
31
|
-
|
|
32
|
-
```bash
|
|
33
|
-
yarn pm add plugin-knowledge-base
|
|
34
|
-
yarn pm enable plugin-knowledge-base
|
|
35
|
-
```
|