plugin-knowledge-base 1.1.1 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/client/13f004eeaba90fe8.js +10 -0
- package/dist/client/3864360fe4f42855.js +10 -0
- package/dist/client/index.js +1 -1
- package/dist/externalVersion.js +10 -10
- package/dist/server/actions/add-document.js +21 -3
- package/dist/server/collections/ai-knowledge-bases.js +17 -2
- package/dist/server/features/vector-store-provider-impl.js +11 -11
- package/dist/server/index.js +8 -0
- package/dist/server/pipeline/simple-embeddings.js +36 -4
- package/dist/server/pipeline/vectorization.js +120 -57
- package/dist/server/plugin.js +202 -17
- package/dist/server/providers/external-rag.js +133 -0
- package/dist/server/request-context.js +2 -1
- package/dist/server/resources/ai-knowledge-base-documents.js +38 -25
- package/dist/server/resources/ai-knowledge-base.js +33 -23
- package/dist/server/resources/ai-vector-databases.js +2 -2
- package/dist/server/resources/ai-vector-stores.js +6 -11
- package/dist/server/services/docpixie-extractor.js +139 -0
- package/package.json +9 -7
- package/README.md +0 -35
|
@@ -29,13 +29,17 @@ __export(vectorization_exports, {
|
|
|
29
29
|
VectorizationPipeline: () => VectorizationPipeline
|
|
30
30
|
});
|
|
31
31
|
module.exports = __toCommonJS(vectorization_exports);
|
|
32
|
+
var import_path = require("path");
|
|
33
|
+
var import_promises = require("fs/promises");
|
|
32
34
|
var import_text_splitter = require("./text-splitter");
|
|
35
|
+
var import_simple_embeddings = require("./simple-embeddings");
|
|
33
36
|
class VectorizationPipeline {
|
|
34
|
-
constructor(plugin) {
|
|
37
|
+
constructor(plugin, docpixieExtractor) {
|
|
35
38
|
this.plugin = plugin;
|
|
39
|
+
this.docpixieExtractor = docpixieExtractor;
|
|
36
40
|
}
|
|
37
41
|
async processDocument(documentId, options) {
|
|
38
|
-
var _a;
|
|
42
|
+
var _a, _b;
|
|
39
43
|
const docRepo = this.plugin.db.getRepository("aiKnowledgeBaseDocuments");
|
|
40
44
|
await docRepo.update({
|
|
41
45
|
filter: { id: documentId },
|
|
@@ -60,37 +64,54 @@ class VectorizationPipeline {
|
|
|
60
64
|
throw new Error("Knowledge base has no vector store configured");
|
|
61
65
|
}
|
|
62
66
|
let rawText;
|
|
67
|
+
let docpixieDocumentId = null;
|
|
63
68
|
if (textContent) {
|
|
64
69
|
rawText = textContent;
|
|
65
70
|
} else {
|
|
66
|
-
const
|
|
67
|
-
if ((_a =
|
|
68
|
-
const
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
if (filePath.startsWith("/storage/") || filePath.startsWith("storage/")) {
|
|
81
|
-
const storageBase = path.resolve(process.cwd(), "storage");
|
|
82
|
-
filePath = path.resolve(storageBase, filePath.replace(/^\/?(storage\/)?/, ""));
|
|
71
|
+
const resolvedPath = this.resolveLocalPath(file);
|
|
72
|
+
if (knowledgeBase.useDocpixie && ((_a = this.docpixieExtractor) == null ? void 0 : _a.isAvailable()) && resolvedPath) {
|
|
73
|
+
const filename = file.filename ?? doc.filename ?? "document";
|
|
74
|
+
const result = await this.docpixieExtractor.extractFromPath(
|
|
75
|
+
resolvedPath,
|
|
76
|
+
filename,
|
|
77
|
+
doc.uploadedById ?? void 0
|
|
78
|
+
);
|
|
79
|
+
if (result) {
|
|
80
|
+
rawText = result.text;
|
|
81
|
+
docpixieDocumentId = result.documentId;
|
|
82
|
+
this.plugin.app.logger.info(
|
|
83
|
+
`[Vectorization] DocPixie extraction: doc="${filename}" docpixie_id=${result.documentId} chars=${result.text.length}`
|
|
84
|
+
);
|
|
83
85
|
}
|
|
84
|
-
|
|
85
|
-
|
|
86
|
+
}
|
|
87
|
+
if (!rawText) {
|
|
88
|
+
const aiPlugin = this.plugin.aiPlugin;
|
|
89
|
+
if ((_b = aiPlugin.documentLoaders) == null ? void 0 : _b.cached) {
|
|
90
|
+
const parseResult = await aiPlugin.documentLoaders.cached.load(file);
|
|
91
|
+
if (!parseResult.supported) {
|
|
92
|
+
throw new Error(`File type not supported: ${file.filename}`);
|
|
93
|
+
}
|
|
94
|
+
rawText = parseResult.text || "";
|
|
95
|
+
} else if (resolvedPath) {
|
|
96
|
+
try {
|
|
97
|
+
await (0, import_promises.access)(resolvedPath);
|
|
98
|
+
} catch {
|
|
99
|
+
throw new Error(`File not found at: ${resolvedPath}`);
|
|
100
|
+
}
|
|
101
|
+
rawText = await (0, import_promises.readFile)(resolvedPath, "utf-8");
|
|
102
|
+
} else {
|
|
103
|
+
throw new Error(`Cannot resolve file for: ${file.filename}`);
|
|
86
104
|
}
|
|
87
|
-
rawText = fs.readFileSync(filePath, "utf-8");
|
|
88
105
|
}
|
|
89
106
|
}
|
|
90
107
|
if (!rawText || rawText.trim().length === 0) {
|
|
91
108
|
await docRepo.update({
|
|
92
109
|
filter: { id: documentId },
|
|
93
|
-
values: {
|
|
110
|
+
values: {
|
|
111
|
+
status: "success",
|
|
112
|
+
chunkCount: 0,
|
|
113
|
+
...docpixieDocumentId ? { meta: { ...doc.meta ?? {}, docpixieDocumentId } } : {}
|
|
114
|
+
}
|
|
94
115
|
});
|
|
95
116
|
return { success: true, chunkCount: 0 };
|
|
96
117
|
}
|
|
@@ -102,12 +123,18 @@ class VectorizationPipeline {
|
|
|
102
123
|
source: (file == null ? void 0 : file.filename) ?? doc.filename ?? "pasted-text",
|
|
103
124
|
// Permission metadata for vector-level filtering
|
|
104
125
|
userId: doc.uploadedById ?? null,
|
|
105
|
-
accessLevel: knowledgeBase.accessLevel ?? "PUBLIC"
|
|
126
|
+
accessLevel: knowledgeBase.accessLevel ?? "PUBLIC",
|
|
127
|
+
// DocPixie reference for Stage 2 deep retrieval (key is omitted when DocPixie was not used)
|
|
128
|
+
...docpixieDocumentId ? { docpixieDocumentId } : {}
|
|
106
129
|
});
|
|
107
130
|
if (chunks.length === 0) {
|
|
108
131
|
await docRepo.update({
|
|
109
132
|
filter: { id: documentId },
|
|
110
|
-
values: {
|
|
133
|
+
values: {
|
|
134
|
+
status: "success",
|
|
135
|
+
chunkCount: 0,
|
|
136
|
+
...docpixieDocumentId ? { meta: { ...doc.meta ?? {}, docpixieDocumentId } } : {}
|
|
137
|
+
}
|
|
111
138
|
});
|
|
112
139
|
return { success: true, chunkCount: 0 };
|
|
113
140
|
}
|
|
@@ -120,9 +147,8 @@ class VectorizationPipeline {
|
|
|
120
147
|
throw new Error(`LLM service "${vectorStore.llmService}" not found`);
|
|
121
148
|
}
|
|
122
149
|
const llmService = llmServiceRecord.toJSON();
|
|
123
|
-
const { SimpleHTTPEmbeddings } = require("./simple-embeddings");
|
|
124
150
|
const serviceOpts = this.plugin.app.environment.renderJsonTemplate(llmService.options || {});
|
|
125
|
-
const embeddings = new SimpleHTTPEmbeddings({
|
|
151
|
+
const embeddings = new import_simple_embeddings.SimpleHTTPEmbeddings({
|
|
126
152
|
baseURL: serviceOpts.baseURL || serviceOpts.baseUrl || "",
|
|
127
153
|
apiKey: serviceOpts.apiKey || "",
|
|
128
154
|
model: vectorStore.embeddingModel
|
|
@@ -138,39 +164,15 @@ class VectorizationPipeline {
|
|
|
138
164
|
filter: { id: documentId },
|
|
139
165
|
values: {
|
|
140
166
|
status: "success",
|
|
141
|
-
chunkCount: chunks.length
|
|
167
|
+
chunkCount: chunks.length,
|
|
168
|
+
meta: {
|
|
169
|
+
...doc.meta ?? {},
|
|
170
|
+
...docpixieDocumentId ? { docpixieDocumentId } : {}
|
|
171
|
+
}
|
|
142
172
|
}
|
|
143
173
|
});
|
|
144
174
|
if (knowledgeBase.deleteSourceFile && file) {
|
|
145
|
-
|
|
146
|
-
const fs = require("fs");
|
|
147
|
-
const path = require("path");
|
|
148
|
-
let filePath = file.path || file.url;
|
|
149
|
-
if (filePath) {
|
|
150
|
-
if (filePath.startsWith("/storage/") || filePath.startsWith("storage/")) {
|
|
151
|
-
const storageBase = path.resolve(process.cwd(), "storage");
|
|
152
|
-
filePath = path.resolve(storageBase, filePath.replace(/^\/?(?:storage\/)?/, ""));
|
|
153
|
-
}
|
|
154
|
-
if (fs.existsSync(filePath)) {
|
|
155
|
-
fs.unlinkSync(filePath);
|
|
156
|
-
}
|
|
157
|
-
}
|
|
158
|
-
const fileRepo = this.plugin.db.getRepository("aiFiles");
|
|
159
|
-
if (file.id) {
|
|
160
|
-
await fileRepo.destroy({ filterByTk: file.id });
|
|
161
|
-
}
|
|
162
|
-
await this.plugin.db.sequelize.query(
|
|
163
|
-
`UPDATE "aiKnowledgeBaseDocuments" SET "fileId" = NULL WHERE id = $1`,
|
|
164
|
-
{ bind: [documentId] }
|
|
165
|
-
);
|
|
166
|
-
this.plugin.app.logger.info(
|
|
167
|
-
`[Vectorization] Source file deleted for doc ${documentId}: ${file.filename}`
|
|
168
|
-
);
|
|
169
|
-
} catch (delErr) {
|
|
170
|
-
this.plugin.app.logger.warn(
|
|
171
|
-
`[Vectorization] Failed to delete source file for doc ${documentId}: ${delErr.message}`
|
|
172
|
-
);
|
|
173
|
-
}
|
|
175
|
+
await this.deleteSourceFile(documentId, file);
|
|
174
176
|
}
|
|
175
177
|
return { success: true, chunkCount: chunks.length };
|
|
176
178
|
} catch (error) {
|
|
@@ -188,6 +190,67 @@ class VectorizationPipeline {
|
|
|
188
190
|
};
|
|
189
191
|
}
|
|
190
192
|
}
|
|
193
|
+
// ── Helpers ───────────────────────────────────────────────────────────────
|
|
194
|
+
/**
|
|
195
|
+
* Delete a source file after successful vectorization.
|
|
196
|
+
* Uses async fs/promises instead of blocking sync calls.
|
|
197
|
+
*/
|
|
198
|
+
async deleteSourceFile(documentId, file) {
|
|
199
|
+
try {
|
|
200
|
+
let filePath = file.path || file.url;
|
|
201
|
+
if (filePath) {
|
|
202
|
+
if (filePath.startsWith("/storage/") || filePath.startsWith("storage/")) {
|
|
203
|
+
const storageBase = (0, import_path.resolve)(process.cwd(), "storage");
|
|
204
|
+
filePath = (0, import_path.resolve)(storageBase, filePath.replace(/^\/?(?:storage\/)?/, ""));
|
|
205
|
+
}
|
|
206
|
+
try {
|
|
207
|
+
await (0, import_promises.access)(filePath);
|
|
208
|
+
await (0, import_promises.unlink)(filePath);
|
|
209
|
+
} catch {
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
const fileRepo = this.plugin.db.getRepository("aiFiles");
|
|
213
|
+
if (file.id) {
|
|
214
|
+
await fileRepo.destroy({ filterByTk: file.id });
|
|
215
|
+
}
|
|
216
|
+
await this.plugin.db.getRepository("aiKnowledgeBaseDocuments").update({
|
|
217
|
+
filter: { id: documentId },
|
|
218
|
+
values: { fileId: null }
|
|
219
|
+
});
|
|
220
|
+
this.plugin.app.logger.info(
|
|
221
|
+
`[Vectorization] Source file deleted for doc ${documentId}: ${file.filename}`
|
|
222
|
+
);
|
|
223
|
+
} catch (delErr) {
|
|
224
|
+
this.plugin.app.logger.warn(
|
|
225
|
+
`[Vectorization] Failed to delete source file for doc ${documentId}: ${delErr.message}`
|
|
226
|
+
);
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
/**
|
|
230
|
+
* Resolve a storage-relative or absolute path from a file record.
|
|
231
|
+
* Returns null if the path cannot be resolved to a local file
|
|
232
|
+
* (e.g., remote S3 URL).
|
|
233
|
+
*
|
|
234
|
+
* Fix #5: The allowed root is now `storage/uploads` — not the entire CWD —
|
|
235
|
+
* to prevent path traversal attacks that could read source files or configs.
|
|
236
|
+
*/
|
|
237
|
+
resolveLocalPath(file) {
|
|
238
|
+
let filePath = file.path || file.url || "";
|
|
239
|
+
if (!filePath) return null;
|
|
240
|
+
if (filePath.startsWith("http://") || filePath.startsWith("https://")) return null;
|
|
241
|
+
if (filePath.startsWith("/storage/") || filePath.startsWith("storage/")) {
|
|
242
|
+
const storageBase = (0, import_path.resolve)(process.cwd(), "storage");
|
|
243
|
+
filePath = (0, import_path.resolve)(storageBase, filePath.replace(/^\/?(?:storage\/)?/, ""));
|
|
244
|
+
}
|
|
245
|
+
const allowedRoot = (0, import_path.resolve)(process.cwd(), "storage", "uploads");
|
|
246
|
+
if (!filePath.startsWith(allowedRoot + import_path.sep) && filePath !== allowedRoot) {
|
|
247
|
+
this.plugin.app.logger.warn(
|
|
248
|
+
`[Vectorization] Blocked path traversal attempt: "${filePath}" is outside storage/uploads`
|
|
249
|
+
);
|
|
250
|
+
return null;
|
|
251
|
+
}
|
|
252
|
+
return filePath;
|
|
253
|
+
}
|
|
191
254
|
}
|
|
192
255
|
// Annotate the CommonJS export names for ESM import in node:
|
|
193
256
|
0 && (module.exports = {
|
package/dist/server/plugin.js
CHANGED
|
@@ -47,7 +47,9 @@ var import_vector_database_impl = require("./features/vector-database-impl");
|
|
|
47
47
|
var import_vector_database_provider_impl = require("./features/vector-database-provider-impl");
|
|
48
48
|
var import_vector_store_provider_impl = require("./features/vector-store-provider-impl");
|
|
49
49
|
var import_pgvector = require("./providers/pgvector");
|
|
50
|
+
var import_external_rag = require("./providers/external-rag");
|
|
50
51
|
var import_vectorization = require("./pipeline/vectorization");
|
|
52
|
+
var import_docpixie_extractor = require("./services/docpixie-extractor");
|
|
51
53
|
var import_ai_knowledge_base = __toESM(require("./resources/ai-knowledge-base"));
|
|
52
54
|
var import_ai_knowledge_base_documents = __toESM(require("./resources/ai-knowledge-base-documents"));
|
|
53
55
|
var import_ai_vector_stores = __toESM(require("./resources/ai-vector-stores"));
|
|
@@ -56,6 +58,29 @@ var import_add_document = require("./actions/add-document");
|
|
|
56
58
|
var import_request_context = __toESM(require("./request-context"));
|
|
57
59
|
class PluginKnowledgeBaseServer extends import_server.Plugin {
|
|
58
60
|
vectorizationPipeline;
|
|
61
|
+
docpixieExtractor;
|
|
62
|
+
/**
|
|
63
|
+
* The VectorStore provider registry exposed publicly so that other plugins can
|
|
64
|
+
* register additional vector store backends (e.g., a custom embedding plugin).
|
|
65
|
+
*
|
|
66
|
+
* Usage from another plugin:
|
|
67
|
+
* const kb = this.pm.get(PluginKnowledgeBaseServer) as PluginKnowledgeBaseServer;
|
|
68
|
+
* kb.registerVectorStoreProvider(myProvider);
|
|
69
|
+
*/
|
|
70
|
+
vectorStoreProvider;
|
|
71
|
+
/**
|
|
72
|
+
* Registry for external / custom RAG search strategies.
|
|
73
|
+
*
|
|
74
|
+
* A strategy is invoked for Knowledge Bases of type 'EXTERNAL_RAG' whose
|
|
75
|
+
* options.ragProvider matches the registered name.
|
|
76
|
+
*
|
|
77
|
+
* Usage from another plugin:
|
|
78
|
+
* kb.registerRagSearchStrategy('my-rag', async (query, kbRecord, opts) => {
|
|
79
|
+
* // call your custom RAG backend and return results
|
|
80
|
+
* return [{ content: '...', score: 0.9 }];
|
|
81
|
+
* });
|
|
82
|
+
*/
|
|
83
|
+
ragSearchStrategies = /* @__PURE__ */ new Map();
|
|
59
84
|
_aiPlugin;
|
|
60
85
|
get aiPlugin() {
|
|
61
86
|
if (!this._aiPlugin) {
|
|
@@ -63,6 +88,42 @@ class PluginKnowledgeBaseServer extends import_server.Plugin {
|
|
|
63
88
|
}
|
|
64
89
|
return this._aiPlugin;
|
|
65
90
|
}
|
|
91
|
+
// ── Public extension API ────────────────────────────────────────────────────
|
|
92
|
+
/**
|
|
93
|
+
* Register a custom VectorStoreProvider so it can be selected as a provider
|
|
94
|
+
* when creating Vector Stores in the admin UI.
|
|
95
|
+
*
|
|
96
|
+
* Example (from another plugin's load()):
|
|
97
|
+
* const kb = this.pm.get(PluginKnowledgeBaseServer) as PluginKnowledgeBaseServer;
|
|
98
|
+
* kb.registerVectorStoreProvider({
|
|
99
|
+
* providerName: 'my-vector-store',
|
|
100
|
+
* async createVectorStoreService(props) { ... },
|
|
101
|
+
* });
|
|
102
|
+
*/
|
|
103
|
+
registerVectorStoreProvider(provider) {
|
|
104
|
+
if (!this.vectorStoreProvider) {
|
|
105
|
+
throw new Error("registerVectorStoreProvider() must be called after plugin-knowledge-base has loaded");
|
|
106
|
+
}
|
|
107
|
+
this.vectorStoreProvider.register(provider);
|
|
108
|
+
}
|
|
109
|
+
/**
|
|
110
|
+
* Register a RAG search strategy for Knowledge Bases of type 'EXTERNAL_RAG'.
|
|
111
|
+
*
|
|
112
|
+
* The strategy name must match the value stored in kb.options.ragProvider.
|
|
113
|
+
* The built-in 'external-http' strategy is always pre-registered and handles
|
|
114
|
+
* generic HTTP-based RAG services.
|
|
115
|
+
*
|
|
116
|
+
* Example:
|
|
117
|
+
* kb.registerRagSearchStrategy('pinecone', async (query, kbRecord, opts) => {
|
|
118
|
+
* const { ragApiKey, ragNamespace } = kbRecord.options;
|
|
119
|
+
* const results = await pineconeQuery(ragApiKey, ragNamespace, query, opts.topK);
|
|
120
|
+
* return results.map(r => ({ content: r.text, score: r.score }));
|
|
121
|
+
* });
|
|
122
|
+
*/
|
|
123
|
+
registerRagSearchStrategy(name, strategy) {
|
|
124
|
+
this.ragSearchStrategies.set(name, strategy);
|
|
125
|
+
}
|
|
126
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
66
127
|
async afterAdd() {
|
|
67
128
|
}
|
|
68
129
|
async beforeLoad() {
|
|
@@ -70,7 +131,9 @@ class PluginKnowledgeBaseServer extends import_server.Plugin {
|
|
|
70
131
|
async load() {
|
|
71
132
|
const vdbProvider = new import_vector_database_provider_impl.VectorDatabaseProviderImpl();
|
|
72
133
|
vdbProvider.register(import_pgvector.pgVectorProviderInfo);
|
|
73
|
-
|
|
134
|
+
this.vectorStoreProvider = new import_vector_store_provider_impl.VectorStoreProviderImpl(this, this.aiPlugin);
|
|
135
|
+
const vectorStoreProvider = this.vectorStoreProvider;
|
|
136
|
+
this.ragSearchStrategies.set(import_external_rag.EXTERNAL_HTTP_RAG_PROVIDER, import_external_rag.externalHttpRagStrategy);
|
|
74
137
|
const vectorDatabase = new import_vector_database_impl.VectorDatabaseFeatureImpl(this);
|
|
75
138
|
const knowledgeBase = new import_knowledge_base_impl.KnowledgeBaseFeatureImpl(this);
|
|
76
139
|
this.aiPlugin.features.enableFeatures({
|
|
@@ -79,7 +142,11 @@ class PluginKnowledgeBaseServer extends import_server.Plugin {
|
|
|
79
142
|
vectorStoreProvider,
|
|
80
143
|
knowledgeBase
|
|
81
144
|
});
|
|
82
|
-
this.
|
|
145
|
+
this.docpixieExtractor = new import_docpixie_extractor.DocPixieExtractor(
|
|
146
|
+
this.db,
|
|
147
|
+
() => this.getDocpixiePlugin()
|
|
148
|
+
);
|
|
149
|
+
this.vectorizationPipeline = new import_vectorization.VectorizationPipeline(this, this.docpixieExtractor);
|
|
83
150
|
this.defineResources();
|
|
84
151
|
this.app.resourceManager.use(async (ctx, next) => {
|
|
85
152
|
var _a, _b, _c, _d, _e, _f, _g, _h;
|
|
@@ -102,7 +169,16 @@ class PluginKnowledgeBaseServer extends import_server.Plugin {
|
|
|
102
169
|
const storageName = (_a = ctx.action.params) == null ? void 0 : _a.storageRule;
|
|
103
170
|
if (storageName) {
|
|
104
171
|
const collection = ctx.db.getCollection("aiFiles");
|
|
105
|
-
collection
|
|
172
|
+
if (collection) {
|
|
173
|
+
const prevStorage = collection.options.storage;
|
|
174
|
+
collection.options.storage = storageName;
|
|
175
|
+
try {
|
|
176
|
+
await next();
|
|
177
|
+
} finally {
|
|
178
|
+
collection.options.storage = prevStorage;
|
|
179
|
+
}
|
|
180
|
+
return;
|
|
181
|
+
}
|
|
106
182
|
}
|
|
107
183
|
}
|
|
108
184
|
await next();
|
|
@@ -111,6 +187,15 @@ class PluginKnowledgeBaseServer extends import_server.Plugin {
|
|
|
111
187
|
);
|
|
112
188
|
this.registerKnowledgeBaseWorkContext(vectorStoreProvider, knowledgeBase);
|
|
113
189
|
}
|
|
190
|
+
/** Returns the plugin-docpixie instance if it is loaded and has an active service, else null */
|
|
191
|
+
getDocpixiePlugin() {
|
|
192
|
+
try {
|
|
193
|
+
const p = this.pm.get("@nocobase/plugin-docpixie");
|
|
194
|
+
return (p == null ? void 0 : p.service) ? p : null;
|
|
195
|
+
} catch {
|
|
196
|
+
return null;
|
|
197
|
+
}
|
|
198
|
+
}
|
|
114
199
|
defineResources() {
|
|
115
200
|
this.app.resourceManager.define(import_ai_knowledge_base.default);
|
|
116
201
|
this.app.resourceManager.define(import_ai_knowledge_base_documents.default);
|
|
@@ -130,7 +215,7 @@ class PluginKnowledgeBaseServer extends import_server.Plugin {
|
|
|
130
215
|
return `[Knowledge Base: ${contextItem.title || contextItem.uid}]`;
|
|
131
216
|
},
|
|
132
217
|
background: async (ctx, aiMessages, workContextItems) => {
|
|
133
|
-
var _a, _b, _c, _d;
|
|
218
|
+
var _a, _b, _c, _d, _e;
|
|
134
219
|
const kbIds = [...new Set(workContextItems.map((item) => item.uid).filter(Boolean))];
|
|
135
220
|
if (!kbIds.length) return "";
|
|
136
221
|
const userId = (_b = (_a = ctx.auth) == null ? void 0 : _a.user) == null ? void 0 : _b.id;
|
|
@@ -138,8 +223,8 @@ class PluginKnowledgeBaseServer extends import_server.Plugin {
|
|
|
138
223
|
const isAdmin = roles.includes("root") || roles.includes("admin");
|
|
139
224
|
let filteredIds = kbIds;
|
|
140
225
|
if (!isAdmin) {
|
|
141
|
-
const
|
|
142
|
-
const accessibleKBs = await
|
|
226
|
+
const kbRepo2 = plugin.db.getRepository("aiKnowledgeBases");
|
|
227
|
+
const accessibleKBs = await kbRepo2.find({
|
|
143
228
|
filter: {
|
|
144
229
|
id: { $in: kbIds },
|
|
145
230
|
enabled: true,
|
|
@@ -155,19 +240,55 @@ class PluginKnowledgeBaseServer extends import_server.Plugin {
|
|
|
155
240
|
if (!filteredIds.length) return "";
|
|
156
241
|
}
|
|
157
242
|
const lastUserMsg = [...aiMessages].reverse().find((m) => m.role === "user");
|
|
158
|
-
|
|
243
|
+
let queryString = "";
|
|
244
|
+
if (lastUserMsg) {
|
|
245
|
+
const c = lastUserMsg.content;
|
|
246
|
+
if (typeof c === "string") {
|
|
247
|
+
queryString = c;
|
|
248
|
+
} else if (c && typeof c === "object") {
|
|
249
|
+
if (typeof c.content === "string") {
|
|
250
|
+
queryString = c.content;
|
|
251
|
+
} else if (c.content && typeof c.content === "object" && typeof c.content.content === "string") {
|
|
252
|
+
queryString = c.content.content;
|
|
253
|
+
} else if (typeof c.text === "string") {
|
|
254
|
+
queryString = c.text;
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
plugin.app.logger.info(`[KB WorkContext] lastUserMsg content type: ${typeof (lastUserMsg == null ? void 0 : lastUserMsg.content)}, queryString length: ${queryString.length}, queryString: "${queryString.substring(0, 100)}"`);
|
|
159
259
|
if (!queryString) return "";
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
260
|
+
const kbRepo = plugin.db.getRepository("aiKnowledgeBases");
|
|
261
|
+
const allKbRecords = await kbRepo.find({
|
|
262
|
+
filter: { id: { $in: filteredIds }, enabled: true },
|
|
263
|
+
fields: ["id", "name", "type", "options"]
|
|
264
|
+
});
|
|
265
|
+
const externalRagIds = /* @__PURE__ */ new Set();
|
|
266
|
+
const externalRagKbs = [];
|
|
267
|
+
for (const rec of allKbRecords) {
|
|
268
|
+
const kbData2 = rec.toJSON ? rec.toJSON() : rec;
|
|
269
|
+
if (kbData2.type === import_external_rag.EXTERNAL_RAG_KB_TYPE) {
|
|
270
|
+
externalRagIds.add(String(kbData2.id));
|
|
271
|
+
externalRagKbs.push(kbData2);
|
|
272
|
+
}
|
|
166
273
|
}
|
|
167
|
-
|
|
274
|
+
const standardIds = filteredIds.filter((id) => !externalRagIds.has(String(id)));
|
|
275
|
+
let knowledgeBaseGroup = [];
|
|
276
|
+
if (standardIds.length > 0) {
|
|
277
|
+
try {
|
|
278
|
+
knowledgeBaseGroup = await knowledgeBaseFeature.getKnowledgeBaseGroup(standardIds);
|
|
279
|
+
} catch (err) {
|
|
280
|
+
plugin.app.logger.error("[KB WorkContext] Failed to get KB group:", err);
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
plugin.app.logger.info(
|
|
284
|
+
`[KB WorkContext] Found ${knowledgeBaseGroup.length} local KB groups, ${externalRagKbs.length} external RAG KBs: ${JSON.stringify(knowledgeBaseGroup.map((g) => {
|
|
285
|
+
var _a2, _b2, _c2;
|
|
286
|
+
return { type: g.knowledgeBaseType, provider: (_a2 = g.vectorStoreConfig) == null ? void 0 : _a2.vectorStoreProvider, configId: (_b2 = g.vectorStoreConfig) == null ? void 0 : _b2.vectorStoreConfigId, kbCount: (_c2 = g.knowledgeBaseList) == null ? void 0 : _c2.length };
|
|
287
|
+
}))}`
|
|
288
|
+
);
|
|
168
289
|
const allDocs = [];
|
|
169
|
-
const topK =
|
|
170
|
-
const score = "0.
|
|
290
|
+
const topK = 5;
|
|
291
|
+
const score = "0.3";
|
|
171
292
|
for (const entry of knowledgeBaseGroup) {
|
|
172
293
|
const { vectorStoreConfig, knowledgeBaseType, knowledgeBaseList } = entry;
|
|
173
294
|
if (!(knowledgeBaseList == null ? void 0 : knowledgeBaseList.length)) continue;
|
|
@@ -184,6 +305,10 @@ class PluginKnowledgeBaseServer extends import_server.Plugin {
|
|
|
184
305
|
score,
|
|
185
306
|
filter: { knowledgeBaseOuterId: { in: knowledgeBaseOuterIds } }
|
|
186
307
|
});
|
|
308
|
+
plugin.app.logger.info(`[KB WorkContext] LOCAL search: ${result.length} results, scores: ${result.map((r) => {
|
|
309
|
+
var _a2;
|
|
310
|
+
return (_a2 = r.score) == null ? void 0 : _a2.toFixed(3);
|
|
311
|
+
}).join(", ")}`);
|
|
187
312
|
allDocs.push(...result);
|
|
188
313
|
} else {
|
|
189
314
|
for (const kb of knowledgeBaseList) {
|
|
@@ -192,6 +317,10 @@ class PluginKnowledgeBaseServer extends import_server.Plugin {
|
|
|
192
317
|
kb.vectorStoreProps
|
|
193
318
|
);
|
|
194
319
|
const result = await vectorStoreService.search(queryString, { topK, score });
|
|
320
|
+
plugin.app.logger.info(`[KB WorkContext] ${knowledgeBaseType} search (${kb.name}): ${result.length} results, scores: ${result.map((r) => {
|
|
321
|
+
var _a2;
|
|
322
|
+
return (_a2 = r.score) == null ? void 0 : _a2.toFixed(3);
|
|
323
|
+
}).join(", ")}`);
|
|
195
324
|
allDocs.push(...result);
|
|
196
325
|
}
|
|
197
326
|
}
|
|
@@ -199,8 +328,60 @@ class PluginKnowledgeBaseServer extends import_server.Plugin {
|
|
|
199
328
|
plugin.app.logger.error(`[KB WorkContext] Vector search failed for type=${knowledgeBaseType}:`, err);
|
|
200
329
|
}
|
|
201
330
|
}
|
|
331
|
+
for (const kb of externalRagKbs) {
|
|
332
|
+
const providerName = ((_d = kb.options) == null ? void 0 : _d.ragProvider) ?? import_external_rag.EXTERNAL_HTTP_RAG_PROVIDER;
|
|
333
|
+
const strategy = plugin.ragSearchStrategies.get(providerName);
|
|
334
|
+
if (!strategy) {
|
|
335
|
+
plugin.app.logger.warn(
|
|
336
|
+
`[KB WorkContext] No RAG strategy registered for provider "${providerName}" (KB: ${kb.name ?? kb.id})`
|
|
337
|
+
);
|
|
338
|
+
continue;
|
|
339
|
+
}
|
|
340
|
+
try {
|
|
341
|
+
const results = await strategy(queryString, kb, {
|
|
342
|
+
topK,
|
|
343
|
+
scoreThreshold: parseFloat(score)
|
|
344
|
+
});
|
|
345
|
+
plugin.app.logger.info(
|
|
346
|
+
`[KB WorkContext] EXTERNAL_RAG (${kb.name}, provider=${providerName}): ${results.length} results`
|
|
347
|
+
);
|
|
348
|
+
allDocs.push(...results);
|
|
349
|
+
} catch (err) {
|
|
350
|
+
plugin.app.logger.error(
|
|
351
|
+
`[KB WorkContext] External RAG search failed (${kb.name}, provider=${providerName}):`,
|
|
352
|
+
err
|
|
353
|
+
);
|
|
354
|
+
}
|
|
355
|
+
}
|
|
202
356
|
if (!allDocs.length) return "";
|
|
203
|
-
|
|
357
|
+
let deepContext = "";
|
|
358
|
+
const docpixiePlugin = plugin.getDocpixiePlugin();
|
|
359
|
+
if ((_e = docpixiePlugin == null ? void 0 : docpixiePlugin.service) == null ? void 0 : _e.isReady()) {
|
|
360
|
+
const docpixieIds = [
|
|
361
|
+
...new Set(
|
|
362
|
+
allDocs.map((d) => {
|
|
363
|
+
var _a2;
|
|
364
|
+
return (_a2 = d.metadata) == null ? void 0 : _a2.docpixieDocumentId;
|
|
365
|
+
}).filter((id) => typeof id === "number")
|
|
366
|
+
)
|
|
367
|
+
].slice(0, 3);
|
|
368
|
+
if (docpixieIds.length > 0) {
|
|
369
|
+
try {
|
|
370
|
+
deepContext = await plugin.docpixieExtractor.buildDeepContext(docpixieIds);
|
|
371
|
+
plugin.app.logger.info(
|
|
372
|
+
`[KB WorkContext] Stage 2 DocPixie deep retrieval: ${docpixieIds.length} doc(s), context length=${deepContext.length}`
|
|
373
|
+
);
|
|
374
|
+
} catch (err) {
|
|
375
|
+
plugin.app.logger.warn("[KB WorkContext] Stage 2 DocPixie retrieval failed:", err);
|
|
376
|
+
}
|
|
377
|
+
}
|
|
378
|
+
}
|
|
379
|
+
const chunkData = allDocs.map((doc) => doc.content).join("\n");
|
|
380
|
+
const kbData = deepContext ? `${deepContext}
|
|
381
|
+
|
|
382
|
+
<kb_chunks>
|
|
383
|
+
${chunkData}
|
|
384
|
+
</kb_chunks>` : chunkData;
|
|
204
385
|
return `<knowledgeBase>From knowledge base:
|
|
205
386
|
${kbData}
|
|
206
387
|
answer user's question using this information.</knowledgeBase>`;
|
|
@@ -220,6 +401,10 @@ answer user's question using this information.</knowledgeBase>`;
|
|
|
220
401
|
this.app.acl.allow("aiKnowledgeBase", "list", "loggedIn");
|
|
221
402
|
this.app.acl.allow("aiKnowledgeBase", "get", "loggedIn");
|
|
222
403
|
this.app.acl.allow("aiKnowledgeBase", "addDocument", "loggedIn");
|
|
404
|
+
this.app.acl.allow("aiKnowledgeBaseDoc", "list", "loggedIn");
|
|
405
|
+
this.app.acl.allow("aiKnowledgeBaseDoc", "create", "loggedIn");
|
|
406
|
+
this.app.acl.allow("aiKnowledgeBaseDoc", "destroy", "loggedIn");
|
|
407
|
+
this.app.acl.allow("aiKnowledgeBaseDoc", "reprocess", "loggedIn");
|
|
223
408
|
}
|
|
224
409
|
async install() {
|
|
225
410
|
}
|