plugin-knowledge-base 1.1.2 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,9 +7,11 @@
7
7
  * For more information, please refer to: https://www.nocobase.com/agreement.
8
8
  */
9
9
 
10
+ var __create = Object.create;
10
11
  var __defProp = Object.defineProperty;
11
12
  var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
12
13
  var __getOwnPropNames = Object.getOwnPropertyNames;
14
+ var __getProtoOf = Object.getPrototypeOf;
13
15
  var __hasOwnProp = Object.prototype.hasOwnProperty;
14
16
  var __export = (target, all) => {
15
17
  for (var name in all)
@@ -23,12 +25,28 @@ var __copyProps = (to, from, except, desc) => {
23
25
  }
24
26
  return to;
25
27
  };
28
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
29
+ // If the importer is in node compatibility mode or this is not an ESM
30
+ // file that has been converted to a CommonJS file using a Babel-
31
+ // compatible transform (i.e. "__esModule" has not been set), then set
32
+ // "default" to the CommonJS "module.exports" for node compatibility.
33
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
34
+ mod
35
+ ));
26
36
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
27
37
  var ai_knowledge_base_documents_exports = {};
28
38
  __export(ai_knowledge_base_documents_exports, {
29
39
  default: () => ai_knowledge_base_documents_default
30
40
  });
31
41
  module.exports = __toCommonJS(ai_knowledge_base_documents_exports);
42
+ var import_plugin = __toESM(require("../plugin"));
43
/**
 * Resolve this plugin's instance from the request context.
 *
 * The lookup is deliberately best-effort: callers in this resource only use
 * the result to trigger optional follow-up work (they null-check it), so a
 * failed lookup must never abort the request. The failure is no longer
 * silent, though — it is reported through `ctx.logger.warn` when a logger
 * with that method is available.
 *
 * @param {object} ctx - Request context; expected to expose `app.pm.get`.
 * @returns {object|null} Whatever `ctx.app.pm.get` returns for the plugin
 *   class, or `null` when the lookup throws or yields nothing.
 */
function getPlugin(ctx) {
  try {
    const found = ctx.app.pm.get(import_plugin.default);
    // Normalise "not found" to null so callers see a single empty value.
    return found ?? null;
  } catch (err) {
    const logger = ctx == null ? null : ctx.logger;
    if (logger && typeof logger.warn === "function") {
      logger.warn("Failed to resolve the knowledge base plugin instance:", err);
    }
    return null;
  }
}
32
50
  async function checkKBAccess(ctx, knowledgeBaseId) {
33
51
  var _a, _b, _c, _d;
34
52
  const userId = (_b = (_a = ctx.auth) == null ? void 0 : _a.user) == null ? void 0 : _b.id;
@@ -104,6 +122,10 @@ var ai_knowledge_base_documents_default = {
104
122
  const kb = await kbRepo.findOne({ filter: { id: values.knowledgeBaseId } });
105
123
  if (kb) {
106
124
  const kbData = kb.toJSON();
125
+ if (kbData.type === "EXTERNAL_RAG") {
126
+ ctx.throw(400, "Cannot upload documents to an external RAG knowledge base");
127
+ return;
128
+ }
107
129
  if (kbData.accessLevel === "BASIC" && kbData.ownerId !== userId) {
108
130
  ctx.throw(403, "Only the owner can upload documents to a personal knowledge base");
109
131
  return;
@@ -123,27 +145,19 @@ var ai_knowledge_base_documents_default = {
123
145
  }
124
146
  values.uploadedById = userId;
125
147
  const doc = await repo.create({ values });
126
- const setClauses = [];
127
- const bindValues = [];
128
- let paramIdx = 1;
129
- if (values.knowledgeBaseId) {
130
- setClauses.push(`"knowledgeBaseId" = $${paramIdx++}`);
131
- bindValues.push(values.knowledgeBaseId);
132
- }
133
- if (values.fileId) {
134
- setClauses.push(`"fileId" = $${paramIdx++}`);
135
- bindValues.push(values.fileId);
136
- }
137
- if (setClauses.length > 0) {
138
- bindValues.push(doc.get("id"));
139
- await ctx.db.sequelize.query(
140
- `UPDATE "aiKnowledgeBaseDocuments" SET ${setClauses.join(", ")} WHERE id = $${paramIdx}`,
141
- { bind: bindValues }
142
- );
143
- if (values.knowledgeBaseId) doc.set("knowledgeBaseId", values.knowledgeBaseId);
144
- if (values.fileId) doc.set("fileId", values.fileId);
148
+ const fkUpdates = {};
149
+ if (values.knowledgeBaseId) fkUpdates.knowledgeBaseId = values.knowledgeBaseId;
150
+ if (values.fileId) fkUpdates.fileId = values.fileId;
151
+ if (Object.keys(fkUpdates).length > 0) {
152
+ await repo.update({
153
+ filterByTk: doc.get("id"),
154
+ values: fkUpdates
155
+ });
156
+ for (const [k, v] of Object.entries(fkUpdates)) {
157
+ doc.set(k, v);
158
+ }
145
159
  }
146
- const plugin = ctx.app.pm.get("plugin-knowledge-base");
160
+ const plugin = getPlugin(ctx);
147
161
  if (plugin == null ? void 0 : plugin.vectorizationPipeline) {
148
162
  plugin.vectorizationPipeline.processDocument(doc.id).catch((err) => {
149
163
  ctx.logger.error(`Vectorization failed for document ${doc.id}:`, err);
@@ -185,11 +199,10 @@ var ai_knowledge_base_documents_default = {
185
199
  await next();
186
200
  },
187
201
  async reprocess(ctx, next) {
188
- var _a, _b, _c, _d;
202
+ var _a, _b;
189
203
  const { filterByTk } = ctx.action.params;
190
204
  const repo = ctx.db.getRepository("aiKnowledgeBaseDocuments");
191
- const userId = (_b = (_a = ctx.auth) == null ? void 0 : _a.user) == null ? void 0 : _b.id;
192
- const roles = ((_c = ctx.state) == null ? void 0 : _c.currentRoles) ?? [];
205
+ const roles = ((_a = ctx.state) == null ? void 0 : _a.currentRoles) ?? [];
193
206
  const isAdmin = roles.includes("root") || roles.includes("admin");
194
207
  if (!isAdmin) {
195
208
  const doc = await repo.findOne({ filterByTk });
@@ -202,7 +215,7 @@ var ai_knowledge_base_documents_default = {
202
215
  return;
203
216
  }
204
217
  if ((kbData == null ? void 0 : kbData.accessLevel) === "SHARED") {
205
- const canUpload = (_d = kbData.uploadRoles) == null ? void 0 : _d.some((r) => roles.includes(r));
218
+ const canUpload = (_b = kbData.uploadRoles) == null ? void 0 : _b.some((r) => roles.includes(r));
206
219
  if (!canUpload) {
207
220
  ctx.throw(403, "You do not have permission to reprocess this document");
208
221
  return;
@@ -215,7 +228,7 @@ var ai_knowledge_base_documents_default = {
215
228
  filterByTk,
216
229
  values: { status: "pending", error: null, chunkCount: 0 }
217
230
  });
218
- const plugin = ctx.app.pm.get("plugin-knowledge-base");
231
+ const plugin = getPlugin(ctx);
219
232
  if (plugin == null ? void 0 : plugin.vectorizationPipeline) {
220
233
  plugin.vectorizationPipeline.processDocument(filterByTk).catch((err) => {
221
234
  ctx.logger.error(`Re-vectorization failed for document ${filterByTk}:`, err);
@@ -7,9 +7,11 @@
7
7
  * For more information, please refer to: https://www.nocobase.com/agreement.
8
8
  */
9
9
 
10
+ var __create = Object.create;
10
11
  var __defProp = Object.defineProperty;
11
12
  var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
12
13
  var __getOwnPropNames = Object.getOwnPropertyNames;
14
+ var __getProtoOf = Object.getPrototypeOf;
13
15
  var __hasOwnProp = Object.prototype.hasOwnProperty;
14
16
  var __export = (target, all) => {
15
17
  for (var name in all)
@@ -23,12 +25,24 @@ var __copyProps = (to, from, except, desc) => {
23
25
  }
24
26
  return to;
25
27
  };
28
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
29
+ // If the importer is in node compatibility mode or this is not an ESM
30
+ // file that has been converted to a CommonJS file using a Babel-
31
+ // compatible transform (i.e. "__esModule" has not been set), then set
32
+ // "default" to the CommonJS "module.exports" for node compatibility.
33
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
34
+ mod
35
+ ));
26
36
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
27
37
  var ai_knowledge_base_exports = {};
28
38
  __export(ai_knowledge_base_exports, {
29
39
  default: () => ai_knowledge_base_default
30
40
  });
31
41
  module.exports = __toCommonJS(ai_knowledge_base_exports);
42
+ var import_plugin = __toESM(require("../plugin"));
43
/**
 * Fetch this plugin's instance via `ctx.app.pm` (presumably the application's
 * plugin manager — confirm against the server API).
 *
 * No error handling is applied here: if `ctx.app` / `ctx.app.pm` is missing
 * or `get` throws, the error propagates to the caller.
 *
 * @param {object} ctx - Request context exposing `app.pm.get`.
 * @returns {*} The value returned by `ctx.app.pm.get` for the plugin class.
 */
function getPlugin(ctx) {
  const pluginManager = ctx.app.pm;
  return pluginManager.get(import_plugin.default);
}
32
46
  async function checkKBPermission(ctx, filterByTk, action) {
33
47
  var _a, _b;
34
48
  const repo = ctx.db.getRepository("aiKnowledgeBases");
@@ -89,17 +103,19 @@ var ai_knowledge_base_default = {
89
103
  filterByTk,
90
104
  appends: ["vectorStore", "documents"]
91
105
  });
92
- if (record) {
93
- const data = record.toJSON();
94
- const userId = (_b = (_a = ctx.auth) == null ? void 0 : _a.user) == null ? void 0 : _b.id;
95
- const roles = ctx.state.currentRoles ?? [];
96
- const isAdmin = roles.includes("root") || roles.includes("admin");
97
- if (!isAdmin) {
98
- const hasAccess = data.accessLevel === "PUBLIC" || data.accessLevel === "BASIC" && data.ownerId === userId || data.accessLevel === "SHARED" && ((_c = data.allowedRoles) == null ? void 0 : _c.some((r) => roles.includes(r)));
99
- if (!hasAccess) {
100
- ctx.throw(403, "Access denied");
101
- return;
102
- }
106
+ if (!record) {
107
+ ctx.throw(404, "Knowledge base not found");
108
+ return;
109
+ }
110
+ const data = record.toJSON();
111
+ const userId = (_b = (_a = ctx.auth) == null ? void 0 : _a.user) == null ? void 0 : _b.id;
112
+ const roles = ctx.state.currentRoles ?? [];
113
+ const isAdmin = roles.includes("root") || roles.includes("admin");
114
+ if (!isAdmin) {
115
+ const hasAccess = data.accessLevel === "PUBLIC" || data.accessLevel === "BASIC" && data.ownerId === userId || data.accessLevel === "SHARED" && ((_c = data.allowedRoles) == null ? void 0 : _c.some((r) => roles.includes(r)));
116
+ if (!hasAccess) {
117
+ ctx.throw(403, "Access denied");
118
+ return;
103
119
  }
104
120
  }
105
121
  ctx.body = record;
@@ -120,13 +136,14 @@ var ai_knowledge_base_default = {
120
136
  if (values.accessLevel === "BASIC") {
121
137
  values.ownerId = userId;
122
138
  }
123
- ctx.body = await repo.create({ values });
139
+ const record = await repo.create({ values });
124
140
  if (values.vectorStoreId) {
125
- await ctx.db.sequelize.query(
126
- `UPDATE "aiKnowledgeBases" SET "vectorStoreId" = $1 WHERE id = $2`,
127
- { bind: [values.vectorStoreId, ctx.body.get("id")] }
128
- );
141
+ await repo.update({
142
+ filterByTk: record.get("id"),
143
+ values: { vectorStoreId: values.vectorStoreId }
144
+ });
129
145
  }
146
+ ctx.body = record;
130
147
  await next();
131
148
  },
132
149
  async update(ctx, next) {
@@ -143,15 +160,8 @@ var ai_knowledge_base_default = {
143
160
  filterByTk,
144
161
  values
145
162
  });
146
- if (values.vectorStoreId) {
147
- await ctx.db.sequelize.query(
148
- `UPDATE "aiKnowledgeBases" SET "vectorStoreId" = $1 WHERE id = $2`,
149
- { bind: [values.vectorStoreId, filterByTk] }
150
- );
151
- }
152
163
  await next();
153
164
  },
154
- // Fix #3: destroy with permission checks
155
165
  async destroy(ctx, next) {
156
166
  const { filterByTk } = ctx.action.params;
157
167
  const repo = ctx.db.getRepository("aiKnowledgeBases");
@@ -29,6 +29,7 @@ __export(ai_vector_databases_exports, {
29
29
  default: () => ai_vector_databases_default
30
30
  });
31
31
  module.exports = __toCommonJS(ai_vector_databases_exports);
32
+ var import_pg = require("pg");
32
33
  var ai_vector_databases_default = {
33
34
  name: "aiVectorDatabase",
34
35
  actions: {
@@ -85,8 +86,7 @@ var ai_vector_databases_default = {
85
86
  return;
86
87
  }
87
88
  try {
88
- const { Client } = require("pg");
89
- const client = new Client({
89
+ const client = new import_pg.Client({
90
90
  host: connectParams.host,
91
91
  port: connectParams.port || 5432,
92
92
  user: connectParams.username,
@@ -58,13 +58,14 @@ var ai_vector_stores_default = {
58
58
  const rawValues = ctx.action.params.values || {};
59
59
  const values = rawValues.values || rawValues;
60
60
  const repo = ctx.db.getRepository("aiVectorStores");
61
- ctx.body = await repo.create({ values });
61
+ const record = await repo.create({ values });
62
62
  if (values.vectorDatabaseId) {
63
- await ctx.db.sequelize.query(
64
- `UPDATE "aiVectorStores" SET "vectorDatabaseId" = $1 WHERE id = $2`,
65
- { bind: [values.vectorDatabaseId, ctx.body.get("id")] }
66
- );
63
+ await repo.update({
64
+ filterByTk: record.get("id"),
65
+ values: { vectorDatabaseId: values.vectorDatabaseId }
66
+ });
67
67
  }
68
+ ctx.body = record;
68
69
  await next();
69
70
  },
70
71
  async update(ctx, next) {
@@ -76,12 +77,6 @@ var ai_vector_stores_default = {
76
77
  filterByTk,
77
78
  values
78
79
  });
79
- if (values.vectorDatabaseId) {
80
- await ctx.db.sequelize.query(
81
- `UPDATE "aiVectorStores" SET "vectorDatabaseId" = $1 WHERE id = $2`,
82
- { bind: [values.vectorDatabaseId, filterByTk] }
83
- );
84
- }
85
80
  await next();
86
81
  },
87
82
  async destroy(ctx, next) {
@@ -0,0 +1,139 @@
1
+ /**
2
+ * This file is part of the NocoBase (R) project.
3
+ * Copyright (c) 2020-2024 NocoBase Co., Ltd.
4
+ * Authors: NocoBase Team.
5
+ *
6
+ * This project is dual-licensed under AGPL-3.0 and NocoBase Commercial License.
7
+ * For more information, please refer to: https://www.nocobase.com/agreement.
8
+ */
9
+
10
+ var __defProp = Object.defineProperty;
11
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
12
+ var __getOwnPropNames = Object.getOwnPropertyNames;
13
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
14
+ var __export = (target, all) => {
15
+ for (var name in all)
16
+ __defProp(target, name, { get: all[name], enumerable: true });
17
+ };
18
+ var __copyProps = (to, from, except, desc) => {
19
+ if (from && typeof from === "object" || typeof from === "function") {
20
+ for (let key of __getOwnPropNames(from))
21
+ if (!__hasOwnProp.call(to, key) && key !== except)
22
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
23
+ }
24
+ return to;
25
+ };
26
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
27
+ var docpixie_extractor_exports = {};
28
+ __export(docpixie_extractor_exports, {
29
+ DocPixieExtractor: () => DocPixieExtractor
30
+ });
31
+ module.exports = __toCommonJS(docpixie_extractor_exports);
32
+ var import_os = require("os");
33
+ var import_path = require("path");
34
+ var import_promises = require("fs/promises");
35
+ var import_fs = require("fs");
36
/**
 * Escape a value for embedding inside a double-quoted XML-style attribute.
 * `null`/`undefined` become an empty string instead of the text "undefined".
 */
function escapeXmlAttribute(value) {
  return String(value == null ? "" : value)
    .replace(/&/g, "&amp;")
    .replace(/"/g, "&quot;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
}

/**
 * Text extractor that delegates document processing to the DocPixie plugin's
 * service and reads the resulting page texts back from the database.
 */
class DocPixieExtractor {
  /**
   * @param {object} db - Database handle exposing `getRepository(name)`.
   * @param {Function} getDocpixiePlugin - Returns the DocPixie plugin instance
   *   (or a nullish value when it is not available). Called lazily on each use.
   */
  constructor(db, getDocpixiePlugin) {
    this.db = db;
    this.getDocpixiePlugin = getDocpixiePlugin;
  }

  /**
   * @returns {boolean} True when the DocPixie plugin exposes a `service`
   *   whose `isReady()` returns a truthy value.
   */
  isAvailable() {
    const plugin = this.getDocpixiePlugin();
    const service = plugin == null ? null : plugin.service;
    return service != null && Boolean(service.isReady());
  }

  /**
   * Extract text from a local file path.
   *
   * `processDocument()` is awaited to completion before the page texts are
   * read, so the returned text reflects the finished processing run.
   *
   * @param {string} filePath - Path of the file to process.
   * @param {string} filename - Passed to the service as the document `name`.
   * @param {*} userId - Passed through to the service.
   * @returns {Promise<{text: string, documentId: *}|null>} `null` when the
   *   service is unavailable or on any failure, so callers can fall through
   *   to the next extractor.
   */
  async extractFromPath(filePath, filename, userId) {
    const plugin = this.getDocpixiePlugin();
    const service = plugin == null ? null : plugin.service;
    if (service == null || !service.isReady()) return null;
    try {
      const documentId = await service.processDocument(filePath, {
        name: filename,
        userId
      });
      const text = await this.getPageTexts(documentId);
      return { text, documentId };
    } catch {
      // Deliberate best-effort: a failed extraction is reported as "no result".
      return null;
    }
  }

  /**
   * Extract text from an in-memory buffer by writing it to a temporary file,
   * running {@link extractFromPath} on it, and removing the file afterwards.
   *
   * @param {Buffer|Uint8Array|string} buffer - File contents to write.
   * @param {string} filename - Passed to the service as the document `name`.
   * @param {string} [extname] - File extension, with or without leading dot.
   * @param {*} userId - Passed through to the service.
   * @returns {Promise<{text: string, documentId: *}|null>}
   */
  async extractFromBuffer(buffer, filename, extname, userId) {
    const plugin = this.getDocpixiePlugin();
    const service = plugin == null ? null : plugin.service;
    if (service == null || !service.isReady()) return null;

    // The extension is caller-supplied: strip path separators and anything
    // else unexpected so the temp file can never resolve outside tmpdir().
    const cleaned = String(extname == null ? "" : extname).replace(/[^A-Za-z0-9._-]/g, "");
    const ext = cleaned === "" || cleaned.startsWith(".") ? cleaned : `.${cleaned}`;
    const tempPath = import_path.join(
      import_os.tmpdir(),
      `kb-docpixie-${Date.now()}-${Math.random().toString(36).slice(2)}${ext}`
    );
    try {
      await import_promises.writeFile(tempPath, buffer);
      return await this.extractFromPath(tempPath, filename, userId);
    } finally {
      // Await the cleanup so the file is gone before we return; `force`
      // ignores a missing file and the catch keeps cleanup best-effort.
      await import_promises.rm(tempPath, { force: true }).catch(() => {
      });
    }
  }

  /**
   * Concatenate the `structuredText` of every `docpixie_pages` record for a
   * document, sorted by `pageNumber`. Empty page texts are skipped and the
   * remaining ones are joined with a blank line.
   *
   * @param {*} documentId - Value matched against the pages' `documentId`.
   * @returns {Promise<string>}
   */
  async getPageTexts(documentId) {
    const pageRepo = this.db.getRepository("docpixie_pages");
    const pages = await pageRepo.find({
      filter: { documentId },
      sort: ["pageNumber"]
    });
    return pages.map((page) => page.get("structuredText") || "").filter(Boolean).join("\n\n");
  }

  /**
   * Build a prompt-ready context string for a set of DocPixie document IDs.
   *
   * Only the first `maxDocs` IDs are considered. For each one, the record is
   * loaded from `docpixie_documents`; records that are missing, not in status
   * "ready", or that have neither page text nor summary are skipped. The
   * content is the page text when present, otherwise the summary.
   *
   * @param {Array} documentIds - Document IDs; a non-array or empty value
   *   yields an empty string.
   * @param {number} [maxDocs=3] - Maximum number of IDs to look at.
   * @returns {Promise<string>} `<docpixie_document>` blocks joined by blank
   *   lines, with `id` and `name` attribute values escaped.
   */
  async buildDeepContext(documentIds, maxDocs = 3) {
    if (!Array.isArray(documentIds) || documentIds.length === 0) return "";
    const docRepo = this.db.getRepository("docpixie_documents");
    const parts = [];
    for (const id of documentIds.slice(0, maxDocs)) {
      try {
        const docRecord = await docRepo.findOne({ filter: { id } });
        if (!docRecord) continue;
        if (docRecord.get("status") !== "ready") continue;
        const pageText = await this.getPageTexts(id);
        const content = pageText || docRecord.get("summary") || "";
        if (!content) continue;
        const idAttr = escapeXmlAttribute(id);
        const nameAttr = escapeXmlAttribute(docRecord.get("name"));
        parts.push(
          `<docpixie_document id="${idAttr}" name="${nameAttr}">\n${content}\n</docpixie_document>`
        );
      } catch {
        // Best-effort: one unreadable document must not drop the others.
      }
    }
    return parts.join("\n\n");
  }
}
136
+ // Annotate the CommonJS export names for ESM import in node:
137
+ 0 && (module.exports = {
138
+ DocPixieExtractor
139
+ });
package/package.json CHANGED
@@ -6,7 +6,7 @@
6
6
  "description": "Provides Knowledge Base management, Vector Store, Vector Database (PGVector), and RAG retrieval capabilities for AI Employees.",
7
7
  "description.vi-VN": "Cung cấp quản lý Cơ sở tri thức, Vector Store, Vector Database (PGVector), và khả năng truy xuất RAG cho nhân viên AI.",
8
8
  "description.zh-CN": "为 AI 员工提供知识库管理、向量存储、向量数据库 (PGVector) 和 RAG 检索功能。",
9
- "version": "1.1.2",
9
+ "version": "1.1.4",
10
10
  "license": "Apache-2.0",
11
11
  "main": "./dist/server/index.js",
12
12
  "files": [
@@ -18,18 +18,20 @@
18
18
  "README.md"
19
19
  ],
20
20
  "dependencies": {
21
- "pg": "^8.13.0"
21
+ "pg": "^8.13.0",
22
+ "@langchain/community": "^1.1.0",
23
+ "@langchain/core": "^1.1.24",
24
+ "@langchain/textsplitters": "^0.1.0"
22
25
  },
23
26
  "peerDependencies": {
24
27
  "@nocobase/client": "2.x",
25
28
  "@nocobase/server": "2.x",
26
29
  "@nocobase/database": "2.x",
27
30
  "@nocobase/plugin-ai": "2.x",
28
- "@nocobase/plugin-file-manager": "2.x",
29
- "@nocobase/test": "2.x",
30
- "@langchain/community": "^1.1.0",
31
- "@langchain/core": "^1.1.24",
32
- "@langchain/textsplitters": "^0.1.0"
31
+ "@nocobase/plugin-file-manager": "2.x"
32
+ },
33
+ "devDependencies": {
34
+ "@nocobase/test": "2.x"
33
35
  },
34
36
  "keywords": [
35
37
  "AI",
package/README.md DELETED
@@ -1,35 +0,0 @@
1
- # plugin-knowledge-base
2
-
3
- AI Knowledge Base plugin for NocoBase. Provides RAG (Retrieval Augmented Generation) capabilities for AI Employees.
4
-
5
- ## Features
6
-
7
- - **Knowledge Base Management** — Create and manage knowledge bases with document upload
8
- - **Access Control (ACL)** — 4 access levels: Private, Basic, Shared (role-based), Public
9
- - **Shared mode**: `allowedRoles` (read/search) and `uploadRoles` (upload documents) per KB
10
- - Permission enforcement on all operations: list, create, destroy, reprocess, RAG search
11
- - **Vector Store** — Bind Embedding models with Vector Databases
12
- - **Vector Database** — PGVector connection management with connection testing
13
- - **Document Processing** — Automatic document parsing, chunking, and vectorization
14
- - File upload (.pdf, .txt, .md, .doc, .docx, .ppt, .csv, .json)
15
- - Paste text documents directly
16
- - Auto-delete source file option after successful embedding
17
- - **RAG Retrieval** — Semantic search for AI Employee conversations via Work Context
18
- - **KB Management Popup in Chat** — Users can browse, upload, and select KBs directly from AI chat
19
- - Left sidebar with KB list and access level badges
20
- - Document table with status, chunk count, and actions
21
- - File upload and drag-and-drop support
22
- - "Select for Chat" toggle for RAG context
23
-
24
- ## Requirements
25
-
26
- - NocoBase 2.x
27
- - Plugin AI (`@nocobase/plugin-ai`) enabled
28
- - PostgreSQL with `pgvector` extension (for PGVector provider)
29
-
30
- ## Installation
31
-
32
- ```bash
33
- yarn pm add plugin-knowledge-base
34
- yarn pm enable plugin-knowledge-base
35
- ```