@elizaos/plugin-knowledge 1.0.11 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.vite/manifest.json +2 -2
- package/dist/assets/{index-CzI8hR5q.css → index-B5VEkqpw.css} +1 -1
- package/dist/assets/index-DlOvU00u.js +169 -0
- package/dist/index.d.ts +15 -40
- package/dist/index.html +2 -2
- package/dist/index.js +968 -340
- package/dist/index.js.map +1 -1
- package/package.json +20 -22
- package/dist/assets/index-DimDNB3w.js +0 -160
- package/dist/chunk-RFXW7QQK.js +0 -695
- package/dist/chunk-RFXW7QQK.js.map +0 -1
- package/dist/docs-loader-5H4HRYEE.js +0 -9
- package/dist/docs-loader-5H4HRYEE.js.map +0 -1
package/dist/chunk-RFXW7QQK.js
DELETED
|
@@ -1,695 +0,0 @@
|
|
|
1
|
-
// src/docs-loader.ts
|
|
2
|
-
import { logger as logger2 } from "@elizaos/core";
|
|
3
|
-
import * as fs from "fs";
|
|
4
|
-
import * as path from "path";
|
|
5
|
-
|
|
6
|
-
// src/utils.ts
|
|
7
|
-
import { Buffer as Buffer2 } from "buffer";
|
|
8
|
-
import * as mammoth from "mammoth";
|
|
9
|
-
import { logger } from "@elizaos/core";
|
|
10
|
-
import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
|
|
11
|
-
import { createHash as createHash2 } from "crypto";
|
|
12
|
-
|
|
13
|
-
// node_modules/uuid/dist/esm/regex.js
|
|
14
|
-
// Matches the canonical 8-4-4-4-12 hex layout with a version digit of 1-8 and a
// variant digit of 8, 9, a or b; the all-zero and all-f UUIDs are accepted as
// special cases. Matching is case-insensitive.
var regex_default = /^(?:[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}|00000000-0000-0000-0000-000000000000|ffffffff-ffff-ffff-ffff-ffffffffffff)$/i;

// node_modules/uuid/dist/esm/validate.js
/**
 * Reports whether a value is a UUID in canonical text form.
 *
 * @param {*} uuid - Candidate value; anything that is not a string is rejected.
 * @returns {boolean} true only for strings matching `regex_default`.
 */
function validate(uuid) {
  if (typeof uuid !== "string") {
    return false;
  }
  return regex_default.test(uuid);
}
var validate_default = validate;
|
|
21
|
-
|
|
22
|
-
// node_modules/uuid/dist/esm/parse.js
|
|
23
|
-
/**
 * Converts a canonical UUID string into its 16 raw bytes, in the same order
 * as the hex digits appear in the text.
 *
 * @param {string} uuid - UUID text; must pass `validate_default`.
 * @returns {Uint8Array} 16-byte array.
 * @throws {TypeError} "Invalid UUID" when validation fails.
 */
function parse(uuid) {
  if (!validate_default(uuid)) {
    throw TypeError("Invalid UUID");
  }
  // A validated UUID has its hyphens at fixed positions, so stripping them
  // leaves exactly 32 hex digits: two per output byte.
  const hexDigits = uuid.replace(/-/g, "");
  return Uint8Array.from(
    { length: 16 },
    (_unused, index) => parseInt(hexDigits.slice(index * 2, index * 2 + 2), 16)
  );
}
var parse_default = parse;
|
|
31
|
-
|
|
32
|
-
// node_modules/uuid/dist/esm/stringify.js
|
|
33
|
-
// Lookup table: index is a byte value (0-255), entry is its two-digit hex
// form. Adding 256 before formatting guarantees a leading zero to slice off.
var byteToHex = Array.from(
  { length: 256 },
  (_unused, value) => (value + 256).toString(16).slice(1)
);
/**
 * Formats 16 bytes as UUID text (8-4-4-4-12, lowercase) without validating
 * that the input actually holds 16 in-range bytes.
 *
 * @param {ArrayLike<number>} arr - Source bytes.
 * @param {number} [offset=0] - Index in `arr` of the first UUID byte.
 * @returns {string} The formatted UUID string.
 */
function unsafeStringify(arr, offset = 0) {
  const hexAt = (position) => byteToHex[arr[offset + position]];
  const text =
    hexAt(0) + hexAt(1) + hexAt(2) + hexAt(3) + "-" +
    hexAt(4) + hexAt(5) + "-" +
    hexAt(6) + hexAt(7) + "-" +
    hexAt(8) + hexAt(9) + "-" +
    hexAt(10) + hexAt(11) + hexAt(12) + hexAt(13) + hexAt(14) + hexAt(15);
  return text.toLowerCase();
}
|
|
40
|
-
|
|
41
|
-
// node_modules/uuid/dist/esm/rng.js
|
|
42
|
-
import { randomFillSync } from "crypto";
|
|
43
|
-
// Random bytes are drawn in bulk and handed out 16 at a time. The pointer
// starts at the end of the pool so the very first call triggers a fill.
var rnds8Pool = new Uint8Array(256);
var poolPtr = rnds8Pool.length;
/**
 * Returns 16 random bytes taken from the shared pool, refilling the pool with
 * `randomFillSync` whenever fewer than 16 unread bytes remain.
 *
 * @returns {Uint8Array} A fresh 16-byte copy (not a view into the pool).
 */
function rng() {
  const remaining = rnds8Pool.length - poolPtr;
  if (remaining < 16) {
    randomFillSync(rnds8Pool);
    poolPtr = 0;
  }
  const start = poolPtr;
  poolPtr = start + 16;
  return rnds8Pool.slice(start, poolPtr);
}
|
|
52
|
-
|
|
53
|
-
// node_modules/uuid/dist/esm/v35.js
|
|
54
|
-
/**
 * Encodes a string as UTF-8 bytes.
 *
 * Uses the platform `TextEncoder` instead of the deprecated
 * `unescape(encodeURIComponent(...))` trick. Output is identical for
 * well-formed strings; a lone surrogate is now encoded as U+FFFD instead of
 * raising `URIError`.
 *
 * @param {string} str - Text to encode (other values are coerced with `String`).
 * @returns {Uint8Array} The UTF-8 encoding of `str`.
 */
function stringToBytes(str) {
  return new TextEncoder().encode(String(str));
}
// Namespace UUIDs exposed on v5 as `v5.DNS` and `v5.URL`.
var DNS = "6ba7b810-9dad-11d1-80b4-00c04fd430c8";
var URL2 = "6ba7b811-9dad-11d1-80b4-00c04fd430c8";
|
|
64
|
-
/**
 * Shared implementation for name-based UUIDs: hashes namespace bytes followed
 * by value bytes, then stamps the version and variant bits onto the digest.
 *
 * @param {number} version - Value OR-ed into byte 6 (v5 passes 80, i.e. 0x50).
 * @param {Function} hash - Maps a Uint8Array to a digest of at least 16 bytes.
 * @param {string|ArrayLike<number>} value - Name to hash; strings are UTF-8 encoded.
 * @param {string|ArrayLike<number>} namespace - UUID string or 16 raw bytes.
 * @param {ArrayLike<number>} [buf] - When given, the 16 bytes are written here.
 * @param {number} [offset] - Start index in `buf` (defaults to 0).
 * @returns {string|ArrayLike<number>} `buf` if supplied, otherwise the UUID string.
 * @throws {TypeError} If the namespace is an invalid UUID string or not 16 long.
 */
function v35(version, hash, value, namespace, buf, offset) {
  const valueBytes = typeof value === "string" ? stringToBytes(value) : value;
  const namespaceBytes = typeof namespace === "string" ? parse_default(namespace) : namespace;
  if (namespaceBytes?.length !== 16) {
    throw TypeError("Namespace must be array-like (16 iterable integer values, 0-255)");
  }
  const hashInput = new Uint8Array(16 + valueBytes.length);
  hashInput.set(namespaceBytes);
  hashInput.set(valueBytes, namespaceBytes.length);
  const digest = hash(hashInput);
  // Keep the low nibble of byte 6 and overlay the version; force the top two
  // bits of byte 8 to binary 10.
  digest[6] = (digest[6] & 15) | version;
  digest[8] = (digest[8] & 63) | 128;
  if (!buf) {
    return unsafeStringify(digest);
  }
  const start = offset || 0;
  for (let index = 0; index < 16; ++index) {
    buf[start + index] = digest[index];
  }
  return buf;
}
|
|
88
|
-
|
|
89
|
-
// node_modules/uuid/dist/esm/native.js
|
|
90
|
-
import { randomUUID } from "crypto";
|
|
91
|
-
var native_default = { randomUUID };

// node_modules/uuid/dist/esm/v4.js
/**
 * Generates a random (version 4) UUID.
 *
 * With no `options` and no `buf`, delegates to the native `randomUUID`.
 * Otherwise takes 16 random bytes from `options.random`, `options.rng()`, or
 * the pooled `rng()` (in that order), stamps version/variant bits onto them in
 * place, and either formats them or copies them into `buf`.
 *
 * @param {{random?: ArrayLike<number>, rng?: Function}} [options]
 * @param {ArrayLike<number>} [buf] - Destination for the raw bytes.
 * @param {number} [offset] - Start index in `buf` (defaults to 0).
 * @returns {string|ArrayLike<number>} `buf` if supplied, otherwise the UUID string.
 * @throws {Error} If fewer than 16 random bytes are available.
 * @throws {RangeError} If the 16 bytes would not fit in `buf` at `offset`.
 */
function v4(options, buf, offset) {
  const canUseNative = native_default.randomUUID && !buf && !options;
  if (canUseNative) {
    return native_default.randomUUID();
  }
  const settings = options || {};
  const rnds = settings.random ?? settings.rng?.() ?? rng();
  if (rnds.length < 16) {
    throw new Error("Random bytes length must be >= 16");
  }
  // Version nibble 4 in byte 6, top two bits of byte 8 forced to binary 10.
  rnds[6] = (rnds[6] & 15) | 64;
  rnds[8] = (rnds[8] & 63) | 128;
  if (!buf) {
    return unsafeStringify(rnds);
  }
  const start = offset || 0;
  if (start < 0 || start + 16 > buf.length) {
    throw new RangeError(`UUID byte range ${start}:${start + 15} is out of buffer bounds`);
  }
  for (let index = 0; index < 16; ++index) {
    buf[start + index] = rnds[index];
  }
  return buf;
}
var v4_default = v4;
|
|
118
|
-
|
|
119
|
-
// node_modules/uuid/dist/esm/sha1.js
|
|
120
|
-
import { createHash } from "crypto";
|
|
121
|
-
/**
 * Computes the SHA-1 digest of the given data.
 *
 * @param {number[]|string|Uint8Array|Buffer} bytes - Plain arrays and strings
 *   (read as UTF-8) are converted to a Buffer first; anything else is passed
 *   to the hash unchanged.
 * @returns {Buffer} The 20-byte digest.
 */
function sha1(bytes) {
  let input = bytes;
  if (Array.isArray(input)) {
    input = Buffer.from(input);
  } else if (typeof input === "string") {
    input = Buffer.from(input, "utf8");
  }
  const hasher = createHash("sha1");
  hasher.update(input);
  return hasher.digest();
}
var sha1_default = sha1;
|
|
130
|
-
|
|
131
|
-
// node_modules/uuid/dist/esm/v5.js
|
|
132
|
-
/**
 * Generates a name-based UUID using SHA-1 by delegating to `v35`.
 *
 * @param {string|ArrayLike<number>} value - Name to hash.
 * @param {string|ArrayLike<number>} namespace - Namespace UUID (string or 16 bytes).
 * @param {ArrayLike<number>} [buf] - Optional destination for the raw bytes.
 * @param {number} [offset] - Start index in `buf`.
 * @returns {string|ArrayLike<number>} Whatever `v35` returns.
 */
function v5(value, namespace, buf, offset) {
  // 80 === 0x50: version nibble 5 positioned in the high half of byte 6.
  const versionBits = 80;
  return v35(versionBits, sha1_default, value, namespace, buf, offset);
}
// Expose the namespace constants as properties of the function.
Object.assign(v5, { DNS, URL: URL2 });
var v5_default = v5;
|
|
138
|
-
|
|
139
|
-
// src/utils.ts
|
|
140
|
-
// Non-"text/*" MIME types (plus a few explicit text ones) that
// extractTextFromFileBuffer decodes directly as UTF-8.
var PLAIN_TEXT_CONTENT_TYPES = [
  // TypeScript
  "application/typescript",
  "text/typescript",
  // Python
  "text/x-python",
  "application/x-python-code",
  // YAML
  "application/yaml",
  "text/yaml",
  "application/x-yaml",
  // Other structured text
  "application/json",
  "text/markdown",
  "text/csv"
];
// Largest buffer (5 MiB) the unknown-type fallback will try to read as text.
var MAX_FALLBACK_SIZE_BYTES = 5 * 2 ** 20;
// Number of leading bytes scanned for a NUL byte in the binary heuristic.
var BINARY_CHECK_BYTES = 2 ** 10;
|
|
154
|
-
/**
 * Extracts plain text from a file buffer based on its content type.
 *
 * Handling order:
 *  1. DOCX: text is extracted with mammoth.
 *  2. Legacy .doc (by MIME type or filename suffix): a fixed placeholder
 *     string is returned; the buffer is not parsed.
 *  3. `text/*` or a type listed in PLAIN_TEXT_CONTENT_TYPES: decoded as UTF-8.
 *  4. Anything else: best-effort UTF-8 fallback, refused when the buffer is
 *     larger than MAX_FALLBACK_SIZE_BYTES, contains a NUL byte within the
 *     first BINARY_CHECK_BYTES bytes, or decodes with U+FFFD characters.
 *
 * Errors rethrown from a catch block carry the underlying error as `cause`,
 * so the original stack is not lost.
 *
 * @param {Buffer} fileBuffer - Raw file contents.
 * @param {string} contentType - MIME type; compared case-insensitively.
 * @param {string} originalFilename - Used for logging and `.doc` detection.
 * @returns {Promise<string>} The extracted text (or the .doc placeholder).
 * @throws {Error} When DOCX parsing fails or the fallback rejects the file.
 */
async function extractTextFromFileBuffer(fileBuffer, contentType, originalFilename) {
  const lowerContentType = contentType.toLowerCase();
  logger.debug(
    `[TextUtil] Attempting to extract text from ${originalFilename} (type: ${contentType})`
  );

  if (lowerContentType === "application/vnd.openxmlformats-officedocument.wordprocessingml.document") {
    logger.debug(`[TextUtil] Extracting text from DOCX ${originalFilename} via mammoth.`);
    try {
      const result = await mammoth.extractRawText({ buffer: fileBuffer });
      logger.debug(
        `[TextUtil] DOCX text extraction complete for ${originalFilename}. Text length: ${result.value.length}`
      );
      return result.value;
    } catch (docxError) {
      const errorMsg = `[TextUtil] Failed to parse DOCX file ${originalFilename}: ${docxError.message}`;
      logger.error(errorMsg, docxError.stack);
      throw new Error(errorMsg, { cause: docxError });
    }
  }

  if (lowerContentType === "application/msword" || originalFilename.toLowerCase().endsWith(".doc")) {
    logger.debug(`[TextUtil] Handling Microsoft Word .doc file: ${originalFilename}`);
    return `[Microsoft Word Document: ${originalFilename}]\n\nThis document was indexed for search but cannot be displayed directly in the browser. The original document content is preserved for retrieval purposes.`;
  }

  if (lowerContentType.startsWith("text/") || PLAIN_TEXT_CONTENT_TYPES.includes(lowerContentType)) {
    logger.debug(
      `[TextUtil] Extracting text from plain text compatible file ${originalFilename} (type: ${contentType})`
    );
    return fileBuffer.toString("utf-8");
  }

  // Unknown type: try to treat it as text, guarded by size and binary checks.
  logger.warn(
    `[TextUtil] Unsupported content type: "${contentType}" for ${originalFilename}. Attempting fallback to plain text.`
  );
  if (fileBuffer.length > MAX_FALLBACK_SIZE_BYTES) {
    const sizeErrorMsg = `[TextUtil] File ${originalFilename} (type: ${contentType}) exceeds maximum size for fallback (${MAX_FALLBACK_SIZE_BYTES} bytes). Cannot process as plain text.`;
    logger.error(sizeErrorMsg);
    throw new Error(sizeErrorMsg);
  }
  // A NUL byte near the start is taken as a sign of binary content.
  const initialBytes = fileBuffer.subarray(0, Math.min(fileBuffer.length, BINARY_CHECK_BYTES));
  if (initialBytes.includes(0)) {
    const binaryHeuristicMsg = `[TextUtil] File ${originalFilename} (type: ${contentType}) appears to be binary based on initial byte check. Cannot process as plain text.`;
    logger.error(binaryHeuristicMsg);
    throw new Error(binaryHeuristicMsg);
  }
  try {
    const textContent = fileBuffer.toString("utf-8");
    // U+FFFD appears wherever the bytes were not valid UTF-8.
    if (textContent.includes("\uFFFD")) {
      const binaryErrorMsg = `[TextUtil] File ${originalFilename} (type: ${contentType}) seems to be binary or has encoding issues after fallback to plain text (detected \uFFFD).`;
      logger.error(binaryErrorMsg);
      throw new Error(binaryErrorMsg);
    }
    logger.debug(
      `[TextUtil] Successfully processed unknown type ${contentType} as plain text after fallback for ${originalFilename}.`
    );
    return textContent;
  } catch (fallbackError) {
    // Also catches the U+FFFD error thrown just above; callers always see
    // the generic message, with the specific failure attached as `cause`.
    const finalErrorMsg = `[TextUtil] Unsupported content type: ${contentType} for ${originalFilename}. Fallback to plain text also failed or indicated binary content.`;
    logger.error(finalErrorMsg, fallbackError.message ? fallbackError.stack : void 0);
    throw new Error(finalErrorMsg, { cause: fallbackError });
  }
}
|
|
215
|
-
/**
 * Converts a PDF buffer to a single whitespace-normalized text string.
 *
 * For each page, text items are grouped by their rounded `transform[5]` value,
 * groups are ordered from largest to smallest, and items inside a group are
 * ordered by `transform[4]` and joined with spaces. Finally every run of
 * whitespace (including the line and page separators) is collapsed to one
 * space, so the result contains no newlines.
 *
 * The rethrown error carries the underlying failure as `cause`.
 *
 * @param {Buffer|ArrayBuffer|ArrayLike<number>} pdfBuffer - PDF file contents.
 * @param {string} [filename] - Name used in log messages only.
 * @returns {Promise<string>} The extracted text.
 * @throws {Error} "Failed to convert PDF to text: ..." on any failure.
 */
async function convertPdfToTextFromBuffer(pdfBuffer, filename) {
  const docName = filename || "unnamed-document";
  logger.debug(`[PdfService] Starting conversion for ${docName}`);
  try {
    const pdf = await getDocument({ data: new Uint8Array(pdfBuffer) }).promise;
    const numPages = pdf.numPages;
    const textPages = [];
    for (let pageNum = 1; pageNum <= numPages; pageNum++) {
      logger.debug(`[PdfService] Processing page ${pageNum}/${numPages}`);
      const page = await pdf.getPage(pageNum);
      const textContent = await page.getTextContent();

      // Bucket items that share the same rounded transform[5] value.
      // NOTE(review): presumably transform[4]/[5] are the x/y position — confirm against pdf.js docs.
      const lineMap = /* @__PURE__ */ new Map();
      for (const item of textContent.items) {
        if (!isTextItem(item)) {
          continue;
        }
        const yPos = Math.round(item.transform[5]);
        const bucket = lineMap.get(yPos);
        if (bucket) {
          bucket.push(item);
        } else {
          lineMap.set(yPos, [item]);
        }
      }

      const sortedLines = [...lineMap.entries()]
        .sort(([yA], [yB]) => yB - yA)
        .map(([, items]) =>
          items
            .sort((left, right) => left.transform[4] - right.transform[4])
            .map((item) => item.str)
            .join(" ")
        );
      textPages.push(sortedLines.join("\n"));
    }
    const fullText = textPages.join("\n\n").replace(/\s+/g, " ").trim();
    logger.debug(`[PdfService] Conversion complete for ${docName}, length: ${fullText.length}`);
    return fullText;
  } catch (error) {
    logger.error(`[PdfService] Error converting PDF ${docName}:`, error.message);
    throw new Error(`Failed to convert PDF to text: ${error.message}`, { cause: error });
  }
}
|
|
248
|
-
/**
 * Decides whether a file should be treated as binary.
 *
 * Checks run in this order and the first match wins:
 *  1. content type contains a known text MIME fragment  -> not binary
 *  2. content type contains a known binary MIME fragment -> binary
 *  3. filename's last dot-separated segment is a known text extension -> not binary
 *  4. otherwise binary only if that segment is a known binary extension
 *
 * MIME types are case-insensitive, so the content type is lowercased before
 * matching; previously "Application/PDF" or "TEXT/PLAIN" skipped steps 1-2
 * and fell through to the filename checks.
 *
 * @param {string} contentType - MIME type, optionally with parameters.
 * @param {string} filename - File name used for the extension fallback.
 * @returns {boolean} true if the file should be handled as binary.
 */
function isBinaryContentType(contentType, filename) {
  const normalizedType = contentType.toLowerCase();

  const textContentTypes = [
    "text/",
    "application/json",
    "application/xml",
    "application/javascript",
    "application/typescript",
    "application/x-yaml",
    "application/x-sh"
  ];
  if (textContentTypes.some((type) => normalizedType.includes(type))) {
    return false;
  }

  const binaryContentTypes = [
    "application/pdf",
    "application/msword",
    "application/vnd.openxmlformats-officedocument",
    "application/vnd.ms-excel",
    "application/vnd.ms-powerpoint",
    "application/zip",
    "application/x-zip-compressed",
    "application/octet-stream",
    "image/",
    "audio/",
    "video/"
  ];
  if (binaryContentTypes.some((type) => normalizedType.includes(type))) {
    return true;
  }

  // No verdict from the MIME type: fall back to the text after the last dot
  // (for a dot-less name such as "Makefile" this is the whole name).
  const fileExt = filename.split(".").pop()?.toLowerCase() || "";
  const textExtensions = [
    "txt", "md", "markdown", "json", "xml", "html", "htm", "css",
    "js", "ts", "jsx", "tsx",
    "yaml", "yml", "toml", "ini", "cfg", "conf",
    "sh", "bash", "zsh", "fish",
    "py", "rb", "go", "rs", "java", "c", "cpp", "h", "hpp", "cs",
    "php", "sql", "r", "swift", "kt", "scala", "clj", "ex", "exs", "vim",
    "env", "gitignore", "dockerignore", "editorconfig",
    "log", "csv", "tsv", "properties", "gradle", "sbt",
    "makefile", "dockerfile", "vagrantfile", "gemfile", "rakefile", "podfile",
    "csproj", "vbproj", "fsproj", "sln", "pom"
  ];
  if (textExtensions.includes(fileExt)) {
    return false;
  }

  const binaryExtensions = [
    "pdf", "docx", "doc", "xls", "xlsx", "ppt", "pptx",
    "zip", "rar", "7z", "tar", "gz", "bz2", "xz",
    "jpg", "jpeg", "png", "gif", "bmp", "svg", "ico", "webp",
    "mp3", "mp4", "avi", "mov", "wmv", "flv", "wav", "flac", "ogg",
    "exe", "dll", "so", "dylib", "bin", "dat", "db", "sqlite"
  ];
  return binaryExtensions.includes(fileExt);
}
|
|
391
|
-
/**
 * Filter predicate for PDF text-content entries: keeps those exposing a
 * `str` property (own or inherited).
 *
 * @param {object} item - Entry to test.
 * @returns {boolean} true if `item` has a `str` property.
 */
function isTextItem(item) {
  return Reflect.has(item, "str");
}
|
|
394
|
-
/**
 * Reduces a URL to origin plus path, dropping the query string and fragment.
 *
 * @param {string} url - URL to normalize.
 * @returns {string} `origin + pathname`, or the input unchanged (after
 *   logging a warning) if it cannot be parsed as a URL.
 */
function normalizeS3Url(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch (error) {
    logger.warn(`[URL NORMALIZER] Failed to parse URL: ${url}. Returning original.`);
    return url;
  }
  const { origin, pathname } = parsed;
  return origin + pathname;
}
|
|
403
|
-
/**
 * Downloads a URL and returns its body as base64 with the reported content type.
 *
 * The request is aborted after 30 seconds. The timer is cleared in `finally`,
 * so it cannot linger after a failed request, and it stays armed while the
 * body is being read, so a stalled download is also covered by the timeout.
 *
 * @param {string} url - Address to fetch.
 * @returns {Promise<{content: string, contentType: string}>} Base64 body and
 *   the `content-type` response header ("application/octet-stream" if absent).
 * @throws {Error} "Failed to fetch content from URL: ..." on network failure,
 *   timeout, or a non-2xx status; the underlying error is attached as `cause`.
 */
async function fetchUrlContent(url) {
  logger.debug(`[URL FETCHER] Fetching content from URL: ${url}`);
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), 3e4);
  try {
    const response = await fetch(url, {
      signal: controller.signal,
      headers: {
        "User-Agent": "Eliza-Knowledge-Plugin/1.0"
      }
    });
    if (!response.ok) {
      throw new Error(`Failed to fetch URL: ${response.status} ${response.statusText}`);
    }
    const contentType = response.headers.get("content-type") || "application/octet-stream";
    logger.debug(`[URL FETCHER] Content type from server: ${contentType} for URL: ${url}`);
    const arrayBuffer = await response.arrayBuffer();
    const buffer = Buffer2.from(arrayBuffer);
    const base64Content = buffer.toString("base64");
    logger.debug(
      `[URL FETCHER] Successfully fetched content from URL: ${url} (${buffer.length} bytes)`
    );
    return {
      content: base64Content,
      contentType
    };
  } catch (error) {
    logger.error(`[URL FETCHER] Error fetching content from URL ${url}: ${error.message}`);
    throw new Error(`Failed to fetch content from URL: ${error.message}`, { cause: error });
  } finally {
    // Runs on success and on every failure path.
    clearTimeout(timeoutId);
  }
}
|
|
435
|
-
/**
 * Heuristic check for whether a string is base64-encoded data.
 *
 * Whitespace is ignored. What remains must be non-empty and consist of
 * complete 4-character base64 groups with optional `=` padding in the last
 * group. Short alphanumeric words whose length is a multiple of four
 * (e.g. "test") also pass, so this is a hint, not proof.
 *
 * Fixes: a whitespace-only string used to return true, because the emptiness
 * check ran before whitespace was stripped and the pattern accepts the empty
 * string; non-string truthy values used to throw and now return false.
 *
 * @param {string} content - Candidate text.
 * @returns {boolean} true if `content` has valid base64 shape.
 */
function looksLikeBase64(content) {
  if (typeof content !== "string") {
    return false;
  }
  const compact = content.replace(/\s/g, "");
  if (compact.length === 0) {
    return false;
  }
  const base64Regex = /^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$/;
  return base64Regex.test(compact);
}
|
|
439
|
-
/**
 * Derives a deterministic UUID for a document from its content, so the same
 * content under the same agent always maps to the same ID.
 *
 * Steps: pick the text to hash (at most `maxChars` characters), normalize its
 * line endings and trim it, join it with the agent ID and optional filename
 * using "::", SHA-256 the result, then feed the hex digest to v5 under a
 * fixed namespace.
 *
 * @param {string} content - Document text or base64 data.
 * @param {string} agentId - Included in the hash to namespace by agent.
 * @param {{maxChars?: number, includeFilename?: string, contentType?: string}} [options]
 *   `maxChars` defaults to 2000.
 * @returns {string} The generated UUID string.
 */
function generateContentBasedId(content, agentId, options) {
  const { maxChars = 2e3, includeFilename, contentType } = options || {};

  let contentForHashing;
  if (!looksLikeBase64(content)) {
    contentForHashing = content.slice(0, maxChars);
  } else {
    try {
      const decoded = Buffer2.from(content, "base64").toString("utf8");
      // The encoded form is hashed when it decodes cleanly or the document is
      // a PDF; otherwise the decoded text is hashed.
      const hashEncodedForm = !decoded.includes("\uFFFD") || contentType?.includes("pdf");
      contentForHashing = hashEncodedForm ? content.slice(0, maxChars) : decoded.slice(0, maxChars);
    } catch {
      // Any failure while decoding falls back to hashing the raw input.
      contentForHashing = content.slice(0, maxChars);
    }
  }

  // Normalize CRLF and lone CR to LF so line-ending style does not affect the ID.
  contentForHashing = contentForHashing.replace(/\r\n/g, "\n").replace(/\r/g, "\n").trim();

  // Empty parts are dropped before joining.
  const hashParts = [agentId, contentForHashing, includeFilename || ""];
  const componentsToHash = hashParts.filter(Boolean).join("::");

  const hash = createHash2("sha256").update(componentsToHash).digest("hex");
  const DOCUMENT_NAMESPACE = "6ba7b810-9dad-11d1-80b4-00c04fd430c8";
  const uuid = v5_default(hash, DOCUMENT_NAMESPACE);
  logger.debug(
    `[generateContentBasedId] Generated UUID ${uuid} for document with content hash ${hash.slice(0, 8)}...`
  );
  return uuid;
}
|
|
478
|
-
|
|
479
|
-
// src/docs-loader.ts
|
|
480
|
-
/**
 * Resolves the directory that knowledge documents are loaded from.
 *
 * Uses the KNOWLEDGE_PATH environment variable (resolved to an absolute path)
 * when set; otherwise "<cwd>/docs". A missing directory is only logged — the
 * path is returned either way.
 *
 * @returns {string} The resolved directory path.
 */
function getKnowledgePath() {
  const envPath = process.env.KNOWLEDGE_PATH;
  if (!envPath) {
    const defaultPath = path.join(process.cwd(), "docs");
    if (!fs.existsSync(defaultPath)) {
      logger2.info(`Default docs folder does not exist at: ${defaultPath}`);
      logger2.info("To use the knowledge plugin, either:");
      logger2.info('1. Create a "docs" folder in your project root');
      logger2.info("2. Set KNOWLEDGE_PATH environment variable to your documents folder");
    }
    return defaultPath;
  }

  const resolvedPath = path.resolve(envPath);
  const exists = fs.existsSync(resolvedPath);
  if (!exists) {
    logger2.warn(`Knowledge path from environment variable does not exist: ${resolvedPath}`);
    logger2.warn("Please create the directory or update KNOWLEDGE_PATH environment variable");
  }
  return resolvedPath;
}
|
|
499
|
-
/**
 * Loads every supported file under the knowledge directory into the service.
 *
 * Files are processed one at a time. Dot-files and files whose extension has
 * no known content type are skipped without being counted as successes or
 * failures (they are still included in `total`). A failure on one file is
 * logged and does not stop the rest.
 *
 * @param {object} service - Must provide `addKnowledge(options)`, resolving to
 *   an object with `fragmentCount`.
 * @param {string} agentId - Used as roomId and entityId, and as worldId when
 *   none is supplied.
 * @param {string} [worldId] - Optional world identifier.
 * @returns {Promise<{total: number, successful: number, failed: number}>}
 */
async function loadDocsFromPath(service, agentId, worldId) {
  const docsPath = getKnowledgePath();
  if (!fs.existsSync(docsPath)) {
    logger2.warn(`Knowledge path does not exist: ${docsPath}`);
    return { total: 0, successful: 0, failed: 0 };
  }

  logger2.info(`Loading documents from: ${docsPath}`);
  const files = getAllFiles(docsPath);
  const total = files.length;
  if (total === 0) {
    logger2.info("No files found in knowledge path");
    return { total: 0, successful: 0, failed: 0 };
  }
  logger2.info(`Found ${total} files to process`);

  const counts = { successful: 0, failed: 0 };
  for (const filePath of files) {
    try {
      const fileName = path.basename(filePath);
      const fileExt = path.extname(filePath).toLowerCase();
      if (fileName.startsWith(".")) {
        continue;
      }
      const contentType = getContentType(fileExt);
      if (!contentType) {
        logger2.debug(`Skipping unsupported file type: ${filePath}`);
        continue;
      }

      // Binary files travel as base64; everything else as UTF-8 text.
      const fileBuffer = fs.readFileSync(filePath);
      const encoding = isBinaryContentType(contentType, fileName) ? "base64" : "utf-8";
      const content = fileBuffer.toString(encoding);

      logger2.debug(`Processing document: ${fileName}`);
      const result = await service.addKnowledge({
        // Left empty: the service generates the ID from the content.
        clientDocumentId: "",
        contentType,
        originalFilename: fileName,
        worldId: worldId || agentId,
        content,
        roomId: agentId,
        entityId: agentId
      });
      logger2.info(`\u2705 "${fileName}": ${result.fragmentCount} fragments created`);
      counts.successful += 1;
    } catch (error) {
      logger2.error(`Failed to process file ${filePath}:`, error);
      counts.failed += 1;
    }
  }

  logger2.info(
    `Document loading complete: ${counts.successful} successful, ${counts.failed} failed out of ${total} total`
  );
  return {
    total,
    successful: counts.successful,
    failed: counts.failed
  };
}
|
|
557
|
-
/**
 * Recursively collects the paths of all regular files under a directory.
 *
 * Directories named node_modules, .git, .vscode, dist or build are not
 * entered. Entries that are neither files nor directories are ignored. A
 * directory that cannot be read is logged and skipped.
 *
 * @param {string} dirPath - Directory to scan.
 * @param {string[]} [files=[]] - Accumulator; found paths are appended to it.
 * @returns {string[]} The same `files` array.
 */
function getAllFiles(dirPath, files = []) {
  const skippedDirectories = ["node_modules", ".git", ".vscode", "dist", "build"];
  try {
    for (const entry of fs.readdirSync(dirPath, { withFileTypes: true })) {
      const fullPath = path.join(dirPath, entry.name);
      if (entry.isFile()) {
        files.push(fullPath);
        continue;
      }
      if (entry.isDirectory() && !skippedDirectories.includes(entry.name)) {
        getAllFiles(fullPath, files);
      }
    }
  } catch (error) {
    logger2.error(`Error reading directory ${dirPath}:`, error);
  }
  return files;
}
|
|
575
|
-
/**
 * Maps a file extension (including the leading dot) to a MIME type.
 *
 * The lookup only considers the table's own keys. Previously names inherited
 * from Object.prototype — e.g. "constructor" or "__proto__" — returned a
 * function or object instead of null.
 *
 * @param {string} extension - Extension such as ".md"; matched exactly.
 * @returns {string|null} The MIME type, or null for an unsupported extension.
 */
function getContentType(extension) {
  const contentTypes = {
    // Text documents
    ".txt": "text/plain",
    ".md": "text/markdown",
    ".markdown": "text/markdown",
    // NOTE(review): ".tson" may be a typo (".json" is mapped below) — confirm before changing.
    ".tson": "text/plain",
    ".xml": "application/xml",
    ".csv": "text/csv",
    ".tsv": "text/tab-separated-values",
    ".log": "text/plain",
    // Web files
    ".html": "text/html",
    ".htm": "text/html",
    ".css": "text/css",
    ".scss": "text/x-scss",
    ".sass": "text/x-sass",
    ".less": "text/x-less",
    // JavaScript/TypeScript
    ".js": "text/javascript",
    ".jsx": "text/javascript",
    ".ts": "text/typescript",
    ".tsx": "text/typescript",
    ".mjs": "text/javascript",
    ".cjs": "text/javascript",
    ".vue": "text/x-vue",
    ".svelte": "text/x-svelte",
    ".astro": "text/x-astro",
    // Python
    ".py": "text/x-python",
    ".pyw": "text/x-python",
    ".pyi": "text/x-python",
    // Java/Kotlin/Scala
    ".java": "text/x-java",
    ".kt": "text/x-kotlin",
    ".kts": "text/x-kotlin",
    ".scala": "text/x-scala",
    // C/C++/C#
    ".c": "text/x-c",
    ".cpp": "text/x-c++",
    ".cc": "text/x-c++",
    ".cxx": "text/x-c++",
    ".h": "text/x-c",
    ".hpp": "text/x-c++",
    ".cs": "text/x-csharp",
    // Other languages
    ".php": "text/x-php",
    ".rb": "text/x-ruby",
    ".go": "text/x-go",
    ".rs": "text/x-rust",
    ".swift": "text/x-swift",
    ".r": "text/x-r",
    ".R": "text/x-r",
    ".m": "text/x-objectivec",
    ".mm": "text/x-objectivec",
    ".clj": "text/x-clojure",
    ".cljs": "text/x-clojure",
    ".ex": "text/x-elixir",
    ".exs": "text/x-elixir",
    ".lua": "text/x-lua",
    ".pl": "text/x-perl",
    ".pm": "text/x-perl",
    ".dart": "text/x-dart",
    ".hs": "text/x-haskell",
    ".elm": "text/x-elm",
    ".ml": "text/x-ocaml",
    ".fs": "text/x-fsharp",
    ".fsx": "text/x-fsharp",
    ".vb": "text/x-vb",
    ".pas": "text/x-pascal",
    ".d": "text/x-d",
    ".nim": "text/x-nim",
    ".zig": "text/x-zig",
    ".jl": "text/x-julia",
    ".tcl": "text/x-tcl",
    ".awk": "text/x-awk",
    ".sed": "text/x-sed",
    // Shell scripts
    ".sh": "text/x-sh",
    ".bash": "text/x-sh",
    ".zsh": "text/x-sh",
    ".fish": "text/x-fish",
    ".ps1": "text/x-powershell",
    ".bat": "text/x-batch",
    ".cmd": "text/x-batch",
    // Config files
    ".json": "application/json",
    ".yaml": "text/x-yaml",
    ".yml": "text/x-yaml",
    ".toml": "text/x-toml",
    ".ini": "text/x-ini",
    ".cfg": "text/x-ini",
    ".conf": "text/x-ini",
    ".env": "text/plain",
    ".gitignore": "text/plain",
    ".dockerignore": "text/plain",
    ".editorconfig": "text/plain",
    ".properties": "text/x-properties",
    // Database
    ".sql": "text/x-sql",
    // Binary documents
    ".pdf": "application/pdf",
    ".doc": "application/msword",
    ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
  };
  // Own-property check so inherited members are never returned.
  const isKnown = Object.prototype.hasOwnProperty.call(contentTypes, extension);
  return isKnown ? contentTypes[extension] : null;
}
|
|
682
|
-
|
|
683
|
-
export {
|
|
684
|
-
v4_default,
|
|
685
|
-
extractTextFromFileBuffer,
|
|
686
|
-
convertPdfToTextFromBuffer,
|
|
687
|
-
isBinaryContentType,
|
|
688
|
-
normalizeS3Url,
|
|
689
|
-
fetchUrlContent,
|
|
690
|
-
looksLikeBase64,
|
|
691
|
-
generateContentBasedId,
|
|
692
|
-
getKnowledgePath,
|
|
693
|
-
loadDocsFromPath
|
|
694
|
-
};
|
|
695
|
-
//# sourceMappingURL=chunk-RFXW7QQK.js.map
|