@elizaos/plugin-knowledge 1.5.13 → 1.5.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +34 -42
- package/dist/index.js.map +1 -1
- package/package.json +3 -3
package/dist/index.js
CHANGED
|
@@ -20,16 +20,16 @@ import {
|
|
|
20
20
|
splitChunks
|
|
21
21
|
} from "@elizaos/core";
|
|
22
22
|
|
|
23
|
-
// node_modules/uuid/dist/
|
|
23
|
+
// node_modules/uuid/dist-node/regex.js
|
|
24
24
|
var regex_default = /^(?:[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}|00000000-0000-0000-0000-000000000000|ffffffff-ffff-ffff-ffff-ffffffffffff)$/i;
|
|
25
25
|
|
|
26
|
-
// node_modules/uuid/dist/
|
|
26
|
+
// node_modules/uuid/dist-node/validate.js
|
|
27
27
|
function validate(uuid) {
|
|
28
28
|
return typeof uuid === "string" && regex_default.test(uuid);
|
|
29
29
|
}
|
|
30
30
|
var validate_default = validate;
|
|
31
31
|
|
|
32
|
-
// node_modules/uuid/dist/
|
|
32
|
+
// node_modules/uuid/dist-node/parse.js
|
|
33
33
|
function parse(uuid) {
|
|
34
34
|
if (!validate_default(uuid)) {
|
|
35
35
|
throw TypeError("Invalid UUID");
|
|
@@ -39,7 +39,7 @@ function parse(uuid) {
|
|
|
39
39
|
}
|
|
40
40
|
var parse_default = parse;
|
|
41
41
|
|
|
42
|
-
// node_modules/uuid/dist/
|
|
42
|
+
// node_modules/uuid/dist-node/stringify.js
|
|
43
43
|
var byteToHex = [];
|
|
44
44
|
for (let i = 0; i < 256; ++i) {
|
|
45
45
|
byteToHex.push((i + 256).toString(16).slice(1));
|
|
@@ -48,7 +48,7 @@ function unsafeStringify(arr, offset = 0) {
|
|
|
48
48
|
return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase();
|
|
49
49
|
}
|
|
50
50
|
|
|
51
|
-
// node_modules/uuid/dist/
|
|
51
|
+
// node_modules/uuid/dist-node/rng.js
|
|
52
52
|
import { randomFillSync } from "crypto";
|
|
53
53
|
var rnds8Pool = new Uint8Array(256);
|
|
54
54
|
var poolPtr = rnds8Pool.length;
|
|
@@ -60,7 +60,7 @@ function rng() {
|
|
|
60
60
|
return rnds8Pool.slice(poolPtr, poolPtr += 16);
|
|
61
61
|
}
|
|
62
62
|
|
|
63
|
-
// node_modules/uuid/dist/
|
|
63
|
+
// node_modules/uuid/dist-node/v35.js
|
|
64
64
|
function stringToBytes(str) {
|
|
65
65
|
str = unescape(encodeURIComponent(str));
|
|
66
66
|
const bytes = new Uint8Array(str.length);
|
|
@@ -96,15 +96,12 @@ function v35(version, hash, value, namespace, buf, offset) {
|
|
|
96
96
|
return unsafeStringify(bytes);
|
|
97
97
|
}
|
|
98
98
|
|
|
99
|
-
// node_modules/uuid/dist/
|
|
99
|
+
// node_modules/uuid/dist-node/native.js
|
|
100
100
|
import { randomUUID } from "crypto";
|
|
101
101
|
var native_default = { randomUUID };
|
|
102
102
|
|
|
103
|
-
// node_modules/uuid/dist/
|
|
104
|
-
function
|
|
105
|
-
if (native_default.randomUUID && !buf && !options) {
|
|
106
|
-
return native_default.randomUUID();
|
|
107
|
-
}
|
|
103
|
+
// node_modules/uuid/dist-node/v4.js
|
|
104
|
+
function _v4(options, buf, offset) {
|
|
108
105
|
options = options || {};
|
|
109
106
|
const rnds = options.random ?? options.rng?.() ?? rng();
|
|
110
107
|
if (rnds.length < 16) {
|
|
@@ -124,9 +121,15 @@ function v4(options, buf, offset) {
|
|
|
124
121
|
}
|
|
125
122
|
return unsafeStringify(rnds);
|
|
126
123
|
}
|
|
124
|
+
function v4(options, buf, offset) {
|
|
125
|
+
if (native_default.randomUUID && !buf && !options) {
|
|
126
|
+
return native_default.randomUUID();
|
|
127
|
+
}
|
|
128
|
+
return _v4(options, buf, offset);
|
|
129
|
+
}
|
|
127
130
|
var v4_default = v4;
|
|
128
131
|
|
|
129
|
-
// node_modules/uuid/dist/
|
|
132
|
+
// node_modules/uuid/dist-node/sha1.js
|
|
130
133
|
import { createHash } from "crypto";
|
|
131
134
|
function sha1(bytes) {
|
|
132
135
|
if (Array.isArray(bytes)) {
|
|
@@ -138,7 +141,7 @@ function sha1(bytes) {
|
|
|
138
141
|
}
|
|
139
142
|
var sha1_default = sha1;
|
|
140
143
|
|
|
141
|
-
// node_modules/uuid/dist/
|
|
144
|
+
// node_modules/uuid/dist-node/v5.js
|
|
142
145
|
function v5(value, namespace, buf, offset) {
|
|
143
146
|
return v35(80, sha1_default, value, namespace, buf, offset);
|
|
144
147
|
}
|
|
@@ -1083,7 +1086,7 @@ function logCacheMetrics(result) {
|
|
|
1083
1086
|
import { Buffer as Buffer2 } from "buffer";
|
|
1084
1087
|
import * as mammoth from "mammoth";
|
|
1085
1088
|
import { logger as logger3 } from "@elizaos/core";
|
|
1086
|
-
import {
|
|
1089
|
+
import { extractText } from "unpdf";
|
|
1087
1090
|
import { createHash as createHash2 } from "crypto";
|
|
1088
1091
|
var PLAIN_TEXT_CONTENT_TYPES = [
|
|
1089
1092
|
"application/typescript",
|
|
@@ -1162,32 +1165,24 @@ This document was indexed for search but cannot be displayed directly in the bro
|
|
|
1162
1165
|
}
|
|
1163
1166
|
async function convertPdfToTextFromBuffer(pdfBuffer, filename) {
|
|
1164
1167
|
const docName = filename || "unnamed-document";
|
|
1165
|
-
logger3.debug(`[PdfService] Starting conversion for ${docName}`);
|
|
1168
|
+
logger3.debug(`[PdfService] Starting conversion for ${docName} using unpdf`);
|
|
1166
1169
|
try {
|
|
1167
|
-
const uint8Array = new Uint8Array(
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
const
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
const yPos = Math.round(item.transform[5]);
|
|
1178
|
-
if (!lineMap.has(yPos)) {
|
|
1179
|
-
lineMap.set(yPos, []);
|
|
1180
|
-
}
|
|
1181
|
-
lineMap.get(yPos).push(item);
|
|
1182
|
-
});
|
|
1183
|
-
const sortedLines = Array.from(lineMap.entries()).sort((a, b) => b[0] - a[0]).map(
|
|
1184
|
-
([_, items]) => items.sort((a, b) => a.transform[4] - b.transform[4]).map((item) => item.str).join(" ")
|
|
1185
|
-
);
|
|
1186
|
-
textPages.push(sortedLines.join("\n"));
|
|
1170
|
+
const uint8Array = new Uint8Array(
|
|
1171
|
+
pdfBuffer.buffer.slice(pdfBuffer.byteOffset, pdfBuffer.byteOffset + pdfBuffer.byteLength)
|
|
1172
|
+
);
|
|
1173
|
+
const result = await extractText(uint8Array, {
|
|
1174
|
+
mergePages: true
|
|
1175
|
+
// Merge all pages into a single string
|
|
1176
|
+
});
|
|
1177
|
+
if (!result.text || result.text.trim().length === 0) {
|
|
1178
|
+
logger3.warn(`[PdfService] No text extracted from ${docName}`);
|
|
1179
|
+
return "";
|
|
1187
1180
|
}
|
|
1188
|
-
const
|
|
1189
|
-
logger3.debug(
|
|
1190
|
-
|
|
1181
|
+
const cleanedText = result.text.split("\n").map((line) => line.trim()).filter((line) => line.length > 0).join("\n").replace(/\n{3,}/g, "\n\n");
|
|
1182
|
+
logger3.debug(
|
|
1183
|
+
`[PdfService] Conversion complete for ${docName}, ${result.totalPages} pages, length: ${cleanedText.length}`
|
|
1184
|
+
);
|
|
1185
|
+
return cleanedText;
|
|
1191
1186
|
} catch (error) {
|
|
1192
1187
|
logger3.error(`[PdfService] Error converting PDF ${docName}:`, error.message);
|
|
1193
1188
|
throw new Error(`Failed to convert PDF to text: ${error.message}`);
|
|
@@ -1336,9 +1331,6 @@ function isBinaryContentType(contentType, filename) {
|
|
|
1336
1331
|
];
|
|
1337
1332
|
return binaryExtensions.includes(fileExt);
|
|
1338
1333
|
}
|
|
1339
|
-
function isTextItem(item) {
|
|
1340
|
-
return "str" in item;
|
|
1341
|
-
}
|
|
1342
1334
|
function normalizeS3Url(url) {
|
|
1343
1335
|
try {
|
|
1344
1336
|
const urlObj = new URL(url);
|