@elizaos/plugin-knowledge 1.5.13 → 1.5.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -20,16 +20,16 @@ import {
20
20
  splitChunks
21
21
  } from "@elizaos/core";
22
22
 
23
- // node_modules/uuid/dist/esm/regex.js
23
+ // node_modules/uuid/dist-node/regex.js
24
24
  var regex_default = /^(?:[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}|00000000-0000-0000-0000-000000000000|ffffffff-ffff-ffff-ffff-ffffffffffff)$/i;
25
25
 
26
- // node_modules/uuid/dist/esm/validate.js
26
+ // node_modules/uuid/dist-node/validate.js
27
27
  function validate(uuid) {
28
28
  return typeof uuid === "string" && regex_default.test(uuid);
29
29
  }
30
30
  var validate_default = validate;
31
31
 
32
- // node_modules/uuid/dist/esm/parse.js
32
+ // node_modules/uuid/dist-node/parse.js
33
33
  function parse(uuid) {
34
34
  if (!validate_default(uuid)) {
35
35
  throw TypeError("Invalid UUID");
@@ -39,7 +39,7 @@ function parse(uuid) {
39
39
  }
40
40
  var parse_default = parse;
41
41
 
42
- // node_modules/uuid/dist/esm/stringify.js
42
+ // node_modules/uuid/dist-node/stringify.js
43
43
  var byteToHex = [];
44
44
  for (let i = 0; i < 256; ++i) {
45
45
  byteToHex.push((i + 256).toString(16).slice(1));
@@ -48,7 +48,7 @@ function unsafeStringify(arr, offset = 0) {
48
48
  return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase();
49
49
  }
50
50
 
51
- // node_modules/uuid/dist/esm/rng.js
51
+ // node_modules/uuid/dist-node/rng.js
52
52
  import { randomFillSync } from "crypto";
53
53
  var rnds8Pool = new Uint8Array(256);
54
54
  var poolPtr = rnds8Pool.length;
@@ -60,7 +60,7 @@ function rng() {
60
60
  return rnds8Pool.slice(poolPtr, poolPtr += 16);
61
61
  }
62
62
 
63
- // node_modules/uuid/dist/esm/v35.js
63
+ // node_modules/uuid/dist-node/v35.js
64
64
  function stringToBytes(str) {
65
65
  str = unescape(encodeURIComponent(str));
66
66
  const bytes = new Uint8Array(str.length);
@@ -96,15 +96,12 @@ function v35(version, hash, value, namespace, buf, offset) {
96
96
  return unsafeStringify(bytes);
97
97
  }
98
98
 
99
- // node_modules/uuid/dist/esm/native.js
99
+ // node_modules/uuid/dist-node/native.js
100
100
  import { randomUUID } from "crypto";
101
101
  var native_default = { randomUUID };
102
102
 
103
- // node_modules/uuid/dist/esm/v4.js
104
- function v4(options, buf, offset) {
105
- if (native_default.randomUUID && !buf && !options) {
106
- return native_default.randomUUID();
107
- }
103
+ // node_modules/uuid/dist-node/v4.js
104
+ function _v4(options, buf, offset) {
108
105
  options = options || {};
109
106
  const rnds = options.random ?? options.rng?.() ?? rng();
110
107
  if (rnds.length < 16) {
@@ -124,9 +121,15 @@ function v4(options, buf, offset) {
124
121
  }
125
122
  return unsafeStringify(rnds);
126
123
  }
124
+ function v4(options, buf, offset) {
125
+ if (native_default.randomUUID && !buf && !options) {
126
+ return native_default.randomUUID();
127
+ }
128
+ return _v4(options, buf, offset);
129
+ }
127
130
  var v4_default = v4;
128
131
 
129
- // node_modules/uuid/dist/esm/sha1.js
132
+ // node_modules/uuid/dist-node/sha1.js
130
133
  import { createHash } from "crypto";
131
134
  function sha1(bytes) {
132
135
  if (Array.isArray(bytes)) {
@@ -138,7 +141,7 @@ function sha1(bytes) {
138
141
  }
139
142
  var sha1_default = sha1;
140
143
 
141
- // node_modules/uuid/dist/esm/v5.js
144
+ // node_modules/uuid/dist-node/v5.js
142
145
  function v5(value, namespace, buf, offset) {
143
146
  return v35(80, sha1_default, value, namespace, buf, offset);
144
147
  }
@@ -1083,7 +1086,7 @@ function logCacheMetrics(result) {
1083
1086
  import { Buffer as Buffer2 } from "buffer";
1084
1087
  import * as mammoth from "mammoth";
1085
1088
  import { logger as logger3 } from "@elizaos/core";
1086
- import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
1089
+ import { extractText } from "unpdf";
1087
1090
  import { createHash as createHash2 } from "crypto";
1088
1091
  var PLAIN_TEXT_CONTENT_TYPES = [
1089
1092
  "application/typescript",
@@ -1162,32 +1165,24 @@ This document was indexed for search but cannot be displayed directly in the bro
1162
1165
  }
1163
1166
  async function convertPdfToTextFromBuffer(pdfBuffer, filename) {
1164
1167
  const docName = filename || "unnamed-document";
1165
- logger3.debug(`[PdfService] Starting conversion for ${docName}`);
1168
+ logger3.debug(`[PdfService] Starting conversion for ${docName} using unpdf`);
1166
1169
  try {
1167
- const uint8Array = new Uint8Array(pdfBuffer);
1168
- const pdf = await getDocument({ data: uint8Array }).promise;
1169
- const numPages = pdf.numPages;
1170
- const textPages = [];
1171
- for (let pageNum = 1; pageNum <= numPages; pageNum++) {
1172
- logger3.debug(`[PdfService] Processing page ${pageNum}/${numPages}`);
1173
- const page = await pdf.getPage(pageNum);
1174
- const textContent = await page.getTextContent();
1175
- const lineMap = /* @__PURE__ */ new Map();
1176
- textContent.items.filter(isTextItem).forEach((item) => {
1177
- const yPos = Math.round(item.transform[5]);
1178
- if (!lineMap.has(yPos)) {
1179
- lineMap.set(yPos, []);
1180
- }
1181
- lineMap.get(yPos).push(item);
1182
- });
1183
- const sortedLines = Array.from(lineMap.entries()).sort((a, b) => b[0] - a[0]).map(
1184
- ([_, items]) => items.sort((a, b) => a.transform[4] - b.transform[4]).map((item) => item.str).join(" ")
1185
- );
1186
- textPages.push(sortedLines.join("\n"));
1170
+ const uint8Array = new Uint8Array(
1171
+ pdfBuffer.buffer.slice(pdfBuffer.byteOffset, pdfBuffer.byteOffset + pdfBuffer.byteLength)
1172
+ );
1173
+ const result = await extractText(uint8Array, {
1174
+ mergePages: true
1175
+ // Merge all pages into a single string
1176
+ });
1177
+ if (!result.text || result.text.trim().length === 0) {
1178
+ logger3.warn(`[PdfService] No text extracted from ${docName}`);
1179
+ return "";
1187
1180
  }
1188
- const fullText = textPages.join("\n\n").replace(/\s+/g, " ").trim();
1189
- logger3.debug(`[PdfService] Conversion complete for ${docName}, length: ${fullText.length}`);
1190
- return fullText;
1181
+ const cleanedText = result.text.split("\n").map((line) => line.trim()).filter((line) => line.length > 0).join("\n").replace(/\n{3,}/g, "\n\n");
1182
+ logger3.debug(
1183
+ `[PdfService] Conversion complete for ${docName}, ${result.totalPages} pages, length: ${cleanedText.length}`
1184
+ );
1185
+ return cleanedText;
1191
1186
  } catch (error) {
1192
1187
  logger3.error(`[PdfService] Error converting PDF ${docName}:`, error.message);
1193
1188
  throw new Error(`Failed to convert PDF to text: ${error.message}`);
@@ -1336,9 +1331,6 @@ function isBinaryContentType(contentType, filename) {
1336
1331
  ];
1337
1332
  return binaryExtensions.includes(fileExt);
1338
1333
  }
1339
- function isTextItem(item) {
1340
- return "str" in item;
1341
- }
1342
1334
  function normalizeS3Url(url) {
1343
1335
  try {
1344
1336
  const urlObj = new URL(url);