@elizaos/plugin-knowledge 1.0.8 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -2,11 +2,13 @@ import {
  convertPdfToTextFromBuffer,
  extractTextFromFileBuffer,
  fetchUrlContent,
+ generateContentBasedId,
  isBinaryContentType,
  loadDocsFromPath,
  looksLikeBase64,
- normalizeS3Url
- } from "./chunk-536BD2UA.js";
+ normalizeS3Url,
+ v4_default
+ } from "./chunk-QH7GBNKB.js";
 
  // src/index.ts
  import { logger as logger7 } from "@elizaos/core";
@@ -206,51 +208,6 @@ import {
  splitChunks
  } from "@elizaos/core";
 
- // node_modules/uuid/dist/esm/stringify.js
- var byteToHex = [];
- for (let i = 0; i < 256; ++i) {
- byteToHex.push((i + 256).toString(16).slice(1));
- }
- function unsafeStringify(arr, offset = 0) {
- return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase();
- }
-
- // node_modules/uuid/dist/esm/rng.js
- import { randomFillSync } from "crypto";
- var rnds8Pool = new Uint8Array(256);
- var poolPtr = rnds8Pool.length;
- function rng() {
- if (poolPtr > rnds8Pool.length - 16) {
- randomFillSync(rnds8Pool);
- poolPtr = 0;
- }
- return rnds8Pool.slice(poolPtr, poolPtr += 16);
- }
-
- // node_modules/uuid/dist/esm/native.js
- import { randomUUID } from "crypto";
- var native_default = { randomUUID };
-
- // node_modules/uuid/dist/esm/v4.js
- function v4(options, buf, offset) {
- if (native_default.randomUUID && !buf && !options) {
- return native_default.randomUUID();
- }
- options = options || {};
- const rnds = options.random || (options.rng || rng)();
- rnds[6] = rnds[6] & 15 | 64;
- rnds[8] = rnds[8] & 63 | 128;
- if (buf) {
- offset = offset || 0;
- for (let i = 0; i < 16; ++i) {
- buf[offset + i] = rnds[i];
- }
- return buf;
- }
- return unsafeStringify(rnds);
- }
- var v4_default = v4;
-
  // src/ctx-embeddings.ts
  var DEFAULT_CHUNK_TOKEN_SIZE = 500;
  var DEFAULT_CHUNK_OVERLAP_TOKENS = 100;
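Note: the vendored uuid source deleted here is not dropped from the package; 1.0.9 moves it into the shared chunk and re-imports it as v4_default (see the import hunk above, where chunk-536BD2UA.js becomes chunk-QH7GBNKB.js). The deleted block is the standard RFC 4122 version-4 algorithm. A minimal TypeScript sketch of the bit-setting it performs, using Node's crypto (function name hypothetical):

import { randomBytes } from "crypto";

// Sketch of RFC 4122 v4 generation as in the removed vendored code:
// 16 random bytes, with the version and variant bits forced.
function v4Sketch(): string {
  const rnds = randomBytes(16);
  rnds[6] = (rnds[6] & 0x0f) | 0x40; // version 4 in the high nibble of byte 6 (& 15 | 64 above)
  rnds[8] = (rnds[8] & 0x3f) | 0x80; // RFC 4122 variant in the top bits of byte 8 (& 63 | 128 above)
  const hex = rnds.toString("hex");
  return `${hex.slice(0, 8)}-${hex.slice(8, 12)}-${hex.slice(12, 16)}-${hex.slice(16, 20)}-${hex.slice(20)}`;
}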
@@ -674,14 +631,6 @@ function getChunkWithContext(chunkContent, generatedContext) {
  );
  return chunkContent;
  }
- if (!generatedContext.includes(chunkContent)) {
- console.warn(
- "Generated context does not contain the original chunk. Appending original to ensure data integrity."
- );
- return `${generatedContext.trim()}
-
- ${chunkContent}`;
- }
  return generatedContext.trim();
  }
 
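Note: this removes the integrity fallback that appended the original chunk whenever the generated context did not literally contain it; the trimmed context is now returned as-is, so the pipeline relies on the LLM echoing the chunk back. A minimal sketch of the resulting behavior (assuming the empty-context guard visible in the context lines above):

// Sketch of the new behavior; the empty-context guard is inferred from the
// surrounding context lines, not shown in full in this diff.
function getChunkWithContextSketch(chunkContent: string, generatedContext: string): string {
  if (!generatedContext) {
    return chunkContent; // fall back to the bare chunk when no context was generated
  }
  return generatedContext.trim(); // no longer re-appends chunkContent if the LLM omitted it
}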
@@ -1015,7 +964,9 @@ var useCustomLLM = shouldUseCustomLLM();
  if (ctxKnowledgeEnabled) {
  logger3.info(`Document processor starting with Contextual Knowledge ENABLED`);
  if (useCustomLLM) {
- logger3.info(`Using Custom LLM with provider: ${process.env.TEXT_PROVIDER}, model: ${process.env.TEXT_MODEL}`);
+ logger3.info(
+ `Using Custom LLM with provider: ${process.env.TEXT_PROVIDER}, model: ${process.env.TEXT_MODEL}`
+ );
  } else {
  logger3.info(`Using ElizaOS Runtime LLM (default behavior)`);
  }
@@ -1307,6 +1258,8 @@ async function getContextualizedChunks(runtime, fullDocumentText, chunks, conten
  }
  }
  async function generateContextsInBatch(runtime, fullDocumentText, chunks, contentType, batchIndices) {
+ console.log("####### generateContextsInBatch FULLL DOCUMENT", fullDocumentText);
+ console.log("####### generateContextsInBatch CHUNKS", chunks);
  if (!chunks || chunks.length === 0) {
  return [];
  }
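Note: these new console.log calls print the entire source document and every chunk to stdout on each batch, bypassing logger3; the "#######" prefix and the "FULLL" typo suggest debug statements that shipped unintentionally. If such logging were wanted, a guarded, size-bounded variant would be safer; a hypothetical sketch (DEBUG_CTX_BATCH is not a real plugin setting):

// Hypothetical guarded alternative; logs sizes rather than full contents.
function logBatchDebug(fullDocumentText: string, chunks: unknown[]): void {
  if (process.env.DEBUG_CTX_BATCH === "true") {
    console.log(`generateContextsInBatch: document ${fullDocumentText.length} chars, ${chunks.length} chunks`);
  }
}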
@@ -1315,6 +1268,9 @@ async function generateContextsInBatch(runtime, fullDocumentText, chunks, conten
  const config = validateModelConfig();
  const isUsingOpenRouter = config.TEXT_PROVIDER === "openrouter";
  const isUsingCacheCapableModel = isUsingOpenRouter && (config.TEXT_MODEL?.toLowerCase().includes("claude") || config.TEXT_MODEL?.toLowerCase().includes("gemini"));
+ logger3.info(
+ `Using provider: ${config.TEXT_PROVIDER}, model: ${config.TEXT_MODEL}, caching capability: ${isUsingCacheCapableModel}`
+ );
  const promptConfigs = prepareContextPrompts(
  chunks,
  fullDocumentText,
@@ -1337,15 +1293,11 @@ async function generateContextsInBatch(runtime, fullDocumentText, chunks, conten
  const generateTextOperation = async () => {
  if (useCustomLLM) {
  if (item.usesCaching) {
- return await generateText(
- item.promptText,
- item.systemPrompt,
- {
- cacheDocument: item.fullDocumentTextForContext,
- cacheOptions: { type: "ephemeral" },
- autoCacheContextualRetrieval: true
- }
- );
+ return await generateText(item.promptText, item.systemPrompt, {
+ cacheDocument: item.fullDocumentTextForContext,
+ cacheOptions: { type: "ephemeral" },
+ autoCacheContextualRetrieval: true
+ });
  } else {
  return await generateText(item.prompt);
  }
@@ -1366,7 +1318,7 @@ async function generateContextsInBatch(runtime, fullDocumentText, chunks, conten
  generateTextOperation,
  `context generation for chunk ${item.originalIndex}`
  );
- const generatedContext = llmResponse.text;
+ const generatedContext = typeof llmResponse === "string" ? llmResponse : llmResponse.text;
  const contextualizedText = getChunkWithContext(item.chunkText, generatedContext);
  logger3.debug(
  `Context added for chunk ${item.originalIndex}. New length: ${contextualizedText.length}`
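Note: the custom-LLM path can return a bare string while the runtime model path returns an object carrying a text field; 1.0.8 read .text unconditionally, which yields undefined for string responses. The new ternary normalizes both shapes. A minimal sketch:

// Sketch of the normalization: accept either response shape seen in the diff.
type LlmResponse = string | { text: string };

const responseText = (res: LlmResponse): string =>
  typeof res === "string" ? res : res.text;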
@@ -1630,36 +1582,42 @@ var KnowledgeService = class _KnowledgeService extends Service {
  */
  async addKnowledge(options) {
  const agentId = options.agentId || this.runtime.agentId;
+ const contentBasedId = generateContentBasedId(options.content, agentId, {
+ includeFilename: options.originalFilename,
+ contentType: options.contentType,
+ maxChars: 2e3
+ // Use first 2KB of content for ID generation
+ });
  logger4.info(
- `KnowledgeService processing document for agent: ${agentId}, file: ${options.originalFilename}, type: ${options.contentType}`
+ `KnowledgeService processing document for agent: ${agentId}, file: ${options.originalFilename}, type: ${options.contentType}, generated ID: ${contentBasedId}`
  );
  try {
- const existingDocument = await this.runtime.getMemoryById(options.clientDocumentId);
+ const existingDocument = await this.runtime.getMemoryById(contentBasedId);
  if (existingDocument && existingDocument.metadata?.type === MemoryType2.DOCUMENT) {
  logger4.info(
- `Document ${options.originalFilename} with ID ${options.clientDocumentId} already exists. Skipping processing.`
+ `Document ${options.originalFilename} with ID ${contentBasedId} already exists. Skipping processing.`
  );
  const fragments = await this.runtime.getMemories({
  tableName: "knowledge"
- // Assuming fragments store original documentId in metadata.documentId
- // This query might need adjustment based on actual fragment metadata structure.
- // A more robust way would be to query where metadata.documentId === options.clientDocumentId
  });
  const relatedFragments = fragments.filter(
- (f) => f.metadata?.type === MemoryType2.FRAGMENT && f.metadata.documentId === options.clientDocumentId
+ (f) => f.metadata?.type === MemoryType2.FRAGMENT && f.metadata.documentId === contentBasedId
  );
  return {
- clientDocumentId: options.clientDocumentId,
+ clientDocumentId: contentBasedId,
  storedDocumentMemoryId: existingDocument.id,
  fragmentCount: relatedFragments.length
  };
  }
  } catch (error) {
  logger4.debug(
- `Document ${options.clientDocumentId} not found or error checking existence, proceeding with processing: ${error instanceof Error ? error.message : String(error)}`
+ `Document ${contentBasedId} not found or error checking existence, proceeding with processing: ${error instanceof Error ? error.message : String(error)}`
  );
  }
- return this.processDocument(options);
+ return this.processDocument({
+ ...options,
+ clientDocumentId: contentBasedId
+ });
  }
  /**
  * Process a document regardless of type - Called by public addKnowledge
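Note: this is the core change of 1.0.9. addKnowledge now derives the document ID deterministically from the content itself (capped at the first 2,000 characters) plus the agent ID, filename, and content type, so re-uploading the same file resolves to the existing document instead of creating a duplicate under a fresh client-supplied ID. generateContentBasedId lives in the shared chunk and its body is not part of this diff; a plausible reconstruction, under that caveat (implementation details assumed, not confirmed):

import { createHash } from "crypto";

// Hypothetical reconstruction: hash the agent ID, filename, content type and the
// first maxChars of content, then format the digest as a UUID-shaped string so it
// can serve as a memory ID. The real generateContentBasedId may differ.
function generateContentBasedIdSketch(
  content: string,
  agentId: string,
  opts: { includeFilename?: string; contentType?: string; maxChars?: number } = {}
): string {
  const head = content.slice(0, opts.maxChars ?? 2000);
  const hex = createHash("sha256")
    .update([agentId, opts.includeFilename ?? "", opts.contentType ?? "", head].join("\u0000"))
    .digest("hex");
  return `${hex.slice(0, 8)}-${hex.slice(8, 12)}-${hex.slice(12, 16)}-${hex.slice(16, 20)}-${hex.slice(20, 32)}`;
}

One consequence worth noting: since only the first 2KB of content participates (per the diff's own comment), two documents sharing their first 2,000 characters would collide under this sketch; presumably the implementation accepts that trade-off for cheap deduplication.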
@@ -1851,7 +1809,12 @@ var KnowledgeService = class _KnowledgeService extends Service {
  const processingPromises = items.map(async (item) => {
  await this.knowledgeProcessingSemaphore.acquire();
  try {
- const knowledgeId = createUniqueUuid(this.runtime.agentId + item, item);
+ const knowledgeId = generateContentBasedId(item, this.runtime.agentId, {
+ maxChars: 2e3,
+ // Use first 2KB of content
+ includeFilename: "character-knowledge"
+ // A constant identifier for character knowledge
+ });
  if (await this.checkExistingKnowledge(knowledgeId)) {
  logger4.debug(
  `KnowledgeService: Character knowledge item with ID ${knowledgeId} already exists. Skipping.`
@@ -1888,7 +1851,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
  await this._internalAddKnowledge(
  {
  id: knowledgeId,
- // Use the content-derived ID
+ // Use the content-based ID
  content: {
  text: item
  },
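Note: character knowledge items get the same treatment, with a constant "character-knowledge" tag standing in for a filename, presumably for consistency with the new document IDs; identical items map to the same ID across restarts and are skipped by the existing checkExistingKnowledge guard. The call shape as it appears in the diff (declarations below are illustrative stand-ins):

// Signature inferred from the call sites in this diff.
declare function generateContentBasedId(
  content: string,
  agentId: string,
  opts: { maxChars?: number; includeFilename?: string; contentType?: string }
): string;

const agentId = "agent-0000"; // illustrative
const item = "Paris is the capital of France."; // one character knowledge string
const knowledgeId = generateContentBasedId(item, agentId, {
  maxChars: 2000,
  includeFilename: "character-knowledge", // constant tag for character knowledge items
});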
@@ -3345,7 +3308,12 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  if (invalidFiles.length > 0) {
  cleanupFiles(files);
  const invalidFileNames = invalidFiles.map((f) => f.name || "unnamed").join(", ");
- return sendError(res, 400, "INVALID_FILES", `Invalid or corrupted files: ${invalidFileNames}`);
+ return sendError(
+ res,
+ 400,
+ "INVALID_FILES",
+ `Invalid or corrupted files: ${invalidFileNames}`
+ );
  }
  const agentId = req.body.agentId || req.query.agentId;
  if (!agentId) {
@@ -3361,12 +3329,10 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  const worldId = req.body.worldId || agentId;
  logger6.info(`[KNOWLEDGE UPLOAD HANDLER] Processing upload for agent: ${agentId}`);
  const processingPromises = files.map(async (file, index) => {
- let knowledgeId;
  const originalFilename = file.name;
  const filePath = file.tempFilePath;
- knowledgeId = req.body?.documentIds && req.body.documentIds[index] || req.body?.documentId || createUniqueUuid2(runtime, `knowledge-${originalFilename}-${Date.now()}`);
  logger6.debug(
- `[KNOWLEDGE UPLOAD HANDLER] File: ${originalFilename}, Agent ID: ${agentId}, World ID: ${worldId}, Knowledge ID: ${knowledgeId}`
+ `[KNOWLEDGE UPLOAD HANDLER] File: ${originalFilename}, Agent ID: ${agentId}, World ID: ${worldId}`
  );
  try {
  let fileBuffer;
@@ -3377,7 +3343,9 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  throw new Error("Temporary file is empty");
  }
  fileBuffer = await fs3.promises.readFile(filePath);
- logger6.debug(`[KNOWLEDGE UPLOAD] Read ${fileBuffer.length} bytes from temp file: ${filePath}`);
+ logger6.debug(
+ `[KNOWLEDGE UPLOAD] Read ${fileBuffer.length} bytes from temp file: ${filePath}`
+ );
  } catch (fsError) {
  throw new Error(`Failed to read temporary file: ${fsError.message}`);
  }
@@ -3391,7 +3359,9 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  throw new Error("Invalid or empty file buffer");
  }
  if (fileBuffer.length !== file.size) {
- logger6.warn(`File size mismatch for ${originalFilename}: expected ${file.size}, got ${fileBuffer.length}`);
+ logger6.warn(
+ `File size mismatch for ${originalFilename}: expected ${file.size}, got ${fileBuffer.length}`
+ );
  }
  const base64Content = fileBuffer.toString("base64");
  if (!base64Content || base64Content.length === 0) {
@@ -3400,8 +3370,8 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  const addKnowledgeOpts = {
  agentId,
  // Pass the agent ID from frontend
- clientDocumentId: knowledgeId,
- // This is knowledgeItem.id
+ clientDocumentId: "",
+ // This will be ignored by the service
  contentType: file.mimetype,
  // Directly from express-fileupload file object
  originalFilename,
@@ -3414,12 +3384,13 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  entityId: agentId
  // Use the correct agent ID
  };
- await service.addKnowledge(addKnowledgeOpts);
+ const result = await service.addKnowledge(addKnowledgeOpts);
  if (filePath) {
  cleanupFile(filePath);
  }
  return {
- id: knowledgeId,
+ id: result.clientDocumentId,
+ // Use the content-based ID returned by the service
  filename: originalFilename,
  type: file.mimetype,
  size: file.size,
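Note: with the service now minting IDs, the upload handler drops its own ID computation (including the req.body.documentIds / documentId overrides removed above) and passes an empty clientDocumentId; the ID echoed back to the caller is whatever addKnowledge returns. Clients that previously pinned document IDs through the request body will now receive server-generated, content-based IDs. A per-file success entry now looks roughly like this (field set read from the diff, values made up):

// Illustrative shape of one per-file success entry.
const example = {
  id: "0f8a2b3c-4d5e-4f60-8a9b-0c1d2e3f4a5b", // content-based ID from the service
  filename: "whitepaper.pdf",
  type: "application/pdf",
  size: 182044,
};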
@@ -3434,7 +3405,8 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  cleanupFile(filePath);
  }
  return {
- id: knowledgeId,
+ id: "",
+ // No ID since processing failed
  filename: originalFilename,
  status: "error_processing",
  error: fileError.message
@@ -3462,7 +3434,6 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  const processingPromises = fileUrls.map(async (fileUrl) => {
  try {
  const normalizedUrl = normalizeS3Url(fileUrl);
- const knowledgeId = createUniqueUuid2(runtime, normalizedUrl);
  const urlObject = new URL(fileUrl);
  const pathSegments = urlObject.pathname.split("/");
  const encodedFilename = pathSegments[pathSegments.length - 1] || "document.pdf";
@@ -3493,7 +3464,8 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  const addKnowledgeOpts = {
  agentId,
  // Pass the agent ID from frontend
- clientDocumentId: knowledgeId,
+ clientDocumentId: "",
+ // This will be ignored by the service
  contentType,
  originalFilename,
  content,
@@ -3512,6 +3484,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  const result = await service.addKnowledge(addKnowledgeOpts);
  return {
  id: result.clientDocumentId,
+ // Use the content-based ID returned by the service
  fileUrl,
  filename: originalFilename,
  message: "Knowledge created successfully",
@@ -3794,19 +3767,56 @@ async function getKnowledgeChunksHandler(req, res, runtime) {
  return sendError(res, 500, "SERVICE_NOT_FOUND", "KnowledgeService not found");
  }
  try {
- const limit = req.query.limit ? Number.parseInt(req.query.limit, 10) : 100;
- const before = req.query.before ? Number.parseInt(req.query.before, 10) : Date.now();
  const documentId = req.query.documentId;
- const agentId = req.query.agentId;
- const chunks = await service.getMemories({
- tableName: "knowledge",
- count: limit,
- end: before
+ const documentsOnly = req.query.documentsOnly === "true";
+ const documents = await service.getMemories({
+ tableName: "documents",
+ count: 1e3,
+ // Reasonable limit for documents
+ end: Date.now()
+ });
+ if (documentsOnly) {
+ sendSuccess(res, {
+ chunks: documents,
+ stats: {
+ documents: documents.length,
+ fragments: 0,
+ mode: "documents-only"
+ }
+ });
+ return;
+ }
+ if (documentId) {
+ const allFragments = await service.getMemories({
+ tableName: "knowledge",
+ count: 1e5
+ // Very high limit to get all fragments
+ });
+ const documentFragments = allFragments.filter((fragment) => {
+ const metadata = fragment.metadata;
+ return metadata?.documentId === documentId;
+ });
+ const specificDocument = documents.find((d) => d.id === documentId);
+ const results = specificDocument ? [specificDocument, ...documentFragments] : documentFragments;
+ sendSuccess(res, {
+ chunks: results,
+ stats: {
+ documents: specificDocument ? 1 : 0,
+ fragments: documentFragments.length,
+ mode: "single-document",
+ documentId
+ }
+ });
+ return;
+ }
+ sendSuccess(res, {
+ chunks: documents,
+ stats: {
+ documents: documents.length,
+ fragments: 0,
+ mode: "documents-only"
+ }
  });
- const filteredChunks = documentId ? chunks.filter(
- (chunk) => chunk.metadata && typeof chunk.metadata === "object" && "documentId" in chunk.metadata && chunk.metadata.documentId === documentId
- ) : chunks;
- sendSuccess(res, { chunks: filteredChunks });
  } catch (error) {
  logger6.error("[KNOWLEDGE CHUNKS GET HANDLER] Error retrieving chunks:", error);
  sendError(res, 500, "RETRIEVAL_ERROR", "Failed to retrieve knowledge chunks", error.message);
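Note: the chunks endpoint is reworked from a single paginated query (limit/before over the knowledge table, filtered in memory) into three explicit modes: documentsOnly=true returns documents with no fragments, documentId returns that document plus all of its fragments, and the default now also returns documents only. The old limit, before, and agentId query parameters are silently ignored after this change, and responses gain a stats object. Calls sketched below (the route path is illustrative; only the query parameters appear in the diff):

// Hypothetical route path; the query parameters are the ones the handler reads.
async function demo(): Promise<void> {
  const base = "/api/knowledge/chunks";
  const docId = "0f8a2b3c-4d5e-4f60-8a9b-0c1d2e3f4a5b"; // illustrative document ID
  await fetch(`${base}?documentsOnly=true`); // stats.mode: "documents-only"
  await fetch(`${base}?documentId=${docId}`); // stats.mode: "single-document", document + fragments
  await fetch(base); // default: also documents only
}

The single-document mode still filters in application code after pulling up to 100,000 fragments (count: 1e5), so very large knowledge bases may eventually want a server-side documentId filter.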
@@ -4004,7 +4014,7 @@ var knowledgePlugin = {
  try {
  const service = runtime.getService(KnowledgeService.serviceType);
  if (service instanceof KnowledgeService) {
- const { loadDocsFromPath: loadDocsFromPath2 } = await import("./docs-loader-IBTEOAYT.js");
+ const { loadDocsFromPath: loadDocsFromPath2 } = await import("./docs-loader-5INCF4VJ.js");
  const result = await loadDocsFromPath2(service, runtime.agentId);
  if (result.successful > 0) {
  logger7.info(`Loaded ${result.successful} documents from docs folder on startup`);