@elizaos/plugin-knowledge 1.0.8 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -2,11 +2,13 @@ import {
  convertPdfToTextFromBuffer,
  extractTextFromFileBuffer,
  fetchUrlContent,
+ generateContentBasedId,
  isBinaryContentType,
  loadDocsFromPath,
  looksLikeBase64,
- normalizeS3Url
- } from "./chunk-536BD2UA.js";
+ normalizeS3Url,
+ v4_default
+ } from "./chunk-QH7GBNKB.js";
 
  // src/index.ts
  import { logger as logger7 } from "@elizaos/core";
@@ -206,51 +208,6 @@ import {
  splitChunks
  } from "@elizaos/core";
 
- // node_modules/uuid/dist/esm/stringify.js
- var byteToHex = [];
- for (let i = 0; i < 256; ++i) {
- byteToHex.push((i + 256).toString(16).slice(1));
- }
- function unsafeStringify(arr, offset = 0) {
- return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase();
- }
-
- // node_modules/uuid/dist/esm/rng.js
- import { randomFillSync } from "crypto";
- var rnds8Pool = new Uint8Array(256);
- var poolPtr = rnds8Pool.length;
- function rng() {
- if (poolPtr > rnds8Pool.length - 16) {
- randomFillSync(rnds8Pool);
- poolPtr = 0;
- }
- return rnds8Pool.slice(poolPtr, poolPtr += 16);
- }
-
- // node_modules/uuid/dist/esm/native.js
- import { randomUUID } from "crypto";
- var native_default = { randomUUID };
-
- // node_modules/uuid/dist/esm/v4.js
- function v4(options, buf, offset) {
- if (native_default.randomUUID && !buf && !options) {
- return native_default.randomUUID();
- }
- options = options || {};
- const rnds = options.random || (options.rng || rng)();
- rnds[6] = rnds[6] & 15 | 64;
- rnds[8] = rnds[8] & 63 | 128;
- if (buf) {
- offset = offset || 0;
- for (let i = 0; i < 16; ++i) {
- buf[offset + i] = rnds[i];
- }
- return buf;
- }
- return unsafeStringify(rnds);
- }
- var v4_default = v4;
-
  // src/ctx-embeddings.ts
  var DEFAULT_CHUNK_TOKEN_SIZE = 500;
  var DEFAULT_CHUNK_OVERLAP_TOKENS = 100;
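Note: the vendored uuid source deleted here is not dropped from the package; 1.0.9 moves it into the shared chunk and re-imports it as v4_default (see the import hunk above, where chunk-536BD2UA.js becomes chunk-QH7GBNKB.js). The deleted block is the standard RFC 4122 version-4 algorithm. A minimal TypeScript sketch of the bit-setting it performs, using Node's crypto (function name hypothetical):

import { randomBytes } from "crypto";

// Sketch of RFC 4122 v4 generation as in the removed vendored code:
// 16 random bytes, with the version and variant bits forced.
function v4Sketch(): string {
  const rnds = randomBytes(16);
  rnds[6] = (rnds[6] & 0x0f) | 0x40; // version 4 in the high nibble of byte 6 (& 15 | 64 above)
  rnds[8] = (rnds[8] & 0x3f) | 0x80; // RFC 4122 variant in the top bits of byte 8 (& 63 | 128 above)
  const hex = rnds.toString("hex");
  return `${hex.slice(0, 8)}-${hex.slice(8, 12)}-${hex.slice(12, 16)}-${hex.slice(16, 20)}-${hex.slice(20)}`;
}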
@@ -674,14 +631,6 @@ function getChunkWithContext(chunkContent, generatedContext) {
  );
  return chunkContent;
  }
- if (!generatedContext.includes(chunkContent)) {
- console.warn(
- "Generated context does not contain the original chunk. Appending original to ensure data integrity."
- );
- return `${generatedContext.trim()}
-
- ${chunkContent}`;
- }
  return generatedContext.trim();
  }
 
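Note: this removes the integrity fallback that appended the original chunk whenever the generated context did not literally contain it; the trimmed context is now returned as-is, so the pipeline relies on the LLM echoing the chunk back. A minimal sketch of the resulting behavior (assuming the empty-context guard visible in the context lines above):

// Sketch of the new behavior; the empty-context guard is inferred from the
// surrounding context lines, not shown in full in this diff.
function getChunkWithContextSketch(chunkContent: string, generatedContext: string): string {
  if (!generatedContext) {
    return chunkContent; // fall back to the bare chunk when no context was generated
  }
  return generatedContext.trim(); // no longer re-appends chunkContent if the LLM omitted it
}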
@@ -1015,7 +964,9 @@ var useCustomLLM = shouldUseCustomLLM();
  if (ctxKnowledgeEnabled) {
  logger3.info(`Document processor starting with Contextual Knowledge ENABLED`);
  if (useCustomLLM) {
- logger3.info(`Using Custom LLM with provider: ${process.env.TEXT_PROVIDER}, model: ${process.env.TEXT_MODEL}`);
+ logger3.info(
+ `Using Custom LLM with provider: ${process.env.TEXT_PROVIDER}, model: ${process.env.TEXT_MODEL}`
+ );
  } else {
  logger3.info(`Using ElizaOS Runtime LLM (default behavior)`);
  }
@@ -1307,6 +1258,8 @@ async function getContextualizedChunks(runtime, fullDocumentText, chunks, conten
  }
  }
  async function generateContextsInBatch(runtime, fullDocumentText, chunks, contentType, batchIndices) {
+ console.log("####### generateContextsInBatch FULLL DOCUMENT", fullDocumentText);
+ console.log("####### generateContextsInBatch CHUNKS", chunks);
  if (!chunks || chunks.length === 0) {
  return [];
  }
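Note: these new console.log calls print the entire source document and every chunk to stdout on each batch, bypassing logger3; the "#######" prefix and the "FULLL" typo suggest debug statements that shipped unintentionally. If such logging were wanted, a guarded, size-bounded variant would be safer; a hypothetical sketch (DEBUG_CTX_BATCH is not a real plugin setting):

// Hypothetical guarded alternative; logs sizes rather than full contents.
function logBatchDebug(fullDocumentText: string, chunks: unknown[]): void {
  if (process.env.DEBUG_CTX_BATCH === "true") {
    console.log(`generateContextsInBatch: document ${fullDocumentText.length} chars, ${chunks.length} chunks`);
  }
}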
@@ -1315,6 +1268,9 @@ async function generateContextsInBatch(runtime, fullDocumentText, chunks, conten
  const config = validateModelConfig();
  const isUsingOpenRouter = config.TEXT_PROVIDER === "openrouter";
  const isUsingCacheCapableModel = isUsingOpenRouter && (config.TEXT_MODEL?.toLowerCase().includes("claude") || config.TEXT_MODEL?.toLowerCase().includes("gemini"));
+ logger3.info(
+ `Using provider: ${config.TEXT_PROVIDER}, model: ${config.TEXT_MODEL}, caching capability: ${isUsingCacheCapableModel}`
+ );
  const promptConfigs = prepareContextPrompts(
  chunks,
  fullDocumentText,
@@ -1337,15 +1293,11 @@ async function generateContextsInBatch(runtime, fullDocumentText, chunks, conten
  const generateTextOperation = async () => {
  if (useCustomLLM) {
  if (item.usesCaching) {
- return await generateText(
- item.promptText,
- item.systemPrompt,
- {
- cacheDocument: item.fullDocumentTextForContext,
- cacheOptions: { type: "ephemeral" },
- autoCacheContextualRetrieval: true
- }
- );
+ return await generateText(item.promptText, item.systemPrompt, {
+ cacheDocument: item.fullDocumentTextForContext,
+ cacheOptions: { type: "ephemeral" },
+ autoCacheContextualRetrieval: true
+ });
  } else {
  return await generateText(item.prompt);
  }
@@ -1366,7 +1318,7 @@ async function generateContextsInBatch(runtime, fullDocumentText, chunks, conten
  generateTextOperation,
  `context generation for chunk ${item.originalIndex}`
  );
- const generatedContext = llmResponse.text;
+ const generatedContext = typeof llmResponse === "string" ? llmResponse : llmResponse.text;
  const contextualizedText = getChunkWithContext(item.chunkText, generatedContext);
  logger3.debug(
  `Context added for chunk ${item.originalIndex}. New length: ${contextualizedText.length}`
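Note: the custom-LLM path can return a bare string while the runtime model path returns an object carrying a text field; 1.0.8 read .text unconditionally, which yields undefined for string responses. The new ternary normalizes both shapes. A minimal sketch:

// Sketch of the normalization: accept either response shape seen in the diff.
type LlmResponse = string | { text: string };

const responseText = (res: LlmResponse): string =>
  typeof res === "string" ? res : res.text;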
@@ -1630,36 +1582,42 @@ var KnowledgeService = class _KnowledgeService extends Service {
  */
  async addKnowledge(options) {
  const agentId = options.agentId || this.runtime.agentId;
+ const contentBasedId = generateContentBasedId(options.content, agentId, {
+ includeFilename: options.originalFilename,
+ contentType: options.contentType,
+ maxChars: 2e3
+ // Use first 2KB of content for ID generation
+ });
  logger4.info(
- `KnowledgeService processing document for agent: ${agentId}, file: ${options.originalFilename}, type: ${options.contentType}`
+ `KnowledgeService processing document for agent: ${agentId}, file: ${options.originalFilename}, type: ${options.contentType}, generated ID: ${contentBasedId}`
  );
  try {
- const existingDocument = await this.runtime.getMemoryById(options.clientDocumentId);
+ const existingDocument = await this.runtime.getMemoryById(contentBasedId);
  if (existingDocument && existingDocument.metadata?.type === MemoryType2.DOCUMENT) {
  logger4.info(
- `Document ${options.originalFilename} with ID ${options.clientDocumentId} already exists. Skipping processing.`
+ `Document ${options.originalFilename} with ID ${contentBasedId} already exists. Skipping processing.`
  );
  const fragments = await this.runtime.getMemories({
  tableName: "knowledge"
- // Assuming fragments store original documentId in metadata.documentId
- // This query might need adjustment based on actual fragment metadata structure.
- // A more robust way would be to query where metadata.documentId === options.clientDocumentId
  });
  const relatedFragments = fragments.filter(
- (f) => f.metadata?.type === MemoryType2.FRAGMENT && f.metadata.documentId === options.clientDocumentId
+ (f) => f.metadata?.type === MemoryType2.FRAGMENT && f.metadata.documentId === contentBasedId
  );
  return {
- clientDocumentId: options.clientDocumentId,
+ clientDocumentId: contentBasedId,
  storedDocumentMemoryId: existingDocument.id,
  fragmentCount: relatedFragments.length
  };
  }
  } catch (error) {
  logger4.debug(
- `Document ${options.clientDocumentId} not found or error checking existence, proceeding with processing: ${error instanceof Error ? error.message : String(error)}`
+ `Document ${contentBasedId} not found or error checking existence, proceeding with processing: ${error instanceof Error ? error.message : String(error)}`
  );
  }
- return this.processDocument(options);
+ return this.processDocument({
+ ...options,
+ clientDocumentId: contentBasedId
+ });
  }
  /**
  * Process a document regardless of type - Called by public addKnowledge
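Note: this is the core change of 1.0.9. addKnowledge now derives the document ID deterministically from the content itself (capped at the first 2,000 characters) plus the agent ID, filename, and content type, so re-uploading the same file resolves to the existing document instead of creating a duplicate under a fresh client-supplied ID. generateContentBasedId lives in the shared chunk and its body is not part of this diff; a plausible reconstruction, under that caveat (implementation details assumed, not confirmed):

import { createHash } from "crypto";

// Hypothetical reconstruction: hash the agent ID, filename, content type and the
// first maxChars of content, then format the digest as a UUID-shaped string so it
// can serve as a memory ID. The real generateContentBasedId may differ.
function generateContentBasedIdSketch(
  content: string,
  agentId: string,
  opts: { includeFilename?: string; contentType?: string; maxChars?: number } = {}
): string {
  const head = content.slice(0, opts.maxChars ?? 2000);
  const hex = createHash("sha256")
    .update([agentId, opts.includeFilename ?? "", opts.contentType ?? "", head].join("\u0000"))
    .digest("hex");
  return `${hex.slice(0, 8)}-${hex.slice(8, 12)}-${hex.slice(12, 16)}-${hex.slice(16, 20)}-${hex.slice(20, 32)}`;
}

One consequence worth noting: since only the first 2KB of content participates (per the diff's own comment), two documents sharing their first 2,000 characters would collide under this sketch; presumably the implementation accepts that trade-off for cheap deduplication.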
@@ -1851,7 +1809,12 @@ var KnowledgeService = class _KnowledgeService extends Service {
  const processingPromises = items.map(async (item) => {
  await this.knowledgeProcessingSemaphore.acquire();
  try {
- const knowledgeId = createUniqueUuid(this.runtime.agentId + item, item);
+ const knowledgeId = generateContentBasedId(item, this.runtime.agentId, {
+ maxChars: 2e3,
+ // Use first 2KB of content
+ includeFilename: "character-knowledge"
+ // A constant identifier for character knowledge
+ });
  if (await this.checkExistingKnowledge(knowledgeId)) {
  logger4.debug(
  `KnowledgeService: Character knowledge item with ID ${knowledgeId} already exists. Skipping.`
@@ -1888,7 +1851,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
  await this._internalAddKnowledge(
  {
  id: knowledgeId,
- // Use the content-derived ID
+ // Use the content-based ID
  content: {
  text: item
  },
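Note: character knowledge items get the same treatment, with a constant "character-knowledge" tag standing in for a filename, presumably for consistency with the new document IDs; identical items map to the same ID across restarts and are skipped by the existing checkExistingKnowledge guard. The call shape as it appears in the diff (declarations below are illustrative stand-ins):

// Signature inferred from the call sites in this diff.
declare function generateContentBasedId(
  content: string,
  agentId: string,
  opts: { maxChars?: number; includeFilename?: string; contentType?: string }
): string;

const agentId = "agent-0000"; // illustrative
const item = "Paris is the capital of France."; // one character knowledge string
const knowledgeId = generateContentBasedId(item, agentId, {
  maxChars: 2000,
  includeFilename: "character-knowledge", // constant tag for character knowledge items
});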
@@ -3345,7 +3308,12 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  if (invalidFiles.length > 0) {
  cleanupFiles(files);
  const invalidFileNames = invalidFiles.map((f) => f.name || "unnamed").join(", ");
- return sendError(res, 400, "INVALID_FILES", `Invalid or corrupted files: ${invalidFileNames}`);
+ return sendError(
+ res,
+ 400,
+ "INVALID_FILES",
+ `Invalid or corrupted files: ${invalidFileNames}`
+ );
  }
  const agentId = req.body.agentId || req.query.agentId;
  if (!agentId) {
@@ -3361,12 +3329,10 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  const worldId = req.body.worldId || agentId;
  logger6.info(`[KNOWLEDGE UPLOAD HANDLER] Processing upload for agent: ${agentId}`);
  const processingPromises = files.map(async (file, index) => {
- let knowledgeId;
  const originalFilename = file.name;
  const filePath = file.tempFilePath;
- knowledgeId = req.body?.documentIds && req.body.documentIds[index] || req.body?.documentId || createUniqueUuid2(runtime, `knowledge-${originalFilename}-${Date.now()}`);
  logger6.debug(
- `[KNOWLEDGE UPLOAD HANDLER] File: ${originalFilename}, Agent ID: ${agentId}, World ID: ${worldId}, Knowledge ID: ${knowledgeId}`
+ `[KNOWLEDGE UPLOAD HANDLER] File: ${originalFilename}, Agent ID: ${agentId}, World ID: ${worldId}`
  );
  try {
  let fileBuffer;
@@ -3377,7 +3343,9 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  throw new Error("Temporary file is empty");
  }
  fileBuffer = await fs3.promises.readFile(filePath);
- logger6.debug(`[KNOWLEDGE UPLOAD] Read ${fileBuffer.length} bytes from temp file: ${filePath}`);
+ logger6.debug(
+ `[KNOWLEDGE UPLOAD] Read ${fileBuffer.length} bytes from temp file: ${filePath}`
+ );
  } catch (fsError) {
  throw new Error(`Failed to read temporary file: ${fsError.message}`);
  }
@@ -3391,7 +3359,9 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  throw new Error("Invalid or empty file buffer");
  }
  if (fileBuffer.length !== file.size) {
- logger6.warn(`File size mismatch for ${originalFilename}: expected ${file.size}, got ${fileBuffer.length}`);
+ logger6.warn(
+ `File size mismatch for ${originalFilename}: expected ${file.size}, got ${fileBuffer.length}`
+ );
  }
  const base64Content = fileBuffer.toString("base64");
  if (!base64Content || base64Content.length === 0) {
@@ -3400,8 +3370,8 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  const addKnowledgeOpts = {
  agentId,
  // Pass the agent ID from frontend
- clientDocumentId: knowledgeId,
- // This is knowledgeItem.id
+ clientDocumentId: "",
+ // This will be ignored by the service
  contentType: file.mimetype,
  // Directly from express-fileupload file object
  originalFilename,
@@ -3414,12 +3384,13 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  entityId: agentId
  // Use the correct agent ID
  };
- await service.addKnowledge(addKnowledgeOpts);
+ const result = await service.addKnowledge(addKnowledgeOpts);
  if (filePath) {
  cleanupFile(filePath);
  }
  return {
- id: knowledgeId,
+ id: result.clientDocumentId,
+ // Use the content-based ID returned by the service
  filename: originalFilename,
  type: file.mimetype,
  size: file.size,
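Note: with the service now minting IDs, the upload handler drops its own ID computation (including the req.body.documentIds / documentId overrides removed above) and passes an empty clientDocumentId; the ID echoed back to the caller is whatever addKnowledge returns. Clients that previously pinned document IDs through the request body will now receive server-generated, content-based IDs. A per-file success entry now looks roughly like this (field set read from the diff, values made up):

// Illustrative shape of one per-file success entry.
const example = {
  id: "0f8a2b3c-4d5e-4f60-8a9b-0c1d2e3f4a5b", // content-based ID from the service
  filename: "whitepaper.pdf",
  type: "application/pdf",
  size: 182044,
};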
@@ -3434,7 +3405,8 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  cleanupFile(filePath);
  }
  return {
- id: knowledgeId,
+ id: "",
+ // No ID since processing failed
  filename: originalFilename,
  status: "error_processing",
  error: fileError.message
@@ -3462,7 +3434,6 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  const processingPromises = fileUrls.map(async (fileUrl) => {
  try {
  const normalizedUrl = normalizeS3Url(fileUrl);
- const knowledgeId = createUniqueUuid2(runtime, normalizedUrl);
  const urlObject = new URL(fileUrl);
  const pathSegments = urlObject.pathname.split("/");
  const encodedFilename = pathSegments[pathSegments.length - 1] || "document.pdf";
@@ -3493,7 +3464,8 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  const addKnowledgeOpts = {
  agentId,
  // Pass the agent ID from frontend
- clientDocumentId: knowledgeId,
+ clientDocumentId: "",
+ // This will be ignored by the service
  contentType,
  originalFilename,
  content,
@@ -3512,6 +3484,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  const result = await service.addKnowledge(addKnowledgeOpts);
  return {
  id: result.clientDocumentId,
+ // Use the content-based ID returned by the service
  fileUrl,
  filename: originalFilename,
  message: "Knowledge created successfully",
@@ -3794,19 +3767,56 @@ async function getKnowledgeChunksHandler(req, res, runtime) {
  return sendError(res, 500, "SERVICE_NOT_FOUND", "KnowledgeService not found");
  }
  try {
- const limit = req.query.limit ? Number.parseInt(req.query.limit, 10) : 100;
- const before = req.query.before ? Number.parseInt(req.query.before, 10) : Date.now();
  const documentId = req.query.documentId;
- const agentId = req.query.agentId;
- const chunks = await service.getMemories({
- tableName: "knowledge",
- count: limit,
- end: before
+ const documentsOnly = req.query.documentsOnly === "true";
+ const documents = await service.getMemories({
+ tableName: "documents",
+ count: 1e3,
+ // Reasonable limit for documents
+ end: Date.now()
+ });
+ if (documentsOnly) {
+ sendSuccess(res, {
+ chunks: documents,
+ stats: {
+ documents: documents.length,
+ fragments: 0,
+ mode: "documents-only"
+ }
+ });
+ return;
+ }
+ if (documentId) {
+ const allFragments = await service.getMemories({
+ tableName: "knowledge",
+ count: 1e5
+ // Very high limit to get all fragments
+ });
+ const documentFragments = allFragments.filter((fragment) => {
+ const metadata = fragment.metadata;
+ return metadata?.documentId === documentId;
+ });
+ const specificDocument = documents.find((d) => d.id === documentId);
+ const results = specificDocument ? [specificDocument, ...documentFragments] : documentFragments;
+ sendSuccess(res, {
+ chunks: results,
+ stats: {
+ documents: specificDocument ? 1 : 0,
+ fragments: documentFragments.length,
+ mode: "single-document",
+ documentId
+ }
+ });
+ return;
+ }
+ sendSuccess(res, {
+ chunks: documents,
+ stats: {
+ documents: documents.length,
+ fragments: 0,
+ mode: "documents-only"
+ }
  });
- const filteredChunks = documentId ? chunks.filter(
- (chunk) => chunk.metadata && typeof chunk.metadata === "object" && "documentId" in chunk.metadata && chunk.metadata.documentId === documentId
- ) : chunks;
- sendSuccess(res, { chunks: filteredChunks });
  } catch (error) {
  logger6.error("[KNOWLEDGE CHUNKS GET HANDLER] Error retrieving chunks:", error);
  sendError(res, 500, "RETRIEVAL_ERROR", "Failed to retrieve knowledge chunks", error.message);
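Note: the chunks endpoint is reworked from a single paginated query (limit/before over the knowledge table, filtered in memory) into three explicit modes: documentsOnly=true returns documents with no fragments, documentId returns that document plus all of its fragments, and the default now also returns documents only. The old limit, before, and agentId query parameters are silently ignored after this change, and responses gain a stats object. Calls sketched below (the route path is illustrative; only the query parameters appear in the diff):

// Hypothetical route path; the query parameters are the ones the handler reads.
async function demo(): Promise<void> {
  const base = "/api/knowledge/chunks";
  const docId = "0f8a2b3c-4d5e-4f60-8a9b-0c1d2e3f4a5b"; // illustrative document ID
  await fetch(`${base}?documentsOnly=true`); // stats.mode: "documents-only"
  await fetch(`${base}?documentId=${docId}`); // stats.mode: "single-document", document + fragments
  await fetch(base); // default: also documents only
}

The single-document mode still filters in application code after pulling up to 100,000 fragments (count: 1e5), so very large knowledge bases may eventually want a server-side documentId filter.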
@@ -4004,7 +4014,7 @@ var knowledgePlugin = {
  try {
  const service = runtime.getService(KnowledgeService.serviceType);
  if (service instanceof KnowledgeService) {
- const { loadDocsFromPath: loadDocsFromPath2 } = await import("./docs-loader-IBTEOAYT.js");
+ const { loadDocsFromPath: loadDocsFromPath2 } = await import("./docs-loader-5INCF4VJ.js");
  const result = await loadDocsFromPath2(service, runtime.agentId);
  if (result.successful > 0) {
  logger7.info(`Loaded ${result.successful} documents from docs folder on startup`);