@promptbook/website-crawler 0.100.0-1 → 0.100.0-3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -29,7 +29,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
29
29
  * @generated
30
30
  * @see https://github.com/webgptorg/promptbook
31
31
  */
32
- const PROMPTBOOK_ENGINE_VERSION = '0.100.0-1';
32
+ const PROMPTBOOK_ENGINE_VERSION = '0.100.0-3';
33
33
  /**
34
34
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
35
35
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -3503,7 +3503,36 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3503
3503
  if (fileContent.length > DEFAULT_MAX_FILE_SIZE /* <- TODO: Allow to pass different value to remote server */) {
3504
3504
  throw new LimitReachedError(`File is too large (${Math.round(fileContent.length / 1024 / 1024)}MB). Maximum allowed size is ${Math.round(DEFAULT_MAX_FILE_SIZE / 1024 / 1024)}MB.`);
3505
3505
  }
3506
- await tools.fs.writeFile(join(rootDirname, filepath), fileContent);
3506
+ // Note: Try to cache the downloaded file, but don't fail if the filesystem is read-only
3507
+ try {
3508
+ await tools.fs.writeFile(join(rootDirname, filepath), fileContent);
3509
+ }
3510
+ catch (error) {
3511
+ // Note: If we can't write to cache, we'll process the file directly from memory
3512
+ // This handles read-only filesystems like Vercel
3513
+ if (error instanceof Error && (error.message.includes('EROFS') ||
3514
+ error.message.includes('read-only') ||
3515
+ error.message.includes('EACCES') ||
3516
+ error.message.includes('EPERM'))) {
3517
+ // Return a handler that works directly with the downloaded content
3518
+ return {
3519
+ source: name,
3520
+ filename: null,
3521
+ url,
3522
+ mimeType,
3523
+ async asJson() {
3524
+ return JSON.parse(fileContent.toString('utf-8'));
3525
+ },
3526
+ async asText() {
3527
+ return fileContent.toString('utf-8');
3528
+ },
3529
+ };
3530
+ }
3531
+ else {
3532
+ // Re-throw other unexpected errors
3533
+ throw error;
3534
+ }
3535
+ }
3507
3536
  // TODO: [💵] Check the file security
3508
3537
  // TODO: [🧹][🧠] Delete the file after the scraping is done
3509
3538
  return makeKnowledgeSourceHandler({ name, knowledgeSourceContent: filepath }, tools, {
@@ -6270,7 +6299,22 @@ class WebsiteScraper {
6270
6299
  extension: 'html',
6271
6300
  isVerbose,
6272
6301
  });
6273
- await this.tools.fs.writeFile(cacheFilehandler.filename, html, 'utf-8');
6302
+ // Note: Try to cache the scraped content, but don't fail if the filesystem is read-only
6303
+ try {
6304
+ await this.tools.fs.writeFile(cacheFilehandler.filename, html, 'utf-8');
6305
+ }
6306
+ catch (error) {
6307
+ // Note: If we can't write to cache, we'll continue without caching
6308
+ // This handles read-only filesystems like Vercel
6309
+ if (error instanceof Error && (error.message.includes('EROFS') ||
6310
+ error.message.includes('read-only') ||
6311
+ error.message.includes('EACCES') ||
6312
+ error.message.includes('EPERM'))) ;
6313
+ else {
6314
+ // Re-throw other unexpected errors
6315
+ throw error;
6316
+ }
6317
+ }
6274
6318
  const markdown = this.showdownConverter.makeMarkdown(html, jsdom.window.document);
6275
6319
  return { ...cacheFilehandler, markdown };
6276
6320
  }