@promptbook/website-crawler 0.100.0-0 → 0.100.0-10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -29,7 +29,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
29
29
  * @generated
30
30
  * @see https://github.com/webgptorg/promptbook
31
31
  */
32
- const PROMPTBOOK_ENGINE_VERSION = '0.100.0-0';
32
+ const PROMPTBOOK_ENGINE_VERSION = '0.100.0-10';
33
33
  /**
34
34
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
35
35
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -1013,7 +1013,23 @@ async function getScraperIntermediateSource(source, options) {
1013
1013
  .join('/') +
1014
1014
  '.' +
1015
1015
  extension;
1016
- await mkdir(dirname(cacheFilename), { recursive: true });
1016
+ // Note: Try to create cache directory, but don't fail if filesystem has issues
1017
+ try {
1018
+ await mkdir(dirname(cacheFilename), { recursive: true });
1019
+ }
1020
+ catch (error) {
1021
+ // Note: If we can't create cache directory, continue without it
1022
+ // This handles read-only filesystems, permission issues, and missing parent directories
1023
+ if (error instanceof Error && (error.message.includes('EROFS') ||
1024
+ error.message.includes('read-only') ||
1025
+ error.message.includes('EACCES') ||
1026
+ error.message.includes('EPERM') ||
1027
+ error.message.includes('ENOENT'))) ;
1028
+ else {
1029
+ // Re-throw other unexpected errors
1030
+ throw error;
1031
+ }
1032
+ }
1017
1033
  let isDestroyed = true;
1018
1034
  const fileHandler = {
1019
1035
  filename: cacheFilename,
@@ -3498,12 +3514,58 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3498
3514
  // <- TODO: [🥬] Encapsulate sha256 to some private utility function
3499
3515
  const rootDirname = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
3500
3516
  const filepath = join(...nameToSubfolderPath(hash /* <- TODO: [๐ŸŽŽ] Maybe add some SHA256 prefix */), `${basename.substring(0, MAX_FILENAME_LENGTH)}.${mimeTypeToExtension(mimeType)}`);
3501
- await tools.fs.mkdir(dirname(join(rootDirname, filepath)), { recursive: true });
3517
+ // Note: Try to create cache directory, but don't fail if filesystem has issues
3518
+ try {
3519
+ await tools.fs.mkdir(dirname(join(rootDirname, filepath)), { recursive: true });
3520
+ }
3521
+ catch (error) {
3522
+ // Note: If we can't create cache directory, we'll handle it when trying to write the file
3523
+ // This handles read-only filesystems, permission issues, and missing parent directories
3524
+ if (error instanceof Error && (error.message.includes('EROFS') ||
3525
+ error.message.includes('read-only') ||
3526
+ error.message.includes('EACCES') ||
3527
+ error.message.includes('EPERM') ||
3528
+ error.message.includes('ENOENT'))) ;
3529
+ else {
3530
+ // Re-throw other unexpected errors
3531
+ throw error;
3532
+ }
3533
+ }
3502
3534
  const fileContent = Buffer.from(await response.arrayBuffer());
3503
3535
  if (fileContent.length > DEFAULT_MAX_FILE_SIZE /* <- TODO: Allow to pass different value to remote server */) {
3504
3536
  throw new LimitReachedError(`File is too large (${Math.round(fileContent.length / 1024 / 1024)}MB). Maximum allowed size is ${Math.round(DEFAULT_MAX_FILE_SIZE / 1024 / 1024)}MB.`);
3505
3537
  }
3506
- await tools.fs.writeFile(join(rootDirname, filepath), fileContent);
3538
+ // Note: Try to cache the downloaded file, but don't fail if the filesystem is read-only
3539
+ try {
3540
+ await tools.fs.writeFile(join(rootDirname, filepath), fileContent);
3541
+ }
3542
+ catch (error) {
3543
+ // Note: If we can't write to cache, we'll process the file directly from memory
3544
+ // This handles read-only filesystems like Vercel
3545
+ if (error instanceof Error && (error.message.includes('EROFS') ||
3546
+ error.message.includes('read-only') ||
3547
+ error.message.includes('EACCES') ||
3548
+ error.message.includes('EPERM') ||
3549
+ error.message.includes('ENOENT'))) {
3550
+ // Return a handler that works directly with the downloaded content
3551
+ return {
3552
+ source: name,
3553
+ filename: null,
3554
+ url,
3555
+ mimeType,
3556
+ async asJson() {
3557
+ return JSON.parse(fileContent.toString('utf-8'));
3558
+ },
3559
+ async asText() {
3560
+ return fileContent.toString('utf-8');
3561
+ },
3562
+ };
3563
+ }
3564
+ else {
3565
+ // Re-throw other unexpected errors
3566
+ throw error;
3567
+ }
3568
+ }
3507
3569
  // TODO: [💵] Check the file security
3508
3570
  // TODO: [🧹][🧠] Delete the file after the scraping is done
3509
3571
  return makeKnowledgeSourceHandler({ name, knowledgeSourceContent: filepath }, tools, {
@@ -6270,7 +6332,23 @@ class WebsiteScraper {
6270
6332
  extension: 'html',
6271
6333
  isVerbose,
6272
6334
  });
6273
- await this.tools.fs.writeFile(cacheFilehandler.filename, html, 'utf-8');
6335
+ // Note: Try to cache the scraped content, but don't fail if the filesystem is read-only
6336
+ try {
6337
+ await this.tools.fs.writeFile(cacheFilehandler.filename, html, 'utf-8');
6338
+ }
6339
+ catch (error) {
6340
+ // Note: If we can't write to cache, we'll continue without caching
6341
+ // This handles read-only filesystems like Vercel
6342
+ if (error instanceof Error && (error.message.includes('EROFS') ||
6343
+ error.message.includes('read-only') ||
6344
+ error.message.includes('EACCES') ||
6345
+ error.message.includes('EPERM') ||
6346
+ error.message.includes('ENOENT'))) ;
6347
+ else {
6348
+ // Re-throw other unexpected errors
6349
+ throw error;
6350
+ }
6351
+ }
6274
6352
  const markdown = this.showdownConverter.makeMarkdown(html, jsdom.window.document);
6275
6353
  return { ...cacheFilehandler, markdown };
6276
6354
  }