@promptbook/markitdown 0.100.0-1 → 0.100.0-10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -26,7 +26,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
26
26
  * @generated
27
27
  * @see https://github.com/webgptorg/promptbook
28
28
  */
29
- const PROMPTBOOK_ENGINE_VERSION = '0.100.0-1';
29
+ const PROMPTBOOK_ENGINE_VERSION = '0.100.0-10';
30
30
  /**
31
31
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
32
32
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -834,7 +834,23 @@ async function getScraperIntermediateSource(source, options) {
834
834
  .join('/') +
835
835
  '.' +
836
836
  extension;
837
- await mkdir(dirname(cacheFilename), { recursive: true });
837
+ // Note: Try to create cache directory, but don't fail if filesystem has issues
838
+ try {
839
+ await mkdir(dirname(cacheFilename), { recursive: true });
840
+ }
841
+ catch (error) {
842
+ // Note: If we can't create cache directory, continue without it
843
+ // This handles read-only filesystems, permission issues, and missing parent directories
844
+ if (error instanceof Error && (error.message.includes('EROFS') ||
845
+ error.message.includes('read-only') ||
846
+ error.message.includes('EACCES') ||
847
+ error.message.includes('EPERM') ||
848
+ error.message.includes('ENOENT'))) ;
849
+ else {
850
+ // Re-throw other unexpected errors
851
+ throw error;
852
+ }
853
+ }
838
854
  let isDestroyed = true;
839
855
  const fileHandler = {
840
856
  filename: cacheFilename,
@@ -3484,12 +3500,58 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3484
3500
  // <- TODO: [🥬] Encapsulate sha256 to some private utility function
3485
3501
  const rootDirname = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
3486
3502
  const filepath = join(...nameToSubfolderPath(hash /* <- TODO: [๐ŸŽŽ] Maybe add some SHA256 prefix */), `${basename.substring(0, MAX_FILENAME_LENGTH)}.${mimeTypeToExtension(mimeType)}`);
3487
- await tools.fs.mkdir(dirname(join(rootDirname, filepath)), { recursive: true });
3503
+ // Note: Try to create cache directory, but don't fail if filesystem has issues
3504
+ try {
3505
+ await tools.fs.mkdir(dirname(join(rootDirname, filepath)), { recursive: true });
3506
+ }
3507
+ catch (error) {
3508
+ // Note: If we can't create cache directory, we'll handle it when trying to write the file
3509
+ // This handles read-only filesystems, permission issues, and missing parent directories
3510
+ if (error instanceof Error && (error.message.includes('EROFS') ||
3511
+ error.message.includes('read-only') ||
3512
+ error.message.includes('EACCES') ||
3513
+ error.message.includes('EPERM') ||
3514
+ error.message.includes('ENOENT'))) ;
3515
+ else {
3516
+ // Re-throw other unexpected errors
3517
+ throw error;
3518
+ }
3519
+ }
3488
3520
  const fileContent = Buffer.from(await response.arrayBuffer());
3489
3521
  if (fileContent.length > DEFAULT_MAX_FILE_SIZE /* <- TODO: Allow to pass different value to remote server */) {
3490
3522
  throw new LimitReachedError(`File is too large (${Math.round(fileContent.length / 1024 / 1024)}MB). Maximum allowed size is ${Math.round(DEFAULT_MAX_FILE_SIZE / 1024 / 1024)}MB.`);
3491
3523
  }
3492
- await tools.fs.writeFile(join(rootDirname, filepath), fileContent);
3524
+ // Note: Try to cache the downloaded file, but don't fail if the filesystem is read-only
3525
+ try {
3526
+ await tools.fs.writeFile(join(rootDirname, filepath), fileContent);
3527
+ }
3528
+ catch (error) {
3529
+ // Note: If we can't write to cache, we'll process the file directly from memory
3530
+ // This handles read-only filesystems like Vercel
3531
+ if (error instanceof Error && (error.message.includes('EROFS') ||
3532
+ error.message.includes('read-only') ||
3533
+ error.message.includes('EACCES') ||
3534
+ error.message.includes('EPERM') ||
3535
+ error.message.includes('ENOENT'))) {
3536
+ // Return a handler that works directly with the downloaded content
3537
+ return {
3538
+ source: name,
3539
+ filename: null,
3540
+ url,
3541
+ mimeType,
3542
+ async asJson() {
3543
+ return JSON.parse(fileContent.toString('utf-8'));
3544
+ },
3545
+ async asText() {
3546
+ return fileContent.toString('utf-8');
3547
+ },
3548
+ };
3549
+ }
3550
+ else {
3551
+ // Re-throw other unexpected errors
3552
+ throw error;
3553
+ }
3554
+ }
3493
3555
  // TODO: [💵] Check the file security
3494
3556
  // TODO: [🧹][🧠] Delete the file after the scraping is done
3495
3557
  return makeKnowledgeSourceHandler({ name, knowledgeSourceContent: filepath }, tools, {
@@ -6275,7 +6337,23 @@ class MarkitdownScraper {
6275
6337
  // <- TODO: [๐Ÿ€] Make MarkitdownError
6276
6338
  }
6277
6339
  // console.log('!!', { result, cacheFilehandler });
6278
- await this.tools.fs.writeFile(cacheFilehandler.filename, result.text_content);
6340
+ // Note: Try to cache the converted content, but don't fail if the filesystem is read-only
6341
+ try {
6342
+ await this.tools.fs.writeFile(cacheFilehandler.filename, result.text_content);
6343
+ }
6344
+ catch (error) {
6345
+ // Note: If we can't write to cache, we'll continue without caching
6346
+ // This handles read-only filesystems like Vercel
6347
+ if (error instanceof Error && (error.message.includes('EROFS') ||
6348
+ error.message.includes('read-only') ||
6349
+ error.message.includes('EACCES') ||
6350
+ error.message.includes('EPERM') ||
6351
+ error.message.includes('ENOENT'))) ;
6352
+ else {
6353
+ // Re-throw other unexpected errors
6354
+ throw error;
6355
+ }
6356
+ }
6279
6357
  }
6280
6358
  return cacheFilehandler;
6281
6359
  }