@promptbook/markitdown 0.100.0-1 → 0.100.0-10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/index.es.js +83 -5
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/config.d.ts +0 -10
- package/esm/typings/src/version.d.ts +1 -1
- package/esm/typings/src/wizard/wizard.d.ts +14 -4
- package/package.json +2 -2
- package/umd/index.umd.js +83 -5
- package/umd/index.umd.js.map +1 -1
- package/esm/typings/src/remote-server/connection-improvements.test.d.ts +0 -1
- package/esm/typings/src/remote-server/utils/connectionProgress.d.ts +0 -72
package/esm/index.es.js
CHANGED
|
@@ -26,7 +26,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
|
|
|
26
26
|
* @generated
|
|
27
27
|
* @see https://github.com/webgptorg/promptbook
|
|
28
28
|
*/
|
|
29
|
-
const PROMPTBOOK_ENGINE_VERSION = '0.100.0-1';
|
|
29
|
+
const PROMPTBOOK_ENGINE_VERSION = '0.100.0-10';
|
|
30
30
|
/**
|
|
31
31
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
32
32
|
* Note: [๐] Ignore a discrepancy between file name and entity name
|
|
@@ -834,7 +834,23 @@ async function getScraperIntermediateSource(source, options) {
|
|
|
834
834
|
.join('/') +
|
|
835
835
|
'.' +
|
|
836
836
|
extension;
|
|
837
|
-
|
|
837
|
+
// Note: Try to create cache directory, but don't fail if filesystem has issues
|
|
838
|
+
try {
|
|
839
|
+
await mkdir(dirname(cacheFilename), { recursive: true });
|
|
840
|
+
}
|
|
841
|
+
catch (error) {
|
|
842
|
+
// Note: If we can't create cache directory, continue without it
|
|
843
|
+
// This handles read-only filesystems, permission issues, and missing parent directories
|
|
844
|
+
if (error instanceof Error && (error.message.includes('EROFS') ||
|
|
845
|
+
error.message.includes('read-only') ||
|
|
846
|
+
error.message.includes('EACCES') ||
|
|
847
|
+
error.message.includes('EPERM') ||
|
|
848
|
+
error.message.includes('ENOENT'))) ;
|
|
849
|
+
else {
|
|
850
|
+
// Re-throw other unexpected errors
|
|
851
|
+
throw error;
|
|
852
|
+
}
|
|
853
|
+
}
|
|
838
854
|
let isDestroyed = true;
|
|
839
855
|
const fileHandler = {
|
|
840
856
|
filename: cacheFilename,
|
|
@@ -3484,12 +3500,58 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3484
3500
|
// <- TODO: [๐ฅฌ] Encapsulate sha256 to some private utility function
|
|
3485
3501
|
const rootDirname = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
|
|
3486
3502
|
const filepath = join(...nameToSubfolderPath(hash /* <- TODO: [๐] Maybe add some SHA256 prefix */), `${basename.substring(0, MAX_FILENAME_LENGTH)}.${mimeTypeToExtension(mimeType)}`);
|
|
3487
|
-
|
|
3503
|
+
// Note: Try to create cache directory, but don't fail if filesystem has issues
|
|
3504
|
+
try {
|
|
3505
|
+
await tools.fs.mkdir(dirname(join(rootDirname, filepath)), { recursive: true });
|
|
3506
|
+
}
|
|
3507
|
+
catch (error) {
|
|
3508
|
+
// Note: If we can't create cache directory, we'll handle it when trying to write the file
|
|
3509
|
+
// This handles read-only filesystems, permission issues, and missing parent directories
|
|
3510
|
+
if (error instanceof Error && (error.message.includes('EROFS') ||
|
|
3511
|
+
error.message.includes('read-only') ||
|
|
3512
|
+
error.message.includes('EACCES') ||
|
|
3513
|
+
error.message.includes('EPERM') ||
|
|
3514
|
+
error.message.includes('ENOENT'))) ;
|
|
3515
|
+
else {
|
|
3516
|
+
// Re-throw other unexpected errors
|
|
3517
|
+
throw error;
|
|
3518
|
+
}
|
|
3519
|
+
}
|
|
3488
3520
|
const fileContent = Buffer.from(await response.arrayBuffer());
|
|
3489
3521
|
if (fileContent.length > DEFAULT_MAX_FILE_SIZE /* <- TODO: Allow to pass different value to remote server */) {
|
|
3490
3522
|
throw new LimitReachedError(`File is too large (${Math.round(fileContent.length / 1024 / 1024)}MB). Maximum allowed size is ${Math.round(DEFAULT_MAX_FILE_SIZE / 1024 / 1024)}MB.`);
|
|
3491
3523
|
}
|
|
3492
|
-
|
|
3524
|
+
// Note: Try to cache the downloaded file, but don't fail if the filesystem is read-only
|
|
3525
|
+
try {
|
|
3526
|
+
await tools.fs.writeFile(join(rootDirname, filepath), fileContent);
|
|
3527
|
+
}
|
|
3528
|
+
catch (error) {
|
|
3529
|
+
// Note: If we can't write to cache, we'll process the file directly from memory
|
|
3530
|
+
// This handles read-only filesystems like Vercel
|
|
3531
|
+
if (error instanceof Error && (error.message.includes('EROFS') ||
|
|
3532
|
+
error.message.includes('read-only') ||
|
|
3533
|
+
error.message.includes('EACCES') ||
|
|
3534
|
+
error.message.includes('EPERM') ||
|
|
3535
|
+
error.message.includes('ENOENT'))) {
|
|
3536
|
+
// Return a handler that works directly with the downloaded content
|
|
3537
|
+
return {
|
|
3538
|
+
source: name,
|
|
3539
|
+
filename: null,
|
|
3540
|
+
url,
|
|
3541
|
+
mimeType,
|
|
3542
|
+
async asJson() {
|
|
3543
|
+
return JSON.parse(fileContent.toString('utf-8'));
|
|
3544
|
+
},
|
|
3545
|
+
async asText() {
|
|
3546
|
+
return fileContent.toString('utf-8');
|
|
3547
|
+
},
|
|
3548
|
+
};
|
|
3549
|
+
}
|
|
3550
|
+
else {
|
|
3551
|
+
// Re-throw other unexpected errors
|
|
3552
|
+
throw error;
|
|
3553
|
+
}
|
|
3554
|
+
}
|
|
3493
3555
|
// TODO: [๐ต] Check the file security
|
|
3494
3556
|
// TODO: [๐งน][๐ง ] Delete the file after the scraping is done
|
|
3495
3557
|
return makeKnowledgeSourceHandler({ name, knowledgeSourceContent: filepath }, tools, {
|
|
@@ -6275,7 +6337,23 @@ class MarkitdownScraper {
|
|
|
6275
6337
|
// <- TODO: [๐] Make MarkitdownError
|
|
6276
6338
|
}
|
|
6277
6339
|
// console.log('!!', { result, cacheFilehandler });
|
|
6278
|
-
|
|
6340
|
+
// Note: Try to cache the converted content, but don't fail if the filesystem is read-only
|
|
6341
|
+
try {
|
|
6342
|
+
await this.tools.fs.writeFile(cacheFilehandler.filename, result.text_content);
|
|
6343
|
+
}
|
|
6344
|
+
catch (error) {
|
|
6345
|
+
// Note: If we can't write to cache, we'll continue without caching
|
|
6346
|
+
// This handles read-only filesystems like Vercel
|
|
6347
|
+
if (error instanceof Error && (error.message.includes('EROFS') ||
|
|
6348
|
+
error.message.includes('read-only') ||
|
|
6349
|
+
error.message.includes('EACCES') ||
|
|
6350
|
+
error.message.includes('EPERM') ||
|
|
6351
|
+
error.message.includes('ENOENT'))) ;
|
|
6352
|
+
else {
|
|
6353
|
+
// Re-throw other unexpected errors
|
|
6354
|
+
throw error;
|
|
6355
|
+
}
|
|
6356
|
+
}
|
|
6279
6357
|
}
|
|
6280
6358
|
return cacheFilehandler;
|
|
6281
6359
|
}
|