@promptbook/website-crawler 0.100.0-0 → 0.100.0-10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/index.es.js +83 -5
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/config.d.ts +0 -10
- package/esm/typings/src/version.d.ts +1 -1
- package/esm/typings/src/wizard/wizard.d.ts +14 -4
- package/package.json +2 -2
- package/umd/index.umd.js +83 -5
- package/umd/index.umd.js.map +1 -1
- package/esm/typings/src/remote-server/connection-improvements.test.d.ts +0 -1
- package/esm/typings/src/remote-server/utils/connectionProgress.d.ts +0 -72
package/esm/index.es.js
CHANGED
|
@@ -29,7 +29,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
|
|
|
29
29
|
* @generated
|
|
30
30
|
* @see https://github.com/webgptorg/promptbook
|
|
31
31
|
*/
|
|
32
|
-
const PROMPTBOOK_ENGINE_VERSION = '0.100.0-0';
|
|
32
|
+
const PROMPTBOOK_ENGINE_VERSION = '0.100.0-10';
|
|
33
33
|
/**
|
|
34
34
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
35
35
|
* Note: [๐] Ignore a discrepancy between file name and entity name
|
|
@@ -1013,7 +1013,23 @@ async function getScraperIntermediateSource(source, options) {
|
|
|
1013
1013
|
.join('/') +
|
|
1014
1014
|
'.' +
|
|
1015
1015
|
extension;
|
|
1016
|
-
|
|
1016
|
+
// Note: Try to create cache directory, but don't fail if filesystem has issues
|
|
1017
|
+
try {
|
|
1018
|
+
await mkdir(dirname(cacheFilename), { recursive: true });
|
|
1019
|
+
}
|
|
1020
|
+
catch (error) {
|
|
1021
|
+
// Note: If we can't create cache directory, continue without it
|
|
1022
|
+
// This handles read-only filesystems, permission issues, and missing parent directories
|
|
1023
|
+
if (error instanceof Error && (error.message.includes('EROFS') ||
|
|
1024
|
+
error.message.includes('read-only') ||
|
|
1025
|
+
error.message.includes('EACCES') ||
|
|
1026
|
+
error.message.includes('EPERM') ||
|
|
1027
|
+
error.message.includes('ENOENT'))) ;
|
|
1028
|
+
else {
|
|
1029
|
+
// Re-throw other unexpected errors
|
|
1030
|
+
throw error;
|
|
1031
|
+
}
|
|
1032
|
+
}
|
|
1017
1033
|
let isDestroyed = true;
|
|
1018
1034
|
const fileHandler = {
|
|
1019
1035
|
filename: cacheFilename,
|
|
@@ -3498,12 +3514,58 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3498
3514
|
// <- TODO: [🥬] Encapsulate sha256 to some private utility function
|
|
3499
3515
|
const rootDirname = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
|
|
3500
3516
|
const filepath = join(...nameToSubfolderPath(hash /* <- TODO: [๐] Maybe add some SHA256 prefix */), `${basename.substring(0, MAX_FILENAME_LENGTH)}.${mimeTypeToExtension(mimeType)}`);
|
|
3501
|
-
|
|
3517
|
+
// Note: Try to create cache directory, but don't fail if filesystem has issues
|
|
3518
|
+
try {
|
|
3519
|
+
await tools.fs.mkdir(dirname(join(rootDirname, filepath)), { recursive: true });
|
|
3520
|
+
}
|
|
3521
|
+
catch (error) {
|
|
3522
|
+
// Note: If we can't create cache directory, we'll handle it when trying to write the file
|
|
3523
|
+
// This handles read-only filesystems, permission issues, and missing parent directories
|
|
3524
|
+
if (error instanceof Error && (error.message.includes('EROFS') ||
|
|
3525
|
+
error.message.includes('read-only') ||
|
|
3526
|
+
error.message.includes('EACCES') ||
|
|
3527
|
+
error.message.includes('EPERM') ||
|
|
3528
|
+
error.message.includes('ENOENT'))) ;
|
|
3529
|
+
else {
|
|
3530
|
+
// Re-throw other unexpected errors
|
|
3531
|
+
throw error;
|
|
3532
|
+
}
|
|
3533
|
+
}
|
|
3502
3534
|
const fileContent = Buffer.from(await response.arrayBuffer());
|
|
3503
3535
|
if (fileContent.length > DEFAULT_MAX_FILE_SIZE /* <- TODO: Allow to pass different value to remote server */) {
|
|
3504
3536
|
throw new LimitReachedError(`File is too large (${Math.round(fileContent.length / 1024 / 1024)}MB). Maximum allowed size is ${Math.round(DEFAULT_MAX_FILE_SIZE / 1024 / 1024)}MB.`);
|
|
3505
3537
|
}
|
|
3506
|
-
|
|
3538
|
+
// Note: Try to cache the downloaded file, but don't fail if the filesystem is read-only
|
|
3539
|
+
try {
|
|
3540
|
+
await tools.fs.writeFile(join(rootDirname, filepath), fileContent);
|
|
3541
|
+
}
|
|
3542
|
+
catch (error) {
|
|
3543
|
+
// Note: If we can't write to cache, we'll process the file directly from memory
|
|
3544
|
+
// This handles read-only filesystems like Vercel
|
|
3545
|
+
if (error instanceof Error && (error.message.includes('EROFS') ||
|
|
3546
|
+
error.message.includes('read-only') ||
|
|
3547
|
+
error.message.includes('EACCES') ||
|
|
3548
|
+
error.message.includes('EPERM') ||
|
|
3549
|
+
error.message.includes('ENOENT'))) {
|
|
3550
|
+
// Return a handler that works directly with the downloaded content
|
|
3551
|
+
return {
|
|
3552
|
+
source: name,
|
|
3553
|
+
filename: null,
|
|
3554
|
+
url,
|
|
3555
|
+
mimeType,
|
|
3556
|
+
async asJson() {
|
|
3557
|
+
return JSON.parse(fileContent.toString('utf-8'));
|
|
3558
|
+
},
|
|
3559
|
+
async asText() {
|
|
3560
|
+
return fileContent.toString('utf-8');
|
|
3561
|
+
},
|
|
3562
|
+
};
|
|
3563
|
+
}
|
|
3564
|
+
else {
|
|
3565
|
+
// Re-throw other unexpected errors
|
|
3566
|
+
throw error;
|
|
3567
|
+
}
|
|
3568
|
+
}
|
|
3507
3569
|
// TODO: [๐ต] Check the file security
|
|
3508
3570
|
// TODO: [🧹][🧠] Delete the file after the scraping is done
|
|
3509
3571
|
return makeKnowledgeSourceHandler({ name, knowledgeSourceContent: filepath }, tools, {
|
|
@@ -6270,7 +6332,23 @@ class WebsiteScraper {
|
|
|
6270
6332
|
extension: 'html',
|
|
6271
6333
|
isVerbose,
|
|
6272
6334
|
});
|
|
6273
|
-
|
|
6335
|
+
// Note: Try to cache the scraped content, but don't fail if the filesystem is read-only
|
|
6336
|
+
try {
|
|
6337
|
+
await this.tools.fs.writeFile(cacheFilehandler.filename, html, 'utf-8');
|
|
6338
|
+
}
|
|
6339
|
+
catch (error) {
|
|
6340
|
+
// Note: If we can't write to cache, we'll continue without caching
|
|
6341
|
+
// This handles read-only filesystems like Vercel
|
|
6342
|
+
if (error instanceof Error && (error.message.includes('EROFS') ||
|
|
6343
|
+
error.message.includes('read-only') ||
|
|
6344
|
+
error.message.includes('EACCES') ||
|
|
6345
|
+
error.message.includes('EPERM') ||
|
|
6346
|
+
error.message.includes('ENOENT'))) ;
|
|
6347
|
+
else {
|
|
6348
|
+
// Re-throw other unexpected errors
|
|
6349
|
+
throw error;
|
|
6350
|
+
}
|
|
6351
|
+
}
|
|
6274
6352
|
const markdown = this.showdownConverter.makeMarkdown(html, jsdom.window.document);
|
|
6275
6353
|
return { ...cacheFilehandler, markdown };
|
|
6276
6354
|
}
|