@promptbook/website-crawler 0.100.0 → 0.100.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/index.es.js +55 -20
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/book-2.0/agent-source/parseAgentSource.d.ts +2 -2
- package/esm/typings/src/book-2.0/commitments/_base/CommitmentDefinition.d.ts +2 -2
- package/esm/typings/src/book-2.0/commitments/_misc/AgentModelRequirements.d.ts +2 -2
- package/esm/typings/src/book-2.0/commitments/_misc/AgentSourceParseResult.d.ts +2 -2
- package/esm/typings/src/book-2.0/commitments/_misc/ParsedCommitment.d.ts +2 -2
- package/esm/typings/src/book-components/AvatarProfile/AvatarProfile/AvatarProfile.d.ts +3 -0
- package/esm/typings/src/book-components/BookEditor/BookEditor.d.ts +5 -0
- package/esm/typings/src/book-components/Chat/types/ChatParticipant.d.ts +4 -4
- package/esm/typings/src/execution/LlmExecutionTools.d.ts +3 -6
- package/esm/typings/src/execution/utils/validatePromptResult.d.ts +4 -4
- package/esm/typings/src/llm-providers/_common/profiles/llmProviderProfiles.d.ts +1 -1
- package/esm/typings/src/llm-providers/_common/register/LlmToolsMetadata.d.ts +3 -4
- package/esm/typings/src/utils/color/Color.d.ts +1 -2
- package/esm/typings/src/utils/take/interfaces/ITakeChain.d.ts +2 -2
- package/esm/typings/src/utils/validators/filePath/isValidFilePath.d.ts +1 -1
- package/esm/typings/src/version.d.ts +1 -1
- package/esm/typings/src/wizard/wizard.d.ts +2 -2
- package/package.json +2 -2
- package/umd/index.umd.js +54 -19
- package/umd/index.umd.js.map +1 -1
package/esm/index.es.js
CHANGED
|
@@ -4,7 +4,7 @@ import { JSDOM } from 'jsdom';
|
|
|
4
4
|
import { SHA256 } from 'crypto-js';
|
|
5
5
|
import hexEncoder from 'crypto-js/enc-hex';
|
|
6
6
|
import { mkdir, rm } from 'fs/promises';
|
|
7
|
-
import { basename, join, dirname } from 'path';
|
|
7
|
+
import { basename, join, dirname, isAbsolute } from 'path';
|
|
8
8
|
import parserHtml from 'prettier/parser-html';
|
|
9
9
|
import parserMarkdown from 'prettier/parser-markdown';
|
|
10
10
|
import { format } from 'prettier/standalone';
|
|
@@ -30,7 +30,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
|
|
|
30
30
|
* @generated
|
|
31
31
|
* @see https://github.com/webgptorg/promptbook
|
|
32
32
|
*/
|
|
33
|
-
const PROMPTBOOK_ENGINE_VERSION = '0.100.0';
|
|
33
|
+
const PROMPTBOOK_ENGINE_VERSION = '0.100.2';
|
|
34
34
|
/**
|
|
35
35
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
36
36
|
* Note: [๐] Ignore a discrepancy between file name and entity name
|
|
@@ -901,7 +901,7 @@ function removeEmojis(text) {
|
|
|
901
901
|
}
|
|
902
902
|
|
|
903
903
|
/**
|
|
904
|
-
* Tests if given string is valid
|
|
904
|
+
* Tests if given string is valid file path.
|
|
905
905
|
*
|
|
906
906
|
* Note: This does not check if the file exists only if the path is valid
|
|
907
907
|
* @public exported from `@promptbook/utils`
|
|
@@ -913,18 +913,25 @@ function isValidFilePath(filename) {
|
|
|
913
913
|
if (filename.split('\n').length > 1) {
|
|
914
914
|
return false;
|
|
915
915
|
}
|
|
916
|
-
|
|
917
|
-
|
|
916
|
+
// Normalize slashes early so heuristics can detect path-like inputs
|
|
917
|
+
const filenameSlashes = filename.replace(/\\/g, '/');
|
|
918
|
+
// Reject strings that look like sentences (informational text)
|
|
919
|
+
// Heuristic: contains multiple spaces and ends with a period, or contains typical sentence punctuation
|
|
920
|
+
// But skip this heuristic if the string looks like a path (contains '/' or starts with a drive letter)
|
|
921
|
+
if (filename.trim().length > 60 && // long enough to be a sentence
|
|
922
|
+
/[.!?]/.test(filename) && // contains sentence punctuation
|
|
923
|
+
filename.split(' ').length > 8 && // has many words
|
|
924
|
+
!/\/|^[A-Z]:/i.test(filenameSlashes) // do NOT treat as sentence if looks like a path
|
|
925
|
+
) {
|
|
918
926
|
return false;
|
|
919
927
|
}
|
|
920
|
-
const filenameSlashes = filename.split('\\').join('/');
|
|
921
928
|
// Absolute Unix path: /hello.txt
|
|
922
929
|
if (/^(\/)/i.test(filenameSlashes)) {
|
|
923
930
|
// console.log(filename, 'Absolute Unix path: /hello.txt');
|
|
924
931
|
return true;
|
|
925
932
|
}
|
|
926
|
-
// Absolute Windows path:
|
|
927
|
-
if (/^
|
|
933
|
+
// Absolute Windows path: C:/ or C:\ (allow spaces and multiple dots in filename)
|
|
934
|
+
if (/^[A-Z]:\/.+$/i.test(filenameSlashes)) {
|
|
928
935
|
// console.log(filename, 'Absolute Windows path: /hello.txt');
|
|
929
936
|
return true;
|
|
930
937
|
}
|
|
@@ -3119,7 +3126,7 @@ const LLM_PROVIDER_PROFILES = {
|
|
|
3119
3126
|
};
|
|
3120
3127
|
/**
|
|
3121
3128
|
* TODO: Refactor this - each profile must be alongside the provider definition
|
|
3122
|
-
* TODO: Unite `AvatarProfileProps`
|
|
3129
|
+
* TODO: [๐] Unite `AvatarProfileProps`, `ChatParticipant`, `LlmExecutionTools` + `LlmToolsMetadata`
|
|
3123
3130
|
* Note: [๐] Ignore a discrepancy between file name and entity name
|
|
3124
3131
|
*/
|
|
3125
3132
|
|
|
@@ -3641,9 +3648,15 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3641
3648
|
}
|
|
3642
3649
|
if (isValidUrl(knowledgeSourceContent)) {
|
|
3643
3650
|
const url = knowledgeSourceContent;
|
|
3651
|
+
if (isVerbose) {
|
|
3652
|
+
console.info(`๐ [1] "${name}" is available at "${url}"`);
|
|
3653
|
+
}
|
|
3644
3654
|
const response = await fetch(url); // <- TODO: [๐ง ] Scraping and fetch proxy
|
|
3645
3655
|
const mimeType = ((_a = response.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
|
|
3646
3656
|
if (tools.fs === undefined || !url.endsWith('.pdf' /* <- TODO: [๐ต] */)) {
|
|
3657
|
+
if (isVerbose) {
|
|
3658
|
+
console.info(`๐ [2] "${name}" tools.fs is not available or URL is not a PDF.`);
|
|
3659
|
+
}
|
|
3647
3660
|
return {
|
|
3648
3661
|
source: name,
|
|
3649
3662
|
filename: null,
|
|
@@ -3679,13 +3692,17 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3679
3692
|
await tools.fs.mkdir(dirname(join(rootDirname, filepath)), { recursive: true });
|
|
3680
3693
|
}
|
|
3681
3694
|
catch (error) {
|
|
3695
|
+
if (isVerbose) {
|
|
3696
|
+
console.info(`๐ [3] "${name}" error creating cache directory`);
|
|
3697
|
+
}
|
|
3682
3698
|
// Note: If we can't create cache directory, we'll handle it when trying to write the file
|
|
3683
3699
|
// This handles read-only filesystems, permission issues, and missing parent directories
|
|
3684
|
-
if (error instanceof Error &&
|
|
3685
|
-
error.message.includes('
|
|
3686
|
-
|
|
3687
|
-
|
|
3688
|
-
|
|
3700
|
+
if (error instanceof Error &&
|
|
3701
|
+
(error.message.includes('EROFS') ||
|
|
3702
|
+
error.message.includes('read-only') ||
|
|
3703
|
+
error.message.includes('EACCES') ||
|
|
3704
|
+
error.message.includes('EPERM') ||
|
|
3705
|
+
error.message.includes('ENOENT'))) ;
|
|
3689
3706
|
else {
|
|
3690
3707
|
// Re-throw other unexpected errors
|
|
3691
3708
|
throw error;
|
|
@@ -3700,13 +3717,17 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3700
3717
|
await tools.fs.writeFile(join(rootDirname, filepath), fileContent);
|
|
3701
3718
|
}
|
|
3702
3719
|
catch (error) {
|
|
3720
|
+
if (isVerbose) {
|
|
3721
|
+
console.info(`๐ [4] "${name}" error writing cache file`);
|
|
3722
|
+
}
|
|
3703
3723
|
// Note: If we can't write to cache, we'll process the file directly from memory
|
|
3704
3724
|
// This handles read-only filesystems like Vercel
|
|
3705
|
-
if (error instanceof Error &&
|
|
3706
|
-
error.message.includes('
|
|
3707
|
-
|
|
3708
|
-
|
|
3709
|
-
|
|
3725
|
+
if (error instanceof Error &&
|
|
3726
|
+
(error.message.includes('EROFS') ||
|
|
3727
|
+
error.message.includes('read-only') ||
|
|
3728
|
+
error.message.includes('EACCES') ||
|
|
3729
|
+
error.message.includes('EPERM') ||
|
|
3730
|
+
error.message.includes('ENOENT'))) {
|
|
3710
3731
|
// Return a handler that works directly with the downloaded content
|
|
3711
3732
|
return {
|
|
3712
3733
|
source: name,
|
|
@@ -3728,6 +3749,9 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3728
3749
|
}
|
|
3729
3750
|
// TODO: [๐ต] Check the file security
|
|
3730
3751
|
// TODO: [๐งน][๐ง ] Delete the file after the scraping is done
|
|
3752
|
+
if (isVerbose) {
|
|
3753
|
+
console.info(`๐ [5] "${name}" cached at "${join(rootDirname, filepath)}"`);
|
|
3754
|
+
}
|
|
3731
3755
|
return makeKnowledgeSourceHandler({ name, knowledgeSourceContent: filepath }, tools, {
|
|
3732
3756
|
...options,
|
|
3733
3757
|
rootDirname,
|
|
@@ -3742,7 +3766,12 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3742
3766
|
throw new EnvironmentMismatchError('Can not import file knowledge in non-file pipeline');
|
|
3743
3767
|
// <- TODO: [๐ง ] What is the best error type here`
|
|
3744
3768
|
}
|
|
3745
|
-
const filename =
|
|
3769
|
+
const filename = isAbsolute(knowledgeSourceContent)
|
|
3770
|
+
? knowledgeSourceContent
|
|
3771
|
+
: join(rootDirname, knowledgeSourceContent).split('\\').join('/');
|
|
3772
|
+
if (isVerbose) {
|
|
3773
|
+
console.info(`๐ [6] "${name}" is a valid file "${filename}"`);
|
|
3774
|
+
}
|
|
3746
3775
|
const fileExtension = getFileExtension(filename);
|
|
3747
3776
|
const mimeType = extensionToMimeType(fileExtension || '');
|
|
3748
3777
|
if (!(await isFileExisting(filename, tools.fs))) {
|
|
@@ -3784,6 +3813,12 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3784
3813
|
};
|
|
3785
3814
|
}
|
|
3786
3815
|
else {
|
|
3816
|
+
if (isVerbose) {
|
|
3817
|
+
console.info(`๐ [7] "${name}" is just a explicit string text with a knowledge source`);
|
|
3818
|
+
console.info('---');
|
|
3819
|
+
console.info(knowledgeSourceContent);
|
|
3820
|
+
console.info('---');
|
|
3821
|
+
}
|
|
3787
3822
|
return {
|
|
3788
3823
|
source: name,
|
|
3789
3824
|
filename: null,
|