@promptbook/markitdown 0.100.0 → 0.100.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/index.es.js +55 -20
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/book-2.0/agent-source/parseAgentSource.d.ts +2 -2
- package/esm/typings/src/book-2.0/commitments/_base/CommitmentDefinition.d.ts +2 -2
- package/esm/typings/src/book-2.0/commitments/_misc/AgentModelRequirements.d.ts +2 -2
- package/esm/typings/src/book-2.0/commitments/_misc/AgentSourceParseResult.d.ts +2 -2
- package/esm/typings/src/book-2.0/commitments/_misc/ParsedCommitment.d.ts +2 -2
- package/esm/typings/src/book-components/AvatarProfile/AvatarProfile/AvatarProfile.d.ts +3 -0
- package/esm/typings/src/book-components/BookEditor/BookEditor.d.ts +5 -0
- package/esm/typings/src/book-components/Chat/types/ChatParticipant.d.ts +4 -4
- package/esm/typings/src/execution/LlmExecutionTools.d.ts +3 -6
- package/esm/typings/src/execution/utils/validatePromptResult.d.ts +4 -4
- package/esm/typings/src/llm-providers/_common/profiles/llmProviderProfiles.d.ts +1 -1
- package/esm/typings/src/llm-providers/_common/register/LlmToolsMetadata.d.ts +3 -4
- package/esm/typings/src/utils/color/Color.d.ts +1 -2
- package/esm/typings/src/utils/take/interfaces/ITakeChain.d.ts +2 -2
- package/esm/typings/src/utils/validators/filePath/isValidFilePath.d.ts +1 -1
- package/esm/typings/src/version.d.ts +1 -1
- package/esm/typings/src/wizard/wizard.d.ts +2 -2
- package/package.json +2 -2
- package/umd/index.umd.js +54 -19
- package/umd/index.umd.js.map +1 -1
package/esm/index.es.js
CHANGED
|
@@ -2,7 +2,7 @@ import { mkdir, rm, readFile } from 'fs/promises';
|
|
|
2
2
|
import spaceTrim, { spaceTrim as spaceTrim$1 } from 'spacetrim';
|
|
3
3
|
import { SHA256 } from 'crypto-js';
|
|
4
4
|
import hexEncoder from 'crypto-js/enc-hex';
|
|
5
|
-
import { basename, join, dirname } from 'path';
|
|
5
|
+
import { basename, join, dirname, isAbsolute } from 'path';
|
|
6
6
|
import parserHtml from 'prettier/parser-html';
|
|
7
7
|
import parserMarkdown from 'prettier/parser-markdown';
|
|
8
8
|
import { format } from 'prettier/standalone';
|
|
@@ -27,7 +27,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
|
|
|
27
27
|
* @generated
|
|
28
28
|
* @see https://github.com/webgptorg/promptbook
|
|
29
29
|
*/
|
|
30
|
-
const PROMPTBOOK_ENGINE_VERSION = '0.100.
|
|
30
|
+
const PROMPTBOOK_ENGINE_VERSION = '0.100.2';
|
|
31
31
|
/**
|
|
32
32
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
33
33
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
@@ -722,7 +722,7 @@ function removeEmojis(text) {
|
|
|
722
722
|
}
|
|
723
723
|
|
|
724
724
|
/**
|
|
725
|
-
* Tests if given string is valid
|
|
725
|
+
* Tests if given string is valid file path.
|
|
726
726
|
*
|
|
727
727
|
* Note: This does not check if the file exists only if the path is valid
|
|
728
728
|
* @public exported from `@promptbook/utils`
|
|
@@ -734,18 +734,25 @@ function isValidFilePath(filename) {
|
|
|
734
734
|
if (filename.split('\n').length > 1) {
|
|
735
735
|
return false;
|
|
736
736
|
}
|
|
737
|
-
|
|
738
|
-
|
|
737
|
+
// Normalize slashes early so heuristics can detect path-like inputs
|
|
738
|
+
const filenameSlashes = filename.replace(/\\/g, '/');
|
|
739
|
+
// Reject strings that look like sentences (informational text)
|
|
740
|
+
// Heuristic: contains multiple spaces and ends with a period, or contains typical sentence punctuation
|
|
741
|
+
// But skip this heuristic if the string looks like a path (contains '/' or starts with a drive letter)
|
|
742
|
+
if (filename.trim().length > 60 && // long enough to be a sentence
|
|
743
|
+
/[.!?]/.test(filename) && // contains sentence punctuation
|
|
744
|
+
filename.split(' ').length > 8 && // has many words
|
|
745
|
+
!/\/|^[A-Z]:/i.test(filenameSlashes) // do NOT treat as sentence if looks like a path
|
|
746
|
+
) {
|
|
739
747
|
return false;
|
|
740
748
|
}
|
|
741
|
-
const filenameSlashes = filename.split('\\').join('/');
|
|
742
749
|
// Absolute Unix path: /hello.txt
|
|
743
750
|
if (/^(\/)/i.test(filenameSlashes)) {
|
|
744
751
|
// console.log(filename, 'Absolute Unix path: /hello.txt');
|
|
745
752
|
return true;
|
|
746
753
|
}
|
|
747
|
-
// Absolute Windows path:
|
|
748
|
-
if (/^
|
|
754
|
+
// Absolute Windows path: C:/ or C:\ (allow spaces and multiple dots in filename)
|
|
755
|
+
if (/^[A-Z]:\/.+$/i.test(filenameSlashes)) {
|
|
749
756
|
// console.log(filename, 'Absolute Windows path: /hello.txt');
|
|
750
757
|
return true;
|
|
751
758
|
}
|
|
@@ -2990,7 +2997,7 @@ const LLM_PROVIDER_PROFILES = {
|
|
|
2990
2997
|
};
|
|
2991
2998
|
/**
|
|
2992
2999
|
* TODO: Refactor this - each profile must be alongside the provider definition
|
|
2993
|
-
* TODO: Unite `AvatarProfileProps`
|
|
3000
|
+
* TODO: [๐] Unite `AvatarProfileProps`, `ChatParticipant`, `LlmExecutionTools` + `LlmToolsMetadata`
|
|
2994
3001
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
2995
3002
|
*/
|
|
2996
3003
|
|
|
@@ -3627,9 +3634,15 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3627
3634
|
}
|
|
3628
3635
|
if (isValidUrl(knowledgeSourceContent)) {
|
|
3629
3636
|
const url = knowledgeSourceContent;
|
|
3637
|
+
if (isVerbose) {
|
|
3638
|
+
console.info(`๐ [1] "${name}" is available at "${url}"`);
|
|
3639
|
+
}
|
|
3630
3640
|
const response = await fetch(url); // <- TODO: [🧠] Scraping and fetch proxy
|
|
3631
3641
|
const mimeType = ((_a = response.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
|
|
3632
3642
|
if (tools.fs === undefined || !url.endsWith('.pdf' /* <- TODO: [๐ต] */)) {
|
|
3643
|
+
if (isVerbose) {
|
|
3644
|
+
console.info(`๐ [2] "${name}" tools.fs is not available or URL is not a PDF.`);
|
|
3645
|
+
}
|
|
3633
3646
|
return {
|
|
3634
3647
|
source: name,
|
|
3635
3648
|
filename: null,
|
|
@@ -3665,13 +3678,17 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3665
3678
|
await tools.fs.mkdir(dirname(join(rootDirname, filepath)), { recursive: true });
|
|
3666
3679
|
}
|
|
3667
3680
|
catch (error) {
|
|
3681
|
+
if (isVerbose) {
|
|
3682
|
+
console.info(`๐ [3] "${name}" error creating cache directory`);
|
|
3683
|
+
}
|
|
3668
3684
|
// Note: If we can't create cache directory, we'll handle it when trying to write the file
|
|
3669
3685
|
// This handles read-only filesystems, permission issues, and missing parent directories
|
|
3670
|
-
if (error instanceof Error &&
|
|
3671
|
-
error.message.includes('
|
|
3672
|
-
|
|
3673
|
-
|
|
3674
|
-
|
|
3686
|
+
if (error instanceof Error &&
|
|
3687
|
+
(error.message.includes('EROFS') ||
|
|
3688
|
+
error.message.includes('read-only') ||
|
|
3689
|
+
error.message.includes('EACCES') ||
|
|
3690
|
+
error.message.includes('EPERM') ||
|
|
3691
|
+
error.message.includes('ENOENT'))) ;
|
|
3675
3692
|
else {
|
|
3676
3693
|
// Re-throw other unexpected errors
|
|
3677
3694
|
throw error;
|
|
@@ -3686,13 +3703,17 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3686
3703
|
await tools.fs.writeFile(join(rootDirname, filepath), fileContent);
|
|
3687
3704
|
}
|
|
3688
3705
|
catch (error) {
|
|
3706
|
+
if (isVerbose) {
|
|
3707
|
+
console.info(`๐ [4] "${name}" error writing cache file`);
|
|
3708
|
+
}
|
|
3689
3709
|
// Note: If we can't write to cache, we'll process the file directly from memory
|
|
3690
3710
|
// This handles read-only filesystems like Vercel
|
|
3691
|
-
if (error instanceof Error &&
|
|
3692
|
-
error.message.includes('
|
|
3693
|
-
|
|
3694
|
-
|
|
3695
|
-
|
|
3711
|
+
if (error instanceof Error &&
|
|
3712
|
+
(error.message.includes('EROFS') ||
|
|
3713
|
+
error.message.includes('read-only') ||
|
|
3714
|
+
error.message.includes('EACCES') ||
|
|
3715
|
+
error.message.includes('EPERM') ||
|
|
3716
|
+
error.message.includes('ENOENT'))) {
|
|
3696
3717
|
// Return a handler that works directly with the downloaded content
|
|
3697
3718
|
return {
|
|
3698
3719
|
source: name,
|
|
@@ -3714,6 +3735,9 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3714
3735
|
}
|
|
3715
3736
|
// TODO: [๐ต] Check the file security
|
|
3716
3737
|
// TODO: [🧹][🧠] Delete the file after the scraping is done
|
|
3738
|
+
if (isVerbose) {
|
|
3739
|
+
console.info(`๐ [5] "${name}" cached at "${join(rootDirname, filepath)}"`);
|
|
3740
|
+
}
|
|
3717
3741
|
return makeKnowledgeSourceHandler({ name, knowledgeSourceContent: filepath }, tools, {
|
|
3718
3742
|
...options,
|
|
3719
3743
|
rootDirname,
|
|
@@ -3728,7 +3752,12 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3728
3752
|
throw new EnvironmentMismatchError('Can not import file knowledge in non-file pipeline');
|
|
3729
3753
|
// <- TODO: [🧠] What is the best error type here`
|
|
3730
3754
|
}
|
|
3731
|
-
const filename =
|
|
3755
|
+
const filename = isAbsolute(knowledgeSourceContent)
|
|
3756
|
+
? knowledgeSourceContent
|
|
3757
|
+
: join(rootDirname, knowledgeSourceContent).split('\\').join('/');
|
|
3758
|
+
if (isVerbose) {
|
|
3759
|
+
console.info(`๐ [6] "${name}" is a valid file "${filename}"`);
|
|
3760
|
+
}
|
|
3732
3761
|
const fileExtension = getFileExtension(filename);
|
|
3733
3762
|
const mimeType = extensionToMimeType(fileExtension || '');
|
|
3734
3763
|
if (!(await isFileExisting(filename, tools.fs))) {
|
|
@@ -3770,6 +3799,12 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3770
3799
|
};
|
|
3771
3800
|
}
|
|
3772
3801
|
else {
|
|
3802
|
+
if (isVerbose) {
|
|
3803
|
+
console.info(`๐ [7] "${name}" is just a explicit string text with a knowledge source`);
|
|
3804
|
+
console.info('---');
|
|
3805
|
+
console.info(knowledgeSourceContent);
|
|
3806
|
+
console.info('---');
|
|
3807
|
+
}
|
|
3773
3808
|
return {
|
|
3774
3809
|
source: name,
|
|
3775
3810
|
filename: null,
|