@promptbook/pdf 0.92.0-9 → 0.93.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +140 -88
- package/esm/index.es.js +589 -408
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/browser.index.d.ts +2 -0
- package/esm/typings/src/_packages/core.index.d.ts +26 -14
- package/esm/typings/src/_packages/types.index.d.ts +6 -2
- package/esm/typings/src/collection/PipelineCollection.d.ts +0 -2
- package/esm/typings/src/collection/SimplePipelineCollection.d.ts +1 -1
- package/esm/typings/src/commands/FOREACH/ForeachJson.d.ts +6 -6
- package/esm/typings/src/commands/FOREACH/foreachCommandParser.d.ts +0 -2
- package/esm/typings/src/commands/FORMFACTOR/formfactorCommandParser.d.ts +1 -1
- package/esm/typings/src/commands/_BOILERPLATE/boilerplateCommandParser.d.ts +1 -1
- package/esm/typings/src/commands/_common/types/CommandParser.d.ts +36 -28
- package/esm/typings/src/config.d.ts +41 -11
- package/esm/typings/src/constants.d.ts +43 -2
- package/esm/typings/src/conversion/parsePipeline.d.ts +2 -2
- package/esm/typings/src/errors/0-BoilerplateError.d.ts +2 -2
- package/esm/typings/src/errors/CollectionError.d.ts +1 -1
- package/esm/typings/src/executables/$provideExecutablesForNode.d.ts +1 -1
- package/esm/typings/src/executables/apps/locateLibreoffice.d.ts +2 -1
- package/esm/typings/src/executables/apps/locatePandoc.d.ts +2 -1
- package/esm/typings/src/executables/locateApp.d.ts +2 -2
- package/esm/typings/src/executables/platforms/locateAppOnLinux.d.ts +2 -1
- package/esm/typings/src/executables/platforms/locateAppOnMacOs.d.ts +2 -1
- package/esm/typings/src/executables/platforms/locateAppOnWindows.d.ts +2 -1
- package/esm/typings/src/execution/AbstractTaskResult.d.ts +1 -1
- package/esm/typings/src/execution/CommonToolsOptions.d.ts +3 -3
- package/esm/typings/src/execution/ExecutionTask.d.ts +19 -1
- package/esm/typings/src/execution/LlmExecutionToolsConstructor.d.ts +2 -1
- package/esm/typings/src/execution/PipelineExecutorResult.d.ts +4 -2
- package/esm/typings/src/execution/PromptbookFetch.d.ts +1 -1
- package/esm/typings/src/execution/ScriptExecutionTools.d.ts +1 -1
- package/esm/typings/src/execution/createPipelineExecutor/$OngoingTaskResult.d.ts +12 -9
- package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +13 -10
- package/esm/typings/src/execution/createPipelineExecutor/20-executeTask.d.ts +12 -9
- package/esm/typings/src/execution/createPipelineExecutor/30-executeFormatSubvalues.d.ts +15 -3
- package/esm/typings/src/execution/createPipelineExecutor/40-executeAttempts.d.ts +21 -15
- package/esm/typings/src/execution/createPipelineExecutor/computeCosineSimilarity.d.ts +13 -0
- package/esm/typings/src/execution/createPipelineExecutor/filterJustOutputParameters.d.ts +7 -6
- package/esm/typings/src/execution/createPipelineExecutor/getContextForTask.d.ts +5 -1
- package/esm/typings/src/execution/createPipelineExecutor/getExamplesForTask.d.ts +1 -1
- package/esm/typings/src/execution/createPipelineExecutor/getKnowledgeForTask.d.ts +12 -9
- package/esm/typings/src/execution/createPipelineExecutor/getReservedParametersForTask.d.ts +18 -5
- package/esm/typings/src/execution/createPipelineExecutor/knowledgePiecesToString.d.ts +9 -0
- package/esm/typings/src/execution/execution-report/ExecutionReportJson.d.ts +1 -1
- package/esm/typings/src/execution/execution-report/ExecutionReportString.d.ts +1 -1
- package/esm/typings/src/execution/translation/automatic-translate/automatic-translators/LindatAutomaticTranslator.d.ts +4 -4
- package/esm/typings/src/execution/utils/checkExpectations.d.ts +3 -3
- package/esm/typings/src/execution/utils/uncertainNumber.d.ts +3 -2
- package/esm/typings/src/execution/utils/usageToWorktime.d.ts +1 -1
- package/esm/typings/src/formats/_common/{FormatDefinition.d.ts → FormatParser.d.ts} +8 -6
- package/esm/typings/src/formats/_common/FormatSubvalueParser.d.ts +66 -0
- package/esm/typings/src/formats/csv/CsvFormatParser.d.ts +17 -0
- package/esm/typings/src/formats/csv/CsvSettings.d.ts +2 -2
- package/esm/typings/src/formats/csv/utils/csvParse.d.ts +12 -0
- package/esm/typings/src/formats/csv/utils/isValidCsvString.d.ts +1 -1
- package/esm/typings/src/formats/index.d.ts +2 -2
- package/esm/typings/src/formats/json/JsonFormatParser.d.ts +19 -0
- package/esm/typings/src/formats/json/utils/isValidJsonString.d.ts +1 -1
- package/esm/typings/src/formats/json/utils/jsonParse.d.ts +0 -3
- package/esm/typings/src/formats/text/{TextFormatDefinition.d.ts → TextFormatParser.d.ts} +7 -7
- package/esm/typings/src/formats/xml/XmlFormatParser.d.ts +19 -0
- package/esm/typings/src/formats/xml/utils/isValidXmlString.d.ts +1 -1
- package/esm/typings/src/formfactors/_boilerplate/BoilerplateFormfactorDefinition.d.ts +3 -2
- package/esm/typings/src/formfactors/_common/AbstractFormfactorDefinition.d.ts +16 -7
- package/esm/typings/src/formfactors/_common/FormfactorDefinition.d.ts +3 -1
- package/esm/typings/src/formfactors/_common/string_formfactor_name.d.ts +2 -1
- package/esm/typings/src/formfactors/chatbot/ChatbotFormfactorDefinition.d.ts +2 -2
- package/esm/typings/src/formfactors/completion/CompletionFormfactorDefinition.d.ts +29 -0
- package/esm/typings/src/formfactors/generator/GeneratorFormfactorDefinition.d.ts +2 -1
- package/esm/typings/src/formfactors/generic/GenericFormfactorDefinition.d.ts +2 -2
- package/esm/typings/src/formfactors/index.d.ts +33 -8
- package/esm/typings/src/formfactors/matcher/MatcherFormfactorDefinition.d.ts +4 -2
- package/esm/typings/src/formfactors/sheets/SheetsFormfactorDefinition.d.ts +3 -2
- package/esm/typings/src/formfactors/translator/TranslatorFormfactorDefinition.d.ts +3 -2
- package/esm/typings/src/high-level-abstractions/index.d.ts +2 -2
- package/esm/typings/src/llm-providers/_common/register/$llmToolsMetadataRegister.d.ts +3 -3
- package/esm/typings/src/llm-providers/_common/register/$llmToolsRegister.d.ts +3 -3
- package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsConfigurationFromEnv.d.ts +4 -4
- package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsForTestingAndScriptsAndPlayground.d.ts +4 -3
- package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsFromEnv.d.ts +18 -5
- package/esm/typings/src/llm-providers/_common/register/LlmToolsConfiguration.d.ts +11 -4
- package/esm/typings/src/llm-providers/_common/register/LlmToolsMetadata.d.ts +21 -42
- package/esm/typings/src/llm-providers/_common/register/LlmToolsOptions.d.ts +9 -2
- package/esm/typings/src/llm-providers/_common/register/createLlmToolsFromConfiguration.d.ts +13 -4
- package/esm/typings/src/llm-providers/_common/utils/cache/CacheItem.d.ts +10 -5
- package/esm/typings/src/llm-providers/_common/utils/cache/CacheLlmToolsOptions.d.ts +11 -3
- package/esm/typings/src/llm-providers/_common/utils/cache/cacheLlmTools.d.ts +3 -3
- package/esm/typings/src/llm-providers/_common/utils/count-total-usage/limitTotalUsage.d.ts +5 -5
- package/esm/typings/src/llm-providers/anthropic-claude/AnthropicClaudeExecutionTools.d.ts +6 -0
- package/esm/typings/src/llm-providers/anthropic-claude/anthropic-claude-models.d.ts +1 -1
- package/esm/typings/src/llm-providers/azure-openai/AzureOpenAiExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/azure-openai/AzureOpenAiExecutionToolsOptions.d.ts +4 -4
- package/esm/typings/src/llm-providers/deepseek/deepseek-models.d.ts +1 -1
- package/esm/typings/src/llm-providers/google/google-models.d.ts +1 -1
- package/esm/typings/src/llm-providers/openai/OpenAiAssistantExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/openai/openai-models.d.ts +1 -1
- package/esm/typings/src/llm-providers/openai/register-configuration.d.ts +2 -2
- package/esm/typings/src/llm-providers/openai/register-constructor.d.ts +2 -2
- package/esm/typings/src/migrations/migratePipeline.d.ts +9 -0
- package/esm/typings/src/other/templates/getBookTemplates.d.ts +2 -2
- package/esm/typings/src/pipeline/PipelineInterface/PipelineInterface.d.ts +3 -3
- package/esm/typings/src/pipeline/PipelineInterface/constants.d.ts +1 -1
- package/esm/typings/src/pipeline/PipelineInterface/getPipelineInterface.d.ts +1 -1
- package/esm/typings/src/pipeline/PipelineInterface/isPipelineImplementingInterface.d.ts +5 -4
- package/esm/typings/src/pipeline/PipelineInterface/isPipelineInterfacesEqual.d.ts +1 -1
- package/esm/typings/src/pipeline/PipelineJson/CommonTaskJson.d.ts +10 -7
- package/esm/typings/src/pipeline/PipelineJson/ParameterJson.d.ts +1 -1
- package/esm/typings/src/pipeline/PipelineJson/PipelineJson.d.ts +3 -2
- package/esm/typings/src/pipeline/PipelineString.d.ts +3 -1
- package/esm/typings/src/pipeline/book-notation.d.ts +2 -2
- package/esm/typings/src/postprocessing/utils/extractBlock.d.ts +1 -1
- package/esm/typings/src/postprocessing/utils/extractJsonBlock.d.ts +2 -2
- package/esm/typings/src/prepare/prepareTasks.d.ts +8 -5
- package/esm/typings/src/remote-server/openapi.d.ts +1 -1
- package/esm/typings/src/remote-server/socket-types/listModels/PromptbookServer_ListModels_Response.d.ts +1 -1
- package/esm/typings/src/remote-server/startRemoteServer.d.ts +1 -1
- package/esm/typings/src/remote-server/types/RemoteServerOptions.d.ts +7 -6
- package/esm/typings/src/scrapers/_boilerplate/BoilerplateScraper.d.ts +3 -3
- package/esm/typings/src/scrapers/_boilerplate/createBoilerplateScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/_boilerplate/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/_common/Converter.d.ts +3 -1
- package/esm/typings/src/scrapers/_common/Scraper.d.ts +4 -3
- package/esm/typings/src/scrapers/_common/ScraperIntermediateSource.d.ts +4 -2
- package/esm/typings/src/scrapers/_common/prepareKnowledgePieces.d.ts +2 -2
- package/esm/typings/src/scrapers/_common/register/$provideFilesystemForNode.d.ts +2 -1
- package/esm/typings/src/scrapers/_common/register/$provideScrapersForBrowser.d.ts +6 -3
- package/esm/typings/src/scrapers/_common/register/$provideScrapersForNode.d.ts +3 -5
- package/esm/typings/src/scrapers/_common/register/$scrapersMetadataRegister.d.ts +3 -3
- package/esm/typings/src/scrapers/_common/register/$scrapersRegister.d.ts +3 -2
- package/esm/typings/src/scrapers/_common/register/ScraperAndConverterMetadata.d.ts +8 -5
- package/esm/typings/src/scrapers/_common/register/ScraperConstructor.d.ts +2 -1
- package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +6 -5
- package/esm/typings/src/scrapers/_common/utils/makeKnowledgeSourceHandler.d.ts +3 -1
- package/esm/typings/src/scrapers/document/createDocumentScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/document-legacy/createLegacyDocumentScraper.d.ts +2 -1
- package/esm/typings/src/scrapers/markdown/createMarkdownScraper.d.ts +4 -1
- package/esm/typings/src/scrapers/markitdown/MarkitdownScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/pdf/createPdfScraper.d.ts +2 -1
- package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts +3 -4
- package/esm/typings/src/scripting/javascript/JavascriptExecutionTools.d.ts +1 -1
- package/esm/typings/src/scripting/javascript/postprocessing-functions.d.ts +5 -1
- package/esm/typings/src/scripting/javascript/utils/preserve.d.ts +1 -1
- package/esm/typings/src/storage/_common/PromptbookStorage.d.ts +1 -1
- package/esm/typings/src/storage/file-cache-storage/FileCacheStorage.d.ts +12 -5
- package/esm/typings/src/storage/file-cache-storage/FileCacheStorageOptions.d.ts +4 -2
- package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.d.ts +2 -1
- package/esm/typings/src/storage/local-storage/getIndexedDbStorage.d.ts +11 -0
- package/esm/typings/src/storage/local-storage/utils/IndexedDbStorageOptions.d.ts +14 -0
- package/esm/typings/src/storage/local-storage/utils/makePromptbookStorageFromIndexedDb.d.ts +8 -0
- package/esm/typings/src/storage/local-storage/utils/makePromptbookStorageFromWebStorage.d.ts +2 -1
- package/esm/typings/src/types/IntermediateFilesStrategy.d.ts +2 -1
- package/esm/typings/src/types/ModelRequirements.d.ts +2 -2
- package/esm/typings/src/types/ModelVariant.d.ts +5 -5
- package/esm/typings/src/types/typeAliases.d.ts +22 -19
- package/esm/typings/src/utils/$Register.d.ts +8 -7
- package/esm/typings/src/utils/editable/edit-pipeline-string/addPipelineCommand.d.ts +2 -2
- package/esm/typings/src/utils/editable/edit-pipeline-string/deflatePipeline.d.ts +4 -1
- package/esm/typings/src/utils/editable/utils/isFlatPipeline.d.ts +2 -1
- package/esm/typings/src/utils/editable/utils/stringifyPipelineJson.d.ts +1 -1
- package/esm/typings/src/utils/environment/$getGlobalScope.d.ts +2 -1
- package/esm/typings/src/utils/expectation-counters/index.d.ts +1 -1
- package/esm/typings/src/utils/markdown/extractAllBlocksFromMarkdown.d.ts +2 -2
- package/esm/typings/src/utils/markdown/extractAllListItemsFromMarkdown.d.ts +1 -1
- package/esm/typings/src/utils/markdown/extractOneBlockFromMarkdown.d.ts +2 -2
- package/esm/typings/src/utils/normalization/nameToUriPart.d.ts +4 -4
- package/esm/typings/src/utils/normalization/nameToUriParts.d.ts +4 -4
- package/esm/typings/src/utils/normalization/normalize-to-kebab-case.d.ts +3 -3
- package/esm/typings/src/utils/normalization/normalizeTo_SCREAMING_CASE.d.ts +3 -3
- package/esm/typings/src/utils/normalization/normalizeTo_camelCase.d.ts +4 -4
- package/esm/typings/src/utils/normalization/normalizeTo_snake_case.d.ts +3 -3
- package/esm/typings/src/utils/normalization/removeDiacritics.d.ts +3 -3
- package/esm/typings/src/utils/normalization/searchKeywords.d.ts +4 -1
- package/esm/typings/src/utils/normalization/titleToName.d.ts +4 -4
- package/esm/typings/src/utils/organization/TODO_USE.d.ts +1 -1
- package/esm/typings/src/utils/organization/empty_object.d.ts +2 -2
- package/esm/typings/src/utils/organization/just.d.ts +1 -1
- package/esm/typings/src/utils/organization/just_empty_object.d.ts +4 -4
- package/esm/typings/src/utils/organization/keepUnused.d.ts +1 -1
- package/esm/typings/src/utils/parameters/mapAvailableToExpectedParameters.d.ts +7 -7
- package/esm/typings/src/utils/removeQuotes.d.ts +2 -2
- package/esm/typings/src/utils/serialization/clonePipeline.d.ts +4 -3
- package/esm/typings/src/utils/serialization/deepClone.d.ts +5 -1
- package/esm/typings/src/utils/trimCodeBlock.d.ts +1 -1
- package/esm/typings/src/utils/trimEndOfCodeBlock.d.ts +1 -1
- package/esm/typings/src/utils/unwrapResult.d.ts +2 -2
- package/esm/typings/src/utils/validators/javascriptName/isValidJavascriptName.d.ts +3 -3
- package/esm/typings/src/utils/validators/parameterName/validateParameterName.d.ts +5 -4
- package/esm/typings/src/utils/validators/semanticVersion/isValidPromptbookVersion.d.ts +1 -1
- package/esm/typings/src/utils/validators/semanticVersion/isValidSemanticVersion.d.ts +1 -1
- package/esm/typings/src/utils/validators/url/isHostnameOnPrivateNetwork.d.ts +1 -1
- package/esm/typings/src/utils/validators/url/isUrlOnPrivateNetwork.d.ts +1 -1
- package/esm/typings/src/utils/validators/url/isValidPipelineUrl.d.ts +1 -1
- package/esm/typings/src/utils/validators/url/isValidUrl.d.ts +1 -1
- package/esm/typings/src/version.d.ts +2 -1
- package/esm/typings/src/wizzard/wizzard.d.ts +1 -1
- package/package.json +15 -3
- package/umd/index.umd.js +589 -408
- package/umd/index.umd.js.map +1 -1
- package/esm/typings/src/formats/_common/FormatSubvalueDefinition.d.ts +0 -31
- package/esm/typings/src/formats/csv/CsvFormatDefinition.d.ts +0 -17
- package/esm/typings/src/formats/json/JsonFormatDefinition.d.ts +0 -19
- package/esm/typings/src/formats/xml/XmlFormatDefinition.d.ts +0 -19
- /package/esm/typings/src/llm-providers/{multiple → _multiple}/MultipleLlmExecutionTools.d.ts +0 -0
- /package/esm/typings/src/llm-providers/{multiple → _multiple}/joinLlmExecutionTools.d.ts +0 -0
- /package/esm/typings/src/llm-providers/{multiple → _multiple}/playground/playground.d.ts +0 -0
package/esm/index.es.js
CHANGED
|
@@ -26,7 +26,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
|
|
|
26
26
|
* @generated
|
|
27
27
|
* @see https://github.com/webgptorg/promptbook
|
|
28
28
|
*/
|
|
29
|
-
const PROMPTBOOK_ENGINE_VERSION = '0.92.0-9';
|
|
29
|
+
const PROMPTBOOK_ENGINE_VERSION = '0.93.0';
|
|
30
30
|
/**
|
|
31
31
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
32
32
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
@@ -36,7 +36,7 @@ const PROMPTBOOK_ENGINE_VERSION = '0.92.0-9';
|
|
|
36
36
|
* Just says that the variable is not used but should be kept
|
|
37
37
|
* No side effects.
|
|
38
38
|
*
|
|
39
|
-
* Note: It can be
|
|
39
|
+
* Note: It can be useful for:
|
|
40
40
|
*
|
|
41
41
|
* 1) Suppressing eager optimization of unused imports
|
|
42
42
|
* 2) Suppressing eslint errors of unused variables in the tests
|
|
@@ -53,7 +53,7 @@ function keepUnused(...valuesToKeep) {
|
|
|
53
53
|
* Returns the same value that is passed as argument.
|
|
54
54
|
* No side effects.
|
|
55
55
|
*
|
|
56
|
-
* Note: It can be
|
|
56
|
+
* Note: It can be useful for:
|
|
57
57
|
*
|
|
58
58
|
* 1) Leveling indentation
|
|
59
59
|
* 2) Putting always-true or always-false conditions without getting eslint errors
|
|
@@ -102,6 +102,21 @@ const DEFAULT_BOOK_TITLE = `✨ Untitled Book`;
|
|
|
102
102
|
* @public exported from `@promptbook/core`
|
|
103
103
|
*/
|
|
104
104
|
const DEFAULT_MAX_FILE_SIZE = 100 * 1024 * 1024; // 100MB
|
|
105
|
+
/**
|
|
106
|
+
* Threshold value that determines when a dataset is considered "big"
|
|
107
|
+
* and may require special handling or optimizations
|
|
108
|
+
*
|
|
109
|
+
* For example, when error occurs in one item of the big dataset, it will not fail the whole pipeline
|
|
110
|
+
*
|
|
111
|
+
* @public exported from `@promptbook/core`
|
|
112
|
+
*/
|
|
113
|
+
const BIG_DATASET_TRESHOLD = 50;
|
|
114
|
+
/**
|
|
115
|
+
* Placeholder text used to represent a placeholder value of failed operation
|
|
116
|
+
*
|
|
117
|
+
* @public exported from `@promptbook/core`
|
|
118
|
+
*/
|
|
119
|
+
const FAILED_VALUE_PLACEHOLDER = '!?';
|
|
105
120
|
// <- TODO: [🧠] Better system for generator warnings - not always "code" and "by `@promptbook/cli`"
|
|
106
121
|
/**
|
|
107
122
|
* The maximum number of iterations for a loops
|
|
@@ -181,7 +196,7 @@ const DEFAULT_DOWNLOAD_CACHE_DIRNAME = './.promptbook/download-cache';
|
|
|
181
196
|
const DEFAULT_SCRAPE_CACHE_DIRNAME = './.promptbook/scrape-cache';
|
|
182
197
|
// <- TODO: [🧜♂️]
|
|
183
198
|
/**
|
|
184
|
-
*
|
|
199
|
+
* Default settings for parsing and generating CSV files in Promptbook.
|
|
185
200
|
*
|
|
186
201
|
* @public exported from `@promptbook/core`
|
|
187
202
|
*/
|
|
@@ -192,19 +207,19 @@ const DEFAULT_CSV_SETTINGS = Object.freeze({
|
|
|
192
207
|
skipEmptyLines: true,
|
|
193
208
|
});
|
|
194
209
|
/**
|
|
195
|
-
*
|
|
210
|
+
* Controls whether verbose logging is enabled by default throughout the application.
|
|
196
211
|
*
|
|
197
212
|
* @public exported from `@promptbook/core`
|
|
198
213
|
*/
|
|
199
214
|
let DEFAULT_IS_VERBOSE = false;
|
|
200
215
|
/**
|
|
201
|
-
*
|
|
216
|
+
* Controls whether auto-installation of dependencies is enabled by default.
|
|
202
217
|
*
|
|
203
218
|
* @public exported from `@promptbook/core`
|
|
204
219
|
*/
|
|
205
220
|
const DEFAULT_IS_AUTO_INSTALLED = false;
|
|
206
221
|
/**
|
|
207
|
-
*
|
|
222
|
+
* Indicates whether pipeline logic validation is enabled. When true, the pipeline logic is checked for consistency.
|
|
208
223
|
*
|
|
209
224
|
* @private within the repository
|
|
210
225
|
*/
|
|
@@ -298,7 +313,7 @@ class UnexpectedError extends Error {
|
|
|
298
313
|
${block(message)}
|
|
299
314
|
|
|
300
315
|
Note: This error should not happen.
|
|
301
|
-
It's
|
|
316
|
+
It's probably a bug in the pipeline collection
|
|
302
317
|
|
|
303
318
|
Please report issue:
|
|
304
319
|
${block(getErrorReportUrl(new Error(message)).href)}
|
|
@@ -355,7 +370,8 @@ async function isFileExisting(filename, fs) {
|
|
|
355
370
|
*/
|
|
356
371
|
|
|
357
372
|
/**
|
|
358
|
-
*
|
|
373
|
+
* Converts a name to a properly formatted subfolder path for cache storage.
|
|
374
|
+
* Handles normalization and path formatting to create consistent cache directory structures.
|
|
359
375
|
*
|
|
360
376
|
* @private for `FileCacheStorage`
|
|
361
377
|
*/
|
|
@@ -608,10 +624,10 @@ for (let i = 0; i < defaultDiacriticsRemovalMap.length; i++) {
|
|
|
608
624
|
*/
|
|
609
625
|
|
|
610
626
|
/**
|
|
611
|
-
*
|
|
627
|
+
* Removes diacritic marks (accents) from characters in a string.
|
|
612
628
|
*
|
|
613
|
-
* @param input
|
|
614
|
-
* @returns
|
|
629
|
+
* @param input The string containing diacritics to be normalized.
|
|
630
|
+
* @returns The string with diacritics removed or normalized.
|
|
615
631
|
* @public exported from `@promptbook/utils`
|
|
616
632
|
*/
|
|
617
633
|
function removeDiacritics(input) {
|
|
@@ -625,10 +641,10 @@ function removeDiacritics(input) {
|
|
|
625
641
|
*/
|
|
626
642
|
|
|
627
643
|
/**
|
|
628
|
-
*
|
|
644
|
+
* Converts a given text to kebab-case format.
|
|
629
645
|
*
|
|
630
|
-
* @param text
|
|
631
|
-
* @returns
|
|
646
|
+
* @param text The text to be converted.
|
|
647
|
+
* @returns The kebab-case formatted string.
|
|
632
648
|
* @example 'hello-world'
|
|
633
649
|
* @example 'i-love-promptbook'
|
|
634
650
|
* @public exported from `@promptbook/utils`
|
|
@@ -744,7 +760,7 @@ function isValidFilePath(filename) {
|
|
|
744
760
|
* Tests if given string is valid URL.
|
|
745
761
|
*
|
|
746
762
|
* Note: Dataurl are considered perfectly valid.
|
|
747
|
-
* Note: There are two
|
|
763
|
+
* Note: There are two similar functions:
|
|
748
764
|
* - `isValidUrl` which tests any URL
|
|
749
765
|
* - `isValidPipelineUrl` *(this one)* which tests just promptbook URL
|
|
750
766
|
*
|
|
@@ -770,11 +786,11 @@ function isValidUrl(url) {
|
|
|
770
786
|
}
|
|
771
787
|
|
|
772
788
|
/**
|
|
773
|
-
*
|
|
789
|
+
* Converts a title string into a normalized name.
|
|
774
790
|
*
|
|
775
|
-
* @param value
|
|
776
|
-
* @returns
|
|
777
|
-
* @example
|
|
791
|
+
* @param value The title string to be converted to a name.
|
|
792
|
+
* @returns A normalized name derived from the input title.
|
|
793
|
+
* @example 'Hello World!' -> 'hello-world'
|
|
778
794
|
* @public exported from `@promptbook/utils`
|
|
779
795
|
*/
|
|
780
796
|
function titleToName(value) {
|
|
@@ -797,7 +813,7 @@ function titleToName(value) {
|
|
|
797
813
|
* Just marks a place of place where should be something implemented
|
|
798
814
|
* No side effects.
|
|
799
815
|
*
|
|
800
|
-
* Note: It can be
|
|
816
|
+
* Note: It can be useful suppressing eslint errors of unused variables
|
|
801
817
|
*
|
|
802
818
|
* @param value any values
|
|
803
819
|
* @returns void
|
|
@@ -807,9 +823,8 @@ function TODO_USE(...value) {
|
|
|
807
823
|
}
|
|
808
824
|
|
|
809
825
|
/**
|
|
810
|
-
*
|
|
811
|
-
*
|
|
812
|
-
* Note: It also checks if directory exists and creates it if not
|
|
826
|
+
* Retrieves an intermediate source for a scraper based on the knowledge source.
|
|
827
|
+
* Manages the caching and retrieval of intermediate scraper results for optimized performance.
|
|
813
828
|
*
|
|
814
829
|
* @private as internal utility for scrapers
|
|
815
830
|
*/
|
|
@@ -945,7 +960,7 @@ function assertsError(whatWasThrown) {
|
|
|
945
960
|
* Function isValidJsonString will tell you if the string is valid JSON or not
|
|
946
961
|
*
|
|
947
962
|
* @param value The string to check
|
|
948
|
-
* @returns
|
|
963
|
+
* @returns `true` if the string is a valid JSON string, false otherwise
|
|
949
964
|
*
|
|
950
965
|
* @public exported from `@promptbook/utils`
|
|
951
966
|
*/
|
|
@@ -1057,7 +1072,7 @@ function pipelineJsonToString(pipelineJson) {
|
|
|
1057
1072
|
if (bookVersion !== `undefined`) {
|
|
1058
1073
|
commands.push(`BOOK VERSION ${bookVersion}`);
|
|
1059
1074
|
}
|
|
1060
|
-
// TODO: [main] !!5 This increases size of the bundle and is
|
|
1075
|
+
// TODO: [main] !!5 This increases size of the bundle and is probably not necessary
|
|
1061
1076
|
pipelineString = prettifyMarkdown(pipelineString);
|
|
1062
1077
|
for (const parameter of parameters.filter(({ isInput }) => isInput)) {
|
|
1063
1078
|
commands.push(`INPUT PARAMETER ${taskParameterJsonToString(parameter)}`);
|
|
@@ -1356,8 +1371,12 @@ function checkSerializableAsJson(options) {
|
|
|
1356
1371
|
*/
|
|
1357
1372
|
|
|
1358
1373
|
/**
|
|
1359
|
-
*
|
|
1374
|
+
* Creates a deep clone of the given object
|
|
1360
1375
|
*
|
|
1376
|
+
* Note: This method only works for objects that are fully serializable to JSON and do not contain functions, Dates, or special types.
|
|
1377
|
+
*
|
|
1378
|
+
* @param objectValue The object to clone.
|
|
1379
|
+
* @returns A deep, writable clone of the input object.
|
|
1361
1380
|
* @public exported from `@promptbook/utils`
|
|
1362
1381
|
*/
|
|
1363
1382
|
function deepClone(objectValue) {
|
|
@@ -1439,13 +1458,13 @@ const ORDER_OF_PIPELINE_JSON = [
|
|
|
1439
1458
|
*/
|
|
1440
1459
|
const REPLACING_NONCE = 'ptbkauk42kV2dzao34faw7FudQUHYPtW';
|
|
1441
1460
|
/**
|
|
1442
|
-
*
|
|
1461
|
+
* Placeholder value indicating a parameter is missing its value.
|
|
1443
1462
|
*
|
|
1444
1463
|
* @private within the repository
|
|
1445
1464
|
*/
|
|
1446
1465
|
const RESERVED_PARAMETER_MISSING_VALUE = 'MISSING-' + REPLACING_NONCE;
|
|
1447
1466
|
/**
|
|
1448
|
-
*
|
|
1467
|
+
* Placeholder value indicating a parameter is restricted and cannot be used directly.
|
|
1449
1468
|
*
|
|
1450
1469
|
* @private within the repository
|
|
1451
1470
|
*/
|
|
@@ -1490,7 +1509,7 @@ class PipelineLogicError extends Error {
|
|
|
1490
1509
|
/**
|
|
1491
1510
|
* Tests if given string is valid semantic version
|
|
1492
1511
|
*
|
|
1493
|
-
* Note: There are two
|
|
1512
|
+
* Note: There are two similar functions:
|
|
1494
1513
|
* - `isValidSemanticVersion` which tests any semantic version
|
|
1495
1514
|
* - `isValidPromptbookVersion` *(this one)* which tests just Promptbook versions
|
|
1496
1515
|
*
|
|
@@ -1512,7 +1531,7 @@ function isValidSemanticVersion(version) {
|
|
|
1512
1531
|
*
|
|
1513
1532
|
* @see https://www.npmjs.com/package/promptbook?activeTab=versions
|
|
1514
1533
|
* Note: When you are using for example promptbook 2.0.0 and there already is promptbook 3.0.0 it don`t know about it.
|
|
1515
|
-
* Note: There are two
|
|
1534
|
+
* Note: There are two similar functions:
|
|
1516
1535
|
* - `isValidSemanticVersion` which tests any semantic version
|
|
1517
1536
|
* - `isValidPromptbookVersion` *(this one)* which tests just Promptbook versions
|
|
1518
1537
|
*
|
|
@@ -1532,7 +1551,7 @@ function isValidPromptbookVersion(version) {
|
|
|
1532
1551
|
/**
|
|
1533
1552
|
* Tests if given string is valid pipeline URL URL.
|
|
1534
1553
|
*
|
|
1535
|
-
* Note: There are two
|
|
1554
|
+
* Note: There are two similar functions:
|
|
1536
1555
|
* - `isValidUrl` which tests any URL
|
|
1537
1556
|
* - `isValidPipelineUrl` *(this one)* which tests just pipeline URL
|
|
1538
1557
|
*
|
|
@@ -1629,7 +1648,7 @@ function validatePipeline_InnerFunction(pipeline) {
|
|
|
1629
1648
|
${block(pipelineIdentification)}
|
|
1630
1649
|
`));
|
|
1631
1650
|
}
|
|
1632
|
-
// TODO: [🧠] Maybe do here some
|
|
1651
|
+
// TODO: [🧠] Maybe do here some proper JSON-schema / ZOD checking
|
|
1633
1652
|
if (!Array.isArray(pipeline.parameters)) {
|
|
1634
1653
|
// TODO: [🧠] what is the correct error tp throw - maybe PromptbookSchemaError
|
|
1635
1654
|
throw new ParseError(spaceTrim$1((block) => `
|
|
@@ -1640,7 +1659,7 @@ function validatePipeline_InnerFunction(pipeline) {
|
|
|
1640
1659
|
${block(pipelineIdentification)}
|
|
1641
1660
|
`));
|
|
1642
1661
|
}
|
|
1643
|
-
// TODO: [🧠] Maybe do here some
|
|
1662
|
+
// TODO: [🧠] Maybe do here some proper JSON-schema / ZOD checking
|
|
1644
1663
|
if (!Array.isArray(pipeline.tasks)) {
|
|
1645
1664
|
// TODO: [🧠] what is the correct error tp throw - maybe PromptbookSchemaError
|
|
1646
1665
|
throw new ParseError(spaceTrim$1((block) => `
|
|
@@ -1944,7 +1963,7 @@ class SimplePipelineCollection {
|
|
|
1944
1963
|
/**
|
|
1945
1964
|
* Constructs a pipeline collection from pipelines
|
|
1946
1965
|
*
|
|
1947
|
-
* @param pipelines
|
|
1966
|
+
* @param pipelines Array of pipeline JSON objects to include in the collection
|
|
1948
1967
|
*
|
|
1949
1968
|
* Note: During the construction logic of all pipelines are validated
|
|
1950
1969
|
* Note: It is not recommended to use this constructor directly, use `createCollectionFromJson` *(or other variant)* instead
|
|
@@ -2058,7 +2077,7 @@ class MissingToolsError extends Error {
|
|
|
2058
2077
|
super(spaceTrim$1((block) => `
|
|
2059
2078
|
${block(message)}
|
|
2060
2079
|
|
|
2061
|
-
Note: You have
|
|
2080
|
+
Note: You have probably forgot to provide some tools for pipeline execution or preparation
|
|
2062
2081
|
|
|
2063
2082
|
`));
|
|
2064
2083
|
this.name = 'MissingToolsError';
|
|
@@ -2092,7 +2111,7 @@ class PipelineExecutionError extends Error {
|
|
|
2092
2111
|
super(message);
|
|
2093
2112
|
this.name = 'PipelineExecutionError';
|
|
2094
2113
|
// TODO: [🐙] DRY - Maybe $randomId
|
|
2095
|
-
this.id = `error-${$randomToken(8 /* <- TODO: To global config + Use Base58 to avoid
|
|
2114
|
+
this.id = `error-${$randomToken(8 /* <- TODO: To global config + Use Base58 to avoid similar char conflicts */)}`;
|
|
2096
2115
|
Object.setPrototypeOf(this, PipelineExecutionError.prototype);
|
|
2097
2116
|
}
|
|
2098
2117
|
}
|
|
@@ -2108,15 +2127,18 @@ class PipelineExecutionError extends Error {
|
|
|
2108
2127
|
* @public exported from `@promptbook/core`
|
|
2109
2128
|
*/
|
|
2110
2129
|
function isPipelinePrepared(pipeline) {
|
|
2111
|
-
// Note: Ignoring `pipeline.preparations`
|
|
2112
|
-
// Note: Ignoring `pipeline.knowledgePieces`
|
|
2130
|
+
// Note: Ignoring `pipeline.preparations`
|
|
2131
|
+
// Note: Ignoring `pipeline.knowledgePieces`
|
|
2113
2132
|
if (pipeline.title === undefined || pipeline.title === '' || pipeline.title === DEFAULT_BOOK_TITLE) {
|
|
2133
|
+
// console.log('Pipeline is not prepared because title is undefined or empty', pipeline);
|
|
2114
2134
|
return false;
|
|
2115
2135
|
}
|
|
2116
2136
|
if (!pipeline.personas.every((persona) => persona.modelsRequirements !== undefined)) {
|
|
2137
|
+
// console.log('Pipeline is not prepared because personas are not prepared', pipeline.personas);
|
|
2117
2138
|
return false;
|
|
2118
2139
|
}
|
|
2119
2140
|
if (!pipeline.knowledgeSources.every((knowledgeSource) => knowledgeSource.preparationIds !== undefined)) {
|
|
2141
|
+
//console.log('Pipeline is not prepared because knowledge sources are not prepared', pipeline.knowledgeSources);
|
|
2120
2142
|
return false;
|
|
2121
2143
|
}
|
|
2122
2144
|
/*
|
|
@@ -2137,75 +2159,6 @@ function isPipelinePrepared(pipeline) {
|
|
|
2137
2159
|
* - [♨] Are tasks prepared
|
|
2138
2160
|
*/
|
|
2139
2161
|
|
|
2140
|
-
/**
|
|
2141
|
-
* Converts a JavaScript Object Notation (JSON) string into an object.
|
|
2142
|
-
*
|
|
2143
|
-
* Note: This is wrapper around `JSON.parse()` with better error and type handling
|
|
2144
|
-
*
|
|
2145
|
-
* @public exported from `@promptbook/utils`
|
|
2146
|
-
*/
|
|
2147
|
-
function jsonParse(value) {
|
|
2148
|
-
if (value === undefined) {
|
|
2149
|
-
throw new Error(`Can not parse JSON from undefined value.`);
|
|
2150
|
-
}
|
|
2151
|
-
else if (typeof value !== 'string') {
|
|
2152
|
-
console.error('Can not parse JSON from non-string value.', { text: value });
|
|
2153
|
-
throw new Error(spaceTrim(`
|
|
2154
|
-
Can not parse JSON from non-string value.
|
|
2155
|
-
|
|
2156
|
-
The value type: ${typeof value}
|
|
2157
|
-
See more in console.
|
|
2158
|
-
`));
|
|
2159
|
-
}
|
|
2160
|
-
try {
|
|
2161
|
-
return JSON.parse(value);
|
|
2162
|
-
}
|
|
2163
|
-
catch (error) {
|
|
2164
|
-
if (!(error instanceof Error)) {
|
|
2165
|
-
throw error;
|
|
2166
|
-
}
|
|
2167
|
-
throw new Error(spaceTrim((block) => `
|
|
2168
|
-
${block(error.message)}
|
|
2169
|
-
|
|
2170
|
-
The JSON text:
|
|
2171
|
-
${block(value)}
|
|
2172
|
-
`));
|
|
2173
|
-
}
|
|
2174
|
-
}
|
|
2175
|
-
/**
|
|
2176
|
-
* TODO: !!!! Use in Promptbook.studio
|
|
2177
|
-
*/
|
|
2178
|
-
|
|
2179
|
-
/**
|
|
2180
|
-
* Recursively converts JSON strings to JSON objects
|
|
2181
|
-
|
|
2182
|
-
* @public exported from `@promptbook/utils`
|
|
2183
|
-
*/
|
|
2184
|
-
function jsonStringsToJsons(object) {
|
|
2185
|
-
if (object === null) {
|
|
2186
|
-
return object;
|
|
2187
|
-
}
|
|
2188
|
-
if (Array.isArray(object)) {
|
|
2189
|
-
return object.map(jsonStringsToJsons);
|
|
2190
|
-
}
|
|
2191
|
-
if (typeof object !== 'object') {
|
|
2192
|
-
return object;
|
|
2193
|
-
}
|
|
2194
|
-
const newObject = { ...object };
|
|
2195
|
-
for (const [key, value] of Object.entries(object)) {
|
|
2196
|
-
if (typeof value === 'string' && isValidJsonString(value)) {
|
|
2197
|
-
newObject[key] = jsonParse(value);
|
|
2198
|
-
}
|
|
2199
|
-
else {
|
|
2200
|
-
newObject[key] = jsonStringsToJsons(value);
|
|
2201
|
-
}
|
|
2202
|
-
}
|
|
2203
|
-
return newObject;
|
|
2204
|
-
}
|
|
2205
|
-
/**
|
|
2206
|
-
* TODO: Type the return type correctly
|
|
2207
|
-
*/
|
|
2208
|
-
|
|
2209
2162
|
/**
|
|
2210
2163
|
* This error indicates problems parsing the format value
|
|
2211
2164
|
*
|
|
@@ -2250,7 +2203,7 @@ class AuthenticationError extends Error {
|
|
|
2250
2203
|
}
|
|
2251
2204
|
|
|
2252
2205
|
/**
|
|
2253
|
-
* This error indicates that the pipeline collection cannot be
|
|
2206
|
+
* This error indicates that the pipeline collection cannot be properly loaded
|
|
2254
2207
|
*
|
|
2255
2208
|
* @public exported from `@promptbook/core`
|
|
2256
2209
|
*/
|
|
@@ -2389,6 +2342,101 @@ const ALL_ERRORS = {
|
|
|
2389
2342
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
2390
2343
|
*/
|
|
2391
2344
|
|
|
2345
|
+
/**
|
|
2346
|
+
* Serializes an error into a [🚉] JSON-serializable object
|
|
2347
|
+
*
|
|
2348
|
+
* @public exported from `@promptbook/utils`
|
|
2349
|
+
*/
|
|
2350
|
+
function serializeError(error) {
|
|
2351
|
+
const { name, message, stack } = error;
|
|
2352
|
+
const { id } = error;
|
|
2353
|
+
if (!Object.keys(ALL_ERRORS).includes(name)) {
|
|
2354
|
+
console.error(spaceTrim((block) => `
|
|
2355
|
+
|
|
2356
|
+
Cannot serialize error with name "${name}"
|
|
2357
|
+
|
|
2358
|
+
Authors of Promptbook probably forgot to add this error into the list of errors:
|
|
2359
|
+
https://github.com/webgptorg/promptbook/blob/main/src/errors/0-index.ts
|
|
2360
|
+
|
|
2361
|
+
|
|
2362
|
+
${block(stack || message)}
|
|
2363
|
+
|
|
2364
|
+
`));
|
|
2365
|
+
}
|
|
2366
|
+
return {
|
|
2367
|
+
name: name,
|
|
2368
|
+
message,
|
|
2369
|
+
stack,
|
|
2370
|
+
id, // Include id in the serialized object
|
|
2371
|
+
};
|
|
2372
|
+
}
|
|
2373
|
+
|
|
2374
|
+
/**
|
|
2375
|
+
* Converts a JavaScript Object Notation (JSON) string into an object.
|
|
2376
|
+
*
|
|
2377
|
+
* Note: This is wrapper around `JSON.parse()` with better error and type handling
|
|
2378
|
+
*
|
|
2379
|
+
* @public exported from `@promptbook/utils`
|
|
2380
|
+
*/
|
|
2381
|
+
function jsonParse(value) {
|
|
2382
|
+
if (value === undefined) {
|
|
2383
|
+
throw new Error(`Can not parse JSON from undefined value.`);
|
|
2384
|
+
}
|
|
2385
|
+
else if (typeof value !== 'string') {
|
|
2386
|
+
console.error('Can not parse JSON from non-string value.', { text: value });
|
|
2387
|
+
throw new Error(spaceTrim(`
|
|
2388
|
+
Can not parse JSON from non-string value.
|
|
2389
|
+
|
|
2390
|
+
The value type: ${typeof value}
|
|
2391
|
+
See more in console.
|
|
2392
|
+
`));
|
|
2393
|
+
}
|
|
2394
|
+
try {
|
|
2395
|
+
return JSON.parse(value);
|
|
2396
|
+
}
|
|
2397
|
+
catch (error) {
|
|
2398
|
+
if (!(error instanceof Error)) {
|
|
2399
|
+
throw error;
|
|
2400
|
+
}
|
|
2401
|
+
throw new Error(spaceTrim((block) => `
|
|
2402
|
+
${block(error.message)}
|
|
2403
|
+
|
|
2404
|
+
The JSON text:
|
|
2405
|
+
${block(value)}
|
|
2406
|
+
`));
|
|
2407
|
+
}
|
|
2408
|
+
}
|
|
2409
|
+
|
|
2410
|
+
/**
|
|
2411
|
+
* Recursively converts JSON strings to JSON objects
|
|
2412
|
+
|
|
2413
|
+
* @public exported from `@promptbook/utils`
|
|
2414
|
+
*/
|
|
2415
|
+
function jsonStringsToJsons(object) {
|
|
2416
|
+
if (object === null) {
|
|
2417
|
+
return object;
|
|
2418
|
+
}
|
|
2419
|
+
if (Array.isArray(object)) {
|
|
2420
|
+
return object.map(jsonStringsToJsons);
|
|
2421
|
+
}
|
|
2422
|
+
if (typeof object !== 'object') {
|
|
2423
|
+
return object;
|
|
2424
|
+
}
|
|
2425
|
+
const newObject = { ...object };
|
|
2426
|
+
for (const [key, value] of Object.entries(object)) {
|
|
2427
|
+
if (typeof value === 'string' && isValidJsonString(value)) {
|
|
2428
|
+
newObject[key] = jsonParse(value);
|
|
2429
|
+
}
|
|
2430
|
+
else {
|
|
2431
|
+
newObject[key] = jsonStringsToJsons(value);
|
|
2432
|
+
}
|
|
2433
|
+
}
|
|
2434
|
+
return newObject;
|
|
2435
|
+
}
|
|
2436
|
+
/**
|
|
2437
|
+
* TODO: Type the return type correctly
|
|
2438
|
+
*/
|
|
2439
|
+
|
|
2392
2440
|
/**
|
|
2393
2441
|
* Deserializes the error object
|
|
2394
2442
|
*
|
|
@@ -2463,8 +2511,9 @@ function assertsTaskSuccessful(executionResult) {
|
|
|
2463
2511
|
*/
|
|
2464
2512
|
function createTask(options) {
|
|
2465
2513
|
const { taskType, taskProcessCallback } = options;
|
|
2514
|
+
let { title } = options;
|
|
2466
2515
|
// TODO: [🐙] DRY
|
|
2467
|
-
const taskId = `${taskType.toLowerCase().substring(0, 4)}-${$randomToken(8 /* <- TODO: To global config + Use Base58 to avoid
|
|
2516
|
+
const taskId = `${taskType.toLowerCase().substring(0, 4)}-${$randomToken(8 /* <- TODO: To global config + Use Base58 to avoid similar char conflicts */)}`;
|
|
2468
2517
|
let status = 'RUNNING';
|
|
2469
2518
|
const createdAt = new Date();
|
|
2470
2519
|
let updatedAt = createdAt;
|
|
@@ -2474,6 +2523,10 @@ function createTask(options) {
|
|
|
2474
2523
|
const partialResultSubject = new Subject();
|
|
2475
2524
|
// <- Note: Not using `BehaviorSubject` because on error we can't access the last value
|
|
2476
2525
|
const finalResultPromise = /* not await */ taskProcessCallback((newOngoingResult) => {
|
|
2526
|
+
if (newOngoingResult.title) {
|
|
2527
|
+
title = newOngoingResult.title;
|
|
2528
|
+
}
|
|
2529
|
+
updatedAt = new Date();
|
|
2477
2530
|
Object.assign(currentValue, newOngoingResult);
|
|
2478
2531
|
// <- TODO: assign deep
|
|
2479
2532
|
partialResultSubject.next(newOngoingResult);
|
|
@@ -2519,17 +2572,24 @@ function createTask(options) {
|
|
|
2519
2572
|
return {
|
|
2520
2573
|
taskType,
|
|
2521
2574
|
taskId,
|
|
2575
|
+
get promptbookVersion() {
|
|
2576
|
+
return PROMPTBOOK_ENGINE_VERSION;
|
|
2577
|
+
},
|
|
2578
|
+
get title() {
|
|
2579
|
+
return title;
|
|
2580
|
+
// <- Note: [1] These must be getters to allow changing the value in the future
|
|
2581
|
+
},
|
|
2522
2582
|
get status() {
|
|
2523
2583
|
return status;
|
|
2524
|
-
// <- Note: [1]
|
|
2584
|
+
// <- Note: [1] --||--
|
|
2525
2585
|
},
|
|
2526
2586
|
get createdAt() {
|
|
2527
2587
|
return createdAt;
|
|
2528
|
-
// <- Note: [1]
|
|
2588
|
+
// <- Note: [1] --||--
|
|
2529
2589
|
},
|
|
2530
2590
|
get updatedAt() {
|
|
2531
2591
|
return updatedAt;
|
|
2532
|
-
// <- Note: [1]
|
|
2592
|
+
// <- Note: [1] --||--
|
|
2533
2593
|
},
|
|
2534
2594
|
asPromise,
|
|
2535
2595
|
asObservable() {
|
|
@@ -2537,15 +2597,15 @@ function createTask(options) {
|
|
|
2537
2597
|
},
|
|
2538
2598
|
get errors() {
|
|
2539
2599
|
return errors;
|
|
2540
|
-
// <- Note: [1]
|
|
2600
|
+
// <- Note: [1] --||--
|
|
2541
2601
|
},
|
|
2542
2602
|
get warnings() {
|
|
2543
2603
|
return warnings;
|
|
2544
|
-
// <- Note: [1]
|
|
2604
|
+
// <- Note: [1] --||--
|
|
2545
2605
|
},
|
|
2546
2606
|
get currentValue() {
|
|
2547
2607
|
return currentValue;
|
|
2548
|
-
// <- Note: [1]
|
|
2608
|
+
// <- Note: [1] --||--
|
|
2549
2609
|
},
|
|
2550
2610
|
};
|
|
2551
2611
|
}
|
|
@@ -2554,64 +2614,6 @@ function createTask(options) {
|
|
|
2554
2614
|
* TODO: [🐚] Split into more files and make `PrepareTask` & `RemoteTask` + split the function
|
|
2555
2615
|
*/
|
|
2556
2616
|
|
|
2557
|
-
/**
|
|
2558
|
-
* Serializes an error into a [🚉] JSON-serializable object
|
|
2559
|
-
*
|
|
2560
|
-
* @public exported from `@promptbook/utils`
|
|
2561
|
-
*/
|
|
2562
|
-
function serializeError(error) {
|
|
2563
|
-
const { name, message, stack } = error;
|
|
2564
|
-
const { id } = error;
|
|
2565
|
-
if (!Object.keys(ALL_ERRORS).includes(name)) {
|
|
2566
|
-
console.error(spaceTrim((block) => `
|
|
2567
|
-
|
|
2568
|
-
Cannot serialize error with name "${name}"
|
|
2569
|
-
|
|
2570
|
-
Authors of Promptbook probably forgot to add this error into the list of errors:
|
|
2571
|
-
https://github.com/webgptorg/promptbook/blob/main/src/errors/0-index.ts
|
|
2572
|
-
|
|
2573
|
-
|
|
2574
|
-
${block(stack || message)}
|
|
2575
|
-
|
|
2576
|
-
`));
|
|
2577
|
-
}
|
|
2578
|
-
return {
|
|
2579
|
-
name: name,
|
|
2580
|
-
message,
|
|
2581
|
-
stack,
|
|
2582
|
-
id, // Include id in the serialized object
|
|
2583
|
-
};
|
|
2584
|
-
}
|
|
2585
|
-
|
|
2586
|
-
/**
|
|
2587
|
-
* Async version of Array.forEach
|
|
2588
|
-
*
|
|
2589
|
-
* @param array - Array to iterate over
|
|
2590
|
-
* @param options - Options for the function
|
|
2591
|
-
* @param callbackfunction - Function to call for each item
|
|
2592
|
-
* @public exported from `@promptbook/utils`
|
|
2593
|
-
* @deprecated [🪂] Use queues instead
|
|
2594
|
-
*/
|
|
2595
|
-
async function forEachAsync(array, options, callbackfunction) {
|
|
2596
|
-
const { maxParallelCount = Infinity } = options;
|
|
2597
|
-
let index = 0;
|
|
2598
|
-
let runningTasks = [];
|
|
2599
|
-
const tasks = [];
|
|
2600
|
-
for (const item of array) {
|
|
2601
|
-
const currentIndex = index++;
|
|
2602
|
-
const task = callbackfunction(item, currentIndex, array);
|
|
2603
|
-
tasks.push(task);
|
|
2604
|
-
runningTasks.push(task);
|
|
2605
|
-
/* not await */ Promise.resolve(task).then(() => {
|
|
2606
|
-
runningTasks = runningTasks.filter((t) => t !== task);
|
|
2607
|
-
});
|
|
2608
|
-
if (maxParallelCount < runningTasks.length) {
|
|
2609
|
-
await Promise.race(runningTasks);
|
|
2610
|
-
}
|
|
2611
|
-
}
|
|
2612
|
-
await Promise.all(tasks);
|
|
2613
|
-
}
|
|
2614
|
-
|
|
2615
2617
|
/**
|
|
2616
2618
|
* Represents the uncertain value
|
|
2617
2619
|
*
|
|
@@ -2655,7 +2657,7 @@ const ZERO_USAGE = $deepFreeze({
|
|
|
2655
2657
|
*
|
|
2656
2658
|
* @public exported from `@promptbook/core`
|
|
2657
2659
|
*/
|
|
2658
|
-
$deepFreeze({
|
|
2660
|
+
const UNCERTAIN_USAGE = $deepFreeze({
|
|
2659
2661
|
price: UNCERTAIN_ZERO_VALUE,
|
|
2660
2662
|
input: {
|
|
2661
2663
|
tokensCount: UNCERTAIN_ZERO_VALUE,
|
|
@@ -2680,6 +2682,35 @@ $deepFreeze({
|
|
|
2680
2682
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
2681
2683
|
*/
|
|
2682
2684
|
|
|
2685
|
+
/**
|
|
2686
|
+
* Async version of Array.forEach
|
|
2687
|
+
*
|
|
2688
|
+
* @param array - Array to iterate over
|
|
2689
|
+
* @param options - Options for the function
|
|
2690
|
+
* @param callbackfunction - Function to call for each item
|
|
2691
|
+
* @public exported from `@promptbook/utils`
|
|
2692
|
+
* @deprecated [🪂] Use queues instead
|
|
2693
|
+
*/
|
|
2694
|
+
async function forEachAsync(array, options, callbackfunction) {
|
|
2695
|
+
const { maxParallelCount = Infinity } = options;
|
|
2696
|
+
let index = 0;
|
|
2697
|
+
let runningTasks = [];
|
|
2698
|
+
const tasks = [];
|
|
2699
|
+
for (const item of array) {
|
|
2700
|
+
const currentIndex = index++;
|
|
2701
|
+
const task = callbackfunction(item, currentIndex, array);
|
|
2702
|
+
tasks.push(task);
|
|
2703
|
+
runningTasks.push(task);
|
|
2704
|
+
/* not await */ Promise.resolve(task).then(() => {
|
|
2705
|
+
runningTasks = runningTasks.filter((t) => t !== task);
|
|
2706
|
+
});
|
|
2707
|
+
if (maxParallelCount < runningTasks.length) {
|
|
2708
|
+
await Promise.race(runningTasks);
|
|
2709
|
+
}
|
|
2710
|
+
}
|
|
2711
|
+
await Promise.all(tasks);
|
|
2712
|
+
}
|
|
2713
|
+
|
|
2683
2714
|
/**
|
|
2684
2715
|
* Function `addUsage` will add multiple usages into one
|
|
2685
2716
|
*
|
|
@@ -2739,12 +2770,14 @@ function countUsage(llmTools) {
|
|
|
2739
2770
|
const spending = new Subject();
|
|
2740
2771
|
const proxyTools = {
|
|
2741
2772
|
get title() {
|
|
2742
|
-
|
|
2743
|
-
|
|
2773
|
+
return `${llmTools.title} (+usage)`;
|
|
2774
|
+
// <- TODO: [🧈] Maybe standartize the suffix when wrapping `LlmExecutionTools` up
|
|
2775
|
+
// <- TODO: [🧈][🧠] Does it make sence to suffix "(+usage)"?
|
|
2744
2776
|
},
|
|
2745
2777
|
get description() {
|
|
2746
|
-
|
|
2747
|
-
|
|
2778
|
+
return `${llmTools.description} (+usage)`;
|
|
2779
|
+
// <- TODO: [🧈] Maybe standartize the suffix when wrapping `LlmExecutionTools` up
|
|
2780
|
+
// <- TODO: [🧈][🧠] Does it make sence to suffix "(+usage)"?
|
|
2748
2781
|
},
|
|
2749
2782
|
checkConfiguration() {
|
|
2750
2783
|
return /* not await */ llmTools.checkConfiguration();
|
|
@@ -2815,29 +2848,40 @@ class MultipleLlmExecutionTools {
|
|
|
2815
2848
|
return 'Multiple LLM Providers';
|
|
2816
2849
|
}
|
|
2817
2850
|
get description() {
|
|
2818
|
-
|
|
2851
|
+
const innerModelsTitlesAndDescriptions = this.llmExecutionTools
|
|
2852
|
+
.map(({ title, description }, index) => {
|
|
2853
|
+
const headLine = `${index + 1}) \`${title}\``;
|
|
2854
|
+
if (description === undefined) {
|
|
2855
|
+
return headLine;
|
|
2856
|
+
}
|
|
2857
|
+
return spaceTrim((block) => `
|
|
2858
|
+
${headLine}
|
|
2859
|
+
|
|
2860
|
+
${ /* <- Note: Indenting the description: */block(description)}
|
|
2861
|
+
`);
|
|
2862
|
+
})
|
|
2863
|
+
.join('\n\n');
|
|
2864
|
+
return spaceTrim((block) => `
|
|
2865
|
+
Multiple LLM Providers:
|
|
2866
|
+
|
|
2867
|
+
${block(innerModelsTitlesAndDescriptions)}
|
|
2868
|
+
`);
|
|
2819
2869
|
}
|
|
2820
2870
|
/**
|
|
2821
2871
|
* Check the configuration of all execution tools
|
|
2822
2872
|
*/
|
|
2823
2873
|
async checkConfiguration() {
|
|
2824
|
-
//
|
|
2825
|
-
|
|
2826
|
-
await llmExecutionTools.checkConfiguration();
|
|
2827
|
-
}
|
|
2874
|
+
// Note: Run checks in parallel
|
|
2875
|
+
await Promise.all(this.llmExecutionTools.map((tools) => tools.checkConfiguration()));
|
|
2828
2876
|
}
|
|
2829
2877
|
/**
|
|
2830
2878
|
* List all available models that can be used
|
|
2831
2879
|
* This lists is a combination of all available models from all execution tools
|
|
2832
2880
|
*/
|
|
2833
2881
|
async listModels() {
|
|
2834
|
-
|
|
2835
|
-
|
|
2836
|
-
|
|
2837
|
-
const models = await llmExecutionTools.listModels();
|
|
2838
|
-
availableModels.push(...models);
|
|
2839
|
-
}
|
|
2840
|
-
return availableModels;
|
|
2882
|
+
// Obtain all models in parallel and flatten
|
|
2883
|
+
const modelArrays = await Promise.all(this.llmExecutionTools.map((tools) => tools.listModels()));
|
|
2884
|
+
return modelArrays.flat();
|
|
2841
2885
|
}
|
|
2842
2886
|
/**
|
|
2843
2887
|
* Calls the best available chat model
|
|
@@ -3078,7 +3122,8 @@ async function preparePersona(personaDescription, tools, options) {
|
|
|
3078
3122
|
*/
|
|
3079
3123
|
|
|
3080
3124
|
/**
|
|
3081
|
-
*
|
|
3125
|
+
* Safely retrieves the global scope object (window in browser, global in Node.js)
|
|
3126
|
+
* regardless of the JavaScript environment in which the code is running
|
|
3082
3127
|
*
|
|
3083
3128
|
* Note: `$` is used to indicate that this function is not a pure function - it access global scope
|
|
3084
3129
|
*
|
|
@@ -3089,10 +3134,10 @@ function $getGlobalScope() {
|
|
|
3089
3134
|
}
|
|
3090
3135
|
|
|
3091
3136
|
/**
|
|
3092
|
-
*
|
|
3137
|
+
* Normalizes a text string to SCREAMING_CASE (all uppercase with underscores).
|
|
3093
3138
|
*
|
|
3094
|
-
* @param text
|
|
3095
|
-
* @returns
|
|
3139
|
+
* @param text The text string to be converted to SCREAMING_CASE format.
|
|
3140
|
+
* @returns The normalized text in SCREAMING_CASE format.
|
|
3096
3141
|
* @example 'HELLO_WORLD'
|
|
3097
3142
|
* @example 'I_LOVE_PROMPTBOOK'
|
|
3098
3143
|
* @public exported from `@promptbook/utils`
|
|
@@ -3144,10 +3189,10 @@ function normalizeTo_SCREAMING_CASE(text) {
|
|
|
3144
3189
|
*/
|
|
3145
3190
|
|
|
3146
3191
|
/**
|
|
3147
|
-
*
|
|
3192
|
+
* Normalizes a text string to snake_case format.
|
|
3148
3193
|
*
|
|
3149
|
-
* @param text
|
|
3150
|
-
* @returns
|
|
3194
|
+
* @param text The text string to be converted to snake_case format.
|
|
3195
|
+
* @returns The normalized text in snake_case format.
|
|
3151
3196
|
* @example 'hello_world'
|
|
3152
3197
|
* @example 'i_love_promptbook'
|
|
3153
3198
|
* @public exported from `@promptbook/utils`
|
|
@@ -3157,11 +3202,11 @@ function normalizeTo_snake_case(text) {
|
|
|
3157
3202
|
}
|
|
3158
3203
|
|
|
3159
3204
|
/**
|
|
3160
|
-
*
|
|
3205
|
+
* Global registry for storing and managing registered entities of a given type.
|
|
3161
3206
|
*
|
|
3162
3207
|
* Note: `$` is used to indicate that this function is not a pure function - it accesses and adds variables in global scope.
|
|
3163
3208
|
*
|
|
3164
|
-
* @private internal utility, exported are only
|
|
3209
|
+
* @private internal utility, exported are only singleton instances of this class
|
|
3165
3210
|
*/
|
|
3166
3211
|
class $Register {
|
|
3167
3212
|
constructor(registerName) {
|
|
@@ -3205,10 +3250,10 @@ class $Register {
|
|
|
3205
3250
|
}
|
|
3206
3251
|
|
|
3207
3252
|
/**
|
|
3208
|
-
*
|
|
3253
|
+
* Global registry for storing metadata about all available scrapers and converters.
|
|
3209
3254
|
*
|
|
3210
|
-
* Note: `$` is used to indicate that this interacts with the global scope
|
|
3211
|
-
* @singleton Only one instance of each register is created per build, but
|
|
3255
|
+
* Note: `$` is used to indicate that this interacts with the global scope.
|
|
3256
|
+
* @singleton Only one instance of each register is created per build, but there can be more in different contexts (e.g., tests).
|
|
3212
3257
|
* @public exported from `@promptbook/core`
|
|
3213
3258
|
*/
|
|
3214
3259
|
const $scrapersMetadataRegister = new $Register('scrapers_metadata');
|
|
@@ -3217,10 +3262,11 @@ const $scrapersMetadataRegister = new $Register('scrapers_metadata');
|
|
|
3217
3262
|
*/
|
|
3218
3263
|
|
|
3219
3264
|
/**
|
|
3220
|
-
*
|
|
3265
|
+
* Registry for all available scrapers in the system.
|
|
3266
|
+
* Central point for registering and accessing different types of content scrapers.
|
|
3221
3267
|
*
|
|
3222
3268
|
* Note: `$` is used to indicate that this interacts with the global scope
|
|
3223
|
-
* @singleton Only one instance of each register is created per build, but
|
|
3269
|
+
* @singleton Only one instance of each register is created per build, but there can be more than one in different build modules
|
|
3224
3270
|
* @public exported from `@promptbook/core`
|
|
3225
3271
|
*/
|
|
3226
3272
|
const $scrapersRegister = new $Register('scraper_constructors');
|
|
@@ -3398,7 +3444,9 @@ const promptbookFetch = async (urlOrRequest, init) => {
|
|
|
3398
3444
|
*/
|
|
3399
3445
|
|
|
3400
3446
|
/**
|
|
3401
|
-
*
|
|
3447
|
+
* Factory function that creates a handler for processing knowledge sources.
|
|
3448
|
+
* Provides standardized processing of different types of knowledge sources
|
|
3449
|
+
* across various scraper implementations.
|
|
3402
3450
|
*
|
|
3403
3451
|
* @public exported from `@promptbook/core`
|
|
3404
3452
|
*/
|
|
@@ -3537,7 +3585,7 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3537
3585
|
}
|
|
3538
3586
|
|
|
3539
3587
|
/**
|
|
3540
|
-
* Prepares the
|
|
3588
|
+
* Prepares the knowledge pieces
|
|
3541
3589
|
*
|
|
3542
3590
|
* @see https://github.com/webgptorg/promptbook/discussions/41
|
|
3543
3591
|
* @public exported from `@promptbook/core`
|
|
@@ -3633,15 +3681,18 @@ TODO: [🧊] This is how it can look in future
|
|
|
3633
3681
|
* TODO: [🧊] In future one preparation can take data from previous preparation and save tokens and time
|
|
3634
3682
|
* Put `knowledgePieces` into `PrepareKnowledgeOptions`
|
|
3635
3683
|
* TODO: [🪂] More than max things can run in parallel by acident [1,[2a,2b,_],[3a,3b,_]]
|
|
3636
|
-
* TODO: [🧠][❎] Do here
|
|
3684
|
+
* TODO: [🧠][❎] Do here proper M:N mapping
|
|
3637
3685
|
* [x] One source can make multiple pieces
|
|
3638
3686
|
* [ ] One piece can have multiple sources
|
|
3639
3687
|
*/
|
|
3640
3688
|
|
|
3641
3689
|
/**
|
|
3642
|
-
*
|
|
3690
|
+
* Prepares tasks by adding knowledge to the prompt and ensuring all necessary parameters are included.
|
|
3643
3691
|
*
|
|
3644
|
-
* @
|
|
3692
|
+
* @param tasks Sequence of tasks that are chained together to form a pipeline
|
|
3693
|
+
* @returns A promise that resolves to the prepared tasks.
|
|
3694
|
+
*
|
|
3695
|
+
* @private internal utility of `preparePipeline`
|
|
3645
3696
|
*/
|
|
3646
3697
|
async function prepareTasks(pipeline, tools, options) {
|
|
3647
3698
|
const { maxParallelCount = DEFAULT_MAX_PARALLEL_COUNT } = options;
|
|
@@ -3679,7 +3730,7 @@ async function prepareTasks(pipeline, tools, options) {
|
|
|
3679
3730
|
return { tasksPrepared };
|
|
3680
3731
|
}
|
|
3681
3732
|
/**
|
|
3682
|
-
* TODO: [😂] Adding knowledge should be convert to async high-level abstractions,
|
|
3733
|
+
* TODO: [😂] Adding knowledge should be convert to async high-level abstractions, similar thing with expectations to sync high-level abstractions
|
|
3683
3734
|
* TODO: [🧠] Add context to each task (if missing)
|
|
3684
3735
|
* TODO: [🧠] What is better name `prepareTask` or `prepareTaskAndParameters`
|
|
3685
3736
|
* TODO: [♨][main] !!3 Prepare index the examples and maybe tasks
|
|
@@ -3815,7 +3866,7 @@ async function preparePipeline(pipeline, tools, options) {
|
|
|
3815
3866
|
order: ORDER_OF_PIPELINE_JSON,
|
|
3816
3867
|
value: {
|
|
3817
3868
|
...pipeline,
|
|
3818
|
-
// <- TODO:
|
|
3869
|
+
// <- TODO: Probably deeply clone the pipeline because `$exportJson` freezes the subobjects
|
|
3819
3870
|
title,
|
|
3820
3871
|
knowledgeSources: knowledgeSourcesPrepared,
|
|
3821
3872
|
knowledgePieces: knowledgePiecesPrepared,
|
|
@@ -4078,7 +4129,7 @@ function union(...sets) {
|
|
|
4078
4129
|
}
|
|
4079
4130
|
|
|
4080
4131
|
/**
|
|
4081
|
-
*
|
|
4132
|
+
* Contains configuration options for parsing and generating CSV files, such as delimiters and quoting rules.
|
|
4082
4133
|
*
|
|
4083
4134
|
* @public exported from `@promptbook/core`
|
|
4084
4135
|
*/
|
|
@@ -4087,11 +4138,29 @@ const MANDATORY_CSV_SETTINGS = Object.freeze({
|
|
|
4087
4138
|
// encoding: 'utf-8',
|
|
4088
4139
|
});
|
|
4089
4140
|
|
|
4141
|
+
/**
|
|
4142
|
+
* Converts a CSV string into an object
|
|
4143
|
+
*
|
|
4144
|
+
* Note: This is wrapper around `papaparse.parse()` with better autohealing
|
|
4145
|
+
*
|
|
4146
|
+
* @private - for now until `@promptbook/csv` is released
|
|
4147
|
+
*/
|
|
4148
|
+
function csvParse(value /* <- TODO: string_csv */, settings, schema /* <- TODO: Make CSV Schemas */) {
|
|
4149
|
+
settings = { ...settings, ...MANDATORY_CSV_SETTINGS };
|
|
4150
|
+
// Note: Autoheal invalid '\n' characters
|
|
4151
|
+
if (settings.newline && !settings.newline.includes('\r') && value.includes('\r')) {
|
|
4152
|
+
console.warn('CSV string contains carriage return characters, but in the CSV settings the `newline` setting does not include them. Autohealing the CSV string.');
|
|
4153
|
+
value = value.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
|
|
4154
|
+
}
|
|
4155
|
+
const csv = parse(value, settings);
|
|
4156
|
+
return csv;
|
|
4157
|
+
}
|
|
4158
|
+
|
|
4090
4159
|
/**
|
|
4091
4160
|
* Function to check if a string is valid CSV
|
|
4092
4161
|
*
|
|
4093
4162
|
* @param value The string to check
|
|
4094
|
-
* @returns
|
|
4163
|
+
* @returns `true` if the string is a valid CSV string, false otherwise
|
|
4095
4164
|
*
|
|
4096
4165
|
* @public exported from `@promptbook/utils`
|
|
4097
4166
|
*/
|
|
@@ -4115,7 +4184,7 @@ function isValidCsvString(value) {
|
|
|
4115
4184
|
* @public exported from `@promptbook/core`
|
|
4116
4185
|
* <- TODO: [🏢] Export from package `@promptbook/csv`
|
|
4117
4186
|
*/
|
|
4118
|
-
const
|
|
4187
|
+
const CsvFormatParser = {
|
|
4119
4188
|
formatName: 'CSV',
|
|
4120
4189
|
aliases: ['SPREADSHEET', 'TABLE'],
|
|
4121
4190
|
isValid(value, settings, schema) {
|
|
@@ -4127,12 +4196,12 @@ const CsvFormatDefinition = {
|
|
|
4127
4196
|
heal(value, settings, schema) {
|
|
4128
4197
|
throw new Error('Not implemented');
|
|
4129
4198
|
},
|
|
4130
|
-
|
|
4199
|
+
subvalueParsers: [
|
|
4131
4200
|
{
|
|
4132
4201
|
subvalueName: 'ROW',
|
|
4133
|
-
async mapValues(
|
|
4134
|
-
|
|
4135
|
-
const csv =
|
|
4202
|
+
async mapValues(options) {
|
|
4203
|
+
const { value, outputParameterName, settings, mapCallback, onProgress } = options;
|
|
4204
|
+
const csv = csvParse(value, settings);
|
|
4136
4205
|
if (csv.errors.length !== 0) {
|
|
4137
4206
|
throw new CsvFormatError(spaceTrim((block) => `
|
|
4138
4207
|
CSV parsing error
|
|
@@ -4147,23 +4216,37 @@ const CsvFormatDefinition = {
|
|
|
4147
4216
|
${block(value)}
|
|
4148
4217
|
`));
|
|
4149
4218
|
}
|
|
4150
|
-
const mappedData =
|
|
4219
|
+
const mappedData = [];
|
|
4220
|
+
const length = csv.data.length;
|
|
4221
|
+
for (let index = 0; index < length; index++) {
|
|
4222
|
+
const row = csv.data[index];
|
|
4151
4223
|
if (row[outputParameterName]) {
|
|
4152
4224
|
throw new CsvFormatError(`Can not overwrite existing column "${outputParameterName}" in CSV row`);
|
|
4153
4225
|
}
|
|
4154
|
-
|
|
4226
|
+
const mappedRow = {
|
|
4155
4227
|
...row,
|
|
4156
|
-
[outputParameterName]: await mapCallback(row, index),
|
|
4228
|
+
[outputParameterName]: await mapCallback(row, index, length),
|
|
4157
4229
|
};
|
|
4158
|
-
|
|
4230
|
+
mappedData.push(mappedRow);
|
|
4231
|
+
if (onProgress) {
|
|
4232
|
+
// Note: Report the CSV with all rows mapped so far
|
|
4233
|
+
/*
|
|
4234
|
+
// TODO: [🛕] Report progress with all the rows including the pending ones
|
|
4235
|
+
const progressData = mappedData.map((row, i) =>
|
|
4236
|
+
i > index ? { ...row, [outputParameterName]: PENDING_VALUE_PLACEHOLDER } : row,
|
|
4237
|
+
);
|
|
4238
|
+
*/
|
|
4239
|
+
await onProgress(unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS }));
|
|
4240
|
+
}
|
|
4241
|
+
}
|
|
4159
4242
|
return unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
|
|
4160
4243
|
},
|
|
4161
4244
|
},
|
|
4162
4245
|
{
|
|
4163
4246
|
subvalueName: 'CELL',
|
|
4164
|
-
async mapValues(
|
|
4165
|
-
|
|
4166
|
-
const csv =
|
|
4247
|
+
async mapValues(options) {
|
|
4248
|
+
const { value, settings, mapCallback, onProgress } = options;
|
|
4249
|
+
const csv = csvParse(value, settings);
|
|
4167
4250
|
if (csv.errors.length !== 0) {
|
|
4168
4251
|
throw new CsvFormatError(spaceTrim((block) => `
|
|
4169
4252
|
CSV parsing error
|
|
@@ -4179,9 +4262,9 @@ const CsvFormatDefinition = {
|
|
|
4179
4262
|
`));
|
|
4180
4263
|
}
|
|
4181
4264
|
const mappedData = await Promise.all(csv.data.map(async (row, rowIndex) => {
|
|
4182
|
-
return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex) => {
|
|
4265
|
+
return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex, array) => {
|
|
4183
4266
|
const index = rowIndex * Object.keys(row).length + columnIndex;
|
|
4184
|
-
return /* not await */ mapCallback({ [key]: value }, index);
|
|
4267
|
+
return /* not await */ mapCallback({ [key]: value }, index, array.length);
|
|
4185
4268
|
}));
|
|
4186
4269
|
}));
|
|
4187
4270
|
return unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
|
|
@@ -4190,10 +4273,10 @@ const CsvFormatDefinition = {
|
|
|
4190
4273
|
],
|
|
4191
4274
|
};
|
|
4192
4275
|
/**
|
|
4193
|
-
* TODO: [🍓] In `
|
|
4194
|
-
* TODO: [🍓] In `
|
|
4195
|
-
* TODO: [🍓] In `
|
|
4196
|
-
* TODO: [🍓] In `
|
|
4276
|
+
* TODO: [🍓] In `CsvFormatParser` implement simple `isValid`
|
|
4277
|
+
* TODO: [🍓] In `CsvFormatParser` implement partial `canBeValid`
|
|
4278
|
+
* TODO: [🍓] In `CsvFormatParser` implement `heal
|
|
4279
|
+
* TODO: [🍓] In `CsvFormatParser` implement `subvalueParsers`
|
|
4197
4280
|
* TODO: [🏢] Allow to expect something inside CSV objects and other formats
|
|
4198
4281
|
*/
|
|
4199
4282
|
|
|
@@ -4202,7 +4285,7 @@ const CsvFormatDefinition = {
|
|
|
4202
4285
|
*
|
|
4203
4286
|
* @private still in development [🏢]
|
|
4204
4287
|
*/
|
|
4205
|
-
const
|
|
4288
|
+
const JsonFormatParser = {
|
|
4206
4289
|
formatName: 'JSON',
|
|
4207
4290
|
mimeType: 'application/json',
|
|
4208
4291
|
isValid(value, settings, schema) {
|
|
@@ -4214,28 +4297,28 @@ const JsonFormatDefinition = {
|
|
|
4214
4297
|
heal(value, settings, schema) {
|
|
4215
4298
|
throw new Error('Not implemented');
|
|
4216
4299
|
},
|
|
4217
|
-
|
|
4300
|
+
subvalueParsers: [],
|
|
4218
4301
|
};
|
|
4219
4302
|
/**
|
|
4220
|
-
* TODO: [🧠] Maybe
|
|
4303
|
+
* TODO: [🧠] Maybe proper instance of object
|
|
4221
4304
|
* TODO: [0] Make string_serialized_json
|
|
4222
4305
|
* TODO: [1] Make type for JSON Settings and Schema
|
|
4223
4306
|
* TODO: [🧠] What to use for validating JSONs - JSON Schema, ZoD, typescript types/interfaces,...?
|
|
4224
|
-
* TODO: [🍓] In `
|
|
4225
|
-
* TODO: [🍓] In `
|
|
4226
|
-
* TODO: [🍓] In `
|
|
4227
|
-
* TODO: [🍓] In `
|
|
4307
|
+
* TODO: [🍓] In `JsonFormatParser` implement simple `isValid`
|
|
4308
|
+
* TODO: [🍓] In `JsonFormatParser` implement partial `canBeValid`
|
|
4309
|
+
* TODO: [🍓] In `JsonFormatParser` implement `heal
|
|
4310
|
+
* TODO: [🍓] In `JsonFormatParser` implement `subvalueParsers`
|
|
4228
4311
|
* TODO: [🏢] Allow to expect something inside JSON objects and other formats
|
|
4229
4312
|
*/
|
|
4230
4313
|
|
|
4231
4314
|
/**
|
|
4232
4315
|
* Definition for any text - this will be always valid
|
|
4233
4316
|
*
|
|
4234
|
-
* Note: This is not useful for validation, but for splitting and mapping with `
|
|
4317
|
+
* Note: This is not useful for validation, but for splitting and mapping with `subvalueParsers`
|
|
4235
4318
|
*
|
|
4236
4319
|
* @public exported from `@promptbook/core`
|
|
4237
4320
|
*/
|
|
4238
|
-
const
|
|
4321
|
+
const TextFormatParser = {
|
|
4239
4322
|
formatName: 'TEXT',
|
|
4240
4323
|
isValid(value) {
|
|
4241
4324
|
return typeof value === 'string';
|
|
@@ -4244,19 +4327,20 @@ const TextFormatDefinition = {
|
|
|
4244
4327
|
return typeof partialValue === 'string';
|
|
4245
4328
|
},
|
|
4246
4329
|
heal() {
|
|
4247
|
-
throw new UnexpectedError('It does not make sense to call `
|
|
4330
|
+
throw new UnexpectedError('It does not make sense to call `TextFormatParser.heal`');
|
|
4248
4331
|
},
|
|
4249
|
-
|
|
4332
|
+
subvalueParsers: [
|
|
4250
4333
|
{
|
|
4251
4334
|
subvalueName: 'LINE',
|
|
4252
|
-
async mapValues(
|
|
4335
|
+
async mapValues(options) {
|
|
4336
|
+
const { value, mapCallback, onProgress } = options;
|
|
4253
4337
|
const lines = value.split('\n');
|
|
4254
|
-
const mappedLines = await Promise.all(lines.map((lineContent, lineNumber) =>
|
|
4338
|
+
const mappedLines = await Promise.all(lines.map((lineContent, lineNumber, array) =>
|
|
4255
4339
|
// TODO: [🧠] Maybe option to skip empty line
|
|
4256
4340
|
/* not await */ mapCallback({
|
|
4257
4341
|
lineContent,
|
|
4258
4342
|
// TODO: [🧠] Maybe also put here `lineNumber`
|
|
4259
|
-
}, lineNumber)));
|
|
4343
|
+
}, lineNumber, array.length)));
|
|
4260
4344
|
return mappedLines.join('\n');
|
|
4261
4345
|
},
|
|
4262
4346
|
},
|
|
@@ -4266,10 +4350,10 @@ const TextFormatDefinition = {
|
|
|
4266
4350
|
/**
|
|
4267
4351
|
* TODO: [1] Make type for XML Text and Schema
|
|
4268
4352
|
* TODO: [🧠][🤠] Here should be all words, characters, lines, paragraphs, pages available as subvalues
|
|
4269
|
-
* TODO: [🍓] In `
|
|
4270
|
-
* TODO: [🍓] In `
|
|
4271
|
-
* TODO: [🍓] In `
|
|
4272
|
-
* TODO: [🍓] In `
|
|
4353
|
+
* TODO: [🍓] In `TextFormatParser` implement simple `isValid`
|
|
4354
|
+
* TODO: [🍓] In `TextFormatParser` implement partial `canBeValid`
|
|
4355
|
+
* TODO: [🍓] In `TextFormatParser` implement `heal`
|
|
4356
|
+
* TODO: [🍓] In `TextFormatParser` implement `subvalueParsers`
|
|
4273
4357
|
* TODO: [🏢] Allow to expect something inside each item of list and other formats
|
|
4274
4358
|
*/
|
|
4275
4359
|
|
|
@@ -4277,7 +4361,7 @@ const TextFormatDefinition = {
|
|
|
4277
4361
|
* Function to check if a string is valid XML
|
|
4278
4362
|
*
|
|
4279
4363
|
* @param value
|
|
4280
|
-
* @returns
|
|
4364
|
+
* @returns `true` if the string is a valid XML string, `false` otherwise
|
|
4281
4365
|
*
|
|
4282
4366
|
* @public exported from `@promptbook/utils`
|
|
4283
4367
|
*/
|
|
@@ -4302,7 +4386,7 @@ function isValidXmlString(value) {
|
|
|
4302
4386
|
*
|
|
4303
4387
|
* @private still in development [🏢]
|
|
4304
4388
|
*/
|
|
4305
|
-
const
|
|
4389
|
+
const XmlFormatParser = {
|
|
4306
4390
|
formatName: 'XML',
|
|
4307
4391
|
mimeType: 'application/xml',
|
|
4308
4392
|
isValid(value, settings, schema) {
|
|
@@ -4314,17 +4398,17 @@ const XmlFormatDefinition = {
|
|
|
4314
4398
|
heal(value, settings, schema) {
|
|
4315
4399
|
throw new Error('Not implemented');
|
|
4316
4400
|
},
|
|
4317
|
-
|
|
4401
|
+
subvalueParsers: [],
|
|
4318
4402
|
};
|
|
4319
4403
|
/**
|
|
4320
|
-
* TODO: [🧠] Maybe
|
|
4404
|
+
* TODO: [🧠] Maybe proper instance of object
|
|
4321
4405
|
* TODO: [0] Make string_serialized_xml
|
|
4322
4406
|
* TODO: [1] Make type for XML Settings and Schema
|
|
4323
4407
|
* TODO: [🧠] What to use for validating XMLs - XSD,...
|
|
4324
|
-
* TODO: [🍓] In `
|
|
4325
|
-
* TODO: [🍓] In `
|
|
4326
|
-
* TODO: [🍓] In `
|
|
4327
|
-
* TODO: [🍓] In `
|
|
4408
|
+
* TODO: [🍓] In `XmlFormatParser` implement simple `isValid`
|
|
4409
|
+
* TODO: [🍓] In `XmlFormatParser` implement partial `canBeValid`
|
|
4410
|
+
* TODO: [🍓] In `XmlFormatParser` implement `heal`
|
|
4411
|
+
* TODO: [🍓] In `XmlFormatParser` implement `subvalueParsers`
|
|
4328
4412
|
* TODO: [🏢] Allow to expect something inside XML and other formats
|
|
4329
4413
|
*/
|
|
4330
4414
|
|
|
@@ -4333,24 +4417,19 @@ const XmlFormatDefinition = {
|
|
|
4333
4417
|
*
|
|
4334
4418
|
* @private internal index of `...` <- TODO [🏢]
|
|
4335
4419
|
*/
|
|
4336
|
-
const FORMAT_DEFINITIONS = [
|
|
4337
|
-
JsonFormatDefinition,
|
|
4338
|
-
XmlFormatDefinition,
|
|
4339
|
-
TextFormatDefinition,
|
|
4340
|
-
CsvFormatDefinition,
|
|
4341
|
-
];
|
|
4420
|
+
const FORMAT_DEFINITIONS = [JsonFormatParser, XmlFormatParser, TextFormatParser, CsvFormatParser];
|
|
4342
4421
|
/**
|
|
4343
4422
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
4344
4423
|
*/
|
|
4345
4424
|
|
|
4346
4425
|
/**
|
|
4347
|
-
* Maps available parameters to expected parameters
|
|
4426
|
+
* Maps available parameters to expected parameters for a pipeline task.
|
|
4348
4427
|
*
|
|
4349
4428
|
* The strategy is:
|
|
4350
|
-
* 1)
|
|
4351
|
-
* 2)
|
|
4429
|
+
* 1) First, match parameters by name where both available and expected.
|
|
4430
|
+
* 2) Then, if there are unmatched expected and available parameters, map them by order.
|
|
4352
4431
|
*
|
|
4353
|
-
* @throws {PipelineExecutionError}
|
|
4432
|
+
* @throws {PipelineExecutionError} If the number of unmatched expected and available parameters does not match, or mapping is ambiguous.
|
|
4354
4433
|
* @private within the repository used in `createPipelineExecutor`
|
|
4355
4434
|
*/
|
|
4356
4435
|
function mapAvailableToExpectedParameters(options) {
|
|
@@ -4373,7 +4452,7 @@ function mapAvailableToExpectedParameters(options) {
|
|
|
4373
4452
|
else if (!availableParametersNames.has(parameterName) && expectedParameterNames.has(parameterName)) ;
|
|
4374
4453
|
}
|
|
4375
4454
|
if (expectedParameterNames.size === 0) {
|
|
4376
|
-
// Note: [👨👨👧] Now we can freeze `mappedParameters` to prevent
|
|
4455
|
+
// Note: [👨👨👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
|
|
4377
4456
|
Object.freeze(mappedParameters);
|
|
4378
4457
|
return mappedParameters;
|
|
4379
4458
|
}
|
|
@@ -4404,7 +4483,7 @@ function mapAvailableToExpectedParameters(options) {
|
|
|
4404
4483
|
for (let i = 0; i < expectedParameterNames.size; i++) {
|
|
4405
4484
|
mappedParameters[expectedParameterNamesArray[i]] = availableParameters[availableParametersNamesArray[i]];
|
|
4406
4485
|
}
|
|
4407
|
-
// Note: [👨👨👧] Now we can freeze `mappedParameters` to prevent
|
|
4486
|
+
// Note: [👨👨👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
|
|
4408
4487
|
Object.freeze(mappedParameters);
|
|
4409
4488
|
return mappedParameters;
|
|
4410
4489
|
}
|
|
@@ -4412,8 +4491,8 @@ function mapAvailableToExpectedParameters(options) {
|
|
|
4412
4491
|
/**
|
|
4413
4492
|
* Extracts all code blocks from markdown.
|
|
4414
4493
|
*
|
|
4415
|
-
* Note: There are multiple
|
|
4416
|
-
* - `extractBlock` just extracts the content of the code block which is also used as
|
|
4494
|
+
* Note: There are multiple similar functions:
|
|
4495
|
+
* - `extractBlock` just extracts the content of the code block which is also used as built-in function for postprocessing
|
|
4417
4496
|
* - `extractJsonBlock` extracts exactly one valid JSON code block
|
|
4418
4497
|
* - `extractOneBlockFromMarkdown` extracts exactly one code block with language of the code block
|
|
4419
4498
|
* - `extractAllBlocksFromMarkdown` extracts all code blocks with language of the code block
|
|
@@ -4463,7 +4542,7 @@ function extractAllBlocksFromMarkdown(markdown) {
|
|
|
4463
4542
|
if (currentCodeBlock.content !== '') {
|
|
4464
4543
|
currentCodeBlock.content += '\n';
|
|
4465
4544
|
}
|
|
4466
|
-
currentCodeBlock.content += line.split('\\`\\`\\`').join('```') /* <- TODO: Maybe make
|
|
4545
|
+
currentCodeBlock.content += line.split('\\`\\`\\`').join('```') /* <- TODO: Maybe make proper unescape */;
|
|
4467
4546
|
}
|
|
4468
4547
|
}
|
|
4469
4548
|
if (currentCodeBlock !== null) {
|
|
@@ -4483,7 +4562,7 @@ function extractAllBlocksFromMarkdown(markdown) {
|
|
|
4483
4562
|
* - When there are multiple JSON code blocks the function throws a `ParseError`
|
|
4484
4563
|
*
|
|
4485
4564
|
* Note: It is not important if marked as ```json BUT if it is VALID JSON
|
|
4486
|
-
* Note: There are multiple
|
|
4565
|
+
* Note: There are multiple similar functions:
|
|
4487
4566
|
* - `extractBlock` just extracts the content of the code block which is also used as built-in function for postprocessing
|
|
4488
4567
|
* - `extractJsonBlock` extracts exactly one valid JSON code block
|
|
4489
4568
|
* - `extractOneBlockFromMarkdown` extracts exactly one code block with language of the code block
|
|
@@ -4508,7 +4587,7 @@ function extractJsonBlock(markdown) {
|
|
|
4508
4587
|
}
|
|
4509
4588
|
/**
|
|
4510
4589
|
* TODO: Add some auto-healing logic + extract YAML, JSON5, TOML, etc.
|
|
4511
|
-
* TODO: [🏢] Make this logic part of `
|
|
4590
|
+
* TODO: [🏢] Make this logic part of `JsonFormatParser` or `isValidJsonString`
|
|
4512
4591
|
*/
|
|
4513
4592
|
|
|
4514
4593
|
/**
|
|
@@ -4710,14 +4789,14 @@ const CountUtils = {
|
|
|
4710
4789
|
PAGES: countPages,
|
|
4711
4790
|
};
|
|
4712
4791
|
/**
|
|
4713
|
-
* TODO: [🧠][🤠] This should be
|
|
4792
|
+
* TODO: [🧠][🤠] This should probably be part of `TextFormatParser`
|
|
4714
4793
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
4715
4794
|
*/
|
|
4716
4795
|
|
|
4717
4796
|
/**
|
|
4718
4797
|
* Function checkExpectations will check if the expectations on given value are met
|
|
4719
4798
|
*
|
|
4720
|
-
* Note: There are two
|
|
4799
|
+
* Note: There are two similar functions:
|
|
4721
4800
|
* - `checkExpectations` which throws an error if the expectations are not met
|
|
4722
4801
|
* - `isPassingExpectations` which returns a boolean
|
|
4723
4802
|
*
|
|
@@ -4738,13 +4817,17 @@ function checkExpectations(expectations, value) {
|
|
|
4738
4817
|
}
|
|
4739
4818
|
/**
|
|
4740
4819
|
* TODO: [💝] Unite object for expecting amount and format
|
|
4741
|
-
* TODO: [🧠][🤠] This should be part of `
|
|
4820
|
+
* TODO: [🧠][🤠] This should be part of `TextFormatParser`
|
|
4742
4821
|
* Note: [💝] and [🤠] are interconnected together
|
|
4743
4822
|
*/
|
|
4744
4823
|
|
|
4745
4824
|
/**
|
|
4746
|
-
*
|
|
4825
|
+
* Executes a pipeline task with multiple attempts, including joker and retry logic. Handles different task types
|
|
4826
|
+
* (prompt, script, dialog, etc.), applies postprocessing, checks expectations, and updates the execution report.
|
|
4827
|
+
* Throws errors if execution fails after all attempts.
|
|
4747
4828
|
*
|
|
4829
|
+
* @param options - The options for execution, including task, parameters, pipeline, and configuration.
|
|
4830
|
+
* @returns The result string of the executed task.
|
|
4748
4831
|
* @private internal utility of `createPipelineExecutor`
|
|
4749
4832
|
*/
|
|
4750
4833
|
async function executeAttempts(options) {
|
|
@@ -4966,7 +5049,7 @@ async function executeAttempts(options) {
|
|
|
4966
5049
|
if (task.format) {
|
|
4967
5050
|
if (task.format === 'JSON') {
|
|
4968
5051
|
if (!isValidJsonString($ongoingTaskResult.$resultString || '')) {
|
|
4969
|
-
// TODO: [🏢] Do more universally via `
|
|
5052
|
+
// TODO: [🏢] Do more universally via `FormatParser`
|
|
4970
5053
|
try {
|
|
4971
5054
|
$ongoingTaskResult.$resultString = extractJsonBlock($ongoingTaskResult.$resultString || '');
|
|
4972
5055
|
}
|
|
@@ -5068,12 +5151,16 @@ async function executeAttempts(options) {
|
|
|
5068
5151
|
*/
|
|
5069
5152
|
|
|
5070
5153
|
/**
|
|
5071
|
-
*
|
|
5154
|
+
* Executes a pipeline task that requires mapping or iterating over subvalues of a parameter (such as rows in a CSV).
|
|
5155
|
+
* Handles format and subformat resolution, error handling, and progress reporting.
|
|
5156
|
+
*
|
|
5157
|
+
* @param options - Options for execution, including task details and progress callback.
|
|
5158
|
+
* @returns The result of the subvalue mapping or execution attempts.
|
|
5072
5159
|
*
|
|
5073
5160
|
* @private internal utility of `createPipelineExecutor`
|
|
5074
5161
|
*/
|
|
5075
5162
|
async function executeFormatSubvalues(options) {
|
|
5076
|
-
const { task, jokerParameterNames, parameters, priority, csvSettings, pipelineIdentification } = options;
|
|
5163
|
+
const { task, jokerParameterNames, parameters, priority, csvSettings, onProgress, pipelineIdentification } = options;
|
|
5077
5164
|
if (task.foreach === undefined) {
|
|
5078
5165
|
return /* not await */ executeAttempts(options);
|
|
5079
5166
|
}
|
|
@@ -5104,16 +5191,16 @@ async function executeFormatSubvalues(options) {
|
|
|
5104
5191
|
${block(pipelineIdentification)}
|
|
5105
5192
|
`));
|
|
5106
5193
|
}
|
|
5107
|
-
const
|
|
5108
|
-
if (
|
|
5194
|
+
const subvalueParser = formatDefinition.subvalueParsers.find((subvalueParser) => [subvalueParser.subvalueName, ...(subvalueParser.aliases || [])].includes(task.foreach.subformatName));
|
|
5195
|
+
if (subvalueParser === undefined) {
|
|
5109
5196
|
throw new UnexpectedError(
|
|
5110
5197
|
// <- TODO: [🧠][🧐] Should be formats fixed per promptbook version or behave as plugins (=> change UnexpectedError)
|
|
5111
5198
|
spaceTrim((block) => `
|
|
5112
5199
|
Unsupported subformat name "${task.foreach.subformatName}" for format "${task.foreach.formatName}"
|
|
5113
5200
|
|
|
5114
5201
|
Available subformat names for format "${formatDefinition.formatName}":
|
|
5115
|
-
${block(formatDefinition.
|
|
5116
|
-
.map((
|
|
5202
|
+
${block(formatDefinition.subvalueParsers
|
|
5203
|
+
.map((subvalueParser) => subvalueParser.subvalueName)
|
|
5117
5204
|
.map((subvalueName) => `- ${subvalueName}`)
|
|
5118
5205
|
.join('\n'))}
|
|
5119
5206
|
|
|
@@ -5125,55 +5212,85 @@ async function executeFormatSubvalues(options) {
|
|
|
5125
5212
|
let formatSettings;
|
|
5126
5213
|
if (formatDefinition.formatName === 'CSV') {
|
|
5127
5214
|
formatSettings = csvSettings;
|
|
5128
|
-
// <- TODO: [🤹♂️] More universal, make
|
|
5129
|
-
}
|
|
5130
|
-
const resultString = await
|
|
5131
|
-
|
|
5132
|
-
|
|
5133
|
-
|
|
5134
|
-
|
|
5135
|
-
|
|
5136
|
-
|
|
5137
|
-
|
|
5138
|
-
|
|
5139
|
-
|
|
5140
|
-
|
|
5141
|
-
|
|
5142
|
-
|
|
5215
|
+
// <- TODO: [🤹♂️] More universal, make similar pattern for other formats for example \n vs \r\n in text
|
|
5216
|
+
}
|
|
5217
|
+
const resultString = await subvalueParser.mapValues({
|
|
5218
|
+
value: parameterValue,
|
|
5219
|
+
outputParameterName: task.foreach.outputSubparameterName,
|
|
5220
|
+
settings: formatSettings,
|
|
5221
|
+
onProgress(partialResultString) {
|
|
5222
|
+
return onProgress(Object.freeze({
|
|
5223
|
+
[task.resultingParameterName]: partialResultString,
|
|
5224
|
+
}));
|
|
5225
|
+
},
|
|
5226
|
+
async mapCallback(subparameters, index, length) {
|
|
5227
|
+
let mappedParameters;
|
|
5228
|
+
try {
|
|
5229
|
+
mappedParameters = mapAvailableToExpectedParameters({
|
|
5230
|
+
expectedParameters: Object.fromEntries(task.foreach.inputSubparameterNames.map((subparameterName) => [subparameterName, null])),
|
|
5231
|
+
availableParameters: subparameters,
|
|
5232
|
+
});
|
|
5143
5233
|
}
|
|
5144
|
-
|
|
5145
|
-
|
|
5234
|
+
catch (error) {
|
|
5235
|
+
if (!(error instanceof PipelineExecutionError)) {
|
|
5236
|
+
throw error;
|
|
5237
|
+
}
|
|
5238
|
+
const highLevelError = new PipelineExecutionError(spaceTrim((block) => `
|
|
5239
|
+
${error.message}
|
|
5146
5240
|
|
|
5147
|
-
|
|
5148
|
-
|
|
5241
|
+
This is error in FOREACH command when mapping ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
|
|
5242
|
+
You have probably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
|
|
5149
5243
|
|
|
5150
|
-
|
|
5151
|
-
|
|
5152
|
-
|
|
5153
|
-
|
|
5154
|
-
|
|
5155
|
-
|
|
5156
|
-
|
|
5157
|
-
|
|
5158
|
-
|
|
5159
|
-
|
|
5160
|
-
|
|
5161
|
-
|
|
5162
|
-
|
|
5163
|
-
|
|
5164
|
-
|
|
5165
|
-
|
|
5166
|
-
|
|
5167
|
-
|
|
5168
|
-
|
|
5169
|
-
|
|
5244
|
+
${block(pipelineIdentification)}
|
|
5245
|
+
`));
|
|
5246
|
+
if (length > BIG_DATASET_TRESHOLD) {
|
|
5247
|
+
console.error(highLevelError);
|
|
5248
|
+
return FAILED_VALUE_PLACEHOLDER;
|
|
5249
|
+
}
|
|
5250
|
+
throw highLevelError;
|
|
5251
|
+
}
|
|
5252
|
+
const allSubparameters = {
|
|
5253
|
+
...parameters,
|
|
5254
|
+
...mappedParameters,
|
|
5255
|
+
};
|
|
5256
|
+
Object.freeze(allSubparameters);
|
|
5257
|
+
try {
|
|
5258
|
+
const subresultString = await executeAttempts({
|
|
5259
|
+
...options,
|
|
5260
|
+
priority: priority + index,
|
|
5261
|
+
parameters: allSubparameters,
|
|
5262
|
+
pipelineIdentification: spaceTrim((block) => `
|
|
5263
|
+
${block(pipelineIdentification)}
|
|
5264
|
+
Subparameter index: ${index}
|
|
5265
|
+
`),
|
|
5266
|
+
});
|
|
5267
|
+
return subresultString;
|
|
5268
|
+
}
|
|
5269
|
+
catch (error) {
|
|
5270
|
+
if (length > BIG_DATASET_TRESHOLD) {
|
|
5271
|
+
console.error(spaceTrim((block) => `
|
|
5272
|
+
${error.message}
|
|
5273
|
+
|
|
5274
|
+
This is error in FOREACH command when processing ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
|
|
5275
|
+
|
|
5276
|
+
${block(pipelineIdentification)}
|
|
5277
|
+
`));
|
|
5278
|
+
return FAILED_VALUE_PLACEHOLDER;
|
|
5279
|
+
}
|
|
5280
|
+
throw error;
|
|
5281
|
+
}
|
|
5282
|
+
},
|
|
5170
5283
|
});
|
|
5171
5284
|
return resultString;
|
|
5172
5285
|
}
|
|
5173
5286
|
|
|
5174
5287
|
/**
|
|
5175
|
-
*
|
|
5288
|
+
* Returns the context for a given task, typically used to provide additional information or variables
|
|
5289
|
+
* required for the execution of the task within a pipeline. The context is returned as a string value
|
|
5290
|
+
* that may include markdown formatting.
|
|
5176
5291
|
*
|
|
5292
|
+
* @param task - The task for which the context is being generated. This should be a deeply immutable TaskJson object.
|
|
5293
|
+
* @returns The context as a string, formatted as markdown and parameter value.
|
|
5177
5294
|
* @private internal utility of `createPipelineExecutor`
|
|
5178
5295
|
*/
|
|
5179
5296
|
async function getContextForTask(task) {
|
|
@@ -5181,7 +5298,7 @@ async function getContextForTask(task) {
|
|
|
5181
5298
|
}
|
|
5182
5299
|
|
|
5183
5300
|
/**
|
|
5184
|
-
*
|
|
5301
|
+
* Retrieves example values or templates for a given task, used to guide or validate pipeline execution.
|
|
5185
5302
|
*
|
|
5186
5303
|
* @private internal utility of `createPipelineExecutor`
|
|
5187
5304
|
*/
|
|
@@ -5190,91 +5307,128 @@ async function getExamplesForTask(task) {
|
|
|
5190
5307
|
}
|
|
5191
5308
|
|
|
5192
5309
|
/**
|
|
5193
|
-
*
|
|
5310
|
+
* Computes the cosine distance (`1 - cosine similarity`) between two embedding vectors, so lower values mean more similar vectors
|
|
5311
|
+
*
|
|
5312
|
+
* Note: This is a helper function for RAG (retrieval-augmented generation)
|
|
5194
5313
|
*
|
|
5195
|
-
*
|
|
5314
|
+
* @param embeddingVector1
|
|
5315
|
+
* @param embeddingVector2
|
|
5316
|
+
* @returns Cosine distance between the two vectors (`0` when they point in the same direction, larger values for less similar vectors)
|
|
5317
|
+
*
|
|
5318
|
+
* @public exported from `@promptbook/core`
|
|
5319
|
+
*/
|
|
5320
|
+
function computeCosineSimilarity(embeddingVector1, embeddingVector2) {
|
|
5321
|
+
if (embeddingVector1.length !== embeddingVector2.length) {
|
|
5322
|
+
throw new TypeError('Embedding vectors must have the same length');
|
|
5323
|
+
}
|
|
5324
|
+
const dotProduct = embeddingVector1.reduce((sum, value, index) => sum + value * embeddingVector2[index], 0);
|
|
5325
|
+
const magnitude1 = Math.sqrt(embeddingVector1.reduce((sum, value) => sum + value * value, 0));
|
|
5326
|
+
const magnitude2 = Math.sqrt(embeddingVector2.reduce((sum, value) => sum + value * value, 0));
|
|
5327
|
+
return 1 - dotProduct / (magnitude1 * magnitude2);
|
|
5328
|
+
}
|
|
5329
|
+
|
|
5330
|
+
/**
|
|
5331
|
+
* Formats knowledge pieces as a bullet list, one `- <content>` line per piece
|
|
5332
|
+
* @param knowledgePieces
|
|
5333
|
+
* @returns String with one `- <content>` line per knowledge piece, joined by newlines
|
|
5334
|
+
*
|
|
5335
|
+
* @private internal utility of `createPipelineExecutor`
|
|
5336
|
+
*/
|
|
5337
|
+
function knowledgePiecesToString(knowledgePieces) {
|
|
5338
|
+
return knowledgePieces
|
|
5339
|
+
.map((knowledgePiece) => {
|
|
5340
|
+
const { content } = knowledgePiece;
|
|
5341
|
+
return `- ${content}`;
|
|
5342
|
+
})
|
|
5343
|
+
.join('\n');
|
|
5344
|
+
// <- TODO: [🧠] Some smarter aggregation of knowledge pieces, single-line vs multi-line vs mixed
|
|
5345
|
+
}
|
|
5346
|
+
|
|
5347
|
+
/**
|
|
5348
|
+
* Retrieves the most relevant knowledge pieces for a given task using embedding-based similarity search.
|
|
5349
|
+
* This is where retrieval-augmented generation (RAG) is performed to enhance the task with external knowledge.
|
|
5196
5350
|
*
|
|
5197
5351
|
* @private internal utility of `createPipelineExecutor`
|
|
5198
5352
|
*/
|
|
5199
5353
|
async function getKnowledgeForTask(options) {
|
|
5200
|
-
const { tools, preparedPipeline, task } = options;
|
|
5354
|
+
const { tools, preparedPipeline, task, parameters } = options;
|
|
5201
5355
|
const firstKnowlegePiece = preparedPipeline.knowledgePieces[0];
|
|
5202
5356
|
const firstKnowlegeIndex = firstKnowlegePiece === null || firstKnowlegePiece === void 0 ? void 0 : firstKnowlegePiece.index[0];
|
|
5203
5357
|
// <- TODO: Do not use just first knowledge piece and first index to determine embedding model, use also keyword search
|
|
5204
5358
|
if (firstKnowlegePiece === undefined || firstKnowlegeIndex === undefined) {
|
|
5205
|
-
return '
|
|
5359
|
+
return ''; // <- Note: No knowledge present, return empty string
|
|
5206
5360
|
}
|
|
5207
|
-
|
|
5208
|
-
|
|
5209
|
-
|
|
5210
|
-
|
|
5211
|
-
|
|
5212
|
-
|
|
5213
|
-
|
|
5214
|
-
|
|
5215
|
-
|
|
5216
|
-
|
|
5217
|
-
|
|
5218
|
-
|
|
5219
|
-
}
|
|
5220
|
-
|
|
5221
|
-
|
|
5222
|
-
|
|
5223
|
-
|
|
5224
|
-
|
|
5225
|
-
|
|
5226
|
-
|
|
5361
|
+
try {
|
|
5362
|
+
// TODO: [🚐] Make arrayable LLMs -> single LLM DRY
|
|
5363
|
+
const _llms = arrayableToArray(tools.llm);
|
|
5364
|
+
const llmTools = _llms.length === 1 ? _llms[0] : joinLlmExecutionTools(..._llms);
|
|
5365
|
+
const taskEmbeddingPrompt = {
|
|
5366
|
+
title: 'Knowledge Search',
|
|
5367
|
+
modelRequirements: {
|
|
5368
|
+
modelVariant: 'EMBEDDING',
|
|
5369
|
+
modelName: firstKnowlegeIndex.modelName,
|
|
5370
|
+
},
|
|
5371
|
+
content: task.content,
|
|
5372
|
+
parameters,
|
|
5373
|
+
};
|
|
5374
|
+
const taskEmbeddingResult = await llmTools.callEmbeddingModel(taskEmbeddingPrompt);
|
|
5375
|
+
const knowledgePiecesWithRelevance = preparedPipeline.knowledgePieces.map((knowledgePiece) => {
|
|
5376
|
+
const { index } = knowledgePiece;
|
|
5377
|
+
const knowledgePieceIndex = index.find((i) => i.modelName === firstKnowlegeIndex.modelName);
|
|
5378
|
+
// <- TODO: Do not use just first knowledge piece and first index to determine embedding model
|
|
5379
|
+
if (knowledgePieceIndex === undefined) {
|
|
5380
|
+
return {
|
|
5381
|
+
content: knowledgePiece.content,
|
|
5382
|
+
relevance: 0,
|
|
5383
|
+
};
|
|
5384
|
+
}
|
|
5385
|
+
const relevance = computeCosineSimilarity(knowledgePieceIndex.position, taskEmbeddingResult.content);
|
|
5227
5386
|
return {
|
|
5228
5387
|
content: knowledgePiece.content,
|
|
5229
|
-
relevance
|
|
5388
|
+
relevance,
|
|
5230
5389
|
};
|
|
5231
|
-
}
|
|
5232
|
-
const
|
|
5233
|
-
|
|
5234
|
-
|
|
5235
|
-
|
|
5236
|
-
|
|
5237
|
-
|
|
5238
|
-
|
|
5239
|
-
|
|
5240
|
-
|
|
5241
|
-
|
|
5242
|
-
|
|
5243
|
-
|
|
5244
|
-
|
|
5245
|
-
|
|
5246
|
-
|
|
5247
|
-
|
|
5248
|
-
|
|
5249
|
-
|
|
5250
|
-
|
|
5251
|
-
|
|
5252
|
-
|
|
5253
|
-
// TODO: !!!!!! Annotate + to new file
|
|
5254
|
-
function computeCosineSimilarity(embeddingVector1, embeddingVector2) {
|
|
5255
|
-
if (embeddingVector1.length !== embeddingVector2.length) {
|
|
5256
|
-
throw new TypeError('Embedding vectors must have the same length');
|
|
5390
|
+
});
|
|
5391
|
+
const knowledgePiecesSorted = knowledgePiecesWithRelevance.sort((a, b) => a.relevance - b.relevance);
|
|
5392
|
+
const knowledgePiecesLimited = knowledgePiecesSorted.slice(0, 5);
|
|
5393
|
+
/*
|
|
5394
|
+
console.log('`getKnowledgeForTask` Embedding', {
|
|
5395
|
+
task,
|
|
5396
|
+
taskEmbeddingPrompt,
|
|
5397
|
+
taskEmbeddingResult,
|
|
5398
|
+
firstKnowlegePiece,
|
|
5399
|
+
firstKnowlegeIndex,
|
|
5400
|
+
knowledgePiecesWithRelevance,
|
|
5401
|
+
knowledgePiecesSorted,
|
|
5402
|
+
knowledgePiecesLimited,
|
|
5403
|
+
});
|
|
5404
|
+
*/
|
|
5405
|
+
return knowledgePiecesToString(knowledgePiecesLimited);
|
|
5406
|
+
}
|
|
5407
|
+
catch (error) {
|
|
5408
|
+
assertsError(error);
|
|
5409
|
+
console.error('Error in `getKnowledgeForTask`', error);
|
|
5410
|
+
// Note: If the LLM fails, just return all knowledge pieces
|
|
5411
|
+
return knowledgePiecesToString(preparedPipeline.knowledgePieces);
|
|
5257
5412
|
}
|
|
5258
|
-
const dotProduct = embeddingVector1.reduce((sum, value, index) => sum + value * embeddingVector2[index], 0);
|
|
5259
|
-
const magnitude1 = Math.sqrt(embeddingVector1.reduce((sum, value) => sum + value * value, 0));
|
|
5260
|
-
const magnitude2 = Math.sqrt(embeddingVector2.reduce((sum, value) => sum + value * value, 0));
|
|
5261
|
-
return 1 - dotProduct / (magnitude1 * magnitude2);
|
|
5262
5413
|
}
|
|
5263
5414
|
/**
|
|
5264
|
-
* TODO: !!!! Verify if this is working
|
|
5265
5415
|
* TODO: [♨] Implement Better - use keyword search
|
|
5266
5416
|
* TODO: [♨] Examples of values
|
|
5267
5417
|
*/
|
|
5268
5418
|
|
|
5269
5419
|
/**
|
|
5270
|
-
*
|
|
5420
|
+
* Retrieves all reserved parameters for a given pipeline task, including context, knowledge, examples, and metadata.
|
|
5421
|
+
* Ensures all reserved parameters are defined and throws if any are missing.
|
|
5422
|
+
*
|
|
5423
|
+
* @param options - Options including tools, pipeline, task, and context.
|
|
5424
|
+
* @returns An object containing all reserved parameters for the task.
|
|
5271
5425
|
*
|
|
5272
5426
|
* @private internal utility of `createPipelineExecutor`
|
|
5273
5427
|
*/
|
|
5274
5428
|
async function getReservedParametersForTask(options) {
|
|
5275
|
-
const { tools, preparedPipeline, task, pipelineIdentification } = options;
|
|
5429
|
+
const { tools, preparedPipeline, task, parameters, pipelineIdentification, isVerbose } = options;
|
|
5276
5430
|
const context = await getContextForTask(); // <- [🏍]
|
|
5277
|
-
const knowledge = await getKnowledgeForTask({ tools, preparedPipeline, task });
|
|
5431
|
+
const knowledge = await getKnowledgeForTask({ tools, preparedPipeline, task, parameters });
|
|
5278
5432
|
const examples = await getExamplesForTask();
|
|
5279
5433
|
const currentDate = new Date().toISOString(); // <- TODO: [🧠][💩] Better
|
|
5280
5434
|
const modelName = RESERVED_PARAMETER_MISSING_VALUE;
|
|
@@ -5286,6 +5440,9 @@ async function getReservedParametersForTask(options) {
|
|
|
5286
5440
|
currentDate,
|
|
5287
5441
|
modelName,
|
|
5288
5442
|
};
|
|
5443
|
+
if (isVerbose) {
|
|
5444
|
+
console.info('Reserved parameters for task:', { options, reservedParameters });
|
|
5445
|
+
}
|
|
5289
5446
|
// Note: Doublecheck that ALL reserved parameters are defined:
|
|
5290
5447
|
for (const parameterName of RESERVED_PARAMETER_NAMES) {
|
|
5291
5448
|
if (reservedParameters[parameterName] === undefined) {
|
|
@@ -5300,23 +5457,21 @@ async function getReservedParametersForTask(options) {
|
|
|
5300
5457
|
}
|
|
5301
5458
|
|
|
5302
5459
|
/**
|
|
5303
|
-
*
|
|
5460
|
+
* Executes a single task within a pipeline, handling parameter validation, error checking, and progress reporting.
|
|
5461
|
+
*
|
|
5462
|
+
* @param options - Options for execution, including the task, pipeline, parameters, and callbacks.
|
|
5463
|
+
* @returns The output parameters produced by the task.
|
|
5304
5464
|
*
|
|
5305
5465
|
* @private internal utility of `createPipelineExecutor`
|
|
5306
5466
|
*/
|
|
5307
5467
|
async function executeTask(options) {
|
|
5308
5468
|
const { currentTask, preparedPipeline, parametersToPass, tools, onProgress, $executionReport, pipelineIdentification, maxExecutionAttempts, maxParallelCount, csvSettings, isVerbose, rootDirname, cacheDirname, intermediateFilesStrategy, isAutoInstalled, isNotPreparedWarningSupressed, } = options;
|
|
5309
5469
|
const priority = preparedPipeline.tasks.length - preparedPipeline.tasks.indexOf(currentTask);
|
|
5310
|
-
await onProgress({
|
|
5311
|
-
outputParameters: {
|
|
5312
|
-
[currentTask.resultingParameterName]: '', // <- TODO: [🧠] What is the best value here?
|
|
5313
|
-
},
|
|
5314
|
-
});
|
|
5315
5470
|
// Note: Check consistency of used and dependent parameters which was also done in `validatePipeline`, but it’s good to doublecheck
|
|
5316
5471
|
const usedParameterNames = extractParameterNamesFromTask(currentTask);
|
|
5317
5472
|
const dependentParameterNames = new Set(currentTask.dependentParameterNames);
|
|
5318
5473
|
// TODO: [👩🏾🤝👩🏻] Use here `mapAvailableToExpectedParameters`
|
|
5319
|
-
if (union(difference(usedParameterNames, dependentParameterNames), difference(dependentParameterNames, usedParameterNames)).size !== 0) {
|
|
5474
|
+
if (difference(union(difference(usedParameterNames, dependentParameterNames), difference(dependentParameterNames, usedParameterNames)), new Set(RESERVED_PARAMETER_NAMES)).size !== 0) {
|
|
5320
5475
|
throw new UnexpectedError(spaceTrim$1((block) => `
|
|
5321
5476
|
Dependent parameters are not consistent with used parameters:
|
|
5322
5477
|
|
|
@@ -5334,13 +5489,16 @@ async function executeTask(options) {
|
|
|
5334
5489
|
|
|
5335
5490
|
`));
|
|
5336
5491
|
}
|
|
5492
|
+
const reservedParameters = await getReservedParametersForTask({
|
|
5493
|
+
tools,
|
|
5494
|
+
preparedPipeline,
|
|
5495
|
+
task: currentTask,
|
|
5496
|
+
pipelineIdentification,
|
|
5497
|
+
parameters: parametersToPass,
|
|
5498
|
+
isVerbose,
|
|
5499
|
+
});
|
|
5337
5500
|
const definedParameters = Object.freeze({
|
|
5338
|
-
...
|
|
5339
|
-
tools,
|
|
5340
|
-
preparedPipeline,
|
|
5341
|
-
task: currentTask,
|
|
5342
|
-
pipelineIdentification,
|
|
5343
|
-
})),
|
|
5501
|
+
...reservedParameters,
|
|
5344
5502
|
...parametersToPass,
|
|
5345
5503
|
});
|
|
5346
5504
|
const definedParameterNames = new Set(Object.keys(definedParameters));
|
|
@@ -5385,6 +5543,7 @@ async function executeTask(options) {
|
|
|
5385
5543
|
preparedPipeline,
|
|
5386
5544
|
tools,
|
|
5387
5545
|
$executionReport,
|
|
5546
|
+
onProgress,
|
|
5388
5547
|
pipelineIdentification,
|
|
5389
5548
|
maxExecutionAttempts,
|
|
5390
5549
|
maxParallelCount,
|
|
@@ -5412,7 +5571,8 @@ async function executeTask(options) {
|
|
|
5412
5571
|
*/
|
|
5413
5572
|
|
|
5414
5573
|
/**
|
|
5415
|
-
*
|
|
5574
|
+
* Filters and returns only the output parameters from the provided pipeline execution options.
|
|
5575
|
+
* Adds warnings for any expected output parameters that are missing.
|
|
5416
5576
|
*
|
|
5417
5577
|
* @private internal utility of `createPipelineExecutor`
|
|
5418
5578
|
*/
|
|
@@ -5437,9 +5597,12 @@ function filterJustOutputParameters(options) {
|
|
|
5437
5597
|
}
|
|
5438
5598
|
|
|
5439
5599
|
/**
|
|
5440
|
-
*
|
|
5600
|
+
* Executes an entire pipeline, resolving tasks in dependency order, handling errors, and reporting progress.
|
|
5441
5601
|
*
|
|
5442
|
-
* Note: This is not a `PipelineExecutor` (which is
|
|
5602
|
+
* Note: This is not a `PipelineExecutor` (which is bound to a single pipeline), but a utility function used by `createPipelineExecutor` to create a `PipelineExecutor`.
|
|
5603
|
+
*
|
|
5604
|
+
* @param options - Options for execution, including input parameters, pipeline, and callbacks.
|
|
5605
|
+
* @returns The result of the pipeline execution, including output parameters, errors, and usage statistics.
|
|
5443
5606
|
*
|
|
5444
5607
|
* @private internal utility of `createPipelineExecutor`
|
|
5445
5608
|
*/
|
|
@@ -5762,10 +5925,27 @@ function createPipelineExecutor(options) {
|
|
|
5762
5925
|
cacheDirname,
|
|
5763
5926
|
intermediateFilesStrategy,
|
|
5764
5927
|
isAutoInstalled,
|
|
5928
|
+
}).catch((error) => {
|
|
5929
|
+
assertsError(error);
|
|
5930
|
+
return exportJson({
|
|
5931
|
+
name: 'pipelineExecutorResult',
|
|
5932
|
+
message: `Unuccessful PipelineExecutorResult, last catch`,
|
|
5933
|
+
order: [],
|
|
5934
|
+
value: {
|
|
5935
|
+
isSuccessful: false,
|
|
5936
|
+
errors: [serializeError(error)],
|
|
5937
|
+
warnings: [],
|
|
5938
|
+
usage: UNCERTAIN_USAGE,
|
|
5939
|
+
executionReport: null,
|
|
5940
|
+
outputParameters: {},
|
|
5941
|
+
preparedPipeline,
|
|
5942
|
+
},
|
|
5943
|
+
});
|
|
5765
5944
|
});
|
|
5766
5945
|
};
|
|
5767
5946
|
const pipelineExecutor = (inputParameters) => createTask({
|
|
5768
5947
|
taskType: 'EXECUTION',
|
|
5948
|
+
title: pipeline.title,
|
|
5769
5949
|
taskProcessCallback(updateOngoingResult) {
|
|
5770
5950
|
return pipelineExecutorWithCallback(inputParameters, async (newOngoingResult) => {
|
|
5771
5951
|
updateOngoingResult(newOngoingResult);
|
|
@@ -5868,7 +6048,7 @@ class MarkdownScraper {
|
|
|
5868
6048
|
const knowledge = await Promise.all(
|
|
5869
6049
|
// TODO: [🪂] Do not send all at once but in chunks
|
|
5870
6050
|
knowledgeTextPieces.map(async (knowledgeTextPiece, i) => {
|
|
5871
|
-
// Note:
|
|
6051
|
+
// Note: These are just default values, they will be overwritten by the actual values:
|
|
5872
6052
|
let name = `piece-${i}`;
|
|
5873
6053
|
let title = spaceTrim(knowledgeTextPiece.substring(0, 100));
|
|
5874
6054
|
const knowledgePieceContent = spaceTrim(knowledgeTextPiece);
|
|
@@ -6022,8 +6202,8 @@ class MarkitdownScraper {
|
|
|
6022
6202
|
extension: 'md',
|
|
6023
6203
|
isVerbose,
|
|
6024
6204
|
});
|
|
6025
|
-
// TODO:
|
|
6026
|
-
// Note: Running
|
|
6205
|
+
// TODO: Determine if Markitdown conversion should run only if the cache file doesn't exist, or always.
|
|
6206
|
+
// Note: Running Markitdown conversion ONLY if the file in the cache does not exist
|
|
6027
6207
|
if (!(await isFileExisting(cacheFilehandler.filename, this.tools.fs))) {
|
|
6028
6208
|
const src = source.filename || source.url || null;
|
|
6029
6209
|
// console.log('!!', { src, source, cacheFilehandler });
|
|
@@ -6045,11 +6225,11 @@ class MarkitdownScraper {
|
|
|
6045
6225
|
return cacheFilehandler;
|
|
6046
6226
|
}
|
|
6047
6227
|
/**
|
|
6048
|
-
* Scrapes the
|
|
6228
|
+
* Scrapes the source document (PDF, DOCX, etc.) and returns the knowledge pieces or `null` if it can't scrape it.
|
|
6049
6229
|
*/
|
|
6050
6230
|
async scrape(source) {
|
|
6051
6231
|
const cacheFilehandler = await this.$convert(source);
|
|
6052
|
-
// TODO:
|
|
6232
|
+
// TODO: Ensure this correctly creates the source object for the internal MarkdownScraper using the converted file.
|
|
6053
6233
|
const markdownSource = {
|
|
6054
6234
|
source: source.source,
|
|
6055
6235
|
filename: cacheFilehandler.filename,
|
|
@@ -6193,7 +6373,8 @@ class PdfScraper {
|
|
|
6193
6373
|
*/
|
|
6194
6374
|
|
|
6195
6375
|
/**
|
|
6196
|
-
*
|
|
6376
|
+
* Factory function to create an instance of PdfScraper.
|
|
6377
|
+
* It bundles the scraper class with its metadata.
|
|
6197
6378
|
*
|
|
6198
6379
|
* @public exported from `@promptbook/pdf`
|
|
6199
6380
|
*/
|