@promptbook/markitdown 0.92.0-3 → 0.92.0-30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/index.es.js +559 -279
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/browser.index.d.ts +2 -0
- package/esm/typings/src/_packages/core.index.d.ts +22 -6
- package/esm/typings/src/_packages/deepseek.index.d.ts +2 -0
- package/esm/typings/src/_packages/google.index.d.ts +2 -0
- package/esm/typings/src/_packages/types.index.d.ts +4 -2
- package/esm/typings/src/_packages/utils.index.d.ts +2 -0
- package/esm/typings/src/cli/common/$provideLlmToolsForCli.d.ts +1 -1
- package/esm/typings/src/collection/PipelineCollection.d.ts +0 -2
- package/esm/typings/src/collection/SimplePipelineCollection.d.ts +1 -1
- package/esm/typings/src/commands/FOREACH/ForeachJson.d.ts +6 -6
- package/esm/typings/src/commands/FOREACH/foreachCommandParser.d.ts +0 -2
- package/esm/typings/src/commands/FORMFACTOR/formfactorCommandParser.d.ts +1 -1
- package/esm/typings/src/commands/_BOILERPLATE/boilerplateCommandParser.d.ts +1 -1
- package/esm/typings/src/commands/_common/types/CommandParser.d.ts +36 -28
- package/esm/typings/src/config.d.ts +41 -11
- package/esm/typings/src/constants.d.ts +43 -2
- package/esm/typings/src/conversion/archive/loadArchive.d.ts +2 -2
- package/esm/typings/src/errors/0-BoilerplateError.d.ts +2 -2
- package/esm/typings/src/executables/$provideExecutablesForNode.d.ts +1 -1
- package/esm/typings/src/executables/apps/locateLibreoffice.d.ts +2 -1
- package/esm/typings/src/executables/apps/locatePandoc.d.ts +2 -1
- package/esm/typings/src/executables/platforms/locateAppOnLinux.d.ts +2 -1
- package/esm/typings/src/executables/platforms/locateAppOnMacOs.d.ts +2 -1
- package/esm/typings/src/executables/platforms/locateAppOnWindows.d.ts +2 -1
- package/esm/typings/src/execution/AbstractTaskResult.d.ts +1 -1
- package/esm/typings/src/execution/CommonToolsOptions.d.ts +5 -1
- package/esm/typings/src/execution/LlmExecutionToolsConstructor.d.ts +2 -1
- package/esm/typings/src/execution/PipelineExecutorResult.d.ts +4 -2
- package/esm/typings/src/execution/createPipelineExecutor/$OngoingTaskResult.d.ts +12 -9
- package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +12 -9
- package/esm/typings/src/execution/createPipelineExecutor/20-executeTask.d.ts +11 -8
- package/esm/typings/src/execution/createPipelineExecutor/30-executeFormatSubvalues.d.ts +15 -3
- package/esm/typings/src/execution/createPipelineExecutor/40-executeAttempts.d.ts +20 -14
- package/esm/typings/src/execution/createPipelineExecutor/computeCosineSimilarity.d.ts +13 -0
- package/esm/typings/src/execution/createPipelineExecutor/filterJustOutputParameters.d.ts +7 -6
- package/esm/typings/src/execution/createPipelineExecutor/getContextForTask.d.ts +5 -1
- package/esm/typings/src/execution/createPipelineExecutor/getExamplesForTask.d.ts +1 -1
- package/esm/typings/src/execution/createPipelineExecutor/getKnowledgeForTask.d.ts +21 -5
- package/esm/typings/src/execution/createPipelineExecutor/getReservedParametersForTask.d.ts +19 -5
- package/esm/typings/src/execution/createPipelineExecutor/knowledgePiecesToString.d.ts +9 -0
- package/esm/typings/src/execution/translation/automatic-translate/automatic-translators/LindatAutomaticTranslator.d.ts +4 -4
- package/esm/typings/src/execution/utils/checkExpectations.d.ts +1 -1
- package/esm/typings/src/execution/utils/uncertainNumber.d.ts +3 -2
- package/esm/typings/src/formats/_common/{FormatDefinition.d.ts → FormatParser.d.ts} +8 -6
- package/esm/typings/src/formats/_common/FormatSubvalueParser.d.ts +66 -0
- package/esm/typings/src/formats/csv/CsvFormatParser.d.ts +17 -0
- package/esm/typings/src/formats/csv/CsvSettings.d.ts +2 -2
- package/esm/typings/src/formats/csv/utils/csvParse.d.ts +12 -0
- package/esm/typings/src/formats/csv/utils/isValidCsvString.d.ts +1 -1
- package/esm/typings/src/formats/index.d.ts +2 -2
- package/esm/typings/src/formats/json/{JsonFormatDefinition.d.ts → JsonFormatParser.d.ts} +6 -6
- package/esm/typings/src/formats/json/utils/isValidJsonString.d.ts +1 -1
- package/esm/typings/src/formats/json/utils/jsonParse.d.ts +8 -0
- package/esm/typings/src/formats/text/{TextFormatDefinition.d.ts → TextFormatParser.d.ts} +7 -7
- package/esm/typings/src/formats/xml/XmlFormatParser.d.ts +19 -0
- package/esm/typings/src/formats/xml/utils/isValidXmlString.d.ts +1 -1
- package/esm/typings/src/formfactors/_boilerplate/BoilerplateFormfactorDefinition.d.ts +3 -2
- package/esm/typings/src/formfactors/_common/AbstractFormfactorDefinition.d.ts +16 -7
- package/esm/typings/src/formfactors/_common/FormfactorDefinition.d.ts +3 -1
- package/esm/typings/src/formfactors/_common/string_formfactor_name.d.ts +2 -1
- package/esm/typings/src/formfactors/chatbot/ChatbotFormfactorDefinition.d.ts +2 -2
- package/esm/typings/src/formfactors/completion/CompletionFormfactorDefinition.d.ts +29 -0
- package/esm/typings/src/formfactors/generator/GeneratorFormfactorDefinition.d.ts +2 -1
- package/esm/typings/src/formfactors/generic/GenericFormfactorDefinition.d.ts +2 -2
- package/esm/typings/src/formfactors/index.d.ts +33 -8
- package/esm/typings/src/formfactors/matcher/MatcherFormfactorDefinition.d.ts +4 -2
- package/esm/typings/src/formfactors/sheets/SheetsFormfactorDefinition.d.ts +3 -2
- package/esm/typings/src/formfactors/translator/TranslatorFormfactorDefinition.d.ts +3 -2
- package/esm/typings/src/high-level-abstractions/index.d.ts +2 -2
- package/esm/typings/src/llm-providers/_common/register/$llmToolsMetadataRegister.d.ts +3 -3
- package/esm/typings/src/llm-providers/_common/register/$llmToolsRegister.d.ts +3 -3
- package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsConfigurationFromEnv.d.ts +4 -4
- package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsForTestingAndScriptsAndPlayground.d.ts +4 -3
- package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsFromEnv.d.ts +17 -4
- package/esm/typings/src/llm-providers/_common/register/LlmToolsConfiguration.d.ts +11 -4
- package/esm/typings/src/llm-providers/_common/register/LlmToolsMetadata.d.ts +27 -5
- package/esm/typings/src/llm-providers/_common/register/LlmToolsOptions.d.ts +9 -2
- package/esm/typings/src/llm-providers/_common/register/createLlmToolsFromConfiguration.d.ts +12 -3
- package/esm/typings/src/llm-providers/_common/utils/cache/CacheItem.d.ts +10 -5
- package/esm/typings/src/llm-providers/_common/utils/cache/CacheLlmToolsOptions.d.ts +5 -3
- package/esm/typings/src/llm-providers/_common/utils/cache/cacheLlmTools.d.ts +3 -3
- package/esm/typings/src/llm-providers/_common/utils/count-total-usage/limitTotalUsage.d.ts +5 -5
- package/esm/typings/src/llm-providers/anthropic-claude/anthropic-claude-models.d.ts +1 -1
- package/esm/typings/src/llm-providers/azure-openai/AzureOpenAiExecutionTools.d.ts +4 -0
- package/esm/typings/src/llm-providers/deepseek/deepseek-models.d.ts +23 -0
- package/esm/typings/src/llm-providers/google/google-models.d.ts +23 -0
- package/esm/typings/src/llm-providers/openai/OpenAiExecutionTools.d.ts +4 -0
- package/esm/typings/src/llm-providers/openai/openai-models.d.ts +1 -1
- package/esm/typings/src/llm-providers/openai/register-configuration.d.ts +2 -2
- package/esm/typings/src/llm-providers/openai/register-constructor.d.ts +2 -2
- package/esm/typings/src/migrations/migratePipeline.d.ts +9 -0
- package/esm/typings/src/other/templates/getBookTemplates.d.ts +2 -2
- package/esm/typings/src/personas/preparePersona.d.ts +1 -1
- package/esm/typings/src/pipeline/PipelineInterface/PipelineInterface.d.ts +3 -3
- package/esm/typings/src/pipeline/PipelineInterface/constants.d.ts +1 -1
- package/esm/typings/src/pipeline/PipelineInterface/getPipelineInterface.d.ts +1 -1
- package/esm/typings/src/pipeline/PipelineInterface/isPipelineImplementingInterface.d.ts +5 -4
- package/esm/typings/src/pipeline/PipelineInterface/isPipelineInterfacesEqual.d.ts +1 -1
- package/esm/typings/src/pipeline/PipelineJson/CommonTaskJson.d.ts +9 -6
- package/esm/typings/src/pipeline/PipelineJson/PersonaJson.d.ts +4 -2
- package/esm/typings/src/pipeline/PipelineJson/PipelineJson.d.ts +3 -2
- package/esm/typings/src/pipeline/PipelineString.d.ts +3 -1
- package/esm/typings/src/pipeline/book-notation.d.ts +2 -2
- package/esm/typings/src/postprocessing/utils/extractJsonBlock.d.ts +1 -1
- package/esm/typings/src/prepare/prepareTasks.d.ts +7 -4
- package/esm/typings/src/remote-server/openapi-types.d.ts +348 -6
- package/esm/typings/src/remote-server/openapi.d.ts +398 -4
- package/esm/typings/src/remote-server/types/RemoteServerOptions.d.ts +2 -1
- package/esm/typings/src/scrapers/_boilerplate/BoilerplateScraper.d.ts +3 -3
- package/esm/typings/src/scrapers/_boilerplate/createBoilerplateScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/_boilerplate/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/_common/Converter.d.ts +3 -1
- package/esm/typings/src/scrapers/_common/Scraper.d.ts +4 -3
- package/esm/typings/src/scrapers/_common/ScraperIntermediateSource.d.ts +4 -2
- package/esm/typings/src/scrapers/_common/register/$provideFilesystemForNode.d.ts +2 -1
- package/esm/typings/src/scrapers/_common/register/$provideScrapersForBrowser.d.ts +6 -3
- package/esm/typings/src/scrapers/_common/register/$provideScrapersForNode.d.ts +3 -5
- package/esm/typings/src/scrapers/_common/register/$scrapersMetadataRegister.d.ts +3 -3
- package/esm/typings/src/scrapers/_common/register/$scrapersRegister.d.ts +3 -2
- package/esm/typings/src/scrapers/_common/register/ScraperAndConverterMetadata.d.ts +8 -5
- package/esm/typings/src/scrapers/_common/register/ScraperConstructor.d.ts +2 -1
- package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +6 -5
- package/esm/typings/src/scrapers/_common/utils/makeKnowledgeSourceHandler.d.ts +3 -1
- package/esm/typings/src/scrapers/document/createDocumentScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/document-legacy/createLegacyDocumentScraper.d.ts +2 -1
- package/esm/typings/src/scrapers/markdown/createMarkdownScraper.d.ts +4 -1
- package/esm/typings/src/scrapers/markitdown/MarkitdownScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/pdf/createPdfScraper.d.ts +2 -1
- package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts +3 -4
- package/esm/typings/src/scripting/javascript/postprocessing-functions.d.ts +5 -1
- package/esm/typings/src/storage/file-cache-storage/FileCacheStorage.d.ts +12 -5
- package/esm/typings/src/storage/file-cache-storage/FileCacheStorageOptions.d.ts +4 -2
- package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.d.ts +2 -1
- package/esm/typings/src/storage/local-storage/getIndexedDbStorage.d.ts +10 -0
- package/esm/typings/src/storage/local-storage/utils/makePromptbookStorageFromIndexedDb.d.ts +7 -0
- package/esm/typings/src/storage/local-storage/utils/makePromptbookStorageFromWebStorage.d.ts +2 -1
- package/esm/typings/src/types/IntermediateFilesStrategy.d.ts +2 -1
- package/esm/typings/src/types/ModelVariant.d.ts +5 -5
- package/esm/typings/src/types/typeAliases.d.ts +17 -13
- package/esm/typings/src/utils/$Register.d.ts +8 -7
- package/esm/typings/src/utils/editable/edit-pipeline-string/addPipelineCommand.d.ts +2 -2
- package/esm/typings/src/utils/editable/edit-pipeline-string/deflatePipeline.d.ts +4 -1
- package/esm/typings/src/utils/editable/utils/isFlatPipeline.d.ts +2 -1
- package/esm/typings/src/utils/environment/$getGlobalScope.d.ts +2 -1
- package/esm/typings/src/utils/expectation-counters/index.d.ts +1 -1
- package/esm/typings/src/utils/markdown/extractAllListItemsFromMarkdown.d.ts +1 -1
- package/esm/typings/src/utils/normalization/nameToUriPart.d.ts +4 -4
- package/esm/typings/src/utils/normalization/nameToUriParts.d.ts +4 -4
- package/esm/typings/src/utils/normalization/normalize-to-kebab-case.d.ts +3 -3
- package/esm/typings/src/utils/normalization/normalizeTo_SCREAMING_CASE.d.ts +3 -3
- package/esm/typings/src/utils/normalization/normalizeTo_camelCase.d.ts +4 -4
- package/esm/typings/src/utils/normalization/normalizeTo_snake_case.d.ts +3 -3
- package/esm/typings/src/utils/normalization/removeDiacritics.d.ts +3 -3
- package/esm/typings/src/utils/normalization/searchKeywords.d.ts +4 -1
- package/esm/typings/src/utils/normalization/titleToName.d.ts +4 -4
- package/esm/typings/src/utils/organization/empty_object.d.ts +2 -2
- package/esm/typings/src/utils/organization/just_empty_object.d.ts +4 -4
- package/esm/typings/src/utils/parameters/mapAvailableToExpectedParameters.d.ts +7 -7
- package/esm/typings/src/utils/serialization/clonePipeline.d.ts +4 -3
- package/esm/typings/src/utils/serialization/deepClone.d.ts +5 -1
- package/esm/typings/src/utils/validators/javascriptName/isValidJavascriptName.d.ts +3 -3
- package/esm/typings/src/utils/validators/parameterName/validateParameterName.d.ts +5 -4
- package/esm/typings/src/version.d.ts +2 -1
- package/package.json +2 -2
- package/umd/index.umd.js +559 -279
- package/umd/index.umd.js.map +1 -1
- package/esm/typings/src/formats/_common/FormatSubvalueDefinition.d.ts +0 -31
- package/esm/typings/src/formats/csv/CsvFormatDefinition.d.ts +0 -17
- package/esm/typings/src/formats/xml/XmlFormatDefinition.d.ts +0 -19
package/esm/index.es.js
CHANGED
|
@@ -26,7 +26,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
|
|
|
26
26
|
* @generated
|
|
27
27
|
* @see https://github.com/webgptorg/promptbook
|
|
28
28
|
*/
|
|
29
|
-
const PROMPTBOOK_ENGINE_VERSION = '0.92.0-3';
|
|
29
|
+
const PROMPTBOOK_ENGINE_VERSION = '0.92.0-30';
|
|
30
30
|
/**
|
|
31
31
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
32
32
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
@@ -102,6 +102,21 @@ const DEFAULT_BOOK_TITLE = `✨ Untitled Book`;
|
|
|
102
102
|
* @public exported from `@promptbook/core`
|
|
103
103
|
*/
|
|
104
104
|
const DEFAULT_MAX_FILE_SIZE = 100 * 1024 * 1024; // 100MB
|
|
105
|
+
/**
|
|
106
|
+
* Threshold value that determines when a dataset is considered "big"
|
|
107
|
+
* and may require special handling or optimizations
|
|
108
|
+
*
|
|
109
|
+
* For example, when an error occurs in one item of a big dataset, it will not fail the whole pipeline
|
|
110
|
+
*
|
|
111
|
+
* @public exported from `@promptbook/core`
|
|
112
|
+
*/
|
|
113
|
+
const BIG_DATASET_TRESHOLD = 50;
|
|
114
|
+
/**
|
|
115
|
+
* Placeholder text used to represent the value of a failed operation
|
|
116
|
+
*
|
|
117
|
+
* @public exported from `@promptbook/core`
|
|
118
|
+
*/
|
|
119
|
+
const FAILED_VALUE_PLACEHOLDER = '!?';
|
|
105
120
|
// <- TODO: [🧠] Better system for generator warnings - not always "code" and "by `@promptbook/cli`"
|
|
106
121
|
/**
|
|
107
122
|
* The maximum number of iterations for loops
|
|
@@ -181,7 +196,7 @@ const DEFAULT_DOWNLOAD_CACHE_DIRNAME = './.promptbook/download-cache';
|
|
|
181
196
|
const DEFAULT_SCRAPE_CACHE_DIRNAME = './.promptbook/scrape-cache';
|
|
182
197
|
// <- TODO: [🧜♂️]
|
|
183
198
|
/**
|
|
184
|
-
*
|
|
199
|
+
* Default settings for parsing and generating CSV files in Promptbook.
|
|
185
200
|
*
|
|
186
201
|
* @public exported from `@promptbook/core`
|
|
187
202
|
*/
|
|
@@ -192,19 +207,19 @@ const DEFAULT_CSV_SETTINGS = Object.freeze({
|
|
|
192
207
|
skipEmptyLines: true,
|
|
193
208
|
});
|
|
194
209
|
/**
|
|
195
|
-
*
|
|
210
|
+
* Controls whether verbose logging is enabled by default throughout the application.
|
|
196
211
|
*
|
|
197
212
|
* @public exported from `@promptbook/core`
|
|
198
213
|
*/
|
|
199
214
|
let DEFAULT_IS_VERBOSE = false;
|
|
200
215
|
/**
|
|
201
|
-
*
|
|
216
|
+
* Controls whether auto-installation of dependencies is enabled by default.
|
|
202
217
|
*
|
|
203
218
|
* @public exported from `@promptbook/core`
|
|
204
219
|
*/
|
|
205
220
|
const DEFAULT_IS_AUTO_INSTALLED = false;
|
|
206
221
|
/**
|
|
207
|
-
*
|
|
222
|
+
* Indicates whether pipeline logic validation is enabled. When true, the pipeline logic is checked for consistency.
|
|
208
223
|
*
|
|
209
224
|
* @private within the repository
|
|
210
225
|
*/
|
|
@@ -355,7 +370,8 @@ async function isFileExisting(filename, fs) {
|
|
|
355
370
|
*/
|
|
356
371
|
|
|
357
372
|
/**
|
|
358
|
-
*
|
|
373
|
+
* Converts a name to a properly formatted subfolder path for cache storage.
|
|
374
|
+
* Handles normalization and path formatting to create consistent cache directory structures.
|
|
359
375
|
*
|
|
360
376
|
* @private for `FileCacheStorage`
|
|
361
377
|
*/
|
|
@@ -608,10 +624,10 @@ for (let i = 0; i < defaultDiacriticsRemovalMap.length; i++) {
|
|
|
608
624
|
*/
|
|
609
625
|
|
|
610
626
|
/**
|
|
611
|
-
*
|
|
627
|
+
* Removes diacritic marks (accents) from characters in a string.
|
|
612
628
|
*
|
|
613
|
-
* @param input
|
|
614
|
-
* @returns
|
|
629
|
+
* @param input The string containing diacritics to be normalized.
|
|
630
|
+
* @returns The string with diacritics removed or normalized.
|
|
615
631
|
* @public exported from `@promptbook/utils`
|
|
616
632
|
*/
|
|
617
633
|
function removeDiacritics(input) {
|
|
@@ -625,10 +641,10 @@ function removeDiacritics(input) {
|
|
|
625
641
|
*/
|
|
626
642
|
|
|
627
643
|
/**
|
|
628
|
-
*
|
|
644
|
+
* Converts a given text to kebab-case format.
|
|
629
645
|
*
|
|
630
|
-
* @param text
|
|
631
|
-
* @returns
|
|
646
|
+
* @param text The text to be converted.
|
|
647
|
+
* @returns The kebab-case formatted string.
|
|
632
648
|
* @example 'hello-world'
|
|
633
649
|
* @example 'i-love-promptbook'
|
|
634
650
|
* @public exported from `@promptbook/utils`
|
|
@@ -770,11 +786,11 @@ function isValidUrl(url) {
|
|
|
770
786
|
}
|
|
771
787
|
|
|
772
788
|
/**
|
|
773
|
-
*
|
|
789
|
+
* Converts a title string into a normalized name.
|
|
774
790
|
*
|
|
775
|
-
* @param value
|
|
776
|
-
* @returns
|
|
777
|
-
* @example
|
|
791
|
+
* @param value The title string to be converted to a name.
|
|
792
|
+
* @returns A normalized name derived from the input title.
|
|
793
|
+
* @example 'Hello World!' -> 'hello-world'
|
|
778
794
|
* @public exported from `@promptbook/utils`
|
|
779
795
|
*/
|
|
780
796
|
function titleToName(value) {
|
|
@@ -794,9 +810,8 @@ function titleToName(value) {
|
|
|
794
810
|
}
|
|
795
811
|
|
|
796
812
|
/**
|
|
797
|
-
*
|
|
798
|
-
*
|
|
799
|
-
* Note: It also checks if directory exists and creates it if not
|
|
813
|
+
* Retrieves an intermediate source for a scraper based on the knowledge source.
|
|
814
|
+
* Manages the caching and retrieval of intermediate scraper results for optimized performance.
|
|
800
815
|
*
|
|
801
816
|
* @private as internal utility for scrapers
|
|
802
817
|
*/
|
|
@@ -847,7 +862,7 @@ async function getScraperIntermediateSource(source, options) {
|
|
|
847
862
|
* Note: [🟢] Code in this file should never be released in packages that could be imported into a browser environment
|
|
848
863
|
*/
|
|
849
864
|
|
|
850
|
-
var PipelineCollection = [{title:"Prepare Knowledge from Markdown",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book",formfactorName:"GENERIC",parameters:[{name:"knowledgeContent",description:"Markdown document content",isInput:true,isOutput:false},{name:"knowledgePieces",description:"The knowledge JSON object",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}",resultingParameterName:"knowledgePieces",dependentParameterNames:["knowledgeContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge from Markdown\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book`\n- INPUT PARAMETER `{knowledgeContent}` Markdown document content\n- OUTPUT PARAMETER `{knowledgePieces}` The knowledge JSON object\n\n## Knowledge\n\n<!-- TODO: [🍆] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}\n```\n\n`-> 
{knowledgePieces}`\n"}],sourceFile:"./books/prepare-knowledge-from-markdown.book"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-keywords.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"keywords",description:"Keywords separated by comma",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}",resultingParameterName:"keywords",dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Keywords\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-keywords.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{keywords}` Keywords separated by comma\n\n## Knowledge\n\n<!-- TODO: [🍆] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}\n```\n\n`-> {keywords}`\n"}],sourceFile:"./books/prepare-knowledge-keywords.book"},{title:"Prepare Knowledge-piece Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-title.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"title",description:"The title of the document",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just 
title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}",resultingParameterName:"title",expectations:{words:{min:1,max:8}},dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge-piece Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-title.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{title}` The title of the document\n\n## Knowledge\n\n- EXPECT MIN 1 WORD\n- EXPECT MAX 8 WORDS\n\n```markdown\nYou are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-knowledge-title.book"},{title:"Prepare Persona",pipelineUrl:"https://promptbook.studio/promptbook/prepare-persona.book",formfactorName:"GENERIC",parameters:[{name:"availableModelNames",description:"List of available model names separated by comma (,)",isInput:true,isOutput:false},{name:"personaDescription",description:"Description of the persona",isInput:true,isOutput:false},{name:"modelRequirements",description:"Specific requirements for the model",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-model-requirements",title:"Make modelRequirements",content:"You are experienced AI engineer, you need to create virtual assistant.\nWrite\n\n## Example\n\n```json\n{\n\"modelName\": \"gpt-4o\",\n\"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n\"temperature\": 0.7\n}\n```\n\n## Instructions\n\n- Your output format is JSON object\n- Write just the JSON object, no other text should be present\n- It contains the following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the 
model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nPick from the following models:\n\n- {availableModelNames}\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}",resultingParameterName:"modelRequirements",format:"JSON",dependentParameterNames:["availableModelNames","personaDescription"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Persona\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-persona.book`\n- INPUT PARAMETER `{availableModelNames}` List of available model names separated by comma (,)\n- INPUT PARAMETER `{personaDescription}` Description of the persona\n- OUTPUT PARAMETER `{modelRequirements}` Specific requirements for the model\n\n## Make modelRequirements\n\n- FORMAT JSON\n\n```markdown\nYou are experienced AI engineer, you need to create virtual assistant.\nWrite\n\n## Example\n\n\\`\\`\\`json\n{\n\"modelName\": \"gpt-4o\",\n\"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n\"temperature\": 0.7\n}\n\\`\\`\\`\n\n## Instructions\n\n- Your output format is JSON object\n- Write just the JSON object, no other text should be present\n- It contains the following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nPick from the following models:\n\n- {availableModelNames}\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. 
It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}\n```\n\n`-> {modelRequirements}`\n"}],sourceFile:"./books/prepare-persona.book"},{title:"Prepare Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-title.book",formfactorName:"GENERIC",parameters:[{name:"book",description:"The book to prepare the title for",isInput:true,isOutput:false},{name:"title",description:"Best title for the book",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-title",title:"Make title",content:"Make best title for given text which describes the workflow:\n\n## Rules\n\n- Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 
words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"✍ Convert Knowledge-piece to title\" but \"✍ Title\"_\n\n## The workflow\n\n> {book}",resultingParameterName:"title",expectations:{words:{min:1,max:8},lines:{min:1,max:1}},dependentParameterNames:["book"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-title.book`\n- INPUT PARAMETER `{book}` The book to prepare the title for\n- OUTPUT PARAMETER `{title}` Best title for the book\n\n## Make title\n\n- EXPECT MIN 1 Word\n- EXPECT MAX 8 Words\n- EXPECT EXACTLY 1 Line\n\n```markdown\nMake best title for given text which describes the workflow:\n\n## Rules\n\n- Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"✍ Convert Knowledge-piece to title\" but \"✍ Title\"_\n\n## The workflow\n\n> {book}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-title.book"}];
|
|
865
|
+
var PipelineCollection = [{title:"Prepare Knowledge from Markdown",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book",formfactorName:"GENERIC",parameters:[{name:"knowledgeContent",description:"Markdown document content",isInput:true,isOutput:false},{name:"knowledgePieces",description:"The knowledge JSON object",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}",resultingParameterName:"knowledgePieces",dependentParameterNames:["knowledgeContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge from Markdown\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book`\n- INPUT PARAMETER `{knowledgeContent}` Markdown document content\n- OUTPUT PARAMETER `{knowledgePieces}` The knowledge JSON object\n\n## Knowledge\n\n<!-- TODO: [🍆] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}\n```\n\n`-> 
{knowledgePieces}`\n"}],sourceFile:"./books/prepare-knowledge-from-markdown.book"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-keywords.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"keywords",description:"Keywords separated by comma",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}",resultingParameterName:"keywords",dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Keywords\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-keywords.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{keywords}` Keywords separated by comma\n\n## Knowledge\n\n<!-- TODO: [🍆] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}\n```\n\n`-> {keywords}`\n"}],sourceFile:"./books/prepare-knowledge-keywords.book"},{title:"Prepare Knowledge-piece Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-title.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"title",description:"The title of the document",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just 
title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}",resultingParameterName:"title",expectations:{words:{min:1,max:8}},dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge-piece Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-title.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{title}` The title of the document\n\n## Knowledge\n\n- EXPECT MIN 1 WORD\n- EXPECT MAX 8 WORDS\n\n```markdown\nYou are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-knowledge-title.book"},{title:"Prepare Persona",pipelineUrl:"https://promptbook.studio/promptbook/prepare-persona.book",formfactorName:"GENERIC",parameters:[{name:"availableModels",description:"List of available model names together with their descriptions as JSON",isInput:true,isOutput:false},{name:"personaDescription",description:"Description of the persona",isInput:true,isOutput:false},{name:"modelsRequirements",description:"Specific requirements for the model",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-model-requirements",title:"Make modelRequirements",content:"You are an experienced AI engineer, you need to find the best models for virtual assistants:\n\n## Example\n\n```json\n[\n {\n \"modelName\": \"gpt-4o\",\n \"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n \"temperature\": 0.7\n },\n {\n \"modelName\": \"claude-3-5-sonnet\",\n \"systemMessage\": \"You are a friendly and knowledgeable chatbot.\",\n \"temperature\": 0.5\n }\n]\n```\n\n## Instructions\n\n- Your output format is JSON array\n- Sort best-fitting 
models first\n- Omit any models that are not suitable\n- Write just the JSON, no other text should be present\n- Array contain items with following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nHere are the available models:\n\n```json\n{availableModels}\n```\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}",resultingParameterName:"modelsRequirements",format:"JSON",dependentParameterNames:["availableModels","personaDescription"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Persona\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-persona.book`\n- INPUT PARAMETER `{availableModels}` List of available model names together with their descriptions as JSON\n- INPUT PARAMETER `{personaDescription}` Description of the persona\n- OUTPUT PARAMETER `{modelsRequirements}` Specific requirements for the model\n\n## Make modelRequirements\n\n- FORMAT JSON\n\n```markdown\nYou are an experienced AI engineer, you need to find the best models for virtual assistants:\n\n## Example\n\n\\`\\`\\`json\n[\n {\n \"modelName\": \"gpt-4o\",\n \"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n \"temperature\": 0.7\n },\n {\n \"modelName\": \"claude-3-5-sonnet\",\n \"systemMessage\": \"You are a friendly and knowledgeable chatbot.\",\n \"temperature\": 0.5\n }\n]\n\\`\\`\\`\n\n## Instructions\n\n- Your output format is JSON array\n- Sort best-fitting models first\n- Omit any models that are not suitable\n- Write just the JSON, no other text should be present\n- Array contain items with following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nHere are 
the available models:\n\n\\`\\`\\`json\n{availableModels}\n\\`\\`\\`\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}\n```\n\n`-> {modelsRequirements}`\n"}],sourceFile:"./books/prepare-persona.book"},{title:"Prepare Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-title.book",formfactorName:"GENERIC",parameters:[{name:"book",description:"The book to prepare the title for",isInput:true,isOutput:false},{name:"title",description:"Best title for the book",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-title",title:"Make title",content:"Make best title for given text which describes the workflow:\n\n## Rules\n\n- Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"✍ Convert Knowledge-piece to title\" but \"✍ Title\"_\n\n## The workflow\n\n> {book}",resultingParameterName:"title",expectations:{words:{min:1,max:8},lines:{min:1,max:1}},dependentParameterNames:["book"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-title.book`\n- INPUT PARAMETER `{book}` The book to prepare the title for\n- OUTPUT PARAMETER `{title}` Best title for the book\n\n## Make title\n\n- EXPECT MIN 1 Word\n- EXPECT MAX 8 Words\n- EXPECT EXACTLY 1 Line\n\n```markdown\nMake best title for given text which describes the workflow:\n\n## Rules\n\n- 
Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"✍ Convert Knowledge-piece to title\" but \"✍ Title\"_\n\n## The workflow\n\n> {book}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-title.book"}];
|
|
851
866
|
|
|
852
867
|
/**
|
|
853
868
|
* Checks if value is valid email
|
|
@@ -932,7 +947,7 @@ function assertsError(whatWasThrown) {
|
|
|
932
947
|
* Function isValidJsonString will tell you if the string is valid JSON or not
|
|
933
948
|
*
|
|
934
949
|
* @param value The string to check
|
|
935
|
-
* @returns
|
|
950
|
+
* @returns `true` if the string is a valid JSON string, false otherwise
|
|
936
951
|
*
|
|
937
952
|
* @public exported from `@promptbook/utils`
|
|
938
953
|
*/
|
|
@@ -1343,8 +1358,12 @@ function checkSerializableAsJson(options) {
|
|
|
1343
1358
|
*/
|
|
1344
1359
|
|
|
1345
1360
|
/**
|
|
1346
|
-
*
|
|
1361
|
+
* Creates a deep clone of the given object
|
|
1362
|
+
*
|
|
1363
|
+
* Note: This method only works for objects that are fully serializable to JSON and do not contain functions, Dates, or special types.
|
|
1347
1364
|
*
|
|
1365
|
+
* @param objectValue The object to clone.
|
|
1366
|
+
* @returns A deep, writable clone of the input object.
|
|
1348
1367
|
* @public exported from `@promptbook/utils`
|
|
1349
1368
|
*/
|
|
1350
1369
|
function deepClone(objectValue) {
|
|
@@ -1426,13 +1445,13 @@ const ORDER_OF_PIPELINE_JSON = [
|
|
|
1426
1445
|
*/
|
|
1427
1446
|
const REPLACING_NONCE = 'ptbkauk42kV2dzao34faw7FudQUHYPtW';
|
|
1428
1447
|
/**
|
|
1429
|
-
*
|
|
1448
|
+
* Placeholder value indicating a parameter is missing its value.
|
|
1430
1449
|
*
|
|
1431
1450
|
* @private within the repository
|
|
1432
1451
|
*/
|
|
1433
1452
|
const RESERVED_PARAMETER_MISSING_VALUE = 'MISSING-' + REPLACING_NONCE;
|
|
1434
1453
|
/**
|
|
1435
|
-
*
|
|
1454
|
+
* Placeholder value indicating a parameter is restricted and cannot be used directly.
|
|
1436
1455
|
*
|
|
1437
1456
|
* @private within the repository
|
|
1438
1457
|
*/
|
|
@@ -1890,7 +1909,7 @@ function extractParameterNames(template) {
|
|
|
1890
1909
|
*/
|
|
1891
1910
|
function unpreparePipeline(pipeline) {
|
|
1892
1911
|
let { personas, knowledgeSources, tasks } = pipeline;
|
|
1893
|
-
personas = personas.map((persona) => ({ ...persona,
|
|
1912
|
+
personas = personas.map((persona) => ({ ...persona, modelsRequirements: undefined, preparationIds: undefined }));
|
|
1894
1913
|
knowledgeSources = knowledgeSources.map((knowledgeSource) => ({ ...knowledgeSource, preparationIds: undefined }));
|
|
1895
1914
|
tasks = tasks.map((task) => {
|
|
1896
1915
|
let { dependentParameterNames } = task;
|
|
@@ -1931,7 +1950,7 @@ class SimplePipelineCollection {
|
|
|
1931
1950
|
/**
|
|
1932
1951
|
* Constructs a pipeline collection from pipelines
|
|
1933
1952
|
*
|
|
1934
|
-
* @param pipelines
|
|
1953
|
+
* @param pipelines Array of pipeline JSON objects to include in the collection
|
|
1935
1954
|
*
|
|
1936
1955
|
* Note: During the construction logic of all pipelines are validated
|
|
1937
1956
|
* Note: It is not recommended to use this constructor directly, use `createCollectionFromJson` *(or other variant)* instead
|
|
@@ -2095,15 +2114,21 @@ class PipelineExecutionError extends Error {
|
|
|
2095
2114
|
* @public exported from `@promptbook/core`
|
|
2096
2115
|
*/
|
|
2097
2116
|
function isPipelinePrepared(pipeline) {
|
|
2098
|
-
// Note: Ignoring `pipeline.preparations`
|
|
2099
|
-
// Note: Ignoring `pipeline.knowledgePieces`
|
|
2117
|
+
// Note: Ignoring `pipeline.preparations`
|
|
2118
|
+
// Note: Ignoring `pipeline.knowledgePieces`
|
|
2100
2119
|
if (pipeline.title === undefined || pipeline.title === '' || pipeline.title === DEFAULT_BOOK_TITLE) {
|
|
2120
|
+
// TODO: !!! Comment this out
|
|
2121
|
+
console.log('Pipeline is not prepared because title is undefined or empty', pipeline);
|
|
2101
2122
|
return false;
|
|
2102
2123
|
}
|
|
2103
|
-
if (!pipeline.personas.every((persona) => persona.
|
|
2124
|
+
if (!pipeline.personas.every((persona) => persona.modelsRequirements !== undefined)) {
|
|
2125
|
+
// TODO: !!! Comment this out
|
|
2126
|
+
console.log('Pipeline is not prepared because personas are not prepared', pipeline.personas);
|
|
2104
2127
|
return false;
|
|
2105
2128
|
}
|
|
2106
2129
|
if (!pipeline.knowledgeSources.every((knowledgeSource) => knowledgeSource.preparationIds !== undefined)) {
|
|
2130
|
+
// TODO: !!! Comment this out
|
|
2131
|
+
console.log('Pipeline is not prepared because knowledge sources are not prepared', pipeline.knowledgeSources);
|
|
2107
2132
|
return false;
|
|
2108
2133
|
}
|
|
2109
2134
|
/*
|
|
@@ -2124,36 +2149,6 @@ function isPipelinePrepared(pipeline) {
|
|
|
2124
2149
|
* - [♨] Are tasks prepared
|
|
2125
2150
|
*/
|
|
2126
2151
|
|
|
2127
|
-
/**
|
|
2128
|
-
* Recursively converts JSON strings to JSON objects
|
|
2129
|
-
|
|
2130
|
-
* @public exported from `@promptbook/utils`
|
|
2131
|
-
*/
|
|
2132
|
-
function jsonStringsToJsons(object) {
|
|
2133
|
-
if (object === null) {
|
|
2134
|
-
return object;
|
|
2135
|
-
}
|
|
2136
|
-
if (Array.isArray(object)) {
|
|
2137
|
-
return object.map(jsonStringsToJsons);
|
|
2138
|
-
}
|
|
2139
|
-
if (typeof object !== 'object') {
|
|
2140
|
-
return object;
|
|
2141
|
-
}
|
|
2142
|
-
const newObject = { ...object };
|
|
2143
|
-
for (const [key, value] of Object.entries(object)) {
|
|
2144
|
-
if (typeof value === 'string' && isValidJsonString(value)) {
|
|
2145
|
-
newObject[key] = JSON.parse(value);
|
|
2146
|
-
}
|
|
2147
|
-
else {
|
|
2148
|
-
newObject[key] = jsonStringsToJsons(value);
|
|
2149
|
-
}
|
|
2150
|
-
}
|
|
2151
|
-
return newObject;
|
|
2152
|
-
}
|
|
2153
|
-
/**
|
|
2154
|
-
* TODO: Type the return type correctly
|
|
2155
|
-
*/
|
|
2156
|
-
|
|
2157
2152
|
/**
|
|
2158
2153
|
* This error indicates problems parsing the format value
|
|
2159
2154
|
*
|
|
@@ -2337,6 +2332,101 @@ const ALL_ERRORS = {
|
|
|
2337
2332
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
2338
2333
|
*/
|
|
2339
2334
|
|
|
2335
|
+
/**
|
|
2336
|
+
* Serializes an error into a [🚉] JSON-serializable object
|
|
2337
|
+
*
|
|
2338
|
+
* @public exported from `@promptbook/utils`
|
|
2339
|
+
*/
|
|
2340
|
+
function serializeError(error) {
|
|
2341
|
+
const { name, message, stack } = error;
|
|
2342
|
+
const { id } = error;
|
|
2343
|
+
if (!Object.keys(ALL_ERRORS).includes(name)) {
|
|
2344
|
+
console.error(spaceTrim((block) => `
|
|
2345
|
+
|
|
2346
|
+
Cannot serialize error with name "${name}"
|
|
2347
|
+
|
|
2348
|
+
Authors of Promptbook probably forgot to add this error into the list of errors:
|
|
2349
|
+
https://github.com/webgptorg/promptbook/blob/main/src/errors/0-index.ts
|
|
2350
|
+
|
|
2351
|
+
|
|
2352
|
+
${block(stack || message)}
|
|
2353
|
+
|
|
2354
|
+
`));
|
|
2355
|
+
}
|
|
2356
|
+
return {
|
|
2357
|
+
name: name,
|
|
2358
|
+
message,
|
|
2359
|
+
stack,
|
|
2360
|
+
id, // Include id in the serialized object
|
|
2361
|
+
};
|
|
2362
|
+
}
|
|
2363
|
+
|
|
2364
|
+
/**
|
|
2365
|
+
* Converts a JavaScript Object Notation (JSON) string into an object.
|
|
2366
|
+
*
|
|
2367
|
+
* Note: This is wrapper around `JSON.parse()` with better error and type handling
|
|
2368
|
+
*
|
|
2369
|
+
* @public exported from `@promptbook/utils`
|
|
2370
|
+
*/
|
|
2371
|
+
function jsonParse(value) {
|
|
2372
|
+
if (value === undefined) {
|
|
2373
|
+
throw new Error(`Can not parse JSON from undefined value.`);
|
|
2374
|
+
}
|
|
2375
|
+
else if (typeof value !== 'string') {
|
|
2376
|
+
console.error('Can not parse JSON from non-string value.', { text: value });
|
|
2377
|
+
throw new Error(spaceTrim(`
|
|
2378
|
+
Can not parse JSON from non-string value.
|
|
2379
|
+
|
|
2380
|
+
The value type: ${typeof value}
|
|
2381
|
+
See more in console.
|
|
2382
|
+
`));
|
|
2383
|
+
}
|
|
2384
|
+
try {
|
|
2385
|
+
return JSON.parse(value);
|
|
2386
|
+
}
|
|
2387
|
+
catch (error) {
|
|
2388
|
+
if (!(error instanceof Error)) {
|
|
2389
|
+
throw error;
|
|
2390
|
+
}
|
|
2391
|
+
throw new Error(spaceTrim((block) => `
|
|
2392
|
+
${block(error.message)}
|
|
2393
|
+
|
|
2394
|
+
The JSON text:
|
|
2395
|
+
${block(value)}
|
|
2396
|
+
`));
|
|
2397
|
+
}
|
|
2398
|
+
}
|
|
2399
|
+
|
|
2400
|
+
/**
|
|
2401
|
+
* Recursively converts JSON strings to JSON objects
|
|
2402
|
+
|
|
2403
|
+
* @public exported from `@promptbook/utils`
|
|
2404
|
+
*/
|
|
2405
|
+
function jsonStringsToJsons(object) {
|
|
2406
|
+
if (object === null) {
|
|
2407
|
+
return object;
|
|
2408
|
+
}
|
|
2409
|
+
if (Array.isArray(object)) {
|
|
2410
|
+
return object.map(jsonStringsToJsons);
|
|
2411
|
+
}
|
|
2412
|
+
if (typeof object !== 'object') {
|
|
2413
|
+
return object;
|
|
2414
|
+
}
|
|
2415
|
+
const newObject = { ...object };
|
|
2416
|
+
for (const [key, value] of Object.entries(object)) {
|
|
2417
|
+
if (typeof value === 'string' && isValidJsonString(value)) {
|
|
2418
|
+
newObject[key] = jsonParse(value);
|
|
2419
|
+
}
|
|
2420
|
+
else {
|
|
2421
|
+
newObject[key] = jsonStringsToJsons(value);
|
|
2422
|
+
}
|
|
2423
|
+
}
|
|
2424
|
+
return newObject;
|
|
2425
|
+
}
|
|
2426
|
+
/**
|
|
2427
|
+
* TODO: Type the return type correctly
|
|
2428
|
+
*/
|
|
2429
|
+
|
|
2340
2430
|
/**
|
|
2341
2431
|
* Deserializes the error object
|
|
2342
2432
|
*
|
|
@@ -2502,64 +2592,6 @@ function createTask(options) {
|
|
|
2502
2592
|
* TODO: [🐚] Split into more files and make `PrepareTask` & `RemoteTask` + split the function
|
|
2503
2593
|
*/
|
|
2504
2594
|
|
|
2505
|
-
/**
|
|
2506
|
-
* Serializes an error into a [🚉] JSON-serializable object
|
|
2507
|
-
*
|
|
2508
|
-
* @public exported from `@promptbook/utils`
|
|
2509
|
-
*/
|
|
2510
|
-
function serializeError(error) {
|
|
2511
|
-
const { name, message, stack } = error;
|
|
2512
|
-
const { id } = error;
|
|
2513
|
-
if (!Object.keys(ALL_ERRORS).includes(name)) {
|
|
2514
|
-
console.error(spaceTrim((block) => `
|
|
2515
|
-
|
|
2516
|
-
Cannot serialize error with name "${name}"
|
|
2517
|
-
|
|
2518
|
-
Authors of Promptbook probably forgot to add this error into the list of errors:
|
|
2519
|
-
https://github.com/webgptorg/promptbook/blob/main/src/errors/0-index.ts
|
|
2520
|
-
|
|
2521
|
-
|
|
2522
|
-
${block(stack || message)}
|
|
2523
|
-
|
|
2524
|
-
`));
|
|
2525
|
-
}
|
|
2526
|
-
return {
|
|
2527
|
-
name: name,
|
|
2528
|
-
message,
|
|
2529
|
-
stack,
|
|
2530
|
-
id, // Include id in the serialized object
|
|
2531
|
-
};
|
|
2532
|
-
}
|
|
2533
|
-
|
|
2534
|
-
/**
|
|
2535
|
-
* Async version of Array.forEach
|
|
2536
|
-
*
|
|
2537
|
-
* @param array - Array to iterate over
|
|
2538
|
-
* @param options - Options for the function
|
|
2539
|
-
* @param callbackfunction - Function to call for each item
|
|
2540
|
-
* @public exported from `@promptbook/utils`
|
|
2541
|
-
* @deprecated [🪂] Use queues instead
|
|
2542
|
-
*/
|
|
2543
|
-
async function forEachAsync(array, options, callbackfunction) {
|
|
2544
|
-
const { maxParallelCount = Infinity } = options;
|
|
2545
|
-
let index = 0;
|
|
2546
|
-
let runningTasks = [];
|
|
2547
|
-
const tasks = [];
|
|
2548
|
-
for (const item of array) {
|
|
2549
|
-
const currentIndex = index++;
|
|
2550
|
-
const task = callbackfunction(item, currentIndex, array);
|
|
2551
|
-
tasks.push(task);
|
|
2552
|
-
runningTasks.push(task);
|
|
2553
|
-
/* not await */ Promise.resolve(task).then(() => {
|
|
2554
|
-
runningTasks = runningTasks.filter((t) => t !== task);
|
|
2555
|
-
});
|
|
2556
|
-
if (maxParallelCount < runningTasks.length) {
|
|
2557
|
-
await Promise.race(runningTasks);
|
|
2558
|
-
}
|
|
2559
|
-
}
|
|
2560
|
-
await Promise.all(tasks);
|
|
2561
|
-
}
|
|
2562
|
-
|
|
2563
2595
|
/**
|
|
2564
2596
|
* Represents the uncertain value
|
|
2565
2597
|
*
|
|
@@ -2603,7 +2635,7 @@ const ZERO_USAGE = $deepFreeze({
|
|
|
2603
2635
|
*
|
|
2604
2636
|
* @public exported from `@promptbook/core`
|
|
2605
2637
|
*/
|
|
2606
|
-
$deepFreeze({
|
|
2638
|
+
const UNCERTAIN_USAGE = $deepFreeze({
|
|
2607
2639
|
price: UNCERTAIN_ZERO_VALUE,
|
|
2608
2640
|
input: {
|
|
2609
2641
|
tokensCount: UNCERTAIN_ZERO_VALUE,
|
|
@@ -2628,6 +2660,35 @@ $deepFreeze({
|
|
|
2628
2660
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
2629
2661
|
*/
|
|
2630
2662
|
|
|
2663
|
+
/**
|
|
2664
|
+
* Async version of Array.forEach
|
|
2665
|
+
*
|
|
2666
|
+
* @param array - Array to iterate over
|
|
2667
|
+
* @param options - Options for the function
|
|
2668
|
+
* @param callbackfunction - Function to call for each item
|
|
2669
|
+
* @public exported from `@promptbook/utils`
|
|
2670
|
+
* @deprecated [🪂] Use queues instead
|
|
2671
|
+
*/
|
|
2672
|
+
async function forEachAsync(array, options, callbackfunction) {
|
|
2673
|
+
const { maxParallelCount = Infinity } = options;
|
|
2674
|
+
let index = 0;
|
|
2675
|
+
let runningTasks = [];
|
|
2676
|
+
const tasks = [];
|
|
2677
|
+
for (const item of array) {
|
|
2678
|
+
const currentIndex = index++;
|
|
2679
|
+
const task = callbackfunction(item, currentIndex, array);
|
|
2680
|
+
tasks.push(task);
|
|
2681
|
+
runningTasks.push(task);
|
|
2682
|
+
/* not await */ Promise.resolve(task).then(() => {
|
|
2683
|
+
runningTasks = runningTasks.filter((t) => t !== task);
|
|
2684
|
+
});
|
|
2685
|
+
if (maxParallelCount < runningTasks.length) {
|
|
2686
|
+
await Promise.race(runningTasks);
|
|
2687
|
+
}
|
|
2688
|
+
}
|
|
2689
|
+
await Promise.all(tasks);
|
|
2690
|
+
}
|
|
2691
|
+
|
|
2631
2692
|
/**
|
|
2632
2693
|
* Function `addUsage` will add multiple usages into one
|
|
2633
2694
|
*
|
|
@@ -2974,27 +3035,48 @@ async function preparePersona(personaDescription, tools, options) {
|
|
|
2974
3035
|
pipeline: await collection.getPipelineByUrl('https://promptbook.studio/promptbook/prepare-persona.book'),
|
|
2975
3036
|
tools,
|
|
2976
3037
|
});
|
|
2977
|
-
// TODO: [🚐] Make arrayable LLMs -> single LLM DRY
|
|
2978
3038
|
const _llms = arrayableToArray(tools.llm);
|
|
2979
3039
|
const llmTools = _llms.length === 1 ? _llms[0] : joinLlmExecutionTools(..._llms);
|
|
2980
|
-
const availableModels = await llmTools.listModels()
|
|
2981
|
-
const availableModelNames = availableModels
|
|
3040
|
+
const availableModels = (await llmTools.listModels())
|
|
2982
3041
|
.filter(({ modelVariant }) => modelVariant === 'CHAT')
|
|
2983
|
-
.map(({ modelName }) =>
|
|
2984
|
-
|
|
2985
|
-
|
|
3042
|
+
.map(({ modelName, modelDescription }) => ({
|
|
3043
|
+
modelName,
|
|
3044
|
+
modelDescription,
|
|
3045
|
+
// <- Note: `modelTitle` and `modelVariant` is not relevant for this task
|
|
3046
|
+
}));
|
|
3047
|
+
const result = await preparePersonaExecutor({
|
|
3048
|
+
availableModels /* <- Note: Passing as JSON */,
|
|
3049
|
+
personaDescription,
|
|
3050
|
+
}).asPromise();
|
|
2986
3051
|
const { outputParameters } = result;
|
|
2987
|
-
const {
|
|
2988
|
-
|
|
3052
|
+
const { modelsRequirements: modelsRequirementsJson } = outputParameters;
|
|
3053
|
+
let modelsRequirementsUnchecked = jsonParse(modelsRequirementsJson);
|
|
2989
3054
|
if (isVerbose) {
|
|
2990
|
-
console.info(`PERSONA ${personaDescription}`,
|
|
3055
|
+
console.info(`PERSONA ${personaDescription}`, modelsRequirementsUnchecked);
|
|
2991
3056
|
}
|
|
2992
|
-
|
|
2993
|
-
|
|
3057
|
+
if (!Array.isArray(modelsRequirementsUnchecked)) {
|
|
3058
|
+
// <- TODO: Book should have syntax and system to enforce shape of JSON
|
|
3059
|
+
modelsRequirementsUnchecked = [modelsRequirementsUnchecked];
|
|
3060
|
+
/*
|
|
3061
|
+
throw new UnexpectedError(
|
|
3062
|
+
spaceTrim(
|
|
3063
|
+
(block) => `
|
|
3064
|
+
Invalid \`modelsRequirements\`:
|
|
3065
|
+
|
|
3066
|
+
\`\`\`json
|
|
3067
|
+
${block(JSON.stringify(modelsRequirementsUnchecked, null, 4))}
|
|
3068
|
+
\`\`\`
|
|
3069
|
+
`,
|
|
3070
|
+
),
|
|
3071
|
+
);
|
|
3072
|
+
*/
|
|
3073
|
+
}
|
|
3074
|
+
const modelsRequirements = modelsRequirementsUnchecked.map((modelRequirements) => ({
|
|
2994
3075
|
modelVariant: 'CHAT',
|
|
2995
|
-
|
|
2996
|
-
|
|
2997
|
-
|
|
3076
|
+
...modelRequirements,
|
|
3077
|
+
}));
|
|
3078
|
+
return {
|
|
3079
|
+
modelsRequirements,
|
|
2998
3080
|
};
|
|
2999
3081
|
}
|
|
3000
3082
|
/**
|
|
@@ -3005,7 +3087,8 @@ async function preparePersona(personaDescription, tools, options) {
|
|
|
3005
3087
|
*/
|
|
3006
3088
|
|
|
3007
3089
|
/**
|
|
3008
|
-
*
|
|
3090
|
+
* Safely retrieves the global scope object (window in browser, global in Node.js)
|
|
3091
|
+
* regardless of the JavaScript environment in which the code is running
|
|
3009
3092
|
*
|
|
3010
3093
|
* Note: `$` is used to indicate that this function is not a pure function - it access global scope
|
|
3011
3094
|
*
|
|
@@ -3016,10 +3099,10 @@ function $getGlobalScope() {
|
|
|
3016
3099
|
}
|
|
3017
3100
|
|
|
3018
3101
|
/**
|
|
3019
|
-
*
|
|
3102
|
+
* Normalizes a text string to SCREAMING_CASE (all uppercase with underscores).
|
|
3020
3103
|
*
|
|
3021
|
-
* @param text
|
|
3022
|
-
* @returns
|
|
3104
|
+
* @param text The text string to be converted to SCREAMING_CASE format.
|
|
3105
|
+
* @returns The normalized text in SCREAMING_CASE format.
|
|
3023
3106
|
* @example 'HELLO_WORLD'
|
|
3024
3107
|
* @example 'I_LOVE_PROMPTBOOK'
|
|
3025
3108
|
* @public exported from `@promptbook/utils`
|
|
@@ -3071,10 +3154,10 @@ function normalizeTo_SCREAMING_CASE(text) {
|
|
|
3071
3154
|
*/
|
|
3072
3155
|
|
|
3073
3156
|
/**
|
|
3074
|
-
*
|
|
3157
|
+
* Normalizes a text string to snake_case format.
|
|
3075
3158
|
*
|
|
3076
|
-
* @param text
|
|
3077
|
-
* @returns
|
|
3159
|
+
* @param text The text string to be converted to snake_case format.
|
|
3160
|
+
* @returns The normalized text in snake_case format.
|
|
3078
3161
|
* @example 'hello_world'
|
|
3079
3162
|
* @example 'i_love_promptbook'
|
|
3080
3163
|
* @public exported from `@promptbook/utils`
|
|
@@ -3084,11 +3167,11 @@ function normalizeTo_snake_case(text) {
|
|
|
3084
3167
|
}
|
|
3085
3168
|
|
|
3086
3169
|
/**
|
|
3087
|
-
*
|
|
3170
|
+
* Global registry for storing and managing registered entities of a given type.
|
|
3088
3171
|
*
|
|
3089
3172
|
* Note: `$` is used to indicate that this function is not a pure function - it accesses and adds variables in global scope.
|
|
3090
3173
|
*
|
|
3091
|
-
* @private internal utility, exported are only
|
|
3174
|
+
* @private internal utility, exported are only singleton instances of this class
|
|
3092
3175
|
*/
|
|
3093
3176
|
class $Register {
|
|
3094
3177
|
constructor(registerName) {
|
|
@@ -3132,10 +3215,10 @@ class $Register {
|
|
|
3132
3215
|
}
|
|
3133
3216
|
|
|
3134
3217
|
/**
|
|
3135
|
-
*
|
|
3218
|
+
* Global registry for storing metadata about all available scrapers and converters.
|
|
3136
3219
|
*
|
|
3137
|
-
* Note: `$` is used to indicate that this interacts with the global scope
|
|
3138
|
-
* @singleton Only one instance of each register is created per build, but
|
|
3220
|
+
* Note: `$` is used to indicate that this interacts with the global scope.
|
|
3221
|
+
* @singleton Only one instance of each register is created per build, but there can be more in different contexts (e.g., tests).
|
|
3139
3222
|
* @public exported from `@promptbook/core`
|
|
3140
3223
|
*/
|
|
3141
3224
|
const $scrapersMetadataRegister = new $Register('scrapers_metadata');
|
|
@@ -3144,10 +3227,11 @@ const $scrapersMetadataRegister = new $Register('scrapers_metadata');
|
|
|
3144
3227
|
*/
|
|
3145
3228
|
|
|
3146
3229
|
/**
|
|
3147
|
-
*
|
|
3230
|
+
* Registry for all available scrapers in the system.
|
|
3231
|
+
* Central point for registering and accessing different types of content scrapers.
|
|
3148
3232
|
*
|
|
3149
3233
|
* Note: `$` is used to indicate that this interacts with the global scope
|
|
3150
|
-
* @singleton Only one instance of each register is created per build, but
|
|
3234
|
+
* @singleton Only one instance of each register is created per build, but there can be more than one in different build modules
|
|
3151
3235
|
* @public exported from `@promptbook/core`
|
|
3152
3236
|
*/
|
|
3153
3237
|
const $scrapersRegister = new $Register('scraper_constructors');
|
|
@@ -3325,7 +3409,9 @@ const promptbookFetch = async (urlOrRequest, init) => {
|
|
|
3325
3409
|
*/
|
|
3326
3410
|
|
|
3327
3411
|
/**
|
|
3328
|
-
*
|
|
3412
|
+
* Factory function that creates a handler for processing knowledge sources.
|
|
3413
|
+
* Provides standardized processing of different types of knowledge sources
|
|
3414
|
+
* across various scraper implementations.
|
|
3329
3415
|
*
|
|
3330
3416
|
* @public exported from `@promptbook/core`
|
|
3331
3417
|
*/
|
|
@@ -3432,7 +3518,7 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3432
3518
|
> },
|
|
3433
3519
|
*/
|
|
3434
3520
|
async asJson() {
|
|
3435
|
-
return
|
|
3521
|
+
return jsonParse(await tools.fs.readFile(filename, 'utf-8'));
|
|
3436
3522
|
},
|
|
3437
3523
|
async asText() {
|
|
3438
3524
|
return await tools.fs.readFile(filename, 'utf-8');
|
|
@@ -3566,9 +3652,12 @@ TODO: [🧊] This is how it can look in future
|
|
|
3566
3652
|
*/
|
|
3567
3653
|
|
|
3568
3654
|
/**
|
|
3569
|
-
*
|
|
3655
|
+
* Prepares tasks by adding knowledge to the prompt and ensuring all necessary parameters are included.
|
|
3570
3656
|
*
|
|
3571
|
-
* @
|
|
3657
|
+
* @param tasks Sequence of tasks that are chained together to form a pipeline
|
|
3658
|
+
* @returns A promise that resolves to the prepared tasks.
|
|
3659
|
+
*
|
|
3660
|
+
* @private internal utility of `preparePipeline`
|
|
3572
3661
|
*/
|
|
3573
3662
|
async function prepareTasks(pipeline, tools, options) {
|
|
3574
3663
|
const { maxParallelCount = DEFAULT_MAX_PARALLEL_COUNT } = options;
|
|
@@ -3690,14 +3779,14 @@ async function preparePipeline(pipeline, tools, options) {
|
|
|
3690
3779
|
// TODO: [🖌][🧠] Implement some `mapAsync` function
|
|
3691
3780
|
const preparedPersonas = new Array(personas.length);
|
|
3692
3781
|
await forEachAsync(personas, { maxParallelCount /* <- TODO: [🪂] When there are subtasks, this maximul limit can be broken */ }, async (persona, index) => {
|
|
3693
|
-
const
|
|
3782
|
+
const { modelsRequirements } = await preparePersona(persona.description, { ...tools, llm: llmToolsWithUsage }, {
|
|
3694
3783
|
rootDirname,
|
|
3695
3784
|
maxParallelCount /* <- TODO: [🪂] */,
|
|
3696
3785
|
isVerbose,
|
|
3697
3786
|
});
|
|
3698
3787
|
const preparedPersona = {
|
|
3699
3788
|
...persona,
|
|
3700
|
-
|
|
3789
|
+
modelsRequirements,
|
|
3701
3790
|
preparationIds: [/* TODO: [🧊] -> */ currentPreparation.id],
|
|
3702
3791
|
// <- TODO: [🍙] Make some standard order of json properties
|
|
3703
3792
|
};
|
|
@@ -4005,7 +4094,7 @@ function union(...sets) {
|
|
|
4005
4094
|
}
|
|
4006
4095
|
|
|
4007
4096
|
/**
|
|
4008
|
-
*
|
|
4097
|
+
* Contains configuration options for parsing and generating CSV files, such as delimiters and quoting rules.
|
|
4009
4098
|
*
|
|
4010
4099
|
* @public exported from `@promptbook/core`
|
|
4011
4100
|
*/
|
|
@@ -4014,11 +4103,29 @@ const MANDATORY_CSV_SETTINGS = Object.freeze({
|
|
|
4014
4103
|
// encoding: 'utf-8',
|
|
4015
4104
|
});
|
|
4016
4105
|
|
|
4106
|
+
/**
 * Parses CSV text into the result object produced by `parse`
 *
 * Note: This is a wrapper around `papaparse.parse()` with better autohealing:
 *       when the effective `newline` setting contains no carriage return but the text does,
 *       every `\r\n` and every lone `\r` is rewritten to `\n` before parsing (and a warning is logged)
 *
 * @param value CSV text to parse
 * @param settings Parser settings; entries of `MANDATORY_CSV_SETTINGS` always override them
 * @param schema Currently unused, reserved for CSV schemas
 * @returns Whatever `parse` returns for the (possibly healed) text
 *
 * @private - for now until `@promptbook/csv` is released
 */
function csvParse(value /* <- TODO: string_csv */, settings, schema /* <- TODO: Make CSV Schemas */) {
    const effectiveSettings = { ...settings, ...MANDATORY_CSV_SETTINGS };
    const { newline } = effectiveSettings;

    // Note: Healing is needed only when a newline style is configured, it has no '\r', and the text contains one
    const isHealingNeeded = Boolean(newline) && !newline.includes('\r') && value.includes('\r');
    if (!isHealingNeeded) {
        return parse(value, effectiveSettings);
    }

    console.warn('CSV string contains carriage return characters, but in the CSV settings the `newline` setting does not include them. Autohealing the CSV string.');
    const healedValue = value.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
    return parse(healedValue, effectiveSettings);
}
|
|
4123
|
+
|
|
4017
4124
|
/**
|
|
4018
4125
|
* Function to check if a string is valid CSV
|
|
4019
4126
|
*
|
|
4020
4127
|
* @param value The string to check
|
|
4021
|
-
* @returns
|
|
4128
|
+
* @returns `true` if the string is a valid CSV string, false otherwise
|
|
4022
4129
|
*
|
|
4023
4130
|
* @public exported from `@promptbook/utils`
|
|
4024
4131
|
*/
|
|
@@ -4042,7 +4149,7 @@ function isValidCsvString(value) {
|
|
|
4042
4149
|
* @public exported from `@promptbook/core`
|
|
4043
4150
|
* <- TODO: [🏢] Export from package `@promptbook/csv`
|
|
4044
4151
|
*/
|
|
4045
|
-
const
|
|
4152
|
+
const CsvFormatParser = {
|
|
4046
4153
|
formatName: 'CSV',
|
|
4047
4154
|
aliases: ['SPREADSHEET', 'TABLE'],
|
|
4048
4155
|
isValid(value, settings, schema) {
|
|
@@ -4054,12 +4161,12 @@ const CsvFormatDefinition = {
|
|
|
4054
4161
|
heal(value, settings, schema) {
|
|
4055
4162
|
throw new Error('Not implemented');
|
|
4056
4163
|
},
|
|
4057
|
-
|
|
4164
|
+
subvalueParsers: [
|
|
4058
4165
|
{
|
|
4059
4166
|
subvalueName: 'ROW',
|
|
4060
|
-
async mapValues(
|
|
4061
|
-
|
|
4062
|
-
const csv =
|
|
4167
|
+
async mapValues(options) {
|
|
4168
|
+
const { value, outputParameterName, settings, mapCallback, onProgress } = options;
|
|
4169
|
+
const csv = csvParse(value, settings);
|
|
4063
4170
|
if (csv.errors.length !== 0) {
|
|
4064
4171
|
throw new CsvFormatError(spaceTrim((block) => `
|
|
4065
4172
|
CSV parsing error
|
|
@@ -4074,23 +4181,37 @@ const CsvFormatDefinition = {
|
|
|
4074
4181
|
${block(value)}
|
|
4075
4182
|
`));
|
|
4076
4183
|
}
|
|
4077
|
-
const mappedData =
|
|
4184
|
+
const mappedData = [];
|
|
4185
|
+
const length = csv.data.length;
|
|
4186
|
+
for (let index = 0; index < length; index++) {
|
|
4187
|
+
const row = csv.data[index];
|
|
4078
4188
|
if (row[outputParameterName]) {
|
|
4079
4189
|
throw new CsvFormatError(`Can not overwrite existing column "${outputParameterName}" in CSV row`);
|
|
4080
4190
|
}
|
|
4081
|
-
|
|
4191
|
+
const mappedRow = {
|
|
4082
4192
|
...row,
|
|
4083
|
-
[outputParameterName]: await mapCallback(row, index),
|
|
4193
|
+
[outputParameterName]: await mapCallback(row, index, length),
|
|
4084
4194
|
};
|
|
4085
|
-
|
|
4195
|
+
mappedData.push(mappedRow);
|
|
4196
|
+
if (onProgress) {
|
|
4197
|
+
// Note: Report the CSV with all rows mapped so far
|
|
4198
|
+
/*
|
|
4199
|
+
// TODO: [🛕] Report progress with all the rows including the pending ones
|
|
4200
|
+
const progressData = mappedData.map((row, i) =>
|
|
4201
|
+
i > index ? { ...row, [outputParameterName]: PENDING_VALUE_PLACEHOLDER } : row,
|
|
4202
|
+
);
|
|
4203
|
+
*/
|
|
4204
|
+
await onProgress(unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS }));
|
|
4205
|
+
}
|
|
4206
|
+
}
|
|
4086
4207
|
return unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
|
|
4087
4208
|
},
|
|
4088
4209
|
},
|
|
4089
4210
|
{
|
|
4090
4211
|
subvalueName: 'CELL',
|
|
4091
|
-
async mapValues(
|
|
4092
|
-
|
|
4093
|
-
const csv =
|
|
4212
|
+
async mapValues(options) {
|
|
4213
|
+
const { value, settings, mapCallback, onProgress } = options;
|
|
4214
|
+
const csv = csvParse(value, settings);
|
|
4094
4215
|
if (csv.errors.length !== 0) {
|
|
4095
4216
|
throw new CsvFormatError(spaceTrim((block) => `
|
|
4096
4217
|
CSV parsing error
|
|
@@ -4106,9 +4227,9 @@ const CsvFormatDefinition = {
|
|
|
4106
4227
|
`));
|
|
4107
4228
|
}
|
|
4108
4229
|
const mappedData = await Promise.all(csv.data.map(async (row, rowIndex) => {
|
|
4109
|
-
return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex) => {
|
|
4230
|
+
return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex, array) => {
|
|
4110
4231
|
const index = rowIndex * Object.keys(row).length + columnIndex;
|
|
4111
|
-
return /* not await */ mapCallback({ [key]: value }, index);
|
|
4232
|
+
return /* not await */ mapCallback({ [key]: value }, index, array.length);
|
|
4112
4233
|
}));
|
|
4113
4234
|
}));
|
|
4114
4235
|
return unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
|
|
@@ -4117,10 +4238,10 @@ const CsvFormatDefinition = {
|
|
|
4117
4238
|
],
|
|
4118
4239
|
};
|
|
4119
4240
|
/**
|
|
4120
|
-
* TODO: [🍓] In `
|
|
4121
|
-
* TODO: [🍓] In `
|
|
4122
|
-
* TODO: [🍓] In `
|
|
4123
|
-
* TODO: [🍓] In `
|
|
4241
|
+
* TODO: [🍓] In `CsvFormatParser` implement simple `isValid`
|
|
4242
|
+
* TODO: [🍓] In `CsvFormatParser` implement partial `canBeValid`
|
|
4243
|
+
* TODO: [🍓] In `CsvFormatParser` implement `heal`
|
|
4244
|
+
* TODO: [🍓] In `CsvFormatParser` implement `subvalueParsers`
|
|
4124
4245
|
* TODO: [🏢] Allow to expect something inside CSV objects and other formats
|
|
4125
4246
|
*/
|
|
4126
4247
|
|
|
@@ -4129,7 +4250,7 @@ const CsvFormatDefinition = {
|
|
|
4129
4250
|
*
|
|
4130
4251
|
* @private still in development [🏢]
|
|
4131
4252
|
*/
|
|
4132
|
-
const
|
|
4253
|
+
const JsonFormatParser = {
|
|
4133
4254
|
formatName: 'JSON',
|
|
4134
4255
|
mimeType: 'application/json',
|
|
4135
4256
|
isValid(value, settings, schema) {
|
|
@@ -4141,28 +4262,28 @@ const JsonFormatDefinition = {
|
|
|
4141
4262
|
heal(value, settings, schema) {
|
|
4142
4263
|
throw new Error('Not implemented');
|
|
4143
4264
|
},
|
|
4144
|
-
|
|
4265
|
+
subvalueParsers: [],
|
|
4145
4266
|
};
|
|
4146
4267
|
/**
|
|
4147
4268
|
* TODO: [🧠] Maybe proper instance of object
|
|
4148
4269
|
* TODO: [0] Make string_serialized_json
|
|
4149
4270
|
* TODO: [1] Make type for JSON Settings and Schema
|
|
4150
4271
|
* TODO: [🧠] What to use for validating JSONs - JSON Schema, ZoD, typescript types/interfaces,...?
|
|
4151
|
-
* TODO: [🍓] In `
|
|
4152
|
-
* TODO: [🍓] In `
|
|
4153
|
-
* TODO: [🍓] In `
|
|
4154
|
-
* TODO: [🍓] In `
|
|
4272
|
+
* TODO: [🍓] In `JsonFormatParser` implement simple `isValid`
|
|
4273
|
+
* TODO: [🍓] In `JsonFormatParser` implement partial `canBeValid`
|
|
4274
|
+
* TODO: [🍓] In `JsonFormatParser` implement `heal`
|
|
4275
|
+
* TODO: [🍓] In `JsonFormatParser` implement `subvalueParsers`
|
|
4155
4276
|
* TODO: [🏢] Allow to expect something inside JSON objects and other formats
|
|
4156
4277
|
*/
|
|
4157
4278
|
|
|
4158
4279
|
/**
|
|
4159
4280
|
* Definition for any text - this will be always valid
|
|
4160
4281
|
*
|
|
4161
|
-
* Note: This is not useful for validation, but for splitting and mapping with `
|
|
4282
|
+
* Note: This is not useful for validation, but for splitting and mapping with `subvalueParsers`
|
|
4162
4283
|
*
|
|
4163
4284
|
* @public exported from `@promptbook/core`
|
|
4164
4285
|
*/
|
|
4165
|
-
const
|
|
4286
|
+
const TextFormatParser = {
|
|
4166
4287
|
formatName: 'TEXT',
|
|
4167
4288
|
isValid(value) {
|
|
4168
4289
|
return typeof value === 'string';
|
|
@@ -4171,19 +4292,20 @@ const TextFormatDefinition = {
|
|
|
4171
4292
|
return typeof partialValue === 'string';
|
|
4172
4293
|
},
|
|
4173
4294
|
heal() {
|
|
4174
|
-
throw new UnexpectedError('It does not make sense to call `
|
|
4295
|
+
throw new UnexpectedError('It does not make sense to call `TextFormatParser.heal`');
|
|
4175
4296
|
},
|
|
4176
|
-
|
|
4297
|
+
subvalueParsers: [
|
|
4177
4298
|
{
|
|
4178
4299
|
subvalueName: 'LINE',
|
|
4179
|
-
async mapValues(
|
|
4300
|
+
async mapValues(options) {
|
|
4301
|
+
const { value, mapCallback, onProgress } = options;
|
|
4180
4302
|
const lines = value.split('\n');
|
|
4181
|
-
const mappedLines = await Promise.all(lines.map((lineContent, lineNumber) =>
|
|
4303
|
+
const mappedLines = await Promise.all(lines.map((lineContent, lineNumber, array) =>
|
|
4182
4304
|
// TODO: [🧠] Maybe option to skip empty line
|
|
4183
4305
|
/* not await */ mapCallback({
|
|
4184
4306
|
lineContent,
|
|
4185
4307
|
// TODO: [🧠] Maybe also put here `lineNumber`
|
|
4186
|
-
}, lineNumber)));
|
|
4308
|
+
}, lineNumber, array.length)));
|
|
4187
4309
|
return mappedLines.join('\n');
|
|
4188
4310
|
},
|
|
4189
4311
|
},
|
|
@@ -4193,10 +4315,10 @@ const TextFormatDefinition = {
|
|
|
4193
4315
|
/**
|
|
4194
4316
|
* TODO: [1] Make type for XML Text and Schema
|
|
4195
4317
|
* TODO: [🧠][🤠] Here should be all words, characters, lines, paragraphs, pages available as subvalues
|
|
4196
|
-
* TODO: [🍓] In `
|
|
4197
|
-
* TODO: [🍓] In `
|
|
4198
|
-
* TODO: [🍓] In `
|
|
4199
|
-
* TODO: [🍓] In `
|
|
4318
|
+
* TODO: [🍓] In `TextFormatParser` implement simple `isValid`
|
|
4319
|
+
* TODO: [🍓] In `TextFormatParser` implement partial `canBeValid`
|
|
4320
|
+
* TODO: [🍓] In `TextFormatParser` implement `heal`
|
|
4321
|
+
* TODO: [🍓] In `TextFormatParser` implement `subvalueParsers`
|
|
4200
4322
|
* TODO: [🏢] Allow to expect something inside each item of list and other formats
|
|
4201
4323
|
*/
|
|
4202
4324
|
|
|
@@ -4204,7 +4326,7 @@ const TextFormatDefinition = {
|
|
|
4204
4326
|
* Function to check if a string is valid XML
|
|
4205
4327
|
*
|
|
4206
4328
|
* @param value
|
|
4207
|
-
* @returns
|
|
4329
|
+
* @returns `true` if the string is a valid XML string, false otherwise
|
|
4208
4330
|
*
|
|
4209
4331
|
* @public exported from `@promptbook/utils`
|
|
4210
4332
|
*/
|
|
@@ -4229,7 +4351,7 @@ function isValidXmlString(value) {
|
|
|
4229
4351
|
*
|
|
4230
4352
|
* @private still in development [🏢]
|
|
4231
4353
|
*/
|
|
4232
|
-
const
|
|
4354
|
+
const XmlFormatParser = {
|
|
4233
4355
|
formatName: 'XML',
|
|
4234
4356
|
mimeType: 'application/xml',
|
|
4235
4357
|
isValid(value, settings, schema) {
|
|
@@ -4241,17 +4363,17 @@ const XmlFormatDefinition = {
|
|
|
4241
4363
|
heal(value, settings, schema) {
|
|
4242
4364
|
throw new Error('Not implemented');
|
|
4243
4365
|
},
|
|
4244
|
-
|
|
4366
|
+
subvalueParsers: [],
|
|
4245
4367
|
};
|
|
4246
4368
|
/**
|
|
4247
4369
|
* TODO: [🧠] Maybe proper instance of object
|
|
4248
4370
|
* TODO: [0] Make string_serialized_xml
|
|
4249
4371
|
* TODO: [1] Make type for XML Settings and Schema
|
|
4250
4372
|
* TODO: [🧠] What to use for validating XMLs - XSD,...
|
|
4251
|
-
* TODO: [🍓] In `
|
|
4252
|
-
* TODO: [🍓] In `
|
|
4253
|
-
* TODO: [🍓] In `
|
|
4254
|
-
* TODO: [🍓] In `
|
|
4373
|
+
* TODO: [🍓] In `XmlFormatParser` implement simple `isValid`
|
|
4374
|
+
* TODO: [🍓] In `XmlFormatParser` implement partial `canBeValid`
|
|
4375
|
+
* TODO: [🍓] In `XmlFormatParser` implement `heal`
|
|
4376
|
+
* TODO: [🍓] In `XmlFormatParser` implement `subvalueParsers`
|
|
4255
4377
|
* TODO: [🏢] Allow to expect something inside XML and other formats
|
|
4256
4378
|
*/
|
|
4257
4379
|
|
|
@@ -4260,24 +4382,19 @@ const XmlFormatDefinition = {
|
|
|
4260
4382
|
*
|
|
4261
4383
|
* @private internal index of `...` <- TODO [🏢]
|
|
4262
4384
|
*/
|
|
4263
|
-
const FORMAT_DEFINITIONS = [
|
|
4264
|
-
JsonFormatDefinition,
|
|
4265
|
-
XmlFormatDefinition,
|
|
4266
|
-
TextFormatDefinition,
|
|
4267
|
-
CsvFormatDefinition,
|
|
4268
|
-
];
|
|
4385
|
+
const FORMAT_DEFINITIONS = [JsonFormatParser, XmlFormatParser, TextFormatParser, CsvFormatParser];
|
|
4269
4386
|
/**
|
|
4270
4387
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
4271
4388
|
*/
|
|
4272
4389
|
|
|
4273
4390
|
/**
|
|
4274
|
-
* Maps available parameters to expected parameters
|
|
4391
|
+
* Maps available parameters to expected parameters for a pipeline task.
|
|
4275
4392
|
*
|
|
4276
4393
|
* The strategy is:
|
|
4277
|
-
* 1)
|
|
4278
|
-
* 2)
|
|
4394
|
+
* 1) First, match parameters by name where both available and expected.
|
|
4395
|
+
* 2) Then, if there are unmatched expected and available parameters, map them by order.
|
|
4279
4396
|
*
|
|
4280
|
-
* @throws {PipelineExecutionError}
|
|
4397
|
+
* @throws {PipelineExecutionError} If the number of unmatched expected and available parameters does not match, or mapping is ambiguous.
|
|
4281
4398
|
* @private within the repository used in `createPipelineExecutor`
|
|
4282
4399
|
*/
|
|
4283
4400
|
function mapAvailableToExpectedParameters(options) {
|
|
@@ -4300,7 +4417,7 @@ function mapAvailableToExpectedParameters(options) {
|
|
|
4300
4417
|
else if (!availableParametersNames.has(parameterName) && expectedParameterNames.has(parameterName)) ;
|
|
4301
4418
|
}
|
|
4302
4419
|
if (expectedParameterNames.size === 0) {
|
|
4303
|
-
// Note: [👨👨👧] Now we can freeze `mappedParameters` to prevent
|
|
4420
|
+
// Note: [👨👨👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
|
|
4304
4421
|
Object.freeze(mappedParameters);
|
|
4305
4422
|
return mappedParameters;
|
|
4306
4423
|
}
|
|
@@ -4331,7 +4448,7 @@ function mapAvailableToExpectedParameters(options) {
|
|
|
4331
4448
|
for (let i = 0; i < expectedParameterNames.size; i++) {
|
|
4332
4449
|
mappedParameters[expectedParameterNamesArray[i]] = availableParameters[availableParametersNamesArray[i]];
|
|
4333
4450
|
}
|
|
4334
|
-
// Note: [👨👨👧] Now we can freeze `mappedParameters` to prevent
|
|
4451
|
+
// Note: [👨👨👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
|
|
4335
4452
|
Object.freeze(mappedParameters);
|
|
4336
4453
|
return mappedParameters;
|
|
4337
4454
|
}
|
|
@@ -4435,7 +4552,7 @@ function extractJsonBlock(markdown) {
|
|
|
4435
4552
|
}
|
|
4436
4553
|
/**
|
|
4437
4554
|
* TODO: Add some auto-healing logic + extract YAML, JSON5, TOML, etc.
|
|
4438
|
-
* TODO: [🏢] Make this logic part of `
|
|
4555
|
+
* TODO: [🏢] Make this logic part of `JsonFormatParser` or `isValidJsonString`
|
|
4439
4556
|
*/
|
|
4440
4557
|
|
|
4441
4558
|
/**
|
|
@@ -4478,10 +4595,12 @@ function templateParameters(template, parameters) {
|
|
|
4478
4595
|
throw new PipelineExecutionError('Parameter is already opened or not closed');
|
|
4479
4596
|
}
|
|
4480
4597
|
if (parameters[parameterName] === undefined) {
|
|
4598
|
+
console.log('!!! templateParameters 1', { parameterName, template, parameters });
|
|
4481
4599
|
throw new PipelineExecutionError(`Parameter \`{${parameterName}}\` is not defined`);
|
|
4482
4600
|
}
|
|
4483
4601
|
let parameterValue = parameters[parameterName];
|
|
4484
4602
|
if (parameterValue === undefined) {
|
|
4603
|
+
console.log('!!! templateParameters 2', { parameterName, template, parameters });
|
|
4485
4604
|
throw new PipelineExecutionError(`Parameter \`{${parameterName}}\` is not defined`);
|
|
4486
4605
|
}
|
|
4487
4606
|
parameterValue = valueToString(parameterValue);
|
|
@@ -4637,7 +4756,7 @@ const CountUtils = {
|
|
|
4637
4756
|
PAGES: countPages,
|
|
4638
4757
|
};
|
|
4639
4758
|
/**
|
|
4640
|
-
* TODO: [🧠][🤠] This should be probbably as part of `
|
|
4759
|
+
* TODO: [🧠][🤠] This should probably be part of `TextFormatParser`
|
|
4641
4760
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
4642
4761
|
*/
|
|
4643
4762
|
|
|
@@ -4665,13 +4784,17 @@ function checkExpectations(expectations, value) {
|
|
|
4665
4784
|
}
|
|
4666
4785
|
/**
|
|
4667
4786
|
* TODO: [💝] Unite object for expecting amount and format
|
|
4668
|
-
* TODO: [🧠][🤠] This should be part of `
|
|
4787
|
+
* TODO: [🧠][🤠] This should be part of `TextFormatParser`
|
|
4669
4788
|
* Note: [💝] and [🤠] are interconnected together
|
|
4670
4789
|
*/
|
|
4671
4790
|
|
|
4672
4791
|
/**
|
|
4673
|
-
*
|
|
4792
|
+
* Executes a pipeline task with multiple attempts, including joker and retry logic. Handles different task types
|
|
4793
|
+
* (prompt, script, dialog, etc.), applies postprocessing, checks expectations, and updates the execution report.
|
|
4794
|
+
* Throws errors if execution fails after all attempts.
|
|
4674
4795
|
*
|
|
4796
|
+
* @param options - The options for execution, including task, parameters, pipeline, and configuration.
|
|
4797
|
+
* @returns The result string of the executed task.
|
|
4675
4798
|
* @private internal utility of `createPipelineExecutor`
|
|
4676
4799
|
*/
|
|
4677
4800
|
async function executeAttempts(options) {
|
|
@@ -4893,7 +5016,7 @@ async function executeAttempts(options) {
|
|
|
4893
5016
|
if (task.format) {
|
|
4894
5017
|
if (task.format === 'JSON') {
|
|
4895
5018
|
if (!isValidJsonString($ongoingTaskResult.$resultString || '')) {
|
|
4896
|
-
// TODO: [🏢] Do more universally via `
|
|
5019
|
+
// TODO: [🏢] Do more universally via `FormatParser`
|
|
4897
5020
|
try {
|
|
4898
5021
|
$ongoingTaskResult.$resultString = extractJsonBlock($ongoingTaskResult.$resultString || '');
|
|
4899
5022
|
}
|
|
@@ -4995,12 +5118,16 @@ async function executeAttempts(options) {
|
|
|
4995
5118
|
*/
|
|
4996
5119
|
|
|
4997
5120
|
/**
|
|
4998
|
-
*
|
|
5121
|
+
* Executes a pipeline task that requires mapping or iterating over subvalues of a parameter (such as rows in a CSV).
|
|
5122
|
+
* Handles format and subformat resolution, error handling, and progress reporting.
|
|
5123
|
+
*
|
|
5124
|
+
* @param options - Options for execution, including task details and progress callback.
|
|
5125
|
+
* @returns The result of the subvalue mapping or execution attempts.
|
|
4999
5126
|
*
|
|
5000
5127
|
* @private internal utility of `createPipelineExecutor`
|
|
5001
5128
|
*/
|
|
5002
5129
|
async function executeFormatSubvalues(options) {
|
|
5003
|
-
const { task, jokerParameterNames, parameters, priority, csvSettings, pipelineIdentification } = options;
|
|
5130
|
+
const { task, jokerParameterNames, parameters, priority, csvSettings, onProgress, pipelineIdentification } = options;
|
|
5004
5131
|
if (task.foreach === undefined) {
|
|
5005
5132
|
return /* not await */ executeAttempts(options);
|
|
5006
5133
|
}
|
|
@@ -5031,16 +5158,16 @@ async function executeFormatSubvalues(options) {
|
|
|
5031
5158
|
${block(pipelineIdentification)}
|
|
5032
5159
|
`));
|
|
5033
5160
|
}
|
|
5034
|
-
const
|
|
5035
|
-
if (
|
|
5161
|
+
const subvalueParser = formatDefinition.subvalueParsers.find((subvalueParser) => [subvalueParser.subvalueName, ...(subvalueParser.aliases || [])].includes(task.foreach.subformatName));
|
|
5162
|
+
if (subvalueParser === undefined) {
|
|
5036
5163
|
throw new UnexpectedError(
|
|
5037
5164
|
// <- TODO: [🧠][🧐] Should be formats fixed per promptbook version or behave as plugins (=> change UnexpectedError)
|
|
5038
5165
|
spaceTrim((block) => `
|
|
5039
5166
|
Unsupported subformat name "${task.foreach.subformatName}" for format "${task.foreach.formatName}"
|
|
5040
5167
|
|
|
5041
5168
|
Available subformat names for format "${formatDefinition.formatName}":
|
|
5042
|
-
${block(formatDefinition.
|
|
5043
|
-
.map((
|
|
5169
|
+
${block(formatDefinition.subvalueParsers
|
|
5170
|
+
.map((subvalueParser) => subvalueParser.subvalueName)
|
|
5044
5171
|
.map((subvalueName) => `- ${subvalueName}`)
|
|
5045
5172
|
.join('\n'))}
|
|
5046
5173
|
|
|
@@ -5054,53 +5181,83 @@ async function executeFormatSubvalues(options) {
|
|
|
5054
5181
|
formatSettings = csvSettings;
|
|
5055
5182
|
// <- TODO: [🤹♂️] More universal, make simmilar pattern for other formats for example \n vs \r\n in text
|
|
5056
5183
|
}
|
|
5057
|
-
const resultString = await
|
|
5058
|
-
|
|
5059
|
-
|
|
5060
|
-
|
|
5061
|
-
|
|
5062
|
-
|
|
5063
|
-
|
|
5064
|
-
|
|
5065
|
-
|
|
5066
|
-
|
|
5067
|
-
|
|
5068
|
-
|
|
5069
|
-
|
|
5184
|
+
const resultString = await subvalueParser.mapValues({
|
|
5185
|
+
value: parameterValue,
|
|
5186
|
+
outputParameterName: task.foreach.outputSubparameterName,
|
|
5187
|
+
settings: formatSettings,
|
|
5188
|
+
onProgress(partialResultString) {
|
|
5189
|
+
return onProgress(Object.freeze({
|
|
5190
|
+
[task.resultingParameterName]: partialResultString,
|
|
5191
|
+
}));
|
|
5192
|
+
},
|
|
5193
|
+
async mapCallback(subparameters, index, length) {
|
|
5194
|
+
let mappedParameters;
|
|
5195
|
+
try {
|
|
5196
|
+
mappedParameters = mapAvailableToExpectedParameters({
|
|
5197
|
+
expectedParameters: Object.fromEntries(task.foreach.inputSubparameterNames.map((subparameterName) => [subparameterName, null])),
|
|
5198
|
+
availableParameters: subparameters,
|
|
5199
|
+
});
|
|
5070
5200
|
}
|
|
5071
|
-
|
|
5072
|
-
|
|
5201
|
+
catch (error) {
|
|
5202
|
+
if (!(error instanceof PipelineExecutionError)) {
|
|
5203
|
+
throw error;
|
|
5204
|
+
}
|
|
5205
|
+
const highLevelError = new PipelineExecutionError(spaceTrim((block) => `
|
|
5206
|
+
${error.message}
|
|
5073
5207
|
|
|
5074
|
-
|
|
5075
|
-
|
|
5208
|
+
This is error in FOREACH command when mapping ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
|
|
5209
|
+
You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
|
|
5076
5210
|
|
|
5077
|
-
|
|
5078
|
-
|
|
5079
|
-
|
|
5080
|
-
|
|
5081
|
-
|
|
5082
|
-
|
|
5083
|
-
|
|
5084
|
-
|
|
5085
|
-
|
|
5086
|
-
|
|
5087
|
-
|
|
5088
|
-
|
|
5089
|
-
|
|
5090
|
-
|
|
5091
|
-
|
|
5092
|
-
|
|
5093
|
-
|
|
5094
|
-
|
|
5095
|
-
|
|
5096
|
-
|
|
5211
|
+
${block(pipelineIdentification)}
|
|
5212
|
+
`));
|
|
5213
|
+
if (length > BIG_DATASET_TRESHOLD) {
|
|
5214
|
+
console.error(highLevelError);
|
|
5215
|
+
return FAILED_VALUE_PLACEHOLDER;
|
|
5216
|
+
}
|
|
5217
|
+
throw highLevelError;
|
|
5218
|
+
}
|
|
5219
|
+
const allSubparameters = {
|
|
5220
|
+
...parameters,
|
|
5221
|
+
...mappedParameters,
|
|
5222
|
+
};
|
|
5223
|
+
Object.freeze(allSubparameters);
|
|
5224
|
+
try {
|
|
5225
|
+
const subresultString = await executeAttempts({
|
|
5226
|
+
...options,
|
|
5227
|
+
priority: priority + index,
|
|
5228
|
+
parameters: allSubparameters,
|
|
5229
|
+
pipelineIdentification: spaceTrim((block) => `
|
|
5230
|
+
${block(pipelineIdentification)}
|
|
5231
|
+
Subparameter index: ${index}
|
|
5232
|
+
`),
|
|
5233
|
+
});
|
|
5234
|
+
return subresultString;
|
|
5235
|
+
}
|
|
5236
|
+
catch (error) {
|
|
5237
|
+
if (length > BIG_DATASET_TRESHOLD) {
|
|
5238
|
+
console.error(spaceTrim((block) => `
|
|
5239
|
+
${error.message}
|
|
5240
|
+
|
|
5241
|
+
This is error in FOREACH command when processing ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
|
|
5242
|
+
|
|
5243
|
+
${block(pipelineIdentification)}
|
|
5244
|
+
`));
|
|
5245
|
+
return FAILED_VALUE_PLACEHOLDER;
|
|
5246
|
+
}
|
|
5247
|
+
throw error;
|
|
5248
|
+
}
|
|
5249
|
+
},
|
|
5097
5250
|
});
|
|
5098
5251
|
return resultString;
|
|
5099
5252
|
}
|
|
5100
5253
|
|
|
5101
5254
|
/**
|
|
5102
|
-
*
|
|
5255
|
+
* Returns the context for a given task, typically used to provide additional information or variables
|
|
5256
|
+
* required for the execution of the task within a pipeline. The context is returned as a string value
|
|
5257
|
+
* that may include markdown formatting.
|
|
5103
5258
|
*
|
|
5259
|
+
* @param task - The task for which the context is being generated. This should be a deeply immutable TaskJson object.
|
|
5260
|
+
* @returns The context as a string, formatted as markdown and parameter value.
|
|
5104
5261
|
* @private internal utility of `createPipelineExecutor`
|
|
5105
5262
|
*/
|
|
5106
5263
|
async function getContextForTask(task) {
|
|
@@ -5108,7 +5265,7 @@ async function getContextForTask(task) {
|
|
|
5108
5265
|
}
|
|
5109
5266
|
|
|
5110
5267
|
/**
|
|
5111
|
-
*
|
|
5268
|
+
* Retrieves example values or templates for a given task, used to guide or validate pipeline execution.
|
|
5112
5269
|
*
|
|
5113
5270
|
* @private internal utility of `createPipelineExecutor`
|
|
5114
5271
|
*/
|
|
@@ -5117,25 +5274,127 @@ async function getExamplesForTask(task) {
|
|
|
5117
5274
|
}
|
|
5118
5275
|
|
|
5119
5276
|
/**
|
|
5120
|
-
*
|
|
5277
|
+
* Computes the cosine similarity between two embedding vectors
|
|
5278
|
+
*
|
|
5279
|
+
* Note: This is helping function for RAG (retrieval-augmented generation)
|
|
5280
|
+
*
|
|
5281
|
+
* @param embeddingVector1
|
|
5282
|
+
* @param embeddingVector2
|
|
5283
|
+
* @returns Cosine similarity between the two vectors
|
|
5284
|
+
*
|
|
5285
|
+
* @public exported from `@promptbook/core`
|
|
5286
|
+
*/
|
|
5287
|
+
function computeCosineSimilarity(embeddingVector1, embeddingVector2) {
|
|
5288
|
+
if (embeddingVector1.length !== embeddingVector2.length) {
|
|
5289
|
+
throw new TypeError('Embedding vectors must have the same length');
|
|
5290
|
+
}
|
|
5291
|
+
const dotProduct = embeddingVector1.reduce((sum, value, index) => sum + value * embeddingVector2[index], 0);
|
|
5292
|
+
const magnitude1 = Math.sqrt(embeddingVector1.reduce((sum, value) => sum + value * value, 0));
|
|
5293
|
+
const magnitude2 = Math.sqrt(embeddingVector2.reduce((sum, value) => sum + value * value, 0));
|
|
5294
|
+
return 1 - dotProduct / (magnitude1 * magnitude2);
|
|
5295
|
+
}
|
|
5296
|
+
|
|
5297
|
+
/**
 * Renders knowledge pieces as a bullet list, one `- <content>` line per piece, joined by newlines
 *
 * @param knowledgePieces Array of objects whose `content` property is rendered
 * @returns The bullet list as a single string (empty string for an empty array)
 *
 * @private internal utility of `createPipelineExecutor`
 */
function knowledgePiecesToString(knowledgePieces) {
    // <- TODO: [🧠] Some smarter aggregation of knowledge pieces, single-line vs multi-line vs mixed
    const bulletLines = knowledgePieces.map(({ content }) => `- ${content}`);
    return bulletLines.join('\n');
}
|
|
5313
|
+
|
|
5314
|
+
/**
 * Retrieves the most relevant knowledge pieces for a given task using embedding-based similarity search.
 * This is where retrieval-augmented generation (RAG) is performed to enhance the task with external knowledge.
 *
 * How it works:
 * 1) The embedding model is taken from the first index entry of the first knowledge piece
 * 2) `task.content` is embedded with that model via `callEmbeddingModel`
 * 3) Each knowledge piece is scored with `computeCosineSimilarity`, which in this file returns `1 - cosine`,
 *    so LOWER score means closer; pieces without an embedding for the chosen model are ranked last
 * 4) Up to 5 closest pieces are rendered through `knowledgePiecesToString`
 *
 * Note: If anything in the search throws, the error is logged and ALL knowledge pieces are returned instead
 *
 * @param options Object with `tools` (its `llm` is used), `preparedPipeline` (its `knowledgePieces` are searched),
 *                `task` (its `content` is embedded) and `parameters` (passed along in the embedding prompt)
 * @returns Promise of the rendered knowledge; empty string when there are no knowledge pieces
 *          or the first piece has no index entry
 *
 * @private internal utility of `createPipelineExecutor`
 */
async function getKnowledgeForTask(options) {
    const { tools, preparedPipeline, task, parameters } = options;
    const MAX_KNOWLEDGE_PIECES = 5;

    const firstKnowledgePiece = preparedPipeline.knowledgePieces[0];
    const firstKnowledgeIndex = firstKnowledgePiece == null ? undefined : firstKnowledgePiece.index[0];
    // <- TODO: Do not use just first knowledge piece and first index to determine embedding model, use also keyword search
    if (firstKnowledgePiece === undefined || firstKnowledgeIndex === undefined) {
        return ''; // <- Note: No knowledge present, return empty string
    }

    try {
        // TODO: [🚐] Make arrayable LLMs -> single LLM DRY
        const _llms = arrayableToArray(tools.llm);
        const llmTools = _llms.length === 1 ? _llms[0] : joinLlmExecutionTools(..._llms);

        const taskEmbeddingPrompt = {
            title: 'Knowledge Search',
            modelRequirements: {
                modelVariant: 'EMBEDDING',
                modelName: firstKnowledgeIndex.modelName,
            },
            content: task.content,
            parameters,
        };
        const taskEmbeddingResult = await llmTools.callEmbeddingModel(taskEmbeddingPrompt);

        const knowledgePiecesWithDistance = preparedPipeline.knowledgePieces.map((knowledgePiece) => {
            const knowledgePieceIndex = knowledgePiece.index.find((i) => i.modelName === firstKnowledgeIndex.modelName);
            // <- TODO: Do not use just first knowledge piece and first index to determine embedding model

            if (knowledgePieceIndex === undefined) {
                // Note: No embedding for the chosen model; lower score wins, so such a piece must sort last, not first
                return { content: knowledgePiece.content, distance: Number.POSITIVE_INFINITY };
            }

            const distance = computeCosineSimilarity(knowledgePieceIndex.position, taskEmbeddingResult.content);
            return {
                content: knowledgePiece.content,
                // Note: An undefined score (NaN) would make the comparator inconsistent, treat it as the worst match
                distance: Number.isNaN(distance) ? Number.POSITIVE_INFINITY : distance,
            };
        });

        // Note: Explicit comparison instead of subtraction because `Infinity - Infinity` is NaN
        const knowledgePiecesSorted = [...knowledgePiecesWithDistance].sort((a, b) => {
            if (a.distance === b.distance) {
                return 0;
            }
            return a.distance < b.distance ? -1 : 1;
        });
        const knowledgePiecesLimited = knowledgePiecesSorted.slice(0, MAX_KNOWLEDGE_PIECES);

        return knowledgePiecesToString(knowledgePiecesLimited);
    }
    catch (error) {
        assertsError(error);
        console.error('Error in `getKnowledgeForTask`', error);
        // Note: If the LLM fails, just return all knowledge pieces
        return knowledgePiecesToString(preparedPipeline.knowledgePieces);
    }
}
|
|
5379
|
+
/**
|
|
5380
|
+
* TODO: !!!! Verify if this is working
|
|
5381
|
+
* TODO: [♨] Implement Better - use keyword search
|
|
5382
|
+
* TODO: [♨] Examples of values
|
|
5383
|
+
*/
|
|
5129
5384
|
|
|
5130
5385
|
/**
|
|
5131
|
-
*
|
|
5386
|
+
* Retrieves all reserved parameters for a given pipeline task, including context, knowledge, examples, and metadata.
|
|
5387
|
+
* Ensures all reserved parameters are defined and throws if any are missing.
|
|
5388
|
+
*
|
|
5389
|
+
* @param options - Options including tools, pipeline, task, and context.
|
|
5390
|
+
* @returns An object containing all reserved parameters for the task.
|
|
5132
5391
|
*
|
|
5133
5392
|
* @private internal utility of `createPipelineExecutor`
|
|
5134
5393
|
*/
|
|
5135
5394
|
async function getReservedParametersForTask(options) {
|
|
5136
|
-
const { preparedPipeline, task, pipelineIdentification } = options;
|
|
5395
|
+
const { tools, preparedPipeline, task, parameters, pipelineIdentification } = options;
|
|
5137
5396
|
const context = await getContextForTask(); // <- [🏍]
|
|
5138
|
-
const knowledge = await getKnowledgeForTask({ preparedPipeline, task });
|
|
5397
|
+
const knowledge = await getKnowledgeForTask({ tools, preparedPipeline, task, parameters });
|
|
5139
5398
|
const examples = await getExamplesForTask();
|
|
5140
5399
|
const currentDate = new Date().toISOString(); // <- TODO: [🧠][💩] Better
|
|
5141
5400
|
const modelName = RESERVED_PARAMETER_MISSING_VALUE;
|
|
@@ -5161,23 +5420,21 @@ async function getReservedParametersForTask(options) {
|
|
|
5161
5420
|
}
|
|
5162
5421
|
|
|
5163
5422
|
/**
|
|
5164
|
-
*
|
|
5423
|
+
* Executes a single task within a pipeline, handling parameter validation, error checking, and progress reporting.
|
|
5424
|
+
*
|
|
5425
|
+
* @param options - Options for execution, including the task, pipeline, parameters, and callbacks.
|
|
5426
|
+
* @returns The output parameters produced by the task.
|
|
5165
5427
|
*
|
|
5166
5428
|
* @private internal utility of `createPipelineExecutor`
|
|
5167
5429
|
*/
|
|
5168
5430
|
async function executeTask(options) {
|
|
5169
5431
|
const { currentTask, preparedPipeline, parametersToPass, tools, onProgress, $executionReport, pipelineIdentification, maxExecutionAttempts, maxParallelCount, csvSettings, isVerbose, rootDirname, cacheDirname, intermediateFilesStrategy, isAutoInstalled, isNotPreparedWarningSupressed, } = options;
|
|
5170
5432
|
const priority = preparedPipeline.tasks.length - preparedPipeline.tasks.indexOf(currentTask);
|
|
5171
|
-
await onProgress({
|
|
5172
|
-
outputParameters: {
|
|
5173
|
-
[currentTask.resultingParameterName]: '', // <- TODO: [🧠] What is the best value here?
|
|
5174
|
-
},
|
|
5175
|
-
});
|
|
5176
5433
|
// Note: Check consistency of used and dependent parameters which was also done in `validatePipeline`, but it’s good to doublecheck
|
|
5177
5434
|
const usedParameterNames = extractParameterNamesFromTask(currentTask);
|
|
5178
5435
|
const dependentParameterNames = new Set(currentTask.dependentParameterNames);
|
|
5179
5436
|
// TODO: [👩🏾🤝👩🏻] Use here `mapAvailableToExpectedParameters`
|
|
5180
|
-
if (union(difference(usedParameterNames, dependentParameterNames), difference(dependentParameterNames, usedParameterNames)).size !== 0) {
|
|
5437
|
+
if (difference(union(difference(usedParameterNames, dependentParameterNames), difference(dependentParameterNames, usedParameterNames)), new Set(RESERVED_PARAMETER_NAMES)).size !== 0) {
|
|
5181
5438
|
throw new UnexpectedError(spaceTrim$1((block) => `
|
|
5182
5439
|
Dependent parameters are not consistent with used parameters:
|
|
5183
5440
|
|
|
@@ -5197,9 +5454,11 @@ async function executeTask(options) {
|
|
|
5197
5454
|
}
|
|
5198
5455
|
const definedParameters = Object.freeze({
|
|
5199
5456
|
...(await getReservedParametersForTask({
|
|
5457
|
+
tools,
|
|
5200
5458
|
preparedPipeline,
|
|
5201
5459
|
task: currentTask,
|
|
5202
5460
|
pipelineIdentification,
|
|
5461
|
+
parameters: parametersToPass,
|
|
5203
5462
|
})),
|
|
5204
5463
|
...parametersToPass,
|
|
5205
5464
|
});
|
|
@@ -5245,6 +5504,7 @@ async function executeTask(options) {
|
|
|
5245
5504
|
preparedPipeline,
|
|
5246
5505
|
tools,
|
|
5247
5506
|
$executionReport,
|
|
5507
|
+
onProgress,
|
|
5248
5508
|
pipelineIdentification,
|
|
5249
5509
|
maxExecutionAttempts,
|
|
5250
5510
|
maxParallelCount,
|
|
@@ -5272,7 +5532,8 @@ async function executeTask(options) {
|
|
|
5272
5532
|
*/
|
|
5273
5533
|
|
|
5274
5534
|
/**
|
|
5275
|
-
*
|
|
5535
|
+
* Filters and returns only the output parameters from the provided pipeline execution options.
|
|
5536
|
+
* Adds warnings for any expected output parameters that are missing.
|
|
5276
5537
|
*
|
|
5277
5538
|
* @private internal utility of `createPipelineExecutor`
|
|
5278
5539
|
*/
|
|
@@ -5297,9 +5558,12 @@ function filterJustOutputParameters(options) {
|
|
|
5297
5558
|
}
|
|
5298
5559
|
|
|
5299
5560
|
/**
|
|
5300
|
-
*
|
|
5561
|
+
* Executes an entire pipeline, resolving tasks in dependency order, handling errors, and reporting progress.
|
|
5301
5562
|
*
|
|
5302
|
-
* Note: This is not a `PipelineExecutor` (which is
|
|
5563
|
+
* Note: This is not a `PipelineExecutor` (which is bound to a single pipeline), but a utility function used by `createPipelineExecutor` to create a `PipelineExecutor`.
|
|
5564
|
+
*
|
|
5565
|
+
* @param options - Options for execution, including input parameters, pipeline, and callbacks.
|
|
5566
|
+
* @returns The result of the pipeline execution, including output parameters, errors, and usage statistics.
|
|
5303
5567
|
*
|
|
5304
5568
|
* @private internal utility of `createPipelineExecutor`
|
|
5305
5569
|
*/
|
|
@@ -5622,6 +5886,22 @@ function createPipelineExecutor(options) {
|
|
|
5622
5886
|
cacheDirname,
|
|
5623
5887
|
intermediateFilesStrategy,
|
|
5624
5888
|
isAutoInstalled,
|
|
5889
|
+
}).catch((error) => {
|
|
5890
|
+
assertsError(error);
|
|
5891
|
+
return exportJson({
|
|
5892
|
+
name: 'pipelineExecutorResult',
|
|
5893
|
+
message: `Unuccessful PipelineExecutorResult, last catch`,
|
|
5894
|
+
order: [],
|
|
5895
|
+
value: {
|
|
5896
|
+
isSuccessful: false,
|
|
5897
|
+
errors: [serializeError(error)],
|
|
5898
|
+
warnings: [],
|
|
5899
|
+
usage: UNCERTAIN_USAGE,
|
|
5900
|
+
executionReport: null,
|
|
5901
|
+
outputParameters: {},
|
|
5902
|
+
preparedPipeline,
|
|
5903
|
+
},
|
|
5904
|
+
});
|
|
5625
5905
|
});
|
|
5626
5906
|
};
|
|
5627
5907
|
const pipelineExecutor = (inputParameters) => createTask({
|
|
@@ -5882,8 +6162,8 @@ class MarkitdownScraper {
|
|
|
5882
6162
|
extension: 'md',
|
|
5883
6163
|
isVerbose,
|
|
5884
6164
|
});
|
|
5885
|
-
// TODO:
|
|
5886
|
-
// Note: Running
|
|
6165
|
+
// TODO: Determine if Markitdown conversion should run only if the cache file doesn't exist, or always.
|
|
6166
|
+
// Note: Running Markitdown conversion ONLY if the file in the cache does not exist
|
|
5887
6167
|
if (!(await isFileExisting(cacheFilehandler.filename, this.tools.fs))) {
|
|
5888
6168
|
const src = source.filename || source.url || null;
|
|
5889
6169
|
// console.log('!!', { src, source, cacheFilehandler });
|
|
@@ -5905,11 +6185,11 @@ class MarkitdownScraper {
|
|
|
5905
6185
|
return cacheFilehandler;
|
|
5906
6186
|
}
|
|
5907
6187
|
/**
|
|
5908
|
-
* Scrapes the
|
|
6188
|
+
* Scrapes the source document (PDF, DOCX, etc.) and returns the knowledge pieces or `null` if it can't scrape it.
|
|
5909
6189
|
*/
|
|
5910
6190
|
async scrape(source) {
|
|
5911
6191
|
const cacheFilehandler = await this.$convert(source);
|
|
5912
|
-
// TODO:
|
|
6192
|
+
// TODO: Ensure this correctly creates the source object for the internal MarkdownScraper using the converted file.
|
|
5913
6193
|
const markdownSource = {
|
|
5914
6194
|
source: source.source,
|
|
5915
6195
|
filename: cacheFilehandler.filename,
|