@promptbook/markitdown 0.92.0-3 → 0.92.0-30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. package/esm/index.es.js +559 -279
  2. package/esm/index.es.js.map +1 -1
  3. package/esm/typings/src/_packages/browser.index.d.ts +2 -0
  4. package/esm/typings/src/_packages/core.index.d.ts +22 -6
  5. package/esm/typings/src/_packages/deepseek.index.d.ts +2 -0
  6. package/esm/typings/src/_packages/google.index.d.ts +2 -0
  7. package/esm/typings/src/_packages/types.index.d.ts +4 -2
  8. package/esm/typings/src/_packages/utils.index.d.ts +2 -0
  9. package/esm/typings/src/cli/common/$provideLlmToolsForCli.d.ts +1 -1
  10. package/esm/typings/src/collection/PipelineCollection.d.ts +0 -2
  11. package/esm/typings/src/collection/SimplePipelineCollection.d.ts +1 -1
  12. package/esm/typings/src/commands/FOREACH/ForeachJson.d.ts +6 -6
  13. package/esm/typings/src/commands/FOREACH/foreachCommandParser.d.ts +0 -2
  14. package/esm/typings/src/commands/FORMFACTOR/formfactorCommandParser.d.ts +1 -1
  15. package/esm/typings/src/commands/_BOILERPLATE/boilerplateCommandParser.d.ts +1 -1
  16. package/esm/typings/src/commands/_common/types/CommandParser.d.ts +36 -28
  17. package/esm/typings/src/config.d.ts +41 -11
  18. package/esm/typings/src/constants.d.ts +43 -2
  19. package/esm/typings/src/conversion/archive/loadArchive.d.ts +2 -2
  20. package/esm/typings/src/errors/0-BoilerplateError.d.ts +2 -2
  21. package/esm/typings/src/executables/$provideExecutablesForNode.d.ts +1 -1
  22. package/esm/typings/src/executables/apps/locateLibreoffice.d.ts +2 -1
  23. package/esm/typings/src/executables/apps/locatePandoc.d.ts +2 -1
  24. package/esm/typings/src/executables/platforms/locateAppOnLinux.d.ts +2 -1
  25. package/esm/typings/src/executables/platforms/locateAppOnMacOs.d.ts +2 -1
  26. package/esm/typings/src/executables/platforms/locateAppOnWindows.d.ts +2 -1
  27. package/esm/typings/src/execution/AbstractTaskResult.d.ts +1 -1
  28. package/esm/typings/src/execution/CommonToolsOptions.d.ts +5 -1
  29. package/esm/typings/src/execution/LlmExecutionToolsConstructor.d.ts +2 -1
  30. package/esm/typings/src/execution/PipelineExecutorResult.d.ts +4 -2
  31. package/esm/typings/src/execution/createPipelineExecutor/$OngoingTaskResult.d.ts +12 -9
  32. package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +12 -9
  33. package/esm/typings/src/execution/createPipelineExecutor/20-executeTask.d.ts +11 -8
  34. package/esm/typings/src/execution/createPipelineExecutor/30-executeFormatSubvalues.d.ts +15 -3
  35. package/esm/typings/src/execution/createPipelineExecutor/40-executeAttempts.d.ts +20 -14
  36. package/esm/typings/src/execution/createPipelineExecutor/computeCosineSimilarity.d.ts +13 -0
  37. package/esm/typings/src/execution/createPipelineExecutor/filterJustOutputParameters.d.ts +7 -6
  38. package/esm/typings/src/execution/createPipelineExecutor/getContextForTask.d.ts +5 -1
  39. package/esm/typings/src/execution/createPipelineExecutor/getExamplesForTask.d.ts +1 -1
  40. package/esm/typings/src/execution/createPipelineExecutor/getKnowledgeForTask.d.ts +21 -5
  41. package/esm/typings/src/execution/createPipelineExecutor/getReservedParametersForTask.d.ts +19 -5
  42. package/esm/typings/src/execution/createPipelineExecutor/knowledgePiecesToString.d.ts +9 -0
  43. package/esm/typings/src/execution/translation/automatic-translate/automatic-translators/LindatAutomaticTranslator.d.ts +4 -4
  44. package/esm/typings/src/execution/utils/checkExpectations.d.ts +1 -1
  45. package/esm/typings/src/execution/utils/uncertainNumber.d.ts +3 -2
  46. package/esm/typings/src/formats/_common/{FormatDefinition.d.ts → FormatParser.d.ts} +8 -6
  47. package/esm/typings/src/formats/_common/FormatSubvalueParser.d.ts +66 -0
  48. package/esm/typings/src/formats/csv/CsvFormatParser.d.ts +17 -0
  49. package/esm/typings/src/formats/csv/CsvSettings.d.ts +2 -2
  50. package/esm/typings/src/formats/csv/utils/csvParse.d.ts +12 -0
  51. package/esm/typings/src/formats/csv/utils/isValidCsvString.d.ts +1 -1
  52. package/esm/typings/src/formats/index.d.ts +2 -2
  53. package/esm/typings/src/formats/json/{JsonFormatDefinition.d.ts → JsonFormatParser.d.ts} +6 -6
  54. package/esm/typings/src/formats/json/utils/isValidJsonString.d.ts +1 -1
  55. package/esm/typings/src/formats/json/utils/jsonParse.d.ts +8 -0
  56. package/esm/typings/src/formats/text/{TextFormatDefinition.d.ts → TextFormatParser.d.ts} +7 -7
  57. package/esm/typings/src/formats/xml/XmlFormatParser.d.ts +19 -0
  58. package/esm/typings/src/formats/xml/utils/isValidXmlString.d.ts +1 -1
  59. package/esm/typings/src/formfactors/_boilerplate/BoilerplateFormfactorDefinition.d.ts +3 -2
  60. package/esm/typings/src/formfactors/_common/AbstractFormfactorDefinition.d.ts +16 -7
  61. package/esm/typings/src/formfactors/_common/FormfactorDefinition.d.ts +3 -1
  62. package/esm/typings/src/formfactors/_common/string_formfactor_name.d.ts +2 -1
  63. package/esm/typings/src/formfactors/chatbot/ChatbotFormfactorDefinition.d.ts +2 -2
  64. package/esm/typings/src/formfactors/completion/CompletionFormfactorDefinition.d.ts +29 -0
  65. package/esm/typings/src/formfactors/generator/GeneratorFormfactorDefinition.d.ts +2 -1
  66. package/esm/typings/src/formfactors/generic/GenericFormfactorDefinition.d.ts +2 -2
  67. package/esm/typings/src/formfactors/index.d.ts +33 -8
  68. package/esm/typings/src/formfactors/matcher/MatcherFormfactorDefinition.d.ts +4 -2
  69. package/esm/typings/src/formfactors/sheets/SheetsFormfactorDefinition.d.ts +3 -2
  70. package/esm/typings/src/formfactors/translator/TranslatorFormfactorDefinition.d.ts +3 -2
  71. package/esm/typings/src/high-level-abstractions/index.d.ts +2 -2
  72. package/esm/typings/src/llm-providers/_common/register/$llmToolsMetadataRegister.d.ts +3 -3
  73. package/esm/typings/src/llm-providers/_common/register/$llmToolsRegister.d.ts +3 -3
  74. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsConfigurationFromEnv.d.ts +4 -4
  75. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsForTestingAndScriptsAndPlayground.d.ts +4 -3
  76. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsFromEnv.d.ts +17 -4
  77. package/esm/typings/src/llm-providers/_common/register/LlmToolsConfiguration.d.ts +11 -4
  78. package/esm/typings/src/llm-providers/_common/register/LlmToolsMetadata.d.ts +27 -5
  79. package/esm/typings/src/llm-providers/_common/register/LlmToolsOptions.d.ts +9 -2
  80. package/esm/typings/src/llm-providers/_common/register/createLlmToolsFromConfiguration.d.ts +12 -3
  81. package/esm/typings/src/llm-providers/_common/utils/cache/CacheItem.d.ts +10 -5
  82. package/esm/typings/src/llm-providers/_common/utils/cache/CacheLlmToolsOptions.d.ts +5 -3
  83. package/esm/typings/src/llm-providers/_common/utils/cache/cacheLlmTools.d.ts +3 -3
  84. package/esm/typings/src/llm-providers/_common/utils/count-total-usage/limitTotalUsage.d.ts +5 -5
  85. package/esm/typings/src/llm-providers/anthropic-claude/anthropic-claude-models.d.ts +1 -1
  86. package/esm/typings/src/llm-providers/azure-openai/AzureOpenAiExecutionTools.d.ts +4 -0
  87. package/esm/typings/src/llm-providers/deepseek/deepseek-models.d.ts +23 -0
  88. package/esm/typings/src/llm-providers/google/google-models.d.ts +23 -0
  89. package/esm/typings/src/llm-providers/openai/OpenAiExecutionTools.d.ts +4 -0
  90. package/esm/typings/src/llm-providers/openai/openai-models.d.ts +1 -1
  91. package/esm/typings/src/llm-providers/openai/register-configuration.d.ts +2 -2
  92. package/esm/typings/src/llm-providers/openai/register-constructor.d.ts +2 -2
  93. package/esm/typings/src/migrations/migratePipeline.d.ts +9 -0
  94. package/esm/typings/src/other/templates/getBookTemplates.d.ts +2 -2
  95. package/esm/typings/src/personas/preparePersona.d.ts +1 -1
  96. package/esm/typings/src/pipeline/PipelineInterface/PipelineInterface.d.ts +3 -3
  97. package/esm/typings/src/pipeline/PipelineInterface/constants.d.ts +1 -1
  98. package/esm/typings/src/pipeline/PipelineInterface/getPipelineInterface.d.ts +1 -1
  99. package/esm/typings/src/pipeline/PipelineInterface/isPipelineImplementingInterface.d.ts +5 -4
  100. package/esm/typings/src/pipeline/PipelineInterface/isPipelineInterfacesEqual.d.ts +1 -1
  101. package/esm/typings/src/pipeline/PipelineJson/CommonTaskJson.d.ts +9 -6
  102. package/esm/typings/src/pipeline/PipelineJson/PersonaJson.d.ts +4 -2
  103. package/esm/typings/src/pipeline/PipelineJson/PipelineJson.d.ts +3 -2
  104. package/esm/typings/src/pipeline/PipelineString.d.ts +3 -1
  105. package/esm/typings/src/pipeline/book-notation.d.ts +2 -2
  106. package/esm/typings/src/postprocessing/utils/extractJsonBlock.d.ts +1 -1
  107. package/esm/typings/src/prepare/prepareTasks.d.ts +7 -4
  108. package/esm/typings/src/remote-server/openapi-types.d.ts +348 -6
  109. package/esm/typings/src/remote-server/openapi.d.ts +398 -4
  110. package/esm/typings/src/remote-server/types/RemoteServerOptions.d.ts +2 -1
  111. package/esm/typings/src/scrapers/_boilerplate/BoilerplateScraper.d.ts +3 -3
  112. package/esm/typings/src/scrapers/_boilerplate/createBoilerplateScraper.d.ts +1 -1
  113. package/esm/typings/src/scrapers/_boilerplate/register-metadata.d.ts +1 -1
  114. package/esm/typings/src/scrapers/_common/Converter.d.ts +3 -1
  115. package/esm/typings/src/scrapers/_common/Scraper.d.ts +4 -3
  116. package/esm/typings/src/scrapers/_common/ScraperIntermediateSource.d.ts +4 -2
  117. package/esm/typings/src/scrapers/_common/register/$provideFilesystemForNode.d.ts +2 -1
  118. package/esm/typings/src/scrapers/_common/register/$provideScrapersForBrowser.d.ts +6 -3
  119. package/esm/typings/src/scrapers/_common/register/$provideScrapersForNode.d.ts +3 -5
  120. package/esm/typings/src/scrapers/_common/register/$scrapersMetadataRegister.d.ts +3 -3
  121. package/esm/typings/src/scrapers/_common/register/$scrapersRegister.d.ts +3 -2
  122. package/esm/typings/src/scrapers/_common/register/ScraperAndConverterMetadata.d.ts +8 -5
  123. package/esm/typings/src/scrapers/_common/register/ScraperConstructor.d.ts +2 -1
  124. package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +6 -5
  125. package/esm/typings/src/scrapers/_common/utils/makeKnowledgeSourceHandler.d.ts +3 -1
  126. package/esm/typings/src/scrapers/document/createDocumentScraper.d.ts +1 -1
  127. package/esm/typings/src/scrapers/document-legacy/createLegacyDocumentScraper.d.ts +2 -1
  128. package/esm/typings/src/scrapers/markdown/createMarkdownScraper.d.ts +4 -1
  129. package/esm/typings/src/scrapers/markitdown/MarkitdownScraper.d.ts +1 -1
  130. package/esm/typings/src/scrapers/pdf/createPdfScraper.d.ts +2 -1
  131. package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts +3 -4
  132. package/esm/typings/src/scripting/javascript/postprocessing-functions.d.ts +5 -1
  133. package/esm/typings/src/storage/file-cache-storage/FileCacheStorage.d.ts +12 -5
  134. package/esm/typings/src/storage/file-cache-storage/FileCacheStorageOptions.d.ts +4 -2
  135. package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.d.ts +2 -1
  136. package/esm/typings/src/storage/local-storage/getIndexedDbStorage.d.ts +10 -0
  137. package/esm/typings/src/storage/local-storage/utils/makePromptbookStorageFromIndexedDb.d.ts +7 -0
  138. package/esm/typings/src/storage/local-storage/utils/makePromptbookStorageFromWebStorage.d.ts +2 -1
  139. package/esm/typings/src/types/IntermediateFilesStrategy.d.ts +2 -1
  140. package/esm/typings/src/types/ModelVariant.d.ts +5 -5
  141. package/esm/typings/src/types/typeAliases.d.ts +17 -13
  142. package/esm/typings/src/utils/$Register.d.ts +8 -7
  143. package/esm/typings/src/utils/editable/edit-pipeline-string/addPipelineCommand.d.ts +2 -2
  144. package/esm/typings/src/utils/editable/edit-pipeline-string/deflatePipeline.d.ts +4 -1
  145. package/esm/typings/src/utils/editable/utils/isFlatPipeline.d.ts +2 -1
  146. package/esm/typings/src/utils/environment/$getGlobalScope.d.ts +2 -1
  147. package/esm/typings/src/utils/expectation-counters/index.d.ts +1 -1
  148. package/esm/typings/src/utils/markdown/extractAllListItemsFromMarkdown.d.ts +1 -1
  149. package/esm/typings/src/utils/normalization/nameToUriPart.d.ts +4 -4
  150. package/esm/typings/src/utils/normalization/nameToUriParts.d.ts +4 -4
  151. package/esm/typings/src/utils/normalization/normalize-to-kebab-case.d.ts +3 -3
  152. package/esm/typings/src/utils/normalization/normalizeTo_SCREAMING_CASE.d.ts +3 -3
  153. package/esm/typings/src/utils/normalization/normalizeTo_camelCase.d.ts +4 -4
  154. package/esm/typings/src/utils/normalization/normalizeTo_snake_case.d.ts +3 -3
  155. package/esm/typings/src/utils/normalization/removeDiacritics.d.ts +3 -3
  156. package/esm/typings/src/utils/normalization/searchKeywords.d.ts +4 -1
  157. package/esm/typings/src/utils/normalization/titleToName.d.ts +4 -4
  158. package/esm/typings/src/utils/organization/empty_object.d.ts +2 -2
  159. package/esm/typings/src/utils/organization/just_empty_object.d.ts +4 -4
  160. package/esm/typings/src/utils/parameters/mapAvailableToExpectedParameters.d.ts +7 -7
  161. package/esm/typings/src/utils/serialization/clonePipeline.d.ts +4 -3
  162. package/esm/typings/src/utils/serialization/deepClone.d.ts +5 -1
  163. package/esm/typings/src/utils/validators/javascriptName/isValidJavascriptName.d.ts +3 -3
  164. package/esm/typings/src/utils/validators/parameterName/validateParameterName.d.ts +5 -4
  165. package/esm/typings/src/version.d.ts +2 -1
  166. package/package.json +2 -2
  167. package/umd/index.umd.js +559 -279
  168. package/umd/index.umd.js.map +1 -1
  169. package/esm/typings/src/formats/_common/FormatSubvalueDefinition.d.ts +0 -31
  170. package/esm/typings/src/formats/csv/CsvFormatDefinition.d.ts +0 -17
  171. package/esm/typings/src/formats/xml/XmlFormatDefinition.d.ts +0 -19
package/esm/index.es.js CHANGED
@@ -26,7 +26,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
26
26
  * @generated
27
27
  * @see https://github.com/webgptorg/promptbook
28
28
  */
29
- const PROMPTBOOK_ENGINE_VERSION = '0.92.0-3';
29
+ const PROMPTBOOK_ENGINE_VERSION = '0.92.0-30';
30
30
  /**
31
31
  * TODO: string_promptbook_version should be constrained to all versions of the Promptbook engine
32
32
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -102,6 +102,21 @@ const DEFAULT_BOOK_TITLE = `✨ Untitled Book`;
102
102
  * @public exported from `@promptbook/core`
103
103
  */
104
104
  const DEFAULT_MAX_FILE_SIZE = 100 * 1024 * 1024; // 100MB
105
+ /**
106
+ * Threshold value that determines when a dataset is considered "big"
107
+ * and may require special handling or optimizations
108
+ *
109
+ * For example, when an error occurs in one item of a big dataset, it does not fail the whole pipeline
110
+ *
111
+ * @public exported from `@promptbook/core`
112
+ */
113
+ const BIG_DATASET_TRESHOLD = 50;
114
+ /**
115
+ * Placeholder text used to represent the value of a failed operation
116
+ *
117
+ * @public exported from `@promptbook/core`
118
+ */
119
+ const FAILED_VALUE_PLACEHOLDER = '!?';
105
120
  // <- TODO: [🧠] Better system for generator warnings - not always "code" and "by `@promptbook/cli`"
106
121
  /**
107
122
  * The maximum number of iterations for a loop
@@ -181,7 +196,7 @@ const DEFAULT_DOWNLOAD_CACHE_DIRNAME = './.promptbook/download-cache';
181
196
  const DEFAULT_SCRAPE_CACHE_DIRNAME = './.promptbook/scrape-cache';
182
197
  // <- TODO: [🧜‍♂️]
183
198
  /**
184
- * @@@
199
+ * Default settings for parsing and generating CSV files in Promptbook.
185
200
  *
186
201
  * @public exported from `@promptbook/core`
187
202
  */
@@ -192,19 +207,19 @@ const DEFAULT_CSV_SETTINGS = Object.freeze({
192
207
  skipEmptyLines: true,
193
208
  });
194
209
  /**
195
- * @@@
210
+ * Controls whether verbose logging is enabled by default throughout the application.
196
211
  *
197
212
  * @public exported from `@promptbook/core`
198
213
  */
199
214
  let DEFAULT_IS_VERBOSE = false;
200
215
  /**
201
- * @@@
216
+ * Controls whether auto-installation of dependencies is enabled by default.
202
217
  *
203
218
  * @public exported from `@promptbook/core`
204
219
  */
205
220
  const DEFAULT_IS_AUTO_INSTALLED = false;
206
221
  /**
207
- * @@@
222
+ * Indicates whether pipeline logic validation is enabled. When true, the pipeline logic is checked for consistency.
208
223
  *
209
224
  * @private within the repository
210
225
  */
@@ -355,7 +370,8 @@ async function isFileExisting(filename, fs) {
355
370
  */
356
371
 
357
372
  /**
358
- * @@@
373
+ * Converts a name to a properly formatted subfolder path for cache storage.
374
+ * Handles normalization and path formatting to create consistent cache directory structures.
359
375
  *
360
376
  * @private for `FileCacheStorage`
361
377
  */
@@ -608,10 +624,10 @@ for (let i = 0; i < defaultDiacriticsRemovalMap.length; i++) {
608
624
  */
609
625
 
610
626
  /**
611
- * @@@
627
+ * Removes diacritic marks (accents) from characters in a string.
612
628
  *
613
- * @param input @@@
614
- * @returns @@@
629
+ * @param input The string containing diacritics to be normalized.
630
+ * @returns The string with diacritics removed or normalized.
615
631
  * @public exported from `@promptbook/utils`
616
632
  */
617
633
  function removeDiacritics(input) {
@@ -625,10 +641,10 @@ function removeDiacritics(input) {
625
641
  */
626
642
 
627
643
  /**
628
- * @@@
644
+ * Converts a given text to kebab-case format.
629
645
  *
630
- * @param text @@@
631
- * @returns @@@
646
+ * @param text The text to be converted.
647
+ * @returns The kebab-case formatted string.
632
648
  * @example 'hello-world'
633
649
  * @example 'i-love-promptbook'
634
650
  * @public exported from `@promptbook/utils`
@@ -770,11 +786,11 @@ function isValidUrl(url) {
770
786
  }
771
787
 
772
788
  /**
773
- * @@@
789
+ * Converts a title string into a normalized name.
774
790
  *
775
- * @param value @@@
776
- * @returns @@@
777
- * @example @@@
791
+ * @param value The title string to be converted to a name.
792
+ * @returns A normalized name derived from the input title.
793
+ * @example 'Hello World!' -> 'hello-world'
778
794
  * @public exported from `@promptbook/utils`
779
795
  */
780
796
  function titleToName(value) {
@@ -794,9 +810,8 @@ function titleToName(value) {
794
810
  }
795
811
 
796
812
  /**
797
- * Create a filename for intermediate cache for scrapers
798
- *
799
- * Note: It also checks if directory exists and creates it if not
813
+ * Retrieves an intermediate source for a scraper based on the knowledge source.
814
+ * Manages the caching and retrieval of intermediate scraper results for optimized performance.
800
815
  *
801
816
  * @private as internal utility for scrapers
802
817
  */
@@ -847,7 +862,7 @@ async function getScraperIntermediateSource(source, options) {
847
862
  * Note: [🟢] Code in this file should never be released in packages that could be imported into a browser environment
848
863
  */
849
864
 
850
- var PipelineCollection = [{title:"Prepare Knowledge from Markdown",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book",formfactorName:"GENERIC",parameters:[{name:"knowledgeContent",description:"Markdown document content",isInput:true,isOutput:false},{name:"knowledgePieces",description:"The knowledge JSON object",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}",resultingParameterName:"knowledgePieces",dependentParameterNames:["knowledgeContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge from Markdown\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book`\n- INPUT PARAMETER `{knowledgeContent}` Markdown document content\n- OUTPUT PARAMETER `{knowledgePieces}` The knowledge JSON object\n\n## Knowledge\n\n<!-- TODO: [🍆] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}\n```\n\n`-> 
{knowledgePieces}`\n"}],sourceFile:"./books/prepare-knowledge-from-markdown.book"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-keywords.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"keywords",description:"Keywords separated by comma",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}",resultingParameterName:"keywords",dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Keywords\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-keywords.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{keywords}` Keywords separated by comma\n\n## Knowledge\n\n<!-- TODO: [🍆] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}\n```\n\n`-> {keywords}`\n"}],sourceFile:"./books/prepare-knowledge-keywords.book"},{title:"Prepare Knowledge-piece Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-title.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"title",description:"The title of the document",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just 
title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}",resultingParameterName:"title",expectations:{words:{min:1,max:8}},dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge-piece Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-title.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{title}` The title of the document\n\n## Knowledge\n\n- EXPECT MIN 1 WORD\n- EXPECT MAX 8 WORDS\n\n```markdown\nYou are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-knowledge-title.book"},{title:"Prepare Persona",pipelineUrl:"https://promptbook.studio/promptbook/prepare-persona.book",formfactorName:"GENERIC",parameters:[{name:"availableModelNames",description:"List of available model names separated by comma (,)",isInput:true,isOutput:false},{name:"personaDescription",description:"Description of the persona",isInput:true,isOutput:false},{name:"modelRequirements",description:"Specific requirements for the model",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-model-requirements",title:"Make modelRequirements",content:"You are experienced AI engineer, you need to create virtual assistant.\nWrite\n\n## Example\n\n```json\n{\n\"modelName\": \"gpt-4o\",\n\"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n\"temperature\": 0.7\n}\n```\n\n## Instructions\n\n- Your output format is JSON object\n- Write just the JSON object, no other text should be present\n- It contains the following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the 
model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nPick from the following models:\n\n- {availableModelNames}\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}",resultingParameterName:"modelRequirements",format:"JSON",dependentParameterNames:["availableModelNames","personaDescription"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Persona\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-persona.book`\n- INPUT PARAMETER `{availableModelNames}` List of available model names separated by comma (,)\n- INPUT PARAMETER `{personaDescription}` Description of the persona\n- OUTPUT PARAMETER `{modelRequirements}` Specific requirements for the model\n\n## Make modelRequirements\n\n- FORMAT JSON\n\n```markdown\nYou are experienced AI engineer, you need to create virtual assistant.\nWrite\n\n## Example\n\n\\`\\`\\`json\n{\n\"modelName\": \"gpt-4o\",\n\"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n\"temperature\": 0.7\n}\n\\`\\`\\`\n\n## Instructions\n\n- Your output format is JSON object\n- Write just the JSON object, no other text should be present\n- It contains the following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nPick from the following models:\n\n- {availableModelNames}\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. 
It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}\n```\n\n`-> {modelRequirements}`\n"}],sourceFile:"./books/prepare-persona.book"},{title:"Prepare Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-title.book",formfactorName:"GENERIC",parameters:[{name:"book",description:"The book to prepare the title for",isInput:true,isOutput:false},{name:"title",description:"Best title for the book",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-title",title:"Make title",content:"Make best title for given text which describes the workflow:\n\n## Rules\n\n- Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 
words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"✍ Convert Knowledge-piece to title\" but \"✍ Title\"_\n\n## The workflow\n\n> {book}",resultingParameterName:"title",expectations:{words:{min:1,max:8},lines:{min:1,max:1}},dependentParameterNames:["book"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-title.book`\n- INPUT PARAMETER `{book}` The book to prepare the title for\n- OUTPUT PARAMETER `{title}` Best title for the book\n\n## Make title\n\n- EXPECT MIN 1 Word\n- EXPECT MAX 8 Words\n- EXPECT EXACTLY 1 Line\n\n```markdown\nMake best title for given text which describes the workflow:\n\n## Rules\n\n- Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"✍ Convert Knowledge-piece to title\" but \"✍ Title\"_\n\n## The workflow\n\n> {book}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-title.book"}];
865
+ var PipelineCollection = [{title:"Prepare Knowledge from Markdown",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book",formfactorName:"GENERIC",parameters:[{name:"knowledgeContent",description:"Markdown document content",isInput:true,isOutput:false},{name:"knowledgePieces",description:"The knowledge JSON object",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}",resultingParameterName:"knowledgePieces",dependentParameterNames:["knowledgeContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge from Markdown\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book`\n- INPUT PARAMETER `{knowledgeContent}` Markdown document content\n- OUTPUT PARAMETER `{knowledgePieces}` The knowledge JSON object\n\n## Knowledge\n\n<!-- TODO: [🍆] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}\n```\n\n`-> 
{knowledgePieces}`\n"}],sourceFile:"./books/prepare-knowledge-from-markdown.book"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-keywords.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"keywords",description:"Keywords separated by comma",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}",resultingParameterName:"keywords",dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Keywords\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-keywords.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{keywords}` Keywords separated by comma\n\n## Knowledge\n\n<!-- TODO: [🍆] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}\n```\n\n`-> {keywords}`\n"}],sourceFile:"./books/prepare-knowledge-keywords.book"},{title:"Prepare Knowledge-piece Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-title.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"title",description:"The title of the document",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just 
title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}",resultingParameterName:"title",expectations:{words:{min:1,max:8}},dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge-piece Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-title.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{title}` The title of the document\n\n## Knowledge\n\n- EXPECT MIN 1 WORD\n- EXPECT MAX 8 WORDS\n\n```markdown\nYou are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-knowledge-title.book"},{title:"Prepare Persona",pipelineUrl:"https://promptbook.studio/promptbook/prepare-persona.book",formfactorName:"GENERIC",parameters:[{name:"availableModels",description:"List of available model names together with their descriptions as JSON",isInput:true,isOutput:false},{name:"personaDescription",description:"Description of the persona",isInput:true,isOutput:false},{name:"modelsRequirements",description:"Specific requirements for the model",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-model-requirements",title:"Make modelRequirements",content:"You are an experienced AI engineer, you need to find the best models for virtual assistants:\n\n## Example\n\n```json\n[\n {\n \"modelName\": \"gpt-4o\",\n \"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n \"temperature\": 0.7\n },\n {\n \"modelName\": \"claude-3-5-sonnet\",\n \"systemMessage\": \"You are a friendly and knowledgeable chatbot.\",\n \"temperature\": 0.5\n }\n]\n```\n\n## Instructions\n\n- Your output format is JSON array\n- Sort best-fitting 
models first\n- Omit any models that are not suitable\n- Write just the JSON, no other text should be present\n- Array contain items with following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nHere are the available models:\n\n```json\n{availableModels}\n```\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}",resultingParameterName:"modelsRequirements",format:"JSON",dependentParameterNames:["availableModels","personaDescription"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Persona\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-persona.book`\n- INPUT PARAMETER `{availableModels}` List of available model names together with their descriptions as JSON\n- INPUT PARAMETER `{personaDescription}` Description of the persona\n- OUTPUT PARAMETER `{modelsRequirements}` Specific requirements for the model\n\n## Make modelRequirements\n\n- FORMAT JSON\n\n```markdown\nYou are an experienced AI engineer, you need to find the best models for virtual assistants:\n\n## Example\n\n\\`\\`\\`json\n[\n {\n \"modelName\": \"gpt-4o\",\n \"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n \"temperature\": 0.7\n },\n {\n \"modelName\": \"claude-3-5-sonnet\",\n \"systemMessage\": \"You are a friendly and knowledgeable chatbot.\",\n \"temperature\": 0.5\n }\n]\n\\`\\`\\`\n\n## Instructions\n\n- Your output format is JSON array\n- Sort best-fitting models first\n- Omit any models that are not suitable\n- Write just the JSON, no other text should be present\n- Array contain items with following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nHere are 
the available models:\n\n\\`\\`\\`json\n{availableModels}\n\\`\\`\\`\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}\n```\n\n`-> {modelsRequirements}`\n"}],sourceFile:"./books/prepare-persona.book"},{title:"Prepare Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-title.book",formfactorName:"GENERIC",parameters:[{name:"book",description:"The book to prepare the title for",isInput:true,isOutput:false},{name:"title",description:"Best title for the book",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-title",title:"Make title",content:"Make best title for given text which describes the workflow:\n\n## Rules\n\n- Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"✍ Convert Knowledge-piece to title\" but \"✍ Title\"_\n\n## The workflow\n\n> {book}",resultingParameterName:"title",expectations:{words:{min:1,max:8},lines:{min:1,max:1}},dependentParameterNames:["book"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-title.book`\n- INPUT PARAMETER `{book}` The book to prepare the title for\n- OUTPUT PARAMETER `{title}` Best title for the book\n\n## Make title\n\n- EXPECT MIN 1 Word\n- EXPECT MAX 8 Words\n- EXPECT EXACTLY 1 Line\n\n```markdown\nMake best title for given text which describes the workflow:\n\n## Rules\n\n- 
Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"✍ Convert Knowledge-piece to title\" but \"✍ Title\"_\n\n## The workflow\n\n> {book}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-title.book"}];
851
866
 
852
867
  /**
853
868
  * Checks if value is valid email
@@ -932,7 +947,7 @@ function assertsError(whatWasThrown) {
932
947
  * Function isValidJsonString will tell you if the string is valid JSON or not
933
948
  *
934
949
  * @param value The string to check
935
- * @returns True if the string is a valid JSON string, false otherwise
950
+ * @returns `true` if the string is a valid JSON string, `false` otherwise
936
951
  *
937
952
  * @public exported from `@promptbook/utils`
938
953
  */
@@ -1343,8 +1358,12 @@ function checkSerializableAsJson(options) {
1343
1358
  */
1344
1359
 
1345
1360
  /**
1346
- * @@@
1361
+ * Creates a deep clone of the given object
1362
+ *
1363
+ * Note: This method only works for objects that are fully serializable to JSON and do not contain functions, Dates, or special types.
1347
1364
  *
1365
+ * @param objectValue The object to clone.
1366
+ * @returns A deep, writable clone of the input object.
1348
1367
  * @public exported from `@promptbook/utils`
1349
1368
  */
1350
1369
  function deepClone(objectValue) {
@@ -1426,13 +1445,13 @@ const ORDER_OF_PIPELINE_JSON = [
1426
1445
  */
1427
1446
  const REPLACING_NONCE = 'ptbkauk42kV2dzao34faw7FudQUHYPtW';
1428
1447
  /**
1429
- * @@@
1448
+ * Placeholder value indicating a parameter is missing its value.
1430
1449
  *
1431
1450
  * @private within the repository
1432
1451
  */
1433
1452
  const RESERVED_PARAMETER_MISSING_VALUE = 'MISSING-' + REPLACING_NONCE;
1434
1453
  /**
1435
- * @@@
1454
+ * Placeholder value indicating a parameter is restricted and cannot be used directly.
1436
1455
  *
1437
1456
  * @private within the repository
1438
1457
  */
@@ -1890,7 +1909,7 @@ function extractParameterNames(template) {
1890
1909
  */
1891
1910
  function unpreparePipeline(pipeline) {
1892
1911
  let { personas, knowledgeSources, tasks } = pipeline;
1893
- personas = personas.map((persona) => ({ ...persona, modelRequirements: undefined, preparationIds: undefined }));
1912
+ personas = personas.map((persona) => ({ ...persona, modelsRequirements: undefined, preparationIds: undefined }));
1894
1913
  knowledgeSources = knowledgeSources.map((knowledgeSource) => ({ ...knowledgeSource, preparationIds: undefined }));
1895
1914
  tasks = tasks.map((task) => {
1896
1915
  let { dependentParameterNames } = task;
@@ -1931,7 +1950,7 @@ class SimplePipelineCollection {
1931
1950
  /**
1932
1951
  * Constructs a pipeline collection from pipelines
1933
1952
  *
1934
- * @param pipelines @@@
1953
+ * @param pipelines Array of pipeline JSON objects to include in the collection
1935
1954
  *
1936
1955
  * Note: During the construction logic of all pipelines are validated
1937
1956
  * Note: It is not recommended to use this constructor directly, use `createCollectionFromJson` *(or other variant)* instead
@@ -2095,15 +2114,21 @@ class PipelineExecutionError extends Error {
2095
2114
  * @public exported from `@promptbook/core`
2096
2115
  */
2097
2116
  function isPipelinePrepared(pipeline) {
2098
- // Note: Ignoring `pipeline.preparations` @@@
2099
- // Note: Ignoring `pipeline.knowledgePieces` @@@
2117
+ // Note: Ignoring `pipeline.preparations`
2118
+ // Note: Ignoring `pipeline.knowledgePieces`
2100
2119
  if (pipeline.title === undefined || pipeline.title === '' || pipeline.title === DEFAULT_BOOK_TITLE) {
2120
+ // TODO: !!! Comment this out
2121
+ console.log('Pipeline is not prepared because title is undefined or empty', pipeline);
2101
2122
  return false;
2102
2123
  }
2103
- if (!pipeline.personas.every((persona) => persona.modelRequirements !== undefined)) {
2124
+ if (!pipeline.personas.every((persona) => persona.modelsRequirements !== undefined)) {
2125
+ // TODO: !!! Comment this out
2126
+ console.log('Pipeline is not prepared because personas are not prepared', pipeline.personas);
2104
2127
  return false;
2105
2128
  }
2106
2129
  if (!pipeline.knowledgeSources.every((knowledgeSource) => knowledgeSource.preparationIds !== undefined)) {
2130
+ // TODO: !!! Comment this out
2131
+ console.log('Pipeline is not prepared because knowledge sources are not prepared', pipeline.knowledgeSources);
2107
2132
  return false;
2108
2133
  }
2109
2134
  /*
@@ -2124,36 +2149,6 @@ function isPipelinePrepared(pipeline) {
2124
2149
  * - [♨] Are tasks prepared
2125
2150
  */
2126
2151
 
2127
- /**
2128
- * Recursively converts JSON strings to JSON objects
2129
-
2130
- * @public exported from `@promptbook/utils`
2131
- */
2132
- function jsonStringsToJsons(object) {
2133
- if (object === null) {
2134
- return object;
2135
- }
2136
- if (Array.isArray(object)) {
2137
- return object.map(jsonStringsToJsons);
2138
- }
2139
- if (typeof object !== 'object') {
2140
- return object;
2141
- }
2142
- const newObject = { ...object };
2143
- for (const [key, value] of Object.entries(object)) {
2144
- if (typeof value === 'string' && isValidJsonString(value)) {
2145
- newObject[key] = JSON.parse(value);
2146
- }
2147
- else {
2148
- newObject[key] = jsonStringsToJsons(value);
2149
- }
2150
- }
2151
- return newObject;
2152
- }
2153
- /**
2154
- * TODO: Type the return type correctly
2155
- */
2156
-
2157
2152
  /**
2158
2153
  * This error indicates problems parsing the format value
2159
2154
  *
@@ -2337,6 +2332,101 @@ const ALL_ERRORS = {
2337
2332
  * Note: [💞] Ignore a discrepancy between file name and entity name
2338
2333
  */
2339
2334
 
2335
+ /**
2336
+ * Serializes an error into a [🚉] JSON-serializable object
2337
+ *
2338
+ * @public exported from `@promptbook/utils`
2339
+ */
2340
+ function serializeError(error) {
2341
+ const { name, message, stack } = error;
2342
+ const { id } = error;
2343
+ if (!Object.keys(ALL_ERRORS).includes(name)) {
2344
+ console.error(spaceTrim((block) => `
2345
+
2346
+ Cannot serialize error with name "${name}"
2347
+
2348
+ Authors of Promptbook probably forgot to add this error into the list of errors:
2349
+ https://github.com/webgptorg/promptbook/blob/main/src/errors/0-index.ts
2350
+
2351
+
2352
+ ${block(stack || message)}
2353
+
2354
+ `));
2355
+ }
2356
+ return {
2357
+ name: name,
2358
+ message,
2359
+ stack,
2360
+ id, // Include id in the serialized object
2361
+ };
2362
+ }
2363
+
2364
+ /**
2365
+ * Converts a JavaScript Object Notation (JSON) string into an object.
2366
+ *
2367
+ * Note: This is a wrapper around `JSON.parse()` with better error and type handling
2368
+ *
2369
+ * @public exported from `@promptbook/utils`
2370
+ */
2371
+ function jsonParse(value) {
2372
+ if (value === undefined) {
2373
+ throw new Error(`Can not parse JSON from undefined value.`);
2374
+ }
2375
+ else if (typeof value !== 'string') {
2376
+ console.error('Can not parse JSON from non-string value.', { text: value });
2377
+ throw new Error(spaceTrim(`
2378
+ Can not parse JSON from non-string value.
2379
+
2380
+ The value type: ${typeof value}
2381
+ See more in console.
2382
+ `));
2383
+ }
2384
+ try {
2385
+ return JSON.parse(value);
2386
+ }
2387
+ catch (error) {
2388
+ if (!(error instanceof Error)) {
2389
+ throw error;
2390
+ }
2391
+ throw new Error(spaceTrim((block) => `
2392
+ ${block(error.message)}
2393
+
2394
+ The JSON text:
2395
+ ${block(value)}
2396
+ `));
2397
+ }
2398
+ }
2399
+
2400
+ /**
2401
+ * Recursively converts JSON strings to JSON objects
2402
+ *
2403
+ * @public exported from `@promptbook/utils`
2404
+ */
2405
+ function jsonStringsToJsons(object) {
2406
+ if (object === null) {
2407
+ return object;
2408
+ }
2409
+ if (Array.isArray(object)) {
2410
+ return object.map(jsonStringsToJsons);
2411
+ }
2412
+ if (typeof object !== 'object') {
2413
+ return object;
2414
+ }
2415
+ const newObject = { ...object };
2416
+ for (const [key, value] of Object.entries(object)) {
2417
+ if (typeof value === 'string' && isValidJsonString(value)) {
2418
+ newObject[key] = jsonParse(value);
2419
+ }
2420
+ else {
2421
+ newObject[key] = jsonStringsToJsons(value);
2422
+ }
2423
+ }
2424
+ return newObject;
2425
+ }
2426
+ /**
2427
+ * TODO: Type the return type correctly
2428
+ */
2429
+
2340
2430
  /**
2341
2431
  * Deserializes the error object
2342
2432
  *
@@ -2502,64 +2592,6 @@ function createTask(options) {
2502
2592
  * TODO: [🐚] Split into more files and make `PrepareTask` & `RemoteTask` + split the function
2503
2593
  */
2504
2594
 
2505
- /**
2506
- * Serializes an error into a [🚉] JSON-serializable object
2507
- *
2508
- * @public exported from `@promptbook/utils`
2509
- */
2510
- function serializeError(error) {
2511
- const { name, message, stack } = error;
2512
- const { id } = error;
2513
- if (!Object.keys(ALL_ERRORS).includes(name)) {
2514
- console.error(spaceTrim((block) => `
2515
-
2516
- Cannot serialize error with name "${name}"
2517
-
2518
- Authors of Promptbook probably forgot to add this error into the list of errors:
2519
- https://github.com/webgptorg/promptbook/blob/main/src/errors/0-index.ts
2520
-
2521
-
2522
- ${block(stack || message)}
2523
-
2524
- `));
2525
- }
2526
- return {
2527
- name: name,
2528
- message,
2529
- stack,
2530
- id, // Include id in the serialized object
2531
- };
2532
- }
2533
-
2534
- /**
2535
- * Async version of Array.forEach
2536
- *
2537
- * @param array - Array to iterate over
2538
- * @param options - Options for the function
2539
- * @param callbackfunction - Function to call for each item
2540
- * @public exported from `@promptbook/utils`
2541
- * @deprecated [🪂] Use queues instead
2542
- */
2543
- async function forEachAsync(array, options, callbackfunction) {
2544
- const { maxParallelCount = Infinity } = options;
2545
- let index = 0;
2546
- let runningTasks = [];
2547
- const tasks = [];
2548
- for (const item of array) {
2549
- const currentIndex = index++;
2550
- const task = callbackfunction(item, currentIndex, array);
2551
- tasks.push(task);
2552
- runningTasks.push(task);
2553
- /* not await */ Promise.resolve(task).then(() => {
2554
- runningTasks = runningTasks.filter((t) => t !== task);
2555
- });
2556
- if (maxParallelCount < runningTasks.length) {
2557
- await Promise.race(runningTasks);
2558
- }
2559
- }
2560
- await Promise.all(tasks);
2561
- }
2562
-
2563
2595
  /**
2564
2596
  * Represents the uncertain value
2565
2597
  *
@@ -2603,7 +2635,7 @@ const ZERO_USAGE = $deepFreeze({
2603
2635
  *
2604
2636
  * @public exported from `@promptbook/core`
2605
2637
  */
2606
- $deepFreeze({
2638
+ const UNCERTAIN_USAGE = $deepFreeze({
2607
2639
  price: UNCERTAIN_ZERO_VALUE,
2608
2640
  input: {
2609
2641
  tokensCount: UNCERTAIN_ZERO_VALUE,
@@ -2628,6 +2660,35 @@ $deepFreeze({
2628
2660
  * Note: [💞] Ignore a discrepancy between file name and entity name
2629
2661
  */
2630
2662
 
2663
+ /**
2664
+ * Async version of Array.forEach
2665
+ *
2666
+ * @param array - Array to iterate over
2667
+ * @param options - Options for the function
2668
+ * @param callbackfunction - Function to call for each item
2669
+ * @public exported from `@promptbook/utils`
2670
+ * @deprecated [🪂] Use queues instead
2671
+ */
2672
+ async function forEachAsync(array, options, callbackfunction) {
2673
+ const { maxParallelCount = Infinity } = options;
2674
+ let index = 0;
2675
+ let runningTasks = [];
2676
+ const tasks = [];
2677
+ for (const item of array) {
2678
+ const currentIndex = index++;
2679
+ const task = callbackfunction(item, currentIndex, array);
2680
+ tasks.push(task);
2681
+ runningTasks.push(task);
2682
+ /* not await */ Promise.resolve(task).then(() => {
2683
+ runningTasks = runningTasks.filter((t) => t !== task);
2684
+ });
2685
+ if (maxParallelCount < runningTasks.length) {
2686
+ await Promise.race(runningTasks);
2687
+ }
2688
+ }
2689
+ await Promise.all(tasks);
2690
+ }
2691
+
2631
2692
  /**
2632
2693
  * Function `addUsage` will add multiple usages into one
2633
2694
  *
@@ -2974,27 +3035,48 @@ async function preparePersona(personaDescription, tools, options) {
2974
3035
  pipeline: await collection.getPipelineByUrl('https://promptbook.studio/promptbook/prepare-persona.book'),
2975
3036
  tools,
2976
3037
  });
2977
- // TODO: [🚐] Make arrayable LLMs -> single LLM DRY
2978
3038
  const _llms = arrayableToArray(tools.llm);
2979
3039
  const llmTools = _llms.length === 1 ? _llms[0] : joinLlmExecutionTools(..._llms);
2980
- const availableModels = await llmTools.listModels();
2981
- const availableModelNames = availableModels
3040
+ const availableModels = (await llmTools.listModels())
2982
3041
  .filter(({ modelVariant }) => modelVariant === 'CHAT')
2983
- .map(({ modelName }) => modelName)
2984
- .join(',');
2985
- const result = await preparePersonaExecutor({ availableModelNames, personaDescription }).asPromise();
3042
+ .map(({ modelName, modelDescription }) => ({
3043
+ modelName,
3044
+ modelDescription,
3045
+ // <- Note: `modelTitle` and `modelVariant` are not relevant for this task
3046
+ }));
3047
+ const result = await preparePersonaExecutor({
3048
+ availableModels /* <- Note: Passing as JSON */,
3049
+ personaDescription,
3050
+ }).asPromise();
2986
3051
  const { outputParameters } = result;
2987
- const { modelRequirements: modelRequirementsRaw } = outputParameters;
2988
- const modelRequirements = JSON.parse(modelRequirementsRaw);
3052
+ const { modelsRequirements: modelsRequirementsJson } = outputParameters;
3053
+ let modelsRequirementsUnchecked = jsonParse(modelsRequirementsJson);
2989
3054
  if (isVerbose) {
2990
- console.info(`PERSONA ${personaDescription}`, modelRequirements);
3055
+ console.info(`PERSONA ${personaDescription}`, modelsRequirementsUnchecked);
2991
3056
  }
2992
- const { modelName, systemMessage, temperature } = modelRequirements;
2993
- return {
3057
+ if (!Array.isArray(modelsRequirementsUnchecked)) {
3058
+ // <- TODO: Book should have syntax and system to enforce shape of JSON
3059
+ modelsRequirementsUnchecked = [modelsRequirementsUnchecked];
3060
+ /*
3061
+ throw new UnexpectedError(
3062
+ spaceTrim(
3063
+ (block) => `
3064
+ Invalid \`modelsRequirements\`:
3065
+
3066
+ \`\`\`json
3067
+ ${block(JSON.stringify(modelsRequirementsUnchecked, null, 4))}
3068
+ \`\`\`
3069
+ `,
3070
+ ),
3071
+ );
3072
+ */
3073
+ }
3074
+ const modelsRequirements = modelsRequirementsUnchecked.map((modelRequirements) => ({
2994
3075
  modelVariant: 'CHAT',
2995
- modelName,
2996
- systemMessage,
2997
- temperature,
3076
+ ...modelRequirements,
3077
+ }));
3078
+ return {
3079
+ modelsRequirements,
2998
3080
  };
2999
3081
  }
3000
3082
  /**
@@ -3005,7 +3087,8 @@ async function preparePersona(personaDescription, tools, options) {
3005
3087
  */
3006
3088
 
3007
3089
  /**
3008
- * @@@
3090
+ * Safely retrieves the global scope object (window in browser, global in Node.js)
3091
+ * regardless of the JavaScript environment in which the code is running
3009
3092
  *
3010
3093
  * Note: `$` is used to indicate that this function is not a pure function - it accesses the global scope
3011
3094
  *
@@ -3016,10 +3099,10 @@ function $getGlobalScope() {
3016
3099
  }
3017
3100
 
3018
3101
  /**
3019
- * @@@
3102
+ * Normalizes a text string to SCREAMING_CASE (all uppercase with underscores).
3020
3103
  *
3021
- * @param text @@@
3022
- * @returns @@@
3104
+ * @param text The text string to be converted to SCREAMING_CASE format.
3105
+ * @returns The normalized text in SCREAMING_CASE format.
3023
3106
  * @example 'HELLO_WORLD'
3024
3107
  * @example 'I_LOVE_PROMPTBOOK'
3025
3108
  * @public exported from `@promptbook/utils`
@@ -3071,10 +3154,10 @@ function normalizeTo_SCREAMING_CASE(text) {
3071
3154
  */
3072
3155
 
3073
3156
  /**
3074
- * @@@
3157
+ * Normalizes a text string to snake_case format.
3075
3158
  *
3076
- * @param text @@@
3077
- * @returns @@@
3159
+ * @param text The text string to be converted to snake_case format.
3160
+ * @returns The normalized text in snake_case format.
3078
3161
  * @example 'hello_world'
3079
3162
  * @example 'i_love_promptbook'
3080
3163
  * @public exported from `@promptbook/utils`
@@ -3084,11 +3167,11 @@ function normalizeTo_snake_case(text) {
3084
3167
  }
3085
3168
 
3086
3169
  /**
3087
- * Register is @@@
3170
+ * Global registry for storing and managing registered entities of a given type.
3088
3171
  *
3089
3172
  * Note: `$` is used to indicate that this function is not a pure function - it accesses and adds variables in global scope.
3090
3173
  *
3091
- * @private internal utility, exported are only signleton instances of this class
3174
+ * @private internal utility, exported are only singleton instances of this class
3092
3175
  */
3093
3176
  class $Register {
3094
3177
  constructor(registerName) {
@@ -3132,10 +3215,10 @@ class $Register {
3132
3215
  }
3133
3216
 
3134
3217
  /**
3135
- * @@@
3218
+ * Global registry for storing metadata about all available scrapers and converters.
3136
3219
  *
3137
- * Note: `$` is used to indicate that this interacts with the global scope
3138
- * @singleton Only one instance of each register is created per build, but thare can be more @@@
3220
+ * Note: `$` is used to indicate that this interacts with the global scope.
3221
+ * @singleton Only one instance of each register is created per build, but there can be more in different contexts (e.g., tests).
3139
3222
  * @public exported from `@promptbook/core`
3140
3223
  */
3141
3224
  const $scrapersMetadataRegister = new $Register('scrapers_metadata');
@@ -3144,10 +3227,11 @@ const $scrapersMetadataRegister = new $Register('scrapers_metadata');
3144
3227
  */
3145
3228
 
3146
3229
  /**
3147
- * @@@
3230
+ * Registry for all available scrapers in the system.
3231
+ * Central point for registering and accessing different types of content scrapers.
3148
3232
  *
3149
3233
  * Note: `$` is used to indicate that this interacts with the global scope
3150
- * @singleton Only one instance of each register is created per build, but thare can be more @@@
3234
+ * @singleton Only one instance of each register is created per build, but there can be more than one in different build modules
3151
3235
  * @public exported from `@promptbook/core`
3152
3236
  */
3153
3237
  const $scrapersRegister = new $Register('scraper_constructors');
@@ -3325,7 +3409,9 @@ const promptbookFetch = async (urlOrRequest, init) => {
3325
3409
  */
3326
3410
 
3327
3411
  /**
3328
- * @@@
3412
+ * Factory function that creates a handler for processing knowledge sources.
3413
+ * Provides standardized processing of different types of knowledge sources
3414
+ * across various scraper implementations.
3329
3415
  *
3330
3416
  * @public exported from `@promptbook/core`
3331
3417
  */
@@ -3432,7 +3518,7 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3432
3518
  > },
3433
3519
  */
3434
3520
  async asJson() {
3435
- return JSON.parse(await tools.fs.readFile(filename, 'utf-8'));
3521
+ return jsonParse(await tools.fs.readFile(filename, 'utf-8'));
3436
3522
  },
3437
3523
  async asText() {
3438
3524
  return await tools.fs.readFile(filename, 'utf-8');
@@ -3566,9 +3652,12 @@ TODO: [🧊] This is how it can look in future
3566
3652
  */
3567
3653
 
3568
3654
  /**
3569
- * @@@
3655
+ * Prepares tasks by adding knowledge to the prompt and ensuring all necessary parameters are included.
3570
3656
  *
3571
- * @public exported from `@promptbook/core`
3657
+ * @param pipeline Pipeline containing the sequence of tasks that are chained together
3658
+ * @returns A promise that resolves to the prepared tasks.
3659
+ *
3660
+ * @private internal utility of `preparePipeline`
3572
3661
  */
3573
3662
  async function prepareTasks(pipeline, tools, options) {
3574
3663
  const { maxParallelCount = DEFAULT_MAX_PARALLEL_COUNT } = options;
@@ -3690,14 +3779,14 @@ async function preparePipeline(pipeline, tools, options) {
3690
3779
  // TODO: [🖌][🧠] Implement some `mapAsync` function
3691
3780
  const preparedPersonas = new Array(personas.length);
3692
3781
  await forEachAsync(personas, { maxParallelCount /* <- TODO: [🪂] When there are subtasks, this maximul limit can be broken */ }, async (persona, index) => {
3693
- const modelRequirements = await preparePersona(persona.description, { ...tools, llm: llmToolsWithUsage }, {
3782
+ const { modelsRequirements } = await preparePersona(persona.description, { ...tools, llm: llmToolsWithUsage }, {
3694
3783
  rootDirname,
3695
3784
  maxParallelCount /* <- TODO: [🪂] */,
3696
3785
  isVerbose,
3697
3786
  });
3698
3787
  const preparedPersona = {
3699
3788
  ...persona,
3700
- modelRequirements,
3789
+ modelsRequirements,
3701
3790
  preparationIds: [/* TODO: [🧊] -> */ currentPreparation.id],
3702
3791
  // <- TODO: [🍙] Make some standard order of json properties
3703
3792
  };
@@ -4005,7 +4094,7 @@ function union(...sets) {
4005
4094
  }
4006
4095
 
4007
4096
  /**
4008
- * @@@
4097
+ * Contains configuration options for parsing and generating CSV files, such as delimiters and quoting rules.
4009
4098
  *
4010
4099
  * @public exported from `@promptbook/core`
4011
4100
  */
@@ -4014,11 +4103,29 @@ const MANDATORY_CSV_SETTINGS = Object.freeze({
4014
4103
  // encoding: 'utf-8',
4015
4104
  });
4016
4105
 
4106
/**
 * Parses a CSV string with the given settings
 *
 * Note: This is wrapper around `papaparse.parse()` with better autohealing
 * Note: `MANDATORY_CSV_SETTINGS` are applied last, so they always win over the passed `settings`
 * Note: When the effective `newline` setting does not contain `\r` but the input does, all `\r\n` and lone `\r`
 *       are converted to `\n` before parsing (a warning is printed to the console)
 *
 * @param value CSV content to parse
 * @param settings Parser settings passed through to the underlying `parse` call
 * @param schema Currently unused
 * @returns Whatever the underlying `parse` call returns
 *
 * @private - for now until `@promptbook/csv` is released
 */
function csvParse(value /* <- TODO: string_csv */, settings, schema /* <- TODO: Make CSV Schemas */) {
    const effectiveSettings = { ...settings, ...MANDATORY_CSV_SETTINGS };
    const { newline } = effectiveSettings;
    let healedValue = value;

    // Note: Autoheal invalid '\n' characters
    if (newline && !newline.includes('\r') && value.includes('\r')) {
        console.warn('CSV string contains carriage return characters, but in the CSV settings the `newline` setting does not include them. Autohealing the CSV string.');
        // Note: One pass handles both `\r\n` pairs and lone `\r`
        healedValue = value.replace(/\r\n?/g, '\n');
    }

    return parse(healedValue, effectiveSettings);
}
4123
+
4017
4124
  /**
4018
4125
  * Function to check if a string is valid CSV
4019
4126
  *
4020
4127
  * @param value The string to check
4021
- * @returns True if the string is a valid CSV string, false otherwise
4128
+ * @returns `true` if the string is a valid CSV string, false otherwise
4022
4129
  *
4023
4130
  * @public exported from `@promptbook/utils`
4024
4131
  */
@@ -4042,7 +4149,7 @@ function isValidCsvString(value) {
4042
4149
  * @public exported from `@promptbook/core`
4043
4150
  * <- TODO: [🏢] Export from package `@promptbook/csv`
4044
4151
  */
4045
- const CsvFormatDefinition = {
4152
+ const CsvFormatParser = {
4046
4153
  formatName: 'CSV',
4047
4154
  aliases: ['SPREADSHEET', 'TABLE'],
4048
4155
  isValid(value, settings, schema) {
@@ -4054,12 +4161,12 @@ const CsvFormatDefinition = {
4054
4161
  heal(value, settings, schema) {
4055
4162
  throw new Error('Not implemented');
4056
4163
  },
4057
- subvalueDefinitions: [
4164
+ subvalueParsers: [
4058
4165
  {
4059
4166
  subvalueName: 'ROW',
4060
- async mapValues(value, outputParameterName, settings, mapCallback) {
4061
- // TODO: [👨🏾‍🤝‍👨🏼] DRY csv parsing
4062
- const csv = parse(value, { ...settings, ...MANDATORY_CSV_SETTINGS });
4167
+ async mapValues(options) {
4168
+ const { value, outputParameterName, settings, mapCallback, onProgress } = options;
4169
+ const csv = csvParse(value, settings);
4063
4170
  if (csv.errors.length !== 0) {
4064
4171
  throw new CsvFormatError(spaceTrim((block) => `
4065
4172
  CSV parsing error
@@ -4074,23 +4181,37 @@ const CsvFormatDefinition = {
4074
4181
  ${block(value)}
4075
4182
  `));
4076
4183
  }
4077
- const mappedData = await Promise.all(csv.data.map(async (row, index) => {
4184
+ const mappedData = [];
4185
+ const length = csv.data.length;
4186
+ for (let index = 0; index < length; index++) {
4187
+ const row = csv.data[index];
4078
4188
  if (row[outputParameterName]) {
4079
4189
  throw new CsvFormatError(`Can not overwrite existing column "${outputParameterName}" in CSV row`);
4080
4190
  }
4081
- return {
4191
+ const mappedRow = {
4082
4192
  ...row,
4083
- [outputParameterName]: await mapCallback(row, index),
4193
+ [outputParameterName]: await mapCallback(row, index, length),
4084
4194
  };
4085
- }));
4195
+ mappedData.push(mappedRow);
4196
+ if (onProgress) {
4197
+ // Note: Report the CSV with all rows mapped so far
4198
+ /*
4199
+ // TODO: [🛕] Report progress with all the rows including the pending ones
4200
+ const progressData = mappedData.map((row, i) =>
4201
+ i > index ? { ...row, [outputParameterName]: PENDING_VALUE_PLACEHOLDER } : row,
4202
+ );
4203
+ */
4204
+ await onProgress(unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS }));
4205
+ }
4206
+ }
4086
4207
  return unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
4087
4208
  },
4088
4209
  },
4089
4210
  {
4090
4211
  subvalueName: 'CELL',
4091
- async mapValues(value, outputParameterName, settings, mapCallback) {
4092
- // TODO: [👨🏾‍🤝‍👨🏼] DRY csv parsing
4093
- const csv = parse(value, { ...settings, ...MANDATORY_CSV_SETTINGS });
4212
+ async mapValues(options) {
4213
+ const { value, settings, mapCallback, onProgress } = options;
4214
+ const csv = csvParse(value, settings);
4094
4215
  if (csv.errors.length !== 0) {
4095
4216
  throw new CsvFormatError(spaceTrim((block) => `
4096
4217
  CSV parsing error
@@ -4106,9 +4227,9 @@ const CsvFormatDefinition = {
4106
4227
  `));
4107
4228
  }
4108
4229
  const mappedData = await Promise.all(csv.data.map(async (row, rowIndex) => {
4109
- return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex) => {
4230
+ return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex, array) => {
4110
4231
  const index = rowIndex * Object.keys(row).length + columnIndex;
4111
- return /* not await */ mapCallback({ [key]: value }, index);
4232
+ return /* not await */ mapCallback({ [key]: value }, index, array.length);
4112
4233
  }));
4113
4234
  }));
4114
4235
  return unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
@@ -4117,10 +4238,10 @@ const CsvFormatDefinition = {
4117
4238
  ],
4118
4239
  };
4119
4240
  /**
4120
- * TODO: [🍓] In `CsvFormatDefinition` implement simple `isValid`
4121
- * TODO: [🍓] In `CsvFormatDefinition` implement partial `canBeValid`
4122
- * TODO: [🍓] In `CsvFormatDefinition` implement `heal
4123
- * TODO: [🍓] In `CsvFormatDefinition` implement `subvalueDefinitions`
4241
+ * TODO: [🍓] In `CsvFormatParser` implement simple `isValid`
4242
+ * TODO: [🍓] In `CsvFormatParser` implement partial `canBeValid`
4243
+ * TODO: [🍓] In `CsvFormatParser` implement `heal
4244
+ * TODO: [🍓] In `CsvFormatParser` implement `subvalueParsers`
4124
4245
  * TODO: [🏢] Allow to expect something inside CSV objects and other formats
4125
4246
  */
4126
4247
 
@@ -4129,7 +4250,7 @@ const CsvFormatDefinition = {
4129
4250
  *
4130
4251
  * @private still in development [🏢]
4131
4252
  */
4132
- const JsonFormatDefinition = {
4253
+ const JsonFormatParser = {
4133
4254
  formatName: 'JSON',
4134
4255
  mimeType: 'application/json',
4135
4256
  isValid(value, settings, schema) {
@@ -4141,28 +4262,28 @@ const JsonFormatDefinition = {
4141
4262
  heal(value, settings, schema) {
4142
4263
  throw new Error('Not implemented');
4143
4264
  },
4144
- subvalueDefinitions: [],
4265
+ subvalueParsers: [],
4145
4266
  };
4146
4267
  /**
4147
4268
  * TODO: [🧠] Maybe propper instance of object
4148
4269
  * TODO: [0] Make string_serialized_json
4149
4270
  * TODO: [1] Make type for JSON Settings and Schema
4150
4271
  * TODO: [🧠] What to use for validating JSONs - JSON Schema, ZoD, typescript types/interfaces,...?
4151
- * TODO: [🍓] In `JsonFormatDefinition` implement simple `isValid`
4152
- * TODO: [🍓] In `JsonFormatDefinition` implement partial `canBeValid`
4153
- * TODO: [🍓] In `JsonFormatDefinition` implement `heal
4154
- * TODO: [🍓] In `JsonFormatDefinition` implement `subvalueDefinitions`
4272
+ * TODO: [🍓] In `JsonFormatParser` implement simple `isValid`
4273
+ * TODO: [🍓] In `JsonFormatParser` implement partial `canBeValid`
4274
+ * TODO: [🍓] In `JsonFormatParser` implement `heal
4275
+ * TODO: [🍓] In `JsonFormatParser` implement `subvalueParsers`
4155
4276
  * TODO: [🏢] Allow to expect something inside JSON objects and other formats
4156
4277
  */
4157
4278
 
4158
4279
  /**
4159
4280
  * Definition for any text - this will be always valid
4160
4281
  *
4161
- * Note: This is not useful for validation, but for splitting and mapping with `subvalueDefinitions`
4282
+ * Note: This is not useful for validation, but for splitting and mapping with `subvalueParsers`
4162
4283
  *
4163
4284
  * @public exported from `@promptbook/core`
4164
4285
  */
4165
- const TextFormatDefinition = {
4286
+ const TextFormatParser = {
4166
4287
  formatName: 'TEXT',
4167
4288
  isValid(value) {
4168
4289
  return typeof value === 'string';
@@ -4171,19 +4292,20 @@ const TextFormatDefinition = {
4171
4292
  return typeof partialValue === 'string';
4172
4293
  },
4173
4294
  heal() {
4174
- throw new UnexpectedError('It does not make sense to call `TextFormatDefinition.heal`');
4295
+ throw new UnexpectedError('It does not make sense to call `TextFormatParser.heal`');
4175
4296
  },
4176
- subvalueDefinitions: [
4297
+ subvalueParsers: [
4177
4298
  {
4178
4299
  subvalueName: 'LINE',
4179
- async mapValues(value, outputParameterName, settings, mapCallback) {
4300
+ async mapValues(options) {
4301
+ const { value, mapCallback, onProgress } = options;
4180
4302
  const lines = value.split('\n');
4181
- const mappedLines = await Promise.all(lines.map((lineContent, lineNumber) =>
4303
+ const mappedLines = await Promise.all(lines.map((lineContent, lineNumber, array) =>
4182
4304
  // TODO: [🧠] Maybe option to skip empty line
4183
4305
  /* not await */ mapCallback({
4184
4306
  lineContent,
4185
4307
  // TODO: [🧠] Maybe also put here `lineNumber`
4186
- }, lineNumber)));
4308
+ }, lineNumber, array.length)));
4187
4309
  return mappedLines.join('\n');
4188
4310
  },
4189
4311
  },
@@ -4193,10 +4315,10 @@ const TextFormatDefinition = {
4193
4315
  /**
4194
4316
  * TODO: [1] Make type for XML Text and Schema
4195
4317
  * TODO: [🧠][🤠] Here should be all words, characters, lines, paragraphs, pages available as subvalues
4196
- * TODO: [🍓] In `TextFormatDefinition` implement simple `isValid`
4197
- * TODO: [🍓] In `TextFormatDefinition` implement partial `canBeValid`
4198
- * TODO: [🍓] In `TextFormatDefinition` implement `heal
4199
- * TODO: [🍓] In `TextFormatDefinition` implement `subvalueDefinitions`
4318
+ * TODO: [🍓] In `TextFormatParser` implement simple `isValid`
4319
+ * TODO: [🍓] In `TextFormatParser` implement partial `canBeValid`
4320
+ * TODO: [🍓] In `TextFormatParser` implement `heal
4321
+ * TODO: [🍓] In `TextFormatParser` implement `subvalueParsers`
4200
4322
  * TODO: [🏢] Allow to expect something inside each item of list and other formats
4201
4323
  */
4202
4324
 
@@ -4204,7 +4326,7 @@ const TextFormatDefinition = {
4204
4326
  * Function to check if a string is valid XML
4205
4327
  *
4206
4328
  * @param value
4207
- * @returns True if the string is a valid XML string, false otherwise
4329
+ * @returns `true` if the string is a valid XML string, false otherwise
4208
4330
  *
4209
4331
  * @public exported from `@promptbook/utils`
4210
4332
  */
@@ -4229,7 +4351,7 @@ function isValidXmlString(value) {
4229
4351
  *
4230
4352
  * @private still in development [🏢]
4231
4353
  */
4232
- const XmlFormatDefinition = {
4354
+ const XmlFormatParser = {
4233
4355
  formatName: 'XML',
4234
4356
  mimeType: 'application/xml',
4235
4357
  isValid(value, settings, schema) {
@@ -4241,17 +4363,17 @@ const XmlFormatDefinition = {
4241
4363
  heal(value, settings, schema) {
4242
4364
  throw new Error('Not implemented');
4243
4365
  },
4244
- subvalueDefinitions: [],
4366
+ subvalueParsers: [],
4245
4367
  };
4246
4368
  /**
4247
4369
  * TODO: [🧠] Maybe propper instance of object
4248
4370
  * TODO: [0] Make string_serialized_xml
4249
4371
  * TODO: [1] Make type for XML Settings and Schema
4250
4372
  * TODO: [🧠] What to use for validating XMLs - XSD,...
4251
- * TODO: [🍓] In `XmlFormatDefinition` implement simple `isValid`
4252
- * TODO: [🍓] In `XmlFormatDefinition` implement partial `canBeValid`
4253
- * TODO: [🍓] In `XmlFormatDefinition` implement `heal
4254
- * TODO: [🍓] In `XmlFormatDefinition` implement `subvalueDefinitions`
4373
+ * TODO: [🍓] In `XmlFormatParser` implement simple `isValid`
4374
+ * TODO: [🍓] In `XmlFormatParser` implement partial `canBeValid`
4375
+ * TODO: [🍓] In `XmlFormatParser` implement `heal
4376
+ * TODO: [🍓] In `XmlFormatParser` implement `subvalueParsers`
4255
4377
  * TODO: [🏢] Allow to expect something inside XML and other formats
4256
4378
  */
4257
4379
 
@@ -4260,24 +4382,19 @@ const XmlFormatDefinition = {
4260
4382
  *
4261
4383
  * @private internal index of `...` <- TODO [🏢]
4262
4384
  */
4263
- const FORMAT_DEFINITIONS = [
4264
- JsonFormatDefinition,
4265
- XmlFormatDefinition,
4266
- TextFormatDefinition,
4267
- CsvFormatDefinition,
4268
- ];
4385
+ const FORMAT_DEFINITIONS = [JsonFormatParser, XmlFormatParser, TextFormatParser, CsvFormatParser];
4269
4386
  /**
4270
4387
  * Note: [💞] Ignore a discrepancy between file name and entity name
4271
4388
  */
4272
4389
 
4273
4390
  /**
4274
- * Maps available parameters to expected parameters
4391
+ * Maps available parameters to expected parameters for a pipeline task.
4275
4392
  *
4276
4393
  * The strategy is:
4277
- * 1) @@@
4278
- * 2) @@@
4394
+ * 1) First, match parameters by name where both available and expected.
4395
+ * 2) Then, if there are unmatched expected and available parameters, map them by order.
4279
4396
  *
4280
- * @throws {PipelineExecutionError} @@@
4397
+ * @throws {PipelineExecutionError} If the number of unmatched expected and available parameters does not match, or mapping is ambiguous.
4281
4398
  * @private within the repository used in `createPipelineExecutor`
4282
4399
  */
4283
4400
  function mapAvailableToExpectedParameters(options) {
@@ -4300,7 +4417,7 @@ function mapAvailableToExpectedParameters(options) {
4300
4417
  else if (!availableParametersNames.has(parameterName) && expectedParameterNames.has(parameterName)) ;
4301
4418
  }
4302
4419
  if (expectedParameterNames.size === 0) {
4303
- // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent @@@
4420
+ // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
4304
4421
  Object.freeze(mappedParameters);
4305
4422
  return mappedParameters;
4306
4423
  }
@@ -4331,7 +4448,7 @@ function mapAvailableToExpectedParameters(options) {
4331
4448
  for (let i = 0; i < expectedParameterNames.size; i++) {
4332
4449
  mappedParameters[expectedParameterNamesArray[i]] = availableParameters[availableParametersNamesArray[i]];
4333
4450
  }
4334
- // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent @@@
4451
+ // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
4335
4452
  Object.freeze(mappedParameters);
4336
4453
  return mappedParameters;
4337
4454
  }
@@ -4435,7 +4552,7 @@ function extractJsonBlock(markdown) {
4435
4552
  }
4436
4553
  /**
4437
4554
  * TODO: Add some auto-healing logic + extract YAML, JSON5, TOML, etc.
4438
- * TODO: [🏢] Make this logic part of `JsonFormatDefinition` or `isValidJsonString`
4555
+ * TODO: [🏢] Make this logic part of `JsonFormatParser` or `isValidJsonString`
4439
4556
  */
4440
4557
 
4441
4558
  /**
@@ -4478,10 +4595,12 @@ function templateParameters(template, parameters) {
4478
4595
  throw new PipelineExecutionError('Parameter is already opened or not closed');
4479
4596
  }
4480
4597
  if (parameters[parameterName] === undefined) {
4598
+ console.log('!!! templateParameters 1', { parameterName, template, parameters });
4481
4599
  throw new PipelineExecutionError(`Parameter \`{${parameterName}}\` is not defined`);
4482
4600
  }
4483
4601
  let parameterValue = parameters[parameterName];
4484
4602
  if (parameterValue === undefined) {
4603
+ console.log('!!! templateParameters 2', { parameterName, template, parameters });
4485
4604
  throw new PipelineExecutionError(`Parameter \`{${parameterName}}\` is not defined`);
4486
4605
  }
4487
4606
  parameterValue = valueToString(parameterValue);
@@ -4637,7 +4756,7 @@ const CountUtils = {
4637
4756
  PAGES: countPages,
4638
4757
  };
4639
4758
  /**
4640
- * TODO: [🧠][🤠] This should be probbably as part of `TextFormatDefinition`
4759
+ * TODO: [🧠][🤠] This should probably be part of `TextFormatParser`
4641
4760
  * Note: [💞] Ignore a discrepancy between file name and entity name
4642
4761
  */
4643
4762
 
@@ -4665,13 +4784,17 @@ function checkExpectations(expectations, value) {
4665
4784
  }
4666
4785
  /**
4667
4786
  * TODO: [💝] Unite object for expecting amount and format
4668
- * TODO: [🧠][🤠] This should be part of `TextFormatDefinition`
4787
+ * TODO: [🧠][🤠] This should be part of `TextFormatParser`
4669
4788
  * Note: [💝] and [🤠] are interconnected together
4670
4789
  */
4671
4790
 
4672
4791
  /**
4673
- * @@@
4792
+ * Executes a pipeline task with multiple attempts, including joker and retry logic. Handles different task types
4793
+ * (prompt, script, dialog, etc.), applies postprocessing, checks expectations, and updates the execution report.
4794
+ * Throws errors if execution fails after all attempts.
4674
4795
  *
4796
+ * @param options - The options for execution, including task, parameters, pipeline, and configuration.
4797
+ * @returns The result string of the executed task.
4675
4798
  * @private internal utility of `createPipelineExecutor`
4676
4799
  */
4677
4800
  async function executeAttempts(options) {
@@ -4893,7 +5016,7 @@ async function executeAttempts(options) {
4893
5016
  if (task.format) {
4894
5017
  if (task.format === 'JSON') {
4895
5018
  if (!isValidJsonString($ongoingTaskResult.$resultString || '')) {
4896
- // TODO: [🏢] Do more universally via `FormatDefinition`
5019
+ // TODO: [🏢] Do more universally via `FormatParser`
4897
5020
  try {
4898
5021
  $ongoingTaskResult.$resultString = extractJsonBlock($ongoingTaskResult.$resultString || '');
4899
5022
  }
@@ -4995,12 +5118,16 @@ async function executeAttempts(options) {
4995
5118
  */
4996
5119
 
4997
5120
  /**
4998
- * @@@
5121
+ * Executes a pipeline task that requires mapping or iterating over subvalues of a parameter (such as rows in a CSV).
5122
+ * Handles format and subformat resolution, error handling, and progress reporting.
5123
+ *
5124
+ * @param options - Options for execution, including task details and progress callback.
5125
+ * @returns The result of the subvalue mapping or execution attempts.
4999
5126
  *
5000
5127
  * @private internal utility of `createPipelineExecutor`
5001
5128
  */
5002
5129
  async function executeFormatSubvalues(options) {
5003
- const { task, jokerParameterNames, parameters, priority, csvSettings, pipelineIdentification } = options;
5130
+ const { task, jokerParameterNames, parameters, priority, csvSettings, onProgress, pipelineIdentification } = options;
5004
5131
  if (task.foreach === undefined) {
5005
5132
  return /* not await */ executeAttempts(options);
5006
5133
  }
@@ -5031,16 +5158,16 @@ async function executeFormatSubvalues(options) {
5031
5158
  ${block(pipelineIdentification)}
5032
5159
  `));
5033
5160
  }
5034
- const subvalueDefinition = formatDefinition.subvalueDefinitions.find((subvalueDefinition) => [subvalueDefinition.subvalueName, ...(subvalueDefinition.aliases || [])].includes(task.foreach.subformatName));
5035
- if (subvalueDefinition === undefined) {
5161
+ const subvalueParser = formatDefinition.subvalueParsers.find((subvalueParser) => [subvalueParser.subvalueName, ...(subvalueParser.aliases || [])].includes(task.foreach.subformatName));
5162
+ if (subvalueParser === undefined) {
5036
5163
  throw new UnexpectedError(
5037
5164
  // <- TODO: [🧠][🧐] Should be formats fixed per promptbook version or behave as plugins (=> change UnexpectedError)
5038
5165
  spaceTrim((block) => `
5039
5166
  Unsupported subformat name "${task.foreach.subformatName}" for format "${task.foreach.formatName}"
5040
5167
 
5041
5168
  Available subformat names for format "${formatDefinition.formatName}":
5042
- ${block(formatDefinition.subvalueDefinitions
5043
- .map((subvalueDefinition) => subvalueDefinition.subvalueName)
5169
+ ${block(formatDefinition.subvalueParsers
5170
+ .map((subvalueParser) => subvalueParser.subvalueName)
5044
5171
  .map((subvalueName) => `- ${subvalueName}`)
5045
5172
  .join('\n'))}
5046
5173
 
@@ -5054,53 +5181,83 @@ async function executeFormatSubvalues(options) {
5054
5181
  formatSettings = csvSettings;
5055
5182
  // <- TODO: [🤹‍♂️] More universal, make simmilar pattern for other formats for example \n vs \r\n in text
5056
5183
  }
5057
- const resultString = await subvalueDefinition.mapValues(parameterValue, task.foreach.outputSubparameterName, formatSettings, async (subparameters, index) => {
5058
- let mappedParameters;
5059
- // TODO: [🤹‍♂️][🪂] Limit to N concurrent executions
5060
- // TODO: When done [🐚] Report progress also for each subvalue here
5061
- try {
5062
- mappedParameters = mapAvailableToExpectedParameters({
5063
- expectedParameters: Object.fromEntries(task.foreach.inputSubparameterNames.map((subparameterName) => [subparameterName, null])),
5064
- availableParameters: subparameters,
5065
- });
5066
- }
5067
- catch (error) {
5068
- if (!(error instanceof PipelineExecutionError)) {
5069
- throw error;
5184
+ const resultString = await subvalueParser.mapValues({
5185
+ value: parameterValue,
5186
+ outputParameterName: task.foreach.outputSubparameterName,
5187
+ settings: formatSettings,
5188
+ onProgress(partialResultString) {
5189
+ return onProgress(Object.freeze({
5190
+ [task.resultingParameterName]: partialResultString,
5191
+ }));
5192
+ },
5193
+ async mapCallback(subparameters, index, length) {
5194
+ let mappedParameters;
5195
+ try {
5196
+ mappedParameters = mapAvailableToExpectedParameters({
5197
+ expectedParameters: Object.fromEntries(task.foreach.inputSubparameterNames.map((subparameterName) => [subparameterName, null])),
5198
+ availableParameters: subparameters,
5199
+ });
5070
5200
  }
5071
- throw new PipelineExecutionError(spaceTrim((block) => `
5072
- ${error.message}
5201
+ catch (error) {
5202
+ if (!(error instanceof PipelineExecutionError)) {
5203
+ throw error;
5204
+ }
5205
+ const highLevelError = new PipelineExecutionError(spaceTrim((block) => `
5206
+ ${error.message}
5073
5207
 
5074
- This is error in FOREACH command
5075
- You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5208
+ This is error in FOREACH command when mapping ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
5209
+ You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5076
5210
 
5077
- ${block(pipelineIdentification)}
5078
- Subparameter index: ${index}
5079
- `));
5080
- }
5081
- const allSubparameters = {
5082
- ...parameters,
5083
- ...mappedParameters,
5084
- };
5085
- // Note: [👨‍👨‍👧] Now we can freeze `subparameters` because we are sure that all and only used parameters are defined and are not going to be changed
5086
- Object.freeze(allSubparameters);
5087
- const subresultString = await executeAttempts({
5088
- ...options,
5089
- priority: priority + index,
5090
- parameters: allSubparameters,
5091
- pipelineIdentification: spaceTrim((block) => `
5092
- ${block(pipelineIdentification)}
5093
- Subparameter index: ${index}
5094
- `),
5095
- });
5096
- return subresultString;
5211
+ ${block(pipelineIdentification)}
5212
+ `));
5213
+ if (length > BIG_DATASET_TRESHOLD) {
5214
+ console.error(highLevelError);
5215
+ return FAILED_VALUE_PLACEHOLDER;
5216
+ }
5217
+ throw highLevelError;
5218
+ }
5219
+ const allSubparameters = {
5220
+ ...parameters,
5221
+ ...mappedParameters,
5222
+ };
5223
+ Object.freeze(allSubparameters);
5224
+ try {
5225
+ const subresultString = await executeAttempts({
5226
+ ...options,
5227
+ priority: priority + index,
5228
+ parameters: allSubparameters,
5229
+ pipelineIdentification: spaceTrim((block) => `
5230
+ ${block(pipelineIdentification)}
5231
+ Subparameter index: ${index}
5232
+ `),
5233
+ });
5234
+ return subresultString;
5235
+ }
5236
+ catch (error) {
5237
+ if (length > BIG_DATASET_TRESHOLD) {
5238
+ console.error(spaceTrim((block) => `
5239
+ ${error.message}
5240
+
5241
+ This is error in FOREACH command when processing ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
5242
+
5243
+ ${block(pipelineIdentification)}
5244
+ `));
5245
+ return FAILED_VALUE_PLACEHOLDER;
5246
+ }
5247
+ throw error;
5248
+ }
5249
+ },
5097
5250
  });
5098
5251
  return resultString;
5099
5252
  }
5100
5253
 
5101
5254
  /**
5102
- * @@@
5255
+ * Returns the context for a given task, typically used to provide additional information or variables
5256
+ * required for the execution of the task within a pipeline. The context is returned as a string value
5257
+ * that may include markdown formatting.
5103
5258
  *
5259
+ * @param task - The task for which the context is being generated. This should be a deeply immutable TaskJson object.
5260
+ * @returns The context as a string, formatted as markdown and parameter value.
5104
5261
  * @private internal utility of `createPipelineExecutor`
5105
5262
  */
5106
5263
  async function getContextForTask(task) {
@@ -5108,7 +5265,7 @@ async function getContextForTask(task) {
5108
5265
  }
5109
5266
 
5110
5267
  /**
5111
- * @@@
5268
+ * Retrieves example values or templates for a given task, used to guide or validate pipeline execution.
5112
5269
  *
5113
5270
  * @private internal utility of `createPipelineExecutor`
5114
5271
  */
@@ -5117,25 +5274,127 @@ async function getExamplesForTask(task) {
5117
5274
  }
5118
5275
 
5119
5276
  /**
5120
- * @@@
5277
+ * Computes the cosine similarity between two embedding vectors
5278
+ *
5279
+ * Note: This is helping function for RAG (retrieval-augmented generation)
5280
+ *
5281
+ * @param embeddingVector1
5282
+ * @param embeddingVector2
5283
+ * @returns Cosine similarity between the two vectors
5284
+ *
5285
+ * @public exported from `@promptbook/core`
5286
+ */
5287
+ function computeCosineSimilarity(embeddingVector1, embeddingVector2) {
5288
+ if (embeddingVector1.length !== embeddingVector2.length) {
5289
+ throw new TypeError('Embedding vectors must have the same length');
5290
+ }
5291
+ const dotProduct = embeddingVector1.reduce((sum, value, index) => sum + value * embeddingVector2[index], 0);
5292
+ const magnitude1 = Math.sqrt(embeddingVector1.reduce((sum, value) => sum + value * value, 0));
5293
+ const magnitude2 = Math.sqrt(embeddingVector2.reduce((sum, value) => sum + value * value, 0));
5294
+ return 1 - dotProduct / (magnitude1 * magnitude2);
5295
+ }
5296
+
5297
/**
 * Renders knowledge pieces as a plain-text bullet list
 *
 * Each piece contributes one `- <content>` entry and the entries are joined by a newline
 *
 * @param knowledgePieces Array of objects with a `content` property
 * @returns The bullet list as a single string (empty string for an empty array)
 *
 * @private internal utility of `createPipelineExecutor`
 */
function knowledgePiecesToString(knowledgePieces) {
    const toBullet = ({ content }) => `- ${content}`;
    const bullets = knowledgePieces.map(toBullet);
    // <- TODO: [🧠] Some smarter aggregation of knowledge pieces, single-line vs multi-line vs mixed
    return bullets.join('\n');
}
5313
+
5314
/**
 * Retrieves the most relevant knowledge pieces for a given task using embedding-based similarity search.
 * This is where retrieval-augmented generation (RAG) is performed to enhance the task with external knowledge.
 *
 * How it works:
 * 1) The embedding model is taken from the first index entry of the first knowledge piece
 * 2) The task content is embedded with that model via `callEmbeddingModel`
 * 3) Every knowledge piece is scored by `computeCosineSimilarity`, which returns `1 - cosine similarity`,
 *    so a LOWER score means a MORE relevant piece
 * 4) Pieces are sorted ascending by that score and the first 5 are returned
 *
 * Note: Pieces which have no index entry for the chosen model can not be scored, they are ranked last
 * Note: If anything in the embedding search fails, the error is logged and ALL knowledge pieces are returned
 *
 * @param options - Object with `tools`, `preparedPipeline`, `task` and `parameters`
 * @returns The selected knowledge pieces as a bullet list string, or an empty string when there is no knowledge
 *
 * @private internal utility of `createPipelineExecutor`
 */
async function getKnowledgeForTask(options) {
    const { tools, preparedPipeline, task, parameters } = options;
    const firstKnowlegePiece = preparedPipeline.knowledgePieces[0];
    const firstKnowlegeIndex = firstKnowlegePiece === null || firstKnowlegePiece === undefined ? undefined : firstKnowlegePiece.index[0];
    // <- TODO: Do not use just first knowledge piece and first index to determine embedding model, use also keyword search
    if (firstKnowlegePiece === undefined || firstKnowlegeIndex === undefined) {
        return ''; // <- Note: No knowledge present, return empty string
    }
    try {
        // TODO: [🚐] Make arrayable LLMs -> single LLM DRY
        const _llms = arrayableToArray(tools.llm);
        const llmTools = _llms.length === 1 ? _llms[0] : joinLlmExecutionTools(..._llms);
        const taskEmbeddingPrompt = {
            title: 'Knowledge Search',
            modelRequirements: {
                modelVariant: 'EMBEDDING',
                modelName: firstKnowlegeIndex.modelName,
            },
            content: task.content,
            parameters,
        };
        const taskEmbeddingResult = await llmTools.callEmbeddingModel(taskEmbeddingPrompt);
        const knowledgePiecesWithDistance = preparedPipeline.knowledgePieces.map((knowledgePiece) => {
            const { index } = knowledgePiece;
            const knowledgePieceIndex = index.find((i) => i.modelName === firstKnowlegeIndex.modelName);
            // <- TODO: Do not use just first knowledge piece and first index to determine embedding model
            if (knowledgePieceIndex === undefined) {
                return {
                    content: knowledgePiece.content,
                    // Note: Unscorable piece must not look like a perfect match (distance 0), rank it last
                    distance: Number.POSITIVE_INFINITY,
                };
            }
            return {
                content: knowledgePiece.content,
                distance: computeCosineSimilarity(knowledgePieceIndex.position, taskEmbeddingResult.content),
            };
        });
        // Note: Ascending order = most relevant first; equal values are compared explicitly to avoid `Infinity - Infinity = NaN`
        const knowledgePiecesSorted = knowledgePiecesWithDistance.sort((a, b) => a.distance === b.distance ? 0 : a.distance - b.distance);
        const knowledgePiecesLimited = knowledgePiecesSorted.slice(0, 5);
        return knowledgePiecesToString(knowledgePiecesLimited);
    }
    catch (error) {
        assertsError(error);
        console.error('Error in `getKnowledgeForTask`', error);
        // Note: If the LLM fails, just return all knowledge pieces
        return knowledgePiecesToString(preparedPipeline.knowledgePieces);
    }
}
5379
+ /**
5380
+ * TODO: !!!! Verify if this is working
5381
+ * TODO: [♨] Implement Better - use keyword search
5382
+ * TODO: [♨] Examples of values
5383
+ */
5129
5384
 
5130
5385
  /**
5131
- * @@@
5386
+ * Retrieves all reserved parameters for a given pipeline task, including context, knowledge, examples, and metadata.
5387
+ * Ensures all reserved parameters are defined and throws if any are missing.
5388
+ *
5389
+ * @param options - Options including tools, pipeline, task, and context.
5390
+ * @returns An object containing all reserved parameters for the task.
5132
5391
  *
5133
5392
  * @private internal utility of `createPipelineExecutor`
5134
5393
  */
5135
5394
  async function getReservedParametersForTask(options) {
5136
- const { preparedPipeline, task, pipelineIdentification } = options;
5395
+ const { tools, preparedPipeline, task, parameters, pipelineIdentification } = options;
5137
5396
  const context = await getContextForTask(); // <- [🏍]
5138
- const knowledge = await getKnowledgeForTask({ preparedPipeline, task });
5397
+ const knowledge = await getKnowledgeForTask({ tools, preparedPipeline, task, parameters });
5139
5398
  const examples = await getExamplesForTask();
5140
5399
  const currentDate = new Date().toISOString(); // <- TODO: [🧠][💩] Better
5141
5400
  const modelName = RESERVED_PARAMETER_MISSING_VALUE;
@@ -5161,23 +5420,21 @@ async function getReservedParametersForTask(options) {
5161
5420
  }
5162
5421
 
5163
5422
  /**
5164
- * @@@
5423
+ * Executes a single task within a pipeline, handling parameter validation, error checking, and progress reporting.
5424
+ *
5425
+ * @param options - Options for execution, including the task, pipeline, parameters, and callbacks.
5426
+ * @returns The output parameters produced by the task.
5165
5427
  *
5166
5428
  * @private internal utility of `createPipelineExecutor`
5167
5429
  */
5168
5430
  async function executeTask(options) {
5169
5431
  const { currentTask, preparedPipeline, parametersToPass, tools, onProgress, $executionReport, pipelineIdentification, maxExecutionAttempts, maxParallelCount, csvSettings, isVerbose, rootDirname, cacheDirname, intermediateFilesStrategy, isAutoInstalled, isNotPreparedWarningSupressed, } = options;
5170
5432
  const priority = preparedPipeline.tasks.length - preparedPipeline.tasks.indexOf(currentTask);
5171
- await onProgress({
5172
- outputParameters: {
5173
- [currentTask.resultingParameterName]: '', // <- TODO: [🧠] What is the best value here?
5174
- },
5175
- });
5176
5433
  // Note: Check consistency of used and dependent parameters which was also done in `validatePipeline`, but it’s good to doublecheck
5177
5434
  const usedParameterNames = extractParameterNamesFromTask(currentTask);
5178
5435
  const dependentParameterNames = new Set(currentTask.dependentParameterNames);
5179
5436
  // TODO: [👩🏾‍🤝‍👩🏻] Use here `mapAvailableToExpectedParameters`
5180
- if (union(difference(usedParameterNames, dependentParameterNames), difference(dependentParameterNames, usedParameterNames)).size !== 0) {
5437
+ if (difference(union(difference(usedParameterNames, dependentParameterNames), difference(dependentParameterNames, usedParameterNames)), new Set(RESERVED_PARAMETER_NAMES)).size !== 0) {
5181
5438
  throw new UnexpectedError(spaceTrim$1((block) => `
5182
5439
  Dependent parameters are not consistent with used parameters:
5183
5440
 
@@ -5197,9 +5454,11 @@ async function executeTask(options) {
5197
5454
  }
5198
5455
  const definedParameters = Object.freeze({
5199
5456
  ...(await getReservedParametersForTask({
5457
+ tools,
5200
5458
  preparedPipeline,
5201
5459
  task: currentTask,
5202
5460
  pipelineIdentification,
5461
+ parameters: parametersToPass,
5203
5462
  })),
5204
5463
  ...parametersToPass,
5205
5464
  });
@@ -5245,6 +5504,7 @@ async function executeTask(options) {
5245
5504
  preparedPipeline,
5246
5505
  tools,
5247
5506
  $executionReport,
5507
+ onProgress,
5248
5508
  pipelineIdentification,
5249
5509
  maxExecutionAttempts,
5250
5510
  maxParallelCount,
@@ -5272,7 +5532,8 @@ async function executeTask(options) {
5272
5532
  */
5273
5533
 
5274
5534
  /**
5275
- * @@@
5535
+ * Filters and returns only the output parameters from the provided pipeline execution options.
5536
+ * Adds warnings for any expected output parameters that are missing.
5276
5537
  *
5277
5538
  * @private internal utility of `createPipelineExecutor`
5278
5539
  */
@@ -5297,9 +5558,12 @@ function filterJustOutputParameters(options) {
5297
5558
  }
5298
5559
 
5299
5560
  /**
5300
- * @@@
5561
+ * Executes an entire pipeline, resolving tasks in dependency order, handling errors, and reporting progress.
5301
5562
  *
5302
- * Note: This is not a `PipelineExecutor` (which is binded with one exact pipeline), but a utility function of `createPipelineExecutor` which creates `PipelineExecutor`
5563
+ * Note: This is not a `PipelineExecutor` (which is bound to a single pipeline), but a utility function used by `createPipelineExecutor` to create a `PipelineExecutor`.
5564
+ *
5565
+ * @param options - Options for execution, including input parameters, pipeline, and callbacks.
5566
+ * @returns The result of the pipeline execution, including output parameters, errors, and usage statistics.
5303
5567
  *
5304
5568
  * @private internal utility of `createPipelineExecutor`
5305
5569
  */
@@ -5622,6 +5886,22 @@ function createPipelineExecutor(options) {
5622
5886
  cacheDirname,
5623
5887
  intermediateFilesStrategy,
5624
5888
  isAutoInstalled,
5889
+ }).catch((error) => {
5890
+ assertsError(error);
5891
+ return exportJson({
5892
+ name: 'pipelineExecutorResult',
5893
+ message: `Unuccessful PipelineExecutorResult, last catch`,
5894
+ order: [],
5895
+ value: {
5896
+ isSuccessful: false,
5897
+ errors: [serializeError(error)],
5898
+ warnings: [],
5899
+ usage: UNCERTAIN_USAGE,
5900
+ executionReport: null,
5901
+ outputParameters: {},
5902
+ preparedPipeline,
5903
+ },
5904
+ });
5625
5905
  });
5626
5906
  };
5627
5907
  const pipelineExecutor = (inputParameters) => createTask({
@@ -5882,8 +6162,8 @@ class MarkitdownScraper {
5882
6162
  extension: 'md',
5883
6163
  isVerbose,
5884
6164
  });
5885
- // TODO: @@@ Preserve, delete or modify
5886
- // Note: Running Pandoc ONLY if the file in the cache does not exist
6165
+ // TODO: Determine if Markitdown conversion should run only if the cache file doesn't exist, or always.
6166
+ // Note: Running Markitdown conversion ONLY if the file in the cache does not exist
5887
6167
  if (!(await isFileExisting(cacheFilehandler.filename, this.tools.fs))) {
5888
6168
  const src = source.filename || source.url || null;
5889
6169
  // console.log('!!', { src, source, cacheFilehandler });
@@ -5905,11 +6185,11 @@ class MarkitdownScraper {
5905
6185
  return cacheFilehandler;
5906
6186
  }
5907
6187
  /**
5908
- * Scrapes the docx file and returns the knowledge pieces or `null` if it can't scrape it
6188
+ * Scrapes the source document (PDF, DOCX, etc.) and returns the knowledge pieces or `null` if it can't scrape it.
5909
6189
  */
5910
6190
  async scrape(source) {
5911
6191
  const cacheFilehandler = await this.$convert(source);
5912
- // TODO: @@@ Preserve, delete or modify
6192
+ // TODO: Ensure this correctly creates the source object for the internal MarkdownScraper using the converted file.
5913
6193
  const markdownSource = {
5914
6194
  source: source.source,
5915
6195
  filename: cacheFilehandler.filename,