@promptbook/pdf 0.92.0-3 → 0.92.0-30

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. package/esm/index.es.js +561 -280
  2. package/esm/index.es.js.map +1 -1
  3. package/esm/typings/src/_packages/browser.index.d.ts +2 -0
  4. package/esm/typings/src/_packages/core.index.d.ts +22 -6
  5. package/esm/typings/src/_packages/deepseek.index.d.ts +2 -0
  6. package/esm/typings/src/_packages/google.index.d.ts +2 -0
  7. package/esm/typings/src/_packages/types.index.d.ts +4 -2
  8. package/esm/typings/src/_packages/utils.index.d.ts +2 -0
  9. package/esm/typings/src/cli/common/$provideLlmToolsForCli.d.ts +1 -1
  10. package/esm/typings/src/collection/PipelineCollection.d.ts +0 -2
  11. package/esm/typings/src/collection/SimplePipelineCollection.d.ts +1 -1
  12. package/esm/typings/src/commands/FOREACH/ForeachJson.d.ts +6 -6
  13. package/esm/typings/src/commands/FOREACH/foreachCommandParser.d.ts +0 -2
  14. package/esm/typings/src/commands/FORMFACTOR/formfactorCommandParser.d.ts +1 -1
  15. package/esm/typings/src/commands/_BOILERPLATE/boilerplateCommandParser.d.ts +1 -1
  16. package/esm/typings/src/commands/_common/types/CommandParser.d.ts +36 -28
  17. package/esm/typings/src/config.d.ts +41 -11
  18. package/esm/typings/src/constants.d.ts +43 -2
  19. package/esm/typings/src/conversion/archive/loadArchive.d.ts +2 -2
  20. package/esm/typings/src/errors/0-BoilerplateError.d.ts +2 -2
  21. package/esm/typings/src/executables/$provideExecutablesForNode.d.ts +1 -1
  22. package/esm/typings/src/executables/apps/locateLibreoffice.d.ts +2 -1
  23. package/esm/typings/src/executables/apps/locatePandoc.d.ts +2 -1
  24. package/esm/typings/src/executables/platforms/locateAppOnLinux.d.ts +2 -1
  25. package/esm/typings/src/executables/platforms/locateAppOnMacOs.d.ts +2 -1
  26. package/esm/typings/src/executables/platforms/locateAppOnWindows.d.ts +2 -1
  27. package/esm/typings/src/execution/AbstractTaskResult.d.ts +1 -1
  28. package/esm/typings/src/execution/CommonToolsOptions.d.ts +5 -1
  29. package/esm/typings/src/execution/LlmExecutionToolsConstructor.d.ts +2 -1
  30. package/esm/typings/src/execution/PipelineExecutorResult.d.ts +4 -2
  31. package/esm/typings/src/execution/createPipelineExecutor/$OngoingTaskResult.d.ts +12 -9
  32. package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +12 -9
  33. package/esm/typings/src/execution/createPipelineExecutor/20-executeTask.d.ts +11 -8
  34. package/esm/typings/src/execution/createPipelineExecutor/30-executeFormatSubvalues.d.ts +15 -3
  35. package/esm/typings/src/execution/createPipelineExecutor/40-executeAttempts.d.ts +20 -14
  36. package/esm/typings/src/execution/createPipelineExecutor/computeCosineSimilarity.d.ts +13 -0
  37. package/esm/typings/src/execution/createPipelineExecutor/filterJustOutputParameters.d.ts +7 -6
  38. package/esm/typings/src/execution/createPipelineExecutor/getContextForTask.d.ts +5 -1
  39. package/esm/typings/src/execution/createPipelineExecutor/getExamplesForTask.d.ts +1 -1
  40. package/esm/typings/src/execution/createPipelineExecutor/getKnowledgeForTask.d.ts +21 -5
  41. package/esm/typings/src/execution/createPipelineExecutor/getReservedParametersForTask.d.ts +19 -5
  42. package/esm/typings/src/execution/createPipelineExecutor/knowledgePiecesToString.d.ts +9 -0
  43. package/esm/typings/src/execution/translation/automatic-translate/automatic-translators/LindatAutomaticTranslator.d.ts +4 -4
  44. package/esm/typings/src/execution/utils/checkExpectations.d.ts +1 -1
  45. package/esm/typings/src/execution/utils/uncertainNumber.d.ts +3 -2
  46. package/esm/typings/src/formats/_common/{FormatDefinition.d.ts → FormatParser.d.ts} +8 -6
  47. package/esm/typings/src/formats/_common/FormatSubvalueParser.d.ts +66 -0
  48. package/esm/typings/src/formats/csv/CsvFormatParser.d.ts +17 -0
  49. package/esm/typings/src/formats/csv/CsvSettings.d.ts +2 -2
  50. package/esm/typings/src/formats/csv/utils/csvParse.d.ts +12 -0
  51. package/esm/typings/src/formats/csv/utils/isValidCsvString.d.ts +1 -1
  52. package/esm/typings/src/formats/index.d.ts +2 -2
  53. package/esm/typings/src/formats/json/{JsonFormatDefinition.d.ts → JsonFormatParser.d.ts} +6 -6
  54. package/esm/typings/src/formats/json/utils/isValidJsonString.d.ts +1 -1
  55. package/esm/typings/src/formats/json/utils/jsonParse.d.ts +8 -0
  56. package/esm/typings/src/formats/text/{TextFormatDefinition.d.ts → TextFormatParser.d.ts} +7 -7
  57. package/esm/typings/src/formats/xml/XmlFormatParser.d.ts +19 -0
  58. package/esm/typings/src/formats/xml/utils/isValidXmlString.d.ts +1 -1
  59. package/esm/typings/src/formfactors/_boilerplate/BoilerplateFormfactorDefinition.d.ts +3 -2
  60. package/esm/typings/src/formfactors/_common/AbstractFormfactorDefinition.d.ts +16 -7
  61. package/esm/typings/src/formfactors/_common/FormfactorDefinition.d.ts +3 -1
  62. package/esm/typings/src/formfactors/_common/string_formfactor_name.d.ts +2 -1
  63. package/esm/typings/src/formfactors/chatbot/ChatbotFormfactorDefinition.d.ts +2 -2
  64. package/esm/typings/src/formfactors/completion/CompletionFormfactorDefinition.d.ts +29 -0
  65. package/esm/typings/src/formfactors/generator/GeneratorFormfactorDefinition.d.ts +2 -1
  66. package/esm/typings/src/formfactors/generic/GenericFormfactorDefinition.d.ts +2 -2
  67. package/esm/typings/src/formfactors/index.d.ts +33 -8
  68. package/esm/typings/src/formfactors/matcher/MatcherFormfactorDefinition.d.ts +4 -2
  69. package/esm/typings/src/formfactors/sheets/SheetsFormfactorDefinition.d.ts +3 -2
  70. package/esm/typings/src/formfactors/translator/TranslatorFormfactorDefinition.d.ts +3 -2
  71. package/esm/typings/src/high-level-abstractions/index.d.ts +2 -2
  72. package/esm/typings/src/llm-providers/_common/register/$llmToolsMetadataRegister.d.ts +3 -3
  73. package/esm/typings/src/llm-providers/_common/register/$llmToolsRegister.d.ts +3 -3
  74. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsConfigurationFromEnv.d.ts +4 -4
  75. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsForTestingAndScriptsAndPlayground.d.ts +4 -3
  76. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsFromEnv.d.ts +17 -4
  77. package/esm/typings/src/llm-providers/_common/register/LlmToolsConfiguration.d.ts +11 -4
  78. package/esm/typings/src/llm-providers/_common/register/LlmToolsMetadata.d.ts +27 -5
  79. package/esm/typings/src/llm-providers/_common/register/LlmToolsOptions.d.ts +9 -2
  80. package/esm/typings/src/llm-providers/_common/register/createLlmToolsFromConfiguration.d.ts +12 -3
  81. package/esm/typings/src/llm-providers/_common/utils/cache/CacheItem.d.ts +10 -5
  82. package/esm/typings/src/llm-providers/_common/utils/cache/CacheLlmToolsOptions.d.ts +5 -3
  83. package/esm/typings/src/llm-providers/_common/utils/cache/cacheLlmTools.d.ts +3 -3
  84. package/esm/typings/src/llm-providers/_common/utils/count-total-usage/limitTotalUsage.d.ts +5 -5
  85. package/esm/typings/src/llm-providers/anthropic-claude/anthropic-claude-models.d.ts +1 -1
  86. package/esm/typings/src/llm-providers/azure-openai/AzureOpenAiExecutionTools.d.ts +4 -0
  87. package/esm/typings/src/llm-providers/deepseek/deepseek-models.d.ts +23 -0
  88. package/esm/typings/src/llm-providers/google/google-models.d.ts +23 -0
  89. package/esm/typings/src/llm-providers/openai/OpenAiExecutionTools.d.ts +4 -0
  90. package/esm/typings/src/llm-providers/openai/openai-models.d.ts +1 -1
  91. package/esm/typings/src/llm-providers/openai/register-configuration.d.ts +2 -2
  92. package/esm/typings/src/llm-providers/openai/register-constructor.d.ts +2 -2
  93. package/esm/typings/src/migrations/migratePipeline.d.ts +9 -0
  94. package/esm/typings/src/other/templates/getBookTemplates.d.ts +2 -2
  95. package/esm/typings/src/personas/preparePersona.d.ts +1 -1
  96. package/esm/typings/src/pipeline/PipelineInterface/PipelineInterface.d.ts +3 -3
  97. package/esm/typings/src/pipeline/PipelineInterface/constants.d.ts +1 -1
  98. package/esm/typings/src/pipeline/PipelineInterface/getPipelineInterface.d.ts +1 -1
  99. package/esm/typings/src/pipeline/PipelineInterface/isPipelineImplementingInterface.d.ts +5 -4
  100. package/esm/typings/src/pipeline/PipelineInterface/isPipelineInterfacesEqual.d.ts +1 -1
  101. package/esm/typings/src/pipeline/PipelineJson/CommonTaskJson.d.ts +9 -6
  102. package/esm/typings/src/pipeline/PipelineJson/PersonaJson.d.ts +4 -2
  103. package/esm/typings/src/pipeline/PipelineJson/PipelineJson.d.ts +3 -2
  104. package/esm/typings/src/pipeline/PipelineString.d.ts +3 -1
  105. package/esm/typings/src/pipeline/book-notation.d.ts +2 -2
  106. package/esm/typings/src/postprocessing/utils/extractJsonBlock.d.ts +1 -1
  107. package/esm/typings/src/prepare/prepareTasks.d.ts +7 -4
  108. package/esm/typings/src/remote-server/openapi-types.d.ts +348 -6
  109. package/esm/typings/src/remote-server/openapi.d.ts +398 -4
  110. package/esm/typings/src/remote-server/types/RemoteServerOptions.d.ts +2 -1
  111. package/esm/typings/src/scrapers/_boilerplate/BoilerplateScraper.d.ts +3 -3
  112. package/esm/typings/src/scrapers/_boilerplate/createBoilerplateScraper.d.ts +1 -1
  113. package/esm/typings/src/scrapers/_boilerplate/register-metadata.d.ts +1 -1
  114. package/esm/typings/src/scrapers/_common/Converter.d.ts +3 -1
  115. package/esm/typings/src/scrapers/_common/Scraper.d.ts +4 -3
  116. package/esm/typings/src/scrapers/_common/ScraperIntermediateSource.d.ts +4 -2
  117. package/esm/typings/src/scrapers/_common/register/$provideFilesystemForNode.d.ts +2 -1
  118. package/esm/typings/src/scrapers/_common/register/$provideScrapersForBrowser.d.ts +6 -3
  119. package/esm/typings/src/scrapers/_common/register/$provideScrapersForNode.d.ts +3 -5
  120. package/esm/typings/src/scrapers/_common/register/$scrapersMetadataRegister.d.ts +3 -3
  121. package/esm/typings/src/scrapers/_common/register/$scrapersRegister.d.ts +3 -2
  122. package/esm/typings/src/scrapers/_common/register/ScraperAndConverterMetadata.d.ts +8 -5
  123. package/esm/typings/src/scrapers/_common/register/ScraperConstructor.d.ts +2 -1
  124. package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +6 -5
  125. package/esm/typings/src/scrapers/_common/utils/makeKnowledgeSourceHandler.d.ts +3 -1
  126. package/esm/typings/src/scrapers/document/createDocumentScraper.d.ts +1 -1
  127. package/esm/typings/src/scrapers/document-legacy/createLegacyDocumentScraper.d.ts +2 -1
  128. package/esm/typings/src/scrapers/markdown/createMarkdownScraper.d.ts +4 -1
  129. package/esm/typings/src/scrapers/markitdown/MarkitdownScraper.d.ts +1 -1
  130. package/esm/typings/src/scrapers/pdf/createPdfScraper.d.ts +2 -1
  131. package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts +3 -4
  132. package/esm/typings/src/scripting/javascript/postprocessing-functions.d.ts +5 -1
  133. package/esm/typings/src/storage/file-cache-storage/FileCacheStorage.d.ts +12 -5
  134. package/esm/typings/src/storage/file-cache-storage/FileCacheStorageOptions.d.ts +4 -2
  135. package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.d.ts +2 -1
  136. package/esm/typings/src/storage/local-storage/getIndexedDbStorage.d.ts +10 -0
  137. package/esm/typings/src/storage/local-storage/utils/makePromptbookStorageFromIndexedDb.d.ts +7 -0
  138. package/esm/typings/src/storage/local-storage/utils/makePromptbookStorageFromWebStorage.d.ts +2 -1
  139. package/esm/typings/src/types/IntermediateFilesStrategy.d.ts +2 -1
  140. package/esm/typings/src/types/ModelVariant.d.ts +5 -5
  141. package/esm/typings/src/types/typeAliases.d.ts +17 -13
  142. package/esm/typings/src/utils/$Register.d.ts +8 -7
  143. package/esm/typings/src/utils/editable/edit-pipeline-string/addPipelineCommand.d.ts +2 -2
  144. package/esm/typings/src/utils/editable/edit-pipeline-string/deflatePipeline.d.ts +4 -1
  145. package/esm/typings/src/utils/editable/utils/isFlatPipeline.d.ts +2 -1
  146. package/esm/typings/src/utils/environment/$getGlobalScope.d.ts +2 -1
  147. package/esm/typings/src/utils/expectation-counters/index.d.ts +1 -1
  148. package/esm/typings/src/utils/markdown/extractAllListItemsFromMarkdown.d.ts +1 -1
  149. package/esm/typings/src/utils/normalization/nameToUriPart.d.ts +4 -4
  150. package/esm/typings/src/utils/normalization/nameToUriParts.d.ts +4 -4
  151. package/esm/typings/src/utils/normalization/normalize-to-kebab-case.d.ts +3 -3
  152. package/esm/typings/src/utils/normalization/normalizeTo_SCREAMING_CASE.d.ts +3 -3
  153. package/esm/typings/src/utils/normalization/normalizeTo_camelCase.d.ts +4 -4
  154. package/esm/typings/src/utils/normalization/normalizeTo_snake_case.d.ts +3 -3
  155. package/esm/typings/src/utils/normalization/removeDiacritics.d.ts +3 -3
  156. package/esm/typings/src/utils/normalization/searchKeywords.d.ts +4 -1
  157. package/esm/typings/src/utils/normalization/titleToName.d.ts +4 -4
  158. package/esm/typings/src/utils/organization/empty_object.d.ts +2 -2
  159. package/esm/typings/src/utils/organization/just_empty_object.d.ts +4 -4
  160. package/esm/typings/src/utils/parameters/mapAvailableToExpectedParameters.d.ts +7 -7
  161. package/esm/typings/src/utils/serialization/clonePipeline.d.ts +4 -3
  162. package/esm/typings/src/utils/serialization/deepClone.d.ts +5 -1
  163. package/esm/typings/src/utils/validators/javascriptName/isValidJavascriptName.d.ts +3 -3
  164. package/esm/typings/src/utils/validators/parameterName/validateParameterName.d.ts +5 -4
  165. package/esm/typings/src/version.d.ts +2 -1
  166. package/package.json +2 -2
  167. package/umd/index.umd.js +561 -280
  168. package/umd/index.umd.js.map +1 -1
  169. package/esm/typings/src/formats/_common/FormatSubvalueDefinition.d.ts +0 -31
  170. package/esm/typings/src/formats/csv/CsvFormatDefinition.d.ts +0 -17
  171. package/esm/typings/src/formats/xml/XmlFormatDefinition.d.ts +0 -19
package/esm/index.es.js CHANGED
@@ -26,7 +26,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
26
26
  * @generated
27
27
  * @see https://github.com/webgptorg/promptbook
28
28
  */
29
- const PROMPTBOOK_ENGINE_VERSION = '0.92.0-3';
29
+ const PROMPTBOOK_ENGINE_VERSION = '0.92.0-30';
30
30
  /**
31
31
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
32
32
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -102,6 +102,21 @@ const DEFAULT_BOOK_TITLE = `✨ Untitled Book`;
102
102
  * @public exported from `@promptbook/core`
103
103
  */
104
104
  const DEFAULT_MAX_FILE_SIZE = 100 * 1024 * 1024; // 100MB
105
+ /**
106
+ * Threshold value that determines when a dataset is considered "big"
107
+ * and may require special handling or optimizations
108
+ *
109
+ * For example, when error occurs in one item of the big dataset, it will not fail the whole pipeline
110
+ *
111
+ * @public exported from `@promptbook/core`
112
+ */
113
+ const BIG_DATASET_TRESHOLD = 50;
114
+ /**
115
+ * Placeholder text used to represent a placeholder value of failed operation
116
+ *
117
+ * @public exported from `@promptbook/core`
118
+ */
119
+ const FAILED_VALUE_PLACEHOLDER = '!?';
105
120
  // <- TODO: [🧠] Better system for generator warnings - not always "code" and "by `@promptbook/cli`"
106
121
  /**
107
122
  * The maximum number of iterations for a loops
@@ -181,7 +196,7 @@ const DEFAULT_DOWNLOAD_CACHE_DIRNAME = './.promptbook/download-cache';
181
196
  const DEFAULT_SCRAPE_CACHE_DIRNAME = './.promptbook/scrape-cache';
182
197
  // <- TODO: [🧜‍♂️]
183
198
  /**
184
- * @@@
199
+ * Default settings for parsing and generating CSV files in Promptbook.
185
200
  *
186
201
  * @public exported from `@promptbook/core`
187
202
  */
@@ -192,19 +207,19 @@ const DEFAULT_CSV_SETTINGS = Object.freeze({
192
207
  skipEmptyLines: true,
193
208
  });
194
209
  /**
195
- * @@@
210
+ * Controls whether verbose logging is enabled by default throughout the application.
196
211
  *
197
212
  * @public exported from `@promptbook/core`
198
213
  */
199
214
  let DEFAULT_IS_VERBOSE = false;
200
215
  /**
201
- * @@@
216
+ * Controls whether auto-installation of dependencies is enabled by default.
202
217
  *
203
218
  * @public exported from `@promptbook/core`
204
219
  */
205
220
  const DEFAULT_IS_AUTO_INSTALLED = false;
206
221
  /**
207
- * @@@
222
+ * Indicates whether pipeline logic validation is enabled. When true, the pipeline logic is checked for consistency.
208
223
  *
209
224
  * @private within the repository
210
225
  */
@@ -355,7 +370,8 @@ async function isFileExisting(filename, fs) {
355
370
  */
356
371
 
357
372
  /**
358
- * @@@
373
+ * Converts a name to a properly formatted subfolder path for cache storage.
374
+ * Handles normalization and path formatting to create consistent cache directory structures.
359
375
  *
360
376
  * @private for `FileCacheStorage`
361
377
  */
@@ -608,10 +624,10 @@ for (let i = 0; i < defaultDiacriticsRemovalMap.length; i++) {
608
624
  */
609
625
 
610
626
  /**
611
- * @@@
627
+ * Removes diacritic marks (accents) from characters in a string.
612
628
  *
613
- * @param input @@@
614
- * @returns @@@
629
+ * @param input The string containing diacritics to be normalized.
630
+ * @returns The string with diacritics removed or normalized.
615
631
  * @public exported from `@promptbook/utils`
616
632
  */
617
633
  function removeDiacritics(input) {
@@ -625,10 +641,10 @@ function removeDiacritics(input) {
625
641
  */
626
642
 
627
643
  /**
628
- * @@@
644
+ * Converts a given text to kebab-case format.
629
645
  *
630
- * @param text @@@
631
- * @returns @@@
646
+ * @param text The text to be converted.
647
+ * @returns The kebab-case formatted string.
632
648
  * @example 'hello-world'
633
649
  * @example 'i-love-promptbook'
634
650
  * @public exported from `@promptbook/utils`
@@ -770,11 +786,11 @@ function isValidUrl(url) {
770
786
  }
771
787
 
772
788
  /**
773
- * @@@
789
+ * Converts a title string into a normalized name.
774
790
  *
775
- * @param value @@@
776
- * @returns @@@
777
- * @example @@@
791
+ * @param value The title string to be converted to a name.
792
+ * @returns A normalized name derived from the input title.
793
+ * @example 'Hello World!' -> 'hello-world'
778
794
  * @public exported from `@promptbook/utils`
779
795
  */
780
796
  function titleToName(value) {
@@ -807,9 +823,8 @@ function TODO_USE(...value) {
807
823
  }
808
824
 
809
825
  /**
810
- * Create a filename for intermediate cache for scrapers
811
- *
812
- * Note: It also checks if directory exists and creates it if not
826
+ * Retrieves an intermediate source for a scraper based on the knowledge source.
827
+ * Manages the caching and retrieval of intermediate scraper results for optimized performance.
813
828
  *
814
829
  * @private as internal utility for scrapers
815
830
  */
@@ -860,7 +875,7 @@ async function getScraperIntermediateSource(source, options) {
860
875
  * Note: [🟢] Code in this file should never be never released in packages that could be imported into browser environment
861
876
  */
862
877
 
863
- var PipelineCollection = [{title:"Prepare Knowledge from Markdown",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book",formfactorName:"GENERIC",parameters:[{name:"knowledgeContent",description:"Markdown document content",isInput:true,isOutput:false},{name:"knowledgePieces",description:"The knowledge JSON object",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}",resultingParameterName:"knowledgePieces",dependentParameterNames:["knowledgeContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge from Markdown\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book`\n- INPUT PARAMETER `{knowledgeContent}` Markdown document content\n- OUTPUT PARAMETER `{knowledgePieces}` The knowledge JSON object\n\n## Knowledge\n\n<!-- TODO: [🍆] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}\n```\n\n`-> 
{knowledgePieces}`\n"}],sourceFile:"./books/prepare-knowledge-from-markdown.book"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-keywords.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"keywords",description:"Keywords separated by comma",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}",resultingParameterName:"keywords",dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Keywords\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-keywords.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{keywords}` Keywords separated by comma\n\n## Knowledge\n\n<!-- TODO: [🍆] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}\n```\n\n`-> {keywords}`\n"}],sourceFile:"./books/prepare-knowledge-keywords.book"},{title:"Prepare Knowledge-piece Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-title.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"title",description:"The title of the document",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just 
title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}",resultingParameterName:"title",expectations:{words:{min:1,max:8}},dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge-piece Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-title.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{title}` The title of the document\n\n## Knowledge\n\n- EXPECT MIN 1 WORD\n- EXPECT MAX 8 WORDS\n\n```markdown\nYou are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-knowledge-title.book"},{title:"Prepare Persona",pipelineUrl:"https://promptbook.studio/promptbook/prepare-persona.book",formfactorName:"GENERIC",parameters:[{name:"availableModelNames",description:"List of available model names separated by comma (,)",isInput:true,isOutput:false},{name:"personaDescription",description:"Description of the persona",isInput:true,isOutput:false},{name:"modelRequirements",description:"Specific requirements for the model",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-model-requirements",title:"Make modelRequirements",content:"You are experienced AI engineer, you need to create virtual assistant.\nWrite\n\n## Example\n\n```json\n{\n\"modelName\": \"gpt-4o\",\n\"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n\"temperature\": 0.7\n}\n```\n\n## Instructions\n\n- Your output format is JSON object\n- Write just the JSON object, no other text should be present\n- It contains the following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the 
model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nPick from the following models:\n\n- {availableModelNames}\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}",resultingParameterName:"modelRequirements",format:"JSON",dependentParameterNames:["availableModelNames","personaDescription"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Persona\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-persona.book`\n- INPUT PARAMETER `{availableModelNames}` List of available model names separated by comma (,)\n- INPUT PARAMETER `{personaDescription}` Description of the persona\n- OUTPUT PARAMETER `{modelRequirements}` Specific requirements for the model\n\n## Make modelRequirements\n\n- FORMAT JSON\n\n```markdown\nYou are experienced AI engineer, you need to create virtual assistant.\nWrite\n\n## Example\n\n\\`\\`\\`json\n{\n\"modelName\": \"gpt-4o\",\n\"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n\"temperature\": 0.7\n}\n\\`\\`\\`\n\n## Instructions\n\n- Your output format is JSON object\n- Write just the JSON object, no other text should be present\n- It contains the following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nPick from the following models:\n\n- {availableModelNames}\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. 
It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}\n```\n\n`-> {modelRequirements}`\n"}],sourceFile:"./books/prepare-persona.book"},{title:"Prepare Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-title.book",formfactorName:"GENERIC",parameters:[{name:"book",description:"The book to prepare the title for",isInput:true,isOutput:false},{name:"title",description:"Best title for the book",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-title",title:"Make title",content:"Make best title for given text which describes the workflow:\n\n## Rules\n\n- Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 
words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"✍ Convert Knowledge-piece to title\" but \"✍ Title\"_\n\n## The workflow\n\n> {book}",resultingParameterName:"title",expectations:{words:{min:1,max:8},lines:{min:1,max:1}},dependentParameterNames:["book"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-title.book`\n- INPUT PARAMETER `{book}` The book to prepare the title for\n- OUTPUT PARAMETER `{title}` Best title for the book\n\n## Make title\n\n- EXPECT MIN 1 Word\n- EXPECT MAX 8 Words\n- EXPECT EXACTLY 1 Line\n\n```markdown\nMake best title for given text which describes the workflow:\n\n## Rules\n\n- Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"✍ Convert Knowledge-piece to title\" but \"✍ Title\"_\n\n## The workflow\n\n> {book}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-title.book"}];
878
+ var PipelineCollection = [{title:"Prepare Knowledge from Markdown",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book",formfactorName:"GENERIC",parameters:[{name:"knowledgeContent",description:"Markdown document content",isInput:true,isOutput:false},{name:"knowledgePieces",description:"The knowledge JSON object",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}",resultingParameterName:"knowledgePieces",dependentParameterNames:["knowledgeContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge from Markdown\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book`\n- INPUT PARAMETER `{knowledgeContent}` Markdown document content\n- OUTPUT PARAMETER `{knowledgePieces}` The knowledge JSON object\n\n## Knowledge\n\n<!-- TODO: [🍆] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}\n```\n\n`-> 
{knowledgePieces}`\n"}],sourceFile:"./books/prepare-knowledge-from-markdown.book"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-keywords.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"keywords",description:"Keywords separated by comma",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}",resultingParameterName:"keywords",dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Keywords\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-keywords.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{keywords}` Keywords separated by comma\n\n## Knowledge\n\n<!-- TODO: [🍆] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}\n```\n\n`-> {keywords}`\n"}],sourceFile:"./books/prepare-knowledge-keywords.book"},{title:"Prepare Knowledge-piece Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-title.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"title",description:"The title of the document",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just 
title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}",resultingParameterName:"title",expectations:{words:{min:1,max:8}},dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge-piece Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-title.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{title}` The title of the document\n\n## Knowledge\n\n- EXPECT MIN 1 WORD\n- EXPECT MAX 8 WORDS\n\n```markdown\nYou are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-knowledge-title.book"},{title:"Prepare Persona",pipelineUrl:"https://promptbook.studio/promptbook/prepare-persona.book",formfactorName:"GENERIC",parameters:[{name:"availableModels",description:"List of available model names together with their descriptions as JSON",isInput:true,isOutput:false},{name:"personaDescription",description:"Description of the persona",isInput:true,isOutput:false},{name:"modelsRequirements",description:"Specific requirements for the model",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-model-requirements",title:"Make modelRequirements",content:"You are an experienced AI engineer, you need to find the best models for virtual assistants:\n\n## Example\n\n```json\n[\n {\n \"modelName\": \"gpt-4o\",\n \"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n \"temperature\": 0.7\n },\n {\n \"modelName\": \"claude-3-5-sonnet\",\n \"systemMessage\": \"You are a friendly and knowledgeable chatbot.\",\n \"temperature\": 0.5\n }\n]\n```\n\n## Instructions\n\n- Your output format is JSON array\n- Sort best-fitting 
models first\n- Omit any models that are not suitable\n- Write just the JSON, no other text should be present\n- Array contain items with following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nHere are the available models:\n\n```json\n{availableModels}\n```\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}",resultingParameterName:"modelsRequirements",format:"JSON",dependentParameterNames:["availableModels","personaDescription"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Persona\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-persona.book`\n- INPUT PARAMETER `{availableModels}` List of available model names together with their descriptions as JSON\n- INPUT PARAMETER `{personaDescription}` Description of the persona\n- OUTPUT PARAMETER `{modelsRequirements}` Specific requirements for the model\n\n## Make modelRequirements\n\n- FORMAT JSON\n\n```markdown\nYou are an experienced AI engineer, you need to find the best models for virtual assistants:\n\n## Example\n\n\\`\\`\\`json\n[\n {\n \"modelName\": \"gpt-4o\",\n \"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n \"temperature\": 0.7\n },\n {\n \"modelName\": \"claude-3-5-sonnet\",\n \"systemMessage\": \"You are a friendly and knowledgeable chatbot.\",\n \"temperature\": 0.5\n }\n]\n\\`\\`\\`\n\n## Instructions\n\n- Your output format is JSON array\n- Sort best-fitting models first\n- Omit any models that are not suitable\n- Write just the JSON, no other text should be present\n- Array contain items with following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nHere are 
the available models:\n\n\\`\\`\\`json\n{availableModels}\n\\`\\`\\`\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}\n```\n\n`-> {modelsRequirements}`\n"}],sourceFile:"./books/prepare-persona.book"},{title:"Prepare Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-title.book",formfactorName:"GENERIC",parameters:[{name:"book",description:"The book to prepare the title for",isInput:true,isOutput:false},{name:"title",description:"Best title for the book",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-title",title:"Make title",content:"Make best title for given text which describes the workflow:\n\n## Rules\n\n- Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"✍ Convert Knowledge-piece to title\" but \"✍ Title\"_\n\n## The workflow\n\n> {book}",resultingParameterName:"title",expectations:{words:{min:1,max:8},lines:{min:1,max:1}},dependentParameterNames:["book"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-title.book`\n- INPUT PARAMETER `{book}` The book to prepare the title for\n- OUTPUT PARAMETER `{title}` Best title for the book\n\n## Make title\n\n- EXPECT MIN 1 Word\n- EXPECT MAX 8 Words\n- EXPECT EXACTLY 1 Line\n\n```markdown\nMake best title for given text which describes the workflow:\n\n## Rules\n\n- 
Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"✍ Convert Knowledge-piece to title\" but \"✍ Title\"_\n\n## The workflow\n\n> {book}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-title.book"}];
864
879
 
865
880
  /**
866
881
  * Checks if value is valid email
@@ -945,7 +960,7 @@ function assertsError(whatWasThrown) {
945
960
  * Function isValidJsonString will tell you if the string is valid JSON or not
946
961
  *
947
962
  * @param value The string to check
948
- * @returns True if the string is a valid JSON string, false otherwise
963
+ * @returns `true` if the string is a valid JSON string, `false` otherwise
949
964
  *
950
965
  * @public exported from `@promptbook/utils`
951
966
  */
@@ -1356,8 +1371,12 @@ function checkSerializableAsJson(options) {
1356
1371
  */
1357
1372
 
1358
1373
  /**
1359
- * @@@
1374
+ * Creates a deep clone of the given object
1375
+ *
1376
+ * Note: This method only works for objects that are fully serializable to JSON and do not contain functions, Dates, or special types.
1360
1377
  *
1378
+ * @param objectValue The object to clone.
1379
+ * @returns A deep, writable clone of the input object.
1361
1380
  * @public exported from `@promptbook/utils`
1362
1381
  */
1363
1382
  function deepClone(objectValue) {
@@ -1439,13 +1458,13 @@ const ORDER_OF_PIPELINE_JSON = [
1439
1458
  */
1440
1459
  const REPLACING_NONCE = 'ptbkauk42kV2dzao34faw7FudQUHYPtW';
1441
1460
  /**
1442
- * @@@
1461
+ * Placeholder value indicating a parameter is missing its value.
1443
1462
  *
1444
1463
  * @private within the repository
1445
1464
  */
1446
1465
  const RESERVED_PARAMETER_MISSING_VALUE = 'MISSING-' + REPLACING_NONCE;
1447
1466
  /**
1448
- * @@@
1467
+ * Placeholder value indicating a parameter is restricted and cannot be used directly.
1449
1468
  *
1450
1469
  * @private within the repository
1451
1470
  */
@@ -1903,7 +1922,7 @@ function extractParameterNames(template) {
1903
1922
  */
1904
1923
  function unpreparePipeline(pipeline) {
1905
1924
  let { personas, knowledgeSources, tasks } = pipeline;
1906
- personas = personas.map((persona) => ({ ...persona, modelRequirements: undefined, preparationIds: undefined }));
1925
+ personas = personas.map((persona) => ({ ...persona, modelsRequirements: undefined, preparationIds: undefined }));
1907
1926
  knowledgeSources = knowledgeSources.map((knowledgeSource) => ({ ...knowledgeSource, preparationIds: undefined }));
1908
1927
  tasks = tasks.map((task) => {
1909
1928
  let { dependentParameterNames } = task;
@@ -1944,7 +1963,7 @@ class SimplePipelineCollection {
1944
1963
  /**
1945
1964
  * Constructs a pipeline collection from pipelines
1946
1965
  *
1947
- * @param pipelines @@@
1966
+ * @param pipelines Array of pipeline JSON objects to include in the collection
1948
1967
  *
1949
1968
  * Note: During the construction, the logic of all pipelines is validated
1950
1969
  * Note: It is not recommended to use this constructor directly, use `createCollectionFromJson` *(or other variant)* instead
@@ -2108,15 +2127,21 @@ class PipelineExecutionError extends Error {
2108
2127
  * @public exported from `@promptbook/core`
2109
2128
  */
2110
2129
  function isPipelinePrepared(pipeline) {
2111
- // Note: Ignoring `pipeline.preparations` @@@
2112
- // Note: Ignoring `pipeline.knowledgePieces` @@@
2130
+ // Note: Ignoring `pipeline.preparations`
2131
+ // Note: Ignoring `pipeline.knowledgePieces`
2113
2132
  if (pipeline.title === undefined || pipeline.title === '' || pipeline.title === DEFAULT_BOOK_TITLE) {
2133
+ // TODO: !!! Comment this out
2134
+ console.log('Pipeline is not prepared because title is undefined or empty', pipeline);
2114
2135
  return false;
2115
2136
  }
2116
- if (!pipeline.personas.every((persona) => persona.modelRequirements !== undefined)) {
2137
+ if (!pipeline.personas.every((persona) => persona.modelsRequirements !== undefined)) {
2138
+ // TODO: !!! Comment this out
2139
+ console.log('Pipeline is not prepared because personas are not prepared', pipeline.personas);
2117
2140
  return false;
2118
2141
  }
2119
2142
  if (!pipeline.knowledgeSources.every((knowledgeSource) => knowledgeSource.preparationIds !== undefined)) {
2143
+ // TODO: !!! Comment this out
2144
+ console.log('Pipeline is not prepared because knowledge sources are not prepared', pipeline.knowledgeSources);
2120
2145
  return false;
2121
2146
  }
2122
2147
  /*
@@ -2137,36 +2162,6 @@ function isPipelinePrepared(pipeline) {
2137
2162
  * - [♨] Are tasks prepared
2138
2163
  */
2139
2164
 
2140
- /**
2141
- * Recursively converts JSON strings to JSON objects
2142
-
2143
- * @public exported from `@promptbook/utils`
2144
- */
2145
- function jsonStringsToJsons(object) {
2146
- if (object === null) {
2147
- return object;
2148
- }
2149
- if (Array.isArray(object)) {
2150
- return object.map(jsonStringsToJsons);
2151
- }
2152
- if (typeof object !== 'object') {
2153
- return object;
2154
- }
2155
- const newObject = { ...object };
2156
- for (const [key, value] of Object.entries(object)) {
2157
- if (typeof value === 'string' && isValidJsonString(value)) {
2158
- newObject[key] = JSON.parse(value);
2159
- }
2160
- else {
2161
- newObject[key] = jsonStringsToJsons(value);
2162
- }
2163
- }
2164
- return newObject;
2165
- }
2166
- /**
2167
- * TODO: Type the return type correctly
2168
- */
2169
-
2170
2165
  /**
2171
2166
  * This error indicates problems parsing the format value
2172
2167
  *
@@ -2350,6 +2345,101 @@ const ALL_ERRORS = {
2350
2345
  * Note: [💞] Ignore a discrepancy between file name and entity name
2351
2346
  */
2352
2347
 
2348
/**
 * Turns an error instance into a plain [🚉] JSON-serializable object
 *
 * Note: When the error name is not one of the keys of `ALL_ERRORS`, a diagnostic is written
 *       via `console.error`, but the error is serialized anyway
 *
 * @param error The error to serialize; its `name`, `message`, `stack` and `id` are read
 * @returns Plain object with the keys `name`, `message`, `stack` and `id`
 * @public exported from `@promptbook/utils`
 */
function serializeError(error) {
    const { name, message, stack, id } = error;
    const isKnownError = Object.keys(ALL_ERRORS).includes(name);
    if (!isKnownError) {
        // Note: Unknown errors are only reported, serialization continues
        const report = spaceTrim((block) => `

            Cannot serialize error with name "${name}"

            Authors of Promptbook probably forgot to add this error into the list of errors:
            https://github.com/webgptorg/promptbook/blob/main/src/errors/0-index.ts


            ${block(stack || message)}

        `);
        console.error(report);
    }
    const serialized = {
        name,
        message,
        stack,
        id, // <- Note: `id` is part of the serialized object as well
    };
    return serialized;
}
2376
+
2377
/**
 * Converts a JavaScript Object Notation (JSON) string into an object.
 *
 * Note: This is a wrapper around `JSON.parse()` with better error and type handling
 *
 * @param value The JSON text to parse
 * @returns The value produced by `JSON.parse(value)`
 * @throws {Error} When `value` is `undefined`, when it is not a string, or when it is not valid JSON;
 *                 in the last case the message contains the JSON text and the original error is kept as `cause`
 * @public exported from `@promptbook/utils`
 */
function jsonParse(value) {
    if (value === undefined) {
        throw new Error(`Can not parse JSON from undefined value.`);
    }
    else if (typeof value !== 'string') {
        // Note: The full value is logged to the console, the thrown message carries only its type
        console.error('Can not parse JSON from non-string value.', { text: value });
        throw new Error(spaceTrim(`
            Can not parse JSON from non-string value.

            The value type: ${typeof value}
            See more in console.
        `));
    }
    try {
        return JSON.parse(value);
    }
    catch (error) {
        if (!(error instanceof Error)) {
            // Note: Non-Error throwables are passed through untouched
            throw error;
        }
        throw new Error(spaceTrim((block) => `
            ${block(error.message)}

            The JSON text:
            ${block(value)}
        `), 
        // Note: Keep the original parsing error (and its stack) reachable for debugging
        { cause: error });
    }
}
2412
+
2413
/**
 * Recursively converts JSON strings to JSON objects
 *
 * Note: Only string values stored directly under an object key are parsed (when `isValidJsonString` accepts them);
 *       arrays are mapped item by item and every other value is returned as it is
 *
 * @param object The value to walk through
 * @returns For plain objects a shallow copy with converted values, for arrays a new mapped array,
 *          otherwise the input itself
 * @public exported from `@promptbook/utils`
 */
function jsonStringsToJsons(object) {
    if (Array.isArray(object)) {
        return object.map((item) => jsonStringsToJsons(item));
    }
    if (object === null || typeof object !== 'object') {
        return object;
    }
    const converted = { ...object };
    Object.entries(object).forEach(([key, value]) => {
        const isJsonText = typeof value === 'string' && isValidJsonString(value);
        converted[key] = isJsonText ? jsonParse(value) : jsonStringsToJsons(value);
    });
    return converted;
}
/**
 * TODO: Type the return type correctly
 */
2442
+
2353
2443
  /**
2354
2444
  * Deserializes the error object
2355
2445
  *
@@ -2515,64 +2605,6 @@ function createTask(options) {
2515
2605
  * TODO: [🐚] Split into more files and make `PrepareTask` & `RemoteTask` + split the function
2516
2606
  */
2517
2607
 
2518
- /**
2519
- * Serializes an error into a [🚉] JSON-serializable object
2520
- *
2521
- * @public exported from `@promptbook/utils`
2522
- */
2523
- function serializeError(error) {
2524
- const { name, message, stack } = error;
2525
- const { id } = error;
2526
- if (!Object.keys(ALL_ERRORS).includes(name)) {
2527
- console.error(spaceTrim((block) => `
2528
-
2529
- Cannot serialize error with name "${name}"
2530
-
2531
- Authors of Promptbook probably forgot to add this error into the list of errors:
2532
- https://github.com/webgptorg/promptbook/blob/main/src/errors/0-index.ts
2533
-
2534
-
2535
- ${block(stack || message)}
2536
-
2537
- `));
2538
- }
2539
- return {
2540
- name: name,
2541
- message,
2542
- stack,
2543
- id, // Include id in the serialized object
2544
- };
2545
- }
2546
-
2547
- /**
2548
- * Async version of Array.forEach
2549
- *
2550
- * @param array - Array to iterate over
2551
- * @param options - Options for the function
2552
- * @param callbackfunction - Function to call for each item
2553
- * @public exported from `@promptbook/utils`
2554
- * @deprecated [🪂] Use queues instead
2555
- */
2556
- async function forEachAsync(array, options, callbackfunction) {
2557
- const { maxParallelCount = Infinity } = options;
2558
- let index = 0;
2559
- let runningTasks = [];
2560
- const tasks = [];
2561
- for (const item of array) {
2562
- const currentIndex = index++;
2563
- const task = callbackfunction(item, currentIndex, array);
2564
- tasks.push(task);
2565
- runningTasks.push(task);
2566
- /* not await */ Promise.resolve(task).then(() => {
2567
- runningTasks = runningTasks.filter((t) => t !== task);
2568
- });
2569
- if (maxParallelCount < runningTasks.length) {
2570
- await Promise.race(runningTasks);
2571
- }
2572
- }
2573
- await Promise.all(tasks);
2574
- }
2575
-
2576
2608
  /**
2577
2609
  * Represents the uncertain value
2578
2610
  *
@@ -2616,7 +2648,7 @@ const ZERO_USAGE = $deepFreeze({
2616
2648
  *
2617
2649
  * @public exported from `@promptbook/core`
2618
2650
  */
2619
- $deepFreeze({
2651
+ const UNCERTAIN_USAGE = $deepFreeze({
2620
2652
  price: UNCERTAIN_ZERO_VALUE,
2621
2653
  input: {
2622
2654
  tokensCount: UNCERTAIN_ZERO_VALUE,
@@ -2641,6 +2673,35 @@ $deepFreeze({
2641
2673
  * Note: [💞] Ignore a discrepancy between file name and entity name
2642
2674
  */
2643
2675
 
2676
/**
 * Async version of Array.forEach with a limit on concurrently running callbacks
 *
 * Callbacks are started in the order of the array. Before the next callback is started, the function
 * waits until fewer than `maxParallelCount` callbacks are still pending, so the limit is never exceeded.
 *
 * @param array - Array to iterate over
 * @param options - Options for the function; `maxParallelCount` defaults to `Infinity`, values below 1 are treated as 1
 * @param callbackfunction - Function to call for each item as `(item, index, array)`; it can return a promise
 * @returns Promise which resolves when all callbacks are finished and rejects when some callback fails
 * @public exported from `@promptbook/utils`
 * @deprecated [🪂] Use queues instead
 */
async function forEachAsync(array, options, callbackfunction) {
    const { maxParallelCount = Infinity } = options;
    // Note: At least one callback must be allowed to run, otherwise the loop below could never progress
    const parallelLimit = Math.max(1, maxParallelCount);
    const runningTasks = new Set();
    const tasks = [];
    let index = 0;
    for (const item of array) {
        // Note: Waiting for a free slot BEFORE starting the next callback keeps the limit strict
        while (runningTasks.size >= parallelLimit) {
            await Promise.race(runningTasks);
        }
        const currentIndex = index++;
        const task = Promise.resolve(callbackfunction(item, currentIndex, array));
        tasks.push(task);
        runningTasks.add(task);
        const release = () => {
            runningTasks.delete(task);
        };
        // Note: Releasing on both outcomes; the failure itself is reported by `Promise.race` / `Promise.all`,
        //       so this bookkeeping promise must not become an unhandled rejection
        /* not await */ task.then(release, release);
    }
    await Promise.all(tasks);
}
2704
+
2644
2705
  /**
2645
2706
  * Function `addUsage` will add multiple usages into one
2646
2707
  *
@@ -2987,27 +3048,48 @@ async function preparePersona(personaDescription, tools, options) {
2987
3048
  pipeline: await collection.getPipelineByUrl('https://promptbook.studio/promptbook/prepare-persona.book'),
2988
3049
  tools,
2989
3050
  });
2990
- // TODO: [🚐] Make arrayable LLMs -> single LLM DRY
2991
3051
  const _llms = arrayableToArray(tools.llm);
2992
3052
  const llmTools = _llms.length === 1 ? _llms[0] : joinLlmExecutionTools(..._llms);
2993
- const availableModels = await llmTools.listModels();
2994
- const availableModelNames = availableModels
3053
+ const availableModels = (await llmTools.listModels())
2995
3054
  .filter(({ modelVariant }) => modelVariant === 'CHAT')
2996
- .map(({ modelName }) => modelName)
2997
- .join(',');
2998
- const result = await preparePersonaExecutor({ availableModelNames, personaDescription }).asPromise();
3055
+ .map(({ modelName, modelDescription }) => ({
3056
+ modelName,
3057
+ modelDescription,
3058
+ // <- Note: `modelTitle` and `modelVariant` is not relevant for this task
3059
+ }));
3060
+ const result = await preparePersonaExecutor({
3061
+ availableModels /* <- Note: Passing as JSON */,
3062
+ personaDescription,
3063
+ }).asPromise();
2999
3064
  const { outputParameters } = result;
3000
- const { modelRequirements: modelRequirementsRaw } = outputParameters;
3001
- const modelRequirements = JSON.parse(modelRequirementsRaw);
3065
+ const { modelsRequirements: modelsRequirementsJson } = outputParameters;
3066
+ let modelsRequirementsUnchecked = jsonParse(modelsRequirementsJson);
3002
3067
  if (isVerbose) {
3003
- console.info(`PERSONA ${personaDescription}`, modelRequirements);
3068
+ console.info(`PERSONA ${personaDescription}`, modelsRequirementsUnchecked);
3004
3069
  }
3005
- const { modelName, systemMessage, temperature } = modelRequirements;
3006
- return {
3070
+ if (!Array.isArray(modelsRequirementsUnchecked)) {
3071
+ // <- TODO: Book should have syntax and system to enforce shape of JSON
3072
+ modelsRequirementsUnchecked = [modelsRequirementsUnchecked];
3073
+ /*
3074
+ throw new UnexpectedError(
3075
+ spaceTrim(
3076
+ (block) => `
3077
+ Invalid \`modelsRequirements\`:
3078
+
3079
+ \`\`\`json
3080
+ ${block(JSON.stringify(modelsRequirementsUnchecked, null, 4))}
3081
+ \`\`\`
3082
+ `,
3083
+ ),
3084
+ );
3085
+ */
3086
+ }
3087
+ const modelsRequirements = modelsRequirementsUnchecked.map((modelRequirements) => ({
3007
3088
  modelVariant: 'CHAT',
3008
- modelName,
3009
- systemMessage,
3010
- temperature,
3089
+ ...modelRequirements,
3090
+ }));
3091
+ return {
3092
+ modelsRequirements,
3011
3093
  };
3012
3094
  }
3013
3095
  /**
@@ -3018,7 +3100,8 @@ async function preparePersona(personaDescription, tools, options) {
3018
3100
  */
3019
3101
 
3020
3102
  /**
3021
- * @@@
3103
+ * Safely retrieves the global scope object (window in browser, global in Node.js)
3104
+ * regardless of the JavaScript environment in which the code is running
3022
3105
  *
3023
3106
  * Note: `$` is used to indicate that this function is not a pure function - it accesses global scope
3024
3107
  *
@@ -3029,10 +3112,10 @@ function $getGlobalScope() {
3029
3112
  }
3030
3113
 
3031
3114
  /**
3032
- * @@@
3115
+ * Normalizes a text string to SCREAMING_CASE (all uppercase with underscores).
3033
3116
  *
3034
- * @param text @@@
3035
- * @returns @@@
3117
+ * @param text The text string to be converted to SCREAMING_CASE format.
3118
+ * @returns The normalized text in SCREAMING_CASE format.
3036
3119
  * @example 'HELLO_WORLD'
3037
3120
  * @example 'I_LOVE_PROMPTBOOK'
3038
3121
  * @public exported from `@promptbook/utils`
@@ -3084,10 +3167,10 @@ function normalizeTo_SCREAMING_CASE(text) {
3084
3167
  */
3085
3168
 
3086
3169
  /**
3087
- * @@@
3170
+ * Normalizes a text string to snake_case format.
3088
3171
  *
3089
- * @param text @@@
3090
- * @returns @@@
3172
+ * @param text The text string to be converted to snake_case format.
3173
+ * @returns The normalized text in snake_case format.
3091
3174
  * @example 'hello_world'
3092
3175
  * @example 'i_love_promptbook'
3093
3176
  * @public exported from `@promptbook/utils`
@@ -3097,11 +3180,11 @@ function normalizeTo_snake_case(text) {
3097
3180
  }
3098
3181
 
3099
3182
  /**
3100
- * Register is @@@
3183
+ * Global registry for storing and managing registered entities of a given type.
3101
3184
  *
3102
3185
  * Note: `$` is used to indicate that this function is not a pure function - it accesses and adds variables in global scope.
3103
3186
  *
3104
- * @private internal utility, exported are only signleton instances of this class
3187
+ * @private internal utility, exported are only singleton instances of this class
3105
3188
  */
3106
3189
  class $Register {
3107
3190
  constructor(registerName) {
@@ -3145,10 +3228,10 @@ class $Register {
3145
3228
  }
3146
3229
 
3147
3230
  /**
3148
- * @@@
3231
+ * Global registry for storing metadata about all available scrapers and converters.
3149
3232
  *
3150
- * Note: `$` is used to indicate that this interacts with the global scope
3151
- * @singleton Only one instance of each register is created per build, but thare can be more @@@
3233
+ * Note: `$` is used to indicate that this interacts with the global scope.
3234
+ * @singleton Only one instance of each register is created per build, but there can be more in different contexts (e.g., tests).
3152
3235
  * @public exported from `@promptbook/core`
3153
3236
  */
3154
3237
  const $scrapersMetadataRegister = new $Register('scrapers_metadata');
@@ -3157,10 +3240,11 @@ const $scrapersMetadataRegister = new $Register('scrapers_metadata');
3157
3240
  */
3158
3241
 
3159
3242
  /**
3160
- * @@@
3243
+ * Registry for all available scrapers in the system.
3244
+ * Central point for registering and accessing different types of content scrapers.
3161
3245
  *
3162
3246
  * Note: `$` is used to indicate that this interacts with the global scope
3163
- * @singleton Only one instance of each register is created per build, but thare can be more @@@
3247
+ * @singleton Only one instance of each register is created per build, but there can be more than one in different build modules
3164
3248
  * @public exported from `@promptbook/core`
3165
3249
  */
3166
3250
  const $scrapersRegister = new $Register('scraper_constructors');
@@ -3338,7 +3422,9 @@ const promptbookFetch = async (urlOrRequest, init) => {
3338
3422
  */
3339
3423
 
3340
3424
  /**
3341
- * @@@
3425
+ * Factory function that creates a handler for processing knowledge sources.
3426
+ * Provides standardized processing of different types of knowledge sources
3427
+ * across various scraper implementations.
3342
3428
  *
3343
3429
  * @public exported from `@promptbook/core`
3344
3430
  */
@@ -3445,7 +3531,7 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3445
3531
  > },
3446
3532
  */
3447
3533
  async asJson() {
3448
- return JSON.parse(await tools.fs.readFile(filename, 'utf-8'));
3534
+ return jsonParse(await tools.fs.readFile(filename, 'utf-8'));
3449
3535
  },
3450
3536
  async asText() {
3451
3537
  return await tools.fs.readFile(filename, 'utf-8');
@@ -3579,9 +3665,12 @@ TODO: [🧊] This is how it can look in future
3579
3665
  */
3580
3666
 
3581
3667
  /**
3582
- * @@@
3668
+ * Prepares tasks by adding knowledge to the prompt and ensuring all necessary parameters are included.
3583
3669
  *
3584
- * @public exported from `@promptbook/core`
3670
+ * @param tasks Sequence of tasks that are chained together to form a pipeline
3671
+ * @returns A promise that resolves to the prepared tasks.
3672
+ *
3673
+ * @private internal utility of `preparePipeline`
3585
3674
  */
3586
3675
  async function prepareTasks(pipeline, tools, options) {
3587
3676
  const { maxParallelCount = DEFAULT_MAX_PARALLEL_COUNT } = options;
@@ -3703,14 +3792,14 @@ async function preparePipeline(pipeline, tools, options) {
3703
3792
  // TODO: [🖌][🧠] Implement some `mapAsync` function
3704
3793
  const preparedPersonas = new Array(personas.length);
3705
3794
  await forEachAsync(personas, { maxParallelCount /* <- TODO: [🪂] When there are subtasks, this maximul limit can be broken */ }, async (persona, index) => {
3706
- const modelRequirements = await preparePersona(persona.description, { ...tools, llm: llmToolsWithUsage }, {
3795
+ const { modelsRequirements } = await preparePersona(persona.description, { ...tools, llm: llmToolsWithUsage }, {
3707
3796
  rootDirname,
3708
3797
  maxParallelCount /* <- TODO: [🪂] */,
3709
3798
  isVerbose,
3710
3799
  });
3711
3800
  const preparedPersona = {
3712
3801
  ...persona,
3713
- modelRequirements,
3802
+ modelsRequirements,
3714
3803
  preparationIds: [/* TODO: [🧊] -> */ currentPreparation.id],
3715
3804
  // <- TODO: [🍙] Make some standard order of json properties
3716
3805
  };
@@ -4018,7 +4107,7 @@ function union(...sets) {
4018
4107
  }
4019
4108
 
4020
4109
  /**
4021
- * @@@
4110
+ * Contains configuration options for parsing and generating CSV files, such as delimiters and quoting rules.
4022
4111
  *
4023
4112
  * @public exported from `@promptbook/core`
4024
4113
  */
@@ -4027,11 +4116,29 @@ const MANDATORY_CSV_SETTINGS = Object.freeze({
4027
4116
  // encoding: 'utf-8',
4028
4117
  });
4029
4118
 
4119
+ /**
4120
+ * Converts a CSV string into an object
4121
+ *
4122
+ * Note: This is wrapper around `papaparse.parse()` with better autohealing
4123
+ *
4124
+ * @private - for now until `@promptbook/csv` is released
4125
+ */
4126
+ function csvParse(value /* <- TODO: string_csv */, settings, schema /* <- TODO: Make CSV Schemas */) {
4127
+ settings = { ...settings, ...MANDATORY_CSV_SETTINGS };
4128
+ // Note: Autoheal invalid '\n' characters
4129
+ if (settings.newline && !settings.newline.includes('\r') && value.includes('\r')) {
4130
+ console.warn('CSV string contains carriage return characters, but in the CSV settings the `newline` setting does not include them. Autohealing the CSV string.');
4131
+ value = value.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
4132
+ }
4133
+ const csv = parse(value, settings);
4134
+ return csv;
4135
+ }
4136
+
4030
4137
  /**
4031
4138
  * Function to check if a string is valid CSV
4032
4139
  *
4033
4140
  * @param value The string to check
4034
- * @returns True if the string is a valid CSV string, false otherwise
4141
+ * @returns `true` if the string is a valid CSV string, false otherwise
4035
4142
  *
4036
4143
  * @public exported from `@promptbook/utils`
4037
4144
  */
@@ -4055,7 +4162,7 @@ function isValidCsvString(value) {
4055
4162
  * @public exported from `@promptbook/core`
4056
4163
  * <- TODO: [🏢] Export from package `@promptbook/csv`
4057
4164
  */
4058
- const CsvFormatDefinition = {
4165
+ const CsvFormatParser = {
4059
4166
  formatName: 'CSV',
4060
4167
  aliases: ['SPREADSHEET', 'TABLE'],
4061
4168
  isValid(value, settings, schema) {
@@ -4067,12 +4174,12 @@ const CsvFormatDefinition = {
4067
4174
  heal(value, settings, schema) {
4068
4175
  throw new Error('Not implemented');
4069
4176
  },
4070
- subvalueDefinitions: [
4177
+ subvalueParsers: [
4071
4178
  {
4072
4179
  subvalueName: 'ROW',
4073
- async mapValues(value, outputParameterName, settings, mapCallback) {
4074
- // TODO: [👨🏾‍🤝‍👨🏼] DRY csv parsing
4075
- const csv = parse(value, { ...settings, ...MANDATORY_CSV_SETTINGS });
4180
+ async mapValues(options) {
4181
+ const { value, outputParameterName, settings, mapCallback, onProgress } = options;
4182
+ const csv = csvParse(value, settings);
4076
4183
  if (csv.errors.length !== 0) {
4077
4184
  throw new CsvFormatError(spaceTrim((block) => `
4078
4185
  CSV parsing error
@@ -4087,23 +4194,37 @@ const CsvFormatDefinition = {
4087
4194
  ${block(value)}
4088
4195
  `));
4089
4196
  }
4090
- const mappedData = await Promise.all(csv.data.map(async (row, index) => {
4197
+ const mappedData = [];
4198
+ const length = csv.data.length;
4199
+ for (let index = 0; index < length; index++) {
4200
+ const row = csv.data[index];
4091
4201
  if (row[outputParameterName]) {
4092
4202
  throw new CsvFormatError(`Can not overwrite existing column "${outputParameterName}" in CSV row`);
4093
4203
  }
4094
- return {
4204
+ const mappedRow = {
4095
4205
  ...row,
4096
- [outputParameterName]: await mapCallback(row, index),
4206
+ [outputParameterName]: await mapCallback(row, index, length),
4097
4207
  };
4098
- }));
4208
+ mappedData.push(mappedRow);
4209
+ if (onProgress) {
4210
+ // Note: Report the CSV with all rows mapped so far
4211
+ /*
4212
+ // TODO: [🛕] Report progress with all the rows including the pending ones
4213
+ const progressData = mappedData.map((row, i) =>
4214
+ i > index ? { ...row, [outputParameterName]: PENDING_VALUE_PLACEHOLDER } : row,
4215
+ );
4216
+ */
4217
+ await onProgress(unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS }));
4218
+ }
4219
+ }
4099
4220
  return unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
4100
4221
  },
4101
4222
  },
4102
4223
  {
4103
4224
  subvalueName: 'CELL',
4104
- async mapValues(value, outputParameterName, settings, mapCallback) {
4105
- // TODO: [👨🏾‍🤝‍👨🏼] DRY csv parsing
4106
- const csv = parse(value, { ...settings, ...MANDATORY_CSV_SETTINGS });
4225
+ async mapValues(options) {
4226
+ const { value, settings, mapCallback, onProgress } = options;
4227
+ const csv = csvParse(value, settings);
4107
4228
  if (csv.errors.length !== 0) {
4108
4229
  throw new CsvFormatError(spaceTrim((block) => `
4109
4230
  CSV parsing error
@@ -4119,9 +4240,9 @@ const CsvFormatDefinition = {
4119
4240
  `));
4120
4241
  }
4121
4242
  const mappedData = await Promise.all(csv.data.map(async (row, rowIndex) => {
4122
- return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex) => {
4243
+ return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex, array) => {
4123
4244
  const index = rowIndex * Object.keys(row).length + columnIndex;
4124
- return /* not await */ mapCallback({ [key]: value }, index);
4245
+ return /* not await */ mapCallback({ [key]: value }, index, array.length);
4125
4246
  }));
4126
4247
  }));
4127
4248
  return unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
@@ -4130,10 +4251,10 @@ const CsvFormatDefinition = {
4130
4251
  ],
4131
4252
  };
4132
4253
  /**
4133
- * TODO: [🍓] In `CsvFormatDefinition` implement simple `isValid`
4134
- * TODO: [🍓] In `CsvFormatDefinition` implement partial `canBeValid`
4135
- * TODO: [🍓] In `CsvFormatDefinition` implement `heal
4136
- * TODO: [🍓] In `CsvFormatDefinition` implement `subvalueDefinitions`
4254
+ * TODO: [🍓] In `CsvFormatParser` implement simple `isValid`
4255
+ * TODO: [🍓] In `CsvFormatParser` implement partial `canBeValid`
4256
+ * TODO: [🍓] In `CsvFormatParser` implement `heal
4257
+ * TODO: [🍓] In `CsvFormatParser` implement `subvalueParsers`
4137
4258
  * TODO: [🏢] Allow to expect something inside CSV objects and other formats
4138
4259
  */
4139
4260
 
@@ -4142,7 +4263,7 @@ const CsvFormatDefinition = {
4142
4263
  *
4143
4264
  * @private still in development [🏢]
4144
4265
  */
4145
- const JsonFormatDefinition = {
4266
+ const JsonFormatParser = {
4146
4267
  formatName: 'JSON',
4147
4268
  mimeType: 'application/json',
4148
4269
  isValid(value, settings, schema) {
@@ -4154,28 +4275,28 @@ const JsonFormatDefinition = {
4154
4275
  heal(value, settings, schema) {
4155
4276
  throw new Error('Not implemented');
4156
4277
  },
4157
- subvalueDefinitions: [],
4278
+ subvalueParsers: [],
4158
4279
  };
4159
4280
  /**
4160
4281
  * TODO: [🧠] Maybe propper instance of object
4161
4282
  * TODO: [0] Make string_serialized_json
4162
4283
  * TODO: [1] Make type for JSON Settings and Schema
4163
4284
  * TODO: [🧠] What to use for validating JSONs - JSON Schema, ZoD, typescript types/interfaces,...?
4164
- * TODO: [🍓] In `JsonFormatDefinition` implement simple `isValid`
4165
- * TODO: [🍓] In `JsonFormatDefinition` implement partial `canBeValid`
4166
- * TODO: [🍓] In `JsonFormatDefinition` implement `heal
4167
- * TODO: [🍓] In `JsonFormatDefinition` implement `subvalueDefinitions`
4285
+ * TODO: [🍓] In `JsonFormatParser` implement simple `isValid`
4286
+ * TODO: [🍓] In `JsonFormatParser` implement partial `canBeValid`
4287
+ * TODO: [🍓] In `JsonFormatParser` implement `heal
4288
+ * TODO: [🍓] In `JsonFormatParser` implement `subvalueParsers`
4168
4289
  * TODO: [🏢] Allow to expect something inside JSON objects and other formats
4169
4290
  */
4170
4291
 
4171
4292
  /**
4172
4293
  * Definition for any text - this will be always valid
4173
4294
  *
4174
- * Note: This is not useful for validation, but for splitting and mapping with `subvalueDefinitions`
4295
+ * Note: This is not useful for validation, but for splitting and mapping with `subvalueParsers`
4175
4296
  *
4176
4297
  * @public exported from `@promptbook/core`
4177
4298
  */
4178
- const TextFormatDefinition = {
4299
+ const TextFormatParser = {
4179
4300
  formatName: 'TEXT',
4180
4301
  isValid(value) {
4181
4302
  return typeof value === 'string';
@@ -4184,19 +4305,20 @@ const TextFormatDefinition = {
4184
4305
  return typeof partialValue === 'string';
4185
4306
  },
4186
4307
  heal() {
4187
- throw new UnexpectedError('It does not make sense to call `TextFormatDefinition.heal`');
4308
+ throw new UnexpectedError('It does not make sense to call `TextFormatParser.heal`');
4188
4309
  },
4189
- subvalueDefinitions: [
4310
+ subvalueParsers: [
4190
4311
  {
4191
4312
  subvalueName: 'LINE',
4192
- async mapValues(value, outputParameterName, settings, mapCallback) {
4313
+ async mapValues(options) {
4314
+ const { value, mapCallback, onProgress } = options;
4193
4315
  const lines = value.split('\n');
4194
- const mappedLines = await Promise.all(lines.map((lineContent, lineNumber) =>
4316
+ const mappedLines = await Promise.all(lines.map((lineContent, lineNumber, array) =>
4195
4317
  // TODO: [🧠] Maybe option to skip empty line
4196
4318
  /* not await */ mapCallback({
4197
4319
  lineContent,
4198
4320
  // TODO: [🧠] Maybe also put here `lineNumber`
4199
- }, lineNumber)));
4321
+ }, lineNumber, array.length)));
4200
4322
  return mappedLines.join('\n');
4201
4323
  },
4202
4324
  },
@@ -4206,10 +4328,10 @@ const TextFormatDefinition = {
4206
4328
  /**
4207
4329
  * TODO: [1] Make type for XML Text and Schema
4208
4330
  * TODO: [🧠][🤠] Here should be all words, characters, lines, paragraphs, pages available as subvalues
4209
- * TODO: [🍓] In `TextFormatDefinition` implement simple `isValid`
4210
- * TODO: [🍓] In `TextFormatDefinition` implement partial `canBeValid`
4211
- * TODO: [🍓] In `TextFormatDefinition` implement `heal
4212
- * TODO: [🍓] In `TextFormatDefinition` implement `subvalueDefinitions`
4331
+ * TODO: [🍓] In `TextFormatParser` implement simple `isValid`
4332
+ * TODO: [🍓] In `TextFormatParser` implement partial `canBeValid`
4333
+ * TODO: [🍓] In `TextFormatParser` implement `heal
4334
+ * TODO: [🍓] In `TextFormatParser` implement `subvalueParsers`
4213
4335
  * TODO: [🏢] Allow to expect something inside each item of list and other formats
4214
4336
  */
4215
4337
 
@@ -4217,7 +4339,7 @@ const TextFormatDefinition = {
4217
4339
  * Function to check if a string is valid XML
4218
4340
  *
4219
4341
  * @param value
4220
- * @returns True if the string is a valid XML string, false otherwise
4342
+ * @returns `true` if the string is a valid XML string, false otherwise
4221
4343
  *
4222
4344
  * @public exported from `@promptbook/utils`
4223
4345
  */
@@ -4242,7 +4364,7 @@ function isValidXmlString(value) {
4242
4364
  *
4243
4365
  * @private still in development [🏢]
4244
4366
  */
4245
- const XmlFormatDefinition = {
4367
+ const XmlFormatParser = {
4246
4368
  formatName: 'XML',
4247
4369
  mimeType: 'application/xml',
4248
4370
  isValid(value, settings, schema) {
@@ -4254,17 +4376,17 @@ const XmlFormatDefinition = {
4254
4376
  heal(value, settings, schema) {
4255
4377
  throw new Error('Not implemented');
4256
4378
  },
4257
- subvalueDefinitions: [],
4379
+ subvalueParsers: [],
4258
4380
  };
4259
4381
  /**
4260
4382
  * TODO: [🧠] Maybe propper instance of object
4261
4383
  * TODO: [0] Make string_serialized_xml
4262
4384
  * TODO: [1] Make type for XML Settings and Schema
4263
4385
  * TODO: [🧠] What to use for validating XMLs - XSD,...
4264
- * TODO: [🍓] In `XmlFormatDefinition` implement simple `isValid`
4265
- * TODO: [🍓] In `XmlFormatDefinition` implement partial `canBeValid`
4266
- * TODO: [🍓] In `XmlFormatDefinition` implement `heal
4267
- * TODO: [🍓] In `XmlFormatDefinition` implement `subvalueDefinitions`
4386
+ * TODO: [🍓] In `XmlFormatParser` implement simple `isValid`
4387
+ * TODO: [🍓] In `XmlFormatParser` implement partial `canBeValid`
4388
+ * TODO: [🍓] In `XmlFormatParser` implement `heal
4389
+ * TODO: [🍓] In `XmlFormatParser` implement `subvalueParsers`
4268
4390
  * TODO: [🏢] Allow to expect something inside XML and other formats
4269
4391
  */
4270
4392
 
@@ -4273,24 +4395,19 @@ const XmlFormatDefinition = {
4273
4395
  *
4274
4396
  * @private internal index of `...` <- TODO [🏢]
4275
4397
  */
4276
- const FORMAT_DEFINITIONS = [
4277
- JsonFormatDefinition,
4278
- XmlFormatDefinition,
4279
- TextFormatDefinition,
4280
- CsvFormatDefinition,
4281
- ];
4398
+ const FORMAT_DEFINITIONS = [JsonFormatParser, XmlFormatParser, TextFormatParser, CsvFormatParser];
4282
4399
  /**
4283
4400
  * Note: [💞] Ignore a discrepancy between file name and entity name
4284
4401
  */
4285
4402
 
4286
4403
  /**
4287
- * Maps available parameters to expected parameters
4404
+ * Maps available parameters to expected parameters for a pipeline task.
4288
4405
  *
4289
4406
  * The strategy is:
4290
- * 1) @@@
4291
- * 2) @@@
4407
+ * 1) First, match parameters by name where both available and expected.
4408
+ * 2) Then, if there are unmatched expected and available parameters, map them by order.
4292
4409
  *
4293
- * @throws {PipelineExecutionError} @@@
4410
+ * @throws {PipelineExecutionError} If the number of unmatched expected and available parameters does not match, or mapping is ambiguous.
4294
4411
  * @private within the repository used in `createPipelineExecutor`
4295
4412
  */
4296
4413
  function mapAvailableToExpectedParameters(options) {
@@ -4313,7 +4430,7 @@ function mapAvailableToExpectedParameters(options) {
4313
4430
  else if (!availableParametersNames.has(parameterName) && expectedParameterNames.has(parameterName)) ;
4314
4431
  }
4315
4432
  if (expectedParameterNames.size === 0) {
4316
- // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent @@@
4433
+ // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
4317
4434
  Object.freeze(mappedParameters);
4318
4435
  return mappedParameters;
4319
4436
  }
@@ -4344,7 +4461,7 @@ function mapAvailableToExpectedParameters(options) {
4344
4461
  for (let i = 0; i < expectedParameterNames.size; i++) {
4345
4462
  mappedParameters[expectedParameterNamesArray[i]] = availableParameters[availableParametersNamesArray[i]];
4346
4463
  }
4347
- // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent @@@
4464
+ // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
4348
4465
  Object.freeze(mappedParameters);
4349
4466
  return mappedParameters;
4350
4467
  }
@@ -4448,7 +4565,7 @@ function extractJsonBlock(markdown) {
4448
4565
  }
4449
4566
  /**
4450
4567
  * TODO: Add some auto-healing logic + extract YAML, JSON5, TOML, etc.
4451
- * TODO: [🏢] Make this logic part of `JsonFormatDefinition` or `isValidJsonString`
4568
+ * TODO: [🏢] Make this logic part of `JsonFormatParser` or `isValidJsonString`
4452
4569
  */
4453
4570
 
4454
4571
  /**
@@ -4491,10 +4608,12 @@ function templateParameters(template, parameters) {
4491
4608
  throw new PipelineExecutionError('Parameter is already opened or not closed');
4492
4609
  }
4493
4610
  if (parameters[parameterName] === undefined) {
4611
+ console.log('!!! templateParameters 1', { parameterName, template, parameters });
4494
4612
  throw new PipelineExecutionError(`Parameter \`{${parameterName}}\` is not defined`);
4495
4613
  }
4496
4614
  let parameterValue = parameters[parameterName];
4497
4615
  if (parameterValue === undefined) {
4616
+ console.log('!!! templateParameters 2', { parameterName, template, parameters });
4498
4617
  throw new PipelineExecutionError(`Parameter \`{${parameterName}}\` is not defined`);
4499
4618
  }
4500
4619
  parameterValue = valueToString(parameterValue);
@@ -4650,7 +4769,7 @@ const CountUtils = {
4650
4769
  PAGES: countPages,
4651
4770
  };
4652
4771
  /**
4653
- * TODO: [🧠][🤠] This should be probbably as part of `TextFormatDefinition`
4772
+ * TODO: [🧠][🤠] This should be probbably as part of `TextFormatParser`
4654
4773
  * Note: [💞] Ignore a discrepancy between file name and entity name
4655
4774
  */
4656
4775
 
@@ -4678,13 +4797,17 @@ function checkExpectations(expectations, value) {
4678
4797
  }
4679
4798
  /**
4680
4799
  * TODO: [💝] Unite object for expecting amount and format
4681
- * TODO: [🧠][🤠] This should be part of `TextFormatDefinition`
4800
+ * TODO: [🧠][🤠] This should be part of `TextFormatParser`
4682
4801
  * Note: [💝] and [🤠] are interconnected together
4683
4802
  */
4684
4803
 
4685
4804
  /**
4686
- * @@@
4805
+ * Executes a pipeline task with multiple attempts, including joker and retry logic. Handles different task types
4806
+ * (prompt, script, dialog, etc.), applies postprocessing, checks expectations, and updates the execution report.
4807
+ * Throws errors if execution fails after all attempts.
4687
4808
  *
4809
+ * @param options - The options for execution, including task, parameters, pipeline, and configuration.
4810
+ * @returns The result string of the executed task.
4688
4811
  * @private internal utility of `createPipelineExecutor`
4689
4812
  */
4690
4813
  async function executeAttempts(options) {
@@ -4906,7 +5029,7 @@ async function executeAttempts(options) {
4906
5029
  if (task.format) {
4907
5030
  if (task.format === 'JSON') {
4908
5031
  if (!isValidJsonString($ongoingTaskResult.$resultString || '')) {
4909
- // TODO: [🏢] Do more universally via `FormatDefinition`
5032
+ // TODO: [🏢] Do more universally via `FormatParser`
4910
5033
  try {
4911
5034
  $ongoingTaskResult.$resultString = extractJsonBlock($ongoingTaskResult.$resultString || '');
4912
5035
  }
@@ -5008,12 +5131,16 @@ async function executeAttempts(options) {
5008
5131
  */
5009
5132
 
5010
5133
  /**
5011
- * @@@
5134
+ * Executes a pipeline task that requires mapping or iterating over subvalues of a parameter (such as rows in a CSV).
5135
+ * Handles format and subformat resolution, error handling, and progress reporting.
5136
+ *
5137
+ * @param options - Options for execution, including task details and progress callback.
5138
+ * @returns The result of the subvalue mapping or execution attempts.
5012
5139
  *
5013
5140
  * @private internal utility of `createPipelineExecutor`
5014
5141
  */
5015
5142
  async function executeFormatSubvalues(options) {
5016
- const { task, jokerParameterNames, parameters, priority, csvSettings, pipelineIdentification } = options;
5143
+ const { task, jokerParameterNames, parameters, priority, csvSettings, onProgress, pipelineIdentification } = options;
5017
5144
  if (task.foreach === undefined) {
5018
5145
  return /* not await */ executeAttempts(options);
5019
5146
  }
@@ -5044,16 +5171,16 @@ async function executeFormatSubvalues(options) {
5044
5171
  ${block(pipelineIdentification)}
5045
5172
  `));
5046
5173
  }
5047
- const subvalueDefinition = formatDefinition.subvalueDefinitions.find((subvalueDefinition) => [subvalueDefinition.subvalueName, ...(subvalueDefinition.aliases || [])].includes(task.foreach.subformatName));
5048
- if (subvalueDefinition === undefined) {
5174
+ const subvalueParser = formatDefinition.subvalueParsers.find((subvalueParser) => [subvalueParser.subvalueName, ...(subvalueParser.aliases || [])].includes(task.foreach.subformatName));
5175
+ if (subvalueParser === undefined) {
5049
5176
  throw new UnexpectedError(
5050
5177
  // <- TODO: [🧠][🧐] Should be formats fixed per promptbook version or behave as plugins (=> change UnexpectedError)
5051
5178
  spaceTrim((block) => `
5052
5179
  Unsupported subformat name "${task.foreach.subformatName}" for format "${task.foreach.formatName}"
5053
5180
 
5054
5181
  Available subformat names for format "${formatDefinition.formatName}":
5055
- ${block(formatDefinition.subvalueDefinitions
5056
- .map((subvalueDefinition) => subvalueDefinition.subvalueName)
5182
+ ${block(formatDefinition.subvalueParsers
5183
+ .map((subvalueParser) => subvalueParser.subvalueName)
5057
5184
  .map((subvalueName) => `- ${subvalueName}`)
5058
5185
  .join('\n'))}
5059
5186
 
@@ -5067,53 +5194,83 @@ async function executeFormatSubvalues(options) {
5067
5194
  formatSettings = csvSettings;
5068
5195
  // <- TODO: [🤹‍♂️] More universal, make simmilar pattern for other formats for example \n vs \r\n in text
5069
5196
  }
5070
- const resultString = await subvalueDefinition.mapValues(parameterValue, task.foreach.outputSubparameterName, formatSettings, async (subparameters, index) => {
5071
- let mappedParameters;
5072
- // TODO: [🤹‍♂️][🪂] Limit to N concurrent executions
5073
- // TODO: When done [🐚] Report progress also for each subvalue here
5074
- try {
5075
- mappedParameters = mapAvailableToExpectedParameters({
5076
- expectedParameters: Object.fromEntries(task.foreach.inputSubparameterNames.map((subparameterName) => [subparameterName, null])),
5077
- availableParameters: subparameters,
5078
- });
5079
- }
5080
- catch (error) {
5081
- if (!(error instanceof PipelineExecutionError)) {
5082
- throw error;
5197
+ const resultString = await subvalueParser.mapValues({
5198
+ value: parameterValue,
5199
+ outputParameterName: task.foreach.outputSubparameterName,
5200
+ settings: formatSettings,
5201
+ onProgress(partialResultString) {
5202
+ return onProgress(Object.freeze({
5203
+ [task.resultingParameterName]: partialResultString,
5204
+ }));
5205
+ },
5206
+ async mapCallback(subparameters, index, length) {
5207
+ let mappedParameters;
5208
+ try {
5209
+ mappedParameters = mapAvailableToExpectedParameters({
5210
+ expectedParameters: Object.fromEntries(task.foreach.inputSubparameterNames.map((subparameterName) => [subparameterName, null])),
5211
+ availableParameters: subparameters,
5212
+ });
5083
5213
  }
5084
- throw new PipelineExecutionError(spaceTrim((block) => `
5085
- ${error.message}
5214
+ catch (error) {
5215
+ if (!(error instanceof PipelineExecutionError)) {
5216
+ throw error;
5217
+ }
5218
+ const highLevelError = new PipelineExecutionError(spaceTrim((block) => `
5219
+ ${error.message}
5086
5220
 
5087
- This is error in FOREACH command
5088
- You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5221
+ This is error in FOREACH command when mapping ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
5222
+ You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5089
5223
 
5090
- ${block(pipelineIdentification)}
5091
- Subparameter index: ${index}
5092
- `));
5093
- }
5094
- const allSubparameters = {
5095
- ...parameters,
5096
- ...mappedParameters,
5097
- };
5098
- // Note: [👨‍👨‍👧] Now we can freeze `subparameters` because we are sure that all and only used parameters are defined and are not going to be changed
5099
- Object.freeze(allSubparameters);
5100
- const subresultString = await executeAttempts({
5101
- ...options,
5102
- priority: priority + index,
5103
- parameters: allSubparameters,
5104
- pipelineIdentification: spaceTrim((block) => `
5105
- ${block(pipelineIdentification)}
5106
- Subparameter index: ${index}
5107
- `),
5108
- });
5109
- return subresultString;
5224
+ ${block(pipelineIdentification)}
5225
+ `));
5226
+ if (length > BIG_DATASET_TRESHOLD) {
5227
+ console.error(highLevelError);
5228
+ return FAILED_VALUE_PLACEHOLDER;
5229
+ }
5230
+ throw highLevelError;
5231
+ }
5232
+ const allSubparameters = {
5233
+ ...parameters,
5234
+ ...mappedParameters,
5235
+ };
5236
+ Object.freeze(allSubparameters);
5237
+ try {
5238
+ const subresultString = await executeAttempts({
5239
+ ...options,
5240
+ priority: priority + index,
5241
+ parameters: allSubparameters,
5242
+ pipelineIdentification: spaceTrim((block) => `
5243
+ ${block(pipelineIdentification)}
5244
+ Subparameter index: ${index}
5245
+ `),
5246
+ });
5247
+ return subresultString;
5248
+ }
5249
+ catch (error) {
5250
+ if (length > BIG_DATASET_TRESHOLD) {
5251
+ console.error(spaceTrim((block) => `
5252
+ ${error.message}
5253
+
5254
+ This is error in FOREACH command when processing ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
5255
+
5256
+ ${block(pipelineIdentification)}
5257
+ `));
5258
+ return FAILED_VALUE_PLACEHOLDER;
5259
+ }
5260
+ throw error;
5261
+ }
5262
+ },
5110
5263
  });
5111
5264
  return resultString;
5112
5265
  }
5113
5266
 
5114
5267
  /**
5115
- * @@@
5268
+ * Returns the context for a given task, typically used to provide additional information or variables
5269
+ * required for the execution of the task within a pipeline. The context is returned as a string value
5270
+ * that may include markdown formatting.
5116
5271
  *
5272
+ * @param task - The task for which the context is being generated. This should be a deeply immutable TaskJson object.
5273
+ * @returns The context as a string, formatted as markdown and parameter value.
5117
5274
  * @private internal utility of `createPipelineExecutor`
5118
5275
  */
5119
5276
  async function getContextForTask(task) {
@@ -5121,7 +5278,7 @@ async function getContextForTask(task) {
5121
5278
  }
5122
5279
 
5123
5280
  /**
5124
- * @@@
5281
+ * Retrieves example values or templates for a given task, used to guide or validate pipeline execution.
5125
5282
  *
5126
5283
  * @private internal utility of `createPipelineExecutor`
5127
5284
  */
@@ -5130,25 +5287,127 @@ async function getExamplesForTask(task) {
5130
5287
  }
5131
5288
 
5132
5289
  /**
5133
- * @@@
5290
+ * Computes the cosine similarity between two embedding vectors
5291
+ *
5292
+ * Note: This is helping function for RAG (retrieval-augmented generation)
5293
+ *
5294
+ * @param embeddingVector1
5295
+ * @param embeddingVector2
5296
+ * @returns Cosine similarity between the two vectors
5297
+ *
5298
+ * @public exported from `@promptbook/core`
5299
+ */
5300
+ function computeCosineSimilarity(embeddingVector1, embeddingVector2) {
5301
+ if (embeddingVector1.length !== embeddingVector2.length) {
5302
+ throw new TypeError('Embedding vectors must have the same length');
5303
+ }
5304
+ const dotProduct = embeddingVector1.reduce((sum, value, index) => sum + value * embeddingVector2[index], 0);
5305
+ const magnitude1 = Math.sqrt(embeddingVector1.reduce((sum, value) => sum + value * value, 0));
5306
+ const magnitude2 = Math.sqrt(embeddingVector2.reduce((sum, value) => sum + value * value, 0));
5307
+ return 1 - dotProduct / (magnitude1 * magnitude2);
5308
+ }
5309
+
5310
+ /**
5311
+ *
5312
+ * @param knowledgePieces
5313
+ * @returns
5314
+ *
5315
+ * @private internal utility of `createPipelineExecutor`
5316
+ */
5317
+ function knowledgePiecesToString(knowledgePieces) {
5318
+ return knowledgePieces
5319
+ .map((knowledgePiece) => {
5320
+ const { content } = knowledgePiece;
5321
+ return `- ${content}`;
5322
+ })
5323
+ .join('\n');
5324
+ // <- TODO: [🧠] Some smarter aggregation of knowledge pieces, single-line vs multi-line vs mixed
5325
+ }
5326
+
5327
+ /**
5328
+ * Retrieves the most relevant knowledge pieces for a given task using embedding-based similarity search.
5329
+ * This is where retrieval-augmented generation (RAG) is performed to enhance the task with external knowledge.
5134
5330
  *
5135
5331
  * @private internal utility of `createPipelineExecutor`
5136
5332
  */
5137
5333
  async function getKnowledgeForTask(options) {
5138
- const { preparedPipeline, task } = options;
5139
- return preparedPipeline.knowledgePieces.map(({ content }) => `- ${content}`).join('\n');
5140
- // <- TODO: [🧠] Some smart aggregation of knowledge pieces, single-line vs multi-line vs mixed
5334
+ const { tools, preparedPipeline, task, parameters } = options;
5335
+ const firstKnowlegePiece = preparedPipeline.knowledgePieces[0];
5336
+ const firstKnowlegeIndex = firstKnowlegePiece === null || firstKnowlegePiece === void 0 ? void 0 : firstKnowlegePiece.index[0];
5337
+ // <- TODO: Do not use just first knowledge piece and first index to determine embedding model, use also keyword search
5338
+ if (firstKnowlegePiece === undefined || firstKnowlegeIndex === undefined) {
5339
+ return ''; // <- Note: Np knowledge present, return empty string
5340
+ }
5341
+ try {
5342
+ // TODO: [🚐] Make arrayable LLMs -> single LLM DRY
5343
+ const _llms = arrayableToArray(tools.llm);
5344
+ const llmTools = _llms.length === 1 ? _llms[0] : joinLlmExecutionTools(..._llms);
5345
+ const taskEmbeddingPrompt = {
5346
+ title: 'Knowledge Search',
5347
+ modelRequirements: {
5348
+ modelVariant: 'EMBEDDING',
5349
+ modelName: firstKnowlegeIndex.modelName,
5350
+ },
5351
+ content: task.content,
5352
+ parameters,
5353
+ };
5354
+ const taskEmbeddingResult = await llmTools.callEmbeddingModel(taskEmbeddingPrompt);
5355
+ const knowledgePiecesWithRelevance = preparedPipeline.knowledgePieces.map((knowledgePiece) => {
5356
+ const { index } = knowledgePiece;
5357
+ const knowledgePieceIndex = index.find((i) => i.modelName === firstKnowlegeIndex.modelName);
5358
+ // <- TODO: Do not use just first knowledge piece and first index to determine embedding model
5359
+ if (knowledgePieceIndex === undefined) {
5360
+ return {
5361
+ content: knowledgePiece.content,
5362
+ relevance: 0,
5363
+ };
5364
+ }
5365
+ const relevance = computeCosineSimilarity(knowledgePieceIndex.position, taskEmbeddingResult.content);
5366
+ return {
5367
+ content: knowledgePiece.content,
5368
+ relevance,
5369
+ };
5370
+ });
5371
+ const knowledgePiecesSorted = knowledgePiecesWithRelevance.sort((a, b) => a.relevance - b.relevance);
5372
+ const knowledgePiecesLimited = knowledgePiecesSorted.slice(0, 5);
5373
+ console.log('!!! Embedding', {
5374
+ task,
5375
+ taskEmbeddingPrompt,
5376
+ taskEmbeddingResult,
5377
+ firstKnowlegePiece,
5378
+ firstKnowlegeIndex,
5379
+ knowledgePiecesWithRelevance,
5380
+ knowledgePiecesSorted,
5381
+ knowledgePiecesLimited,
5382
+ });
5383
+ return knowledgePiecesToString(knowledgePiecesLimited);
5384
+ }
5385
+ catch (error) {
5386
+ assertsError(error);
5387
+ console.error('Error in `getKnowledgeForTask`', error);
5388
+ // Note: If the LLM fails, just return all knowledge pieces
5389
+ return knowledgePiecesToString(preparedPipeline.knowledgePieces);
5390
+ }
5141
5391
  }
5392
+ /**
5393
+ * TODO: !!!! Verify if this is working
5394
+ * TODO: [♨] Implement Better - use keyword search
5395
+ * TODO: [♨] Examples of values
5396
+ */
5142
5397
 
5143
5398
  /**
5144
- * @@@
5399
+ * Retrieves all reserved parameters for a given pipeline task, including context, knowledge, examples, and metadata.
5400
+ * Ensures all reserved parameters are defined and throws if any are missing.
5401
+ *
5402
+ * @param options - Options including tools, pipeline, task, and context.
5403
+ * @returns An object containing all reserved parameters for the task.
5145
5404
  *
5146
5405
  * @private internal utility of `createPipelineExecutor`
5147
5406
  */
5148
5407
  async function getReservedParametersForTask(options) {
5149
- const { preparedPipeline, task, pipelineIdentification } = options;
5408
+ const { tools, preparedPipeline, task, parameters, pipelineIdentification } = options;
5150
5409
  const context = await getContextForTask(); // <- [🏍]
5151
- const knowledge = await getKnowledgeForTask({ preparedPipeline, task });
5410
+ const knowledge = await getKnowledgeForTask({ tools, preparedPipeline, task, parameters });
5152
5411
  const examples = await getExamplesForTask();
5153
5412
  const currentDate = new Date().toISOString(); // <- TODO: [🧠][💩] Better
5154
5413
  const modelName = RESERVED_PARAMETER_MISSING_VALUE;
@@ -5174,23 +5433,21 @@ async function getReservedParametersForTask(options) {
5174
5433
  }
5175
5434
 
5176
5435
  /**
5177
- * @@@
5436
+ * Executes a single task within a pipeline, handling parameter validation, error checking, and progress reporting.
5437
+ *
5438
+ * @param options - Options for execution, including the task, pipeline, parameters, and callbacks.
5439
+ * @returns The output parameters produced by the task.
5178
5440
  *
5179
5441
  * @private internal utility of `createPipelineExecutor`
5180
5442
  */
5181
5443
  async function executeTask(options) {
5182
5444
  const { currentTask, preparedPipeline, parametersToPass, tools, onProgress, $executionReport, pipelineIdentification, maxExecutionAttempts, maxParallelCount, csvSettings, isVerbose, rootDirname, cacheDirname, intermediateFilesStrategy, isAutoInstalled, isNotPreparedWarningSupressed, } = options;
5183
5445
  const priority = preparedPipeline.tasks.length - preparedPipeline.tasks.indexOf(currentTask);
5184
- await onProgress({
5185
- outputParameters: {
5186
- [currentTask.resultingParameterName]: '', // <- TODO: [🧠] What is the best value here?
5187
- },
5188
- });
5189
5446
  // Note: Check consistency of used and dependent parameters which was also done in `validatePipeline`, but it’s good to doublecheck
5190
5447
  const usedParameterNames = extractParameterNamesFromTask(currentTask);
5191
5448
  const dependentParameterNames = new Set(currentTask.dependentParameterNames);
5192
5449
  // TODO: [👩🏾‍🤝‍👩🏻] Use here `mapAvailableToExpectedParameters`
5193
- if (union(difference(usedParameterNames, dependentParameterNames), difference(dependentParameterNames, usedParameterNames)).size !== 0) {
5450
+ if (difference(union(difference(usedParameterNames, dependentParameterNames), difference(dependentParameterNames, usedParameterNames)), new Set(RESERVED_PARAMETER_NAMES)).size !== 0) {
5194
5451
  throw new UnexpectedError(spaceTrim$1((block) => `
5195
5452
  Dependent parameters are not consistent with used parameters:
5196
5453
 
@@ -5210,9 +5467,11 @@ async function executeTask(options) {
5210
5467
  }
5211
5468
  const definedParameters = Object.freeze({
5212
5469
  ...(await getReservedParametersForTask({
5470
+ tools,
5213
5471
  preparedPipeline,
5214
5472
  task: currentTask,
5215
5473
  pipelineIdentification,
5474
+ parameters: parametersToPass,
5216
5475
  })),
5217
5476
  ...parametersToPass,
5218
5477
  });
@@ -5258,6 +5517,7 @@ async function executeTask(options) {
5258
5517
  preparedPipeline,
5259
5518
  tools,
5260
5519
  $executionReport,
5520
+ onProgress,
5261
5521
  pipelineIdentification,
5262
5522
  maxExecutionAttempts,
5263
5523
  maxParallelCount,
@@ -5285,7 +5545,8 @@ async function executeTask(options) {
5285
5545
  */
5286
5546
 
5287
5547
  /**
5288
- * @@@
5548
+ * Filters and returns only the output parameters from the provided pipeline execution options.
5549
+ * Adds warnings for any expected output parameters that are missing.
5289
5550
  *
5290
5551
  * @private internal utility of `createPipelineExecutor`
5291
5552
  */
@@ -5310,9 +5571,12 @@ function filterJustOutputParameters(options) {
5310
5571
  }
5311
5572
 
5312
5573
  /**
5313
- * @@@
5574
+ * Executes an entire pipeline, resolving tasks in dependency order, handling errors, and reporting progress.
5314
5575
  *
5315
- * Note: This is not a `PipelineExecutor` (which is binded with one exact pipeline), but a utility function of `createPipelineExecutor` which creates `PipelineExecutor`
5576
+ * Note: This is not a `PipelineExecutor` (which is bound to a single pipeline), but a utility function used by `createPipelineExecutor` to create a `PipelineExecutor`.
5577
+ *
5578
+ * @param options - Options for execution, including input parameters, pipeline, and callbacks.
5579
+ * @returns The result of the pipeline execution, including output parameters, errors, and usage statistics.
5316
5580
  *
5317
5581
  * @private internal utility of `createPipelineExecutor`
5318
5582
  */
@@ -5635,6 +5899,22 @@ function createPipelineExecutor(options) {
5635
5899
  cacheDirname,
5636
5900
  intermediateFilesStrategy,
5637
5901
  isAutoInstalled,
5902
+ }).catch((error) => {
5903
+ assertsError(error);
5904
+ return exportJson({
5905
+ name: 'pipelineExecutorResult',
5906
+ message: `Unuccessful PipelineExecutorResult, last catch`,
5907
+ order: [],
5908
+ value: {
5909
+ isSuccessful: false,
5910
+ errors: [serializeError(error)],
5911
+ warnings: [],
5912
+ usage: UNCERTAIN_USAGE,
5913
+ executionReport: null,
5914
+ outputParameters: {},
5915
+ preparedPipeline,
5916
+ },
5917
+ });
5638
5918
  });
5639
5919
  };
5640
5920
  const pipelineExecutor = (inputParameters) => createTask({
@@ -5895,8 +6175,8 @@ class MarkitdownScraper {
5895
6175
  extension: 'md',
5896
6176
  isVerbose,
5897
6177
  });
5898
- // TODO: @@@ Preserve, delete or modify
5899
- // Note: Running Pandoc ONLY if the file in the cache does not exist
6178
+ // TODO: Determine if Markitdown conversion should run only if the cache file doesn't exist, or always.
6179
+ // Note: Running Markitdown conversion ONLY if the file in the cache does not exist
5900
6180
  if (!(await isFileExisting(cacheFilehandler.filename, this.tools.fs))) {
5901
6181
  const src = source.filename || source.url || null;
5902
6182
  // console.log('!!', { src, source, cacheFilehandler });
@@ -5918,11 +6198,11 @@ class MarkitdownScraper {
5918
6198
  return cacheFilehandler;
5919
6199
  }
5920
6200
  /**
5921
- * Scrapes the docx file and returns the knowledge pieces or `null` if it can't scrape it
6201
+ * Scrapes the source document (PDF, DOCX, etc.) and returns the knowledge pieces or `null` if it can't scrape it.
5922
6202
  */
5923
6203
  async scrape(source) {
5924
6204
  const cacheFilehandler = await this.$convert(source);
5925
- // TODO: @@@ Preserve, delete or modify
6205
+ // TODO: Ensure this correctly creates the source object for the internal MarkdownScraper using the converted file.
5926
6206
  const markdownSource = {
5927
6207
  source: source.source,
5928
6208
  filename: cacheFilehandler.filename,
@@ -6066,7 +6346,8 @@ class PdfScraper {
6066
6346
  */
6067
6347
 
6068
6348
  /**
6069
- * @@@
6349
+ * Factory function to create an instance of PdfScraper.
6350
+ * It bundles the scraper class with its metadata.
6070
6351
  *
6071
6352
  * @public exported from `@promptbook/pdf`
6072
6353
  */