@promptbook/legacy-documents 0.92.0-3 → 0.92.0-30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. package/esm/index.es.js +557 -276
  2. package/esm/index.es.js.map +1 -1
  3. package/esm/typings/src/_packages/browser.index.d.ts +2 -0
  4. package/esm/typings/src/_packages/core.index.d.ts +22 -6
  5. package/esm/typings/src/_packages/deepseek.index.d.ts +2 -0
  6. package/esm/typings/src/_packages/google.index.d.ts +2 -0
  7. package/esm/typings/src/_packages/types.index.d.ts +4 -2
  8. package/esm/typings/src/_packages/utils.index.d.ts +2 -0
  9. package/esm/typings/src/cli/common/$provideLlmToolsForCli.d.ts +1 -1
  10. package/esm/typings/src/collection/PipelineCollection.d.ts +0 -2
  11. package/esm/typings/src/collection/SimplePipelineCollection.d.ts +1 -1
  12. package/esm/typings/src/commands/FOREACH/ForeachJson.d.ts +6 -6
  13. package/esm/typings/src/commands/FOREACH/foreachCommandParser.d.ts +0 -2
  14. package/esm/typings/src/commands/FORMFACTOR/formfactorCommandParser.d.ts +1 -1
  15. package/esm/typings/src/commands/_BOILERPLATE/boilerplateCommandParser.d.ts +1 -1
  16. package/esm/typings/src/commands/_common/types/CommandParser.d.ts +36 -28
  17. package/esm/typings/src/config.d.ts +41 -11
  18. package/esm/typings/src/constants.d.ts +43 -2
  19. package/esm/typings/src/conversion/archive/loadArchive.d.ts +2 -2
  20. package/esm/typings/src/errors/0-BoilerplateError.d.ts +2 -2
  21. package/esm/typings/src/executables/$provideExecutablesForNode.d.ts +1 -1
  22. package/esm/typings/src/executables/apps/locateLibreoffice.d.ts +2 -1
  23. package/esm/typings/src/executables/apps/locatePandoc.d.ts +2 -1
  24. package/esm/typings/src/executables/platforms/locateAppOnLinux.d.ts +2 -1
  25. package/esm/typings/src/executables/platforms/locateAppOnMacOs.d.ts +2 -1
  26. package/esm/typings/src/executables/platforms/locateAppOnWindows.d.ts +2 -1
  27. package/esm/typings/src/execution/AbstractTaskResult.d.ts +1 -1
  28. package/esm/typings/src/execution/CommonToolsOptions.d.ts +5 -1
  29. package/esm/typings/src/execution/LlmExecutionToolsConstructor.d.ts +2 -1
  30. package/esm/typings/src/execution/PipelineExecutorResult.d.ts +4 -2
  31. package/esm/typings/src/execution/createPipelineExecutor/$OngoingTaskResult.d.ts +12 -9
  32. package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +12 -9
  33. package/esm/typings/src/execution/createPipelineExecutor/20-executeTask.d.ts +11 -8
  34. package/esm/typings/src/execution/createPipelineExecutor/30-executeFormatSubvalues.d.ts +15 -3
  35. package/esm/typings/src/execution/createPipelineExecutor/40-executeAttempts.d.ts +20 -14
  36. package/esm/typings/src/execution/createPipelineExecutor/computeCosineSimilarity.d.ts +13 -0
  37. package/esm/typings/src/execution/createPipelineExecutor/filterJustOutputParameters.d.ts +7 -6
  38. package/esm/typings/src/execution/createPipelineExecutor/getContextForTask.d.ts +5 -1
  39. package/esm/typings/src/execution/createPipelineExecutor/getExamplesForTask.d.ts +1 -1
  40. package/esm/typings/src/execution/createPipelineExecutor/getKnowledgeForTask.d.ts +21 -5
  41. package/esm/typings/src/execution/createPipelineExecutor/getReservedParametersForTask.d.ts +19 -5
  42. package/esm/typings/src/execution/createPipelineExecutor/knowledgePiecesToString.d.ts +9 -0
  43. package/esm/typings/src/execution/translation/automatic-translate/automatic-translators/LindatAutomaticTranslator.d.ts +4 -4
  44. package/esm/typings/src/execution/utils/checkExpectations.d.ts +1 -1
  45. package/esm/typings/src/execution/utils/uncertainNumber.d.ts +3 -2
  46. package/esm/typings/src/formats/_common/{FormatDefinition.d.ts → FormatParser.d.ts} +8 -6
  47. package/esm/typings/src/formats/_common/FormatSubvalueParser.d.ts +66 -0
  48. package/esm/typings/src/formats/csv/CsvFormatParser.d.ts +17 -0
  49. package/esm/typings/src/formats/csv/CsvSettings.d.ts +2 -2
  50. package/esm/typings/src/formats/csv/utils/csvParse.d.ts +12 -0
  51. package/esm/typings/src/formats/csv/utils/isValidCsvString.d.ts +1 -1
  52. package/esm/typings/src/formats/index.d.ts +2 -2
  53. package/esm/typings/src/formats/json/{JsonFormatDefinition.d.ts → JsonFormatParser.d.ts} +6 -6
  54. package/esm/typings/src/formats/json/utils/isValidJsonString.d.ts +1 -1
  55. package/esm/typings/src/formats/json/utils/jsonParse.d.ts +8 -0
  56. package/esm/typings/src/formats/text/{TextFormatDefinition.d.ts → TextFormatParser.d.ts} +7 -7
  57. package/esm/typings/src/formats/xml/XmlFormatParser.d.ts +19 -0
  58. package/esm/typings/src/formats/xml/utils/isValidXmlString.d.ts +1 -1
  59. package/esm/typings/src/formfactors/_boilerplate/BoilerplateFormfactorDefinition.d.ts +3 -2
  60. package/esm/typings/src/formfactors/_common/AbstractFormfactorDefinition.d.ts +16 -7
  61. package/esm/typings/src/formfactors/_common/FormfactorDefinition.d.ts +3 -1
  62. package/esm/typings/src/formfactors/_common/string_formfactor_name.d.ts +2 -1
  63. package/esm/typings/src/formfactors/chatbot/ChatbotFormfactorDefinition.d.ts +2 -2
  64. package/esm/typings/src/formfactors/completion/CompletionFormfactorDefinition.d.ts +29 -0
  65. package/esm/typings/src/formfactors/generator/GeneratorFormfactorDefinition.d.ts +2 -1
  66. package/esm/typings/src/formfactors/generic/GenericFormfactorDefinition.d.ts +2 -2
  67. package/esm/typings/src/formfactors/index.d.ts +33 -8
  68. package/esm/typings/src/formfactors/matcher/MatcherFormfactorDefinition.d.ts +4 -2
  69. package/esm/typings/src/formfactors/sheets/SheetsFormfactorDefinition.d.ts +3 -2
  70. package/esm/typings/src/formfactors/translator/TranslatorFormfactorDefinition.d.ts +3 -2
  71. package/esm/typings/src/high-level-abstractions/index.d.ts +2 -2
  72. package/esm/typings/src/llm-providers/_common/register/$llmToolsMetadataRegister.d.ts +3 -3
  73. package/esm/typings/src/llm-providers/_common/register/$llmToolsRegister.d.ts +3 -3
  74. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsConfigurationFromEnv.d.ts +4 -4
  75. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsForTestingAndScriptsAndPlayground.d.ts +4 -3
  76. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsFromEnv.d.ts +17 -4
  77. package/esm/typings/src/llm-providers/_common/register/LlmToolsConfiguration.d.ts +11 -4
  78. package/esm/typings/src/llm-providers/_common/register/LlmToolsMetadata.d.ts +27 -5
  79. package/esm/typings/src/llm-providers/_common/register/LlmToolsOptions.d.ts +9 -2
  80. package/esm/typings/src/llm-providers/_common/register/createLlmToolsFromConfiguration.d.ts +12 -3
  81. package/esm/typings/src/llm-providers/_common/utils/cache/CacheItem.d.ts +10 -5
  82. package/esm/typings/src/llm-providers/_common/utils/cache/CacheLlmToolsOptions.d.ts +5 -3
  83. package/esm/typings/src/llm-providers/_common/utils/cache/cacheLlmTools.d.ts +3 -3
  84. package/esm/typings/src/llm-providers/_common/utils/count-total-usage/limitTotalUsage.d.ts +5 -5
  85. package/esm/typings/src/llm-providers/anthropic-claude/anthropic-claude-models.d.ts +1 -1
  86. package/esm/typings/src/llm-providers/azure-openai/AzureOpenAiExecutionTools.d.ts +4 -0
  87. package/esm/typings/src/llm-providers/deepseek/deepseek-models.d.ts +23 -0
  88. package/esm/typings/src/llm-providers/google/google-models.d.ts +23 -0
  89. package/esm/typings/src/llm-providers/openai/OpenAiExecutionTools.d.ts +4 -0
  90. package/esm/typings/src/llm-providers/openai/openai-models.d.ts +1 -1
  91. package/esm/typings/src/llm-providers/openai/register-configuration.d.ts +2 -2
  92. package/esm/typings/src/llm-providers/openai/register-constructor.d.ts +2 -2
  93. package/esm/typings/src/migrations/migratePipeline.d.ts +9 -0
  94. package/esm/typings/src/other/templates/getBookTemplates.d.ts +2 -2
  95. package/esm/typings/src/personas/preparePersona.d.ts +1 -1
  96. package/esm/typings/src/pipeline/PipelineInterface/PipelineInterface.d.ts +3 -3
  97. package/esm/typings/src/pipeline/PipelineInterface/constants.d.ts +1 -1
  98. package/esm/typings/src/pipeline/PipelineInterface/getPipelineInterface.d.ts +1 -1
  99. package/esm/typings/src/pipeline/PipelineInterface/isPipelineImplementingInterface.d.ts +5 -4
  100. package/esm/typings/src/pipeline/PipelineInterface/isPipelineInterfacesEqual.d.ts +1 -1
  101. package/esm/typings/src/pipeline/PipelineJson/CommonTaskJson.d.ts +9 -6
  102. package/esm/typings/src/pipeline/PipelineJson/PersonaJson.d.ts +4 -2
  103. package/esm/typings/src/pipeline/PipelineJson/PipelineJson.d.ts +3 -2
  104. package/esm/typings/src/pipeline/PipelineString.d.ts +3 -1
  105. package/esm/typings/src/pipeline/book-notation.d.ts +2 -2
  106. package/esm/typings/src/postprocessing/utils/extractJsonBlock.d.ts +1 -1
  107. package/esm/typings/src/prepare/prepareTasks.d.ts +7 -4
  108. package/esm/typings/src/remote-server/openapi-types.d.ts +348 -6
  109. package/esm/typings/src/remote-server/openapi.d.ts +398 -4
  110. package/esm/typings/src/remote-server/types/RemoteServerOptions.d.ts +2 -1
  111. package/esm/typings/src/scrapers/_boilerplate/BoilerplateScraper.d.ts +3 -3
  112. package/esm/typings/src/scrapers/_boilerplate/createBoilerplateScraper.d.ts +1 -1
  113. package/esm/typings/src/scrapers/_boilerplate/register-metadata.d.ts +1 -1
  114. package/esm/typings/src/scrapers/_common/Converter.d.ts +3 -1
  115. package/esm/typings/src/scrapers/_common/Scraper.d.ts +4 -3
  116. package/esm/typings/src/scrapers/_common/ScraperIntermediateSource.d.ts +4 -2
  117. package/esm/typings/src/scrapers/_common/register/$provideFilesystemForNode.d.ts +2 -1
  118. package/esm/typings/src/scrapers/_common/register/$provideScrapersForBrowser.d.ts +6 -3
  119. package/esm/typings/src/scrapers/_common/register/$provideScrapersForNode.d.ts +3 -5
  120. package/esm/typings/src/scrapers/_common/register/$scrapersMetadataRegister.d.ts +3 -3
  121. package/esm/typings/src/scrapers/_common/register/$scrapersRegister.d.ts +3 -2
  122. package/esm/typings/src/scrapers/_common/register/ScraperAndConverterMetadata.d.ts +8 -5
  123. package/esm/typings/src/scrapers/_common/register/ScraperConstructor.d.ts +2 -1
  124. package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +6 -5
  125. package/esm/typings/src/scrapers/_common/utils/makeKnowledgeSourceHandler.d.ts +3 -1
  126. package/esm/typings/src/scrapers/document/createDocumentScraper.d.ts +1 -1
  127. package/esm/typings/src/scrapers/document-legacy/createLegacyDocumentScraper.d.ts +2 -1
  128. package/esm/typings/src/scrapers/markdown/createMarkdownScraper.d.ts +4 -1
  129. package/esm/typings/src/scrapers/markitdown/MarkitdownScraper.d.ts +1 -1
  130. package/esm/typings/src/scrapers/pdf/createPdfScraper.d.ts +2 -1
  131. package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts +3 -4
  132. package/esm/typings/src/scripting/javascript/postprocessing-functions.d.ts +5 -1
  133. package/esm/typings/src/storage/file-cache-storage/FileCacheStorage.d.ts +12 -5
  134. package/esm/typings/src/storage/file-cache-storage/FileCacheStorageOptions.d.ts +4 -2
  135. package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.d.ts +2 -1
  136. package/esm/typings/src/storage/local-storage/getIndexedDbStorage.d.ts +10 -0
  137. package/esm/typings/src/storage/local-storage/utils/makePromptbookStorageFromIndexedDb.d.ts +7 -0
  138. package/esm/typings/src/storage/local-storage/utils/makePromptbookStorageFromWebStorage.d.ts +2 -1
  139. package/esm/typings/src/types/IntermediateFilesStrategy.d.ts +2 -1
  140. package/esm/typings/src/types/ModelVariant.d.ts +5 -5
  141. package/esm/typings/src/types/typeAliases.d.ts +17 -13
  142. package/esm/typings/src/utils/$Register.d.ts +8 -7
  143. package/esm/typings/src/utils/editable/edit-pipeline-string/addPipelineCommand.d.ts +2 -2
  144. package/esm/typings/src/utils/editable/edit-pipeline-string/deflatePipeline.d.ts +4 -1
  145. package/esm/typings/src/utils/editable/utils/isFlatPipeline.d.ts +2 -1
  146. package/esm/typings/src/utils/environment/$getGlobalScope.d.ts +2 -1
  147. package/esm/typings/src/utils/expectation-counters/index.d.ts +1 -1
  148. package/esm/typings/src/utils/markdown/extractAllListItemsFromMarkdown.d.ts +1 -1
  149. package/esm/typings/src/utils/normalization/nameToUriPart.d.ts +4 -4
  150. package/esm/typings/src/utils/normalization/nameToUriParts.d.ts +4 -4
  151. package/esm/typings/src/utils/normalization/normalize-to-kebab-case.d.ts +3 -3
  152. package/esm/typings/src/utils/normalization/normalizeTo_SCREAMING_CASE.d.ts +3 -3
  153. package/esm/typings/src/utils/normalization/normalizeTo_camelCase.d.ts +4 -4
  154. package/esm/typings/src/utils/normalization/normalizeTo_snake_case.d.ts +3 -3
  155. package/esm/typings/src/utils/normalization/removeDiacritics.d.ts +3 -3
  156. package/esm/typings/src/utils/normalization/searchKeywords.d.ts +4 -1
  157. package/esm/typings/src/utils/normalization/titleToName.d.ts +4 -4
  158. package/esm/typings/src/utils/organization/empty_object.d.ts +2 -2
  159. package/esm/typings/src/utils/organization/just_empty_object.d.ts +4 -4
  160. package/esm/typings/src/utils/parameters/mapAvailableToExpectedParameters.d.ts +7 -7
  161. package/esm/typings/src/utils/serialization/clonePipeline.d.ts +4 -3
  162. package/esm/typings/src/utils/serialization/deepClone.d.ts +5 -1
  163. package/esm/typings/src/utils/validators/javascriptName/isValidJavascriptName.d.ts +3 -3
  164. package/esm/typings/src/utils/validators/parameterName/validateParameterName.d.ts +5 -4
  165. package/esm/typings/src/version.d.ts +2 -1
  166. package/package.json +2 -2
  167. package/umd/index.umd.js +557 -276
  168. package/umd/index.umd.js.map +1 -1
  169. package/esm/typings/src/formats/_common/FormatSubvalueDefinition.d.ts +0 -31
  170. package/esm/typings/src/formats/csv/CsvFormatDefinition.d.ts +0 -17
  171. package/esm/typings/src/formats/xml/XmlFormatDefinition.d.ts +0 -19
package/umd/index.umd.js CHANGED
@@ -26,7 +26,7 @@
26
26
  * @generated
27
27
  * @see https://github.com/webgptorg/promptbook
28
28
  */
29
- const PROMPTBOOK_ENGINE_VERSION = '0.92.0-3';
29
+ const PROMPTBOOK_ENGINE_VERSION = '0.92.0-30';
30
30
  /**
31
31
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
32
32
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -102,6 +102,21 @@
102
102
  * @public exported from `@promptbook/core`
103
103
  */
104
104
  const DEFAULT_MAX_FILE_SIZE = 100 * 1024 * 1024; // 100MB
105
+ /**
106
+ * Threshold value that determines when a dataset is considered "big"
107
+ * and may require special handling or optimizations
108
+ *
109
+ * For example, when error occurs in one item of the big dataset, it will not fail the whole pipeline
110
+ *
111
+ * @public exported from `@promptbook/core`
112
+ */
113
+ const BIG_DATASET_TRESHOLD = 50;
114
+ /**
115
+ * Placeholder text used to represent a placeholder value of failed operation
116
+ *
117
+ * @public exported from `@promptbook/core`
118
+ */
119
+ const FAILED_VALUE_PLACEHOLDER = '!?';
105
120
  // <- TODO: [🧠] Better system for generator warnings - not always "code" and "by `@promptbook/cli`"
106
121
  /**
107
122
  * The maximum number of iterations for a loops
@@ -181,7 +196,7 @@
181
196
  const DEFAULT_SCRAPE_CACHE_DIRNAME = './.promptbook/scrape-cache';
182
197
  // <- TODO: [🧜‍♂️]
183
198
  /**
184
- * @@@
199
+ * Default settings for parsing and generating CSV files in Promptbook.
185
200
  *
186
201
  * @public exported from `@promptbook/core`
187
202
  */
@@ -192,19 +207,19 @@
192
207
  skipEmptyLines: true,
193
208
  });
194
209
  /**
195
- * @@@
210
+ * Controls whether verbose logging is enabled by default throughout the application.
196
211
  *
197
212
  * @public exported from `@promptbook/core`
198
213
  */
199
214
  let DEFAULT_IS_VERBOSE = false;
200
215
  /**
201
- * @@@
216
+ * Controls whether auto-installation of dependencies is enabled by default.
202
217
  *
203
218
  * @public exported from `@promptbook/core`
204
219
  */
205
220
  const DEFAULT_IS_AUTO_INSTALLED = false;
206
221
  /**
207
- * @@@
222
+ * Indicates whether pipeline logic validation is enabled. When true, the pipeline logic is checked for consistency.
208
223
  *
209
224
  * @private within the repository
210
225
  */
@@ -545,7 +560,8 @@
545
560
  */
546
561
 
547
562
  /**
548
- * @@@
563
+ * Converts a name to a properly formatted subfolder path for cache storage.
564
+ * Handles normalization and path formatting to create consistent cache directory structures.
549
565
  *
550
566
  * @private for `FileCacheStorage`
551
567
  */
@@ -798,10 +814,10 @@
798
814
  */
799
815
 
800
816
  /**
801
- * @@@
817
+ * Removes diacritic marks (accents) from characters in a string.
802
818
  *
803
- * @param input @@@
804
- * @returns @@@
819
+ * @param input The string containing diacritics to be normalized.
820
+ * @returns The string with diacritics removed or normalized.
805
821
  * @public exported from `@promptbook/utils`
806
822
  */
807
823
  function removeDiacritics(input) {
@@ -815,10 +831,10 @@
815
831
  */
816
832
 
817
833
  /**
818
- * @@@
834
+ * Converts a given text to kebab-case format.
819
835
  *
820
- * @param text @@@
821
- * @returns @@@
836
+ * @param text The text to be converted.
837
+ * @returns The kebab-case formatted string.
822
838
  * @example 'hello-world'
823
839
  * @example 'i-love-promptbook'
824
840
  * @public exported from `@promptbook/utils`
@@ -960,11 +976,11 @@
960
976
  }
961
977
 
962
978
  /**
963
- * @@@
979
+ * Converts a title string into a normalized name.
964
980
  *
965
- * @param value @@@
966
- * @returns @@@
967
- * @example @@@
981
+ * @param value The title string to be converted to a name.
982
+ * @returns A normalized name derived from the input title.
983
+ * @example 'Hello World!' -> 'hello-world'
968
984
  * @public exported from `@promptbook/utils`
969
985
  */
970
986
  function titleToName(value) {
@@ -984,9 +1000,8 @@
984
1000
  }
985
1001
 
986
1002
  /**
987
- * Create a filename for intermediate cache for scrapers
988
- *
989
- * Note: It also checks if directory exists and creates it if not
1003
+ * Retrieves an intermediate source for a scraper based on the knowledge source.
1004
+ * Manages the caching and retrieval of intermediate scraper results for optimized performance.
990
1005
  *
991
1006
  * @private as internal utility for scrapers
992
1007
  */
@@ -1037,7 +1052,7 @@
1037
1052
  * Note: [🟢] Code in this file should never be never released in packages that could be imported into browser environment
1038
1053
  */
1039
1054
 
1040
- var PipelineCollection = [{title:"Prepare Knowledge from Markdown",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book",formfactorName:"GENERIC",parameters:[{name:"knowledgeContent",description:"Markdown document content",isInput:true,isOutput:false},{name:"knowledgePieces",description:"The knowledge JSON object",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}",resultingParameterName:"knowledgePieces",dependentParameterNames:["knowledgeContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge from Markdown\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book`\n- INPUT PARAMETER `{knowledgeContent}` Markdown document content\n- OUTPUT PARAMETER `{knowledgePieces}` The knowledge JSON object\n\n## Knowledge\n\n<!-- TODO: [🍆] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}\n```\n\n`-> 
{knowledgePieces}`\n"}],sourceFile:"./books/prepare-knowledge-from-markdown.book"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-keywords.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"keywords",description:"Keywords separated by comma",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}",resultingParameterName:"keywords",dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Keywords\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-keywords.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{keywords}` Keywords separated by comma\n\n## Knowledge\n\n<!-- TODO: [🍆] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}\n```\n\n`-> {keywords}`\n"}],sourceFile:"./books/prepare-knowledge-keywords.book"},{title:"Prepare Knowledge-piece Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-title.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"title",description:"The title of the document",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just 
title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}",resultingParameterName:"title",expectations:{words:{min:1,max:8}},dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge-piece Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-title.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{title}` The title of the document\n\n## Knowledge\n\n- EXPECT MIN 1 WORD\n- EXPECT MAX 8 WORDS\n\n```markdown\nYou are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-knowledge-title.book"},{title:"Prepare Persona",pipelineUrl:"https://promptbook.studio/promptbook/prepare-persona.book",formfactorName:"GENERIC",parameters:[{name:"availableModelNames",description:"List of available model names separated by comma (,)",isInput:true,isOutput:false},{name:"personaDescription",description:"Description of the persona",isInput:true,isOutput:false},{name:"modelRequirements",description:"Specific requirements for the model",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-model-requirements",title:"Make modelRequirements",content:"You are experienced AI engineer, you need to create virtual assistant.\nWrite\n\n## Example\n\n```json\n{\n\"modelName\": \"gpt-4o\",\n\"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n\"temperature\": 0.7\n}\n```\n\n## Instructions\n\n- Your output format is JSON object\n- Write just the JSON object, no other text should be present\n- It contains the following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the 
model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nPick from the following models:\n\n- {availableModelNames}\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}",resultingParameterName:"modelRequirements",format:"JSON",dependentParameterNames:["availableModelNames","personaDescription"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Persona\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-persona.book`\n- INPUT PARAMETER `{availableModelNames}` List of available model names separated by comma (,)\n- INPUT PARAMETER `{personaDescription}` Description of the persona\n- OUTPUT PARAMETER `{modelRequirements}` Specific requirements for the model\n\n## Make modelRequirements\n\n- FORMAT JSON\n\n```markdown\nYou are experienced AI engineer, you need to create virtual assistant.\nWrite\n\n## Example\n\n\\`\\`\\`json\n{\n\"modelName\": \"gpt-4o\",\n\"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n\"temperature\": 0.7\n}\n\\`\\`\\`\n\n## Instructions\n\n- Your output format is JSON object\n- Write just the JSON object, no other text should be present\n- It contains the following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nPick from the following models:\n\n- {availableModelNames}\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. 
It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}\n```\n\n`-> {modelRequirements}`\n"}],sourceFile:"./books/prepare-persona.book"},{title:"Prepare Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-title.book",formfactorName:"GENERIC",parameters:[{name:"book",description:"The book to prepare the title for",isInput:true,isOutput:false},{name:"title",description:"Best title for the book",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-title",title:"Make title",content:"Make best title for given text which describes the workflow:\n\n## Rules\n\n- Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 
words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"✍ Convert Knowledge-piece to title\" but \"✍ Title\"_\n\n## The workflow\n\n> {book}",resultingParameterName:"title",expectations:{words:{min:1,max:8},lines:{min:1,max:1}},dependentParameterNames:["book"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-title.book`\n- INPUT PARAMETER `{book}` The book to prepare the title for\n- OUTPUT PARAMETER `{title}` Best title for the book\n\n## Make title\n\n- EXPECT MIN 1 Word\n- EXPECT MAX 8 Words\n- EXPECT EXACTLY 1 Line\n\n```markdown\nMake best title for given text which describes the workflow:\n\n## Rules\n\n- Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"✍ Convert Knowledge-piece to title\" but \"✍ Title\"_\n\n## The workflow\n\n> {book}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-title.book"}];
1055
+ var PipelineCollection = [{title:"Prepare Knowledge from Markdown",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book",formfactorName:"GENERIC",parameters:[{name:"knowledgeContent",description:"Markdown document content",isInput:true,isOutput:false},{name:"knowledgePieces",description:"The knowledge JSON object",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}",resultingParameterName:"knowledgePieces",dependentParameterNames:["knowledgeContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge from Markdown\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book`\n- INPUT PARAMETER `{knowledgeContent}` Markdown document content\n- OUTPUT PARAMETER `{knowledgePieces}` The knowledge JSON object\n\n## Knowledge\n\n<!-- TODO: [🍆] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}\n```\n\n`-> 
{knowledgePieces}`\n"}],sourceFile:"./books/prepare-knowledge-from-markdown.book"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-keywords.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"keywords",description:"Keywords separated by comma",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}",resultingParameterName:"keywords",dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Keywords\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-keywords.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{keywords}` Keywords separated by comma\n\n## Knowledge\n\n<!-- TODO: [🍆] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}\n```\n\n`-> {keywords}`\n"}],sourceFile:"./books/prepare-knowledge-keywords.book"},{title:"Prepare Knowledge-piece Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-title.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"title",description:"The title of the document",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just 
title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}",resultingParameterName:"title",expectations:{words:{min:1,max:8}},dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge-piece Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-title.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{title}` The title of the document\n\n## Knowledge\n\n- EXPECT MIN 1 WORD\n- EXPECT MAX 8 WORDS\n\n```markdown\nYou are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-knowledge-title.book"},{title:"Prepare Persona",pipelineUrl:"https://promptbook.studio/promptbook/prepare-persona.book",formfactorName:"GENERIC",parameters:[{name:"availableModels",description:"List of available model names together with their descriptions as JSON",isInput:true,isOutput:false},{name:"personaDescription",description:"Description of the persona",isInput:true,isOutput:false},{name:"modelsRequirements",description:"Specific requirements for the model",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-model-requirements",title:"Make modelRequirements",content:"You are an experienced AI engineer, you need to find the best models for virtual assistants:\n\n## Example\n\n```json\n[\n {\n \"modelName\": \"gpt-4o\",\n \"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n \"temperature\": 0.7\n },\n {\n \"modelName\": \"claude-3-5-sonnet\",\n \"systemMessage\": \"You are a friendly and knowledgeable chatbot.\",\n \"temperature\": 0.5\n }\n]\n```\n\n## Instructions\n\n- Your output format is JSON array\n- Sort best-fitting 
models first\n- Omit any models that are not suitable\n- Write just the JSON, no other text should be present\n- Array contain items with following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nHere are the available models:\n\n```json\n{availableModels}\n```\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}",resultingParameterName:"modelsRequirements",format:"JSON",dependentParameterNames:["availableModels","personaDescription"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Persona\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-persona.book`\n- INPUT PARAMETER `{availableModels}` List of available model names together with their descriptions as JSON\n- INPUT PARAMETER `{personaDescription}` Description of the persona\n- OUTPUT PARAMETER `{modelsRequirements}` Specific requirements for the model\n\n## Make modelRequirements\n\n- FORMAT JSON\n\n```markdown\nYou are an experienced AI engineer, you need to find the best models for virtual assistants:\n\n## Example\n\n\\`\\`\\`json\n[\n {\n \"modelName\": \"gpt-4o\",\n \"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n \"temperature\": 0.7\n },\n {\n \"modelName\": \"claude-3-5-sonnet\",\n \"systemMessage\": \"You are a friendly and knowledgeable chatbot.\",\n \"temperature\": 0.5\n }\n]\n\\`\\`\\`\n\n## Instructions\n\n- Your output format is JSON array\n- Sort best-fitting models first\n- Omit any models that are not suitable\n- Write just the JSON, no other text should be present\n- Array contain items with following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nHere are 
the available models:\n\n\\`\\`\\`json\n{availableModels}\n\\`\\`\\`\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}\n```\n\n`-> {modelsRequirements}`\n"}],sourceFile:"./books/prepare-persona.book"},{title:"Prepare Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-title.book",formfactorName:"GENERIC",parameters:[{name:"book",description:"The book to prepare the title for",isInput:true,isOutput:false},{name:"title",description:"Best title for the book",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-title",title:"Make title",content:"Make best title for given text which describes the workflow:\n\n## Rules\n\n- Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"✍ Convert Knowledge-piece to title\" but \"✍ Title\"_\n\n## The workflow\n\n> {book}",resultingParameterName:"title",expectations:{words:{min:1,max:8},lines:{min:1,max:1}},dependentParameterNames:["book"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-title.book`\n- INPUT PARAMETER `{book}` The book to prepare the title for\n- OUTPUT PARAMETER `{title}` Best title for the book\n\n## Make title\n\n- EXPECT MIN 1 Word\n- EXPECT MAX 8 Words\n- EXPECT EXACTLY 1 Line\n\n```markdown\nMake best title for given text which describes the workflow:\n\n## Rules\n\n- 
Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"✍ Convert Knowledge-piece to title\" but \"✍ Title\"_\n\n## The workflow\n\n> {book}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-title.book"}];
1041
1056
 
1042
1057
  /**
1043
1058
  * Checks if value is valid email
@@ -1122,7 +1137,7 @@
1122
1137
  * Function isValidJsonString will tell you if the string is valid JSON or not
1123
1138
  *
1124
1139
  * @param value The string to check
1125
- * @returns True if the string is a valid JSON string, false otherwise
1140
+ * @returns `true` if the string is a valid JSON string, false otherwise
1126
1141
  *
1127
1142
  * @public exported from `@promptbook/utils`
1128
1143
  */
@@ -1533,8 +1548,12 @@
1533
1548
  */
1534
1549
 
1535
1550
  /**
1536
- * @@@
1551
+ * Creates a deep clone of the given object
1552
+ *
1553
+ * Note: This method only works for objects that are fully serializable to JSON and do not contain functions, Dates, or special types.
1537
1554
  *
1555
+ * @param objectValue The object to clone.
1556
+ * @returns A deep, writable clone of the input object.
1538
1557
  * @public exported from `@promptbook/utils`
1539
1558
  */
1540
1559
  function deepClone(objectValue) {
@@ -1616,13 +1635,13 @@
1616
1635
  */
1617
1636
  const REPLACING_NONCE = 'ptbkauk42kV2dzao34faw7FudQUHYPtW';
1618
1637
  /**
1619
- * @@@
1638
+ * Placeholder value indicating a parameter is missing its value.
1620
1639
  *
1621
1640
  * @private within the repository
1622
1641
  */
1623
1642
  const RESERVED_PARAMETER_MISSING_VALUE = 'MISSING-' + REPLACING_NONCE;
1624
1643
  /**
1625
- * @@@
1644
+ * Placeholder value indicating a parameter is restricted and cannot be used directly.
1626
1645
  *
1627
1646
  * @private within the repository
1628
1647
  */
@@ -2080,7 +2099,7 @@
2080
2099
  */
2081
2100
  function unpreparePipeline(pipeline) {
2082
2101
  let { personas, knowledgeSources, tasks } = pipeline;
2083
- personas = personas.map((persona) => ({ ...persona, modelRequirements: undefined, preparationIds: undefined }));
2102
+ personas = personas.map((persona) => ({ ...persona, modelsRequirements: undefined, preparationIds: undefined }));
2084
2103
  knowledgeSources = knowledgeSources.map((knowledgeSource) => ({ ...knowledgeSource, preparationIds: undefined }));
2085
2104
  tasks = tasks.map((task) => {
2086
2105
  let { dependentParameterNames } = task;
@@ -2121,7 +2140,7 @@
2121
2140
  /**
2122
2141
  * Constructs a pipeline collection from pipelines
2123
2142
  *
2124
- * @param pipelines @@@
2143
+ * @param pipelines Array of pipeline JSON objects to include in the collection
2125
2144
  *
2126
2145
  * Note: During the construction logic of all pipelines are validated
2127
2146
  * Note: It is not recommended to use this constructor directly, use `createCollectionFromJson` *(or other variant)* instead
@@ -2267,15 +2286,21 @@
2267
2286
  * @public exported from `@promptbook/core`
2268
2287
  */
2269
2288
  function isPipelinePrepared(pipeline) {
2270
- // Note: Ignoring `pipeline.preparations` @@@
2271
- // Note: Ignoring `pipeline.knowledgePieces` @@@
2289
+ // Note: Ignoring `pipeline.preparations`
2290
+ // Note: Ignoring `pipeline.knowledgePieces`
2272
2291
  if (pipeline.title === undefined || pipeline.title === '' || pipeline.title === DEFAULT_BOOK_TITLE) {
2292
+ // TODO: !!! Comment this out
2293
+ console.log('Pipeline is not prepared because title is undefined or empty', pipeline);
2273
2294
  return false;
2274
2295
  }
2275
- if (!pipeline.personas.every((persona) => persona.modelRequirements !== undefined)) {
2296
+ if (!pipeline.personas.every((persona) => persona.modelsRequirements !== undefined)) {
2297
+ // TODO: !!! Comment this out
2298
+ console.log('Pipeline is not prepared because personas are not prepared', pipeline.personas);
2276
2299
  return false;
2277
2300
  }
2278
2301
  if (!pipeline.knowledgeSources.every((knowledgeSource) => knowledgeSource.preparationIds !== undefined)) {
2302
+ // TODO: !!! Comment this out
2303
+ console.log('Pipeline is not prepared because knowledge sources are not prepared', pipeline.knowledgeSources);
2279
2304
  return false;
2280
2305
  }
2281
2306
  /*
@@ -2296,36 +2321,6 @@
2296
2321
  * - [♨] Are tasks prepared
2297
2322
  */
2298
2323
 
2299
- /**
2300
- * Recursively converts JSON strings to JSON objects
2301
-
2302
- * @public exported from `@promptbook/utils`
2303
- */
2304
- function jsonStringsToJsons(object) {
2305
- if (object === null) {
2306
- return object;
2307
- }
2308
- if (Array.isArray(object)) {
2309
- return object.map(jsonStringsToJsons);
2310
- }
2311
- if (typeof object !== 'object') {
2312
- return object;
2313
- }
2314
- const newObject = { ...object };
2315
- for (const [key, value] of Object.entries(object)) {
2316
- if (typeof value === 'string' && isValidJsonString(value)) {
2317
- newObject[key] = JSON.parse(value);
2318
- }
2319
- else {
2320
- newObject[key] = jsonStringsToJsons(value);
2321
- }
2322
- }
2323
- return newObject;
2324
- }
2325
- /**
2326
- * TODO: Type the return type correctly
2327
- */
2328
-
2329
2324
  /**
2330
2325
  * This error indicates problems parsing the format value
2331
2326
  *
@@ -2509,6 +2504,101 @@
2509
2504
  * Note: [💞] Ignore a discrepancy between file name and entity name
2510
2505
  */
2511
2506
 
2507
+ /**
2508
+ * Serializes an error into a [🚉] JSON-serializable object
2509
+ *
2510
+ * @public exported from `@promptbook/utils`
2511
+ */
2512
+ function serializeError(error) {
2513
+ const { name, message, stack } = error;
2514
+ const { id } = error;
2515
+ if (!Object.keys(ALL_ERRORS).includes(name)) {
2516
+ console.error(spaceTrim__default["default"]((block) => `
2517
+
2518
+ Cannot serialize error with name "${name}"
2519
+
2520
+ Authors of Promptbook probably forgot to add this error into the list of errors:
2521
+ https://github.com/webgptorg/promptbook/blob/main/src/errors/0-index.ts
2522
+
2523
+
2524
+ ${block(stack || message)}
2525
+
2526
+ `));
2527
+ }
2528
+ return {
2529
+ name: name,
2530
+ message,
2531
+ stack,
2532
+ id, // Include id in the serialized object
2533
+ };
2534
+ }
2535
+
2536
+ /**
2537
+ * Converts a JavaScript Object Notation (JSON) string into an object.
2538
+ *
2539
+ * Note: This is wrapper around `JSON.parse()` with better error and type handling
2540
+ *
2541
+ * @public exported from `@promptbook/utils`
2542
+ */
2543
+ function jsonParse(value) {
2544
+ if (value === undefined) {
2545
+ throw new Error(`Can not parse JSON from undefined value.`);
2546
+ }
2547
+ else if (typeof value !== 'string') {
2548
+ console.error('Can not parse JSON from non-string value.', { text: value });
2549
+ throw new Error(spaceTrim__default["default"](`
2550
+ Can not parse JSON from non-string value.
2551
+
2552
+ The value type: ${typeof value}
2553
+ See more in console.
2554
+ `));
2555
+ }
2556
+ try {
2557
+ return JSON.parse(value);
2558
+ }
2559
+ catch (error) {
2560
+ if (!(error instanceof Error)) {
2561
+ throw error;
2562
+ }
2563
+ throw new Error(spaceTrim__default["default"]((block) => `
2564
+ ${block(error.message)}
2565
+
2566
+ The JSON text:
2567
+ ${block(value)}
2568
+ `));
2569
+ }
2570
+ }
2571
+
2572
+ /**
2573
+ * Recursively converts JSON strings to JSON objects
2574
+
2575
+ * @public exported from `@promptbook/utils`
2576
+ */
2577
+ function jsonStringsToJsons(object) {
2578
+ if (object === null) {
2579
+ return object;
2580
+ }
2581
+ if (Array.isArray(object)) {
2582
+ return object.map(jsonStringsToJsons);
2583
+ }
2584
+ if (typeof object !== 'object') {
2585
+ return object;
2586
+ }
2587
+ const newObject = { ...object };
2588
+ for (const [key, value] of Object.entries(object)) {
2589
+ if (typeof value === 'string' && isValidJsonString(value)) {
2590
+ newObject[key] = jsonParse(value);
2591
+ }
2592
+ else {
2593
+ newObject[key] = jsonStringsToJsons(value);
2594
+ }
2595
+ }
2596
+ return newObject;
2597
+ }
2598
+ /**
2599
+ * TODO: Type the return type correctly
2600
+ */
2601
+
2512
2602
  /**
2513
2603
  * Deserializes the error object
2514
2604
  *
@@ -2674,64 +2764,6 @@
2674
2764
  * TODO: [🐚] Split into more files and make `PrepareTask` & `RemoteTask` + split the function
2675
2765
  */
2676
2766
 
2677
- /**
2678
- * Serializes an error into a [🚉] JSON-serializable object
2679
- *
2680
- * @public exported from `@promptbook/utils`
2681
- */
2682
- function serializeError(error) {
2683
- const { name, message, stack } = error;
2684
- const { id } = error;
2685
- if (!Object.keys(ALL_ERRORS).includes(name)) {
2686
- console.error(spaceTrim__default["default"]((block) => `
2687
-
2688
- Cannot serialize error with name "${name}"
2689
-
2690
- Authors of Promptbook probably forgot to add this error into the list of errors:
2691
- https://github.com/webgptorg/promptbook/blob/main/src/errors/0-index.ts
2692
-
2693
-
2694
- ${block(stack || message)}
2695
-
2696
- `));
2697
- }
2698
- return {
2699
- name: name,
2700
- message,
2701
- stack,
2702
- id, // Include id in the serialized object
2703
- };
2704
- }
2705
-
2706
- /**
2707
- * Async version of Array.forEach
2708
- *
2709
- * @param array - Array to iterate over
2710
- * @param options - Options for the function
2711
- * @param callbackfunction - Function to call for each item
2712
- * @public exported from `@promptbook/utils`
2713
- * @deprecated [🪂] Use queues instead
2714
- */
2715
- async function forEachAsync(array, options, callbackfunction) {
2716
- const { maxParallelCount = Infinity } = options;
2717
- let index = 0;
2718
- let runningTasks = [];
2719
- const tasks = [];
2720
- for (const item of array) {
2721
- const currentIndex = index++;
2722
- const task = callbackfunction(item, currentIndex, array);
2723
- tasks.push(task);
2724
- runningTasks.push(task);
2725
- /* not await */ Promise.resolve(task).then(() => {
2726
- runningTasks = runningTasks.filter((t) => t !== task);
2727
- });
2728
- if (maxParallelCount < runningTasks.length) {
2729
- await Promise.race(runningTasks);
2730
- }
2731
- }
2732
- await Promise.all(tasks);
2733
- }
2734
-
2735
2767
  /**
2736
2768
  * Represents the uncertain value
2737
2769
  *
@@ -2775,7 +2807,7 @@
2775
2807
  *
2776
2808
  * @public exported from `@promptbook/core`
2777
2809
  */
2778
- $deepFreeze({
2810
+ const UNCERTAIN_USAGE = $deepFreeze({
2779
2811
  price: UNCERTAIN_ZERO_VALUE,
2780
2812
  input: {
2781
2813
  tokensCount: UNCERTAIN_ZERO_VALUE,
@@ -2800,6 +2832,35 @@
2800
2832
  * Note: [💞] Ignore a discrepancy between file name and entity name
2801
2833
  */
2802
2834
 
2835
+ /**
2836
+ * Async version of Array.forEach
2837
+ *
2838
+ * @param array - Array to iterate over
2839
+ * @param options - Options for the function
2840
+ * @param callbackfunction - Function to call for each item
2841
+ * @public exported from `@promptbook/utils`
2842
+ * @deprecated [🪂] Use queues instead
2843
+ */
2844
+ async function forEachAsync(array, options, callbackfunction) {
2845
+ const { maxParallelCount = Infinity } = options;
2846
+ let index = 0;
2847
+ let runningTasks = [];
2848
+ const tasks = [];
2849
+ for (const item of array) {
2850
+ const currentIndex = index++;
2851
+ const task = callbackfunction(item, currentIndex, array);
2852
+ tasks.push(task);
2853
+ runningTasks.push(task);
2854
+ /* not await */ Promise.resolve(task).then(() => {
2855
+ runningTasks = runningTasks.filter((t) => t !== task);
2856
+ });
2857
+ if (maxParallelCount < runningTasks.length) {
2858
+ await Promise.race(runningTasks);
2859
+ }
2860
+ }
2861
+ await Promise.all(tasks);
2862
+ }
2863
+
2803
2864
  /**
2804
2865
  * Function `addUsage` will add multiple usages into one
2805
2866
  *
@@ -3146,27 +3207,48 @@
3146
3207
  pipeline: await collection.getPipelineByUrl('https://promptbook.studio/promptbook/prepare-persona.book'),
3147
3208
  tools,
3148
3209
  });
3149
- // TODO: [🚐] Make arrayable LLMs -> single LLM DRY
3150
3210
  const _llms = arrayableToArray(tools.llm);
3151
3211
  const llmTools = _llms.length === 1 ? _llms[0] : joinLlmExecutionTools(..._llms);
3152
- const availableModels = await llmTools.listModels();
3153
- const availableModelNames = availableModels
3212
+ const availableModels = (await llmTools.listModels())
3154
3213
  .filter(({ modelVariant }) => modelVariant === 'CHAT')
3155
- .map(({ modelName }) => modelName)
3156
- .join(',');
3157
- const result = await preparePersonaExecutor({ availableModelNames, personaDescription }).asPromise();
3214
+ .map(({ modelName, modelDescription }) => ({
3215
+ modelName,
3216
+ modelDescription,
3217
+ // <- Note: `modelTitle` and `modelVariant` is not relevant for this task
3218
+ }));
3219
+ const result = await preparePersonaExecutor({
3220
+ availableModels /* <- Note: Passing as JSON */,
3221
+ personaDescription,
3222
+ }).asPromise();
3158
3223
  const { outputParameters } = result;
3159
- const { modelRequirements: modelRequirementsRaw } = outputParameters;
3160
- const modelRequirements = JSON.parse(modelRequirementsRaw);
3224
+ const { modelsRequirements: modelsRequirementsJson } = outputParameters;
3225
+ let modelsRequirementsUnchecked = jsonParse(modelsRequirementsJson);
3161
3226
  if (isVerbose) {
3162
- console.info(`PERSONA ${personaDescription}`, modelRequirements);
3227
+ console.info(`PERSONA ${personaDescription}`, modelsRequirementsUnchecked);
3163
3228
  }
3164
- const { modelName, systemMessage, temperature } = modelRequirements;
3165
- return {
3229
+ if (!Array.isArray(modelsRequirementsUnchecked)) {
3230
+ // <- TODO: Book should have syntax and system to enforce shape of JSON
3231
+ modelsRequirementsUnchecked = [modelsRequirementsUnchecked];
3232
+ /*
3233
+ throw new UnexpectedError(
3234
+ spaceTrim(
3235
+ (block) => `
3236
+ Invalid \`modelsRequirements\`:
3237
+
3238
+ \`\`\`json
3239
+ ${block(JSON.stringify(modelsRequirementsUnchecked, null, 4))}
3240
+ \`\`\`
3241
+ `,
3242
+ ),
3243
+ );
3244
+ */
3245
+ }
3246
+ const modelsRequirements = modelsRequirementsUnchecked.map((modelRequirements) => ({
3166
3247
  modelVariant: 'CHAT',
3167
- modelName,
3168
- systemMessage,
3169
- temperature,
3248
+ ...modelRequirements,
3249
+ }));
3250
+ return {
3251
+ modelsRequirements,
3170
3252
  };
3171
3253
  }
3172
3254
  /**
@@ -3177,7 +3259,8 @@
3177
3259
  */
3178
3260
 
3179
3261
  /**
3180
- * @@@
3262
+ * Safely retrieves the global scope object (window in browser, global in Node.js)
3263
+ * regardless of the JavaScript environment in which the code is running
3181
3264
  *
3182
3265
  * Note: `$` is used to indicate that this function is not a pure function - it access global scope
3183
3266
  *
@@ -3188,10 +3271,10 @@
3188
3271
  }
3189
3272
 
3190
3273
  /**
3191
- * @@@
3274
+ * Normalizes a text string to SCREAMING_CASE (all uppercase with underscores).
3192
3275
  *
3193
- * @param text @@@
3194
- * @returns @@@
3276
+ * @param text The text string to be converted to SCREAMING_CASE format.
3277
+ * @returns The normalized text in SCREAMING_CASE format.
3195
3278
  * @example 'HELLO_WORLD'
3196
3279
  * @example 'I_LOVE_PROMPTBOOK'
3197
3280
  * @public exported from `@promptbook/utils`
@@ -3243,10 +3326,10 @@
3243
3326
  */
3244
3327
 
3245
3328
  /**
3246
- * @@@
3329
+ * Normalizes a text string to snake_case format.
3247
3330
  *
3248
- * @param text @@@
3249
- * @returns @@@
3331
+ * @param text The text string to be converted to snake_case format.
3332
+ * @returns The normalized text in snake_case format.
3250
3333
  * @example 'hello_world'
3251
3334
  * @example 'i_love_promptbook'
3252
3335
  * @public exported from `@promptbook/utils`
@@ -3256,11 +3339,11 @@
3256
3339
  }
3257
3340
 
3258
3341
  /**
3259
- * Register is @@@
3342
+ * Global registry for storing and managing registered entities of a given type.
3260
3343
  *
3261
3344
  * Note: `$` is used to indicate that this function is not a pure function - it accesses and adds variables in global scope.
3262
3345
  *
3263
- * @private internal utility, exported are only signleton instances of this class
3346
+ * @private internal utility, exported are only singleton instances of this class
3264
3347
  */
3265
3348
  class $Register {
3266
3349
  constructor(registerName) {
@@ -3304,10 +3387,10 @@
3304
3387
  }
3305
3388
 
3306
3389
  /**
3307
- * @@@
3390
+ * Global registry for storing metadata about all available scrapers and converters.
3308
3391
  *
3309
- * Note: `$` is used to indicate that this interacts with the global scope
3310
- * @singleton Only one instance of each register is created per build, but thare can be more @@@
3392
+ * Note: `$` is used to indicate that this interacts with the global scope.
3393
+ * @singleton Only one instance of each register is created per build, but there can be more in different contexts (e.g., tests).
3311
3394
  * @public exported from `@promptbook/core`
3312
3395
  */
3313
3396
  const $scrapersMetadataRegister = new $Register('scrapers_metadata');
@@ -3316,10 +3399,11 @@
3316
3399
  */
3317
3400
 
3318
3401
  /**
3319
- * @@@
3402
+ * Registry for all available scrapers in the system.
3403
+ * Central point for registering and accessing different types of content scrapers.
3320
3404
  *
3321
3405
  * Note: `$` is used to indicate that this interacts with the global scope
3322
- * @singleton Only one instance of each register is created per build, but thare can be more @@@
3406
+ * @singleton Only one instance of each register is created per build, but there can be more than one in different build modules
3323
3407
  * @public exported from `@promptbook/core`
3324
3408
  */
3325
3409
  const $scrapersRegister = new $Register('scraper_constructors');
@@ -3487,7 +3571,9 @@
3487
3571
  */
3488
3572
 
3489
3573
  /**
3490
- * @@@
3574
+ * Factory function that creates a handler for processing knowledge sources.
3575
+ * Provides standardized processing of different types of knowledge sources
3576
+ * across various scraper implementations.
3491
3577
  *
3492
3578
  * @public exported from `@promptbook/core`
3493
3579
  */
@@ -3594,7 +3680,7 @@
3594
3680
  > },
3595
3681
  */
3596
3682
  async asJson() {
3597
- return JSON.parse(await tools.fs.readFile(filename, 'utf-8'));
3683
+ return jsonParse(await tools.fs.readFile(filename, 'utf-8'));
3598
3684
  },
3599
3685
  async asText() {
3600
3686
  return await tools.fs.readFile(filename, 'utf-8');
@@ -3728,9 +3814,12 @@
3728
3814
  */
3729
3815
 
3730
3816
  /**
3731
- * @@@
3817
+ * Prepares tasks by adding knowledge to the prompt and ensuring all necessary parameters are included.
3732
3818
  *
3733
- * @public exported from `@promptbook/core`
3819
+ * @param tasks Sequence of tasks that are chained together to form a pipeline
3820
+ * @returns A promise that resolves to the prepared tasks.
3821
+ *
3822
+ * @private internal utility of `preparePipeline`
3734
3823
  */
3735
3824
  async function prepareTasks(pipeline, tools, options) {
3736
3825
  const { maxParallelCount = DEFAULT_MAX_PARALLEL_COUNT } = options;
@@ -3852,14 +3941,14 @@
3852
3941
  // TODO: [🖌][🧠] Implement some `mapAsync` function
3853
3942
  const preparedPersonas = new Array(personas.length);
3854
3943
  await forEachAsync(personas, { maxParallelCount /* <- TODO: [🪂] When there are subtasks, this maximul limit can be broken */ }, async (persona, index) => {
3855
- const modelRequirements = await preparePersona(persona.description, { ...tools, llm: llmToolsWithUsage }, {
3944
+ const { modelsRequirements } = await preparePersona(persona.description, { ...tools, llm: llmToolsWithUsage }, {
3856
3945
  rootDirname,
3857
3946
  maxParallelCount /* <- TODO: [🪂] */,
3858
3947
  isVerbose,
3859
3948
  });
3860
3949
  const preparedPersona = {
3861
3950
  ...persona,
3862
- modelRequirements,
3951
+ modelsRequirements,
3863
3952
  preparationIds: [/* TODO: [🧊] -> */ currentPreparation.id],
3864
3953
  // <- TODO: [🍙] Make some standard order of json properties
3865
3954
  };
@@ -4167,7 +4256,7 @@
4167
4256
  }
4168
4257
 
4169
4258
  /**
4170
- * @@@
4259
+ * Contains configuration options for parsing and generating CSV files, such as delimiters and quoting rules.
4171
4260
  *
4172
4261
  * @public exported from `@promptbook/core`
4173
4262
  */
@@ -4176,11 +4265,29 @@
4176
4265
  // encoding: 'utf-8',
4177
4266
  });
4178
4267
 
4268
+ /**
4269
+ * Converts a CSV string into an object
4270
+ *
4271
+ * Note: This is wrapper around `papaparse.parse()` with better autohealing
4272
+ *
4273
+ * @private - for now until `@promptbook/csv` is released
4274
+ */
4275
+ function csvParse(value /* <- TODO: string_csv */, settings, schema /* <- TODO: Make CSV Schemas */) {
4276
+ settings = { ...settings, ...MANDATORY_CSV_SETTINGS };
4277
+ // Note: Autoheal invalid '\n' characters
4278
+ if (settings.newline && !settings.newline.includes('\r') && value.includes('\r')) {
4279
+ console.warn('CSV string contains carriage return characters, but in the CSV settings the `newline` setting does not include them. Autohealing the CSV string.');
4280
+ value = value.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
4281
+ }
4282
+ const csv = papaparse.parse(value, settings);
4283
+ return csv;
4284
+ }
4285
+
4179
4286
  /**
4180
4287
  * Function to check if a string is valid CSV
4181
4288
  *
4182
4289
  * @param value The string to check
4183
- * @returns True if the string is a valid CSV string, false otherwise
4290
+ * @returns `true` if the string is a valid CSV string, false otherwise
4184
4291
  *
4185
4292
  * @public exported from `@promptbook/utils`
4186
4293
  */
@@ -4204,7 +4311,7 @@
4204
4311
  * @public exported from `@promptbook/core`
4205
4312
  * <- TODO: [🏢] Export from package `@promptbook/csv`
4206
4313
  */
4207
- const CsvFormatDefinition = {
4314
+ const CsvFormatParser = {
4208
4315
  formatName: 'CSV',
4209
4316
  aliases: ['SPREADSHEET', 'TABLE'],
4210
4317
  isValid(value, settings, schema) {
@@ -4216,12 +4323,12 @@
4216
4323
  heal(value, settings, schema) {
4217
4324
  throw new Error('Not implemented');
4218
4325
  },
4219
- subvalueDefinitions: [
4326
+ subvalueParsers: [
4220
4327
  {
4221
4328
  subvalueName: 'ROW',
4222
- async mapValues(value, outputParameterName, settings, mapCallback) {
4223
- // TODO: [👨🏾‍🤝‍👨🏼] DRY csv parsing
4224
- const csv = papaparse.parse(value, { ...settings, ...MANDATORY_CSV_SETTINGS });
4329
+ async mapValues(options) {
4330
+ const { value, outputParameterName, settings, mapCallback, onProgress } = options;
4331
+ const csv = csvParse(value, settings);
4225
4332
  if (csv.errors.length !== 0) {
4226
4333
  throw new CsvFormatError(spaceTrim__default["default"]((block) => `
4227
4334
  CSV parsing error
@@ -4236,23 +4343,37 @@
4236
4343
  ${block(value)}
4237
4344
  `));
4238
4345
  }
4239
- const mappedData = await Promise.all(csv.data.map(async (row, index) => {
4346
+ const mappedData = [];
4347
+ const length = csv.data.length;
4348
+ for (let index = 0; index < length; index++) {
4349
+ const row = csv.data[index];
4240
4350
  if (row[outputParameterName]) {
4241
4351
  throw new CsvFormatError(`Can not overwrite existing column "${outputParameterName}" in CSV row`);
4242
4352
  }
4243
- return {
4353
+ const mappedRow = {
4244
4354
  ...row,
4245
- [outputParameterName]: await mapCallback(row, index),
4355
+ [outputParameterName]: await mapCallback(row, index, length),
4246
4356
  };
4247
- }));
4357
+ mappedData.push(mappedRow);
4358
+ if (onProgress) {
4359
+ // Note: Report the CSV with all rows mapped so far
4360
+ /*
4361
+ // TODO: [🛕] Report progress with all the rows including the pending ones
4362
+ const progressData = mappedData.map((row, i) =>
4363
+ i > index ? { ...row, [outputParameterName]: PENDING_VALUE_PLACEHOLDER } : row,
4364
+ );
4365
+ */
4366
+ await onProgress(papaparse.unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS }));
4367
+ }
4368
+ }
4248
4369
  return papaparse.unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
4249
4370
  },
4250
4371
  },
4251
4372
  {
4252
4373
  subvalueName: 'CELL',
4253
- async mapValues(value, outputParameterName, settings, mapCallback) {
4254
- // TODO: [👨🏾‍🤝‍👨🏼] DRY csv parsing
4255
- const csv = papaparse.parse(value, { ...settings, ...MANDATORY_CSV_SETTINGS });
4374
+ async mapValues(options) {
4375
+ const { value, settings, mapCallback, onProgress } = options;
4376
+ const csv = csvParse(value, settings);
4256
4377
  if (csv.errors.length !== 0) {
4257
4378
  throw new CsvFormatError(spaceTrim__default["default"]((block) => `
4258
4379
  CSV parsing error
@@ -4268,9 +4389,9 @@
4268
4389
  `));
4269
4390
  }
4270
4391
  const mappedData = await Promise.all(csv.data.map(async (row, rowIndex) => {
4271
- return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex) => {
4392
+ return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex, array) => {
4272
4393
  const index = rowIndex * Object.keys(row).length + columnIndex;
4273
- return /* not await */ mapCallback({ [key]: value }, index);
4394
+ return /* not await */ mapCallback({ [key]: value }, index, array.length);
4274
4395
  }));
4275
4396
  }));
4276
4397
  return papaparse.unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
@@ -4279,10 +4400,10 @@
4279
4400
  ],
4280
4401
  };
4281
4402
  /**
4282
- * TODO: [🍓] In `CsvFormatDefinition` implement simple `isValid`
4283
- * TODO: [🍓] In `CsvFormatDefinition` implement partial `canBeValid`
4284
- * TODO: [🍓] In `CsvFormatDefinition` implement `heal
4285
- * TODO: [🍓] In `CsvFormatDefinition` implement `subvalueDefinitions`
4403
+ * TODO: [🍓] In `CsvFormatParser` implement simple `isValid`
4404
+ * TODO: [🍓] In `CsvFormatParser` implement partial `canBeValid`
4405
+ * TODO: [🍓] In `CsvFormatParser` implement `heal
4406
+ * TODO: [🍓] In `CsvFormatParser` implement `subvalueParsers`
4286
4407
  * TODO: [🏢] Allow to expect something inside CSV objects and other formats
4287
4408
  */
4288
4409
 
@@ -4291,7 +4412,7 @@
4291
4412
  *
4292
4413
  * @private still in development [🏢]
4293
4414
  */
4294
- const JsonFormatDefinition = {
4415
+ const JsonFormatParser = {
4295
4416
  formatName: 'JSON',
4296
4417
  mimeType: 'application/json',
4297
4418
  isValid(value, settings, schema) {
@@ -4303,28 +4424,28 @@
4303
4424
  heal(value, settings, schema) {
4304
4425
  throw new Error('Not implemented');
4305
4426
  },
4306
- subvalueDefinitions: [],
4427
+ subvalueParsers: [],
4307
4428
  };
4308
4429
  /**
4309
4430
  * TODO: [🧠] Maybe propper instance of object
4310
4431
  * TODO: [0] Make string_serialized_json
4311
4432
  * TODO: [1] Make type for JSON Settings and Schema
4312
4433
  * TODO: [🧠] What to use for validating JSONs - JSON Schema, ZoD, typescript types/interfaces,...?
4313
- * TODO: [🍓] In `JsonFormatDefinition` implement simple `isValid`
4314
- * TODO: [🍓] In `JsonFormatDefinition` implement partial `canBeValid`
4315
- * TODO: [🍓] In `JsonFormatDefinition` implement `heal
4316
- * TODO: [🍓] In `JsonFormatDefinition` implement `subvalueDefinitions`
4434
+ * TODO: [🍓] In `JsonFormatParser` implement simple `isValid`
4435
+ * TODO: [🍓] In `JsonFormatParser` implement partial `canBeValid`
4436
+ * TODO: [🍓] In `JsonFormatParser` implement `heal
4437
+ * TODO: [🍓] In `JsonFormatParser` implement `subvalueParsers`
4317
4438
  * TODO: [🏢] Allow to expect something inside JSON objects and other formats
4318
4439
  */
4319
4440
 
4320
4441
  /**
4321
4442
  * Definition for any text - this will be always valid
4322
4443
  *
4323
- * Note: This is not useful for validation, but for splitting and mapping with `subvalueDefinitions`
4444
+ * Note: This is not useful for validation, but for splitting and mapping with `subvalueParsers`
4324
4445
  *
4325
4446
  * @public exported from `@promptbook/core`
4326
4447
  */
4327
- const TextFormatDefinition = {
4448
+ const TextFormatParser = {
4328
4449
  formatName: 'TEXT',
4329
4450
  isValid(value) {
4330
4451
  return typeof value === 'string';
@@ -4333,19 +4454,20 @@
4333
4454
  return typeof partialValue === 'string';
4334
4455
  },
4335
4456
  heal() {
4336
- throw new UnexpectedError('It does not make sense to call `TextFormatDefinition.heal`');
4457
+ throw new UnexpectedError('It does not make sense to call `TextFormatParser.heal`');
4337
4458
  },
4338
- subvalueDefinitions: [
4459
+ subvalueParsers: [
4339
4460
  {
4340
4461
  subvalueName: 'LINE',
4341
- async mapValues(value, outputParameterName, settings, mapCallback) {
4462
+ async mapValues(options) {
4463
+ const { value, mapCallback, onProgress } = options;
4342
4464
  const lines = value.split('\n');
4343
- const mappedLines = await Promise.all(lines.map((lineContent, lineNumber) =>
4465
+ const mappedLines = await Promise.all(lines.map((lineContent, lineNumber, array) =>
4344
4466
  // TODO: [🧠] Maybe option to skip empty line
4345
4467
  /* not await */ mapCallback({
4346
4468
  lineContent,
4347
4469
  // TODO: [🧠] Maybe also put here `lineNumber`
4348
- }, lineNumber)));
4470
+ }, lineNumber, array.length)));
4349
4471
  return mappedLines.join('\n');
4350
4472
  },
4351
4473
  },
@@ -4355,10 +4477,10 @@
4355
4477
  /**
4356
4478
  * TODO: [1] Make type for XML Text and Schema
4357
4479
  * TODO: [🧠][🤠] Here should be all words, characters, lines, paragraphs, pages available as subvalues
4358
- * TODO: [🍓] In `TextFormatDefinition` implement simple `isValid`
4359
- * TODO: [🍓] In `TextFormatDefinition` implement partial `canBeValid`
4360
- * TODO: [🍓] In `TextFormatDefinition` implement `heal
4361
- * TODO: [🍓] In `TextFormatDefinition` implement `subvalueDefinitions`
4480
+ * TODO: [🍓] In `TextFormatParser` implement simple `isValid`
4481
+ * TODO: [🍓] In `TextFormatParser` implement partial `canBeValid`
4482
+ * TODO: [🍓] In `TextFormatParser` implement `heal
4483
+ * TODO: [🍓] In `TextFormatParser` implement `subvalueParsers`
4362
4484
  * TODO: [🏢] Allow to expect something inside each item of list and other formats
4363
4485
  */
4364
4486
 
@@ -4366,7 +4488,7 @@
4366
4488
  * Function to check if a string is valid XML
4367
4489
  *
4368
4490
  * @param value
4369
- * @returns True if the string is a valid XML string, false otherwise
4491
+ * @returns `true` if the string is a valid XML string, false otherwise
4370
4492
  *
4371
4493
  * @public exported from `@promptbook/utils`
4372
4494
  */
@@ -4391,7 +4513,7 @@
4391
4513
  *
4392
4514
  * @private still in development [🏢]
4393
4515
  */
4394
- const XmlFormatDefinition = {
4516
+ const XmlFormatParser = {
4395
4517
  formatName: 'XML',
4396
4518
  mimeType: 'application/xml',
4397
4519
  isValid(value, settings, schema) {
@@ -4403,17 +4525,17 @@
4403
4525
  heal(value, settings, schema) {
4404
4526
  throw new Error('Not implemented');
4405
4527
  },
4406
- subvalueDefinitions: [],
4528
+ subvalueParsers: [],
4407
4529
  };
4408
4530
  /**
4409
4531
  * TODO: [🧠] Maybe propper instance of object
4410
4532
  * TODO: [0] Make string_serialized_xml
4411
4533
  * TODO: [1] Make type for XML Settings and Schema
4412
4534
  * TODO: [🧠] What to use for validating XMLs - XSD,...
4413
- * TODO: [🍓] In `XmlFormatDefinition` implement simple `isValid`
4414
- * TODO: [🍓] In `XmlFormatDefinition` implement partial `canBeValid`
4415
- * TODO: [🍓] In `XmlFormatDefinition` implement `heal
4416
- * TODO: [🍓] In `XmlFormatDefinition` implement `subvalueDefinitions`
4535
+ * TODO: [🍓] In `XmlFormatParser` implement simple `isValid`
4536
+ * TODO: [🍓] In `XmlFormatParser` implement partial `canBeValid`
4537
+ * TODO: [🍓] In `XmlFormatParser` implement `heal
4538
+ * TODO: [🍓] In `XmlFormatParser` implement `subvalueParsers`
4417
4539
  * TODO: [🏢] Allow to expect something inside XML and other formats
4418
4540
  */
4419
4541
 
@@ -4422,24 +4544,19 @@
4422
4544
  *
4423
4545
  * @private internal index of `...` <- TODO [🏢]
4424
4546
  */
4425
- const FORMAT_DEFINITIONS = [
4426
- JsonFormatDefinition,
4427
- XmlFormatDefinition,
4428
- TextFormatDefinition,
4429
- CsvFormatDefinition,
4430
- ];
4547
+ const FORMAT_DEFINITIONS = [JsonFormatParser, XmlFormatParser, TextFormatParser, CsvFormatParser];
4431
4548
  /**
4432
4549
  * Note: [💞] Ignore a discrepancy between file name and entity name
4433
4550
  */
4434
4551
 
4435
4552
  /**
4436
- * Maps available parameters to expected parameters
4553
+ * Maps available parameters to expected parameters for a pipeline task.
4437
4554
  *
4438
4555
  * The strategy is:
4439
- * 1) @@@
4440
- * 2) @@@
4556
+ * 1) First, match parameters by name where both available and expected.
4557
+ * 2) Then, if there are unmatched expected and available parameters, map them by order.
4441
4558
  *
4442
- * @throws {PipelineExecutionError} @@@
4559
+ * @throws {PipelineExecutionError} If the number of unmatched expected and available parameters does not match, or mapping is ambiguous.
4443
4560
  * @private within the repository used in `createPipelineExecutor`
4444
4561
  */
4445
4562
  function mapAvailableToExpectedParameters(options) {
@@ -4462,7 +4579,7 @@
4462
4579
  else if (!availableParametersNames.has(parameterName) && expectedParameterNames.has(parameterName)) ;
4463
4580
  }
4464
4581
  if (expectedParameterNames.size === 0) {
4465
- // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent @@@
4582
+ // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
4466
4583
  Object.freeze(mappedParameters);
4467
4584
  return mappedParameters;
4468
4585
  }
@@ -4493,7 +4610,7 @@
4493
4610
  for (let i = 0; i < expectedParameterNames.size; i++) {
4494
4611
  mappedParameters[expectedParameterNamesArray[i]] = availableParameters[availableParametersNamesArray[i]];
4495
4612
  }
4496
- // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent @@@
4613
+ // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
4497
4614
  Object.freeze(mappedParameters);
4498
4615
  return mappedParameters;
4499
4616
  }
@@ -4597,7 +4714,7 @@
4597
4714
  }
4598
4715
  /**
4599
4716
  * TODO: Add some auto-healing logic + extract YAML, JSON5, TOML, etc.
4600
- * TODO: [🏢] Make this logic part of `JsonFormatDefinition` or `isValidJsonString`
4717
+ * TODO: [🏢] Make this logic part of `JsonFormatParser` or `isValidJsonString`
4601
4718
  */
4602
4719
 
4603
4720
  /**
@@ -4640,10 +4757,12 @@
4640
4757
  throw new PipelineExecutionError('Parameter is already opened or not closed');
4641
4758
  }
4642
4759
  if (parameters[parameterName] === undefined) {
4760
+ console.log('!!! templateParameters 1', { parameterName, template, parameters });
4643
4761
  throw new PipelineExecutionError(`Parameter \`{${parameterName}}\` is not defined`);
4644
4762
  }
4645
4763
  let parameterValue = parameters[parameterName];
4646
4764
  if (parameterValue === undefined) {
4765
+ console.log('!!! templateParameters 2', { parameterName, template, parameters });
4647
4766
  throw new PipelineExecutionError(`Parameter \`{${parameterName}}\` is not defined`);
4648
4767
  }
4649
4768
  parameterValue = valueToString(parameterValue);
@@ -4799,7 +4918,7 @@
4799
4918
  PAGES: countPages,
4800
4919
  };
4801
4920
  /**
4802
- * TODO: [🧠][🤠] This should be probbably as part of `TextFormatDefinition`
4921
+ * TODO: [🧠][🤠] This should be probbably as part of `TextFormatParser`
4803
4922
  * Note: [💞] Ignore a discrepancy between file name and entity name
4804
4923
  */
4805
4924
 
@@ -4827,13 +4946,17 @@
4827
4946
  }
4828
4947
  /**
4829
4948
  * TODO: [💝] Unite object for expecting amount and format
4830
- * TODO: [🧠][🤠] This should be part of `TextFormatDefinition`
4949
+ * TODO: [🧠][🤠] This should be part of `TextFormatParser`
4831
4950
  * Note: [💝] and [🤠] are interconnected together
4832
4951
  */
4833
4952
 
4834
4953
  /**
4835
- * @@@
4954
+ * Executes a pipeline task with multiple attempts, including joker and retry logic. Handles different task types
4955
+ * (prompt, script, dialog, etc.), applies postprocessing, checks expectations, and updates the execution report.
4956
+ * Throws errors if execution fails after all attempts.
4836
4957
  *
4958
+ * @param options - The options for execution, including task, parameters, pipeline, and configuration.
4959
+ * @returns The result string of the executed task.
4837
4960
  * @private internal utility of `createPipelineExecutor`
4838
4961
  */
4839
4962
  async function executeAttempts(options) {
@@ -5055,7 +5178,7 @@
5055
5178
  if (task.format) {
5056
5179
  if (task.format === 'JSON') {
5057
5180
  if (!isValidJsonString($ongoingTaskResult.$resultString || '')) {
5058
- // TODO: [🏢] Do more universally via `FormatDefinition`
5181
+ // TODO: [🏢] Do more universally via `FormatParser`
5059
5182
  try {
5060
5183
  $ongoingTaskResult.$resultString = extractJsonBlock($ongoingTaskResult.$resultString || '');
5061
5184
  }
@@ -5157,12 +5280,16 @@
5157
5280
  */
5158
5281
 
5159
5282
  /**
5160
- * @@@
5283
+ * Executes a pipeline task that requires mapping or iterating over subvalues of a parameter (such as rows in a CSV).
5284
+ * Handles format and subformat resolution, error handling, and progress reporting.
5285
+ *
5286
+ * @param options - Options for execution, including task details and progress callback.
5287
+ * @returns The result of the subvalue mapping or execution attempts.
5161
5288
  *
5162
5289
  * @private internal utility of `createPipelineExecutor`
5163
5290
  */
5164
5291
  async function executeFormatSubvalues(options) {
5165
- const { task, jokerParameterNames, parameters, priority, csvSettings, pipelineIdentification } = options;
5292
+ const { task, jokerParameterNames, parameters, priority, csvSettings, onProgress, pipelineIdentification } = options;
5166
5293
  if (task.foreach === undefined) {
5167
5294
  return /* not await */ executeAttempts(options);
5168
5295
  }
@@ -5193,16 +5320,16 @@
5193
5320
  ${block(pipelineIdentification)}
5194
5321
  `));
5195
5322
  }
5196
- const subvalueDefinition = formatDefinition.subvalueDefinitions.find((subvalueDefinition) => [subvalueDefinition.subvalueName, ...(subvalueDefinition.aliases || [])].includes(task.foreach.subformatName));
5197
- if (subvalueDefinition === undefined) {
5323
+ const subvalueParser = formatDefinition.subvalueParsers.find((subvalueParser) => [subvalueParser.subvalueName, ...(subvalueParser.aliases || [])].includes(task.foreach.subformatName));
5324
+ if (subvalueParser === undefined) {
5198
5325
  throw new UnexpectedError(
5199
5326
  // <- TODO: [🧠][🧐] Should be formats fixed per promptbook version or behave as plugins (=> change UnexpectedError)
5200
5327
  spaceTrim__default["default"]((block) => `
5201
5328
  Unsupported subformat name "${task.foreach.subformatName}" for format "${task.foreach.formatName}"
5202
5329
 
5203
5330
  Available subformat names for format "${formatDefinition.formatName}":
5204
- ${block(formatDefinition.subvalueDefinitions
5205
- .map((subvalueDefinition) => subvalueDefinition.subvalueName)
5331
+ ${block(formatDefinition.subvalueParsers
5332
+ .map((subvalueParser) => subvalueParser.subvalueName)
5206
5333
  .map((subvalueName) => `- ${subvalueName}`)
5207
5334
  .join('\n'))}
5208
5335
 
@@ -5216,53 +5343,83 @@
5216
5343
  formatSettings = csvSettings;
5217
5344
  // <- TODO: [🤹‍♂️] More universal, make simmilar pattern for other formats for example \n vs \r\n in text
5218
5345
  }
5219
- const resultString = await subvalueDefinition.mapValues(parameterValue, task.foreach.outputSubparameterName, formatSettings, async (subparameters, index) => {
5220
- let mappedParameters;
5221
- // TODO: [🤹‍♂️][🪂] Limit to N concurrent executions
5222
- // TODO: When done [🐚] Report progress also for each subvalue here
5223
- try {
5224
- mappedParameters = mapAvailableToExpectedParameters({
5225
- expectedParameters: Object.fromEntries(task.foreach.inputSubparameterNames.map((subparameterName) => [subparameterName, null])),
5226
- availableParameters: subparameters,
5227
- });
5228
- }
5229
- catch (error) {
5230
- if (!(error instanceof PipelineExecutionError)) {
5231
- throw error;
5346
+ const resultString = await subvalueParser.mapValues({
5347
+ value: parameterValue,
5348
+ outputParameterName: task.foreach.outputSubparameterName,
5349
+ settings: formatSettings,
5350
+ onProgress(partialResultString) {
5351
+ return onProgress(Object.freeze({
5352
+ [task.resultingParameterName]: partialResultString,
5353
+ }));
5354
+ },
5355
+ async mapCallback(subparameters, index, length) {
5356
+ let mappedParameters;
5357
+ try {
5358
+ mappedParameters = mapAvailableToExpectedParameters({
5359
+ expectedParameters: Object.fromEntries(task.foreach.inputSubparameterNames.map((subparameterName) => [subparameterName, null])),
5360
+ availableParameters: subparameters,
5361
+ });
5232
5362
  }
5233
- throw new PipelineExecutionError(spaceTrim__default["default"]((block) => `
5234
- ${error.message}
5363
+ catch (error) {
5364
+ if (!(error instanceof PipelineExecutionError)) {
5365
+ throw error;
5366
+ }
5367
+ const highLevelError = new PipelineExecutionError(spaceTrim__default["default"]((block) => `
5368
+ ${error.message}
5235
5369
 
5236
- This is error in FOREACH command
5237
- You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5370
+ This is error in FOREACH command when mapping ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
5371
+ You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5238
5372
 
5239
- ${block(pipelineIdentification)}
5240
- Subparameter index: ${index}
5241
- `));
5242
- }
5243
- const allSubparameters = {
5244
- ...parameters,
5245
- ...mappedParameters,
5246
- };
5247
- // Note: [👨‍👨‍👧] Now we can freeze `subparameters` because we are sure that all and only used parameters are defined and are not going to be changed
5248
- Object.freeze(allSubparameters);
5249
- const subresultString = await executeAttempts({
5250
- ...options,
5251
- priority: priority + index,
5252
- parameters: allSubparameters,
5253
- pipelineIdentification: spaceTrim__default["default"]((block) => `
5254
- ${block(pipelineIdentification)}
5255
- Subparameter index: ${index}
5256
- `),
5257
- });
5258
- return subresultString;
5373
+ ${block(pipelineIdentification)}
5374
+ `));
5375
+ if (length > BIG_DATASET_TRESHOLD) {
5376
+ console.error(highLevelError);
5377
+ return FAILED_VALUE_PLACEHOLDER;
5378
+ }
5379
+ throw highLevelError;
5380
+ }
5381
+ const allSubparameters = {
5382
+ ...parameters,
5383
+ ...mappedParameters,
5384
+ };
5385
+ Object.freeze(allSubparameters);
5386
+ try {
5387
+ const subresultString = await executeAttempts({
5388
+ ...options,
5389
+ priority: priority + index,
5390
+ parameters: allSubparameters,
5391
+ pipelineIdentification: spaceTrim__default["default"]((block) => `
5392
+ ${block(pipelineIdentification)}
5393
+ Subparameter index: ${index}
5394
+ `),
5395
+ });
5396
+ return subresultString;
5397
+ }
5398
+ catch (error) {
5399
+ if (length > BIG_DATASET_TRESHOLD) {
5400
+ console.error(spaceTrim__default["default"]((block) => `
5401
+ ${error.message}
5402
+
5403
+ This is error in FOREACH command when processing ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
5404
+
5405
+ ${block(pipelineIdentification)}
5406
+ `));
5407
+ return FAILED_VALUE_PLACEHOLDER;
5408
+ }
5409
+ throw error;
5410
+ }
5411
+ },
5259
5412
  });
5260
5413
  return resultString;
5261
5414
  }
5262
5415
 
5263
5416
  /**
5264
- * @@@
5417
+ * Returns the context for a given task, typically used to provide additional information or variables
5418
+ * required for the execution of the task within a pipeline. The context is returned as a string value
5419
+ * that may include markdown formatting.
5265
5420
  *
5421
+ * @param task - The task for which the context is being generated. This should be a deeply immutable TaskJson object.
5422
+ * @returns The context as a string, formatted as markdown and parameter value.
5266
5423
  * @private internal utility of `createPipelineExecutor`
5267
5424
  */
5268
5425
  async function getContextForTask(task) {
@@ -5270,7 +5427,7 @@
5270
5427
  }
5271
5428
 
5272
5429
  /**
5273
- * @@@
5430
+ * Retrieves example values or templates for a given task, used to guide or validate pipeline execution.
5274
5431
  *
5275
5432
  * @private internal utility of `createPipelineExecutor`
5276
5433
  */
@@ -5279,25 +5436,127 @@
5279
5436
  }
5280
5437
 
5281
5438
  /**
5282
- * @@@
5439
+ * Computes the cosine similarity between two embedding vectors
5440
+ *
5441
+ * Note: This is helping function for RAG (retrieval-augmented generation)
5442
+ *
5443
+ * @param embeddingVector1
5444
+ * @param embeddingVector2
5445
+ * @returns Cosine similarity between the two vectors
5446
+ *
5447
+ * @public exported from `@promptbook/core`
5448
+ */
5449
+ function computeCosineSimilarity(embeddingVector1, embeddingVector2) {
5450
+ if (embeddingVector1.length !== embeddingVector2.length) {
5451
+ throw new TypeError('Embedding vectors must have the same length');
5452
+ }
5453
+ const dotProduct = embeddingVector1.reduce((sum, value, index) => sum + value * embeddingVector2[index], 0);
5454
+ const magnitude1 = Math.sqrt(embeddingVector1.reduce((sum, value) => sum + value * value, 0));
5455
+ const magnitude2 = Math.sqrt(embeddingVector2.reduce((sum, value) => sum + value * value, 0));
5456
+ return 1 - dotProduct / (magnitude1 * magnitude2);
5457
+ }
5458
+
5459
/**
 * Renders knowledge pieces as a bullet list, one `- <content>` line per piece
 *
 * @param knowledgePieces array of objects with a `content` property
 * @returns the bullet lines joined by `\n` (empty string for an empty array)
 *
 * @private internal utility of `createPipelineExecutor`
 */
function knowledgePiecesToString(knowledgePieces) {
    const bulletLines = knowledgePieces.map(({ content }) => `- ${content}`);
    // <- TODO: [🧠] Some smarter aggregation of knowledge pieces, single-line vs multi-line vs mixed
    return bulletLines.join('\n');
}
5475
+
5476
/**
 * Retrieves the most relevant knowledge pieces for a given task using embedding-based similarity search.
 * This is where retrieval-augmented generation (RAG) is performed to enhance the task with external knowledge.
 *
 * The embedding model is taken from the first index of the first knowledge piece. The task content is
 * embedded with that model, every knowledge piece is scored against it and the best-scored pieces are
 * returned as a bullet list.
 *
 * Note: If anything inside the search fails, the error is logged and ALL knowledge pieces are returned
 *
 * @param options - `tools` (with `llm`), `preparedPipeline` (with `knowledgePieces`), `task` and `parameters`
 * @returns bullet list of the selected knowledge pieces, or an empty string when there is no knowledge
 *
 * @private internal utility of `createPipelineExecutor`
 */
async function getKnowledgeForTask(options) {
    const { tools, preparedPipeline, task, parameters } = options;
    const maxKnowledgePiecesCount = 5; // <- TODO: [🧠] Make this configurable
    const firstKnowlegePiece = preparedPipeline.knowledgePieces[0];
    const firstKnowlegeIndex = firstKnowlegePiece === null || firstKnowlegePiece === void 0 ? void 0 : firstKnowlegePiece.index[0];
    // <- TODO: Do not use just first knowledge piece and first index to determine embedding model, use also keyword search
    if (firstKnowlegePiece === undefined || firstKnowlegeIndex === undefined) {
        return ''; // <- Note: No knowledge present, return empty string
    }
    try {
        // TODO: [🚐] Make arrayable LLMs -> single LLM DRY
        const _llms = arrayableToArray(tools.llm);
        const llmTools = _llms.length === 1 ? _llms[0] : joinLlmExecutionTools(..._llms);
        const taskEmbeddingPrompt = {
            title: 'Knowledge Search',
            modelRequirements: {
                modelVariant: 'EMBEDDING',
                modelName: firstKnowlegeIndex.modelName,
            },
            content: task.content,
            parameters,
        };
        const taskEmbeddingResult = await llmTools.callEmbeddingModel(taskEmbeddingPrompt);
        const knowledgePiecesWithRelevance = preparedPipeline.knowledgePieces.map((knowledgePiece) => {
            const { index, content } = knowledgePiece;
            const knowledgePieceIndex = index.find((i) => i.modelName === firstKnowlegeIndex.modelName);
            // <- TODO: Do not use just first knowledge piece and first index to determine embedding model
            // Note: `computeCosineSimilarity` in this file returns `1 - cosine`, so a LOWER value means a closer match.
            //       A piece without an index for the used model therefore must not get `0` (the best possible
            //       score); it gets `Infinity` so it is picked only after every piece that could be scored.
            let relevance = Number.POSITIVE_INFINITY;
            if (knowledgePieceIndex !== undefined) {
                relevance = computeCosineSimilarity(knowledgePieceIndex.position, taskEmbeddingResult.content);
                if (Number.isNaN(relevance)) {
                    // Note: Unscorable piece (for example zero vector), rank it last instead of poisoning the sort
                    relevance = Number.POSITIVE_INFINITY;
                }
            }
            return { content, relevance };
        });
        const knowledgePiecesSorted = knowledgePiecesWithRelevance.sort((a, b) => {
            if (a.relevance === b.relevance) {
                return 0; // <- Note: Explicit comparison avoids `Infinity - Infinity` (which is `NaN`)
            }
            return a.relevance < b.relevance ? -1 : 1;
        });
        const knowledgePiecesLimited = knowledgePiecesSorted.slice(0, maxKnowledgePiecesCount);
        return knowledgePiecesToString(knowledgePiecesLimited);
    }
    catch (error) {
        assertsError(error);
        console.error('Error in `getKnowledgeForTask`', error);
        // Note: If the LLM fails, just return all knowledge pieces
        return knowledgePiecesToString(preparedPipeline.knowledgePieces);
    }
}
5541
+ /**
5542
+ * TODO: !!!! Verify if this is working
5543
+ * TODO: [♨] Implement Better - use keyword search
5544
+ * TODO: [♨] Examples of values
5545
+ */
5291
5546
 
5292
5547
  /**
5293
- * @@@
5548
+ * Retrieves all reserved parameters for a given pipeline task, including context, knowledge, examples, and metadata.
5549
+ * Ensures all reserved parameters are defined and throws if any are missing.
5550
+ *
5551
+ * @param options - Options including tools, pipeline, task, and context.
5552
+ * @returns An object containing all reserved parameters for the task.
5294
5553
  *
5295
5554
  * @private internal utility of `createPipelineExecutor`
5296
5555
  */
5297
5556
  async function getReservedParametersForTask(options) {
5298
- const { preparedPipeline, task, pipelineIdentification } = options;
5557
+ const { tools, preparedPipeline, task, parameters, pipelineIdentification } = options;
5299
5558
  const context = await getContextForTask(); // <- [🏍]
5300
- const knowledge = await getKnowledgeForTask({ preparedPipeline, task });
5559
+ const knowledge = await getKnowledgeForTask({ tools, preparedPipeline, task, parameters });
5301
5560
  const examples = await getExamplesForTask();
5302
5561
  const currentDate = new Date().toISOString(); // <- TODO: [🧠][💩] Better
5303
5562
  const modelName = RESERVED_PARAMETER_MISSING_VALUE;
@@ -5323,23 +5582,21 @@
5323
5582
  }
5324
5583
 
5325
5584
  /**
5326
- * @@@
5585
+ * Executes a single task within a pipeline, handling parameter validation, error checking, and progress reporting.
5586
+ *
5587
+ * @param options - Options for execution, including the task, pipeline, parameters, and callbacks.
5588
+ * @returns The output parameters produced by the task.
5327
5589
  *
5328
5590
  * @private internal utility of `createPipelineExecutor`
5329
5591
  */
5330
5592
  async function executeTask(options) {
5331
5593
  const { currentTask, preparedPipeline, parametersToPass, tools, onProgress, $executionReport, pipelineIdentification, maxExecutionAttempts, maxParallelCount, csvSettings, isVerbose, rootDirname, cacheDirname, intermediateFilesStrategy, isAutoInstalled, isNotPreparedWarningSupressed, } = options;
5332
5594
  const priority = preparedPipeline.tasks.length - preparedPipeline.tasks.indexOf(currentTask);
5333
- await onProgress({
5334
- outputParameters: {
5335
- [currentTask.resultingParameterName]: '', // <- TODO: [🧠] What is the best value here?
5336
- },
5337
- });
5338
5595
  // Note: Check consistency of used and dependent parameters which was also done in `validatePipeline`, but it’s good to doublecheck
5339
5596
  const usedParameterNames = extractParameterNamesFromTask(currentTask);
5340
5597
  const dependentParameterNames = new Set(currentTask.dependentParameterNames);
5341
5598
  // TODO: [👩🏾‍🤝‍👩🏻] Use here `mapAvailableToExpectedParameters`
5342
- if (union(difference(usedParameterNames, dependentParameterNames), difference(dependentParameterNames, usedParameterNames)).size !== 0) {
5599
+ if (difference(union(difference(usedParameterNames, dependentParameterNames), difference(dependentParameterNames, usedParameterNames)), new Set(RESERVED_PARAMETER_NAMES)).size !== 0) {
5343
5600
  throw new UnexpectedError(spaceTrim.spaceTrim((block) => `
5344
5601
  Dependent parameters are not consistent with used parameters:
5345
5602
 
@@ -5359,9 +5616,11 @@
5359
5616
  }
5360
5617
  const definedParameters = Object.freeze({
5361
5618
  ...(await getReservedParametersForTask({
5619
+ tools,
5362
5620
  preparedPipeline,
5363
5621
  task: currentTask,
5364
5622
  pipelineIdentification,
5623
+ parameters: parametersToPass,
5365
5624
  })),
5366
5625
  ...parametersToPass,
5367
5626
  });
@@ -5407,6 +5666,7 @@
5407
5666
  preparedPipeline,
5408
5667
  tools,
5409
5668
  $executionReport,
5669
+ onProgress,
5410
5670
  pipelineIdentification,
5411
5671
  maxExecutionAttempts,
5412
5672
  maxParallelCount,
@@ -5434,7 +5694,8 @@
5434
5694
  */
5435
5695
 
5436
5696
  /**
5437
- * @@@
5697
+ * Filters and returns only the output parameters from the provided pipeline execution options.
5698
+ * Adds warnings for any expected output parameters that are missing.
5438
5699
  *
5439
5700
  * @private internal utility of `createPipelineExecutor`
5440
5701
  */
@@ -5459,9 +5720,12 @@
5459
5720
  }
5460
5721
 
5461
5722
  /**
5462
- * @@@
5723
+ * Executes an entire pipeline, resolving tasks in dependency order, handling errors, and reporting progress.
5463
5724
  *
5464
- * Note: This is not a `PipelineExecutor` (which is binded with one exact pipeline), but a utility function of `createPipelineExecutor` which creates `PipelineExecutor`
5725
+ * Note: This is not a `PipelineExecutor` (which is bound to a single pipeline), but a utility function used by `createPipelineExecutor` to create a `PipelineExecutor`.
5726
+ *
5727
+ * @param options - Options for execution, including input parameters, pipeline, and callbacks.
5728
+ * @returns The result of the pipeline execution, including output parameters, errors, and usage statistics.
5465
5729
  *
5466
5730
  * @private internal utility of `createPipelineExecutor`
5467
5731
  */
@@ -5784,6 +6048,22 @@
5784
6048
  cacheDirname,
5785
6049
  intermediateFilesStrategy,
5786
6050
  isAutoInstalled,
6051
+ }).catch((error) => {
6052
+ assertsError(error);
6053
+ return exportJson({
6054
+ name: 'pipelineExecutorResult',
6055
+ message: `Unuccessful PipelineExecutorResult, last catch`,
6056
+ order: [],
6057
+ value: {
6058
+ isSuccessful: false,
6059
+ errors: [serializeError(error)],
6060
+ warnings: [],
6061
+ usage: UNCERTAIN_USAGE,
6062
+ executionReport: null,
6063
+ outputParameters: {},
6064
+ preparedPipeline,
6065
+ },
6066
+ });
5787
6067
  });
5788
6068
  };
5789
6069
  const pipelineExecutor = (inputParameters) => createTask({
@@ -6268,7 +6548,8 @@
6268
6548
  */
6269
6549
 
6270
6550
  /**
6271
- * @@@
6551
+ * Creates a scraper for legacy document formats (.doc, .rtf, etc).
6552
+ * Uses LibreOffice for conversion to extract content from older document formats.
6272
6553
  *
6273
6554
  * @public exported from `@promptbook/legacy-documents`
6274
6555
  */