@promptbook/pdf 0.92.0-9 → 0.93.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206)
  1. package/README.md +140 -88
  2. package/esm/index.es.js +589 -408
  3. package/esm/index.es.js.map +1 -1
  4. package/esm/typings/src/_packages/browser.index.d.ts +2 -0
  5. package/esm/typings/src/_packages/core.index.d.ts +26 -14
  6. package/esm/typings/src/_packages/types.index.d.ts +6 -2
  7. package/esm/typings/src/collection/PipelineCollection.d.ts +0 -2
  8. package/esm/typings/src/collection/SimplePipelineCollection.d.ts +1 -1
  9. package/esm/typings/src/commands/FOREACH/ForeachJson.d.ts +6 -6
  10. package/esm/typings/src/commands/FOREACH/foreachCommandParser.d.ts +0 -2
  11. package/esm/typings/src/commands/FORMFACTOR/formfactorCommandParser.d.ts +1 -1
  12. package/esm/typings/src/commands/_BOILERPLATE/boilerplateCommandParser.d.ts +1 -1
  13. package/esm/typings/src/commands/_common/types/CommandParser.d.ts +36 -28
  14. package/esm/typings/src/config.d.ts +41 -11
  15. package/esm/typings/src/constants.d.ts +43 -2
  16. package/esm/typings/src/conversion/parsePipeline.d.ts +2 -2
  17. package/esm/typings/src/errors/0-BoilerplateError.d.ts +2 -2
  18. package/esm/typings/src/errors/CollectionError.d.ts +1 -1
  19. package/esm/typings/src/executables/$provideExecutablesForNode.d.ts +1 -1
  20. package/esm/typings/src/executables/apps/locateLibreoffice.d.ts +2 -1
  21. package/esm/typings/src/executables/apps/locatePandoc.d.ts +2 -1
  22. package/esm/typings/src/executables/locateApp.d.ts +2 -2
  23. package/esm/typings/src/executables/platforms/locateAppOnLinux.d.ts +2 -1
  24. package/esm/typings/src/executables/platforms/locateAppOnMacOs.d.ts +2 -1
  25. package/esm/typings/src/executables/platforms/locateAppOnWindows.d.ts +2 -1
  26. package/esm/typings/src/execution/AbstractTaskResult.d.ts +1 -1
  27. package/esm/typings/src/execution/CommonToolsOptions.d.ts +3 -3
  28. package/esm/typings/src/execution/ExecutionTask.d.ts +19 -1
  29. package/esm/typings/src/execution/LlmExecutionToolsConstructor.d.ts +2 -1
  30. package/esm/typings/src/execution/PipelineExecutorResult.d.ts +4 -2
  31. package/esm/typings/src/execution/PromptbookFetch.d.ts +1 -1
  32. package/esm/typings/src/execution/ScriptExecutionTools.d.ts +1 -1
  33. package/esm/typings/src/execution/createPipelineExecutor/$OngoingTaskResult.d.ts +12 -9
  34. package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +13 -10
  35. package/esm/typings/src/execution/createPipelineExecutor/20-executeTask.d.ts +12 -9
  36. package/esm/typings/src/execution/createPipelineExecutor/30-executeFormatSubvalues.d.ts +15 -3
  37. package/esm/typings/src/execution/createPipelineExecutor/40-executeAttempts.d.ts +21 -15
  38. package/esm/typings/src/execution/createPipelineExecutor/computeCosineSimilarity.d.ts +13 -0
  39. package/esm/typings/src/execution/createPipelineExecutor/filterJustOutputParameters.d.ts +7 -6
  40. package/esm/typings/src/execution/createPipelineExecutor/getContextForTask.d.ts +5 -1
  41. package/esm/typings/src/execution/createPipelineExecutor/getExamplesForTask.d.ts +1 -1
  42. package/esm/typings/src/execution/createPipelineExecutor/getKnowledgeForTask.d.ts +12 -9
  43. package/esm/typings/src/execution/createPipelineExecutor/getReservedParametersForTask.d.ts +18 -5
  44. package/esm/typings/src/execution/createPipelineExecutor/knowledgePiecesToString.d.ts +9 -0
  45. package/esm/typings/src/execution/execution-report/ExecutionReportJson.d.ts +1 -1
  46. package/esm/typings/src/execution/execution-report/ExecutionReportString.d.ts +1 -1
  47. package/esm/typings/src/execution/translation/automatic-translate/automatic-translators/LindatAutomaticTranslator.d.ts +4 -4
  48. package/esm/typings/src/execution/utils/checkExpectations.d.ts +3 -3
  49. package/esm/typings/src/execution/utils/uncertainNumber.d.ts +3 -2
  50. package/esm/typings/src/execution/utils/usageToWorktime.d.ts +1 -1
  51. package/esm/typings/src/formats/_common/{FormatDefinition.d.ts → FormatParser.d.ts} +8 -6
  52. package/esm/typings/src/formats/_common/FormatSubvalueParser.d.ts +66 -0
  53. package/esm/typings/src/formats/csv/CsvFormatParser.d.ts +17 -0
  54. package/esm/typings/src/formats/csv/CsvSettings.d.ts +2 -2
  55. package/esm/typings/src/formats/csv/utils/csvParse.d.ts +12 -0
  56. package/esm/typings/src/formats/csv/utils/isValidCsvString.d.ts +1 -1
  57. package/esm/typings/src/formats/index.d.ts +2 -2
  58. package/esm/typings/src/formats/json/JsonFormatParser.d.ts +19 -0
  59. package/esm/typings/src/formats/json/utils/isValidJsonString.d.ts +1 -1
  60. package/esm/typings/src/formats/json/utils/jsonParse.d.ts +0 -3
  61. package/esm/typings/src/formats/text/{TextFormatDefinition.d.ts → TextFormatParser.d.ts} +7 -7
  62. package/esm/typings/src/formats/xml/XmlFormatParser.d.ts +19 -0
  63. package/esm/typings/src/formats/xml/utils/isValidXmlString.d.ts +1 -1
  64. package/esm/typings/src/formfactors/_boilerplate/BoilerplateFormfactorDefinition.d.ts +3 -2
  65. package/esm/typings/src/formfactors/_common/AbstractFormfactorDefinition.d.ts +16 -7
  66. package/esm/typings/src/formfactors/_common/FormfactorDefinition.d.ts +3 -1
  67. package/esm/typings/src/formfactors/_common/string_formfactor_name.d.ts +2 -1
  68. package/esm/typings/src/formfactors/chatbot/ChatbotFormfactorDefinition.d.ts +2 -2
  69. package/esm/typings/src/formfactors/completion/CompletionFormfactorDefinition.d.ts +29 -0
  70. package/esm/typings/src/formfactors/generator/GeneratorFormfactorDefinition.d.ts +2 -1
  71. package/esm/typings/src/formfactors/generic/GenericFormfactorDefinition.d.ts +2 -2
  72. package/esm/typings/src/formfactors/index.d.ts +33 -8
  73. package/esm/typings/src/formfactors/matcher/MatcherFormfactorDefinition.d.ts +4 -2
  74. package/esm/typings/src/formfactors/sheets/SheetsFormfactorDefinition.d.ts +3 -2
  75. package/esm/typings/src/formfactors/translator/TranslatorFormfactorDefinition.d.ts +3 -2
  76. package/esm/typings/src/high-level-abstractions/index.d.ts +2 -2
  77. package/esm/typings/src/llm-providers/_common/register/$llmToolsMetadataRegister.d.ts +3 -3
  78. package/esm/typings/src/llm-providers/_common/register/$llmToolsRegister.d.ts +3 -3
  79. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsConfigurationFromEnv.d.ts +4 -4
  80. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsForTestingAndScriptsAndPlayground.d.ts +4 -3
  81. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsFromEnv.d.ts +18 -5
  82. package/esm/typings/src/llm-providers/_common/register/LlmToolsConfiguration.d.ts +11 -4
  83. package/esm/typings/src/llm-providers/_common/register/LlmToolsMetadata.d.ts +21 -42
  84. package/esm/typings/src/llm-providers/_common/register/LlmToolsOptions.d.ts +9 -2
  85. package/esm/typings/src/llm-providers/_common/register/createLlmToolsFromConfiguration.d.ts +13 -4
  86. package/esm/typings/src/llm-providers/_common/utils/cache/CacheItem.d.ts +10 -5
  87. package/esm/typings/src/llm-providers/_common/utils/cache/CacheLlmToolsOptions.d.ts +11 -3
  88. package/esm/typings/src/llm-providers/_common/utils/cache/cacheLlmTools.d.ts +3 -3
  89. package/esm/typings/src/llm-providers/_common/utils/count-total-usage/limitTotalUsage.d.ts +5 -5
  90. package/esm/typings/src/llm-providers/anthropic-claude/AnthropicClaudeExecutionTools.d.ts +6 -0
  91. package/esm/typings/src/llm-providers/anthropic-claude/anthropic-claude-models.d.ts +1 -1
  92. package/esm/typings/src/llm-providers/azure-openai/AzureOpenAiExecutionTools.d.ts +1 -1
  93. package/esm/typings/src/llm-providers/azure-openai/AzureOpenAiExecutionToolsOptions.d.ts +4 -4
  94. package/esm/typings/src/llm-providers/deepseek/deepseek-models.d.ts +1 -1
  95. package/esm/typings/src/llm-providers/google/google-models.d.ts +1 -1
  96. package/esm/typings/src/llm-providers/openai/OpenAiAssistantExecutionTools.d.ts +1 -1
  97. package/esm/typings/src/llm-providers/openai/openai-models.d.ts +1 -1
  98. package/esm/typings/src/llm-providers/openai/register-configuration.d.ts +2 -2
  99. package/esm/typings/src/llm-providers/openai/register-constructor.d.ts +2 -2
  100. package/esm/typings/src/migrations/migratePipeline.d.ts +9 -0
  101. package/esm/typings/src/other/templates/getBookTemplates.d.ts +2 -2
  102. package/esm/typings/src/pipeline/PipelineInterface/PipelineInterface.d.ts +3 -3
  103. package/esm/typings/src/pipeline/PipelineInterface/constants.d.ts +1 -1
  104. package/esm/typings/src/pipeline/PipelineInterface/getPipelineInterface.d.ts +1 -1
  105. package/esm/typings/src/pipeline/PipelineInterface/isPipelineImplementingInterface.d.ts +5 -4
  106. package/esm/typings/src/pipeline/PipelineInterface/isPipelineInterfacesEqual.d.ts +1 -1
  107. package/esm/typings/src/pipeline/PipelineJson/CommonTaskJson.d.ts +10 -7
  108. package/esm/typings/src/pipeline/PipelineJson/ParameterJson.d.ts +1 -1
  109. package/esm/typings/src/pipeline/PipelineJson/PipelineJson.d.ts +3 -2
  110. package/esm/typings/src/pipeline/PipelineString.d.ts +3 -1
  111. package/esm/typings/src/pipeline/book-notation.d.ts +2 -2
  112. package/esm/typings/src/postprocessing/utils/extractBlock.d.ts +1 -1
  113. package/esm/typings/src/postprocessing/utils/extractJsonBlock.d.ts +2 -2
  114. package/esm/typings/src/prepare/prepareTasks.d.ts +8 -5
  115. package/esm/typings/src/remote-server/openapi.d.ts +1 -1
  116. package/esm/typings/src/remote-server/socket-types/listModels/PromptbookServer_ListModels_Response.d.ts +1 -1
  117. package/esm/typings/src/remote-server/startRemoteServer.d.ts +1 -1
  118. package/esm/typings/src/remote-server/types/RemoteServerOptions.d.ts +7 -6
  119. package/esm/typings/src/scrapers/_boilerplate/BoilerplateScraper.d.ts +3 -3
  120. package/esm/typings/src/scrapers/_boilerplate/createBoilerplateScraper.d.ts +1 -1
  121. package/esm/typings/src/scrapers/_boilerplate/register-metadata.d.ts +1 -1
  122. package/esm/typings/src/scrapers/_common/Converter.d.ts +3 -1
  123. package/esm/typings/src/scrapers/_common/Scraper.d.ts +4 -3
  124. package/esm/typings/src/scrapers/_common/ScraperIntermediateSource.d.ts +4 -2
  125. package/esm/typings/src/scrapers/_common/prepareKnowledgePieces.d.ts +2 -2
  126. package/esm/typings/src/scrapers/_common/register/$provideFilesystemForNode.d.ts +2 -1
  127. package/esm/typings/src/scrapers/_common/register/$provideScrapersForBrowser.d.ts +6 -3
  128. package/esm/typings/src/scrapers/_common/register/$provideScrapersForNode.d.ts +3 -5
  129. package/esm/typings/src/scrapers/_common/register/$scrapersMetadataRegister.d.ts +3 -3
  130. package/esm/typings/src/scrapers/_common/register/$scrapersRegister.d.ts +3 -2
  131. package/esm/typings/src/scrapers/_common/register/ScraperAndConverterMetadata.d.ts +8 -5
  132. package/esm/typings/src/scrapers/_common/register/ScraperConstructor.d.ts +2 -1
  133. package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +6 -5
  134. package/esm/typings/src/scrapers/_common/utils/makeKnowledgeSourceHandler.d.ts +3 -1
  135. package/esm/typings/src/scrapers/document/createDocumentScraper.d.ts +1 -1
  136. package/esm/typings/src/scrapers/document-legacy/createLegacyDocumentScraper.d.ts +2 -1
  137. package/esm/typings/src/scrapers/markdown/createMarkdownScraper.d.ts +4 -1
  138. package/esm/typings/src/scrapers/markitdown/MarkitdownScraper.d.ts +1 -1
  139. package/esm/typings/src/scrapers/pdf/createPdfScraper.d.ts +2 -1
  140. package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts +3 -4
  141. package/esm/typings/src/scripting/javascript/JavascriptExecutionTools.d.ts +1 -1
  142. package/esm/typings/src/scripting/javascript/postprocessing-functions.d.ts +5 -1
  143. package/esm/typings/src/scripting/javascript/utils/preserve.d.ts +1 -1
  144. package/esm/typings/src/storage/_common/PromptbookStorage.d.ts +1 -1
  145. package/esm/typings/src/storage/file-cache-storage/FileCacheStorage.d.ts +12 -5
  146. package/esm/typings/src/storage/file-cache-storage/FileCacheStorageOptions.d.ts +4 -2
  147. package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.d.ts +2 -1
  148. package/esm/typings/src/storage/local-storage/getIndexedDbStorage.d.ts +11 -0
  149. package/esm/typings/src/storage/local-storage/utils/IndexedDbStorageOptions.d.ts +14 -0
  150. package/esm/typings/src/storage/local-storage/utils/makePromptbookStorageFromIndexedDb.d.ts +8 -0
  151. package/esm/typings/src/storage/local-storage/utils/makePromptbookStorageFromWebStorage.d.ts +2 -1
  152. package/esm/typings/src/types/IntermediateFilesStrategy.d.ts +2 -1
  153. package/esm/typings/src/types/ModelRequirements.d.ts +2 -2
  154. package/esm/typings/src/types/ModelVariant.d.ts +5 -5
  155. package/esm/typings/src/types/typeAliases.d.ts +22 -19
  156. package/esm/typings/src/utils/$Register.d.ts +8 -7
  157. package/esm/typings/src/utils/editable/edit-pipeline-string/addPipelineCommand.d.ts +2 -2
  158. package/esm/typings/src/utils/editable/edit-pipeline-string/deflatePipeline.d.ts +4 -1
  159. package/esm/typings/src/utils/editable/utils/isFlatPipeline.d.ts +2 -1
  160. package/esm/typings/src/utils/editable/utils/stringifyPipelineJson.d.ts +1 -1
  161. package/esm/typings/src/utils/environment/$getGlobalScope.d.ts +2 -1
  162. package/esm/typings/src/utils/expectation-counters/index.d.ts +1 -1
  163. package/esm/typings/src/utils/markdown/extractAllBlocksFromMarkdown.d.ts +2 -2
  164. package/esm/typings/src/utils/markdown/extractAllListItemsFromMarkdown.d.ts +1 -1
  165. package/esm/typings/src/utils/markdown/extractOneBlockFromMarkdown.d.ts +2 -2
  166. package/esm/typings/src/utils/normalization/nameToUriPart.d.ts +4 -4
  167. package/esm/typings/src/utils/normalization/nameToUriParts.d.ts +4 -4
  168. package/esm/typings/src/utils/normalization/normalize-to-kebab-case.d.ts +3 -3
  169. package/esm/typings/src/utils/normalization/normalizeTo_SCREAMING_CASE.d.ts +3 -3
  170. package/esm/typings/src/utils/normalization/normalizeTo_camelCase.d.ts +4 -4
  171. package/esm/typings/src/utils/normalization/normalizeTo_snake_case.d.ts +3 -3
  172. package/esm/typings/src/utils/normalization/removeDiacritics.d.ts +3 -3
  173. package/esm/typings/src/utils/normalization/searchKeywords.d.ts +4 -1
  174. package/esm/typings/src/utils/normalization/titleToName.d.ts +4 -4
  175. package/esm/typings/src/utils/organization/TODO_USE.d.ts +1 -1
  176. package/esm/typings/src/utils/organization/empty_object.d.ts +2 -2
  177. package/esm/typings/src/utils/organization/just.d.ts +1 -1
  178. package/esm/typings/src/utils/organization/just_empty_object.d.ts +4 -4
  179. package/esm/typings/src/utils/organization/keepUnused.d.ts +1 -1
  180. package/esm/typings/src/utils/parameters/mapAvailableToExpectedParameters.d.ts +7 -7
  181. package/esm/typings/src/utils/removeQuotes.d.ts +2 -2
  182. package/esm/typings/src/utils/serialization/clonePipeline.d.ts +4 -3
  183. package/esm/typings/src/utils/serialization/deepClone.d.ts +5 -1
  184. package/esm/typings/src/utils/trimCodeBlock.d.ts +1 -1
  185. package/esm/typings/src/utils/trimEndOfCodeBlock.d.ts +1 -1
  186. package/esm/typings/src/utils/unwrapResult.d.ts +2 -2
  187. package/esm/typings/src/utils/validators/javascriptName/isValidJavascriptName.d.ts +3 -3
  188. package/esm/typings/src/utils/validators/parameterName/validateParameterName.d.ts +5 -4
  189. package/esm/typings/src/utils/validators/semanticVersion/isValidPromptbookVersion.d.ts +1 -1
  190. package/esm/typings/src/utils/validators/semanticVersion/isValidSemanticVersion.d.ts +1 -1
  191. package/esm/typings/src/utils/validators/url/isHostnameOnPrivateNetwork.d.ts +1 -1
  192. package/esm/typings/src/utils/validators/url/isUrlOnPrivateNetwork.d.ts +1 -1
  193. package/esm/typings/src/utils/validators/url/isValidPipelineUrl.d.ts +1 -1
  194. package/esm/typings/src/utils/validators/url/isValidUrl.d.ts +1 -1
  195. package/esm/typings/src/version.d.ts +2 -1
  196. package/esm/typings/src/wizzard/wizzard.d.ts +1 -1
  197. package/package.json +15 -3
  198. package/umd/index.umd.js +589 -408
  199. package/umd/index.umd.js.map +1 -1
  200. package/esm/typings/src/formats/_common/FormatSubvalueDefinition.d.ts +0 -31
  201. package/esm/typings/src/formats/csv/CsvFormatDefinition.d.ts +0 -17
  202. package/esm/typings/src/formats/json/JsonFormatDefinition.d.ts +0 -19
  203. package/esm/typings/src/formats/xml/XmlFormatDefinition.d.ts +0 -19
  204. /package/esm/typings/src/llm-providers/{multiple → _multiple}/MultipleLlmExecutionTools.d.ts +0 -0
  205. /package/esm/typings/src/llm-providers/{multiple → _multiple}/joinLlmExecutionTools.d.ts +0 -0
  206. /package/esm/typings/src/llm-providers/{multiple → _multiple}/playground/playground.d.ts +0 -0
package/esm/index.es.js CHANGED
@@ -26,7 +26,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
26
26
  * @generated
27
27
  * @see https://github.com/webgptorg/promptbook
28
28
  */
29
- const PROMPTBOOK_ENGINE_VERSION = '0.92.0-9';
29
+ const PROMPTBOOK_ENGINE_VERSION = '0.93.0';
30
30
  /**
31
31
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
32
32
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -36,7 +36,7 @@ const PROMPTBOOK_ENGINE_VERSION = '0.92.0-9';
36
36
  * Just says that the variable is not used but should be kept
37
37
  * No side effects.
38
38
  *
39
- * Note: It can be usefull for:
39
+ * Note: It can be useful for:
40
40
  *
41
41
  * 1) Suppressing eager optimization of unused imports
42
42
  * 2) Suppressing eslint errors of unused variables in the tests
@@ -53,7 +53,7 @@ function keepUnused(...valuesToKeep) {
53
53
  * Returns the same value that is passed as argument.
54
54
  * No side effects.
55
55
  *
56
- * Note: It can be usefull for:
56
+ * Note: It can be useful for:
57
57
  *
58
58
  * 1) Leveling indentation
59
59
  * 2) Putting always-true or always-false conditions without getting eslint errors
@@ -102,6 +102,21 @@ const DEFAULT_BOOK_TITLE = `✨ Untitled Book`;
102
102
  * @public exported from `@promptbook/core`
103
103
  */
104
104
  const DEFAULT_MAX_FILE_SIZE = 100 * 1024 * 1024; // 100MB
105
+ /**
106
+ * Threshold value that determines when a dataset is considered "big"
107
+ * and may require special handling or optimizations
108
+ *
109
+ * For example, when error occurs in one item of the big dataset, it will not fail the whole pipeline
110
+ *
111
+ * @public exported from `@promptbook/core`
112
+ */
113
+ const BIG_DATASET_TRESHOLD = 50;
114
+ /**
115
+ * Placeholder text used to represent a placeholder value of failed operation
116
+ *
117
+ * @public exported from `@promptbook/core`
118
+ */
119
+ const FAILED_VALUE_PLACEHOLDER = '!?';
105
120
  // <- TODO: [🧠] Better system for generator warnings - not always "code" and "by `@promptbook/cli`"
106
121
  /**
107
122
  * The maximum number of iterations for a loops
@@ -181,7 +196,7 @@ const DEFAULT_DOWNLOAD_CACHE_DIRNAME = './.promptbook/download-cache';
181
196
  const DEFAULT_SCRAPE_CACHE_DIRNAME = './.promptbook/scrape-cache';
182
197
  // <- TODO: [🧜‍♂️]
183
198
  /**
184
- * @@@
199
+ * Default settings for parsing and generating CSV files in Promptbook.
185
200
  *
186
201
  * @public exported from `@promptbook/core`
187
202
  */
@@ -192,19 +207,19 @@ const DEFAULT_CSV_SETTINGS = Object.freeze({
192
207
  skipEmptyLines: true,
193
208
  });
194
209
  /**
195
- * @@@
210
+ * Controls whether verbose logging is enabled by default throughout the application.
196
211
  *
197
212
  * @public exported from `@promptbook/core`
198
213
  */
199
214
  let DEFAULT_IS_VERBOSE = false;
200
215
  /**
201
- * @@@
216
+ * Controls whether auto-installation of dependencies is enabled by default.
202
217
  *
203
218
  * @public exported from `@promptbook/core`
204
219
  */
205
220
  const DEFAULT_IS_AUTO_INSTALLED = false;
206
221
  /**
207
- * @@@
222
+ * Indicates whether pipeline logic validation is enabled. When true, the pipeline logic is checked for consistency.
208
223
  *
209
224
  * @private within the repository
210
225
  */
@@ -298,7 +313,7 @@ class UnexpectedError extends Error {
298
313
  ${block(message)}
299
314
 
300
315
  Note: This error should not happen.
301
- It's probbably a bug in the pipeline collection
316
+ It's probably a bug in the pipeline collection
302
317
 
303
318
  Please report issue:
304
319
  ${block(getErrorReportUrl(new Error(message)).href)}
@@ -355,7 +370,8 @@ async function isFileExisting(filename, fs) {
355
370
  */
356
371
 
357
372
  /**
358
- * @@@
373
+ * Converts a name to a properly formatted subfolder path for cache storage.
374
+ * Handles normalization and path formatting to create consistent cache directory structures.
359
375
  *
360
376
  * @private for `FileCacheStorage`
361
377
  */
@@ -608,10 +624,10 @@ for (let i = 0; i < defaultDiacriticsRemovalMap.length; i++) {
608
624
  */
609
625
 
610
626
  /**
611
- * @@@
627
+ * Removes diacritic marks (accents) from characters in a string.
612
628
  *
613
- * @param input @@@
614
- * @returns @@@
629
+ * @param input The string containing diacritics to be normalized.
630
+ * @returns The string with diacritics removed or normalized.
615
631
  * @public exported from `@promptbook/utils`
616
632
  */
617
633
  function removeDiacritics(input) {
@@ -625,10 +641,10 @@ function removeDiacritics(input) {
625
641
  */
626
642
 
627
643
  /**
628
- * @@@
644
+ * Converts a given text to kebab-case format.
629
645
  *
630
- * @param text @@@
631
- * @returns @@@
646
+ * @param text The text to be converted.
647
+ * @returns The kebab-case formatted string.
632
648
  * @example 'hello-world'
633
649
  * @example 'i-love-promptbook'
634
650
  * @public exported from `@promptbook/utils`
@@ -744,7 +760,7 @@ function isValidFilePath(filename) {
744
760
  * Tests if given string is valid URL.
745
761
  *
746
762
  * Note: Dataurl are considered perfectly valid.
747
- * Note: There are two simmilar functions:
763
+ * Note: There are two similar functions:
748
764
  * - `isValidUrl` which tests any URL
749
765
  * - `isValidPipelineUrl` *(this one)* which tests just promptbook URL
750
766
  *
@@ -770,11 +786,11 @@ function isValidUrl(url) {
770
786
  }
771
787
 
772
788
  /**
773
- * @@@
789
+ * Converts a title string into a normalized name.
774
790
  *
775
- * @param value @@@
776
- * @returns @@@
777
- * @example @@@
791
+ * @param value The title string to be converted to a name.
792
+ * @returns A normalized name derived from the input title.
793
+ * @example 'Hello World!' -> 'hello-world'
778
794
  * @public exported from `@promptbook/utils`
779
795
  */
780
796
  function titleToName(value) {
@@ -797,7 +813,7 @@ function titleToName(value) {
797
813
  * Just marks a place of place where should be something implemented
798
814
  * No side effects.
799
815
  *
800
- * Note: It can be usefull suppressing eslint errors of unused variables
816
+ * Note: It can be useful suppressing eslint errors of unused variables
801
817
  *
802
818
  * @param value any values
803
819
  * @returns void
@@ -807,9 +823,8 @@ function TODO_USE(...value) {
807
823
  }
808
824
 
809
825
  /**
810
- * Create a filename for intermediate cache for scrapers
811
- *
812
- * Note: It also checks if directory exists and creates it if not
826
+ * Retrieves an intermediate source for a scraper based on the knowledge source.
827
+ * Manages the caching and retrieval of intermediate scraper results for optimized performance.
813
828
  *
814
829
  * @private as internal utility for scrapers
815
830
  */
@@ -945,7 +960,7 @@ function assertsError(whatWasThrown) {
945
960
  * Function isValidJsonString will tell you if the string is valid JSON or not
946
961
  *
947
962
  * @param value The string to check
948
- * @returns True if the string is a valid JSON string, false otherwise
963
+ * @returns `true` if the string is a valid JSON string, false otherwise
949
964
  *
950
965
  * @public exported from `@promptbook/utils`
951
966
  */
@@ -1057,7 +1072,7 @@ function pipelineJsonToString(pipelineJson) {
1057
1072
  if (bookVersion !== `undefined`) {
1058
1073
  commands.push(`BOOK VERSION ${bookVersion}`);
1059
1074
  }
1060
- // TODO: [main] !!5 This increases size of the bundle and is probbably not necessary
1075
+ // TODO: [main] !!5 This increases size of the bundle and is probably not necessary
1061
1076
  pipelineString = prettifyMarkdown(pipelineString);
1062
1077
  for (const parameter of parameters.filter(({ isInput }) => isInput)) {
1063
1078
  commands.push(`INPUT PARAMETER ${taskParameterJsonToString(parameter)}`);
@@ -1356,8 +1371,12 @@ function checkSerializableAsJson(options) {
1356
1371
  */
1357
1372
 
1358
1373
  /**
1359
- * @@@
1374
+ * Creates a deep clone of the given object
1360
1375
  *
1376
+ * Note: This method only works for objects that are fully serializable to JSON and do not contain functions, Dates, or special types.
1377
+ *
1378
+ * @param objectValue The object to clone.
1379
+ * @returns A deep, writable clone of the input object.
1361
1380
  * @public exported from `@promptbook/utils`
1362
1381
  */
1363
1382
  function deepClone(objectValue) {
@@ -1439,13 +1458,13 @@ const ORDER_OF_PIPELINE_JSON = [
1439
1458
  */
1440
1459
  const REPLACING_NONCE = 'ptbkauk42kV2dzao34faw7FudQUHYPtW';
1441
1460
  /**
1442
- * @@@
1461
+ * Placeholder value indicating a parameter is missing its value.
1443
1462
  *
1444
1463
  * @private within the repository
1445
1464
  */
1446
1465
  const RESERVED_PARAMETER_MISSING_VALUE = 'MISSING-' + REPLACING_NONCE;
1447
1466
  /**
1448
- * @@@
1467
+ * Placeholder value indicating a parameter is restricted and cannot be used directly.
1449
1468
  *
1450
1469
  * @private within the repository
1451
1470
  */
@@ -1490,7 +1509,7 @@ class PipelineLogicError extends Error {
1490
1509
  /**
1491
1510
  * Tests if given string is valid semantic version
1492
1511
  *
1493
- * Note: There are two simmilar functions:
1512
+ * Note: There are two similar functions:
1494
1513
  * - `isValidSemanticVersion` which tests any semantic version
1495
1514
  * - `isValidPromptbookVersion` *(this one)* which tests just Promptbook versions
1496
1515
  *
@@ -1512,7 +1531,7 @@ function isValidSemanticVersion(version) {
1512
1531
  *
1513
1532
  * @see https://www.npmjs.com/package/promptbook?activeTab=versions
1514
1533
  * Note: When you are using for example promptbook 2.0.0 and there already is promptbook 3.0.0 it don`t know about it.
1515
- * Note: There are two simmilar functions:
1534
+ * Note: There are two similar functions:
1516
1535
  * - `isValidSemanticVersion` which tests any semantic version
1517
1536
  * - `isValidPromptbookVersion` *(this one)* which tests just Promptbook versions
1518
1537
  *
@@ -1532,7 +1551,7 @@ function isValidPromptbookVersion(version) {
1532
1551
  /**
1533
1552
  * Tests if given string is valid pipeline URL URL.
1534
1553
  *
1535
- * Note: There are two simmilar functions:
1554
+ * Note: There are two similar functions:
1536
1555
  * - `isValidUrl` which tests any URL
1537
1556
  * - `isValidPipelineUrl` *(this one)* which tests just pipeline URL
1538
1557
  *
@@ -1629,7 +1648,7 @@ function validatePipeline_InnerFunction(pipeline) {
1629
1648
  ${block(pipelineIdentification)}
1630
1649
  `));
1631
1650
  }
1632
- // TODO: [🧠] Maybe do here some propper JSON-schema / ZOD checking
1651
+ // TODO: [🧠] Maybe do here some proper JSON-schema / ZOD checking
1633
1652
  if (!Array.isArray(pipeline.parameters)) {
1634
1653
  // TODO: [🧠] what is the correct error tp throw - maybe PromptbookSchemaError
1635
1654
  throw new ParseError(spaceTrim$1((block) => `
@@ -1640,7 +1659,7 @@ function validatePipeline_InnerFunction(pipeline) {
1640
1659
  ${block(pipelineIdentification)}
1641
1660
  `));
1642
1661
  }
1643
- // TODO: [🧠] Maybe do here some propper JSON-schema / ZOD checking
1662
+ // TODO: [🧠] Maybe do here some proper JSON-schema / ZOD checking
1644
1663
  if (!Array.isArray(pipeline.tasks)) {
1645
1664
  // TODO: [🧠] what is the correct error tp throw - maybe PromptbookSchemaError
1646
1665
  throw new ParseError(spaceTrim$1((block) => `
@@ -1944,7 +1963,7 @@ class SimplePipelineCollection {
1944
1963
  /**
1945
1964
  * Constructs a pipeline collection from pipelines
1946
1965
  *
1947
- * @param pipelines @@@
1966
+ * @param pipelines Array of pipeline JSON objects to include in the collection
1948
1967
  *
1949
1968
  * Note: During the construction logic of all pipelines are validated
1950
1969
  * Note: It is not recommended to use this constructor directly, use `createCollectionFromJson` *(or other variant)* instead
@@ -2058,7 +2077,7 @@ class MissingToolsError extends Error {
2058
2077
  super(spaceTrim$1((block) => `
2059
2078
  ${block(message)}
2060
2079
 
2061
- Note: You have probbably forgot to provide some tools for pipeline execution or preparation
2080
+ Note: You have probably forgot to provide some tools for pipeline execution or preparation
2062
2081
 
2063
2082
  `));
2064
2083
  this.name = 'MissingToolsError';
@@ -2092,7 +2111,7 @@ class PipelineExecutionError extends Error {
2092
2111
  super(message);
2093
2112
  this.name = 'PipelineExecutionError';
2094
2113
  // TODO: [🐙] DRY - Maybe $randomId
2095
- this.id = `error-${$randomToken(8 /* <- TODO: To global config + Use Base58 to avoid simmilar char conflicts */)}`;
2114
+ this.id = `error-${$randomToken(8 /* <- TODO: To global config + Use Base58 to avoid similar char conflicts */)}`;
2096
2115
  Object.setPrototypeOf(this, PipelineExecutionError.prototype);
2097
2116
  }
2098
2117
  }
@@ -2108,15 +2127,18 @@ class PipelineExecutionError extends Error {
2108
2127
  * @public exported from `@promptbook/core`
2109
2128
  */
2110
2129
  function isPipelinePrepared(pipeline) {
2111
- // Note: Ignoring `pipeline.preparations` @@@
2112
- // Note: Ignoring `pipeline.knowledgePieces` @@@
2130
+ // Note: Ignoring `pipeline.preparations`
2131
+ // Note: Ignoring `pipeline.knowledgePieces`
2113
2132
  if (pipeline.title === undefined || pipeline.title === '' || pipeline.title === DEFAULT_BOOK_TITLE) {
2133
+ // console.log('Pipeline is not prepared because title is undefined or empty', pipeline);
2114
2134
  return false;
2115
2135
  }
2116
2136
  if (!pipeline.personas.every((persona) => persona.modelsRequirements !== undefined)) {
2137
+ // console.log('Pipeline is not prepared because personas are not prepared', pipeline.personas);
2117
2138
  return false;
2118
2139
  }
2119
2140
  if (!pipeline.knowledgeSources.every((knowledgeSource) => knowledgeSource.preparationIds !== undefined)) {
2141
+ //console.log('Pipeline is not prepared because knowledge sources are not prepared', pipeline.knowledgeSources);
2120
2142
  return false;
2121
2143
  }
2122
2144
  /*
@@ -2137,75 +2159,6 @@ function isPipelinePrepared(pipeline) {
2137
2159
  * - [♨] Are tasks prepared
2138
2160
  */
2139
2161
 
2140
- /**
2141
- * Converts a JavaScript Object Notation (JSON) string into an object.
2142
- *
2143
- * Note: This is wrapper around `JSON.parse()` with better error and type handling
2144
- *
2145
- * @public exported from `@promptbook/utils`
2146
- */
2147
- function jsonParse(value) {
2148
- if (value === undefined) {
2149
- throw new Error(`Can not parse JSON from undefined value.`);
2150
- }
2151
- else if (typeof value !== 'string') {
2152
- console.error('Can not parse JSON from non-string value.', { text: value });
2153
- throw new Error(spaceTrim(`
2154
- Can not parse JSON from non-string value.
2155
-
2156
- The value type: ${typeof value}
2157
- See more in console.
2158
- `));
2159
- }
2160
- try {
2161
- return JSON.parse(value);
2162
- }
2163
- catch (error) {
2164
- if (!(error instanceof Error)) {
2165
- throw error;
2166
- }
2167
- throw new Error(spaceTrim((block) => `
2168
- ${block(error.message)}
2169
-
2170
- The JSON text:
2171
- ${block(value)}
2172
- `));
2173
- }
2174
- }
2175
- /**
2176
- * TODO: !!!! Use in Promptbook.studio
2177
- */
2178
-
2179
- /**
2180
- * Recursively converts JSON strings to JSON objects
2181
-
2182
- * @public exported from `@promptbook/utils`
2183
- */
2184
- function jsonStringsToJsons(object) {
2185
- if (object === null) {
2186
- return object;
2187
- }
2188
- if (Array.isArray(object)) {
2189
- return object.map(jsonStringsToJsons);
2190
- }
2191
- if (typeof object !== 'object') {
2192
- return object;
2193
- }
2194
- const newObject = { ...object };
2195
- for (const [key, value] of Object.entries(object)) {
2196
- if (typeof value === 'string' && isValidJsonString(value)) {
2197
- newObject[key] = jsonParse(value);
2198
- }
2199
- else {
2200
- newObject[key] = jsonStringsToJsons(value);
2201
- }
2202
- }
2203
- return newObject;
2204
- }
2205
- /**
2206
- * TODO: Type the return type correctly
2207
- */
2208
-
2209
2162
  /**
2210
2163
  * This error indicates problems parsing the format value
2211
2164
  *
@@ -2250,7 +2203,7 @@ class AuthenticationError extends Error {
2250
2203
  }
2251
2204
 
2252
2205
  /**
2253
- * This error indicates that the pipeline collection cannot be propperly loaded
2206
+ * This error indicates that the pipeline collection cannot be properly loaded
2254
2207
  *
2255
2208
  * @public exported from `@promptbook/core`
2256
2209
  */
@@ -2389,6 +2342,101 @@ const ALL_ERRORS = {
2389
2342
  * Note: [💞] Ignore a discrepancy between file name and entity name
2390
2343
  */
2391
2344
 
2345
+ /**
2346
+ * Serializes an error into a [🚉] JSON-serializable object
2347
+ *
2348
+ * @public exported from `@promptbook/utils`
2349
+ */
2350
+ function serializeError(error) {
2351
+ const { name, message, stack } = error;
2352
+ const { id } = error;
2353
+ if (!Object.keys(ALL_ERRORS).includes(name)) {
2354
+ console.error(spaceTrim((block) => `
2355
+
2356
+ Cannot serialize error with name "${name}"
2357
+
2358
+ Authors of Promptbook probably forgot to add this error into the list of errors:
2359
+ https://github.com/webgptorg/promptbook/blob/main/src/errors/0-index.ts
2360
+
2361
+
2362
+ ${block(stack || message)}
2363
+
2364
+ `));
2365
+ }
2366
+ return {
2367
+ name: name,
2368
+ message,
2369
+ stack,
2370
+ id, // Include id in the serialized object
2371
+ };
2372
+ }
2373
+
2374
+ /**
2375
+ * Converts a JavaScript Object Notation (JSON) string into an object.
2376
+ *
2377
+ * Note: This is a wrapper around `JSON.parse()` with better error and type handling
2378
+ *
2379
+ * @public exported from `@promptbook/utils`
2380
+ */
2381
+ function jsonParse(value) {
2382
+ if (value === undefined) {
2383
+ throw new Error(`Can not parse JSON from undefined value.`);
2384
+ }
2385
+ else if (typeof value !== 'string') {
2386
+ console.error('Can not parse JSON from non-string value.', { text: value });
2387
+ throw new Error(spaceTrim(`
2388
+ Can not parse JSON from non-string value.
2389
+
2390
+ The value type: ${typeof value}
2391
+ See more in console.
2392
+ `));
2393
+ }
2394
+ try {
2395
+ return JSON.parse(value);
2396
+ }
2397
+ catch (error) {
2398
+ if (!(error instanceof Error)) {
2399
+ throw error;
2400
+ }
2401
+ throw new Error(spaceTrim((block) => `
2402
+ ${block(error.message)}
2403
+
2404
+ The JSON text:
2405
+ ${block(value)}
2406
+ `));
2407
+ }
2408
+ }
2409
+
2410
+ /**
2411
+ * Recursively converts JSON strings to JSON objects
2412
+
2413
+ * @public exported from `@promptbook/utils`
2414
+ */
2415
+ function jsonStringsToJsons(object) {
2416
+ if (object === null) {
2417
+ return object;
2418
+ }
2419
+ if (Array.isArray(object)) {
2420
+ return object.map(jsonStringsToJsons);
2421
+ }
2422
+ if (typeof object !== 'object') {
2423
+ return object;
2424
+ }
2425
+ const newObject = { ...object };
2426
+ for (const [key, value] of Object.entries(object)) {
2427
+ if (typeof value === 'string' && isValidJsonString(value)) {
2428
+ newObject[key] = jsonParse(value);
2429
+ }
2430
+ else {
2431
+ newObject[key] = jsonStringsToJsons(value);
2432
+ }
2433
+ }
2434
+ return newObject;
2435
+ }
2436
+ /**
2437
+ * TODO: Type the return type correctly
2438
+ */
2439
+
2392
2440
  /**
2393
2441
  * Deserializes the error object
2394
2442
  *
@@ -2463,8 +2511,9 @@ function assertsTaskSuccessful(executionResult) {
2463
2511
  */
2464
2512
  function createTask(options) {
2465
2513
  const { taskType, taskProcessCallback } = options;
2514
+ let { title } = options;
2466
2515
  // TODO: [🐙] DRY
2467
- const taskId = `${taskType.toLowerCase().substring(0, 4)}-${$randomToken(8 /* <- TODO: To global config + Use Base58 to avoid simmilar char conflicts */)}`;
2516
+ const taskId = `${taskType.toLowerCase().substring(0, 4)}-${$randomToken(8 /* <- TODO: To global config + Use Base58 to avoid similar char conflicts */)}`;
2468
2517
  let status = 'RUNNING';
2469
2518
  const createdAt = new Date();
2470
2519
  let updatedAt = createdAt;
@@ -2474,6 +2523,10 @@ function createTask(options) {
2474
2523
  const partialResultSubject = new Subject();
2475
2524
  // <- Note: Not using `BehaviorSubject` because on error we can't access the last value
2476
2525
  const finalResultPromise = /* not await */ taskProcessCallback((newOngoingResult) => {
2526
+ if (newOngoingResult.title) {
2527
+ title = newOngoingResult.title;
2528
+ }
2529
+ updatedAt = new Date();
2477
2530
  Object.assign(currentValue, newOngoingResult);
2478
2531
  // <- TODO: assign deep
2479
2532
  partialResultSubject.next(newOngoingResult);
@@ -2519,17 +2572,24 @@ function createTask(options) {
2519
2572
  return {
2520
2573
  taskType,
2521
2574
  taskId,
2575
+ get promptbookVersion() {
2576
+ return PROMPTBOOK_ENGINE_VERSION;
2577
+ },
2578
+ get title() {
2579
+ return title;
2580
+ // <- Note: [1] These must be getters to allow changing the value in the future
2581
+ },
2522
2582
  get status() {
2523
2583
  return status;
2524
- // <- Note: [1] Theese must be getters to allow changing the value in the future
2584
+ // <- Note: [1] --||--
2525
2585
  },
2526
2586
  get createdAt() {
2527
2587
  return createdAt;
2528
- // <- Note: [1]
2588
+ // <- Note: [1] --||--
2529
2589
  },
2530
2590
  get updatedAt() {
2531
2591
  return updatedAt;
2532
- // <- Note: [1]
2592
+ // <- Note: [1] --||--
2533
2593
  },
2534
2594
  asPromise,
2535
2595
  asObservable() {
@@ -2537,15 +2597,15 @@ function createTask(options) {
2537
2597
  },
2538
2598
  get errors() {
2539
2599
  return errors;
2540
- // <- Note: [1]
2600
+ // <- Note: [1] --||--
2541
2601
  },
2542
2602
  get warnings() {
2543
2603
  return warnings;
2544
- // <- Note: [1]
2604
+ // <- Note: [1] --||--
2545
2605
  },
2546
2606
  get currentValue() {
2547
2607
  return currentValue;
2548
- // <- Note: [1]
2608
+ // <- Note: [1] --||--
2549
2609
  },
2550
2610
  };
2551
2611
  }
@@ -2554,64 +2614,6 @@ function createTask(options) {
2554
2614
  * TODO: [🐚] Split into more files and make `PrepareTask` & `RemoteTask` + split the function
2555
2615
  */
2556
2616
 
2557
- /**
2558
- * Serializes an error into a [🚉] JSON-serializable object
2559
- *
2560
- * @public exported from `@promptbook/utils`
2561
- */
2562
- function serializeError(error) {
2563
- const { name, message, stack } = error;
2564
- const { id } = error;
2565
- if (!Object.keys(ALL_ERRORS).includes(name)) {
2566
- console.error(spaceTrim((block) => `
2567
-
2568
- Cannot serialize error with name "${name}"
2569
-
2570
- Authors of Promptbook probably forgot to add this error into the list of errors:
2571
- https://github.com/webgptorg/promptbook/blob/main/src/errors/0-index.ts
2572
-
2573
-
2574
- ${block(stack || message)}
2575
-
2576
- `));
2577
- }
2578
- return {
2579
- name: name,
2580
- message,
2581
- stack,
2582
- id, // Include id in the serialized object
2583
- };
2584
- }
2585
-
2586
- /**
2587
- * Async version of Array.forEach
2588
- *
2589
- * @param array - Array to iterate over
2590
- * @param options - Options for the function
2591
- * @param callbackfunction - Function to call for each item
2592
- * @public exported from `@promptbook/utils`
2593
- * @deprecated [🪂] Use queues instead
2594
- */
2595
- async function forEachAsync(array, options, callbackfunction) {
2596
- const { maxParallelCount = Infinity } = options;
2597
- let index = 0;
2598
- let runningTasks = [];
2599
- const tasks = [];
2600
- for (const item of array) {
2601
- const currentIndex = index++;
2602
- const task = callbackfunction(item, currentIndex, array);
2603
- tasks.push(task);
2604
- runningTasks.push(task);
2605
- /* not await */ Promise.resolve(task).then(() => {
2606
- runningTasks = runningTasks.filter((t) => t !== task);
2607
- });
2608
- if (maxParallelCount < runningTasks.length) {
2609
- await Promise.race(runningTasks);
2610
- }
2611
- }
2612
- await Promise.all(tasks);
2613
- }
2614
-
2615
2617
  /**
2616
2618
  * Represents the uncertain value
2617
2619
  *
@@ -2655,7 +2657,7 @@ const ZERO_USAGE = $deepFreeze({
2655
2657
  *
2656
2658
  * @public exported from `@promptbook/core`
2657
2659
  */
2658
- $deepFreeze({
2660
+ const UNCERTAIN_USAGE = $deepFreeze({
2659
2661
  price: UNCERTAIN_ZERO_VALUE,
2660
2662
  input: {
2661
2663
  tokensCount: UNCERTAIN_ZERO_VALUE,
@@ -2680,6 +2682,35 @@ $deepFreeze({
2680
2682
  * Note: [💞] Ignore a discrepancy between file name and entity name
2681
2683
  */
2682
2684
 
2685
+ /**
2686
+ * Async version of Array.forEach
2687
+ *
2688
+ * @param array - Array to iterate over
2689
+ * @param options - Options for the function
2690
+ * @param callbackfunction - Function to call for each item
2691
+ * @public exported from `@promptbook/utils`
2692
+ * @deprecated [🪂] Use queues instead
2693
+ */
2694
+ async function forEachAsync(array, options, callbackfunction) {
2695
+ const { maxParallelCount = Infinity } = options;
2696
+ let index = 0;
2697
+ let runningTasks = [];
2698
+ const tasks = [];
2699
+ for (const item of array) {
2700
+ const currentIndex = index++;
2701
+ const task = callbackfunction(item, currentIndex, array);
2702
+ tasks.push(task);
2703
+ runningTasks.push(task);
2704
+ /* not await */ Promise.resolve(task).then(() => {
2705
+ runningTasks = runningTasks.filter((t) => t !== task);
2706
+ });
2707
+ if (maxParallelCount < runningTasks.length) {
2708
+ await Promise.race(runningTasks);
2709
+ }
2710
+ }
2711
+ await Promise.all(tasks);
2712
+ }
2713
+
2683
2714
  /**
2684
2715
  * Function `addUsage` will add multiple usages into one
2685
2716
  *
@@ -2739,12 +2770,14 @@ function countUsage(llmTools) {
2739
2770
  const spending = new Subject();
2740
2771
  const proxyTools = {
2741
2772
  get title() {
2742
- // TODO: [🧠] Maybe put here some suffix
2743
- return llmTools.title;
2773
+ return `${llmTools.title} (+usage)`;
2774
+ // <- TODO: [🧈] Maybe standardize the suffix when wrapping `LlmExecutionTools` up
2775
+ // <- TODO: [🧈][🧠] Does it make sense to suffix "(+usage)"?
2744
2776
  },
2745
2777
  get description() {
2746
- // TODO: [🧠] Maybe put here some suffix
2747
- return llmTools.description;
2778
+ return `${llmTools.description} (+usage)`;
2779
+ // <- TODO: [🧈] Maybe standardize the suffix when wrapping `LlmExecutionTools` up
2780
+ // <- TODO: [🧈][🧠] Does it make sense to suffix "(+usage)"?
2748
2781
  },
2749
2782
  checkConfiguration() {
2750
2783
  return /* not await */ llmTools.checkConfiguration();
@@ -2815,29 +2848,40 @@ class MultipleLlmExecutionTools {
2815
2848
  return 'Multiple LLM Providers';
2816
2849
  }
2817
2850
  get description() {
2818
- return this.llmExecutionTools.map(({ title }, index) => `${index + 1}) \`${title}\``).join('\n');
2851
+ const innerModelsTitlesAndDescriptions = this.llmExecutionTools
2852
+ .map(({ title, description }, index) => {
2853
+ const headLine = `${index + 1}) \`${title}\``;
2854
+ if (description === undefined) {
2855
+ return headLine;
2856
+ }
2857
+ return spaceTrim((block) => `
2858
+ ${headLine}
2859
+
2860
+ ${ /* <- Note: Indenting the description: */block(description)}
2861
+ `);
2862
+ })
2863
+ .join('\n\n');
2864
+ return spaceTrim((block) => `
2865
+ Multiple LLM Providers:
2866
+
2867
+ ${block(innerModelsTitlesAndDescriptions)}
2868
+ `);
2819
2869
  }
2820
2870
  /**
2821
2871
  * Check the configuration of all execution tools
2822
2872
  */
2823
2873
  async checkConfiguration() {
2824
- // TODO: Maybe do it in parallel
2825
- for (const llmExecutionTools of this.llmExecutionTools) {
2826
- await llmExecutionTools.checkConfiguration();
2827
- }
2874
+ // Note: Run checks in parallel
2875
+ await Promise.all(this.llmExecutionTools.map((tools) => tools.checkConfiguration()));
2828
2876
  }
2829
2877
  /**
2830
2878
  * List all available models that can be used
2831
2879
  * This list is a combination of all available models from all execution tools
2832
2880
  */
2833
2881
  async listModels() {
2834
- const availableModels = [];
2835
- for (const llmExecutionTools of this.llmExecutionTools) {
2836
- // TODO: [🪂] Obtain models in parallel
2837
- const models = await llmExecutionTools.listModels();
2838
- availableModels.push(...models);
2839
- }
2840
- return availableModels;
2882
+ // Obtain all models in parallel and flatten
2883
+ const modelArrays = await Promise.all(this.llmExecutionTools.map((tools) => tools.listModels()));
2884
+ return modelArrays.flat();
2841
2885
  }
2842
2886
  /**
2843
2887
  * Calls the best available chat model
@@ -3078,7 +3122,8 @@ async function preparePersona(personaDescription, tools, options) {
3078
3122
  */
3079
3123
 
3080
3124
  /**
3081
- * @@@
3125
+ * Safely retrieves the global scope object (window in browser, global in Node.js)
3126
+ * regardless of the JavaScript environment in which the code is running
3082
3127
  *
3083
3128
  * Note: `$` is used to indicate that this function is not a pure function - it access global scope
3084
3129
  *
@@ -3089,10 +3134,10 @@ function $getGlobalScope() {
3089
3134
  }
3090
3135
 
3091
3136
  /**
3092
- * @@@
3137
+ * Normalizes a text string to SCREAMING_CASE (all uppercase with underscores).
3093
3138
  *
3094
- * @param text @@@
3095
- * @returns @@@
3139
+ * @param text The text string to be converted to SCREAMING_CASE format.
3140
+ * @returns The normalized text in SCREAMING_CASE format.
3096
3141
  * @example 'HELLO_WORLD'
3097
3142
  * @example 'I_LOVE_PROMPTBOOK'
3098
3143
  * @public exported from `@promptbook/utils`
@@ -3144,10 +3189,10 @@ function normalizeTo_SCREAMING_CASE(text) {
3144
3189
  */
3145
3190
 
3146
3191
  /**
3147
- * @@@
3192
+ * Normalizes a text string to snake_case format.
3148
3193
  *
3149
- * @param text @@@
3150
- * @returns @@@
3194
+ * @param text The text string to be converted to snake_case format.
3195
+ * @returns The normalized text in snake_case format.
3151
3196
  * @example 'hello_world'
3152
3197
  * @example 'i_love_promptbook'
3153
3198
  * @public exported from `@promptbook/utils`
@@ -3157,11 +3202,11 @@ function normalizeTo_snake_case(text) {
3157
3202
  }
3158
3203
 
3159
3204
  /**
3160
- * Register is @@@
3205
+ * Global registry for storing and managing registered entities of a given type.
3161
3206
  *
3162
3207
  * Note: `$` is used to indicate that this function is not a pure function - it accesses and adds variables in global scope.
3163
3208
  *
3164
- * @private internal utility, exported are only signleton instances of this class
3209
+ * @private internal utility, exported are only singleton instances of this class
3165
3210
  */
3166
3211
  class $Register {
3167
3212
  constructor(registerName) {
@@ -3205,10 +3250,10 @@ class $Register {
3205
3250
  }
3206
3251
 
3207
3252
  /**
3208
- * @@@
3253
+ * Global registry for storing metadata about all available scrapers and converters.
3209
3254
  *
3210
- * Note: `$` is used to indicate that this interacts with the global scope
3211
- * @singleton Only one instance of each register is created per build, but thare can be more @@@
3255
+ * Note: `$` is used to indicate that this interacts with the global scope.
3256
+ * @singleton Only one instance of each register is created per build, but there can be more in different contexts (e.g., tests).
3212
3257
  * @public exported from `@promptbook/core`
3213
3258
  */
3214
3259
  const $scrapersMetadataRegister = new $Register('scrapers_metadata');
@@ -3217,10 +3262,11 @@ const $scrapersMetadataRegister = new $Register('scrapers_metadata');
3217
3262
  */
3218
3263
 
3219
3264
  /**
3220
- * @@@
3265
+ * Registry for all available scrapers in the system.
3266
+ * Central point for registering and accessing different types of content scrapers.
3221
3267
  *
3222
3268
  * Note: `$` is used to indicate that this interacts with the global scope
3223
- * @singleton Only one instance of each register is created per build, but thare can be more @@@
3269
+ * @singleton Only one instance of each register is created per build, but there can be more than one in different build modules
3224
3270
  * @public exported from `@promptbook/core`
3225
3271
  */
3226
3272
  const $scrapersRegister = new $Register('scraper_constructors');
@@ -3398,7 +3444,9 @@ const promptbookFetch = async (urlOrRequest, init) => {
3398
3444
  */
3399
3445
 
3400
3446
  /**
3401
- * @@@
3447
+ * Factory function that creates a handler for processing knowledge sources.
3448
+ * Provides standardized processing of different types of knowledge sources
3449
+ * across various scraper implementations.
3402
3450
  *
3403
3451
  * @public exported from `@promptbook/core`
3404
3452
  */
@@ -3537,7 +3585,7 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3537
3585
  }
3538
3586
 
3539
3587
  /**
3540
- * Prepares the knowle
3588
+ * Prepares the knowledge pieces
3541
3589
  *
3542
3590
  * @see https://github.com/webgptorg/promptbook/discussions/41
3543
3591
  * @public exported from `@promptbook/core`
@@ -3633,15 +3681,18 @@ TODO: [🧊] This is how it can look in future
3633
3681
  * TODO: [🧊] In future one preparation can take data from previous preparation and save tokens and time
3634
3682
  * Put `knowledgePieces` into `PrepareKnowledgeOptions`
3635
3683
  * TODO: [🪂] More than max things can run in parallel by accident [1,[2a,2b,_],[3a,3b,_]]
3636
- * TODO: [🧠][❎] Do here propper M:N mapping
3684
+ * TODO: [🧠][❎] Do here proper M:N mapping
3637
3685
  * [x] One source can make multiple pieces
3638
3686
  * [ ] One piece can have multiple sources
3639
3687
  */
3640
3688
 
3641
3689
  /**
3642
- * @@@
3690
+ * Prepares tasks by adding knowledge to the prompt and ensuring all necessary parameters are included.
3643
3691
  *
3644
- * @public exported from `@promptbook/core`
3692
+ * @param tasks Sequence of tasks that are chained together to form a pipeline
3693
+ * @returns A promise that resolves to the prepared tasks.
3694
+ *
3695
+ * @private internal utility of `preparePipeline`
3645
3696
  */
3646
3697
  async function prepareTasks(pipeline, tools, options) {
3647
3698
  const { maxParallelCount = DEFAULT_MAX_PARALLEL_COUNT } = options;
@@ -3679,7 +3730,7 @@ async function prepareTasks(pipeline, tools, options) {
3679
3730
  return { tasksPrepared };
3680
3731
  }
3681
3732
  /**
3682
- * TODO: [😂] Adding knowledge should be convert to async high-level abstractions, simmilar thing with expectations to sync high-level abstractions
3733
+ * TODO: [😂] Adding knowledge should be converted to async high-level abstractions, similar thing with expectations to sync high-level abstractions
3683
3734
  * TODO: [🧠] Add context to each task (if missing)
3684
3735
  * TODO: [🧠] What is better name `prepareTask` or `prepareTaskAndParameters`
3685
3736
  * TODO: [♨][main] !!3 Prepare index the examples and maybe tasks
@@ -3815,7 +3866,7 @@ async function preparePipeline(pipeline, tools, options) {
3815
3866
  order: ORDER_OF_PIPELINE_JSON,
3816
3867
  value: {
3817
3868
  ...pipeline,
3818
- // <- TODO: Probbably deeply clone the pipeline because `$exportJson` freezes the subobjects
3869
+ // <- TODO: Probably deeply clone the pipeline because `$exportJson` freezes the subobjects
3819
3870
  title,
3820
3871
  knowledgeSources: knowledgeSourcesPrepared,
3821
3872
  knowledgePieces: knowledgePiecesPrepared,
@@ -4078,7 +4129,7 @@ function union(...sets) {
4078
4129
  }
4079
4130
 
4080
4131
  /**
4081
- * @@@
4132
+ * Contains configuration options for parsing and generating CSV files, such as delimiters and quoting rules.
4082
4133
  *
4083
4134
  * @public exported from `@promptbook/core`
4084
4135
  */
@@ -4087,11 +4138,29 @@ const MANDATORY_CSV_SETTINGS = Object.freeze({
4087
4138
  // encoding: 'utf-8',
4088
4139
  });
4089
4140
 
4141
+ /**
4142
+ * Converts a CSV string into an object
4143
+ *
4144
+ * Note: This is a wrapper around `papaparse.parse()` with better autohealing
4145
+ *
4146
+ * @private - for now until `@promptbook/csv` is released
4147
+ */
4148
+ function csvParse(value /* <- TODO: string_csv */, settings, schema /* <- TODO: Make CSV Schemas */) {
4149
+ settings = { ...settings, ...MANDATORY_CSV_SETTINGS };
4150
+ // Note: Autoheal invalid '\n' characters
4151
+ if (settings.newline && !settings.newline.includes('\r') && value.includes('\r')) {
4152
+ console.warn('CSV string contains carriage return characters, but in the CSV settings the `newline` setting does not include them. Autohealing the CSV string.');
4153
+ value = value.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
4154
+ }
4155
+ const csv = parse(value, settings);
4156
+ return csv;
4157
+ }
4158
+
4090
4159
  /**
4091
4160
  * Function to check if a string is valid CSV
4092
4161
  *
4093
4162
  * @param value The string to check
4094
- * @returns True if the string is a valid CSV string, false otherwise
4163
+ * @returns `true` if the string is a valid CSV string, false otherwise
4095
4164
  *
4096
4165
  * @public exported from `@promptbook/utils`
4097
4166
  */
@@ -4115,7 +4184,7 @@ function isValidCsvString(value) {
4115
4184
  * @public exported from `@promptbook/core`
4116
4185
  * <- TODO: [🏢] Export from package `@promptbook/csv`
4117
4186
  */
4118
- const CsvFormatDefinition = {
4187
+ const CsvFormatParser = {
4119
4188
  formatName: 'CSV',
4120
4189
  aliases: ['SPREADSHEET', 'TABLE'],
4121
4190
  isValid(value, settings, schema) {
@@ -4127,12 +4196,12 @@ const CsvFormatDefinition = {
4127
4196
  heal(value, settings, schema) {
4128
4197
  throw new Error('Not implemented');
4129
4198
  },
4130
- subvalueDefinitions: [
4199
+ subvalueParsers: [
4131
4200
  {
4132
4201
  subvalueName: 'ROW',
4133
- async mapValues(value, outputParameterName, settings, mapCallback) {
4134
- // TODO: [👨🏾‍🤝‍👨🏼] DRY csv parsing
4135
- const csv = parse(value, { ...settings, ...MANDATORY_CSV_SETTINGS });
4202
+ async mapValues(options) {
4203
+ const { value, outputParameterName, settings, mapCallback, onProgress } = options;
4204
+ const csv = csvParse(value, settings);
4136
4205
  if (csv.errors.length !== 0) {
4137
4206
  throw new CsvFormatError(spaceTrim((block) => `
4138
4207
  CSV parsing error
@@ -4147,23 +4216,37 @@ const CsvFormatDefinition = {
4147
4216
  ${block(value)}
4148
4217
  `));
4149
4218
  }
4150
- const mappedData = await Promise.all(csv.data.map(async (row, index) => {
4219
+ const mappedData = [];
4220
+ const length = csv.data.length;
4221
+ for (let index = 0; index < length; index++) {
4222
+ const row = csv.data[index];
4151
4223
  if (row[outputParameterName]) {
4152
4224
  throw new CsvFormatError(`Can not overwrite existing column "${outputParameterName}" in CSV row`);
4153
4225
  }
4154
- return {
4226
+ const mappedRow = {
4155
4227
  ...row,
4156
- [outputParameterName]: await mapCallback(row, index),
4228
+ [outputParameterName]: await mapCallback(row, index, length),
4157
4229
  };
4158
- }));
4230
+ mappedData.push(mappedRow);
4231
+ if (onProgress) {
4232
+ // Note: Report the CSV with all rows mapped so far
4233
+ /*
4234
+ // TODO: [🛕] Report progress with all the rows including the pending ones
4235
+ const progressData = mappedData.map((row, i) =>
4236
+ i > index ? { ...row, [outputParameterName]: PENDING_VALUE_PLACEHOLDER } : row,
4237
+ );
4238
+ */
4239
+ await onProgress(unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS }));
4240
+ }
4241
+ }
4159
4242
  return unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
4160
4243
  },
4161
4244
  },
4162
4245
  {
4163
4246
  subvalueName: 'CELL',
4164
- async mapValues(value, outputParameterName, settings, mapCallback) {
4165
- // TODO: [👨🏾‍🤝‍👨🏼] DRY csv parsing
4166
- const csv = parse(value, { ...settings, ...MANDATORY_CSV_SETTINGS });
4247
+ async mapValues(options) {
4248
+ const { value, settings, mapCallback, onProgress } = options;
4249
+ const csv = csvParse(value, settings);
4167
4250
  if (csv.errors.length !== 0) {
4168
4251
  throw new CsvFormatError(spaceTrim((block) => `
4169
4252
  CSV parsing error
@@ -4179,9 +4262,9 @@ const CsvFormatDefinition = {
4179
4262
  `));
4180
4263
  }
4181
4264
  const mappedData = await Promise.all(csv.data.map(async (row, rowIndex) => {
4182
- return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex) => {
4265
+ return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex, array) => {
4183
4266
  const index = rowIndex * Object.keys(row).length + columnIndex;
4184
- return /* not await */ mapCallback({ [key]: value }, index);
4267
+ return /* not await */ mapCallback({ [key]: value }, index, array.length);
4185
4268
  }));
4186
4269
  }));
4187
4270
  return unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
@@ -4190,10 +4273,10 @@ const CsvFormatDefinition = {
4190
4273
  ],
4191
4274
  };
4192
4275
  /**
4193
- * TODO: [🍓] In `CsvFormatDefinition` implement simple `isValid`
4194
- * TODO: [🍓] In `CsvFormatDefinition` implement partial `canBeValid`
4195
- * TODO: [🍓] In `CsvFormatDefinition` implement `heal
4196
- * TODO: [🍓] In `CsvFormatDefinition` implement `subvalueDefinitions`
4276
+ * TODO: [🍓] In `CsvFormatParser` implement simple `isValid`
4277
+ * TODO: [🍓] In `CsvFormatParser` implement partial `canBeValid`
4278
+ * TODO: [🍓] In `CsvFormatParser` implement `heal`
4279
+ * TODO: [🍓] In `CsvFormatParser` implement `subvalueParsers`
4197
4280
  * TODO: [🏢] Allow to expect something inside CSV objects and other formats
4198
4281
  */
4199
4282
 
@@ -4202,7 +4285,7 @@ const CsvFormatDefinition = {
4202
4285
  *
4203
4286
  * @private still in development [🏢]
4204
4287
  */
4205
- const JsonFormatDefinition = {
4288
+ const JsonFormatParser = {
4206
4289
  formatName: 'JSON',
4207
4290
  mimeType: 'application/json',
4208
4291
  isValid(value, settings, schema) {
@@ -4214,28 +4297,28 @@ const JsonFormatDefinition = {
4214
4297
  heal(value, settings, schema) {
4215
4298
  throw new Error('Not implemented');
4216
4299
  },
4217
- subvalueDefinitions: [],
4300
+ subvalueParsers: [],
4218
4301
  };
4219
4302
  /**
4220
- * TODO: [🧠] Maybe propper instance of object
4303
+ * TODO: [🧠] Maybe proper instance of object
4221
4304
  * TODO: [0] Make string_serialized_json
4222
4305
  * TODO: [1] Make type for JSON Settings and Schema
4223
4306
  * TODO: [🧠] What to use for validating JSONs - JSON Schema, ZoD, typescript types/interfaces,...?
4224
- * TODO: [🍓] In `JsonFormatDefinition` implement simple `isValid`
4225
- * TODO: [🍓] In `JsonFormatDefinition` implement partial `canBeValid`
4226
- * TODO: [🍓] In `JsonFormatDefinition` implement `heal
4227
- * TODO: [🍓] In `JsonFormatDefinition` implement `subvalueDefinitions`
4307
+ * TODO: [🍓] In `JsonFormatParser` implement simple `isValid`
4308
+ * TODO: [🍓] In `JsonFormatParser` implement partial `canBeValid`
4309
+ * TODO: [🍓] In `JsonFormatParser` implement `heal`
4310
+ * TODO: [🍓] In `JsonFormatParser` implement `subvalueParsers`
4228
4311
  * TODO: [🏢] Allow to expect something inside JSON objects and other formats
4229
4312
  */
4230
4313
 
4231
4314
  /**
4232
4315
  * Definition for any text - this will be always valid
4233
4316
  *
4234
- * Note: This is not useful for validation, but for splitting and mapping with `subvalueDefinitions`
4317
+ * Note: This is not useful for validation, but for splitting and mapping with `subvalueParsers`
4235
4318
  *
4236
4319
  * @public exported from `@promptbook/core`
4237
4320
  */
4238
- const TextFormatDefinition = {
4321
+ const TextFormatParser = {
4239
4322
  formatName: 'TEXT',
4240
4323
  isValid(value) {
4241
4324
  return typeof value === 'string';
@@ -4244,19 +4327,20 @@ const TextFormatDefinition = {
4244
4327
  return typeof partialValue === 'string';
4245
4328
  },
4246
4329
  heal() {
4247
- throw new UnexpectedError('It does not make sense to call `TextFormatDefinition.heal`');
4330
+ throw new UnexpectedError('It does not make sense to call `TextFormatParser.heal`');
4248
4331
  },
4249
- subvalueDefinitions: [
4332
+ subvalueParsers: [
4250
4333
  {
4251
4334
  subvalueName: 'LINE',
4252
- async mapValues(value, outputParameterName, settings, mapCallback) {
4335
+ async mapValues(options) {
4336
+ const { value, mapCallback, onProgress } = options;
4253
4337
  const lines = value.split('\n');
4254
- const mappedLines = await Promise.all(lines.map((lineContent, lineNumber) =>
4338
+ const mappedLines = await Promise.all(lines.map((lineContent, lineNumber, array) =>
4255
4339
  // TODO: [🧠] Maybe option to skip empty line
4256
4340
  /* not await */ mapCallback({
4257
4341
  lineContent,
4258
4342
  // TODO: [🧠] Maybe also put here `lineNumber`
4259
- }, lineNumber)));
4343
+ }, lineNumber, array.length)));
4260
4344
  return mappedLines.join('\n');
4261
4345
  },
4262
4346
  },
@@ -4266,10 +4350,10 @@ const TextFormatDefinition = {
4266
4350
  /**
4267
4351
  * TODO: [1] Make type for XML Text and Schema
4268
4352
  * TODO: [🧠][🤠] Here should be all words, characters, lines, paragraphs, pages available as subvalues
4269
- * TODO: [🍓] In `TextFormatDefinition` implement simple `isValid`
4270
- * TODO: [🍓] In `TextFormatDefinition` implement partial `canBeValid`
4271
- * TODO: [🍓] In `TextFormatDefinition` implement `heal
4272
- * TODO: [🍓] In `TextFormatDefinition` implement `subvalueDefinitions`
4353
+ * TODO: [🍓] In `TextFormatParser` implement simple `isValid`
4354
+ * TODO: [🍓] In `TextFormatParser` implement partial `canBeValid`
4355
+ * TODO: [🍓] In `TextFormatParser` implement `heal`
4356
+ * TODO: [🍓] In `TextFormatParser` implement `subvalueParsers`
4273
4357
  * TODO: [🏢] Allow to expect something inside each item of list and other formats
4274
4358
  */
4275
4359
 
@@ -4277,7 +4361,7 @@ const TextFormatDefinition = {
4277
4361
  * Function to check if a string is valid XML
4278
4362
  *
4279
4363
  * @param value
4280
- * @returns True if the string is a valid XML string, false otherwise
4364
+ * @returns `true` if the string is a valid XML string, false otherwise
4281
4365
  *
4282
4366
  * @public exported from `@promptbook/utils`
4283
4367
  */
@@ -4302,7 +4386,7 @@ function isValidXmlString(value) {
4302
4386
  *
4303
4387
  * @private still in development [🏢]
4304
4388
  */
4305
- const XmlFormatDefinition = {
4389
+ const XmlFormatParser = {
4306
4390
  formatName: 'XML',
4307
4391
  mimeType: 'application/xml',
4308
4392
  isValid(value, settings, schema) {
@@ -4314,17 +4398,17 @@ const XmlFormatDefinition = {
4314
4398
  heal(value, settings, schema) {
4315
4399
  throw new Error('Not implemented');
4316
4400
  },
4317
- subvalueDefinitions: [],
4401
+ subvalueParsers: [],
4318
4402
  };
4319
4403
  /**
4320
- * TODO: [🧠] Maybe propper instance of object
4404
+ * TODO: [🧠] Maybe proper instance of object
4321
4405
  * TODO: [0] Make string_serialized_xml
4322
4406
  * TODO: [1] Make type for XML Settings and Schema
4323
4407
  * TODO: [🧠] What to use for validating XMLs - XSD,...
4324
- * TODO: [🍓] In `XmlFormatDefinition` implement simple `isValid`
4325
- * TODO: [🍓] In `XmlFormatDefinition` implement partial `canBeValid`
4326
- * TODO: [🍓] In `XmlFormatDefinition` implement `heal
4327
- * TODO: [🍓] In `XmlFormatDefinition` implement `subvalueDefinitions`
4408
+ * TODO: [🍓] In `XmlFormatParser` implement simple `isValid`
4409
+ * TODO: [🍓] In `XmlFormatParser` implement partial `canBeValid`
4410
+ * TODO: [🍓] In `XmlFormatParser` implement `heal`
4411
+ * TODO: [🍓] In `XmlFormatParser` implement `subvalueParsers`
4328
4412
  * TODO: [🏢] Allow to expect something inside XML and other formats
4329
4413
  */
4330
4414
 
@@ -4333,24 +4417,19 @@ const XmlFormatDefinition = {
4333
4417
  *
4334
4418
  * @private internal index of `...` <- TODO [🏢]
4335
4419
  */
4336
- const FORMAT_DEFINITIONS = [
4337
- JsonFormatDefinition,
4338
- XmlFormatDefinition,
4339
- TextFormatDefinition,
4340
- CsvFormatDefinition,
4341
- ];
4420
+ const FORMAT_DEFINITIONS = [JsonFormatParser, XmlFormatParser, TextFormatParser, CsvFormatParser];
4342
4421
  /**
4343
4422
  * Note: [💞] Ignore a discrepancy between file name and entity name
4344
4423
  */
4345
4424
 
4346
4425
  /**
4347
- * Maps available parameters to expected parameters
4426
+ * Maps available parameters to expected parameters for a pipeline task.
4348
4427
  *
4349
4428
  * The strategy is:
4350
- * 1) @@@
4351
- * 2) @@@
4429
+ * 1) First, match parameters by name where both available and expected.
4430
+ * 2) Then, if there are unmatched expected and available parameters, map them by order.
4352
4431
  *
4353
- * @throws {PipelineExecutionError} @@@
4432
+ * @throws {PipelineExecutionError} If the number of unmatched expected and available parameters does not match, or mapping is ambiguous.
4354
4433
  * @private within the repository used in `createPipelineExecutor`
4355
4434
  */
4356
4435
  function mapAvailableToExpectedParameters(options) {
@@ -4373,7 +4452,7 @@ function mapAvailableToExpectedParameters(options) {
4373
4452
  else if (!availableParametersNames.has(parameterName) && expectedParameterNames.has(parameterName)) ;
4374
4453
  }
4375
4454
  if (expectedParameterNames.size === 0) {
4376
- // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent @@@
4455
+ // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
4377
4456
  Object.freeze(mappedParameters);
4378
4457
  return mappedParameters;
4379
4458
  }
@@ -4404,7 +4483,7 @@ function mapAvailableToExpectedParameters(options) {
4404
4483
  for (let i = 0; i < expectedParameterNames.size; i++) {
4405
4484
  mappedParameters[expectedParameterNamesArray[i]] = availableParameters[availableParametersNamesArray[i]];
4406
4485
  }
4407
- // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent @@@
4486
+ // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
4408
4487
  Object.freeze(mappedParameters);
4409
4488
  return mappedParameters;
4410
4489
  }
@@ -4412,8 +4491,8 @@ function mapAvailableToExpectedParameters(options) {
4412
4491
  /**
4413
4492
  * Extracts all code blocks from markdown.
4414
4493
  *
4415
- * Note: There are multiple simmilar function:
4416
- * - `extractBlock` just extracts the content of the code block which is also used as build-in function for postprocessing
4494
+ * Note: There are multiple similar functions:
4495
+ * - `extractBlock` just extracts the content of the code block which is also used as built-in function for postprocessing
4417
4496
  * - `extractJsonBlock` extracts exactly one valid JSON code block
4418
4497
  * - `extractOneBlockFromMarkdown` extracts exactly one code block with language of the code block
4419
4498
  * - `extractAllBlocksFromMarkdown` extracts all code blocks with language of the code block
@@ -4463,7 +4542,7 @@ function extractAllBlocksFromMarkdown(markdown) {
4463
4542
  if (currentCodeBlock.content !== '') {
4464
4543
  currentCodeBlock.content += '\n';
4465
4544
  }
4466
- currentCodeBlock.content += line.split('\\`\\`\\`').join('```') /* <- TODO: Maybe make propper unescape */;
4545
+ currentCodeBlock.content += line.split('\\`\\`\\`').join('```') /* <- TODO: Maybe make proper unescape */;
4467
4546
  }
4468
4547
  }
4469
4548
  if (currentCodeBlock !== null) {
@@ -4483,7 +4562,7 @@ function extractAllBlocksFromMarkdown(markdown) {
4483
4562
  * - When there are multiple JSON code blocks the function throws a `ParseError`
4484
4563
  *
4485
4564
  * Note: It is not important if marked as ```json BUT if it is VALID JSON
4486
- * Note: There are multiple simmilar function:
4565
+ * Note: There are multiple similar functions:
4487
4566
  * - `extractBlock` just extracts the content of the code block which is also used as built-in function for postprocessing
4488
4567
  * - `extractJsonBlock` extracts exactly one valid JSON code block
4489
4568
  * - `extractOneBlockFromMarkdown` extracts exactly one code block with language of the code block
@@ -4508,7 +4587,7 @@ function extractJsonBlock(markdown) {
4508
4587
  }
4509
4588
  /**
4510
4589
  * TODO: Add some auto-healing logic + extract YAML, JSON5, TOML, etc.
4511
- * TODO: [🏢] Make this logic part of `JsonFormatDefinition` or `isValidJsonString`
4590
+ * TODO: [🏢] Make this logic part of `JsonFormatParser` or `isValidJsonString`
4512
4591
  */
4513
4592
 
4514
4593
  /**
@@ -4710,14 +4789,14 @@ const CountUtils = {
4710
4789
  PAGES: countPages,
4711
4790
  };
4712
4791
  /**
4713
- * TODO: [🧠][🤠] This should be probbably as part of `TextFormatDefinition`
4792
+ * TODO: [🧠][🤠] This should probably be part of `TextFormatParser`
4714
4793
  * Note: [💞] Ignore a discrepancy between file name and entity name
4715
4794
  */
4716
4795
 
4717
4796
  /**
4718
4797
  * Function checkExpectations will check if the expectations on given value are met
4719
4798
  *
4720
- * Note: There are two simmilar functions:
4799
+ * Note: There are two similar functions:
4721
4800
  * - `checkExpectations` which throws an error if the expectations are not met
4722
4801
  * - `isPassingExpectations` which returns a boolean
4723
4802
  *
@@ -4738,13 +4817,17 @@ function checkExpectations(expectations, value) {
4738
4817
  }
4739
4818
  /**
4740
4819
  * TODO: [💝] Unite object for expecting amount and format
4741
- * TODO: [🧠][🤠] This should be part of `TextFormatDefinition`
4820
+ * TODO: [🧠][🤠] This should be part of `TextFormatParser`
4742
4821
  * Note: [💝] and [🤠] are interconnected together
4743
4822
  */
4744
4823
 
4745
4824
  /**
4746
- * @@@
4825
+ * Executes a pipeline task with multiple attempts, including joker and retry logic. Handles different task types
4826
+ * (prompt, script, dialog, etc.), applies postprocessing, checks expectations, and updates the execution report.
4827
+ * Throws errors if execution fails after all attempts.
4747
4828
  *
4829
+ * @param options - The options for execution, including task, parameters, pipeline, and configuration.
4830
+ * @returns The result string of the executed task.
4748
4831
  * @private internal utility of `createPipelineExecutor`
4749
4832
  */
4750
4833
  async function executeAttempts(options) {
@@ -4966,7 +5049,7 @@ async function executeAttempts(options) {
4966
5049
  if (task.format) {
4967
5050
  if (task.format === 'JSON') {
4968
5051
  if (!isValidJsonString($ongoingTaskResult.$resultString || '')) {
4969
- // TODO: [🏢] Do more universally via `FormatDefinition`
5052
+ // TODO: [🏢] Do more universally via `FormatParser`
4970
5053
  try {
4971
5054
  $ongoingTaskResult.$resultString = extractJsonBlock($ongoingTaskResult.$resultString || '');
4972
5055
  }
@@ -5068,12 +5151,16 @@ async function executeAttempts(options) {
5068
5151
  */
5069
5152
 
5070
5153
  /**
5071
- * @@@
5154
+ * Executes a pipeline task that requires mapping or iterating over subvalues of a parameter (such as rows in a CSV).
5155
+ * Handles format and subformat resolution, error handling, and progress reporting.
5156
+ *
5157
+ * @param options - Options for execution, including task details and progress callback.
5158
+ * @returns The result of the subvalue mapping or execution attempts.
5072
5159
  *
5073
5160
  * @private internal utility of `createPipelineExecutor`
5074
5161
  */
5075
5162
  async function executeFormatSubvalues(options) {
5076
- const { task, jokerParameterNames, parameters, priority, csvSettings, pipelineIdentification } = options;
5163
+ const { task, jokerParameterNames, parameters, priority, csvSettings, onProgress, pipelineIdentification } = options;
5077
5164
  if (task.foreach === undefined) {
5078
5165
  return /* not await */ executeAttempts(options);
5079
5166
  }
@@ -5104,16 +5191,16 @@ async function executeFormatSubvalues(options) {
5104
5191
  ${block(pipelineIdentification)}
5105
5192
  `));
5106
5193
  }
5107
- const subvalueDefinition = formatDefinition.subvalueDefinitions.find((subvalueDefinition) => [subvalueDefinition.subvalueName, ...(subvalueDefinition.aliases || [])].includes(task.foreach.subformatName));
5108
- if (subvalueDefinition === undefined) {
5194
+ const subvalueParser = formatDefinition.subvalueParsers.find((subvalueParser) => [subvalueParser.subvalueName, ...(subvalueParser.aliases || [])].includes(task.foreach.subformatName));
5195
+ if (subvalueParser === undefined) {
5109
5196
  throw new UnexpectedError(
5110
5197
  // <- TODO: [🧠][🧐] Should be formats fixed per promptbook version or behave as plugins (=> change UnexpectedError)
5111
5198
  spaceTrim((block) => `
5112
5199
  Unsupported subformat name "${task.foreach.subformatName}" for format "${task.foreach.formatName}"
5113
5200
 
5114
5201
  Available subformat names for format "${formatDefinition.formatName}":
5115
- ${block(formatDefinition.subvalueDefinitions
5116
- .map((subvalueDefinition) => subvalueDefinition.subvalueName)
5202
+ ${block(formatDefinition.subvalueParsers
5203
+ .map((subvalueParser) => subvalueParser.subvalueName)
5117
5204
  .map((subvalueName) => `- ${subvalueName}`)
5118
5205
  .join('\n'))}
5119
5206
 
@@ -5125,55 +5212,85 @@ async function executeFormatSubvalues(options) {
5125
5212
  let formatSettings;
5126
5213
  if (formatDefinition.formatName === 'CSV') {
5127
5214
  formatSettings = csvSettings;
5128
- // <- TODO: [🤹‍♂️] More universal, make simmilar pattern for other formats for example \n vs \r\n in text
5129
- }
5130
- const resultString = await subvalueDefinition.mapValues(parameterValue, task.foreach.outputSubparameterName, formatSettings, async (subparameters, index) => {
5131
- let mappedParameters;
5132
- // TODO: [🤹‍♂️][🪂] Limit to N concurrent executions
5133
- // TODO: When done [🐚] Report progress also for each subvalue here
5134
- try {
5135
- mappedParameters = mapAvailableToExpectedParameters({
5136
- expectedParameters: Object.fromEntries(task.foreach.inputSubparameterNames.map((subparameterName) => [subparameterName, null])),
5137
- availableParameters: subparameters,
5138
- });
5139
- }
5140
- catch (error) {
5141
- if (!(error instanceof PipelineExecutionError)) {
5142
- throw error;
5215
+ // <- TODO: [🤹‍♂️] More universal, make similar pattern for other formats for example \n vs \r\n in text
5216
+ }
5217
+ const resultString = await subvalueParser.mapValues({
5218
+ value: parameterValue,
5219
+ outputParameterName: task.foreach.outputSubparameterName,
5220
+ settings: formatSettings,
5221
+ onProgress(partialResultString) {
5222
+ return onProgress(Object.freeze({
5223
+ [task.resultingParameterName]: partialResultString,
5224
+ }));
5225
+ },
5226
+ async mapCallback(subparameters, index, length) {
5227
+ let mappedParameters;
5228
+ try {
5229
+ mappedParameters = mapAvailableToExpectedParameters({
5230
+ expectedParameters: Object.fromEntries(task.foreach.inputSubparameterNames.map((subparameterName) => [subparameterName, null])),
5231
+ availableParameters: subparameters,
5232
+ });
5143
5233
  }
5144
- throw new PipelineExecutionError(spaceTrim((block) => `
5145
- ${error.message}
5234
+ catch (error) {
5235
+ if (!(error instanceof PipelineExecutionError)) {
5236
+ throw error;
5237
+ }
5238
+ const highLevelError = new PipelineExecutionError(spaceTrim((block) => `
5239
+ ${error.message}
5146
5240
 
5147
- This is error in FOREACH command
5148
- You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5241
+ This is error in FOREACH command when mapping ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
5242
+ You have probably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5149
5243
 
5150
- ${block(pipelineIdentification)}
5151
- Subparameter index: ${index}
5152
- `));
5153
- }
5154
- const allSubparameters = {
5155
- ...parameters,
5156
- ...mappedParameters,
5157
- };
5158
- // Note: [👨‍👨‍👧] Now we can freeze `subparameters` because we are sure that all and only used parameters are defined and are not going to be changed
5159
- Object.freeze(allSubparameters);
5160
- const subresultString = await executeAttempts({
5161
- ...options,
5162
- priority: priority + index,
5163
- parameters: allSubparameters,
5164
- pipelineIdentification: spaceTrim((block) => `
5165
- ${block(pipelineIdentification)}
5166
- Subparameter index: ${index}
5167
- `),
5168
- });
5169
- return subresultString;
5244
+ ${block(pipelineIdentification)}
5245
+ `));
5246
+ if (length > BIG_DATASET_TRESHOLD) {
5247
+ console.error(highLevelError);
5248
+ return FAILED_VALUE_PLACEHOLDER;
5249
+ }
5250
+ throw highLevelError;
5251
+ }
5252
+ const allSubparameters = {
5253
+ ...parameters,
5254
+ ...mappedParameters,
5255
+ };
5256
+ Object.freeze(allSubparameters);
5257
+ try {
5258
+ const subresultString = await executeAttempts({
5259
+ ...options,
5260
+ priority: priority + index,
5261
+ parameters: allSubparameters,
5262
+ pipelineIdentification: spaceTrim((block) => `
5263
+ ${block(pipelineIdentification)}
5264
+ Subparameter index: ${index}
5265
+ `),
5266
+ });
5267
+ return subresultString;
5268
+ }
5269
+ catch (error) {
5270
+ if (length > BIG_DATASET_TRESHOLD) {
5271
+ console.error(spaceTrim((block) => `
5272
+ ${error.message}
5273
+
5274
+ This is error in FOREACH command when processing ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
5275
+
5276
+ ${block(pipelineIdentification)}
5277
+ `));
5278
+ return FAILED_VALUE_PLACEHOLDER;
5279
+ }
5280
+ throw error;
5281
+ }
5282
+ },
5170
5283
  });
5171
5284
  return resultString;
5172
5285
  }
5173
5286
 
5174
5287
  /**
5175
- * @@@
5288
+ * Returns the context for a given task, typically used to provide additional information or variables
5289
+ * required for the execution of the task within a pipeline. The context is returned as a string value
5290
+ * that may include markdown formatting.
5176
5291
  *
5292
+ * @param task - The task for which the context is being generated. This should be a deeply immutable TaskJson object.
5293
+ * @returns The context as a string, formatted as markdown and parameter value.
5177
5294
  * @private internal utility of `createPipelineExecutor`
5178
5295
  */
5179
5296
  async function getContextForTask(task) {
@@ -5181,7 +5298,7 @@ async function getContextForTask(task) {
5181
5298
  }
5182
5299
 
5183
5300
  /**
5184
- * @@@
5301
+ * Retrieves example values or templates for a given task, used to guide or validate pipeline execution.
5185
5302
  *
5186
5303
  * @private internal utility of `createPipelineExecutor`
5187
5304
  */
@@ -5190,91 +5307,128 @@ async function getExamplesForTask(task) {
5190
5307
  }
5191
5308
 
5192
5309
  /**
5193
- * @@@
5310
+ * Computes the cosine similarity between two embedding vectors
5311
+ *
5312
+ * Note: This is a helper function for RAG (retrieval-augmented generation)
5194
5313
  *
5195
- * Here is the place where RAG (retrieval-augmented generation) happens
5314
+ * @param embeddingVector1
5315
+ * @param embeddingVector2
5316
+ * @returns `1 - cosine similarity` of the two vectors (i.e. the cosine distance), so a lower value means the vectors are more similar
5317
+ *
5318
+ * @public exported from `@promptbook/core`
5319
+ */
5320
+ function computeCosineSimilarity(embeddingVector1, embeddingVector2) {
5321
+ if (embeddingVector1.length !== embeddingVector2.length) {
5322
+ throw new TypeError('Embedding vectors must have the same length');
5323
+ }
5324
+ const dotProduct = embeddingVector1.reduce((sum, value, index) => sum + value * embeddingVector2[index], 0);
5325
+ const magnitude1 = Math.sqrt(embeddingVector1.reduce((sum, value) => sum + value * value, 0));
5326
+ const magnitude2 = Math.sqrt(embeddingVector2.reduce((sum, value) => sum + value * value, 0));
5327
+ return 1 - dotProduct / (magnitude1 * magnitude2);
5328
+ }
5329
+
5330
+ /**
5331
+ * Formats knowledge pieces as a bullet list: one `- <content>` line per piece, joined by newlines
5332
+ * @param knowledgePieces
5333
+ * @returns
5334
+ *
5335
+ * @private internal utility of `createPipelineExecutor`
5336
+ */
5337
+ function knowledgePiecesToString(knowledgePieces) {
5338
+ return knowledgePieces
5339
+ .map((knowledgePiece) => {
5340
+ const { content } = knowledgePiece;
5341
+ return `- ${content}`;
5342
+ })
5343
+ .join('\n');
5344
+ // <- TODO: [🧠] Some smarter aggregation of knowledge pieces, single-line vs multi-line vs mixed
5345
+ }
5346
+
5347
+ /**
5348
+ * Retrieves the most relevant knowledge pieces for a given task using embedding-based similarity search.
5349
+ * This is where retrieval-augmented generation (RAG) is performed to enhance the task with external knowledge.
5196
5350
  *
5197
5351
  * @private internal utility of `createPipelineExecutor`
5198
5352
  */
5199
5353
  async function getKnowledgeForTask(options) {
5200
- const { tools, preparedPipeline, task } = options;
5354
+ const { tools, preparedPipeline, task, parameters } = options;
5201
5355
  const firstKnowlegePiece = preparedPipeline.knowledgePieces[0];
5202
5356
  const firstKnowlegeIndex = firstKnowlegePiece === null || firstKnowlegePiece === void 0 ? void 0 : firstKnowlegePiece.index[0];
5203
5357
  // <- TODO: Do not use just first knowledge piece and first index to determine embedding model, use also keyword search
5204
5358
  if (firstKnowlegePiece === undefined || firstKnowlegeIndex === undefined) {
5205
- return 'No knowledge pieces found';
5359
+ return ''; // <- Note: No knowledge present, return empty string
5206
5360
  }
5207
- // TODO: [🚐] Make arrayable LLMs -> single LLM DRY
5208
- const _llms = arrayableToArray(tools.llm);
5209
- const llmTools = _llms.length === 1 ? _llms[0] : joinLlmExecutionTools(..._llms);
5210
- const taskEmbeddingPrompt = {
5211
- title: 'Knowledge Search',
5212
- modelRequirements: {
5213
- modelVariant: 'EMBEDDING',
5214
- modelName: firstKnowlegeIndex.modelName,
5215
- },
5216
- content: task.content,
5217
- parameters: {
5218
- /* !!!!!!!! */
5219
- },
5220
- };
5221
- const taskEmbeddingResult = await llmTools.callEmbeddingModel(taskEmbeddingPrompt);
5222
- const knowledgePiecesWithRelevance = preparedPipeline.knowledgePieces.map((knowledgePiece) => {
5223
- const { index } = knowledgePiece;
5224
- const knowledgePieceIndex = index.find((i) => i.modelName === firstKnowlegeIndex.modelName);
5225
- // <- TODO: Do not use just first knowledge piece and first index to determine embedding model
5226
- if (knowledgePieceIndex === undefined) {
5361
+ try {
5362
+ // TODO: [🚐] Make arrayable LLMs -> single LLM DRY
5363
+ const _llms = arrayableToArray(tools.llm);
5364
+ const llmTools = _llms.length === 1 ? _llms[0] : joinLlmExecutionTools(..._llms);
5365
+ const taskEmbeddingPrompt = {
5366
+ title: 'Knowledge Search',
5367
+ modelRequirements: {
5368
+ modelVariant: 'EMBEDDING',
5369
+ modelName: firstKnowlegeIndex.modelName,
5370
+ },
5371
+ content: task.content,
5372
+ parameters,
5373
+ };
5374
+ const taskEmbeddingResult = await llmTools.callEmbeddingModel(taskEmbeddingPrompt);
5375
+ const knowledgePiecesWithRelevance = preparedPipeline.knowledgePieces.map((knowledgePiece) => {
5376
+ const { index } = knowledgePiece;
5377
+ const knowledgePieceIndex = index.find((i) => i.modelName === firstKnowlegeIndex.modelName);
5378
+ // <- TODO: Do not use just first knowledge piece and first index to determine embedding model
5379
+ if (knowledgePieceIndex === undefined) {
5380
+ return {
5381
+ content: knowledgePiece.content,
5382
+ relevance: 0,
5383
+ };
5384
+ }
5385
+ const relevance = computeCosineSimilarity(knowledgePieceIndex.position, taskEmbeddingResult.content);
5227
5386
  return {
5228
5387
  content: knowledgePiece.content,
5229
- relevance: 0,
5388
+ relevance,
5230
5389
  };
5231
- }
5232
- const relevance = computeCosineSimilarity(knowledgePieceIndex.position, taskEmbeddingResult.content);
5233
- return {
5234
- content: knowledgePiece.content,
5235
- relevance,
5236
- };
5237
- });
5238
- const knowledgePiecesSorted = knowledgePiecesWithRelevance.sort((a, b) => a.relevance - b.relevance);
5239
- const knowledgePiecesLimited = knowledgePiecesSorted.slice(0, 5);
5240
- console.log('!!! Embedding', {
5241
- task,
5242
- taskEmbeddingPrompt,
5243
- taskEmbeddingResult,
5244
- firstKnowlegePiece,
5245
- firstKnowlegeIndex,
5246
- knowledgePiecesWithRelevance,
5247
- knowledgePiecesSorted,
5248
- knowledgePiecesLimited,
5249
- });
5250
- return knowledgePiecesLimited.map(({ content }) => `- ${content}`).join('\n');
5251
- // <- TODO: [🧠] Some smart aggregation of knowledge pieces, single-line vs multi-line vs mixed
5252
- }
5253
- // TODO: !!!!!! Annotate + to new file
5254
- function computeCosineSimilarity(embeddingVector1, embeddingVector2) {
5255
- if (embeddingVector1.length !== embeddingVector2.length) {
5256
- throw new TypeError('Embedding vectors must have the same length');
5390
+ });
5391
+ const knowledgePiecesSorted = knowledgePiecesWithRelevance.sort((a, b) => a.relevance - b.relevance);
5392
+ const knowledgePiecesLimited = knowledgePiecesSorted.slice(0, 5);
5393
+ /*
5394
+ console.log('`getKnowledgeForTask` Embedding', {
5395
+ task,
5396
+ taskEmbeddingPrompt,
5397
+ taskEmbeddingResult,
5398
+ firstKnowlegePiece,
5399
+ firstKnowlegeIndex,
5400
+ knowledgePiecesWithRelevance,
5401
+ knowledgePiecesSorted,
5402
+ knowledgePiecesLimited,
5403
+ });
5404
+ */
5405
+ return knowledgePiecesToString(knowledgePiecesLimited);
5406
+ }
5407
+ catch (error) {
5408
+ assertsError(error);
5409
+ console.error('Error in `getKnowledgeForTask`', error);
5410
+ // Note: If the LLM fails, just return all knowledge pieces
5411
+ return knowledgePiecesToString(preparedPipeline.knowledgePieces);
5257
5412
  }
5258
- const dotProduct = embeddingVector1.reduce((sum, value, index) => sum + value * embeddingVector2[index], 0);
5259
- const magnitude1 = Math.sqrt(embeddingVector1.reduce((sum, value) => sum + value * value, 0));
5260
- const magnitude2 = Math.sqrt(embeddingVector2.reduce((sum, value) => sum + value * value, 0));
5261
- return 1 - dotProduct / (magnitude1 * magnitude2);
5262
5413
  }
5263
5414
  /**
5264
- * TODO: !!!! Verify if this is working
5265
5415
  * TODO: [♨] Implement Better - use keyword search
5266
5416
  * TODO: [♨] Examples of values
5267
5417
  */
5268
5418
 
5269
5419
  /**
5270
- * @@@
5420
+ * Retrieves all reserved parameters for a given pipeline task, including context, knowledge, examples, and metadata.
5421
+ * Ensures all reserved parameters are defined and throws if any are missing.
5422
+ *
5423
+ * @param options - Options including tools, pipeline, task, and context.
5424
+ * @returns An object containing all reserved parameters for the task.
5271
5425
  *
5272
5426
  * @private internal utility of `createPipelineExecutor`
5273
5427
  */
5274
5428
  async function getReservedParametersForTask(options) {
5275
- const { tools, preparedPipeline, task, pipelineIdentification } = options;
5429
+ const { tools, preparedPipeline, task, parameters, pipelineIdentification, isVerbose } = options;
5276
5430
  const context = await getContextForTask(); // <- [🏍]
5277
- const knowledge = await getKnowledgeForTask({ tools, preparedPipeline, task });
5431
+ const knowledge = await getKnowledgeForTask({ tools, preparedPipeline, task, parameters });
5278
5432
  const examples = await getExamplesForTask();
5279
5433
  const currentDate = new Date().toISOString(); // <- TODO: [🧠][💩] Better
5280
5434
  const modelName = RESERVED_PARAMETER_MISSING_VALUE;
@@ -5286,6 +5440,9 @@ async function getReservedParametersForTask(options) {
5286
5440
  currentDate,
5287
5441
  modelName,
5288
5442
  };
5443
+ if (isVerbose) {
5444
+ console.info('Reserved parameters for task:', { options, reservedParameters });
5445
+ }
5289
5446
  // Note: Doublecheck that ALL reserved parameters are defined:
5290
5447
  for (const parameterName of RESERVED_PARAMETER_NAMES) {
5291
5448
  if (reservedParameters[parameterName] === undefined) {
@@ -5300,23 +5457,21 @@ async function getReservedParametersForTask(options) {
5300
5457
  }
5301
5458
 
5302
5459
  /**
5303
- * @@@
5460
+ * Executes a single task within a pipeline, handling parameter validation, error checking, and progress reporting.
5461
+ *
5462
+ * @param options - Options for execution, including the task, pipeline, parameters, and callbacks.
5463
+ * @returns The output parameters produced by the task.
5304
5464
  *
5305
5465
  * @private internal utility of `createPipelineExecutor`
5306
5466
  */
5307
5467
  async function executeTask(options) {
5308
5468
  const { currentTask, preparedPipeline, parametersToPass, tools, onProgress, $executionReport, pipelineIdentification, maxExecutionAttempts, maxParallelCount, csvSettings, isVerbose, rootDirname, cacheDirname, intermediateFilesStrategy, isAutoInstalled, isNotPreparedWarningSupressed, } = options;
5309
5469
  const priority = preparedPipeline.tasks.length - preparedPipeline.tasks.indexOf(currentTask);
5310
- await onProgress({
5311
- outputParameters: {
5312
- [currentTask.resultingParameterName]: '', // <- TODO: [🧠] What is the best value here?
5313
- },
5314
- });
5315
5470
  // Note: Check consistency of used and dependent parameters which was also done in `validatePipeline`, but it’s good to doublecheck
5316
5471
  const usedParameterNames = extractParameterNamesFromTask(currentTask);
5317
5472
  const dependentParameterNames = new Set(currentTask.dependentParameterNames);
5318
5473
  // TODO: [👩🏾‍🤝‍👩🏻] Use here `mapAvailableToExpectedParameters`
5319
- if (union(difference(usedParameterNames, dependentParameterNames), difference(dependentParameterNames, usedParameterNames)).size !== 0) {
5474
+ if (difference(union(difference(usedParameterNames, dependentParameterNames), difference(dependentParameterNames, usedParameterNames)), new Set(RESERVED_PARAMETER_NAMES)).size !== 0) {
5320
5475
  throw new UnexpectedError(spaceTrim$1((block) => `
5321
5476
  Dependent parameters are not consistent with used parameters:
5322
5477
 
@@ -5334,13 +5489,16 @@ async function executeTask(options) {
5334
5489
 
5335
5490
  `));
5336
5491
  }
5492
+ const reservedParameters = await getReservedParametersForTask({
5493
+ tools,
5494
+ preparedPipeline,
5495
+ task: currentTask,
5496
+ pipelineIdentification,
5497
+ parameters: parametersToPass,
5498
+ isVerbose,
5499
+ });
5337
5500
  const definedParameters = Object.freeze({
5338
- ...(await getReservedParametersForTask({
5339
- tools,
5340
- preparedPipeline,
5341
- task: currentTask,
5342
- pipelineIdentification,
5343
- })),
5501
+ ...reservedParameters,
5344
5502
  ...parametersToPass,
5345
5503
  });
5346
5504
  const definedParameterNames = new Set(Object.keys(definedParameters));
@@ -5385,6 +5543,7 @@ async function executeTask(options) {
5385
5543
  preparedPipeline,
5386
5544
  tools,
5387
5545
  $executionReport,
5546
+ onProgress,
5388
5547
  pipelineIdentification,
5389
5548
  maxExecutionAttempts,
5390
5549
  maxParallelCount,
@@ -5412,7 +5571,8 @@ async function executeTask(options) {
5412
5571
  */
5413
5572
 
5414
5573
  /**
5415
- * @@@
5574
+ * Filters and returns only the output parameters from the provided pipeline execution options.
5575
+ * Adds warnings for any expected output parameters that are missing.
5416
5576
  *
5417
5577
  * @private internal utility of `createPipelineExecutor`
5418
5578
  */
@@ -5437,9 +5597,12 @@ function filterJustOutputParameters(options) {
5437
5597
  }
5438
5598
 
5439
5599
  /**
5440
- * @@@
5600
+ * Executes an entire pipeline, resolving tasks in dependency order, handling errors, and reporting progress.
5441
5601
  *
5442
- * Note: This is not a `PipelineExecutor` (which is binded with one exact pipeline), but a utility function of `createPipelineExecutor` which creates `PipelineExecutor`
5602
+ * Note: This is not a `PipelineExecutor` (which is bound to a single pipeline), but a utility function used by `createPipelineExecutor` to create a `PipelineExecutor`.
5603
+ *
5604
+ * @param options - Options for execution, including input parameters, pipeline, and callbacks.
5605
+ * @returns The result of the pipeline execution, including output parameters, errors, and usage statistics.
5443
5606
  *
5444
5607
  * @private internal utility of `createPipelineExecutor`
5445
5608
  */
@@ -5762,10 +5925,27 @@ function createPipelineExecutor(options) {
5762
5925
  cacheDirname,
5763
5926
  intermediateFilesStrategy,
5764
5927
  isAutoInstalled,
5928
+ }).catch((error) => {
5929
+ assertsError(error);
5930
+ return exportJson({
5931
+ name: 'pipelineExecutorResult',
5932
+ message: `Unuccessful PipelineExecutorResult, last catch`,
5933
+ order: [],
5934
+ value: {
5935
+ isSuccessful: false,
5936
+ errors: [serializeError(error)],
5937
+ warnings: [],
5938
+ usage: UNCERTAIN_USAGE,
5939
+ executionReport: null,
5940
+ outputParameters: {},
5941
+ preparedPipeline,
5942
+ },
5943
+ });
5765
5944
  });
5766
5945
  };
5767
5946
  const pipelineExecutor = (inputParameters) => createTask({
5768
5947
  taskType: 'EXECUTION',
5948
+ title: pipeline.title,
5769
5949
  taskProcessCallback(updateOngoingResult) {
5770
5950
  return pipelineExecutorWithCallback(inputParameters, async (newOngoingResult) => {
5771
5951
  updateOngoingResult(newOngoingResult);
@@ -5868,7 +6048,7 @@ class MarkdownScraper {
5868
6048
  const knowledge = await Promise.all(
5869
6049
  // TODO: [🪂] Do not send all at once but in chunks
5870
6050
  knowledgeTextPieces.map(async (knowledgeTextPiece, i) => {
5871
- // Note: Theese are just default values, they will be overwritten by the actual values:
6051
+ // Note: These are just default values, they will be overwritten by the actual values:
5872
6052
  let name = `piece-${i}`;
5873
6053
  let title = spaceTrim(knowledgeTextPiece.substring(0, 100));
5874
6054
  const knowledgePieceContent = spaceTrim(knowledgeTextPiece);
@@ -6022,8 +6202,8 @@ class MarkitdownScraper {
6022
6202
  extension: 'md',
6023
6203
  isVerbose,
6024
6204
  });
6025
- // TODO: @@@ Preserve, delete or modify
6026
- // Note: Running Pandoc ONLY if the file in the cache does not exist
6205
+ // TODO: Determine if Markitdown conversion should run only if the cache file doesn't exist, or always.
6206
+ // Note: Running Markitdown conversion ONLY if the file in the cache does not exist
6027
6207
  if (!(await isFileExisting(cacheFilehandler.filename, this.tools.fs))) {
6028
6208
  const src = source.filename || source.url || null;
6029
6209
  // console.log('!!', { src, source, cacheFilehandler });
@@ -6045,11 +6225,11 @@ class MarkitdownScraper {
6045
6225
  return cacheFilehandler;
6046
6226
  }
6047
6227
  /**
6048
- * Scrapes the docx file and returns the knowledge pieces or `null` if it can't scrape it
6228
+ * Scrapes the source document (PDF, DOCX, etc.) and returns the knowledge pieces or `null` if it can't scrape it.
6049
6229
  */
6050
6230
  async scrape(source) {
6051
6231
  const cacheFilehandler = await this.$convert(source);
6052
- // TODO: @@@ Preserve, delete or modify
6232
+ // TODO: Ensure this correctly creates the source object for the internal MarkdownScraper using the converted file.
6053
6233
  const markdownSource = {
6054
6234
  source: source.source,
6055
6235
  filename: cacheFilehandler.filename,
@@ -6193,7 +6373,8 @@ class PdfScraper {
6193
6373
  */
6194
6374
 
6195
6375
  /**
6196
- * @@@
6376
+ * Factory function to create an instance of PdfScraper.
6377
+ * It bundles the scraper class with its metadata.
6197
6378
  *
6198
6379
  * @public exported from `@promptbook/pdf`
6199
6380
  */