@promptbook/pdf 0.92.0-3 → 0.92.0-30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. package/esm/index.es.js +561 -280
  2. package/esm/index.es.js.map +1 -1
  3. package/esm/typings/src/_packages/browser.index.d.ts +2 -0
  4. package/esm/typings/src/_packages/core.index.d.ts +22 -6
  5. package/esm/typings/src/_packages/deepseek.index.d.ts +2 -0
  6. package/esm/typings/src/_packages/google.index.d.ts +2 -0
  7. package/esm/typings/src/_packages/types.index.d.ts +4 -2
  8. package/esm/typings/src/_packages/utils.index.d.ts +2 -0
  9. package/esm/typings/src/cli/common/$provideLlmToolsForCli.d.ts +1 -1
  10. package/esm/typings/src/collection/PipelineCollection.d.ts +0 -2
  11. package/esm/typings/src/collection/SimplePipelineCollection.d.ts +1 -1
  12. package/esm/typings/src/commands/FOREACH/ForeachJson.d.ts +6 -6
  13. package/esm/typings/src/commands/FOREACH/foreachCommandParser.d.ts +0 -2
  14. package/esm/typings/src/commands/FORMFACTOR/formfactorCommandParser.d.ts +1 -1
  15. package/esm/typings/src/commands/_BOILERPLATE/boilerplateCommandParser.d.ts +1 -1
  16. package/esm/typings/src/commands/_common/types/CommandParser.d.ts +36 -28
  17. package/esm/typings/src/config.d.ts +41 -11
  18. package/esm/typings/src/constants.d.ts +43 -2
  19. package/esm/typings/src/conversion/archive/loadArchive.d.ts +2 -2
  20. package/esm/typings/src/errors/0-BoilerplateError.d.ts +2 -2
  21. package/esm/typings/src/executables/$provideExecutablesForNode.d.ts +1 -1
  22. package/esm/typings/src/executables/apps/locateLibreoffice.d.ts +2 -1
  23. package/esm/typings/src/executables/apps/locatePandoc.d.ts +2 -1
  24. package/esm/typings/src/executables/platforms/locateAppOnLinux.d.ts +2 -1
  25. package/esm/typings/src/executables/platforms/locateAppOnMacOs.d.ts +2 -1
  26. package/esm/typings/src/executables/platforms/locateAppOnWindows.d.ts +2 -1
  27. package/esm/typings/src/execution/AbstractTaskResult.d.ts +1 -1
  28. package/esm/typings/src/execution/CommonToolsOptions.d.ts +5 -1
  29. package/esm/typings/src/execution/LlmExecutionToolsConstructor.d.ts +2 -1
  30. package/esm/typings/src/execution/PipelineExecutorResult.d.ts +4 -2
  31. package/esm/typings/src/execution/createPipelineExecutor/$OngoingTaskResult.d.ts +12 -9
  32. package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +12 -9
  33. package/esm/typings/src/execution/createPipelineExecutor/20-executeTask.d.ts +11 -8
  34. package/esm/typings/src/execution/createPipelineExecutor/30-executeFormatSubvalues.d.ts +15 -3
  35. package/esm/typings/src/execution/createPipelineExecutor/40-executeAttempts.d.ts +20 -14
  36. package/esm/typings/src/execution/createPipelineExecutor/computeCosineSimilarity.d.ts +13 -0
  37. package/esm/typings/src/execution/createPipelineExecutor/filterJustOutputParameters.d.ts +7 -6
  38. package/esm/typings/src/execution/createPipelineExecutor/getContextForTask.d.ts +5 -1
  39. package/esm/typings/src/execution/createPipelineExecutor/getExamplesForTask.d.ts +1 -1
  40. package/esm/typings/src/execution/createPipelineExecutor/getKnowledgeForTask.d.ts +21 -5
  41. package/esm/typings/src/execution/createPipelineExecutor/getReservedParametersForTask.d.ts +19 -5
  42. package/esm/typings/src/execution/createPipelineExecutor/knowledgePiecesToString.d.ts +9 -0
  43. package/esm/typings/src/execution/translation/automatic-translate/automatic-translators/LindatAutomaticTranslator.d.ts +4 -4
  44. package/esm/typings/src/execution/utils/checkExpectations.d.ts +1 -1
  45. package/esm/typings/src/execution/utils/uncertainNumber.d.ts +3 -2
  46. package/esm/typings/src/formats/_common/{FormatDefinition.d.ts → FormatParser.d.ts} +8 -6
  47. package/esm/typings/src/formats/_common/FormatSubvalueParser.d.ts +66 -0
  48. package/esm/typings/src/formats/csv/CsvFormatParser.d.ts +17 -0
  49. package/esm/typings/src/formats/csv/CsvSettings.d.ts +2 -2
  50. package/esm/typings/src/formats/csv/utils/csvParse.d.ts +12 -0
  51. package/esm/typings/src/formats/csv/utils/isValidCsvString.d.ts +1 -1
  52. package/esm/typings/src/formats/index.d.ts +2 -2
  53. package/esm/typings/src/formats/json/{JsonFormatDefinition.d.ts → JsonFormatParser.d.ts} +6 -6
  54. package/esm/typings/src/formats/json/utils/isValidJsonString.d.ts +1 -1
  55. package/esm/typings/src/formats/json/utils/jsonParse.d.ts +8 -0
  56. package/esm/typings/src/formats/text/{TextFormatDefinition.d.ts → TextFormatParser.d.ts} +7 -7
  57. package/esm/typings/src/formats/xml/XmlFormatParser.d.ts +19 -0
  58. package/esm/typings/src/formats/xml/utils/isValidXmlString.d.ts +1 -1
  59. package/esm/typings/src/formfactors/_boilerplate/BoilerplateFormfactorDefinition.d.ts +3 -2
  60. package/esm/typings/src/formfactors/_common/AbstractFormfactorDefinition.d.ts +16 -7
  61. package/esm/typings/src/formfactors/_common/FormfactorDefinition.d.ts +3 -1
  62. package/esm/typings/src/formfactors/_common/string_formfactor_name.d.ts +2 -1
  63. package/esm/typings/src/formfactors/chatbot/ChatbotFormfactorDefinition.d.ts +2 -2
  64. package/esm/typings/src/formfactors/completion/CompletionFormfactorDefinition.d.ts +29 -0
  65. package/esm/typings/src/formfactors/generator/GeneratorFormfactorDefinition.d.ts +2 -1
  66. package/esm/typings/src/formfactors/generic/GenericFormfactorDefinition.d.ts +2 -2
  67. package/esm/typings/src/formfactors/index.d.ts +33 -8
  68. package/esm/typings/src/formfactors/matcher/MatcherFormfactorDefinition.d.ts +4 -2
  69. package/esm/typings/src/formfactors/sheets/SheetsFormfactorDefinition.d.ts +3 -2
  70. package/esm/typings/src/formfactors/translator/TranslatorFormfactorDefinition.d.ts +3 -2
  71. package/esm/typings/src/high-level-abstractions/index.d.ts +2 -2
  72. package/esm/typings/src/llm-providers/_common/register/$llmToolsMetadataRegister.d.ts +3 -3
  73. package/esm/typings/src/llm-providers/_common/register/$llmToolsRegister.d.ts +3 -3
  74. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsConfigurationFromEnv.d.ts +4 -4
  75. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsForTestingAndScriptsAndPlayground.d.ts +4 -3
  76. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsFromEnv.d.ts +17 -4
  77. package/esm/typings/src/llm-providers/_common/register/LlmToolsConfiguration.d.ts +11 -4
  78. package/esm/typings/src/llm-providers/_common/register/LlmToolsMetadata.d.ts +27 -5
  79. package/esm/typings/src/llm-providers/_common/register/LlmToolsOptions.d.ts +9 -2
  80. package/esm/typings/src/llm-providers/_common/register/createLlmToolsFromConfiguration.d.ts +12 -3
  81. package/esm/typings/src/llm-providers/_common/utils/cache/CacheItem.d.ts +10 -5
  82. package/esm/typings/src/llm-providers/_common/utils/cache/CacheLlmToolsOptions.d.ts +5 -3
  83. package/esm/typings/src/llm-providers/_common/utils/cache/cacheLlmTools.d.ts +3 -3
  84. package/esm/typings/src/llm-providers/_common/utils/count-total-usage/limitTotalUsage.d.ts +5 -5
  85. package/esm/typings/src/llm-providers/anthropic-claude/anthropic-claude-models.d.ts +1 -1
  86. package/esm/typings/src/llm-providers/azure-openai/AzureOpenAiExecutionTools.d.ts +4 -0
  87. package/esm/typings/src/llm-providers/deepseek/deepseek-models.d.ts +23 -0
  88. package/esm/typings/src/llm-providers/google/google-models.d.ts +23 -0
  89. package/esm/typings/src/llm-providers/openai/OpenAiExecutionTools.d.ts +4 -0
  90. package/esm/typings/src/llm-providers/openai/openai-models.d.ts +1 -1
  91. package/esm/typings/src/llm-providers/openai/register-configuration.d.ts +2 -2
  92. package/esm/typings/src/llm-providers/openai/register-constructor.d.ts +2 -2
  93. package/esm/typings/src/migrations/migratePipeline.d.ts +9 -0
  94. package/esm/typings/src/other/templates/getBookTemplates.d.ts +2 -2
  95. package/esm/typings/src/personas/preparePersona.d.ts +1 -1
  96. package/esm/typings/src/pipeline/PipelineInterface/PipelineInterface.d.ts +3 -3
  97. package/esm/typings/src/pipeline/PipelineInterface/constants.d.ts +1 -1
  98. package/esm/typings/src/pipeline/PipelineInterface/getPipelineInterface.d.ts +1 -1
  99. package/esm/typings/src/pipeline/PipelineInterface/isPipelineImplementingInterface.d.ts +5 -4
  100. package/esm/typings/src/pipeline/PipelineInterface/isPipelineInterfacesEqual.d.ts +1 -1
  101. package/esm/typings/src/pipeline/PipelineJson/CommonTaskJson.d.ts +9 -6
  102. package/esm/typings/src/pipeline/PipelineJson/PersonaJson.d.ts +4 -2
  103. package/esm/typings/src/pipeline/PipelineJson/PipelineJson.d.ts +3 -2
  104. package/esm/typings/src/pipeline/PipelineString.d.ts +3 -1
  105. package/esm/typings/src/pipeline/book-notation.d.ts +2 -2
  106. package/esm/typings/src/postprocessing/utils/extractJsonBlock.d.ts +1 -1
  107. package/esm/typings/src/prepare/prepareTasks.d.ts +7 -4
  108. package/esm/typings/src/remote-server/openapi-types.d.ts +348 -6
  109. package/esm/typings/src/remote-server/openapi.d.ts +398 -4
  110. package/esm/typings/src/remote-server/types/RemoteServerOptions.d.ts +2 -1
  111. package/esm/typings/src/scrapers/_boilerplate/BoilerplateScraper.d.ts +3 -3
  112. package/esm/typings/src/scrapers/_boilerplate/createBoilerplateScraper.d.ts +1 -1
  113. package/esm/typings/src/scrapers/_boilerplate/register-metadata.d.ts +1 -1
  114. package/esm/typings/src/scrapers/_common/Converter.d.ts +3 -1
  115. package/esm/typings/src/scrapers/_common/Scraper.d.ts +4 -3
  116. package/esm/typings/src/scrapers/_common/ScraperIntermediateSource.d.ts +4 -2
  117. package/esm/typings/src/scrapers/_common/register/$provideFilesystemForNode.d.ts +2 -1
  118. package/esm/typings/src/scrapers/_common/register/$provideScrapersForBrowser.d.ts +6 -3
  119. package/esm/typings/src/scrapers/_common/register/$provideScrapersForNode.d.ts +3 -5
  120. package/esm/typings/src/scrapers/_common/register/$scrapersMetadataRegister.d.ts +3 -3
  121. package/esm/typings/src/scrapers/_common/register/$scrapersRegister.d.ts +3 -2
  122. package/esm/typings/src/scrapers/_common/register/ScraperAndConverterMetadata.d.ts +8 -5
  123. package/esm/typings/src/scrapers/_common/register/ScraperConstructor.d.ts +2 -1
  124. package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +6 -5
  125. package/esm/typings/src/scrapers/_common/utils/makeKnowledgeSourceHandler.d.ts +3 -1
  126. package/esm/typings/src/scrapers/document/createDocumentScraper.d.ts +1 -1
  127. package/esm/typings/src/scrapers/document-legacy/createLegacyDocumentScraper.d.ts +2 -1
  128. package/esm/typings/src/scrapers/markdown/createMarkdownScraper.d.ts +4 -1
  129. package/esm/typings/src/scrapers/markitdown/MarkitdownScraper.d.ts +1 -1
  130. package/esm/typings/src/scrapers/pdf/createPdfScraper.d.ts +2 -1
  131. package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts +3 -4
  132. package/esm/typings/src/scripting/javascript/postprocessing-functions.d.ts +5 -1
  133. package/esm/typings/src/storage/file-cache-storage/FileCacheStorage.d.ts +12 -5
  134. package/esm/typings/src/storage/file-cache-storage/FileCacheStorageOptions.d.ts +4 -2
  135. package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.d.ts +2 -1
  136. package/esm/typings/src/storage/local-storage/getIndexedDbStorage.d.ts +10 -0
  137. package/esm/typings/src/storage/local-storage/utils/makePromptbookStorageFromIndexedDb.d.ts +7 -0
  138. package/esm/typings/src/storage/local-storage/utils/makePromptbookStorageFromWebStorage.d.ts +2 -1
  139. package/esm/typings/src/types/IntermediateFilesStrategy.d.ts +2 -1
  140. package/esm/typings/src/types/ModelVariant.d.ts +5 -5
  141. package/esm/typings/src/types/typeAliases.d.ts +17 -13
  142. package/esm/typings/src/utils/$Register.d.ts +8 -7
  143. package/esm/typings/src/utils/editable/edit-pipeline-string/addPipelineCommand.d.ts +2 -2
  144. package/esm/typings/src/utils/editable/edit-pipeline-string/deflatePipeline.d.ts +4 -1
  145. package/esm/typings/src/utils/editable/utils/isFlatPipeline.d.ts +2 -1
  146. package/esm/typings/src/utils/environment/$getGlobalScope.d.ts +2 -1
  147. package/esm/typings/src/utils/expectation-counters/index.d.ts +1 -1
  148. package/esm/typings/src/utils/markdown/extractAllListItemsFromMarkdown.d.ts +1 -1
  149. package/esm/typings/src/utils/normalization/nameToUriPart.d.ts +4 -4
  150. package/esm/typings/src/utils/normalization/nameToUriParts.d.ts +4 -4
  151. package/esm/typings/src/utils/normalization/normalize-to-kebab-case.d.ts +3 -3
  152. package/esm/typings/src/utils/normalization/normalizeTo_SCREAMING_CASE.d.ts +3 -3
  153. package/esm/typings/src/utils/normalization/normalizeTo_camelCase.d.ts +4 -4
  154. package/esm/typings/src/utils/normalization/normalizeTo_snake_case.d.ts +3 -3
  155. package/esm/typings/src/utils/normalization/removeDiacritics.d.ts +3 -3
  156. package/esm/typings/src/utils/normalization/searchKeywords.d.ts +4 -1
  157. package/esm/typings/src/utils/normalization/titleToName.d.ts +4 -4
  158. package/esm/typings/src/utils/organization/empty_object.d.ts +2 -2
  159. package/esm/typings/src/utils/organization/just_empty_object.d.ts +4 -4
  160. package/esm/typings/src/utils/parameters/mapAvailableToExpectedParameters.d.ts +7 -7
  161. package/esm/typings/src/utils/serialization/clonePipeline.d.ts +4 -3
  162. package/esm/typings/src/utils/serialization/deepClone.d.ts +5 -1
  163. package/esm/typings/src/utils/validators/javascriptName/isValidJavascriptName.d.ts +3 -3
  164. package/esm/typings/src/utils/validators/parameterName/validateParameterName.d.ts +5 -4
  165. package/esm/typings/src/version.d.ts +2 -1
  166. package/package.json +2 -2
  167. package/umd/index.umd.js +561 -280
  168. package/umd/index.umd.js.map +1 -1
  169. package/esm/typings/src/formats/_common/FormatSubvalueDefinition.d.ts +0 -31
  170. package/esm/typings/src/formats/csv/CsvFormatDefinition.d.ts +0 -17
  171. package/esm/typings/src/formats/xml/XmlFormatDefinition.d.ts +0 -19
package/umd/index.umd.js CHANGED
@@ -25,7 +25,7 @@
25
25
  * @generated
26
26
  * @see https://github.com/webgptorg/promptbook
27
27
  */
28
- const PROMPTBOOK_ENGINE_VERSION = '0.92.0-3';
28
+ const PROMPTBOOK_ENGINE_VERSION = '0.92.0-30';
29
29
  /**
30
30
  * TODO: string_promptbook_version should be constrained to all versions of the Promptbook engine
31
31
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -101,6 +101,21 @@
101
101
  * @public exported from `@promptbook/core`
102
102
  */
103
103
  const DEFAULT_MAX_FILE_SIZE = 100 * 1024 * 1024; // 100MB
104
+ /**
105
+ * Threshold value that determines when a dataset is considered "big"
106
+ * and may require special handling or optimizations
107
+ *
108
+ * For example, when an error occurs in one item of a big dataset, it will not fail the whole pipeline
109
+ *
110
+ * @public exported from `@promptbook/core`
111
+ */
112
+ const BIG_DATASET_TRESHOLD = 50;
113
+ /**
114
+ * Placeholder text used to represent the value of a failed operation
115
+ *
116
+ * @public exported from `@promptbook/core`
117
+ */
118
+ const FAILED_VALUE_PLACEHOLDER = '!?';
104
119
  // <- TODO: [🧠] Better system for generator warnings - not always "code" and "by `@promptbook/cli`"
105
120
  /**
106
121
  * The maximum number of iterations for a loop
@@ -180,7 +195,7 @@
180
195
  const DEFAULT_SCRAPE_CACHE_DIRNAME = './.promptbook/scrape-cache';
181
196
  // <- TODO: [🧜‍♂️]
182
197
  /**
183
- * @@@
198
+ * Default settings for parsing and generating CSV files in Promptbook.
184
199
  *
185
200
  * @public exported from `@promptbook/core`
186
201
  */
@@ -191,19 +206,19 @@
191
206
  skipEmptyLines: true,
192
207
  });
193
208
  /**
194
- * @@@
209
+ * Controls whether verbose logging is enabled by default throughout the application.
195
210
  *
196
211
  * @public exported from `@promptbook/core`
197
212
  */
198
213
  let DEFAULT_IS_VERBOSE = false;
199
214
  /**
200
- * @@@
215
+ * Controls whether auto-installation of dependencies is enabled by default.
201
216
  *
202
217
  * @public exported from `@promptbook/core`
203
218
  */
204
219
  const DEFAULT_IS_AUTO_INSTALLED = false;
205
220
  /**
206
- * @@@
221
+ * Indicates whether pipeline logic validation is enabled. When true, the pipeline logic is checked for consistency.
207
222
  *
208
223
  * @private within the repository
209
224
  */
@@ -354,7 +369,8 @@
354
369
  */
355
370
 
356
371
  /**
357
- * @@@
372
+ * Converts a name to a properly formatted subfolder path for cache storage.
373
+ * Handles normalization and path formatting to create consistent cache directory structures.
358
374
  *
359
375
  * @private for `FileCacheStorage`
360
376
  */
@@ -607,10 +623,10 @@
607
623
  */
608
624
 
609
625
  /**
610
- * @@@
626
+ * Removes diacritic marks (accents) from characters in a string.
611
627
  *
612
- * @param input @@@
613
- * @returns @@@
628
+ * @param input The string containing diacritics to be normalized.
629
+ * @returns The string with diacritics removed or normalized.
614
630
  * @public exported from `@promptbook/utils`
615
631
  */
616
632
  function removeDiacritics(input) {
@@ -624,10 +640,10 @@
624
640
  */
625
641
 
626
642
  /**
627
- * @@@
643
+ * Converts a given text to kebab-case format.
628
644
  *
629
- * @param text @@@
630
- * @returns @@@
645
+ * @param text The text to be converted.
646
+ * @returns The kebab-case formatted string.
631
647
  * @example 'hello-world'
632
648
  * @example 'i-love-promptbook'
633
649
  * @public exported from `@promptbook/utils`
@@ -769,11 +785,11 @@
769
785
  }
770
786
 
771
787
  /**
772
- * @@@
788
+ * Converts a title string into a normalized name.
773
789
  *
774
- * @param value @@@
775
- * @returns @@@
776
- * @example @@@
790
+ * @param value The title string to be converted to a name.
791
+ * @returns A normalized name derived from the input title.
792
+ * @example 'Hello World!' -> 'hello-world'
777
793
  * @public exported from `@promptbook/utils`
778
794
  */
779
795
  function titleToName(value) {
@@ -806,9 +822,8 @@
806
822
  }
807
823
 
808
824
  /**
809
- * Create a filename for intermediate cache for scrapers
810
- *
811
- * Note: It also checks if directory exists and creates it if not
825
+ * Retrieves an intermediate source for a scraper based on the knowledge source.
826
+ * Manages the caching and retrieval of intermediate scraper results for optimized performance.
812
827
  *
813
828
  * @private as internal utility for scrapers
814
829
  */
@@ -859,7 +874,7 @@
859
874
  * Note: [🟢] Code in this file should never be released in packages that could be imported into a browser environment
860
875
  */
861
876
 
862
- var PipelineCollection = [{title:"Prepare Knowledge from Markdown",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book",formfactorName:"GENERIC",parameters:[{name:"knowledgeContent",description:"Markdown document content",isInput:true,isOutput:false},{name:"knowledgePieces",description:"The knowledge JSON object",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}",resultingParameterName:"knowledgePieces",dependentParameterNames:["knowledgeContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge from Markdown\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book`\n- INPUT PARAMETER `{knowledgeContent}` Markdown document content\n- OUTPUT PARAMETER `{knowledgePieces}` The knowledge JSON object\n\n## Knowledge\n\n<!-- TODO: [🍆] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}\n```\n\n`-> 
{knowledgePieces}`\n"}],sourceFile:"./books/prepare-knowledge-from-markdown.book"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-keywords.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"keywords",description:"Keywords separated by comma",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}",resultingParameterName:"keywords",dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Keywords\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-keywords.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{keywords}` Keywords separated by comma\n\n## Knowledge\n\n<!-- TODO: [🍆] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}\n```\n\n`-> {keywords}`\n"}],sourceFile:"./books/prepare-knowledge-keywords.book"},{title:"Prepare Knowledge-piece Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-title.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"title",description:"The title of the document",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just 
title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}",resultingParameterName:"title",expectations:{words:{min:1,max:8}},dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge-piece Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-title.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{title}` The title of the document\n\n## Knowledge\n\n- EXPECT MIN 1 WORD\n- EXPECT MAX 8 WORDS\n\n```markdown\nYou are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-knowledge-title.book"},{title:"Prepare Persona",pipelineUrl:"https://promptbook.studio/promptbook/prepare-persona.book",formfactorName:"GENERIC",parameters:[{name:"availableModelNames",description:"List of available model names separated by comma (,)",isInput:true,isOutput:false},{name:"personaDescription",description:"Description of the persona",isInput:true,isOutput:false},{name:"modelRequirements",description:"Specific requirements for the model",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-model-requirements",title:"Make modelRequirements",content:"You are experienced AI engineer, you need to create virtual assistant.\nWrite\n\n## Example\n\n```json\n{\n\"modelName\": \"gpt-4o\",\n\"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n\"temperature\": 0.7\n}\n```\n\n## Instructions\n\n- Your output format is JSON object\n- Write just the JSON object, no other text should be present\n- It contains the following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the 
model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nPick from the following models:\n\n- {availableModelNames}\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}",resultingParameterName:"modelRequirements",format:"JSON",dependentParameterNames:["availableModelNames","personaDescription"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Persona\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-persona.book`\n- INPUT PARAMETER `{availableModelNames}` List of available model names separated by comma (,)\n- INPUT PARAMETER `{personaDescription}` Description of the persona\n- OUTPUT PARAMETER `{modelRequirements}` Specific requirements for the model\n\n## Make modelRequirements\n\n- FORMAT JSON\n\n```markdown\nYou are experienced AI engineer, you need to create virtual assistant.\nWrite\n\n## Example\n\n\\`\\`\\`json\n{\n\"modelName\": \"gpt-4o\",\n\"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n\"temperature\": 0.7\n}\n\\`\\`\\`\n\n## Instructions\n\n- Your output format is JSON object\n- Write just the JSON object, no other text should be present\n- It contains the following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nPick from the following models:\n\n- {availableModelNames}\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. 
It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}\n```\n\n`-> {modelRequirements}`\n"}],sourceFile:"./books/prepare-persona.book"},{title:"Prepare Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-title.book",formfactorName:"GENERIC",parameters:[{name:"book",description:"The book to prepare the title for",isInput:true,isOutput:false},{name:"title",description:"Best title for the book",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-title",title:"Make title",content:"Make best title for given text which describes the workflow:\n\n## Rules\n\n- Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 
words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"✍ Convert Knowledge-piece to title\" but \"✍ Title\"_\n\n## The workflow\n\n> {book}",resultingParameterName:"title",expectations:{words:{min:1,max:8},lines:{min:1,max:1}},dependentParameterNames:["book"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-title.book`\n- INPUT PARAMETER `{book}` The book to prepare the title for\n- OUTPUT PARAMETER `{title}` Best title for the book\n\n## Make title\n\n- EXPECT MIN 1 Word\n- EXPECT MAX 8 Words\n- EXPECT EXACTLY 1 Line\n\n```markdown\nMake best title for given text which describes the workflow:\n\n## Rules\n\n- Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"✍ Convert Knowledge-piece to title\" but \"✍ Title\"_\n\n## The workflow\n\n> {book}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-title.book"}];
877
+ var PipelineCollection = [{title:"Prepare Knowledge from Markdown",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book",formfactorName:"GENERIC",parameters:[{name:"knowledgeContent",description:"Markdown document content",isInput:true,isOutput:false},{name:"knowledgePieces",description:"The knowledge JSON object",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}",resultingParameterName:"knowledgePieces",dependentParameterNames:["knowledgeContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge from Markdown\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book`\n- INPUT PARAMETER `{knowledgeContent}` Markdown document content\n- OUTPUT PARAMETER `{knowledgePieces}` The knowledge JSON object\n\n## Knowledge\n\n<!-- TODO: [🍆] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}\n```\n\n`-> 
{knowledgePieces}`\n"}],sourceFile:"./books/prepare-knowledge-from-markdown.book"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-keywords.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"keywords",description:"Keywords separated by comma",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}",resultingParameterName:"keywords",dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Keywords\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-keywords.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{keywords}` Keywords separated by comma\n\n## Knowledge\n\n<!-- TODO: [🍆] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}\n```\n\n`-> {keywords}`\n"}],sourceFile:"./books/prepare-knowledge-keywords.book"},{title:"Prepare Knowledge-piece Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-title.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"title",description:"The title of the document",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just 
title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}",resultingParameterName:"title",expectations:{words:{min:1,max:8}},dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge-piece Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-title.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{title}` The title of the document\n\n## Knowledge\n\n- EXPECT MIN 1 WORD\n- EXPECT MAX 8 WORDS\n\n```markdown\nYou are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-knowledge-title.book"},{title:"Prepare Persona",pipelineUrl:"https://promptbook.studio/promptbook/prepare-persona.book",formfactorName:"GENERIC",parameters:[{name:"availableModels",description:"List of available model names together with their descriptions as JSON",isInput:true,isOutput:false},{name:"personaDescription",description:"Description of the persona",isInput:true,isOutput:false},{name:"modelsRequirements",description:"Specific requirements for the model",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-model-requirements",title:"Make modelRequirements",content:"You are an experienced AI engineer, you need to find the best models for virtual assistants:\n\n## Example\n\n```json\n[\n {\n \"modelName\": \"gpt-4o\",\n \"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n \"temperature\": 0.7\n },\n {\n \"modelName\": \"claude-3-5-sonnet\",\n \"systemMessage\": \"You are a friendly and knowledgeable chatbot.\",\n \"temperature\": 0.5\n }\n]\n```\n\n## Instructions\n\n- Your output format is JSON array\n- Sort best-fitting 
models first\n- Omit any models that are not suitable\n- Write just the JSON, no other text should be present\n- Array contain items with following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nHere are the available models:\n\n```json\n{availableModels}\n```\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}",resultingParameterName:"modelsRequirements",format:"JSON",dependentParameterNames:["availableModels","personaDescription"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Persona\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-persona.book`\n- INPUT PARAMETER `{availableModels}` List of available model names together with their descriptions as JSON\n- INPUT PARAMETER `{personaDescription}` Description of the persona\n- OUTPUT PARAMETER `{modelsRequirements}` Specific requirements for the model\n\n## Make modelRequirements\n\n- FORMAT JSON\n\n```markdown\nYou are an experienced AI engineer, you need to find the best models for virtual assistants:\n\n## Example\n\n\\`\\`\\`json\n[\n {\n \"modelName\": \"gpt-4o\",\n \"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n \"temperature\": 0.7\n },\n {\n \"modelName\": \"claude-3-5-sonnet\",\n \"systemMessage\": \"You are a friendly and knowledgeable chatbot.\",\n \"temperature\": 0.5\n }\n]\n\\`\\`\\`\n\n## Instructions\n\n- Your output format is JSON array\n- Sort best-fitting models first\n- Omit any models that are not suitable\n- Write just the JSON, no other text should be present\n- Array contain items with following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nHere are 
the available models:\n\n\\`\\`\\`json\n{availableModels}\n\\`\\`\\`\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}\n```\n\n`-> {modelsRequirements}`\n"}],sourceFile:"./books/prepare-persona.book"},{title:"Prepare Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-title.book",formfactorName:"GENERIC",parameters:[{name:"book",description:"The book to prepare the title for",isInput:true,isOutput:false},{name:"title",description:"Best title for the book",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-title",title:"Make title",content:"Make best title for given text which describes the workflow:\n\n## Rules\n\n- Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"✍ Convert Knowledge-piece to title\" but \"✍ Title\"_\n\n## The workflow\n\n> {book}",resultingParameterName:"title",expectations:{words:{min:1,max:8},lines:{min:1,max:1}},dependentParameterNames:["book"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-title.book`\n- INPUT PARAMETER `{book}` The book to prepare the title for\n- OUTPUT PARAMETER `{title}` Best title for the book\n\n## Make title\n\n- EXPECT MIN 1 Word\n- EXPECT MAX 8 Words\n- EXPECT EXACTLY 1 Line\n\n```markdown\nMake best title for given text which describes the workflow:\n\n## Rules\n\n- 
Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"✍ Convert Knowledge-piece to title\" but \"✍ Title\"_\n\n## The workflow\n\n> {book}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-title.book"}];
863
878
 
864
879
  /**
865
880
  * Checks if value is valid email
@@ -944,7 +959,7 @@
944
959
  * Function isValidJsonString will tell you if the string is valid JSON or not
945
960
  *
946
961
  * @param value The string to check
947
- * @returns True if the string is a valid JSON string, false otherwise
962
+ * @returns `true` if the string is a valid JSON string, false otherwise
948
963
  *
949
964
  * @public exported from `@promptbook/utils`
950
965
  */
@@ -1355,8 +1370,12 @@
1355
1370
  */
1356
1371
 
1357
1372
  /**
1358
- * @@@
1373
+ * Creates a deep clone of the given object
1374
+ *
1375
+ * Note: This method only works for objects that are fully serializable to JSON and do not contain functions, Dates, or special types.
1359
1376
  *
1377
+ * @param objectValue The object to clone.
1378
+ * @returns A deep, writable clone of the input object.
1360
1379
  * @public exported from `@promptbook/utils`
1361
1380
  */
1362
1381
  function deepClone(objectValue) {
@@ -1438,13 +1457,13 @@
1438
1457
  */
1439
1458
  const REPLACING_NONCE = 'ptbkauk42kV2dzao34faw7FudQUHYPtW';
1440
1459
  /**
1441
- * @@@
1460
+ * Placeholder value indicating a parameter is missing its value.
1442
1461
  *
1443
1462
  * @private within the repository
1444
1463
  */
1445
1464
  const RESERVED_PARAMETER_MISSING_VALUE = 'MISSING-' + REPLACING_NONCE;
1446
1465
  /**
1447
- * @@@
1466
+ * Placeholder value indicating a parameter is restricted and cannot be used directly.
1448
1467
  *
1449
1468
  * @private within the repository
1450
1469
  */
@@ -1902,7 +1921,7 @@
1902
1921
  */
1903
1922
  function unpreparePipeline(pipeline) {
1904
1923
  let { personas, knowledgeSources, tasks } = pipeline;
1905
- personas = personas.map((persona) => ({ ...persona, modelRequirements: undefined, preparationIds: undefined }));
1924
+ personas = personas.map((persona) => ({ ...persona, modelsRequirements: undefined, preparationIds: undefined }));
1906
1925
  knowledgeSources = knowledgeSources.map((knowledgeSource) => ({ ...knowledgeSource, preparationIds: undefined }));
1907
1926
  tasks = tasks.map((task) => {
1908
1927
  let { dependentParameterNames } = task;
@@ -1943,7 +1962,7 @@
1943
1962
  /**
1944
1963
  * Constructs a pipeline collection from pipelines
1945
1964
  *
1946
- * @param pipelines @@@
1965
+ * @param pipelines Array of pipeline JSON objects to include in the collection
1947
1966
  *
1948
1967
  * Note: During the construction logic of all pipelines are validated
1949
1968
  * Note: It is not recommended to use this constructor directly, use `createCollectionFromJson` *(or other variant)* instead
@@ -2107,15 +2126,21 @@
2107
2126
  * @public exported from `@promptbook/core`
2108
2127
  */
2109
2128
  function isPipelinePrepared(pipeline) {
2110
- // Note: Ignoring `pipeline.preparations` @@@
2111
- // Note: Ignoring `pipeline.knowledgePieces` @@@
2129
+ // Note: Ignoring `pipeline.preparations`
2130
+ // Note: Ignoring `pipeline.knowledgePieces`
2112
2131
  if (pipeline.title === undefined || pipeline.title === '' || pipeline.title === DEFAULT_BOOK_TITLE) {
2132
+ // TODO: !!! Comment this out
2133
+ console.log('Pipeline is not prepared because title is undefined or empty', pipeline);
2113
2134
  return false;
2114
2135
  }
2115
- if (!pipeline.personas.every((persona) => persona.modelRequirements !== undefined)) {
2136
+ if (!pipeline.personas.every((persona) => persona.modelsRequirements !== undefined)) {
2137
+ // TODO: !!! Comment this out
2138
+ console.log('Pipeline is not prepared because personas are not prepared', pipeline.personas);
2116
2139
  return false;
2117
2140
  }
2118
2141
  if (!pipeline.knowledgeSources.every((knowledgeSource) => knowledgeSource.preparationIds !== undefined)) {
2142
+ // TODO: !!! Comment this out
2143
+ console.log('Pipeline is not prepared because knowledge sources are not prepared', pipeline.knowledgeSources);
2119
2144
  return false;
2120
2145
  }
2121
2146
  /*
@@ -2136,36 +2161,6 @@
2136
2161
  * - [♨] Are tasks prepared
2137
2162
  */
2138
2163
 
2139
- /**
2140
- * Recursively converts JSON strings to JSON objects
2141
-
2142
- * @public exported from `@promptbook/utils`
2143
- */
2144
- function jsonStringsToJsons(object) {
2145
- if (object === null) {
2146
- return object;
2147
- }
2148
- if (Array.isArray(object)) {
2149
- return object.map(jsonStringsToJsons);
2150
- }
2151
- if (typeof object !== 'object') {
2152
- return object;
2153
- }
2154
- const newObject = { ...object };
2155
- for (const [key, value] of Object.entries(object)) {
2156
- if (typeof value === 'string' && isValidJsonString(value)) {
2157
- newObject[key] = JSON.parse(value);
2158
- }
2159
- else {
2160
- newObject[key] = jsonStringsToJsons(value);
2161
- }
2162
- }
2163
- return newObject;
2164
- }
2165
- /**
2166
- * TODO: Type the return type correctly
2167
- */
2168
-
2169
2164
  /**
2170
2165
  * This error indicates problems parsing the format value
2171
2166
  *
@@ -2349,6 +2344,101 @@
2349
2344
  * Note: [💞] Ignore a discrepancy between file name and entity name
2350
2345
  */
2351
2346
 
2347
+ /**
2348
+ * Serializes an error into a [🚉] JSON-serializable object
2349
+ *
2350
+ * @public exported from `@promptbook/utils`
2351
+ */
2352
+ function serializeError(error) {
2353
+ const { name, message, stack } = error;
2354
+ const { id } = error;
2355
+ if (!Object.keys(ALL_ERRORS).includes(name)) {
2356
+ console.error(spaceTrim__default["default"]((block) => `
2357
+
2358
+ Cannot serialize error with name "${name}"
2359
+
2360
+ Authors of Promptbook probably forgot to add this error into the list of errors:
2361
+ https://github.com/webgptorg/promptbook/blob/main/src/errors/0-index.ts
2362
+
2363
+
2364
+ ${block(stack || message)}
2365
+
2366
+ `));
2367
+ }
2368
+ return {
2369
+ name: name,
2370
+ message,
2371
+ stack,
2372
+ id, // Include id in the serialized object
2373
+ };
2374
+ }
2375
+
2376
+ /**
2377
+ * Converts a JavaScript Object Notation (JSON) string into an object.
2378
+ *
2379
+ * Note: This is wrapper around `JSON.parse()` with better error and type handling
2380
+ *
2381
+ * @public exported from `@promptbook/utils`
2382
+ */
2383
+ function jsonParse(value) {
2384
+ if (value === undefined) {
2385
+ throw new Error(`Can not parse JSON from undefined value.`);
2386
+ }
2387
+ else if (typeof value !== 'string') {
2388
+ console.error('Can not parse JSON from non-string value.', { text: value });
2389
+ throw new Error(spaceTrim__default["default"](`
2390
+ Can not parse JSON from non-string value.
2391
+
2392
+ The value type: ${typeof value}
2393
+ See more in console.
2394
+ `));
2395
+ }
2396
+ try {
2397
+ return JSON.parse(value);
2398
+ }
2399
+ catch (error) {
2400
+ if (!(error instanceof Error)) {
2401
+ throw error;
2402
+ }
2403
+ throw new Error(spaceTrim__default["default"]((block) => `
2404
+ ${block(error.message)}
2405
+
2406
+ The JSON text:
2407
+ ${block(value)}
2408
+ `));
2409
+ }
2410
+ }
2411
+
2412
+ /**
2413
+ * Recursively converts JSON strings to JSON objects
2414
+
2415
+ * @public exported from `@promptbook/utils`
2416
+ */
2417
+ function jsonStringsToJsons(object) {
2418
+ if (object === null) {
2419
+ return object;
2420
+ }
2421
+ if (Array.isArray(object)) {
2422
+ return object.map(jsonStringsToJsons);
2423
+ }
2424
+ if (typeof object !== 'object') {
2425
+ return object;
2426
+ }
2427
+ const newObject = { ...object };
2428
+ for (const [key, value] of Object.entries(object)) {
2429
+ if (typeof value === 'string' && isValidJsonString(value)) {
2430
+ newObject[key] = jsonParse(value);
2431
+ }
2432
+ else {
2433
+ newObject[key] = jsonStringsToJsons(value);
2434
+ }
2435
+ }
2436
+ return newObject;
2437
+ }
2438
+ /**
2439
+ * TODO: Type the return type correctly
2440
+ */
2441
+
2352
2442
  /**
2353
2443
  * Deserializes the error object
2354
2444
  *
@@ -2514,64 +2604,6 @@
2514
2604
  * TODO: [🐚] Split into more files and make `PrepareTask` & `RemoteTask` + split the function
2515
2605
  */
2516
2606
 
2517
- /**
2518
- * Serializes an error into a [🚉] JSON-serializable object
2519
- *
2520
- * @public exported from `@promptbook/utils`
2521
- */
2522
- function serializeError(error) {
2523
- const { name, message, stack } = error;
2524
- const { id } = error;
2525
- if (!Object.keys(ALL_ERRORS).includes(name)) {
2526
- console.error(spaceTrim__default["default"]((block) => `
2527
-
2528
- Cannot serialize error with name "${name}"
2529
-
2530
- Authors of Promptbook probably forgot to add this error into the list of errors:
2531
- https://github.com/webgptorg/promptbook/blob/main/src/errors/0-index.ts
2532
-
2533
-
2534
- ${block(stack || message)}
2535
-
2536
- `));
2537
- }
2538
- return {
2539
- name: name,
2540
- message,
2541
- stack,
2542
- id, // Include id in the serialized object
2543
- };
2544
- }
2545
-
2546
- /**
2547
- * Async version of Array.forEach
2548
- *
2549
- * @param array - Array to iterate over
2550
- * @param options - Options for the function
2551
- * @param callbackfunction - Function to call for each item
2552
- * @public exported from `@promptbook/utils`
2553
- * @deprecated [🪂] Use queues instead
2554
- */
2555
- async function forEachAsync(array, options, callbackfunction) {
2556
- const { maxParallelCount = Infinity } = options;
2557
- let index = 0;
2558
- let runningTasks = [];
2559
- const tasks = [];
2560
- for (const item of array) {
2561
- const currentIndex = index++;
2562
- const task = callbackfunction(item, currentIndex, array);
2563
- tasks.push(task);
2564
- runningTasks.push(task);
2565
- /* not await */ Promise.resolve(task).then(() => {
2566
- runningTasks = runningTasks.filter((t) => t !== task);
2567
- });
2568
- if (maxParallelCount < runningTasks.length) {
2569
- await Promise.race(runningTasks);
2570
- }
2571
- }
2572
- await Promise.all(tasks);
2573
- }
2574
-
2575
2607
  /**
2576
2608
  * Represents the uncertain value
2577
2609
  *
@@ -2615,7 +2647,7 @@
2615
2647
  *
2616
2648
  * @public exported from `@promptbook/core`
2617
2649
  */
2618
- $deepFreeze({
2650
+ const UNCERTAIN_USAGE = $deepFreeze({
2619
2651
  price: UNCERTAIN_ZERO_VALUE,
2620
2652
  input: {
2621
2653
  tokensCount: UNCERTAIN_ZERO_VALUE,
@@ -2640,6 +2672,35 @@
2640
2672
  * Note: [💞] Ignore a discrepancy between file name and entity name
2641
2673
  */
2642
2674
 
2675
+ /**
2676
+ * Async version of Array.forEach
2677
+ *
2678
+ * @param array - Array to iterate over
2679
+ * @param options - Options for the function
2680
+ * @param callbackfunction - Function to call for each item
2681
+ * @public exported from `@promptbook/utils`
2682
+ * @deprecated [🪂] Use queues instead
2683
+ */
2684
+ async function forEachAsync(array, options, callbackfunction) {
2685
+ const { maxParallelCount = Infinity } = options;
2686
+ let index = 0;
2687
+ let runningTasks = [];
2688
+ const tasks = [];
2689
+ for (const item of array) {
2690
+ const currentIndex = index++;
2691
+ const task = callbackfunction(item, currentIndex, array);
2692
+ tasks.push(task);
2693
+ runningTasks.push(task);
2694
+ /* not await */ Promise.resolve(task).then(() => {
2695
+ runningTasks = runningTasks.filter((t) => t !== task);
2696
+ });
2697
+ if (maxParallelCount < runningTasks.length) {
2698
+ await Promise.race(runningTasks);
2699
+ }
2700
+ }
2701
+ await Promise.all(tasks);
2702
+ }
2703
+
2643
2704
  /**
2644
2705
  * Function `addUsage` will add multiple usages into one
2645
2706
  *
@@ -2986,27 +3047,48 @@
2986
3047
  pipeline: await collection.getPipelineByUrl('https://promptbook.studio/promptbook/prepare-persona.book'),
2987
3048
  tools,
2988
3049
  });
2989
- // TODO: [🚐] Make arrayable LLMs -> single LLM DRY
2990
3050
  const _llms = arrayableToArray(tools.llm);
2991
3051
  const llmTools = _llms.length === 1 ? _llms[0] : joinLlmExecutionTools(..._llms);
2992
- const availableModels = await llmTools.listModels();
2993
- const availableModelNames = availableModels
3052
+ const availableModels = (await llmTools.listModels())
2994
3053
  .filter(({ modelVariant }) => modelVariant === 'CHAT')
2995
- .map(({ modelName }) => modelName)
2996
- .join(',');
2997
- const result = await preparePersonaExecutor({ availableModelNames, personaDescription }).asPromise();
3054
+ .map(({ modelName, modelDescription }) => ({
3055
+ modelName,
3056
+ modelDescription,
3057
+ // <- Note: `modelTitle` and `modelVariant` is not relevant for this task
3058
+ }));
3059
+ const result = await preparePersonaExecutor({
3060
+ availableModels /* <- Note: Passing as JSON */,
3061
+ personaDescription,
3062
+ }).asPromise();
2998
3063
  const { outputParameters } = result;
2999
- const { modelRequirements: modelRequirementsRaw } = outputParameters;
3000
- const modelRequirements = JSON.parse(modelRequirementsRaw);
3064
+ const { modelsRequirements: modelsRequirementsJson } = outputParameters;
3065
+ let modelsRequirementsUnchecked = jsonParse(modelsRequirementsJson);
3001
3066
  if (isVerbose) {
3002
- console.info(`PERSONA ${personaDescription}`, modelRequirements);
3067
+ console.info(`PERSONA ${personaDescription}`, modelsRequirementsUnchecked);
3003
3068
  }
3004
- const { modelName, systemMessage, temperature } = modelRequirements;
3005
- return {
3069
+ if (!Array.isArray(modelsRequirementsUnchecked)) {
3070
+ // <- TODO: Book should have syntax and system to enforce shape of JSON
3071
+ modelsRequirementsUnchecked = [modelsRequirementsUnchecked];
3072
+ /*
3073
+ throw new UnexpectedError(
3074
+ spaceTrim(
3075
+ (block) => `
3076
+ Invalid \`modelsRequirements\`:
3077
+
3078
+ \`\`\`json
3079
+ ${block(JSON.stringify(modelsRequirementsUnchecked, null, 4))}
3080
+ \`\`\`
3081
+ `,
3082
+ ),
3083
+ );
3084
+ */
3085
+ }
3086
+ const modelsRequirements = modelsRequirementsUnchecked.map((modelRequirements) => ({
3006
3087
  modelVariant: 'CHAT',
3007
- modelName,
3008
- systemMessage,
3009
- temperature,
3088
+ ...modelRequirements,
3089
+ }));
3090
+ return {
3091
+ modelsRequirements,
3010
3092
  };
3011
3093
  }
3012
3094
  /**
@@ -3017,7 +3099,8 @@
3017
3099
  */
3018
3100
 
3019
3101
  /**
3020
- * @@@
3102
+ * Safely retrieves the global scope object (window in browser, global in Node.js)
3103
+ * regardless of the JavaScript environment in which the code is running
3021
3104
  *
3022
3105
  * Note: `$` is used to indicate that this function is not a pure function - it access global scope
3023
3106
  *
@@ -3028,10 +3111,10 @@
3028
3111
  }
3029
3112
 
3030
3113
  /**
3031
- * @@@
3114
+ * Normalizes a text string to SCREAMING_CASE (all uppercase with underscores).
3032
3115
  *
3033
- * @param text @@@
3034
- * @returns @@@
3116
+ * @param text The text string to be converted to SCREAMING_CASE format.
3117
+ * @returns The normalized text in SCREAMING_CASE format.
3035
3118
  * @example 'HELLO_WORLD'
3036
3119
  * @example 'I_LOVE_PROMPTBOOK'
3037
3120
  * @public exported from `@promptbook/utils`
@@ -3083,10 +3166,10 @@
3083
3166
  */
3084
3167
 
3085
3168
  /**
3086
- * @@@
3169
+ * Normalizes a text string to snake_case format.
3087
3170
  *
3088
- * @param text @@@
3089
- * @returns @@@
3171
+ * @param text The text string to be converted to snake_case format.
3172
+ * @returns The normalized text in snake_case format.
3090
3173
  * @example 'hello_world'
3091
3174
  * @example 'i_love_promptbook'
3092
3175
  * @public exported from `@promptbook/utils`
@@ -3096,11 +3179,11 @@
3096
3179
  }
3097
3180
 
3098
3181
  /**
3099
- * Register is @@@
3182
+ * Global registry for storing and managing registered entities of a given type.
3100
3183
  *
3101
3184
  * Note: `$` is used to indicate that this function is not a pure function - it accesses and adds variables in global scope.
3102
3185
  *
3103
- * @private internal utility, exported are only signleton instances of this class
3186
+ * @private internal utility, exported are only singleton instances of this class
3104
3187
  */
3105
3188
  class $Register {
3106
3189
  constructor(registerName) {
@@ -3144,10 +3227,10 @@
3144
3227
  }
3145
3228
 
3146
3229
  /**
3147
- * @@@
3230
+ * Global registry for storing metadata about all available scrapers and converters.
3148
3231
  *
3149
- * Note: `$` is used to indicate that this interacts with the global scope
3150
- * @singleton Only one instance of each register is created per build, but thare can be more @@@
3232
+ * Note: `$` is used to indicate that this interacts with the global scope.
3233
+ * @singleton Only one instance of each register is created per build, but there can be more in different contexts (e.g., tests).
3151
3234
  * @public exported from `@promptbook/core`
3152
3235
  */
3153
3236
  const $scrapersMetadataRegister = new $Register('scrapers_metadata');
@@ -3156,10 +3239,11 @@
3156
3239
  */
3157
3240
 
3158
3241
  /**
3159
- * @@@
3242
+ * Registry for all available scrapers in the system.
3243
+ * Central point for registering and accessing different types of content scrapers.
3160
3244
  *
3161
3245
  * Note: `$` is used to indicate that this interacts with the global scope
3162
- * @singleton Only one instance of each register is created per build, but thare can be more @@@
3246
+ * @singleton Only one instance of each register is created per build, but there can be more than one in different build modules
3163
3247
  * @public exported from `@promptbook/core`
3164
3248
  */
3165
3249
  const $scrapersRegister = new $Register('scraper_constructors');
@@ -3337,7 +3421,9 @@
3337
3421
  */
3338
3422
 
3339
3423
  /**
3340
- * @@@
3424
+ * Factory function that creates a handler for processing knowledge sources.
3425
+ * Provides standardized processing of different types of knowledge sources
3426
+ * across various scraper implementations.
3341
3427
  *
3342
3428
  * @public exported from `@promptbook/core`
3343
3429
  */
@@ -3444,7 +3530,7 @@
3444
3530
  > },
3445
3531
  */
3446
3532
  async asJson() {
3447
- return JSON.parse(await tools.fs.readFile(filename, 'utf-8'));
3533
+ return jsonParse(await tools.fs.readFile(filename, 'utf-8'));
3448
3534
  },
3449
3535
  async asText() {
3450
3536
  return await tools.fs.readFile(filename, 'utf-8');
@@ -3578,9 +3664,12 @@
3578
3664
  */
3579
3665
 
3580
3666
  /**
3581
- * @@@
3667
+ * Prepares tasks by adding knowledge to the prompt and ensuring all necessary parameters are included.
3582
3668
  *
3583
- * @public exported from `@promptbook/core`
3669
+ * @param tasks Sequence of tasks that are chained together to form a pipeline
3670
+ * @returns A promise that resolves to the prepared tasks.
3671
+ *
3672
+ * @private internal utility of `preparePipeline`
3584
3673
  */
3585
3674
  async function prepareTasks(pipeline, tools, options) {
3586
3675
  const { maxParallelCount = DEFAULT_MAX_PARALLEL_COUNT } = options;
@@ -3702,14 +3791,14 @@
3702
3791
  // TODO: [🖌][🧠] Implement some `mapAsync` function
3703
3792
  const preparedPersonas = new Array(personas.length);
3704
3793
  await forEachAsync(personas, { maxParallelCount /* <- TODO: [🪂] When there are subtasks, this maximul limit can be broken */ }, async (persona, index) => {
3705
- const modelRequirements = await preparePersona(persona.description, { ...tools, llm: llmToolsWithUsage }, {
3794
+ const { modelsRequirements } = await preparePersona(persona.description, { ...tools, llm: llmToolsWithUsage }, {
3706
3795
  rootDirname,
3707
3796
  maxParallelCount /* <- TODO: [🪂] */,
3708
3797
  isVerbose,
3709
3798
  });
3710
3799
  const preparedPersona = {
3711
3800
  ...persona,
3712
- modelRequirements,
3801
+ modelsRequirements,
3713
3802
  preparationIds: [/* TODO: [🧊] -> */ currentPreparation.id],
3714
3803
  // <- TODO: [🍙] Make some standard order of json properties
3715
3804
  };
@@ -4017,7 +4106,7 @@
4017
4106
  }
4018
4107
 
4019
4108
  /**
4020
- * @@@
4109
+ * Contains configuration options for parsing and generating CSV files, such as delimiters and quoting rules.
4021
4110
  *
4022
4111
  * @public exported from `@promptbook/core`
4023
4112
  */
@@ -4026,11 +4115,29 @@
4026
4115
  // encoding: 'utf-8',
4027
4116
  });
4028
4117
 
4118
+ /**
4119
+ * Converts a CSV string into an object
4120
+ *
4121
+ * Note: This is wrapper around `papaparse.parse()` with better autohealing
4122
+ *
4123
+ * @private - for now until `@promptbook/csv` is released
4124
+ */
4125
+ function csvParse(value /* <- TODO: string_csv */, settings, schema /* <- TODO: Make CSV Schemas */) {
4126
+ settings = { ...settings, ...MANDATORY_CSV_SETTINGS };
4127
+ // Note: Autoheal invalid '\n' characters
4128
+ if (settings.newline && !settings.newline.includes('\r') && value.includes('\r')) {
4129
+ console.warn('CSV string contains carriage return characters, but in the CSV settings the `newline` setting does not include them. Autohealing the CSV string.');
4130
+ value = value.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
4131
+ }
4132
+ const csv = papaparse.parse(value, settings);
4133
+ return csv;
4134
+ }
4135
+
4029
4136
  /**
4030
4137
  * Function to check if a string is valid CSV
4031
4138
  *
4032
4139
  * @param value The string to check
4033
- * @returns True if the string is a valid CSV string, false otherwise
4140
+ * @returns `true` if the string is a valid CSV string, false otherwise
4034
4141
  *
4035
4142
  * @public exported from `@promptbook/utils`
4036
4143
  */
@@ -4054,7 +4161,7 @@
4054
4161
  * @public exported from `@promptbook/core`
4055
4162
  * <- TODO: [🏢] Export from package `@promptbook/csv`
4056
4163
  */
4057
- const CsvFormatDefinition = {
4164
+ const CsvFormatParser = {
4058
4165
  formatName: 'CSV',
4059
4166
  aliases: ['SPREADSHEET', 'TABLE'],
4060
4167
  isValid(value, settings, schema) {
@@ -4066,12 +4173,12 @@
4066
4173
  heal(value, settings, schema) {
4067
4174
  throw new Error('Not implemented');
4068
4175
  },
4069
- subvalueDefinitions: [
4176
+ subvalueParsers: [
4070
4177
  {
4071
4178
  subvalueName: 'ROW',
4072
- async mapValues(value, outputParameterName, settings, mapCallback) {
4073
- // TODO: [👨🏾‍🤝‍👨🏼] DRY csv parsing
4074
- const csv = papaparse.parse(value, { ...settings, ...MANDATORY_CSV_SETTINGS });
4179
+ async mapValues(options) {
4180
+ const { value, outputParameterName, settings, mapCallback, onProgress } = options;
4181
+ const csv = csvParse(value, settings);
4075
4182
  if (csv.errors.length !== 0) {
4076
4183
  throw new CsvFormatError(spaceTrim__default["default"]((block) => `
4077
4184
  CSV parsing error
@@ -4086,23 +4193,37 @@
4086
4193
  ${block(value)}
4087
4194
  `));
4088
4195
  }
4089
- const mappedData = await Promise.all(csv.data.map(async (row, index) => {
4196
+ const mappedData = [];
4197
+ const length = csv.data.length;
4198
+ for (let index = 0; index < length; index++) {
4199
+ const row = csv.data[index];
4090
4200
  if (row[outputParameterName]) {
4091
4201
  throw new CsvFormatError(`Can not overwrite existing column "${outputParameterName}" in CSV row`);
4092
4202
  }
4093
- return {
4203
+ const mappedRow = {
4094
4204
  ...row,
4095
- [outputParameterName]: await mapCallback(row, index),
4205
+ [outputParameterName]: await mapCallback(row, index, length),
4096
4206
  };
4097
- }));
4207
+ mappedData.push(mappedRow);
4208
+ if (onProgress) {
4209
+ // Note: Report the CSV with all rows mapped so far
4210
+ /*
4211
+ // TODO: [🛕] Report progress with all the rows including the pending ones
4212
+ const progressData = mappedData.map((row, i) =>
4213
+ i > index ? { ...row, [outputParameterName]: PENDING_VALUE_PLACEHOLDER } : row,
4214
+ );
4215
+ */
4216
+ await onProgress(papaparse.unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS }));
4217
+ }
4218
+ }
4098
4219
  return papaparse.unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
4099
4220
  },
4100
4221
  },
4101
4222
  {
4102
4223
  subvalueName: 'CELL',
4103
- async mapValues(value, outputParameterName, settings, mapCallback) {
4104
- // TODO: [👨🏾‍🤝‍👨🏼] DRY csv parsing
4105
- const csv = papaparse.parse(value, { ...settings, ...MANDATORY_CSV_SETTINGS });
4224
+ async mapValues(options) {
4225
+ const { value, settings, mapCallback, onProgress } = options;
4226
+ const csv = csvParse(value, settings);
4106
4227
  if (csv.errors.length !== 0) {
4107
4228
  throw new CsvFormatError(spaceTrim__default["default"]((block) => `
4108
4229
  CSV parsing error
@@ -4118,9 +4239,9 @@
4118
4239
  `));
4119
4240
  }
4120
4241
  const mappedData = await Promise.all(csv.data.map(async (row, rowIndex) => {
4121
- return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex) => {
4242
+ return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex, array) => {
4122
4243
  const index = rowIndex * Object.keys(row).length + columnIndex;
4123
- return /* not await */ mapCallback({ [key]: value }, index);
4244
+ return /* not await */ mapCallback({ [key]: value }, index, array.length);
4124
4245
  }));
4125
4246
  }));
4126
4247
  return papaparse.unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
@@ -4129,10 +4250,10 @@
4129
4250
  ],
4130
4251
  };
4131
4252
  /**
4132
- * TODO: [🍓] In `CsvFormatDefinition` implement simple `isValid`
4133
- * TODO: [🍓] In `CsvFormatDefinition` implement partial `canBeValid`
4134
- * TODO: [🍓] In `CsvFormatDefinition` implement `heal
4135
- * TODO: [🍓] In `CsvFormatDefinition` implement `subvalueDefinitions`
4253
+ * TODO: [🍓] In `CsvFormatParser` implement simple `isValid`
4254
+ * TODO: [🍓] In `CsvFormatParser` implement partial `canBeValid`
4255
+ * TODO: [🍓] In `CsvFormatParser` implement `heal
4256
+ * TODO: [🍓] In `CsvFormatParser` implement `subvalueParsers`
4136
4257
  * TODO: [🏢] Allow to expect something inside CSV objects and other formats
4137
4258
  */
4138
4259
 
@@ -4141,7 +4262,7 @@
4141
4262
  *
4142
4263
  * @private still in development [🏢]
4143
4264
  */
4144
- const JsonFormatDefinition = {
4265
+ const JsonFormatParser = {
4145
4266
  formatName: 'JSON',
4146
4267
  mimeType: 'application/json',
4147
4268
  isValid(value, settings, schema) {
@@ -4153,28 +4274,28 @@
4153
4274
  heal(value, settings, schema) {
4154
4275
  throw new Error('Not implemented');
4155
4276
  },
4156
- subvalueDefinitions: [],
4277
+ subvalueParsers: [],
4157
4278
  };
4158
4279
  /**
4159
4280
  * TODO: [🧠] Maybe proper instance of object
4160
4281
  * TODO: [0] Make string_serialized_json
4161
4282
  * TODO: [1] Make type for JSON Settings and Schema
4162
4283
  * TODO: [🧠] What to use for validating JSONs - JSON Schema, ZoD, typescript types/interfaces,...?
4163
- * TODO: [🍓] In `JsonFormatDefinition` implement simple `isValid`
4164
- * TODO: [🍓] In `JsonFormatDefinition` implement partial `canBeValid`
4165
- * TODO: [🍓] In `JsonFormatDefinition` implement `heal
4166
- * TODO: [🍓] In `JsonFormatDefinition` implement `subvalueDefinitions`
4284
+ * TODO: [🍓] In `JsonFormatParser` implement simple `isValid`
4285
+ * TODO: [🍓] In `JsonFormatParser` implement partial `canBeValid`
4286
+ * TODO: [🍓] In `JsonFormatParser` implement `heal
4287
+ * TODO: [🍓] In `JsonFormatParser` implement `subvalueParsers`
4167
4288
  * TODO: [🏢] Allow to expect something inside JSON objects and other formats
4168
4289
  */
4169
4290
 
4170
4291
  /**
4171
4292
  * Definition for any text - this will be always valid
4172
4293
  *
4173
- * Note: This is not useful for validation, but for splitting and mapping with `subvalueDefinitions`
4294
+ * Note: This is not useful for validation, but for splitting and mapping with `subvalueParsers`
4174
4295
  *
4175
4296
  * @public exported from `@promptbook/core`
4176
4297
  */
4177
- const TextFormatDefinition = {
4298
+ const TextFormatParser = {
4178
4299
  formatName: 'TEXT',
4179
4300
  isValid(value) {
4180
4301
  return typeof value === 'string';
@@ -4183,19 +4304,20 @@
4183
4304
  return typeof partialValue === 'string';
4184
4305
  },
4185
4306
  heal() {
4186
- throw new UnexpectedError('It does not make sense to call `TextFormatDefinition.heal`');
4307
+ throw new UnexpectedError('It does not make sense to call `TextFormatParser.heal`');
4187
4308
  },
4188
- subvalueDefinitions: [
4309
+ subvalueParsers: [
4189
4310
  {
4190
4311
  subvalueName: 'LINE',
4191
- async mapValues(value, outputParameterName, settings, mapCallback) {
4312
+ async mapValues(options) {
4313
+ const { value, mapCallback, onProgress } = options;
4192
4314
  const lines = value.split('\n');
4193
- const mappedLines = await Promise.all(lines.map((lineContent, lineNumber) =>
4315
+ const mappedLines = await Promise.all(lines.map((lineContent, lineNumber, array) =>
4194
4316
  // TODO: [🧠] Maybe option to skip empty line
4195
4317
  /* not await */ mapCallback({
4196
4318
  lineContent,
4197
4319
  // TODO: [🧠] Maybe also put here `lineNumber`
4198
- }, lineNumber)));
4320
+ }, lineNumber, array.length)));
4199
4321
  return mappedLines.join('\n');
4200
4322
  },
4201
4323
  },
@@ -4205,10 +4327,10 @@
4205
4327
  /**
4206
4328
  * TODO: [1] Make type for XML Text and Schema
4207
4329
  * TODO: [🧠][🤠] Here should be all words, characters, lines, paragraphs, pages available as subvalues
4208
- * TODO: [🍓] In `TextFormatDefinition` implement simple `isValid`
4209
- * TODO: [🍓] In `TextFormatDefinition` implement partial `canBeValid`
4210
- * TODO: [🍓] In `TextFormatDefinition` implement `heal
4211
- * TODO: [🍓] In `TextFormatDefinition` implement `subvalueDefinitions`
4330
+ * TODO: [🍓] In `TextFormatParser` implement simple `isValid`
4331
+ * TODO: [🍓] In `TextFormatParser` implement partial `canBeValid`
4332
+ * TODO: [🍓] In `TextFormatParser` implement `heal
4333
+ * TODO: [🍓] In `TextFormatParser` implement `subvalueParsers`
4212
4334
  * TODO: [🏢] Allow to expect something inside each item of list and other formats
4213
4335
  */
4214
4336
 
@@ -4216,7 +4338,7 @@
4216
4338
  * Function to check if a string is valid XML
4217
4339
  *
4218
4340
  * @param value
4219
- * @returns True if the string is a valid XML string, false otherwise
4341
+ * @returns `true` if the string is a valid XML string, false otherwise
4220
4342
  *
4221
4343
  * @public exported from `@promptbook/utils`
4222
4344
  */
@@ -4241,7 +4363,7 @@
4241
4363
  *
4242
4364
  * @private still in development [🏢]
4243
4365
  */
4244
- const XmlFormatDefinition = {
4366
+ const XmlFormatParser = {
4245
4367
  formatName: 'XML',
4246
4368
  mimeType: 'application/xml',
4247
4369
  isValid(value, settings, schema) {
@@ -4253,17 +4375,17 @@
4253
4375
  heal(value, settings, schema) {
4254
4376
  throw new Error('Not implemented');
4255
4377
  },
4256
- subvalueDefinitions: [],
4378
+ subvalueParsers: [],
4257
4379
  };
4258
4380
  /**
4259
4381
  * TODO: [🧠] Maybe proper instance of object
4260
4382
  * TODO: [0] Make string_serialized_xml
4261
4383
  * TODO: [1] Make type for XML Settings and Schema
4262
4384
  * TODO: [🧠] What to use for validating XMLs - XSD,...
4263
- * TODO: [🍓] In `XmlFormatDefinition` implement simple `isValid`
4264
- * TODO: [🍓] In `XmlFormatDefinition` implement partial `canBeValid`
4265
- * TODO: [🍓] In `XmlFormatDefinition` implement `heal
4266
- * TODO: [🍓] In `XmlFormatDefinition` implement `subvalueDefinitions`
4385
+ * TODO: [🍓] In `XmlFormatParser` implement simple `isValid`
4386
+ * TODO: [🍓] In `XmlFormatParser` implement partial `canBeValid`
4387
+ * TODO: [🍓] In `XmlFormatParser` implement `heal
4388
+ * TODO: [🍓] In `XmlFormatParser` implement `subvalueParsers`
4267
4389
  * TODO: [🏢] Allow to expect something inside XML and other formats
4268
4390
  */
4269
4391
 
@@ -4272,24 +4394,19 @@
4272
4394
  *
4273
4395
  * @private internal index of `...` <- TODO [🏢]
4274
4396
  */
4275
- const FORMAT_DEFINITIONS = [
4276
- JsonFormatDefinition,
4277
- XmlFormatDefinition,
4278
- TextFormatDefinition,
4279
- CsvFormatDefinition,
4280
- ];
4397
+ const FORMAT_DEFINITIONS = [JsonFormatParser, XmlFormatParser, TextFormatParser, CsvFormatParser];
4281
4398
  /**
4282
4399
  * Note: [💞] Ignore a discrepancy between file name and entity name
4283
4400
  */
4284
4401
 
4285
4402
  /**
4286
- * Maps available parameters to expected parameters
4403
+ * Maps available parameters to expected parameters for a pipeline task.
4287
4404
  *
4288
4405
  * The strategy is:
4289
- * 1) @@@
4290
- * 2) @@@
4406
+ * 1) First, match parameters by name where both available and expected.
4407
+ * 2) Then, if there are unmatched expected and available parameters, map them by order.
4291
4408
  *
4292
- * @throws {PipelineExecutionError} @@@
4409
+ * @throws {PipelineExecutionError} If the number of unmatched expected and available parameters does not match, or mapping is ambiguous.
4293
4410
  * @private within the repository used in `createPipelineExecutor`
4294
4411
  */
4295
4412
  function mapAvailableToExpectedParameters(options) {
@@ -4312,7 +4429,7 @@
4312
4429
  else if (!availableParametersNames.has(parameterName) && expectedParameterNames.has(parameterName)) ;
4313
4430
  }
4314
4431
  if (expectedParameterNames.size === 0) {
4315
- // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent @@@
4432
+ // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
4316
4433
  Object.freeze(mappedParameters);
4317
4434
  return mappedParameters;
4318
4435
  }
@@ -4343,7 +4460,7 @@
4343
4460
  for (let i = 0; i < expectedParameterNames.size; i++) {
4344
4461
  mappedParameters[expectedParameterNamesArray[i]] = availableParameters[availableParametersNamesArray[i]];
4345
4462
  }
4346
- // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent @@@
4463
+ // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
4347
4464
  Object.freeze(mappedParameters);
4348
4465
  return mappedParameters;
4349
4466
  }
@@ -4447,7 +4564,7 @@
4447
4564
  }
4448
4565
  /**
4449
4566
  * TODO: Add some auto-healing logic + extract YAML, JSON5, TOML, etc.
4450
- * TODO: [🏢] Make this logic part of `JsonFormatDefinition` or `isValidJsonString`
4567
+ * TODO: [🏢] Make this logic part of `JsonFormatParser` or `isValidJsonString`
4451
4568
  */
4452
4569
 
4453
4570
  /**
@@ -4490,10 +4607,12 @@
4490
4607
  throw new PipelineExecutionError('Parameter is already opened or not closed');
4491
4608
  }
4492
4609
  if (parameters[parameterName] === undefined) {
4610
+ console.log('!!! templateParameters 1', { parameterName, template, parameters });
4493
4611
  throw new PipelineExecutionError(`Parameter \`{${parameterName}}\` is not defined`);
4494
4612
  }
4495
4613
  let parameterValue = parameters[parameterName];
4496
4614
  if (parameterValue === undefined) {
4615
+ console.log('!!! templateParameters 2', { parameterName, template, parameters });
4497
4616
  throw new PipelineExecutionError(`Parameter \`{${parameterName}}\` is not defined`);
4498
4617
  }
4499
4618
  parameterValue = valueToString(parameterValue);
@@ -4649,7 +4768,7 @@
4649
4768
  PAGES: countPages,
4650
4769
  };
4651
4770
  /**
4652
- * TODO: [🧠][🤠] This should be probbably as part of `TextFormatDefinition`
4771
+ * TODO: [🧠][🤠] This should be probbably as part of `TextFormatParser`
4653
4772
  * Note: [💞] Ignore a discrepancy between file name and entity name
4654
4773
  */
4655
4774
 
@@ -4677,13 +4796,17 @@
4677
4796
  }
4678
4797
  /**
4679
4798
  * TODO: [💝] Unite object for expecting amount and format
4680
- * TODO: [🧠][🤠] This should be part of `TextFormatDefinition`
4799
+ * TODO: [🧠][🤠] This should be part of `TextFormatParser`
4681
4800
  * Note: [💝] and [🤠] are interconnected together
4682
4801
  */
4683
4802
 
4684
4803
  /**
4685
- * @@@
4804
+ * Executes a pipeline task with multiple attempts, including joker and retry logic. Handles different task types
4805
+ * (prompt, script, dialog, etc.), applies postprocessing, checks expectations, and updates the execution report.
4806
+ * Throws errors if execution fails after all attempts.
4686
4807
  *
4808
+ * @param options - The options for execution, including task, parameters, pipeline, and configuration.
4809
+ * @returns The result string of the executed task.
4687
4810
  * @private internal utility of `createPipelineExecutor`
4688
4811
  */
4689
4812
  async function executeAttempts(options) {
@@ -4905,7 +5028,7 @@
4905
5028
  if (task.format) {
4906
5029
  if (task.format === 'JSON') {
4907
5030
  if (!isValidJsonString($ongoingTaskResult.$resultString || '')) {
4908
- // TODO: [🏢] Do more universally via `FormatDefinition`
5031
+ // TODO: [🏢] Do more universally via `FormatParser`
4909
5032
  try {
4910
5033
  $ongoingTaskResult.$resultString = extractJsonBlock($ongoingTaskResult.$resultString || '');
4911
5034
  }
@@ -5007,12 +5130,16 @@
5007
5130
  */
5008
5131
 
5009
5132
  /**
5010
- * @@@
5133
+ * Executes a pipeline task that requires mapping or iterating over subvalues of a parameter (such as rows in a CSV).
5134
+ * Handles format and subformat resolution, error handling, and progress reporting.
5135
+ *
5136
+ * @param options - Options for execution, including task details and progress callback.
5137
+ * @returns The result of the subvalue mapping or execution attempts.
5011
5138
  *
5012
5139
  * @private internal utility of `createPipelineExecutor`
5013
5140
  */
5014
5141
  async function executeFormatSubvalues(options) {
5015
- const { task, jokerParameterNames, parameters, priority, csvSettings, pipelineIdentification } = options;
5142
+ const { task, jokerParameterNames, parameters, priority, csvSettings, onProgress, pipelineIdentification } = options;
5016
5143
  if (task.foreach === undefined) {
5017
5144
  return /* not await */ executeAttempts(options);
5018
5145
  }
@@ -5043,16 +5170,16 @@
5043
5170
  ${block(pipelineIdentification)}
5044
5171
  `));
5045
5172
  }
5046
- const subvalueDefinition = formatDefinition.subvalueDefinitions.find((subvalueDefinition) => [subvalueDefinition.subvalueName, ...(subvalueDefinition.aliases || [])].includes(task.foreach.subformatName));
5047
- if (subvalueDefinition === undefined) {
5173
+ const subvalueParser = formatDefinition.subvalueParsers.find((subvalueParser) => [subvalueParser.subvalueName, ...(subvalueParser.aliases || [])].includes(task.foreach.subformatName));
5174
+ if (subvalueParser === undefined) {
5048
5175
  throw new UnexpectedError(
5049
5176
  // <- TODO: [🧠][🧐] Should be formats fixed per promptbook version or behave as plugins (=> change UnexpectedError)
5050
5177
  spaceTrim__default["default"]((block) => `
5051
5178
  Unsupported subformat name "${task.foreach.subformatName}" for format "${task.foreach.formatName}"
5052
5179
 
5053
5180
  Available subformat names for format "${formatDefinition.formatName}":
5054
- ${block(formatDefinition.subvalueDefinitions
5055
- .map((subvalueDefinition) => subvalueDefinition.subvalueName)
5181
+ ${block(formatDefinition.subvalueParsers
5182
+ .map((subvalueParser) => subvalueParser.subvalueName)
5056
5183
  .map((subvalueName) => `- ${subvalueName}`)
5057
5184
  .join('\n'))}
5058
5185
 
@@ -5066,53 +5193,83 @@
5066
5193
  formatSettings = csvSettings;
5067
5194
  // <- TODO: [🤹‍♂️] More universal, make similar pattern for other formats for example \n vs \r\n in text
5068
5195
  }
5069
- const resultString = await subvalueDefinition.mapValues(parameterValue, task.foreach.outputSubparameterName, formatSettings, async (subparameters, index) => {
5070
- let mappedParameters;
5071
- // TODO: [🤹‍♂️][🪂] Limit to N concurrent executions
5072
- // TODO: When done [🐚] Report progress also for each subvalue here
5073
- try {
5074
- mappedParameters = mapAvailableToExpectedParameters({
5075
- expectedParameters: Object.fromEntries(task.foreach.inputSubparameterNames.map((subparameterName) => [subparameterName, null])),
5076
- availableParameters: subparameters,
5077
- });
5078
- }
5079
- catch (error) {
5080
- if (!(error instanceof PipelineExecutionError)) {
5081
- throw error;
5196
+ const resultString = await subvalueParser.mapValues({
5197
+ value: parameterValue,
5198
+ outputParameterName: task.foreach.outputSubparameterName,
5199
+ settings: formatSettings,
5200
+ onProgress(partialResultString) {
5201
+ return onProgress(Object.freeze({
5202
+ [task.resultingParameterName]: partialResultString,
5203
+ }));
5204
+ },
5205
+ async mapCallback(subparameters, index, length) {
5206
+ let mappedParameters;
5207
+ try {
5208
+ mappedParameters = mapAvailableToExpectedParameters({
5209
+ expectedParameters: Object.fromEntries(task.foreach.inputSubparameterNames.map((subparameterName) => [subparameterName, null])),
5210
+ availableParameters: subparameters,
5211
+ });
5082
5212
  }
5083
- throw new PipelineExecutionError(spaceTrim__default["default"]((block) => `
5084
- ${error.message}
5213
+ catch (error) {
5214
+ if (!(error instanceof PipelineExecutionError)) {
5215
+ throw error;
5216
+ }
5217
+ const highLevelError = new PipelineExecutionError(spaceTrim__default["default"]((block) => `
5218
+ ${error.message}
5085
5219
 
5086
- This is error in FOREACH command
5087
- You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5220
+ This is error in FOREACH command when mapping ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
5221
+ You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5088
5222
 
5089
- ${block(pipelineIdentification)}
5090
- Subparameter index: ${index}
5091
- `));
5092
- }
5093
- const allSubparameters = {
5094
- ...parameters,
5095
- ...mappedParameters,
5096
- };
5097
- // Note: [👨‍👨‍👧] Now we can freeze `subparameters` because we are sure that all and only used parameters are defined and are not going to be changed
5098
- Object.freeze(allSubparameters);
5099
- const subresultString = await executeAttempts({
5100
- ...options,
5101
- priority: priority + index,
5102
- parameters: allSubparameters,
5103
- pipelineIdentification: spaceTrim__default["default"]((block) => `
5104
- ${block(pipelineIdentification)}
5105
- Subparameter index: ${index}
5106
- `),
5107
- });
5108
- return subresultString;
5223
+ ${block(pipelineIdentification)}
5224
+ `));
5225
+ if (length > BIG_DATASET_TRESHOLD) {
5226
+ console.error(highLevelError);
5227
+ return FAILED_VALUE_PLACEHOLDER;
5228
+ }
5229
+ throw highLevelError;
5230
+ }
5231
+ const allSubparameters = {
5232
+ ...parameters,
5233
+ ...mappedParameters,
5234
+ };
5235
+ Object.freeze(allSubparameters);
5236
+ try {
5237
+ const subresultString = await executeAttempts({
5238
+ ...options,
5239
+ priority: priority + index,
5240
+ parameters: allSubparameters,
5241
+ pipelineIdentification: spaceTrim__default["default"]((block) => `
5242
+ ${block(pipelineIdentification)}
5243
+ Subparameter index: ${index}
5244
+ `),
5245
+ });
5246
+ return subresultString;
5247
+ }
5248
+ catch (error) {
5249
+ if (length > BIG_DATASET_TRESHOLD) {
5250
+ console.error(spaceTrim__default["default"]((block) => `
5251
+ ${error.message}
5252
+
5253
+ This is error in FOREACH command when processing ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
5254
+
5255
+ ${block(pipelineIdentification)}
5256
+ `));
5257
+ return FAILED_VALUE_PLACEHOLDER;
5258
+ }
5259
+ throw error;
5260
+ }
5261
+ },
5109
5262
  });
5110
5263
  return resultString;
5111
5264
  }
5112
5265
 
5113
5266
  /**
5114
- * @@@
5267
+ * Returns the context for a given task, typically used to provide additional information or variables
5268
+ * required for the execution of the task within a pipeline. The context is returned as a string value
5269
+ * that may include markdown formatting.
5115
5270
  *
5271
+ * @param task - The task for which the context is being generated. This should be a deeply immutable TaskJson object.
5272
+ * @returns The context as a string, formatted as markdown and parameter value.
5116
5273
  * @private internal utility of `createPipelineExecutor`
5117
5274
  */
5118
5275
  async function getContextForTask(task) {
@@ -5120,7 +5277,7 @@
5120
5277
  }
5121
5278
 
5122
5279
  /**
5123
- * @@@
5280
+ * Retrieves example values or templates for a given task, used to guide or validate pipeline execution.
5124
5281
  *
5125
5282
  * @private internal utility of `createPipelineExecutor`
5126
5283
  */
@@ -5129,25 +5286,127 @@
5129
5286
  }
5130
5287
 
5131
5288
/**
 * Computes the cosine distance (`1 - cosine similarity`) between two embedding vectors
 *
 * Note: Despite its name, this function does NOT return the raw cosine similarity.
 *       The returned value is `0` for vectors pointing the same direction, `1` for orthogonal
 *       vectors and `2` for opposite vectors - so a LOWER value means MORE similar.
 *       This contract is kept for backward compatibility with existing callers.
 * Note: This is helping function for RAG (retrieval-augmented generation)
 *
 * @param embeddingVector1 First vector (array of numbers)
 * @param embeddingVector2 Second vector, must have the same length as the first one
 * @returns `1 - (v1 · v2) / (|v1| * |v2|)`; when either vector has zero magnitude (including
 *          empty vectors) the direction is undefined and `1` is returned instead of `NaN`
 * @throws {TypeError} If the vectors do not have the same length
 *
 * @public exported from `@promptbook/core`
 */
function computeCosineSimilarity(embeddingVector1, embeddingVector2) {
    if (embeddingVector1.length !== embeddingVector2.length) {
        throw new TypeError('Embedding vectors must have the same length');
    }
    // Note: Single pass accumulates the dot product and both squared magnitudes
    let dotProduct = 0;
    let squaredMagnitude1 = 0;
    let squaredMagnitude2 = 0;
    for (let i = 0; i < embeddingVector1.length; i++) {
        const value1 = embeddingVector1[i];
        const value2 = embeddingVector2[i];
        dotProduct += value1 * value2;
        squaredMagnitude1 += value1 * value1;
        squaredMagnitude2 += value2 * value2;
    }
    const magnitudeProduct = Math.sqrt(squaredMagnitude1) * Math.sqrt(squaredMagnitude2);
    if (magnitudeProduct === 0) {
        // Note: A zero vector has no direction; dividing would yield `NaN` which breaks sorting by this value,
        //       so it is treated as orthogonal (neither similar nor opposite)
        return 1;
    }
    return 1 - dotProduct / magnitudeProduct;
}
5308
+
5309
/**
 * Formats knowledge pieces as a bullet list
 *
 * Each piece is rendered as one `- <content>` entry and the entries are joined by a newline
 *
 * @param knowledgePieces Array of objects with a `content` property (other properties are ignored)
 * @returns The bullet list as a single string, empty string for an empty array
 *
 * @private internal utility of `createPipelineExecutor`
 */
function knowledgePiecesToString(knowledgePieces) {
    const bulletPoints = knowledgePieces.map(({ content }) => `- ${content}`);
    // <- TODO: [🧠] Some smarter aggregation of knowledge pieces, single-line vs multi-line vs mixed
    return bulletPoints.join('\n');
}
5325
+
5326
/**
 * Retrieves the most relevant knowledge pieces for a given task using embedding-based similarity search.
 * This is where retrieval-augmented generation (RAG) is performed to enhance the task with external knowledge.
 *
 * How it works:
 * 1) The embedding model is taken from the first index of the first knowledge piece
 * 2) The task content (together with `parameters`) is embedded by that model via `callEmbeddingModel`
 * 3) Every knowledge piece indexed by the same model is scored by `computeCosineSimilarity`
 *    Note: That function returns `1 - cosine similarity` (a distance), so a LOWER score means MORE relevant
 * 4) The pieces with the lowest scores are returned as a bullet list
 *
 * @param options - `tools` (its `llm` is used for embedding), `preparedPipeline`, `task` and `parameters`
 * @returns Bullet list of the selected knowledge pieces; empty string when there is no knowledge piece
 *          or the first one has no index; ALL knowledge pieces when the search itself throws
 *
 * @private internal utility of `createPipelineExecutor`
 */
async function getKnowledgeForTask(options) {
    const { tools, preparedPipeline, task, parameters } = options;
    const KNOWLEDGE_PIECES_LIMIT = 5;
    const MAX_COSINE_DISTANCE = 2; // <- Note: `1 - cosine similarity` lies within [0, 2]
    const firstKnowledgePiece = preparedPipeline.knowledgePieces[0];
    const firstKnowledgeIndex = firstKnowledgePiece === null || firstKnowledgePiece === undefined
        ? undefined
        : firstKnowledgePiece.index[0];
    // <- TODO: Do not use just first knowledge piece and first index to determine embedding model, use also keyword search
    if (firstKnowledgePiece === undefined || firstKnowledgeIndex === undefined) {
        return ''; // <- Note: No knowledge present, return empty string
    }
    try {
        // TODO: [🚐] Make arrayable LLMs -> single LLM DRY
        const _llms = arrayableToArray(tools.llm);
        const llmTools = _llms.length === 1 ? _llms[0] : joinLlmExecutionTools(..._llms);
        const taskEmbeddingPrompt = {
            title: 'Knowledge Search',
            modelRequirements: {
                modelVariant: 'EMBEDDING',
                modelName: firstKnowledgeIndex.modelName,
            },
            content: task.content,
            parameters,
        };
        const taskEmbeddingResult = await llmTools.callEmbeddingModel(taskEmbeddingPrompt);
        const knowledgePiecesWithDistance = preparedPipeline.knowledgePieces.map((knowledgePiece) => {
            const knowledgePieceIndex = knowledgePiece.index.find((i) => i.modelName === firstKnowledgeIndex.modelName);
            // <- TODO: Do not use just first knowledge piece and first index to determine embedding model
            if (knowledgePieceIndex === undefined) {
                // Note: Piece is not indexed by this model, so it can not be compared - rank it last
                //       (a score of 0 would wrongly rank it as the best match because lower means more relevant)
                return {
                    content: knowledgePiece.content,
                    distance: MAX_COSINE_DISTANCE,
                };
            }
            const distance = computeCosineSimilarity(knowledgePieceIndex.position, taskEmbeddingResult.content);
            return {
                content: knowledgePiece.content,
                // Note: `NaN` would make the sort comparator inconsistent, treat it as the worst match
                distance: Number.isNaN(distance) ? MAX_COSINE_DISTANCE : distance,
            };
        });
        // Note: Ascending order = most relevant first
        const knowledgePiecesSorted = knowledgePiecesWithDistance.sort((a, b) => a.distance - b.distance);
        const knowledgePiecesLimited = knowledgePiecesSorted.slice(0, KNOWLEDGE_PIECES_LIMIT);
        return knowledgePiecesToString(knowledgePiecesLimited);
    }
    catch (error) {
        assertsError(error);
        console.error('Error in `getKnowledgeForTask`', error);
        // Note: If the LLM fails, just return all knowledge pieces
        return knowledgePiecesToString(preparedPipeline.knowledgePieces);
    }
}
5391
+ /**
5392
+ * TODO: !!!! Verify if this is working
5393
+ * TODO: [♨] Implement Better - use keyword search
5394
+ * TODO: [♨] Examples of values
5395
+ */
5141
5396
 
5142
5397
  /**
5143
- * @@@
5398
+ * Retrieves all reserved parameters for a given pipeline task, including context, knowledge, examples, and metadata.
5399
+ * Ensures all reserved parameters are defined and throws if any are missing.
5400
+ *
5401
+ * @param options - Options including tools, pipeline, task, and context.
5402
+ * @returns An object containing all reserved parameters for the task.
5144
5403
  *
5145
5404
  * @private internal utility of `createPipelineExecutor`
5146
5405
  */
5147
5406
  async function getReservedParametersForTask(options) {
5148
- const { preparedPipeline, task, pipelineIdentification } = options;
5407
+ const { tools, preparedPipeline, task, parameters, pipelineIdentification } = options;
5149
5408
  const context = await getContextForTask(); // <- [🏍]
5150
- const knowledge = await getKnowledgeForTask({ preparedPipeline, task });
5409
+ const knowledge = await getKnowledgeForTask({ tools, preparedPipeline, task, parameters });
5151
5410
  const examples = await getExamplesForTask();
5152
5411
  const currentDate = new Date().toISOString(); // <- TODO: [🧠][💩] Better
5153
5412
  const modelName = RESERVED_PARAMETER_MISSING_VALUE;
@@ -5173,23 +5432,21 @@
5173
5432
  }
5174
5433
 
5175
5434
  /**
5176
- * @@@
5435
+ * Executes a single task within a pipeline, handling parameter validation, error checking, and progress reporting.
5436
+ *
5437
+ * @param options - Options for execution, including the task, pipeline, parameters, and callbacks.
5438
+ * @returns The output parameters produced by the task.
5177
5439
  *
5178
5440
  * @private internal utility of `createPipelineExecutor`
5179
5441
  */
5180
5442
  async function executeTask(options) {
5181
5443
  const { currentTask, preparedPipeline, parametersToPass, tools, onProgress, $executionReport, pipelineIdentification, maxExecutionAttempts, maxParallelCount, csvSettings, isVerbose, rootDirname, cacheDirname, intermediateFilesStrategy, isAutoInstalled, isNotPreparedWarningSupressed, } = options;
5182
5444
  const priority = preparedPipeline.tasks.length - preparedPipeline.tasks.indexOf(currentTask);
5183
- await onProgress({
5184
- outputParameters: {
5185
- [currentTask.resultingParameterName]: '', // <- TODO: [🧠] What is the best value here?
5186
- },
5187
- });
5188
5445
  // Note: Check consistency of used and dependent parameters which was also done in `validatePipeline`, but it’s good to doublecheck
5189
5446
  const usedParameterNames = extractParameterNamesFromTask(currentTask);
5190
5447
  const dependentParameterNames = new Set(currentTask.dependentParameterNames);
5191
5448
  // TODO: [👩🏾‍🤝‍👩🏻] Use here `mapAvailableToExpectedParameters`
5192
- if (union(difference(usedParameterNames, dependentParameterNames), difference(dependentParameterNames, usedParameterNames)).size !== 0) {
5449
+ if (difference(union(difference(usedParameterNames, dependentParameterNames), difference(dependentParameterNames, usedParameterNames)), new Set(RESERVED_PARAMETER_NAMES)).size !== 0) {
5193
5450
  throw new UnexpectedError(spaceTrim.spaceTrim((block) => `
5194
5451
  Dependent parameters are not consistent with used parameters:
5195
5452
 
@@ -5209,9 +5466,11 @@
5209
5466
  }
5210
5467
  const definedParameters = Object.freeze({
5211
5468
  ...(await getReservedParametersForTask({
5469
+ tools,
5212
5470
  preparedPipeline,
5213
5471
  task: currentTask,
5214
5472
  pipelineIdentification,
5473
+ parameters: parametersToPass,
5215
5474
  })),
5216
5475
  ...parametersToPass,
5217
5476
  });
@@ -5257,6 +5516,7 @@
5257
5516
  preparedPipeline,
5258
5517
  tools,
5259
5518
  $executionReport,
5519
+ onProgress,
5260
5520
  pipelineIdentification,
5261
5521
  maxExecutionAttempts,
5262
5522
  maxParallelCount,
@@ -5284,7 +5544,8 @@
5284
5544
  */
5285
5545
 
5286
5546
  /**
5287
- * @@@
5547
+ * Filters and returns only the output parameters from the provided pipeline execution options.
5548
+ * Adds warnings for any expected output parameters that are missing.
5288
5549
  *
5289
5550
  * @private internal utility of `createPipelineExecutor`
5290
5551
  */
@@ -5309,9 +5570,12 @@
5309
5570
  }
5310
5571
 
5311
5572
  /**
5312
- * @@@
5573
+ * Executes an entire pipeline, resolving tasks in dependency order, handling errors, and reporting progress.
5313
5574
  *
5314
- * Note: This is not a `PipelineExecutor` (which is binded with one exact pipeline), but a utility function of `createPipelineExecutor` which creates `PipelineExecutor`
5575
+ * Note: This is not a `PipelineExecutor` (which is bound to a single pipeline), but a utility function used by `createPipelineExecutor` to create a `PipelineExecutor`.
5576
+ *
5577
+ * @param options - Options for execution, including input parameters, pipeline, and callbacks.
5578
+ * @returns The result of the pipeline execution, including output parameters, errors, and usage statistics.
5315
5579
  *
5316
5580
  * @private internal utility of `createPipelineExecutor`
5317
5581
  */
@@ -5634,6 +5898,22 @@
5634
5898
  cacheDirname,
5635
5899
  intermediateFilesStrategy,
5636
5900
  isAutoInstalled,
5901
+ }).catch((error) => {
5902
+ assertsError(error);
5903
+ return exportJson({
5904
+ name: 'pipelineExecutorResult',
5905
+ message: `Unuccessful PipelineExecutorResult, last catch`,
5906
+ order: [],
5907
+ value: {
5908
+ isSuccessful: false,
5909
+ errors: [serializeError(error)],
5910
+ warnings: [],
5911
+ usage: UNCERTAIN_USAGE,
5912
+ executionReport: null,
5913
+ outputParameters: {},
5914
+ preparedPipeline,
5915
+ },
5916
+ });
5637
5917
  });
5638
5918
  };
5639
5919
  const pipelineExecutor = (inputParameters) => createTask({
@@ -5894,8 +6174,8 @@
5894
6174
  extension: 'md',
5895
6175
  isVerbose,
5896
6176
  });
5897
- // TODO: @@@ Preserve, delete or modify
5898
- // Note: Running Pandoc ONLY if the file in the cache does not exist
6177
+ // TODO: Determine if Markitdown conversion should run only if the cache file doesn't exist, or always.
6178
+ // Note: Running Markitdown conversion ONLY if the file in the cache does not exist
5899
6179
  if (!(await isFileExisting(cacheFilehandler.filename, this.tools.fs))) {
5900
6180
  const src = source.filename || source.url || null;
5901
6181
  // console.log('!!', { src, source, cacheFilehandler });
@@ -5917,11 +6197,11 @@
5917
6197
  return cacheFilehandler;
5918
6198
  }
5919
6199
  /**
5920
- * Scrapes the docx file and returns the knowledge pieces or `null` if it can't scrape it
6200
+ * Scrapes the source document (PDF, DOCX, etc.) and returns the knowledge pieces or `null` if it can't scrape it.
5921
6201
  */
5922
6202
  async scrape(source) {
5923
6203
  const cacheFilehandler = await this.$convert(source);
5924
- // TODO: @@@ Preserve, delete or modify
6204
+ // TODO: Ensure this correctly creates the source object for the internal MarkdownScraper using the converted file.
5925
6205
  const markdownSource = {
5926
6206
  source: source.source,
5927
6207
  filename: cacheFilehandler.filename,
@@ -6065,7 +6345,8 @@
6065
6345
  */
6066
6346
 
6067
6347
  /**
6068
- * @@@
6348
+ * Factory function to create an instance of PdfScraper.
6349
+ * It bundles the scraper class with its metadata.
6069
6350
  *
6070
6351
  * @public exported from `@promptbook/pdf`
6071
6352
  */