@promptbook/core 0.72.0-6 → 0.72.0-7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. package/README.md +6 -0
  2. package/esm/index.es.js +1512 -453
  3. package/esm/index.es.js.map +1 -1
  4. package/esm/typings/src/_packages/browser.index.d.ts +1 -1
  5. package/esm/typings/src/_packages/core.index.d.ts +24 -10
  6. package/esm/typings/src/_packages/node.index.d.ts +6 -2
  7. package/esm/typings/src/_packages/types.index.d.ts +28 -20
  8. package/esm/typings/src/cli/cli-commands/about.d.ts +1 -1
  9. package/esm/typings/src/cli/cli-commands/hello.d.ts +2 -1
  10. package/esm/typings/src/cli/cli-commands/make.d.ts +1 -1
  11. package/esm/typings/src/cli/cli-commands/prettify.d.ts +2 -1
  12. package/esm/typings/src/cli/cli-commands/test-command.d.ts +13 -0
  13. package/esm/typings/src/cli/main.d.ts +1 -1
  14. package/esm/typings/src/cli/promptbookCli.d.ts +1 -1
  15. package/esm/typings/src/collection/constructors/createCollectionFromDirectory.d.ts +8 -5
  16. package/esm/typings/src/commands/EXPECT/expectCommandParser.d.ts +1 -1
  17. package/esm/typings/src/commands/FOREACH/foreachCommandParser.d.ts +1 -1
  18. package/esm/typings/src/commands/FORMAT/formatCommandParser.d.ts +1 -1
  19. package/esm/typings/src/commands/JOKER/jokerCommandParser.d.ts +1 -1
  20. package/esm/typings/src/commands/KNOWLEDGE/knowledgeCommandParser.d.ts +1 -1
  21. package/esm/typings/src/commands/KNOWLEDGE/utils/sourceContentToName.d.ts +11 -0
  22. package/esm/typings/src/commands/KNOWLEDGE/utils/sourceContentToName.test.d.ts +4 -0
  23. package/esm/typings/src/commands/MODEL/modelCommandParser.d.ts +1 -1
  24. package/esm/typings/src/commands/PARAMETER/parameterCommandParser.d.ts +1 -1
  25. package/esm/typings/src/commands/PERSONA/personaCommandParser.d.ts +1 -1
  26. package/esm/typings/src/commands/POSTPROCESS/postprocessCommandParser.d.ts +1 -1
  27. package/esm/typings/src/commands/PROMPTBOOK_VERSION/promptbookVersionCommandParser.d.ts +1 -1
  28. package/esm/typings/src/commands/TEMPLATE/templateCommandParser.d.ts +1 -1
  29. package/esm/typings/src/commands/URL/urlCommandParser.d.ts +1 -1
  30. package/esm/typings/src/commands/X_ACTION/actionCommandParser.d.ts +1 -1
  31. package/esm/typings/src/commands/X_INSTRUMENT/instrumentCommandParser.d.ts +1 -1
  32. package/esm/typings/src/commands/_BOILERPLATE/boilerplateCommandParser.d.ts +1 -1
  33. package/esm/typings/src/config.d.ts +10 -0
  34. package/esm/typings/src/conversion/pipelineStringToJson.d.ts +2 -15
  35. package/esm/typings/src/conversion/validation/_importPipeline.d.ts +1 -1
  36. package/esm/typings/src/conversion/validation/validatePipeline.d.ts +5 -5
  37. package/esm/typings/src/{knowledge/dialogs → dialogs}/callback/CallbackInterfaceTools.d.ts +2 -2
  38. package/esm/typings/src/{knowledge/dialogs → dialogs}/callback/CallbackInterfaceToolsOptions.d.ts +2 -2
  39. package/esm/typings/src/{knowledge/dialogs → dialogs}/simple-prompt/SimplePromptInterfaceTools.d.ts +4 -4
  40. package/esm/typings/src/errors/KnowledgeScrapeError.d.ts +9 -0
  41. package/esm/typings/src/errors/MissingToolsError.d.ts +9 -0
  42. package/esm/typings/src/execution/ExecutionTools.d.ts +3 -3
  43. package/esm/typings/src/execution/createPipelineExecutor/00-CreatePipelineExecutorOptions.d.ts +5 -2
  44. package/esm/typings/src/execution/createPipelineExecutor/00-CreatePipelineExecutorSettings.d.ts +2 -13
  45. package/esm/typings/src/execution/createPipelineExecutor/00-createPipelineExecutor.d.ts +1 -1
  46. package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +1 -1
  47. package/esm/typings/src/execution/translation/automatic-translate/translateMessages.d.ts +3 -0
  48. package/esm/typings/src/execution/utils/forEachAsync.d.ts +1 -0
  49. package/esm/typings/src/llm-providers/_common/createLlmToolsFromConfigurationFromEnv.d.ts +1 -1
  50. package/esm/typings/src/llm-providers/_common/createLlmToolsFromEnv.d.ts +1 -1
  51. package/esm/typings/src/llm-providers/_common/getLlmToolsForCli.d.ts +1 -1
  52. package/esm/typings/src/llm-providers/anthropic-claude/playground/playground.d.ts +1 -0
  53. package/esm/typings/src/llm-providers/azure-openai/AzureOpenAiExecutionTools.d.ts +6 -0
  54. package/esm/typings/src/llm-providers/azure-openai/playground/playground.d.ts +1 -0
  55. package/esm/typings/src/llm-providers/langtail/playground/playground.d.ts +3 -0
  56. package/esm/typings/src/llm-providers/multiple/playground/playground.d.ts +3 -0
  57. package/esm/typings/src/llm-providers/openai/playground/playground.d.ts +1 -0
  58. package/esm/typings/src/llm-providers/remote/playground/playground.d.ts +3 -0
  59. package/esm/typings/src/personas/preparePersona.d.ts +2 -2
  60. package/esm/typings/src/prepare/PrepareAndScrapeOptions.d.ts +67 -0
  61. package/esm/typings/src/prepare/preparePipeline.d.ts +2 -2
  62. package/esm/typings/src/prepare/prepareTemplates.d.ts +2 -2
  63. package/esm/typings/src/scrapers/_common/Converter.d.ts +28 -0
  64. package/esm/typings/src/scrapers/_common/Scraper.d.ts +71 -0
  65. package/esm/typings/src/scrapers/_common/ScraperIntermediateSource.d.ts +11 -0
  66. package/esm/typings/src/{knowledge/prepare-knowledge → scrapers}/_common/prepareKnowledgePieces.d.ts +4 -4
  67. package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +33 -0
  68. package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.test.d.ts +4 -0
  69. package/esm/typings/src/scrapers/_common/utils/makeKnowledgeSourceHandler.d.ts +10 -0
  70. package/esm/typings/src/scrapers/document/documentScraper.d.ts +37 -0
  71. package/esm/typings/src/scrapers/document/documentScraper.test.d.ts +4 -0
  72. package/esm/typings/src/scrapers/document/playground/document-scraper-playground.d.ts +5 -0
  73. package/esm/typings/src/scrapers/document-legacy/legacyDocumentScraper.d.ts +37 -0
  74. package/esm/typings/src/scrapers/document-legacy/legacyDocumentScraper.test.d.ts +4 -0
  75. package/esm/typings/src/scrapers/document-legacy/playground/legacy-document-scraper-playground.d.ts +5 -0
  76. package/esm/typings/src/scrapers/index.d.ts +7 -0
  77. package/esm/typings/src/scrapers/markdown/markdownScraper.d.ts +29 -0
  78. package/esm/typings/src/scrapers/markdown/playground/markdown-scraper-playground.d.ts +5 -0
  79. package/esm/typings/src/scrapers/pdf/pdfScraper.d.ts +35 -0
  80. package/esm/typings/src/scrapers/pdf/playground/pdf-scraper-playground.d.ts +5 -0
  81. package/esm/typings/src/scrapers/website/playground/website-scraper-playground.d.ts +5 -0
  82. package/esm/typings/src/scrapers/website/utils/markdownConverter.d.ts +12 -0
  83. package/esm/typings/src/scrapers/website/websiteScraper.d.ts +43 -0
  84. package/esm/typings/src/storage/{files-storage/FilesStorage.d.ts → file-cache-storage/FileCacheStorage.d.ts} +5 -5
  85. package/esm/typings/src/storage/file-cache-storage/FileCacheStorageOptions.d.ts +10 -0
  86. package/esm/typings/src/storage/{files-storage → file-cache-storage}/utils/nameToSubfolderPath.d.ts +1 -1
  87. package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.test.d.ts +1 -0
  88. package/esm/typings/src/storage/local-storage/getLocalStorage.d.ts +1 -1
  89. package/esm/typings/src/storage/local-storage/getSessionStorage.d.ts +1 -1
  90. package/esm/typings/src/types/PipelineJson/KnowledgeSourceJson.d.ts +9 -2
  91. package/esm/typings/src/types/PipelineJson/PipelineJson.d.ts +2 -2
  92. package/esm/typings/src/types/typeAliases.d.ts +8 -11
  93. package/esm/typings/src/utils/execCommand/$execCommand.d.ts +14 -0
  94. package/esm/typings/src/utils/execCommand/$execCommands.d.ts +17 -0
  95. package/esm/typings/src/utils/execCommand/IExecCommandOptions.d.ts +23 -0
  96. package/esm/typings/src/utils/execCommand/execCommand.test.d.ts +1 -0
  97. package/esm/typings/src/utils/execCommand/execCommandNormalizeOptions.d.ts +10 -0
  98. package/esm/typings/src/utils/execCommand/execCommandNormalizeOptions.test.d.ts +1 -0
  99. package/esm/typings/src/utils/files/$isDirectoryExisting.d.ts +3 -3
  100. package/esm/typings/src/utils/files/$isFileExisting.d.ts +3 -3
  101. package/esm/typings/src/utils/files/$listAllFiles.d.ts +5 -4
  102. package/esm/typings/src/utils/files/extensionToMimeType.d.ts +8 -0
  103. package/esm/typings/src/utils/files/extensionToMimeType.test.d.ts +1 -0
  104. package/esm/typings/src/utils/files/getFileExtension.d.ts +8 -0
  105. package/esm/typings/src/utils/files/getFileExtension.test.d.ts +1 -0
  106. package/esm/typings/src/utils/validators/filePath/isValidFilePath.d.ts +2 -2
  107. package/package.json +6 -1
  108. package/umd/index.umd.js +1517 -458
  109. package/umd/index.umd.js.map +1 -1
  110. package/esm/typings/src/knowledge/prepare-knowledge/_common/Scraper.d.ts +0 -37
  111. package/esm/typings/src/knowledge/prepare-knowledge/markdown/playground/markdown-knowledge-playground.d.ts +0 -2
  112. package/esm/typings/src/knowledge/prepare-knowledge/markdown/prepareKnowledgeFromMarkdown.d.ts +0 -14
  113. package/esm/typings/src/knowledge/prepare-knowledge/pdf/prepareKnowledgeFromPdf.d.ts +0 -15
  114. package/esm/typings/src/prepare/PrepareOptions.d.ts +0 -22
  115. package/esm/typings/src/storage/files-storage/FilesStorageOptions.d.ts +0 -10
  116. /package/esm/typings/src/{knowledge/dialogs → dialogs}/user-interface-execution-tools.test.d.ts +0 -0
  117. /package/esm/typings/src/{knowledge/prepare-knowledge → scrapers}/_common/prepareKnowledgePieces.test.d.ts +0 -0
  118. /package/esm/typings/src/{knowledge/prepare-knowledge/markdown/prepareKnowledgeFromMarkdown.test.d.ts → scrapers/markdown/markdownScraper.test.d.ts} +0 -0
  119. /package/esm/typings/src/{knowledge/prepare-knowledge/pdf/prepareKnowledgeFromPdf.test.d.ts → scrapers/website/utils/markdownConverter.test.d.ts} +0 -0
  120. /package/esm/typings/src/{storage/files-storage/utils/nameToSubfolderPath.test.d.ts → scrapers/website/websiteScraper.test.d.ts} +0 -0
package/esm/index.es.js CHANGED
@@ -1,9 +1,18 @@
1
1
  import spaceTrim, { spaceTrim as spaceTrim$1 } from 'spacetrim';
2
2
  import { format } from 'prettier';
3
3
  import parserHtml from 'prettier/parser-html';
4
+ import { stat, access, constants, mkdir, rm, readFile, rmdir, rename, readdir, writeFile } from 'fs/promises';
5
+ import { basename, join, dirname } from 'path';
6
+ import { spawn } from 'child_process';
7
+ import colors from 'colors';
4
8
  import { forTime } from 'waitasecond';
5
- import { unparse, parse } from 'papaparse';
9
+ import { SHA256 } from 'crypto-js';
6
10
  import hexEncoder from 'crypto-js/enc-hex';
11
+ import { unparse, parse } from 'papaparse';
12
+ import { Readability } from '@mozilla/readability';
13
+ import { JSDOM } from 'jsdom';
14
+ import { Converter } from 'showdown';
15
+ import { lookup } from 'mime-types';
7
16
  import sha256 from 'crypto-js/sha256';
8
17
  import moment from 'moment';
9
18
 
@@ -11,7 +20,7 @@ import moment from 'moment';
11
20
  /**
12
21
  * The version of the Promptbook library
13
22
  */
14
- var PROMPTBOOK_VERSION = '0.72.0-5';
23
+ var PROMPTBOOK_VERSION = '0.72.0-6';
15
24
  // TODO: [main] !!!! List here all the versions and annotate + put into script
16
25
 
17
26
  /*! *****************************************************************************
@@ -683,9 +692,19 @@ var MAX_KNOWLEDGE_SOURCES_SCRAPING_TOTAL = 200;
683
692
  /**
684
693
  * Where to store the cache of executions for promptbook CLI
685
694
  *
695
+ * Note: When the folder does not exist, it is created recursively
696
+ *
686
697
  * @public exported from `@promptbook/core`
687
698
  */
688
699
  var EXECUTIONS_CACHE_DIRNAME = '/.promptbook/executions-cache';
700
+ /**
701
+ * Where to store the scrape cache
702
+ *
703
+ * Note: When the folder does not exist, it is created recursively
704
+ *
705
+ * @public exported from `@promptbook/core`
706
+ */
707
+ var SCRAPE_CACHE_DIRNAME = '/.promptbook/scrape-cache';
689
708
  /**
690
709
  * The name of the built pipeline collection made by CLI `ptbk make` and for lookup in `createCollectionFromDirectory`
691
710
  *
@@ -1215,11 +1234,11 @@ function validatePipelineCore(pipeline) {
1215
1234
  * > ex port function validatePipeline(promptbook: really_unknown): asserts promptbook is PipelineJson {
1216
1235
  */
1217
1236
  /**
1218
- * TODO: [🐣][main] !!!! Validate that all samples match expectations
1219
- * TODO: [🐣][🐝][main] !!!! Validate that knowledge is valid (non-void)
1220
- * TODO: [🐣][main] !!!! Validate that persona can be used only with CHAT variant
1221
- * TODO: [🐣][main] !!!! Validate that parameter with reserved name not used RESERVED_PARAMETER_NAMES
1222
- * TODO: [🐣][main] !!!! Validate that reserved parameter is not used as joker
1237
+ * TODO: [🧳][main] !!!! Validate that all samples match expectations
1238
+ * TODO: [🧳][🐝][main] !!!! Validate that knowledge is valid (non-void)
1239
+ * TODO: [🧳][main] !!!! Validate that persona can be used only with CHAT variant
1240
+ * TODO: [🧳][main] !!!! Validate that parameter with reserved name not used RESERVED_PARAMETER_NAMES
1241
+ * TODO: [🧳][main] !!!! Validate that reserved parameter is not used as joker
1223
1242
  * TODO: [🧠] Validation not only logic itself but imports around - files and websites and referenced pipelines exist
1224
1243
  * TODO: [🛠] Actions, instruments (and maybe knowledge) => Functions and tools
1225
1244
  */
@@ -1634,6 +1653,22 @@ var TemplateTypes = [
1634
1653
  // <- [🅱]
1635
1654
  ];
1636
1655
 
1656
+ /**
1657
+ * This error indicates that the promptbook can not retrieve knowledge from external sources
1658
+ *
+ * ES5-style subclass of the built-in `Error`, wired up with the `__extends` helper.
+ * The constructor forwards `message` to `Error`, sets `name` to `'KnowledgeScrapeError'`
+ * and re-applies the prototype with `Object.setPrototypeOf`, so the created instance is
+ * linked to `KnowledgeScrapeError.prototype`.
+ *
+ * @param message - Description of the failure, passed to the `Error` constructor
+ *
1659
+ * @public exported from `@promptbook/core`
1660
+ */
1661
+ var KnowledgeScrapeError = /** @class */ (function (_super) {
1662
+ __extends(KnowledgeScrapeError, _super);
1663
+ function KnowledgeScrapeError(message) {
1664
+ var _this = _super.call(this, message) || this;
1665
+ _this.name = 'KnowledgeScrapeError';
1666
+ Object.setPrototypeOf(_this, KnowledgeScrapeError.prototype);
1667
+ return _this;
1668
+ }
1669
+ return KnowledgeScrapeError;
1670
+ }(Error));
1671
+
1637
1672
  /**
1638
1673
  * Async version of Array.forEach
1639
1674
  *
@@ -1641,6 +1676,7 @@ var TemplateTypes = [
1641
1676
  * @param options - Options for the function
1642
1677
  * @param callbackfunction - Function to call for each item
1643
1678
  * @public exported from `@promptbook/utils`
1679
+ * @deprecated [🪂] Use queues instead
1644
1680
  */
1645
1681
  function forEachAsync(array, options, callbackfunction) {
1646
1682
  return __awaiter(this, void 0, void 0, function () {
@@ -1710,59 +1746,246 @@ function forEachAsync(array, options, callbackfunction) {
1710
1746
  }
1711
1747
 
1712
1748
  /**
1713
- * Represents the usage with no resources consumed
1749
+ * This error type indicates that some tools are missing for pipeline execution or preparation
1714
1750
  *
1715
1751
  * @public exported from `@promptbook/core`
1716
1752
  */
1717
- var ZERO_USAGE = $deepFreeze({
1718
- price: { value: 0 },
1719
- input: {
1720
- tokensCount: { value: 0 },
1721
- charactersCount: { value: 0 },
1722
- wordsCount: { value: 0 },
1723
- sentencesCount: { value: 0 },
1724
- linesCount: { value: 0 },
1725
- paragraphsCount: { value: 0 },
1726
- pagesCount: { value: 0 },
1727
- },
1728
- output: {
1729
- tokensCount: { value: 0 },
1730
- charactersCount: { value: 0 },
1731
- wordsCount: { value: 0 },
1732
- sentencesCount: { value: 0 },
1733
- linesCount: { value: 0 },
1734
- paragraphsCount: { value: 0 },
1735
- pagesCount: { value: 0 },
1736
- },
1737
- });
1753
+ var MissingToolsError = /** @class */ (function (_super) {
1754
+ __extends(MissingToolsError, _super);
1755
+ function MissingToolsError(message) {
1756
+ var _this = _super.call(this, spaceTrim$1(function (block) { return "\n ".concat(block(message), "\n\n Note: You have probbably forgot to provide some tools for pipeline execution or preparation\n\n "); })) || this;
1757
+ _this.name = 'MissingToolsError';
1758
+ Object.setPrototypeOf(_this, MissingToolsError.prototype);
1759
+ return _this;
1760
+ }
1761
+ return MissingToolsError;
1762
+ }(Error));
1763
+
1738
1764
  /**
1739
- * Represents the usage with unknown resources consumed
1765
+ * Detects if the code is running in a Node.js environment
1766
+ *
1767
+ * Note: `$` is used to indicate that this function is not a pure function - it looks at the global object to determine the environment
1768
+ *
1769
+ * @public exported from `@promptbook/utils`
1770
+ */
1771
+ var $isRunningInNode = new Function("\n try {\n return this === global;\n } catch (e) {\n return false;\n }\n");
1772
+
1773
+ /**
1774
+ * This error type indicates that you try to use a feature that is not available in the current environment
1740
1775
  *
1741
1776
  * @public exported from `@promptbook/core`
1742
1777
  */
1743
- var UNCERTAIN_USAGE = $deepFreeze({
1744
- price: { value: 0, isUncertain: true },
1745
- input: {
1746
- tokensCount: { value: 0, isUncertain: true },
1747
- charactersCount: { value: 0, isUncertain: true },
1748
- wordsCount: { value: 0, isUncertain: true },
1749
- sentencesCount: { value: 0, isUncertain: true },
1750
- linesCount: { value: 0, isUncertain: true },
1751
- paragraphsCount: { value: 0, isUncertain: true },
1752
- pagesCount: { value: 0, isUncertain: true },
1753
- },
1754
- output: {
1755
- tokensCount: { value: 0, isUncertain: true },
1756
- charactersCount: { value: 0, isUncertain: true },
1757
- wordsCount: { value: 0, isUncertain: true },
1758
- sentencesCount: { value: 0, isUncertain: true },
1759
- linesCount: { value: 0, isUncertain: true },
1760
- paragraphsCount: { value: 0, isUncertain: true },
1761
- pagesCount: { value: 0, isUncertain: true },
1762
- },
1763
- });
1778
+ var EnvironmentMismatchError = /** @class */ (function (_super) {
1779
+ __extends(EnvironmentMismatchError, _super);
1780
+ function EnvironmentMismatchError(message) {
1781
+ var _this = _super.call(this, message) || this;
1782
+ _this.name = 'EnvironmentMismatchError';
1783
+ Object.setPrototypeOf(_this, EnvironmentMismatchError.prototype);
1784
+ return _this;
1785
+ }
1786
+ return EnvironmentMismatchError;
1787
+ }(Error));
1764
1788
 
1765
- var PipelineCollection = [{title:"Prepare Knowledge from Markdown",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.ptbk.md",parameters:[{name:"knowledgeContent",description:"Markdown document content",isInput:true,isOutput:false},{name:"knowledgePieces",description:"The knowledge JSON object",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}",resultingParameterName:"knowledgePieces",dependentParameterNames:["knowledgeContent"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-knowledge-from-markdown.ptbk.md"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-keywords.ptbk.md",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"keywords",description:"Keywords separated by comma",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> 
{knowledgePieceContent}",resultingParameterName:"keywords",dependentParameterNames:["knowledgePieceContent"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-knowledge-keywords.ptbk.md"},{title:"Prepare Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-title.ptbk.md",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"title",description:"The title of the document",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"knowledge",title:"Knowledge",content:"You are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Title should be concise and clear\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}",resultingParameterName:"title",expectations:{words:{min:1,max:8}},dependentParameterNames:["knowledgePieceContent"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-knowledge-title.ptbk.md"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-persona.ptbk.md",parameters:[{name:"availableModelNames",description:"List of available model names separated by comma (,)",isInput:true,isOutput:false},{name:"personaDescription",description:"Description of the persona",isInput:true,isOutput:false},{name:"modelRequirements",description:"Specific requirements for the model",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"make-model-requirements",title:"Make modelRequirements",content:"You are experienced AI engineer, you need to create virtual assistant.\nWrite\n\n## Sample\n\n```json\n{\n\"modelName\": \"gpt-4o\",\n\"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n\"temperature\": 0.7\n}\n```\n\n## Instructions\n\n- Your output format is JSON object\n- Write just the 
JSON object, no other text should be present\n- It contains the following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nPick from the following models:\n\n- {availableModelNames}\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}",resultingParameterName:"modelRequirements",format:"JSON",dependentParameterNames:["availableModelNames","personaDescription"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-persona.ptbk.md"}];
1789
+ /**
1790
+ * Normalize options for `execCommand` and `execCommands`
1791
+ *
+ * Accepts either a plain command string or an options object and returns one uniform shape.
+ *
+ * Defaults applied when a value is missing (`null` / `undefined`) or when `options` is a string:
+ * - `cwd` is `process.cwd()`
+ * - `crashOnError` is `true`
+ * - `timeout` is `Infinity`
+ *
+ * The command string is split into whitespace-separated tokens; a token that starts with a
+ * double quote is matched greedily up to the last double quote and kept as one token
+ * (quotes included). When more than one token is found, the first becomes `command` and the
+ * rest become `args`. Any `options.args` are appended after the parsed ones.
+ *
+ * `humanReadableCommand` is the command itself, or the first argument when the command is
+ * `npx` or `npm`; when that value is `ts-node`, the second argument is appended to it.
+ *
+ * @param options - Command string, or object with `command` and optional `cwd`, `crashOnError`, `timeout`, `args`
+ * @returns Object with `command`, `humanReadableCommand`, `args`, `cwd`, `crashOnError` and `timeout`
+ *
1792
+ * @private internal utility of `execCommand` and `execCommands`
1793
+ */
1794
+ function execCommandNormalizeOptions(options) {
1795
+ var _a;
1796
+ var _b, _c, _d;
1797
+ var command;
1798
+ var cwd;
1799
+ var crashOnError;
1800
+ var args = [];
1801
+ var timeout;
1802
+ if (typeof options === 'string') {
1803
+ // TODO: [1] DRY default values
1804
+ command = options;
1805
+ cwd = process.cwd();
1806
+ crashOnError = true;
1807
+ timeout = Infinity;
1808
+ }
1809
+ else {
1810
+ /*
1811
+ TODO:
1812
+ if ((options as any).commands !== undefined) {
1813
+ commands = (options as any).commands;
1814
+ } else {
1815
+ commands = [(options as any).command];
1816
+ }
1817
+ */
1818
+ // TODO: [1] DRY default values
1819
+ command = options.command;
1820
+ cwd = (_b = options.cwd) !== null && _b !== void 0 ? _b : process.cwd();
1821
+ crashOnError = (_c = options.crashOnError) !== null && _c !== void 0 ? _c : true;
1822
+ timeout = (_d = options.timeout) !== null && _d !== void 0 ? _d : Infinity;
1823
+ }
1824
+ // TODO: /(-[a-zA-Z0-9-]+\s+[^\s]*)|[^\s]*/g
1825
+ var _ = Array.from(command.matchAll(/(".*")|([^\s]*)/g))
1826
+ .map(function (_a) {
1827
+ var _b = __read(_a, 1), match = _b[0];
1828
+ return match;
1829
+ })
1830
+ .filter(function (arg) { return arg !== ''; });
1831
+ if (_.length > 1) {
1832
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1833
+ _a = __read(_), command = _a[0], args = _a.slice(1);
1834
+ }
1835
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1836
+ if (options.args) {
1837
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1838
+ args = __spreadArray(__spreadArray([], __read(args), false), __read(options.args), false);
1839
+ }
1840
+ var humanReadableCommand = !['npx', 'npm'].includes(command) ? command : args[0];
1841
+ if (['ts-node'].includes(humanReadableCommand)) {
1842
+ humanReadableCommand += " ".concat(args[1]);
1843
+ }
1844
+ return { command: command, humanReadableCommand: humanReadableCommand, args: args, cwd: cwd, crashOnError: crashOnError, timeout: timeout };
1845
+ }
1846
+ // TODO: This should show type error> execCommandNormalizeOptions({ command: '', commands: [''] });
1847
+
1848
+ /**
1849
+ * Run one command in a shell
1850
+ *
1851
+ * Note: There are 2 similar functions in the codebase:
1852
+ * - `$execCommand` which runs a single command
1853
+ * - `$execCommands` which runs multiple commands
1854
+ *
+ * Behavior visible in this implementation:
+ * - Throws `EnvironmentMismatchError` synchronously (before any promise is created) when `$isRunningInNode()` is falsy
+ * - `options` is normalized by `execCommandNormalizeOptions`
+ * - When `process.platform` starts with `win`, `npm` / `npx` are invoked as `npm.cmd` / `npx.cmd`
+ * - The process is started with `spawn(command, args, { cwd, shell: true })`
+ * - Chunks from stdout and stderr are collected into one shared list and echoed to the console
+ * - Finite `timeout` elapsed: rejects when `crashOnError`, otherwise warns and resolves with `'Command exceeded time limit'`; nothing here kills the spawned process
+ * - Non-zero exit code: rejects with the collected output (or a generic message when it is empty) when `crashOnError`, otherwise warns and resolves with the collected output
+ * - Both `close` and `exit` are wired to the same handler, so it may run twice
+ * - `disconnect` always rejects; `error` rejects or resolves depending on `crashOnError`
+ *
+ * @param options - Command string or options object as accepted by `execCommandNormalizeOptions`
+ * @returns Promise of the collected output joined by newlines and passed through `spaceTrim`
+ *
+ * NOTE(review): the environment error message reads "Node environment.js" - presumably "Node.js environment" was intended; confirm before changing the runtime string
+ *
1855
+ * @public exported from `@promptbook/node`
1856
+ */
1857
+ function $execCommand(options) {
1858
+ if (!$isRunningInNode()) {
1859
+ throw new EnvironmentMismatchError('Function `$execCommand` can run only in Node environment.js');
1860
+ }
1861
+ return new Promise(
1862
+ // <- TODO: [🧱] Implement in a functional (not new Class) way
1863
+ function (resolve, reject) {
1864
+ // eslint-disable-next-line prefer-const
1865
+ var _a = execCommandNormalizeOptions(options), command = _a.command, humanReadableCommand = _a.humanReadableCommand, args = _a.args, cwd = _a.cwd, crashOnError = _a.crashOnError, timeout = _a.timeout;
1866
+ if (timeout !== Infinity) {
1867
+ // TODO: In waitasecond forTime(Infinity) should be equivalent to forEver()
1868
+ forTime(timeout).then(function () {
1869
+ if (crashOnError) {
1870
+ reject(new Error("Command \"".concat(humanReadableCommand, "\" exceeded time limit of ").concat(timeout, "ms")));
1871
+ }
1872
+ else {
1873
+ console.warn("Command \"".concat(humanReadableCommand, "\" exceeded time limit of ").concat(timeout, "ms but continues running"));
1874
+ resolve('Command exceeded time limit');
1875
+ }
1876
+ });
1877
+ }
1878
+ if (/^win/.test(process.platform) && ['npm', 'npx'].includes(command)) {
1879
+ command = "".concat(command, ".cmd");
1880
+ }
1881
+ // !!!!!! Verbose mode - to all consoles
1882
+ console.info(colors.yellow(cwd) + ' ' + colors.green(command) + ' ' + colors.blue(args.join(' ')));
1883
+ try {
1884
+ var commandProcess = spawn(command, args, { cwd: cwd, shell: true });
1885
+ commandProcess.on('message', function (message) {
1886
+ console.info({ message: message });
1887
+ });
1888
+ var output_1 = [];
1889
+ commandProcess.stdout.on('data', function (stdout) {
1890
+ output_1.push(stdout.toString());
1891
+ console.info(stdout.toString());
1892
+ });
1893
+ commandProcess.stderr.on('data', function (stderr) {
1894
+ output_1.push(stderr.toString());
1895
+ if (stderr.toString().trim()) {
1896
+ console.warn(stderr.toString());
1897
+ }
1898
+ });
1899
+ var finishWithCode = function (code) {
1900
+ if (code !== 0) {
1901
+ if (crashOnError) {
1902
+ reject(new Error(output_1.join('\n').trim() ||
1903
+ "Command \"".concat(humanReadableCommand, "\" exited with code ").concat(code)));
1904
+ }
1905
+ else {
1906
+ console.warn("Command \"".concat(humanReadableCommand, "\" exited with code ").concat(code));
1907
+ resolve(spaceTrim$1(output_1.join('\n')));
1908
+ }
1909
+ }
1910
+ else {
1911
+ resolve(spaceTrim$1(output_1.join('\n')));
1912
+ }
1913
+ };
1914
+ commandProcess.on('close', finishWithCode);
1915
+ commandProcess.on('exit', finishWithCode);
1916
+ commandProcess.on('disconnect', function () {
1917
+ // Note: Unexpected disconnection should always result in rejection
1918
+ reject(new Error("Command \"".concat(humanReadableCommand, "\" disconnected")));
1919
+ });
1920
+ commandProcess.on('error', function (error) {
1921
+ if (crashOnError) {
1922
+ reject(new Error("Command \"".concat(humanReadableCommand, "\" failed: \n").concat(error.message)));
1923
+ }
1924
+ else {
1925
+ console.warn(error);
1926
+ resolve(spaceTrim$1(output_1.join('\n')));
1927
+ }
1928
+ });
1929
+ }
1930
+ catch (error) {
1931
+ // Note: Unexpected error in sync code should always result in rejection
1932
+ reject(error);
1933
+ }
1934
+ });
1935
+ }
1936
+ /**
1937
+ * Note: [🟢 <- TODO: [🦖] !!!!!! Split scrapers into packages and enable] Code in this file should never be published outside of `@promptbook/node` and `@promptbook/cli`
1938
+ */
1939
+
1940
+ /**
1941
+ * Checks if the file exists
1942
+ *
1943
+ * Note: `$` is used to indicate that this function is not a pure function - it looks at the filesystem
1944
+ *
+ * Works in two steps: first `access(filename, constants.R_OK)` checks read access, then
+ * `stat(filename)` confirms via `isFile()` that the path is a file. A failure of either call
+ * is turned into `false` instead of an error.
+ *
+ * @param filename - Path handed to `access` and `stat`
+ * @returns Promise of `true` only when the path is readable and `isFile()` reports true, otherwise `false`
+ * @throws {EnvironmentMismatchError} When `$isRunningInNode()` is falsy (presumably surfaced as a rejected promise by `__awaiter` - confirm)
+ *
1945
+ * @private within the repository
1946
+ */
1947
+ function $isFileExisting(filename) {
1948
+ return __awaiter(this, void 0, void 0, function () {
1949
+ var isReadAccessAllowed, isFile;
1950
+ return __generator(this, function (_a) {
1951
+ switch (_a.label) {
1952
+ case 0:
1953
+ if (!$isRunningInNode()) {
1954
+ throw new EnvironmentMismatchError('Function `$isFileExisting` works only in Node environment.js');
1955
+ }
1956
+ return [4 /*yield*/, access(filename, constants.R_OK)
1957
+ .then(function () { return true; })
1958
+ .catch(function () { return false; })];
1959
+ case 1:
1960
+ isReadAccessAllowed = _a.sent();
1961
+ if (!isReadAccessAllowed) {
1962
+ return [2 /*return*/, false];
1963
+ }
1964
+ return [4 /*yield*/, stat(filename)
1965
+ .then(function (fileStat) { return fileStat.isFile(); })
1966
+ .catch(function () { return false; })];
1967
+ case 2:
1968
+ isFile = _a.sent();
1969
+ return [2 /*return*/, isFile];
1970
+ }
1971
+ });
1972
+ });
1973
+ }
1974
+ /**
1975
+ * Note: [🟢 <- TODO: [🦖] !!!!!! Split scrapers into packages and enable] Code in this file should never be published outside of `@promptbook/node` and `@promptbook/cli`
1976
+ * TODO: [🐠] This can be a validator - with variants that return true/false and variants that throw errors with meaningful messages
1977
+ * TODO: [🖇] What about symlinks?
1978
+ */
1979
+
1980
+ /**
1981
+ * Get the file extension from a file name
1982
+ *
+ * Looks for a dot followed by letters/digits that is immediately followed by `?`, `#`
+ * or the end of the string, so a trailing query string or fragment is ignored.
+ * The match is case-insensitive and the result is lower-cased.
+ *
+ * Note: Only the last segment before `?` / `#` / end counts, e.g. `archive.tar.gz` gives `gz`
+ *
+ * @param value - File name, path or URL-like string to inspect
+ * @returns The extension without the leading dot in lower case, or `null` when nothing matches
+ *
1983
+ * @private within the repository
1984
+ */
1985
+ function getFileExtension(value) {
1986
+ var match = value.match(/\.([0-9a-z]+)(?:[?#]|$)/i);
1987
+ return match ? match[1].toLowerCase() : null;
1988
+ }
1766
1989
 
1767
1990
  var defaultDiacriticsRemovalMap = [
1768
1991
  {
@@ -2056,10 +2279,6 @@ function normalizeToKebabCase(text) {
2056
2279
  charType = 'NUMBER';
2057
2280
  normalizedChar = char;
2058
2281
  }
2059
- else if (/^\/$/.test(char)) {
2060
- charType = 'SLASH';
2061
- normalizedChar = char;
2062
- }
2063
2282
  else {
2064
2283
  charType = 'OTHER';
2065
2284
  normalizedChar = '-';
@@ -2104,6 +2323,32 @@ function removeEmojis(text) {
2104
2323
  return text;
2105
2324
  }
2106
2325
 
2326
/**
 * Tests if given string looks like a valid file path.
 *
 * Backslashes are treated as forward slashes before matching. Accepted shapes:
 * - Absolute Unix path: `/hello.txt`
 * - Absolute Windows path: `C:/hello.txt` (one or two drive letters)
 * - Relative path starting with `./` or `../`: `./hello.txt`
 *
 * Note: This does not check if the file exists only if the path is valid
 *
 * @param filename value to test; non-string values are never valid
 * @returns `true` when the value matches one of the shapes above
 * @public exported from `@promptbook/utils`
 */
function isValidFilePath(filename) {
    if (typeof filename !== 'string') {
        return false;
    }
    var normalizedPath = filename.replace(/\\/g, '/');
    var acceptedShapes = [
        /^\//, // <- Absolute Unix path
        /^[A-Z]{1,2}:\/?\//i, // <- Absolute Windows path
        /^(\.\.?\/)+/, // <- Relative path
    ];
    return acceptedShapes.some(function (shape) { return shape.test(normalizedPath); });
}
2351
+
2107
2352
  /**
2108
2353
  * @@@
2109
2354
  *
@@ -2113,20 +2358,121 @@ function removeEmojis(text) {
2113
2358
  * @public exported from `@promptbook/utils`
2114
2359
  */
2115
2360
function titleToName(value) {
    var name = value;
    if (isValidUrl(name)) {
        // URL: drop the protocol and a trailing `.html`
        name = name.replace(/^https?:\/\//, '').replace(/\.html$/, '');
    }
    else if (isValidFilePath(name)) {
        // File path: use only the last path segment
        // Note: Keeping extension in the name
        name = basename(name);
    }
    // Remaining slashes become dashes before the generic normalization
    name = name.split('/').join('-');
    // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
    return normalizeToKebabCase(removeEmojis(name));
}
2129
2375
 
2376
/**
 * Splits a name into a two-level subfolder path made of its first two characters
 *
 * Each returned segment is lower-cased; a segment is an empty string when the
 * name is shorter than the corresponding position.
 *
 * @param name name (for example a hash) to derive the subfolders from
 * @returns two-item array `[firstCharacter, secondCharacter]`
 * @private for `FileCacheStorage`
 */
function nameToSubfolderPath(name) {
    // Note: `charAt` is used instead of the deprecated `substr`; both return '' when out of range
    return [name.charAt(0).toLowerCase(), name.charAt(1).toLowerCase()];
}
2384
+
2385
/**
 * Just marks a place where something should be implemented
 * No side effects.
 *
 * Note: It can be useful for suppressing eslint errors of unused variables
 *
 * @param value any values
 * @returns void
 * @private within the repository
 */
function TODO_USE() {
    // Intentionally empty: accepts any number of arguments and ignores all of them
}
2401
+
2402
/**
 * Create a filename for intermediate cache for scrapers
 *
 * The cache file name is built as `intermediate-<semantic name>-<hash>.<extension>` and placed under
 * `process.cwd()` / `cacheDirname` / two subfolders derived from the hash.
 *
 * Note: It also creates the directory of the cache file (recursively) before returning
 *
 * @param source object with optional `filename` and `url` identifying the scraped source
 * @param options object with `rootDirname` (currently unused), `cacheDirname`, `isCacheCleaned`, `extension` and `isVerbose`
 * @returns promise of a handler with `filename`, read-only `isDestroyed` flag and async `destroy()`;
 *          `destroy()` removes the cache file only when `isCacheCleaned` is truthy
 * @private as internal utility for scrapers
 */
function getScraperIntermediateSource(source, options) {
    return __awaiter(this, void 0, void 0, function () {
        var sourceFilename, url, rootDirname, cacheDirname, isCacheCleaned, extension, isVerbose, hash, semanticName, pieces, name, cacheFilename, isDestroyed, fileHandler;
        return __generator(this, function (_a) {
            switch (_a.label) {
                case 0:
                    sourceFilename = source.filename, url = source.url;
                    rootDirname = options.rootDirname, cacheDirname = options.cacheDirname, isCacheCleaned = options.isCacheCleaned, extension = options.extension, isVerbose = options.isVerbose;
                    // NOTE(review): the identifier is fed to `hexEncoder.parse` although it is not a hex string - confirm this is intended
                    hash = SHA256(
                    // <- TODO: [🥬] Encapsulate sha256 to some private utility function
                    hexEncoder.parse(sourceFilename || url || 'untitled'))
                        .toString( /* hex */)
                        .substring(0, 20);
                    // Note: The word "intermediate" is stripped from the source name so it is not repeated in the final name
                    semanticName = normalizeToKebabCase(titleToName((sourceFilename || url || '').split('intermediate').join(''))).substring(0, 20);
                    pieces = ['intermediate', semanticName, hash].filter(function (piece) { return piece !== ''; });
                    name = pieces.join('-').split('--').join('-');
                    // <- TODO: Use MAX_FILENAME_LENGTH
                    TODO_USE(rootDirname); // <- TODO: !!!!!!
                    cacheFilename = join.apply(void 0, __spreadArray(__spreadArray([process.cwd(),
                        cacheDirname], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), [name], false)).split('\\')
                        .join('/') +
                        '.' +
                        extension;
                    return [4 /*yield*/, mkdir(dirname(cacheFilename), { recursive: true })];
                case 1:
                    _a.sent();
                    // Note: A freshly created handler is not destroyed yet; the flag flips only in `destroy()`
                    isDestroyed = false;
                    fileHandler = {
                        filename: cacheFilename,
                        get isDestroyed() {
                            return isDestroyed;
                        },
                        destroy: function () {
                            return __awaiter(this, void 0, void 0, function () {
                                return __generator(this, function (_a) {
                                    switch (_a.label) {
                                        case 0:
                                            if (!isCacheCleaned) return [3 /*break*/, 2];
                                            if (isVerbose) {
                                                console.info('legacyDocumentScraper: Cleaning cache');
                                            }
                                            return [4 /*yield*/, rm(cacheFilename)];
                                        case 1:
                                            _a.sent();
                                            _a.label = 2;
                                        case 2:
                                            isDestroyed = true;
                                            return [2 /*return*/];
                                    }
                                });
                            });
                        },
                    };
                    return [2 /*return*/, fileHandler];
            }
        });
    });
}
2467
+ /**
2468
+ * Note: Not using `FileCacheStorage` for two reasons:
2469
+ * 1) Need to store more than serialized JSONs
2470
+ * 2) Need to switch between a `rootDirname` and `cacheDirname` <- TODO: !!!!
2471
+ * TODO: [🐱‍🐉][🧠] Make some smart crop
2472
+ */
2473
+
2474
+ var PipelineCollection = [{title:"Prepare Knowledge from Markdown",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.ptbk.md",parameters:[{name:"knowledgeContent",description:"Markdown document content",isInput:true,isOutput:false},{name:"knowledgePieces",description:"The knowledge JSON object",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}",resultingParameterName:"knowledgePieces",dependentParameterNames:["knowledgeContent"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-knowledge-from-markdown.ptbk.md"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-keywords.ptbk.md",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"keywords",description:"Keywords separated by comma",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> 
{knowledgePieceContent}",resultingParameterName:"keywords",dependentParameterNames:["knowledgePieceContent"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-knowledge-keywords.ptbk.md"},{title:"Prepare Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-title.ptbk.md",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"title",description:"The title of the document",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"knowledge",title:"Knowledge",content:"You are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Title should be concise and clear\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}",resultingParameterName:"title",expectations:{words:{min:1,max:8}},dependentParameterNames:["knowledgePieceContent"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-knowledge-title.ptbk.md"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-persona.ptbk.md",parameters:[{name:"availableModelNames",description:"List of available model names separated by comma (,)",isInput:true,isOutput:false},{name:"personaDescription",description:"Description of the persona",isInput:true,isOutput:false},{name:"modelRequirements",description:"Specific requirements for the model",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"make-model-requirements",title:"Make modelRequirements",content:"You are experienced AI engineer, you need to create virtual assistant.\nWrite\n\n## Sample\n\n```json\n{\n\"modelName\": \"gpt-4o\",\n\"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n\"temperature\": 0.7\n}\n```\n\n## Instructions\n\n- Your output format is JSON object\n- Write just the 
JSON object, no other text should be present\n- It contains the following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nPick from the following models:\n\n- {availableModelNames}\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}",resultingParameterName:"modelRequirements",format:"JSON",dependentParameterNames:["availableModelNames","personaDescription"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-persona.ptbk.md"}];
2475
+
2130
2476
  /**
2131
2477
  * This error indicates errors during the execution of the pipeline
2132
2478
  *
@@ -2159,22 +2505,6 @@ var CollectionError = /** @class */ (function (_super) {
2159
2505
  return CollectionError;
2160
2506
  }(Error));
2161
2507
 
2162
- /**
2163
- * This error type indicates that you try to use a feature that is not available in the current environment
2164
- *
2165
- * @public exported from `@promptbook/core`
2166
- */
2167
- var EnvironmentMismatchError = /** @class */ (function (_super) {
2168
- __extends(EnvironmentMismatchError, _super);
2169
- function EnvironmentMismatchError(message) {
2170
- var _this = _super.call(this, message) || this;
2171
- _this.name = 'EnvironmentMismatchError';
2172
- Object.setPrototypeOf(_this, EnvironmentMismatchError.prototype);
2173
- return _this;
2174
- }
2175
- return EnvironmentMismatchError;
2176
- }(Error));
2177
-
2178
2508
  /**
2179
2509
  * This error occurs when some expectation is not met in the execution of the pipeline
2180
2510
  *
@@ -2674,17 +3004,70 @@ function deepClone(objectValue) {
2674
3004
  */
2675
3005
 
2676
3006
  /**
2677
- * Function `addUsage` will add multiple usages into one
2678
- *
2679
- * Note: If you provide 0 values, it returns ZERO_USAGE
3007
+ * Represents the usage with no resources consumed
2680
3008
  *
2681
3009
  * @public exported from `@promptbook/core`
2682
3010
  */
2683
- function addUsage() {
2684
- var usageItems = [];
2685
- for (var _i = 0; _i < arguments.length; _i++) {
2686
- usageItems[_i] = arguments[_i];
2687
- }
3011
var ZERO_USAGE = $deepFreeze((function () {
    // Note: `input` and `output` share the same set of counters, each starting at a certain zero
    var countNames = [
        'tokensCount',
        'charactersCount',
        'wordsCount',
        'sentencesCount',
        'linesCount',
        'paragraphsCount',
        'pagesCount',
    ];
    function createZeroCounts() {
        return countNames.reduce(function (counts, countName) {
            counts[countName] = { value: 0 };
            return counts;
        }, {});
    }
    return {
        price: { value: 0 },
        input: createZeroCounts(),
        output: createZeroCounts(),
    };
})());
3032
/**
 * Represents the usage with unknown resources consumed
 *
 * Every number (the price and each input/output counter) is `0` flagged with `isUncertain: true`.
 *
 * @public exported from `@promptbook/core`
 */
var UNCERTAIN_USAGE = $deepFreeze((function () {
    var countNames = [
        'tokensCount',
        'charactersCount',
        'wordsCount',
        'sentencesCount',
        'linesCount',
        'paragraphsCount',
        'pagesCount',
    ];
    function createUncertainZero() {
        return { value: 0, isUncertain: true };
    }
    function createUncertainCounts() {
        return countNames.reduce(function (counts, countName) {
            counts[countName] = createUncertainZero();
            return counts;
        }, {});
    }
    return {
        price: createUncertainZero(),
        input: createUncertainCounts(),
        output: createUncertainCounts(),
    };
})());
3058
+
3059
+ /**
3060
+ * Function `addUsage` will add multiple usages into one
3061
+ *
3062
+ * Note: If you provide 0 values, it returns ZERO_USAGE
3063
+ *
3064
+ * @public exported from `@promptbook/core`
3065
+ */
3066
+ function addUsage() {
3067
+ var usageItems = [];
3068
+ for (var _i = 0; _i < arguments.length; _i++) {
3069
+ usageItems[_i] = arguments[_i];
3070
+ }
2688
3071
  return usageItems.reduce(function (acc, item) {
2689
3072
  var e_1, _a, e_2, _b;
2690
3073
  var _c;
@@ -2950,23 +3333,6 @@ function union() {
2950
3333
  return union;
2951
3334
  }
2952
3335
 
2953
- /**
2954
- * Just marks a place of place where should be something implemented
2955
- * No side effects.
2956
- *
2957
- * Note: It can be usefull suppressing eslint errors of unused variables
2958
- *
2959
- * @param value any values
2960
- * @returns void
2961
- * @private within the repository
2962
- */
2963
- function TODO_USE() {
2964
- var value = [];
2965
- for (var _i = 0; _i < arguments.length; _i++) {
2966
- value[_i] = arguments[_i];
2967
- }
2968
- }
2969
-
2970
3336
  /**
2971
3337
  * This error indicates problems parsing the format value
2972
3338
  *
@@ -3010,7 +3376,7 @@ var CsvFormatError = /** @class */ (function (_super) {
3010
3376
  */
3011
3377
  var MANDATORY_CSV_SETTINGS = Object.freeze({
3012
3378
  header: true,
3013
- // encoding: 'utf8',
3379
+ // encoding: 'utf-8',
3014
3380
  });
3015
3381
 
3016
3382
  /**
@@ -4464,25 +4830,26 @@ function filterJustOutputParameters(options) {
4464
4830
  */
4465
4831
  function executePipeline(options) {
4466
4832
  return __awaiter(this, void 0, void 0, function () {
4467
- var inputParameters, tools, onProgress, pipeline, setPreparedPipeline, pipelineIdentification, settings, maxParallelCount, isVerbose, preparedPipeline, llmTools, errors, warnings, executionReport, isReturned, _a, _b, parameter, e_1_1, _loop_1, _c, _d, parameterName, state_1, e_2_1, parametersToPass, resovedParameterNames_1, unresovedTemplates_1, resolving_1, loopLimit, _loop_2, error_1, usage_1, outputParameters_1, usage, outputParameters;
4468
- var e_1, _e, e_2, _f;
4469
- return __generator(this, function (_g) {
4470
- switch (_g.label) {
4833
+ var inputParameters, tools, onProgress, pipeline, setPreparedPipeline, pipelineIdentification, settings, maxParallelCount, rootDirname, _a, isVerbose, preparedPipeline, llmTools, errors, warnings, executionReport, isReturned, _b, _c, parameter, e_1_1, _loop_1, _d, _e, parameterName, state_1, e_2_1, parametersToPass, resovedParameterNames_1, unresovedTemplates_1, resolving_1, loopLimit, _loop_2, error_1, usage_1, outputParameters_1, usage, outputParameters;
4834
+ var e_1, _f, e_2, _g;
4835
+ return __generator(this, function (_h) {
4836
+ switch (_h.label) {
4471
4837
  case 0:
4472
4838
  inputParameters = options.inputParameters, tools = options.tools, onProgress = options.onProgress, pipeline = options.pipeline, setPreparedPipeline = options.setPreparedPipeline, pipelineIdentification = options.pipelineIdentification, settings = options.settings;
4473
- maxParallelCount = settings.maxParallelCount, isVerbose = settings.isVerbose;
4839
+ maxParallelCount = settings.maxParallelCount, rootDirname = settings.rootDirname, _a = settings.isVerbose, isVerbose = _a === void 0 ? IS_VERBOSE : _a;
4474
4840
  preparedPipeline = options.preparedPipeline;
4475
4841
  llmTools = joinLlmExecutionTools.apply(void 0, __spreadArray([], __read(arrayableToArray(tools.llm)), false));
4476
4842
  if (!(preparedPipeline === undefined)) return [3 /*break*/, 2];
4477
4843
  return [4 /*yield*/, preparePipeline(pipeline, {
4478
4844
  llmTools: llmTools,
4845
+ rootDirname: rootDirname,
4479
4846
  isVerbose: isVerbose,
4480
4847
  maxParallelCount: maxParallelCount,
4481
4848
  })];
4482
4849
  case 1:
4483
- preparedPipeline = _g.sent();
4850
+ preparedPipeline = _h.sent();
4484
4851
  setPreparedPipeline(preparedPipeline);
4485
- _g.label = 2;
4852
+ _h.label = 2;
4486
4853
  case 2:
4487
4854
  errors = [];
4488
4855
  warnings = [];
@@ -4495,17 +4862,17 @@ function executePipeline(options) {
4495
4862
  promptExecutions: [],
4496
4863
  };
4497
4864
  isReturned = false;
4498
- _g.label = 3;
4865
+ _h.label = 3;
4499
4866
  case 3:
4500
- _g.trys.push([3, 9, 10, 11]);
4501
- _a = __values(preparedPipeline.parameters.filter(function (_a) {
4867
+ _h.trys.push([3, 9, 10, 11]);
4868
+ _b = __values(preparedPipeline.parameters.filter(function (_a) {
4502
4869
  var isInput = _a.isInput;
4503
4870
  return isInput;
4504
- })), _b = _a.next();
4505
- _g.label = 4;
4871
+ })), _c = _b.next();
4872
+ _h.label = 4;
4506
4873
  case 4:
4507
- if (!!_b.done) return [3 /*break*/, 8];
4508
- parameter = _b.value;
4874
+ if (!!_c.done) return [3 /*break*/, 8];
4875
+ parameter = _c.value;
4509
4876
  if (!(inputParameters[parameter.name] === undefined)) return [3 /*break*/, 7];
4510
4877
  isReturned = true;
4511
4878
  if (!(onProgress !== undefined)) return [3 /*break*/, 6];
@@ -4513,8 +4880,8 @@ function executePipeline(options) {
4513
4880
  return [4 /*yield*/, forTime(IMMEDIATE_TIME)];
4514
4881
  case 5:
4515
4882
  // Note: Wait a short time to prevent race conditions
4516
- _g.sent();
4517
- _g.label = 6;
4883
+ _h.sent();
4884
+ _h.label = 6;
4518
4885
  case 6: return [2 /*return*/, $asDeeplyFrozenSerializableJson("Unuccessful PipelineExecutorResult (with missing parameter {".concat(parameter.name, "}) PipelineExecutorResult"), {
4519
4886
  isSuccessful: false,
4520
4887
  errors: __spreadArray([
@@ -4527,24 +4894,24 @@ function executePipeline(options) {
4527
4894
  preparedPipeline: preparedPipeline,
4528
4895
  })];
4529
4896
  case 7:
4530
- _b = _a.next();
4897
+ _c = _b.next();
4531
4898
  return [3 /*break*/, 4];
4532
4899
  case 8: return [3 /*break*/, 11];
4533
4900
  case 9:
4534
- e_1_1 = _g.sent();
4901
+ e_1_1 = _h.sent();
4535
4902
  e_1 = { error: e_1_1 };
4536
4903
  return [3 /*break*/, 11];
4537
4904
  case 10:
4538
4905
  try {
4539
- if (_b && !_b.done && (_e = _a.return)) _e.call(_a);
4906
+ if (_c && !_c.done && (_f = _b.return)) _f.call(_b);
4540
4907
  }
4541
4908
  finally { if (e_1) throw e_1.error; }
4542
4909
  return [7 /*endfinally*/];
4543
4910
  case 11:
4544
4911
  _loop_1 = function (parameterName) {
4545
4912
  var parameter;
4546
- return __generator(this, function (_h) {
4547
- switch (_h.label) {
4913
+ return __generator(this, function (_j) {
4914
+ switch (_j.label) {
4548
4915
  case 0:
4549
4916
  parameter = preparedPipeline.parameters.find(function (_a) {
4550
4917
  var name = _a.name;
@@ -4561,8 +4928,8 @@ function executePipeline(options) {
4561
4928
  return [4 /*yield*/, forTime(IMMEDIATE_TIME)];
4562
4929
  case 2:
4563
4930
  // Note: Wait a short time to prevent race conditions
4564
- _h.sent();
4565
- _h.label = 3;
4931
+ _j.sent();
4932
+ _j.label = 3;
4566
4933
  case 3: return [2 /*return*/, { value: $asDeeplyFrozenSerializableJson(spaceTrim$1(function (block) { return "\n Unuccessful PipelineExecutorResult (with extra parameter {".concat(parameter.name, "}) PipelineExecutorResult\n\n ").concat(block(pipelineIdentification), "\n "); }), {
4567
4934
  isSuccessful: false,
4568
4935
  errors: __spreadArray([
@@ -4578,39 +4945,39 @@ function executePipeline(options) {
4578
4945
  }
4579
4946
  });
4580
4947
  };
4581
- _g.label = 12;
4948
+ _h.label = 12;
4582
4949
  case 12:
4583
- _g.trys.push([12, 17, 18, 19]);
4584
- _c = __values(Object.keys(inputParameters)), _d = _c.next();
4585
- _g.label = 13;
4950
+ _h.trys.push([12, 17, 18, 19]);
4951
+ _d = __values(Object.keys(inputParameters)), _e = _d.next();
4952
+ _h.label = 13;
4586
4953
  case 13:
4587
- if (!!_d.done) return [3 /*break*/, 16];
4588
- parameterName = _d.value;
4954
+ if (!!_e.done) return [3 /*break*/, 16];
4955
+ parameterName = _e.value;
4589
4956
  return [5 /*yield**/, _loop_1(parameterName)];
4590
4957
  case 14:
4591
- state_1 = _g.sent();
4958
+ state_1 = _h.sent();
4592
4959
  if (typeof state_1 === "object")
4593
4960
  return [2 /*return*/, state_1.value];
4594
- _g.label = 15;
4961
+ _h.label = 15;
4595
4962
  case 15:
4596
- _d = _c.next();
4963
+ _e = _d.next();
4597
4964
  return [3 /*break*/, 13];
4598
4965
  case 16: return [3 /*break*/, 19];
4599
4966
  case 17:
4600
- e_2_1 = _g.sent();
4967
+ e_2_1 = _h.sent();
4601
4968
  e_2 = { error: e_2_1 };
4602
4969
  return [3 /*break*/, 19];
4603
4970
  case 18:
4604
4971
  try {
4605
- if (_d && !_d.done && (_f = _c.return)) _f.call(_c);
4972
+ if (_e && !_e.done && (_g = _d.return)) _g.call(_d);
4606
4973
  }
4607
4974
  finally { if (e_2) throw e_2.error; }
4608
4975
  return [7 /*endfinally*/];
4609
4976
  case 19:
4610
4977
  parametersToPass = inputParameters;
4611
- _g.label = 20;
4978
+ _h.label = 20;
4612
4979
  case 20:
4613
- _g.trys.push([20, 25, , 28]);
4980
+ _h.trys.push([20, 25, , 28]);
4614
4981
  resovedParameterNames_1 = preparedPipeline.parameters
4615
4982
  .filter(function (_a) {
4616
4983
  var isInput = _a.isInput;
@@ -4625,8 +4992,8 @@ function executePipeline(options) {
4625
4992
  loopLimit = LOOP_LIMIT;
4626
4993
  _loop_2 = function () {
4627
4994
  var currentTemplate, work_1;
4628
- return __generator(this, function (_j) {
4629
- switch (_j.label) {
4995
+ return __generator(this, function (_k) {
4996
+ switch (_k.label) {
4630
4997
  case 0:
4631
4998
  if (loopLimit-- < 0) {
4632
4999
  // Note: Really UnexpectedError not LimitReachedError - this should be catched during validatePipeline
@@ -4652,7 +5019,7 @@ function executePipeline(options) {
4652
5019
  if (!!currentTemplate) return [3 /*break*/, 3];
4653
5020
  /* [🤹‍♂️] */ return [4 /*yield*/, Promise.race(resolving_1)];
4654
5021
  case 2:
4655
- /* [🤹‍♂️] */ _j.sent();
5022
+ /* [🤹‍♂️] */ _k.sent();
4656
5023
  return [3 /*break*/, 4];
4657
5024
  case 3:
4658
5025
  unresovedTemplates_1 = unresovedTemplates_1.filter(function (template) { return template !== currentTemplate; });
@@ -4687,24 +5054,24 @@ function executePipeline(options) {
4687
5054
  // <- Note: Errors are catched here [3]
4688
5055
  // TODO: BUT if in multiple templates are errors, only the first one is catched so maybe we should catch errors here and save them to errors array here
4689
5056
  resolving_1.push(work_1);
4690
- _j.label = 4;
5057
+ _k.label = 4;
4691
5058
  case 4: return [2 /*return*/];
4692
5059
  }
4693
5060
  });
4694
5061
  };
4695
- _g.label = 21;
5062
+ _h.label = 21;
4696
5063
  case 21:
4697
5064
  if (!(unresovedTemplates_1.length > 0)) return [3 /*break*/, 23];
4698
5065
  return [5 /*yield**/, _loop_2()];
4699
5066
  case 22:
4700
- _g.sent();
5067
+ _h.sent();
4701
5068
  return [3 /*break*/, 21];
4702
5069
  case 23: return [4 /*yield*/, Promise.all(resolving_1)];
4703
5070
  case 24:
4704
- _g.sent();
5071
+ _h.sent();
4705
5072
  return [3 /*break*/, 28];
4706
5073
  case 25:
4707
- error_1 = _g.sent();
5074
+ error_1 = _h.sent();
4708
5075
  if (!(error_1 instanceof Error)) {
4709
5076
  throw error_1;
4710
5077
  }
@@ -4724,8 +5091,8 @@ function executePipeline(options) {
4724
5091
  return [4 /*yield*/, forTime(IMMEDIATE_TIME)];
4725
5092
  case 26:
4726
5093
  // Note: Wait a short time to prevent race conditions
4727
- _g.sent();
4728
- _g.label = 27;
5094
+ _h.sent();
5095
+ _h.label = 27;
4729
5096
  case 27: return [2 /*return*/, $asDeeplyFrozenSerializableJson('Unuccessful PipelineExecutorResult (with misc errors) PipelineExecutorResult', {
4730
5097
  isSuccessful: false,
4731
5098
  errors: __spreadArray([error_1], __read(errors), false).map(serializeError),
@@ -4752,8 +5119,8 @@ function executePipeline(options) {
4752
5119
  return [4 /*yield*/, forTime(IMMEDIATE_TIME)];
4753
5120
  case 29:
4754
5121
  // Note: Wait a short time to prevent race conditions
4755
- _g.sent();
4756
- _g.label = 30;
5122
+ _h.sent();
5123
+ _h.label = 30;
4757
5124
  case 30: return [2 /*return*/, $asDeeplyFrozenSerializableJson('Successful PipelineExecutorResult', {
4758
5125
  isSuccessful: true,
4759
5126
  errors: errors.map(serializeError),
@@ -4765,211 +5132,883 @@ function executePipeline(options) {
4765
5132
  })];
4766
5133
  }
4767
5134
  });
4768
- });
4769
- }
5135
+ });
5136
+ }
5137
+ /**
5138
+ * TODO: [🐚] Change onProgress to object that represents the running execution, can be subscribed via RxJS to and also awaited
5139
+ */
5140
+
5141
+ /**
5142
+ * Creates executor function from pipeline and execution tools.
5143
+ *
5144
+ * @returns The executor function
5145
+ * @throws {PipelineLogicError} on logical error in the pipeline
5146
+ * @public exported from `@promptbook/core`
5147
+ */
5148
+ function createPipelineExecutor(options) {
5149
+ var _this = this;
5150
+ var pipeline = options.pipeline, tools = options.tools, _a = options.settings, settings = _a === void 0 ? {} : _a;
5151
+ var _b = settings.maxExecutionAttempts, maxExecutionAttempts = _b === void 0 ? MAX_EXECUTION_ATTEMPTS : _b, _c = settings.maxParallelCount, maxParallelCount = _c === void 0 ? MAX_PARALLEL_COUNT : _c, _d = settings.csvSettings, csvSettings = _d === void 0 ? DEFAULT_CSV_SETTINGS : _d, _e = settings.isVerbose, isVerbose = _e === void 0 ? IS_VERBOSE : _e, _f = settings.isNotPreparedWarningSupressed, isNotPreparedWarningSupressed = _f === void 0 ? false : _f, _g = settings.rootDirname, rootDirname = _g === void 0 ? null : _g;
5152
+ validatePipeline(pipeline);
5153
+ var pipelineIdentification = (function () {
5154
+ // Note: This is a 😐 implementation of [🚞]
5155
+ var _ = [];
5156
+ if (pipeline.sourceFile !== undefined) {
5157
+ _.push("File: ".concat(pipeline.sourceFile));
5158
+ }
5159
+ if (pipeline.pipelineUrl !== undefined) {
5160
+ _.push("Url: ".concat(pipeline.pipelineUrl));
5161
+ }
5162
+ return _.join('\n');
5163
+ })();
5164
+ var preparedPipeline;
5165
+ if (isPipelinePrepared(pipeline)) {
5166
+ preparedPipeline = pipeline;
5167
+ }
5168
+ else if (isNotPreparedWarningSupressed !== true) {
5169
+ console.warn(spaceTrim$1(function (block) { return "\n Pipeline is not prepared\n\n ".concat(block(pipelineIdentification), "\n\n It will be prepared ad-hoc before the first execution and **returned as `preparedPipeline` in `PipelineExecutorResult`**\n But it is recommended to prepare the pipeline during collection preparation\n\n @see more at https://ptbk.io/prepare-pipeline\n "); }));
5170
+ }
5171
+ var runCount = 0;
5172
+ var pipelineExecutor = function (inputParameters, onProgress) { return __awaiter(_this, void 0, void 0, function () {
5173
+ return __generator(this, function (_a) {
5174
+ runCount++;
5175
+ return [2 /*return*/, /* not await */ executePipeline({
5176
+ pipeline: pipeline,
5177
+ preparedPipeline: preparedPipeline,
5178
+ setPreparedPipeline: function (newPreparedPipeline) {
5179
+ preparedPipeline = newPreparedPipeline;
5180
+ },
5181
+ inputParameters: inputParameters,
5182
+ tools: tools,
5183
+ onProgress: onProgress,
5184
+ pipelineIdentification: spaceTrim$1(function (block) { return "\n ".concat(block(pipelineIdentification), "\n ").concat(runCount === 1 ? '' : "Run #".concat(runCount), "\n "); }),
5185
+ settings: {
5186
+ maxExecutionAttempts: maxExecutionAttempts,
5187
+ maxParallelCount: maxParallelCount,
5188
+ csvSettings: csvSettings,
5189
+ isVerbose: isVerbose,
5190
+ isNotPreparedWarningSupressed: isNotPreparedWarningSupressed,
5191
+ rootDirname: rootDirname,
5192
+ },
5193
+ })];
5194
+ });
5195
+ }); };
5196
+ return pipelineExecutor;
5197
+ }
5198
+ /**
5199
+ * TODO: [🐚] Change onProgress to object that represents the running execution, can be subscribed via RxJS to and also awaited
5200
+ */
5201
+
5202
+ /**
5203
+ * Scraper for markdown files
5204
+ *
5205
+ * @see `documentationUrl` for more details
5206
+ * @public exported from `@promptbook/core`
5207
+ */
5208
+ var markdownScraper = {
5209
+ /**
5210
+ * Mime types that this scraper can handle
5211
+ */
5212
+ mimeTypes: ['text/markdown', 'text/plain'],
5213
+ /**
5214
+ * Link to documentation
5215
+ */
5216
+ documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
5217
+ /**
5218
+ * Scrapes the markdown file and returns the knowledge pieces or `null` if it can't scrape it
5219
+ */
5220
+ scrape: function (source, options) {
5221
+ return __awaiter(this, void 0, void 0, function () {
5222
+ var llmTools, _a, maxParallelCount, _b, isVerbose, collection, prepareKnowledgeFromMarkdownExecutor, _c, prepareTitleExecutor, _d, prepareKeywordsExecutor, _e, knowledgeContent, result, outputParameters, knowledgePiecesRaw, knowledgeTextPieces, knowledge;
5223
+ var _f, _g, _h;
5224
+ var _this = this;
5225
+ return __generator(this, function (_j) {
5226
+ switch (_j.label) {
5227
+ case 0:
5228
+ llmTools = options.llmTools, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a, _b = options.isVerbose, isVerbose = _b === void 0 ? IS_VERBOSE : _b;
5229
+ if (llmTools === undefined) {
5230
+ throw new MissingToolsError('LLM tools are required for scraping external files');
5231
+ // <- Note: This scraper is used in all other scrapers, so saying "external files" not "markdown files"
5232
+ }
5233
+ TODO_USE(maxParallelCount); // <- [🪂]
5234
+ collection = createCollectionFromJson.apply(void 0, __spreadArray([], __read(PipelineCollection), false));
5235
+ _c = createPipelineExecutor;
5236
+ _f = {};
5237
+ return [4 /*yield*/, collection.getPipelineByUrl('https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.ptbk.md')];
5238
+ case 1:
5239
+ prepareKnowledgeFromMarkdownExecutor = _c.apply(void 0, [(_f.pipeline = _j.sent(),
5240
+ _f.tools = {
5241
+ llm: llmTools,
5242
+ },
5243
+ _f)]);
5244
+ _d = createPipelineExecutor;
5245
+ _g = {};
5246
+ return [4 /*yield*/, collection.getPipelineByUrl('https://promptbook.studio/promptbook/prepare-knowledge-title.ptbk.md')];
5247
+ case 2:
5248
+ prepareTitleExecutor = _d.apply(void 0, [(_g.pipeline = _j.sent(),
5249
+ _g.tools = {
5250
+ llm: llmTools,
5251
+ },
5252
+ _g)]);
5253
+ _e = createPipelineExecutor;
5254
+ _h = {};
5255
+ return [4 /*yield*/, collection.getPipelineByUrl('https://promptbook.studio/promptbook/prepare-knowledge-keywords.ptbk.md')];
5256
+ case 3:
5257
+ prepareKeywordsExecutor = _e.apply(void 0, [(_h.pipeline = _j.sent(),
5258
+ _h.tools = {
5259
+ llm: llmTools,
5260
+ },
5261
+ _h)]);
5262
+ return [4 /*yield*/, source.asText()];
5263
+ case 4:
5264
+ knowledgeContent = _j.sent();
5265
+ return [4 /*yield*/, prepareKnowledgeFromMarkdownExecutor({ knowledgeContent: knowledgeContent })];
5266
+ case 5:
5267
+ result = _j.sent();
5268
+ assertsExecutionSuccessful(result);
5269
+ outputParameters = result.outputParameters;
5270
+ knowledgePiecesRaw = outputParameters.knowledgePieces;
5271
+ knowledgeTextPieces = (knowledgePiecesRaw || '').split('\n---\n');
5272
+ // <- TODO: [main] !!!!! Smarter split and filter out empty pieces
5273
+ if (isVerbose) {
5274
+ console.info('knowledgeTextPieces:', knowledgeTextPieces);
5275
+ }
5276
+ return [4 /*yield*/, Promise.all(
5277
+ // TODO: [🪂] !! Do not send all at once but in chunks
5278
+ knowledgeTextPieces.map(function (knowledgeTextPiece, i) { return __awaiter(_this, void 0, void 0, function () {
5279
+ var name, title, knowledgePieceContent, keywords, index, titleResult, _a, titleRaw, keywordsResult, _b, keywordsRaw, embeddingResult, error_1;
5280
+ return __generator(this, function (_c) {
5281
+ switch (_c.label) {
5282
+ case 0:
5283
+ name = "piece-".concat(i);
5284
+ title = spaceTrim(knowledgeTextPiece.substring(0, 100));
5285
+ knowledgePieceContent = spaceTrim(knowledgeTextPiece);
5286
+ keywords = [];
5287
+ index = [];
5288
+ _c.label = 1;
5289
+ case 1:
5290
+ _c.trys.push([1, 7, , 8]);
5291
+ return [4 /*yield*/, prepareTitleExecutor({ knowledgePieceContent: knowledgePieceContent })];
5292
+ case 2:
5293
+ titleResult = _c.sent();
5294
+ _a = titleResult.outputParameters.title, titleRaw = _a === void 0 ? 'Untitled' : _a;
5295
+ title = spaceTrim(titleRaw) /* <- TODO: Maybe do in pipeline */;
5296
+ name = titleToName(title);
5297
+ return [4 /*yield*/, prepareKeywordsExecutor({ knowledgePieceContent: knowledgePieceContent })];
5298
+ case 3:
5299
+ keywordsResult = _c.sent();
5300
+ _b = keywordsResult.outputParameters.keywords, keywordsRaw = _b === void 0 ? '' : _b;
5301
+ keywords = (keywordsRaw || '')
5302
+ .split(',')
5303
+ .map(function (keyword) { return keyword.trim(); })
5304
+ .filter(function (keyword) { return keyword !== ''; });
5305
+ if (isVerbose) {
5306
+ console.info("Keywords for \"".concat(title, "\":"), keywords);
5307
+ }
5308
+ if (!!llmTools.callEmbeddingModel) return [3 /*break*/, 4];
5309
+ // TODO: [🟥] Detect browser / node and make it colorfull
5310
+ console.error('No callEmbeddingModel function provided');
5311
+ return [3 /*break*/, 6];
5312
+ case 4: return [4 /*yield*/, llmTools.callEmbeddingModel({
5313
+ title: "Embedding for ".concat(title) /* <- Note: No impact on embedding result itself, just for logging */,
5314
+ parameters: {},
5315
+ content: knowledgePieceContent,
5316
+ modelRequirements: {
5317
+ modelVariant: 'EMBEDDING',
5318
+ },
5319
+ })];
5320
+ case 5:
5321
+ embeddingResult = _c.sent();
5322
+ index.push({
5323
+ modelName: embeddingResult.modelName,
5324
+ position: embeddingResult.content,
5325
+ });
5326
+ _c.label = 6;
5327
+ case 6: return [3 /*break*/, 8];
5328
+ case 7:
5329
+ error_1 = _c.sent();
5330
+ // Note: Here is expected error:
5331
+ // > PipelineExecutionError: You have not provided any `LlmExecutionTools` that support model variant "EMBEDDING
5332
+ if (!(error_1 instanceof PipelineExecutionError)) {
5333
+ throw error_1;
5334
+ }
5335
+ // TODO: [🟥] Detect browser / node and make it colorfull
5336
+ console.error(error_1, "<- Note: This error is not critical to prepare the pipeline, just knowledge pieces won't have embeddings");
5337
+ return [3 /*break*/, 8];
5338
+ case 8: return [2 /*return*/, {
5339
+ name: name,
5340
+ title: title,
5341
+ content: knowledgePieceContent,
5342
+ keywords: keywords,
5343
+ index: index,
5344
+ // <- TODO: [☀] sources,
5345
+ }];
5346
+ }
5347
+ });
5348
+ }); }))];
5349
+ case 6:
5350
+ knowledge = _j.sent();
5351
+ return [2 /*return*/, knowledge];
5352
+ }
5353
+ });
5354
+ });
5355
+ },
5356
+ } /* TODO: [🦷] as const */;
5357
+ /**
5358
+ * TODO: [🦖] Make some system for putting scrapers to separete packages
5359
+ * TODO: [🪂] Do it in parallel 11:11
5360
+ * TODO: [🦷] Ideally use `as const satisfies Scraper` BUT this combination throws errors
5361
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
5362
+ */
5363
+
5364
+ /**
5365
+ * Scraper of .docx and .odt files
5366
+ *
5367
+ * @see `documentationUrl` for more details
5368
+ * @public exported from `@promptbook/core`
5369
+ */
5370
+ var documentScraper = {
5371
+ /**
5372
+ * Mime types that this scraper can handle
5373
+ */
5374
+ mimeTypes: ['application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
5375
+ /**
5376
+ * Link to documentation
5377
+ */
5378
+ documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
5379
+ /**
5380
+ * Convert the `.docx` or `.odt` to `.md` file and returns intermediate source
5381
+ *
5382
+ * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
5383
+ */
5384
+ $convert: function (source, options) {
5385
+ return __awaiter(this, void 0, void 0, function () {
5386
+ var _a, externalProgramsPaths, rootDirname, _b, cacheDirname, _c, isCacheCleaned, _d, isVerbose, extension, cacheFilehandler, command_1;
5387
+ return __generator(this, function (_e) {
5388
+ switch (_e.label) {
5389
+ case 0:
5390
+ _a = options.externalProgramsPaths, externalProgramsPaths = _a === void 0 ? {} : _a, rootDirname = options.rootDirname, _b = options.cacheDirname, cacheDirname = _b === void 0 ? SCRAPE_CACHE_DIRNAME : _b, _c = options.isCacheCleaned, isCacheCleaned = _c === void 0 ? false : _c, _d = options.isVerbose, isVerbose = _d === void 0 ? IS_VERBOSE : _d;
5391
+ if (!$isRunningInNode()) {
5392
+ throw new KnowledgeScrapeError('Scraping .docx files is only supported in Node environment');
5393
+ }
5394
+ if (externalProgramsPaths.pandocPath === undefined) {
5395
+ throw new MissingToolsError('Pandoc is required for scraping .docx files');
5396
+ }
5397
+ if (source.filename === null) {
5398
+ // TODO: [🧠] Maybe save file as temporary
5399
+ throw new KnowledgeScrapeError('When parsing .docx file, it must be real file in the file system');
5400
+ }
5401
+ extension = getFileExtension(source.filename);
5402
+ return [4 /*yield*/, getScraperIntermediateSource(source, {
5403
+ rootDirname: rootDirname,
5404
+ cacheDirname: cacheDirname,
5405
+ isCacheCleaned: isCacheCleaned,
5406
+ extension: 'md',
5407
+ isVerbose: isVerbose,
5408
+ })];
5409
+ case 1:
5410
+ cacheFilehandler = _e.sent();
5411
+ return [4 /*yield*/, $isFileExisting(cacheFilehandler.filename)];
5412
+ case 2:
5413
+ if (!!(_e.sent())) return [3 /*break*/, 5];
5414
+ command_1 = "\"".concat(externalProgramsPaths.pandocPath, "\" -f ").concat(extension, " -t markdown \"").concat(source.filename, "\" -o \"").concat(cacheFilehandler.filename, "\"");
5415
+ // TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook
5416
+ return [4 /*yield*/, $execCommand(command_1)];
5417
+ case 3:
5418
+ // TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook
5419
+ _e.sent();
5420
+ return [4 /*yield*/, $isFileExisting(cacheFilehandler.filename)];
5421
+ case 4:
5422
+ // Note: [0]
5423
+ if (!(_e.sent())) {
5424
+ throw new UnexpectedError(spaceTrim(function (block) { return "\n File that was supposed to be created by Pandoc does not exist for unknown reason\n\n Expected file:\n ".concat(block(cacheFilehandler.filename), "\n\n Command:\n > ").concat(block(command_1), "\n\n "); }));
5425
+ }
5426
+ _e.label = 5;
5427
+ case 5: return [2 /*return*/, cacheFilehandler];
5428
+ }
5429
+ });
5430
+ });
5431
+ },
5432
+ /**
5433
+ * Scrapes the docx file and returns the knowledge pieces or `null` if it can't scrape it
5434
+ */
5435
+ scrape: function (source, options) {
5436
+ return __awaiter(this, void 0, void 0, function () {
5437
+ var cacheFilehandler, markdownSource, knowledge;
5438
+ return __generator(this, function (_a) {
5439
+ switch (_a.label) {
5440
+ case 0: return [4 /*yield*/, documentScraper.$convert(source, options)];
5441
+ case 1:
5442
+ cacheFilehandler = _a.sent();
5443
+ markdownSource = {
5444
+ source: source.source,
5445
+ filename: cacheFilehandler.filename,
5446
+ url: null,
5447
+ mimeType: 'text/markdown',
5448
+ asText: function () {
5449
+ return __awaiter(this, void 0, void 0, function () {
5450
+ return __generator(this, function (_a) {
5451
+ switch (_a.label) {
5452
+ case 0: return [4 /*yield*/, readFile(cacheFilehandler.filename, 'utf-8')];
5453
+ case 1:
5454
+ // Note: [0] In $convert we check that the file exists
5455
+ return [2 /*return*/, _a.sent()];
5456
+ }
5457
+ });
5458
+ });
5459
+ },
5460
+ asJson: function () {
5461
+ throw new UnexpectedError('Did not expect that `markdownScraper` would need to get the content `asJson`');
5462
+ },
5463
+ asBlob: function () {
5464
+ throw new UnexpectedError('Did not expect that `markdownScraper` would need to get the content `asBlob`');
5465
+ },
5466
+ };
5467
+ knowledge = markdownScraper.scrape(markdownSource, options);
5468
+ return [4 /*yield*/, cacheFilehandler.destroy()];
5469
+ case 2:
5470
+ _a.sent();
5471
+ return [2 /*return*/, knowledge];
5472
+ }
5473
+ });
5474
+ });
5475
+ },
5476
+ } /* TODO: [🦷] as const */;
5477
+ /**
5478
+ * TODO: [👣] Converted documents can act as cached items - there is no need to run conversion each time
5479
+ * TODO: [🦖] Make some system for putting scrapers to separete packages
5480
+ * TODO: [🪂] Do it in parallel 11:11
5481
+ * TODO: [🦷] Ideally use `as const satisfies Converter & Scraper` BUT this combination throws errors
5482
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
5483
+ */
5484
+
5485
+ /**
5486
+ * Scraper for .docx files
5487
+ *
5488
+ * @see `documentationUrl` for more details
5489
+ * @public exported from `@promptbook/core`
5490
+ */
5491
+ var legacyDocumentScraper = {
5492
+ /**
5493
+ * Mime types that this scraper can handle
5494
+ */
5495
+ mimeTypes: ['application/msword', 'text/rtf'],
5496
+ /**
5497
+ * Link to documentation
5498
+ */
5499
+ documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
5500
+ /**
5501
+ * Convert the `.doc` or `.rtf` to `.doc` file and returns intermediate source
5502
+ *
5503
+ * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
5504
+ */
5505
+ $convert: function (source, options) {
5506
+ return __awaiter(this, void 0, void 0, function () {
5507
+ var _a, externalProgramsPaths, rootDirname, _b, cacheDirname, _c, isCacheCleaned, _d, isVerbose, extension, cacheFilehandler, documentSourceOutdirPathForLibreOffice_1, command_1, files_1, file;
5508
+ return __generator(this, function (_e) {
5509
+ switch (_e.label) {
5510
+ case 0:
5511
+ _a = options.externalProgramsPaths, externalProgramsPaths = _a === void 0 ? {} : _a, rootDirname = options.rootDirname, _b = options.cacheDirname, cacheDirname = _b === void 0 ? SCRAPE_CACHE_DIRNAME : _b, _c = options.isCacheCleaned, isCacheCleaned = _c === void 0 ? false : _c, _d = options.isVerbose, isVerbose = _d === void 0 ? IS_VERBOSE : _d;
5512
+ if (!$isRunningInNode()) {
5513
+ throw new KnowledgeScrapeError('Scraping .doc files is only supported in Node environment');
5514
+ }
5515
+ if (externalProgramsPaths.libreOfficePath === undefined) {
5516
+ throw new MissingToolsError('LibreOffice is required for scraping .doc and .rtf files');
5517
+ }
5518
+ if (source.filename === null) {
5519
+ // TODO: [🧠] Maybe save file as temporary
5520
+ throw new KnowledgeScrapeError('When parsing .doc or .rtf file, it must be real file in the file system');
5521
+ }
5522
+ extension = getFileExtension(source.filename);
5523
+ return [4 /*yield*/, getScraperIntermediateSource(source, {
5524
+ rootDirname: rootDirname,
5525
+ cacheDirname: cacheDirname,
5526
+ isCacheCleaned: isCacheCleaned,
5527
+ extension: 'docx',
5528
+ isVerbose: isVerbose,
5529
+ })];
5530
+ case 1:
5531
+ cacheFilehandler = _e.sent();
5532
+ if (isVerbose) {
5533
+ console.info("documentScraper: Converting .".concat(extension, " -> .docx"));
5534
+ }
5535
+ return [4 /*yield*/, $isFileExisting(cacheFilehandler.filename)];
5536
+ case 2:
5537
+ if (!!(_e.sent())) return [3 /*break*/, 8];
5538
+ documentSourceOutdirPathForLibreOffice_1 = join(dirname(cacheFilehandler.filename), 'libreoffice')
5539
+ .split('\\')
5540
+ .join('/');
5541
+ command_1 = "\"".concat(externalProgramsPaths.libreOfficePath, "\" --headless --convert-to docx \"").concat(source.filename, "\" --outdir \"").concat(documentSourceOutdirPathForLibreOffice_1, "\"");
5542
+ // TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook - this should trigger build polution error
5543
+ return [4 /*yield*/, $execCommand(command_1)];
5544
+ case 3:
5545
+ // TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook - this should trigger build polution error
5546
+ _e.sent();
5547
+ return [4 /*yield*/, readdir(documentSourceOutdirPathForLibreOffice_1)];
5548
+ case 4:
5549
+ files_1 = _e.sent();
5550
+ if (files_1.length !== 1) {
5551
+ throw new UnexpectedError(spaceTrim(function (block) { return "\n Expected exactly 1 file in the LibreOffice output directory, got ".concat(files_1.length, "\n\n The temporary folder:\n ").concat(block(documentSourceOutdirPathForLibreOffice_1), "\n\n Command:\n > ").concat(block(command_1), "\n "); }));
5552
+ }
5553
+ file = files_1[0];
5554
+ return [4 /*yield*/, rename(join(documentSourceOutdirPathForLibreOffice_1, file), cacheFilehandler.filename)];
5555
+ case 5:
5556
+ _e.sent();
5557
+ return [4 /*yield*/, rmdir(documentSourceOutdirPathForLibreOffice_1)];
5558
+ case 6:
5559
+ _e.sent();
5560
+ return [4 /*yield*/, $isFileExisting(cacheFilehandler.filename)];
5561
+ case 7:
5562
+ if (!(_e.sent())) {
5563
+ throw new UnexpectedError(spaceTrim(function (block) { return "\n File that was supposed to be created by LibreOffice does not exist for unknown reason\n\n Expected file:\n ".concat(block(cacheFilehandler.filename), "\n\n The temporary folder:\n ").concat(block(documentSourceOutdirPathForLibreOffice_1), "\n\n Command:\n > ").concat(block(command_1), "\n\n "); }));
5564
+ }
5565
+ _e.label = 8;
5566
+ case 8: return [2 /*return*/, cacheFilehandler];
5567
+ }
5568
+ });
5569
+ });
5570
+ },
5571
+ /**
5572
+ * Scrapes the `.doc` or `.rtf` file and returns the knowledge pieces or `null` if it can't scrape it
5573
+ */
5574
+ scrape: function (source, options) {
5575
+ return __awaiter(this, void 0, void 0, function () {
5576
+ var cacheFilehandler, markdownSource, knowledge;
5577
+ return __generator(this, function (_a) {
5578
+ switch (_a.label) {
5579
+ case 0: return [4 /*yield*/, legacyDocumentScraper.$convert(source, options)];
5580
+ case 1:
5581
+ cacheFilehandler = _a.sent();
5582
+ markdownSource = {
5583
+ source: source.source,
5584
+ filename: cacheFilehandler.filename,
5585
+ url: null,
5586
+ mimeType: 'text/markdown',
5587
+ asText: function () {
5588
+ throw new UnexpectedError('Did not expect that `documentScraper` would need to get the content `asText`');
5589
+ },
5590
+ asJson: function () {
5591
+ throw new UnexpectedError('Did not expect that `documentScraper` would need to get the content `asJson`');
5592
+ },
5593
+ asBlob: function () {
5594
+ throw new UnexpectedError('Did not expect that `documentScraper` would need to get the content `asBlob`');
5595
+ },
5596
+ };
5597
+ knowledge = documentScraper.scrape(markdownSource, options);
5598
+ return [4 /*yield*/, cacheFilehandler.destroy()];
5599
+ case 2:
5600
+ _a.sent();
5601
+ return [2 /*return*/, knowledge];
5602
+ }
5603
+ });
5604
+ });
5605
+ },
5606
+ } /* TODO: [🦷] as const */;
5607
+ /**
5608
+ * TODO: [👣] Converted documents can act as cached items - there is no need to run conversion each time
5609
+ * TODO: [🦖] Make some system for putting scrapers to separete packages
5610
+ * TODO: [🪂] Do it in parallel 11:11
5611
+ * TODO: [🦷] Ideally use `as const satisfies Converter & Scraper` BUT this combination throws errors
5612
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
5613
+ */
5614
+
5615
+ /**
5616
+ * Scraper for .docx files
5617
+ *
5618
+ * @see `documentationUrl` for more details
5619
+ * @public exported from `@promptbook/core`
5620
+ */
5621
+ var pdfScraper = {
5622
+ /**
5623
+ * Mime types that this scraper can handle
5624
+ */
5625
+ mimeTypes: ['application/pdf'],
5626
+ /**
5627
+ * Link to documentation
5628
+ */
5629
+ documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
5630
+ /**
5631
+ * Converts the `.pdf` file to `.md` file and returns intermediate source
5632
+ */
5633
+ $convert: function (source, options) {
5634
+ return __awaiter(this, void 0, void 0, function () {
5635
+ return __generator(this, function (_a) {
5636
+ TODO_USE(source);
5637
+ TODO_USE(options);
5638
+ throw new NotYetImplementedError('PDF conversion not yet implemented');
5639
+ });
5640
+ });
5641
+ },
5642
+ /**
5643
+ * Scrapes the `.pdf` file and returns the knowledge pieces or `null` if it can't scrape it
5644
+ */
5645
+ scrape: function (source, options) {
5646
+ return __awaiter(this, void 0, void 0, function () {
5647
+ return __generator(this, function (_a) {
5648
+ TODO_USE(source);
5649
+ TODO_USE(options);
5650
+ /*
5651
+ const {
5652
+ externalProgramsPaths = {},
5653
+ cacheDirname = SCRAPE_CACHE_DIRNAME,
5654
+ isCacheCleaned = false,
5655
+ isVerbose = IS_VERBOSE,
5656
+ } = options;
5657
+ */
5658
+ throw new NotYetImplementedError('PDF scraping not yet implemented');
5659
+ });
5660
+ });
5661
+ },
5662
+ } /* TODO: [🦷] as const */;
5663
+ /**
5664
+ * TODO: [👣] Converted documents can act as cached items - there is no need to run conversion each time
5665
+ * TODO: [🦖] Make some system for putting scrapers to separete packages
5666
+ * TODO: [🪂] Do it in parallel 11:11
5667
+ * TODO: [🦷] Ideally use `as const satisfies Converter & Scraper` BUT this combination throws errors
5668
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
5669
+ */
5670
+
5671
+ /**
5672
+ * A converter instance that uses showdown and highlight extensions
5673
+ *
5674
+ * @type {Converter}
5675
+ * @private for markdown and html knowledge scrapers
5676
+ */
5677
+ var markdownConverter = new Converter({
5678
+ flavor: 'github', // <- TODO: !!!!!! Explicitly specify the flavor of promptbook markdown
5679
+ /*
5680
+ > import showdownHighlight from 'showdown-highlight';
5681
+ > extensions: [
5682
+ > showdownHighlight({
5683
+ > // Whether to add the classes to the <pre> tag, default is false
5684
+ > pre: true,
5685
+ > // Whether to use hljs' auto language detection, default is true
5686
+ > auto_detection: true,
5687
+ > }),
5688
+ > ],
5689
+ */
5690
+ });
5691
+ /**
5692
+ * TODO: !!!!!! Figure out better name not to confuse with `Converter`
5693
+ * TODO: !!!!!! Lazy-make converter
5694
+ */
5695
+
5696
+ /**
5697
+ * Scraper for .docx files
5698
+ *
5699
+ * @see `documentationUrl` for more details
5700
+ * @public exported from `@promptbook/core`
5701
+ */
5702
+ var websiteScraper = {
5703
+ /**
5704
+ * Mime types that this scraper can handle
5705
+ */
5706
+ mimeTypes: ['text/html'],
5707
+ /**
5708
+ * Link to documentation
5709
+ */
5710
+ documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
5711
+ /**
5712
+ * Convert the website to `.md` file and returns intermediate source
5713
+ *
5714
+ * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
5715
+ */
5716
+ $convert: function (source, options) {
5717
+ return __awaiter(this, void 0, void 0, function () {
5718
+ var
5719
+ // TODO: [🧠] Maybe in node use headless browser not just JSDOM
5720
+ // externalProgramsPaths = {},
5721
+ rootDirname, _a, cacheDirname, _b, isCacheCleaned, _c, isVerbose, jsdom, _d, reader, article, html, i, cacheFilehandler, markdown;
5722
+ return __generator(this, function (_e) {
5723
+ switch (_e.label) {
5724
+ case 0:
5725
+ rootDirname = options.rootDirname, _a = options.cacheDirname, cacheDirname = _a === void 0 ? SCRAPE_CACHE_DIRNAME : _a, _b = options.isCacheCleaned, isCacheCleaned = _b === void 0 ? false : _b, _c = options.isVerbose, isVerbose = _c === void 0 ? IS_VERBOSE : _c;
5726
+ // TODO: !!!!!! Does this work in browser? Make it work.
5727
+ if (source.url === null) {
5728
+ throw new KnowledgeScrapeError('Website scraper requires URL');
5729
+ }
5730
+ _d = JSDOM.bind;
5731
+ return [4 /*yield*/, source.asText()];
5732
+ case 1:
5733
+ jsdom = new (_d.apply(JSDOM, [void 0, _e.sent(), {
5734
+ url: source.url,
5735
+ }]))();
5736
+ reader = new Readability(jsdom.window.document);
5737
+ article = reader.parse();
5738
+ console.log(article);
5739
+ return [4 /*yield*/, forTime(10000)];
5740
+ case 2:
5741
+ _e.sent();
5742
+ html = (article === null || article === void 0 ? void 0 : article.content) || (article === null || article === void 0 ? void 0 : article.textContent) || jsdom.window.document.body.innerHTML;
5743
+ // Note: Unwrap html such as it is convertable by `markdownConverter`
5744
+ for (i = 0; i < 2; i++) {
5745
+ html = html.replace(/<div\s*(?:id="readability-page-\d+"\s+class="page")?>(.*)<\/div>/is, '$1');
5746
+ }
5747
+ if (html.includes('<div')) {
5748
+ html = (article === null || article === void 0 ? void 0 : article.textContent) || '';
5749
+ }
5750
+ return [4 /*yield*/, getScraperIntermediateSource(source, {
5751
+ rootDirname: rootDirname,
5752
+ cacheDirname: cacheDirname,
5753
+ isCacheCleaned: isCacheCleaned,
5754
+ extension: 'html',
5755
+ isVerbose: isVerbose,
5756
+ })];
5757
+ case 3:
5758
+ cacheFilehandler = _e.sent();
5759
+ return [4 /*yield*/, writeFile(cacheFilehandler.filename, html, 'utf-8')];
5760
+ case 4:
5761
+ _e.sent();
5762
+ markdown = markdownConverter.makeMarkdown(html, jsdom.window.document);
5763
+ return [2 /*return*/, __assign(__assign({}, cacheFilehandler), { markdown: markdown })];
5764
+ }
5765
+ });
5766
+ });
5767
+ },
5768
+ /**
5769
+ * Scrapes the website and returns the knowledge pieces or `null` if it can't scrape it
5770
+ */
5771
+ scrape: function (source, options) {
5772
+ return __awaiter(this, void 0, void 0, function () {
5773
+ var cacheFilehandler, markdownSource, knowledge;
5774
+ return __generator(this, function (_a) {
5775
+ switch (_a.label) {
5776
+ case 0: return [4 /*yield*/, websiteScraper.$convert(source, options)];
5777
+ case 1:
5778
+ cacheFilehandler = _a.sent();
5779
+ markdownSource = {
5780
+ source: source.source,
5781
+ filename: cacheFilehandler.filename,
5782
+ url: null,
5783
+ mimeType: 'text/markdown',
5784
+ asText: function () {
5785
+ return cacheFilehandler.markdown;
5786
+ },
5787
+ asJson: function () {
5788
+ throw new UnexpectedError('Did not expect that `markdownScraper` would need to get the content `asJson`');
5789
+ },
5790
+ asBlob: function () {
5791
+ throw new UnexpectedError('Did not expect that `markdownScraper` would need to get the content `asBlob`');
5792
+ },
5793
+ };
5794
+ knowledge = markdownScraper.scrape(markdownSource, options);
5795
+ return [4 /*yield*/, cacheFilehandler.destroy()];
5796
+ case 2:
5797
+ _a.sent();
5798
+ return [2 /*return*/, knowledge];
5799
+ }
5800
+ });
5801
+ });
5802
+ },
5803
+ } /* TODO: [🦷] as const */;
4770
5804
  /**
4771
- * TODO: [🐚] Change onProgress to object that represents the running execution, can be subscribed via RxJS to and also awaited
5805
+ * TODO: !!!!!! Put into separate package
5806
+ * TODO: [👣] Scraped website in .md can act as cache item - there is no need to run conversion each time
5807
+ * TODO: [🦖] Make some system for putting scrapers to separete packages
5808
+ * TODO: [🪂] Do it in parallel 11:11
5809
+ * TODO: [🦷] Ideally use `as const satisfies Converter & Scraper` BUT this combination throws errors
5810
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
4772
5811
  */
4773
5812
 
5813
+ // TODO: [🦖] !!!!!! Pass scrapers as dependency,
4774
5814
  /**
4775
- * Creates executor function from pipeline and execution tools.
5815
+ * @@@
4776
5816
  *
4777
- * @returns The executor function
4778
- * @throws {PipelineLogicError} on logical error in the pipeline
4779
5817
  * @public exported from `@promptbook/core`
4780
5818
  */
4781
- function createPipelineExecutor(options) {
4782
- var _this = this;
4783
- var pipeline = options.pipeline, tools = options.tools, _a = options.settings, settings = _a === void 0 ? {} : _a;
4784
- var _b = settings.maxExecutionAttempts, maxExecutionAttempts = _b === void 0 ? MAX_EXECUTION_ATTEMPTS : _b, _c = settings.maxParallelCount, maxParallelCount = _c === void 0 ? MAX_PARALLEL_COUNT : _c, _d = settings.csvSettings, csvSettings = _d === void 0 ? DEFAULT_CSV_SETTINGS : _d, _e = settings.isVerbose, isVerbose = _e === void 0 ? IS_VERBOSE : _e, _f = settings.isNotPreparedWarningSupressed, isNotPreparedWarningSupressed = _f === void 0 ? false : _f;
4785
- validatePipeline(pipeline);
4786
- var pipelineIdentification = (function () {
4787
- // Note: This is a 😐 implementation of [🚞]
4788
- var _ = [];
4789
- if (pipeline.sourceFile !== undefined) {
4790
- _.push("File: ".concat(pipeline.sourceFile));
4791
- }
4792
- if (pipeline.pipelineUrl !== undefined) {
4793
- _.push("Url: ".concat(pipeline.pipelineUrl));
4794
- }
4795
- return _.join('\n');
4796
- })();
4797
- var preparedPipeline;
4798
- if (isPipelinePrepared(pipeline)) {
4799
- preparedPipeline = pipeline;
4800
- }
4801
- else if (isNotPreparedWarningSupressed !== true) {
4802
- console.warn(spaceTrim$1(function (block) { return "\n Pipeline is not prepared\n\n ".concat(block(pipelineIdentification), "\n\n It will be prepared ad-hoc before the first execution and **returned as `preparedPipeline` in `PipelineExecutorResult`**\n But it is recommended to prepare the pipeline during collection preparation\n\n @see more at https://ptbk.io/prepare-pipeline\n "); }));
4803
- }
4804
- var runCount = 0;
4805
- var pipelineExecutor = function (inputParameters, onProgress) { return __awaiter(_this, void 0, void 0, function () {
4806
- return __generator(this, function (_a) {
4807
- runCount++;
4808
- return [2 /*return*/, /* not await */ executePipeline({
4809
- pipeline: pipeline,
4810
- preparedPipeline: preparedPipeline,
4811
- setPreparedPipeline: function (newPreparedPipeline) {
4812
- preparedPipeline = newPreparedPipeline;
4813
- },
4814
- inputParameters: inputParameters,
4815
- tools: tools,
4816
- onProgress: onProgress,
4817
- pipelineIdentification: spaceTrim$1(function (block) { return "\n ".concat(block(pipelineIdentification), "\n ").concat(runCount === 1 ? '' : "Run #".concat(runCount), "\n "); }),
4818
- settings: {
4819
- maxExecutionAttempts: maxExecutionAttempts,
4820
- maxParallelCount: maxParallelCount,
4821
- csvSettings: csvSettings,
4822
- isVerbose: isVerbose,
4823
- isNotPreparedWarningSupressed: isNotPreparedWarningSupressed,
4824
- },
4825
- })];
4826
- });
4827
- }); };
4828
- return pipelineExecutor;
5819
+ var SCRAPERS = [
5820
+ markdownScraper,
5821
+ documentScraper,
5822
+ legacyDocumentScraper,
5823
+ pdfScraper,
5824
+ websiteScraper,
5825
+ // <- Note: [♓️] This is the order of the scrapers for knowledge, BUT consider some better (more explicit) way to do this
5826
+ ];
5827
+
5828
+ /**
5829
+ * Creates unique name for the source
5830
+ *
5831
+ * @private within the repository
5832
+ */
5833
+ function sourceContentToName(sourceContent) {
5834
+ // TODO: !!!!!! Better name for source than gibberish hash
5835
+ var hash = SHA256(hexEncoder.parse(JSON.stringify(sourceContent)))
5836
+ // <- TODO: [🥬] Encapsulate sha256 to some private utility function
5837
+ .toString( /* hex */)
5838
+ .substring(0, 20);
5839
+ // <- TODO: [🥬] Make some system for hashes and ids of promptbook
5840
+ var semanticName = normalizeToKebabCase(sourceContent.substring(0, 20));
5841
+ var pieces = ['source', semanticName, hash].filter(function (piece) { return piece !== ''; });
5842
+ var name = pieces.join('-').split('--').join('-');
5843
+ // <- TODO: Use MAX_FILENAME_LENGTH
5844
+ return name;
4829
5845
  }
4830
5846
  /**
4831
- * TODO: [🐚] Change onProgress to object that represents the running execution, can be subscribed via RxJS to and also awaited
5847
+ * TODO: [🐱‍🐉][🧠] Make some smart crop NOT source-i-m-pavol-a-develop-... BUT source-i-m-pavol-a-developer-...
5848
+ */
5849
+
5850
+ /**
5851
+ * Convert file extension to mime type
5852
+ *
5853
+ * @private within the repository
4832
5854
  */
5855
+ function extensionToMimeType(value) {
5856
+ return lookup(value) || 'application/octet-stream';
5857
+ }
4833
5858
 
4834
5859
  /**
4835
5860
  * @@@
4836
5861
  *
4837
- * @public exported from `@promptbook/core`
5862
+ * @private for scraper utilities
4838
5863
  */
4839
- function prepareKnowledgeFromMarkdown(knowledgeContent /* <- TODO: [🖖] (?maybe not) Always the file */, options) {
5864
+ function makeKnowledgeSourceHandler(knowledgeSource, options) {
5865
+ var _a;
4840
5866
  return __awaiter(this, void 0, void 0, function () {
4841
- var llmTools, _a, maxParallelCount, _b, isVerbose, collection, prepareKnowledgeFromMarkdownExecutor, _c, prepareTitleExecutor, _d, prepareKeywordsExecutor, _e, result, outputParameters, knowledgePiecesRaw, knowledgeTextPieces, knowledge;
4842
- var _f, _g, _h;
4843
- var _this = this;
4844
- return __generator(this, function (_j) {
4845
- switch (_j.label) {
5867
+ var sourceContent, name, _b, _c, rootDirname, _d, isVerbose, url, response_1, mimeType, filename_1, fileExtension, mimeType_1;
5868
+ return __generator(this, function (_e) {
5869
+ switch (_e.label) {
4846
5870
  case 0:
4847
- llmTools = options.llmTools, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a, _b = options.isVerbose, isVerbose = _b === void 0 ? IS_VERBOSE : _b;
4848
- TODO_USE(maxParallelCount); // <- [🪂]
4849
- collection = createCollectionFromJson.apply(void 0, __spreadArray([], __read(PipelineCollection), false));
4850
- _c = createPipelineExecutor;
4851
- _f = {};
4852
- return [4 /*yield*/, collection.getPipelineByUrl('https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.ptbk.md')];
5871
+ sourceContent = knowledgeSource.sourceContent;
5872
+ name = knowledgeSource.name;
5873
+ _b = options || {}, _c = _b.rootDirname, rootDirname = _c === void 0 ? null : _c, _d = _b.isVerbose, isVerbose = _d === void 0 ? IS_VERBOSE : _d;
5874
+ TODO_USE(isVerbose);
5875
+ if (!name) {
5876
+ name = sourceContentToName(sourceContent);
5877
+ }
5878
+ if (!isValidUrl(sourceContent)) return [3 /*break*/, 2];
5879
+ url = sourceContent;
5880
+ return [4 /*yield*/, fetch(url)];
4853
5881
  case 1:
4854
- prepareKnowledgeFromMarkdownExecutor = _c.apply(void 0, [(_f.pipeline = _j.sent(),
4855
- _f.tools = {
4856
- llm: llmTools,
5882
+ response_1 = _e.sent();
5883
+ mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
5884
+ return [2 /*return*/, {
5885
+ source: name,
5886
+ filename: null,
5887
+ url: url,
5888
+ mimeType: mimeType,
5889
+ asBlob: function () {
5890
+ return __awaiter(this, void 0, void 0, function () {
5891
+ var content;
5892
+ return __generator(this, function (_a) {
5893
+ switch (_a.label) {
5894
+ case 0: return [4 /*yield*/, response_1.blob()];
5895
+ case 1:
5896
+ content = _a.sent();
5897
+ return [2 /*return*/, content];
5898
+ }
5899
+ });
5900
+ });
4857
5901
  },
4858
- _f)]);
4859
- _d = createPipelineExecutor;
4860
- _g = {};
4861
- return [4 /*yield*/, collection.getPipelineByUrl('https://promptbook.studio/promptbook/prepare-knowledge-title.ptbk.md')];
4862
- case 2:
4863
- prepareTitleExecutor = _d.apply(void 0, [(_g.pipeline = _j.sent(),
4864
- _g.tools = {
4865
- llm: llmTools,
5902
+ asJson: function () {
5903
+ return __awaiter(this, void 0, void 0, function () {
5904
+ var content;
5905
+ return __generator(this, function (_a) {
5906
+ switch (_a.label) {
5907
+ case 0: return [4 /*yield*/, response_1.json()];
5908
+ case 1:
5909
+ content = _a.sent();
5910
+ return [2 /*return*/, content];
5911
+ }
5912
+ });
5913
+ });
4866
5914
  },
4867
- _g)]);
4868
- _e = createPipelineExecutor;
4869
- _h = {};
4870
- return [4 /*yield*/, collection.getPipelineByUrl('https://promptbook.studio/promptbook/prepare-knowledge-keywords.ptbk.md')];
4871
- case 3:
4872
- prepareKeywordsExecutor = _e.apply(void 0, [(_h.pipeline = _j.sent(),
4873
- _h.tools = {
4874
- llm: llmTools,
5915
+ asText: function () {
5916
+ return __awaiter(this, void 0, void 0, function () {
5917
+ var content;
5918
+ return __generator(this, function (_a) {
5919
+ switch (_a.label) {
5920
+ case 0: return [4 /*yield*/, response_1.text()];
5921
+ case 1:
5922
+ content = _a.sent();
5923
+ return [2 /*return*/, content];
5924
+ }
5925
+ });
5926
+ });
4875
5927
  },
4876
- _h)]);
4877
- return [4 /*yield*/, prepareKnowledgeFromMarkdownExecutor({ knowledgeContent: knowledgeContent })];
4878
- case 4:
4879
- result = _j.sent();
4880
- assertsExecutionSuccessful(result);
4881
- outputParameters = result.outputParameters;
4882
- knowledgePiecesRaw = outputParameters.knowledgePieces;
4883
- knowledgeTextPieces = (knowledgePiecesRaw || '').split('\n---\n');
4884
- // <- TODO: [main] !!!!! Smarter split and filter out empty pieces
4885
- if (isVerbose) {
4886
- console.info('knowledgeTextPieces:', knowledgeTextPieces);
5928
+ }];
5929
+ case 2:
5930
+ if (!(isValidFilePath(sourceContent) || /\.[a-z]{1,10}$/i.exec(sourceContent))) return [3 /*break*/, 4];
5931
+ if (!$isRunningInNode()) {
5932
+ throw new EnvironmentMismatchError('Importing knowledge source file works only in Node.js environment');
4887
5933
  }
4888
- return [4 /*yield*/, Promise.all(
4889
- // TODO: [🪂] !! Do not send all at once but in chunks
4890
- knowledgeTextPieces.map(function (knowledgeTextPiece, i) { return __awaiter(_this, void 0, void 0, function () {
4891
- var name, title, knowledgePieceContent, keywords, index, titleResult, _a, titleRaw, keywordsResult, _b, keywordsRaw, embeddingResult, error_1;
4892
- return __generator(this, function (_c) {
4893
- switch (_c.label) {
4894
- case 0:
4895
- name = "piece-".concat(i);
4896
- title = spaceTrim(knowledgeTextPiece.substring(0, 100));
4897
- knowledgePieceContent = spaceTrim(knowledgeTextPiece);
4898
- keywords = [];
4899
- index = [];
4900
- _c.label = 1;
4901
- case 1:
4902
- _c.trys.push([1, 7, , 8]);
4903
- return [4 /*yield*/, prepareTitleExecutor({ knowledgePieceContent: knowledgePieceContent })];
4904
- case 2:
4905
- titleResult = _c.sent();
4906
- _a = titleResult.outputParameters.title, titleRaw = _a === void 0 ? 'Untitled' : _a;
4907
- title = spaceTrim(titleRaw) /* <- TODO: Maybe do in pipeline */;
4908
- name = titleToName(title);
4909
- return [4 /*yield*/, prepareKeywordsExecutor({ knowledgePieceContent: knowledgePieceContent })];
4910
- case 3:
4911
- keywordsResult = _c.sent();
4912
- _b = keywordsResult.outputParameters.keywords, keywordsRaw = _b === void 0 ? '' : _b;
4913
- keywords = (keywordsRaw || '')
4914
- .split(',')
4915
- .map(function (keyword) { return keyword.trim(); })
4916
- .filter(function (keyword) { return keyword !== ''; });
4917
- if (isVerbose) {
4918
- console.info("Keywords for \"".concat(title, "\":"), keywords);
5934
+ if (rootDirname === null) {
5935
+ throw new EnvironmentMismatchError('Can not import file knowledge in non-file pipeline');
5936
+ // <- TODO: [🧠] What is the best error type here?
5937
+ }
5938
+ filename_1 = join(rootDirname, sourceContent).split('\\').join('/');
5939
+ fileExtension = getFileExtension(filename_1);
5940
+ mimeType_1 = extensionToMimeType(fileExtension || '');
5941
+ return [4 /*yield*/, $isFileExisting(filename_1)];
5942
+ case 3:
5943
+ if (!(_e.sent())) {
5944
+ throw new NotFoundError(spaceTrim(function (block) { return "\n Can not make source handler for file which does not exist:\n\n File:\n ".concat(block(filename_1), "\n "); }));
5945
+ }
5946
+ // TODO: !!!!!! Test security file - file is scoped to the project (maybe do this in `filesystemTools`)
5947
+ return [2 /*return*/, {
5948
+ source: name,
5949
+ filename: filename_1,
5950
+ url: null,
5951
+ mimeType: mimeType_1,
5952
+ asBlob: function () {
5953
+ return __awaiter(this, void 0, void 0, function () {
5954
+ var content;
5955
+ return __generator(this, function (_a) {
5956
+ switch (_a.label) {
5957
+ case 0: return [4 /*yield*/, readFile(filename_1)];
5958
+ case 1:
5959
+ content = _a.sent();
5960
+ // <- Note: It's OK to use sync in tooling for tests
5961
+ return [2 /*return*/, new Blob([
5962
+ content,
5963
+ // <- TODO: !!!!!! Maybe not working
5964
+ ], { type: mimeType_1 })];
4919
5965
  }
4920
- if (!!llmTools.callEmbeddingModel) return [3 /*break*/, 4];
4921
- // TODO: [🟥] Detect browser / node and make it colorfull
4922
- console.error('No callEmbeddingModel function provided');
4923
- return [3 /*break*/, 6];
4924
- case 4: return [4 /*yield*/, llmTools.callEmbeddingModel({
4925
- title: "Embedding for ".concat(title) /* <- Note: No impact on embedding result itself, just for logging */,
4926
- parameters: {},
4927
- content: knowledgePieceContent,
4928
- modelRequirements: {
4929
- modelVariant: 'EMBEDDING',
4930
- },
4931
- })];
4932
- case 5:
4933
- embeddingResult = _c.sent();
4934
- index.push({
4935
- modelName: embeddingResult.modelName,
4936
- position: embeddingResult.content,
4937
- });
4938
- _c.label = 6;
4939
- case 6: return [3 /*break*/, 8];
4940
- case 7:
4941
- error_1 = _c.sent();
4942
- // Note: Here is expected error:
4943
- // > PipelineExecutionError: You have not provided any `LlmExecutionTools` that support model variant "EMBEDDING
4944
- if (!(error_1 instanceof PipelineExecutionError)) {
4945
- throw error_1;
5966
+ });
5967
+ });
5968
+ },
5969
+ asJson: function () {
5970
+ return __awaiter(this, void 0, void 0, function () {
5971
+ var _a, _b;
5972
+ return __generator(this, function (_c) {
5973
+ switch (_c.label) {
5974
+ case 0:
5975
+ _b = (_a = JSON).parse;
5976
+ return [4 /*yield*/, readFile(filename_1, 'utf-8')];
5977
+ case 1: return [2 /*return*/, _b.apply(_a, [_c.sent()])];
4946
5978
  }
4947
- // TODO: [🟥] Detect browser / node and make it colorfull
4948
- console.error(error_1, "<- Note: This error is not critical to prepare the pipeline, just knowledge pieces won't have embeddings");
4949
- return [3 /*break*/, 8];
4950
- case 8: return [2 /*return*/, {
4951
- name: name,
4952
- title: title,
4953
- content: knowledgePieceContent,
4954
- keywords: keywords,
4955
- index: index,
4956
- // <- TODO: [☀] sources,
4957
- }];
4958
- }
4959
- });
4960
- }); }))];
4961
- case 5:
4962
- knowledge = _j.sent();
4963
- return [2 /*return*/, knowledge];
5979
+ });
5980
+ });
5981
+ },
5982
+ asText: function () {
5983
+ return __awaiter(this, void 0, void 0, function () {
5984
+ return __generator(this, function (_a) {
5985
+ switch (_a.label) {
5986
+ case 0: return [4 /*yield*/, readFile(filename_1, 'utf-8')];
5987
+ case 1: return [2 /*return*/, _a.sent()];
5988
+ }
5989
+ });
5990
+ });
5991
+ },
5992
+ }];
5993
+ case 4: return [2 /*return*/, {
5994
+ source: name,
5995
+ filename: null,
5996
+ url: null,
5997
+ mimeType: 'text/markdown',
5998
+ asText: function () {
5999
+ return knowledgeSource.sourceContent;
6000
+ },
6001
+ asJson: function () {
6002
+ throw new UnexpectedError('Did not expect that `markdownScraper` would need to get the content `asJson`');
6003
+ },
6004
+ asBlob: function () {
6005
+ throw new UnexpectedError('Did not expect that `markdownScraper` would need to get the content `asBlob`');
6006
+ },
6007
+ }];
4964
6008
  }
4965
6009
  });
4966
6010
  });
4967
6011
  }
4968
- /**
4969
- * TODO: [🐝][🔼][main] !!! Export via `@promptbook/markdown`
4970
- * TODO: [🪂] Do it in parallel 11:11
4971
- * Note: No need to aggregate usage here, it is done by intercepting the llmTools
4972
- */
4973
6012
 
4974
6013
  /**
4975
6014
  * Prepares the knowle
@@ -4979,21 +6018,64 @@ function prepareKnowledgeFromMarkdown(knowledgeContent /* <- TODO: [🖖] (?mayb
4979
6018
  */
4980
6019
  function prepareKnowledgePieces(knowledgeSources, options) {
4981
6020
  return __awaiter(this, void 0, void 0, function () {
4982
- var _a, maxParallelCount, knowledgePrepared;
6021
+ var _a, maxParallelCount, rootDirname, _b, isVerbose, knowledgePreparedUnflatten, knowledgePrepared;
4983
6022
  var _this = this;
4984
- return __generator(this, function (_b) {
4985
- switch (_b.label) {
6023
+ return __generator(this, function (_c) {
6024
+ switch (_c.label) {
4986
6025
  case 0:
4987
- _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a;
4988
- knowledgePrepared = [];
4989
- return [4 /*yield*/, forEachAsync(knowledgeSources, { maxParallelCount: maxParallelCount }, function (knowledgeSource) { return __awaiter(_this, void 0, void 0, function () {
4990
- var partialPieces, pieces;
4991
- return __generator(this, function (_a) {
4992
- switch (_a.label) {
4993
- case 0: return [4 /*yield*/, prepareKnowledgeFromMarkdown(knowledgeSource.sourceContent, // <- TODO: [🐝][main] !!! Unhardcode markdown, detect which type it is - BE AWARE of big package size
4994
- options)];
6026
+ _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a, rootDirname = options.rootDirname, _b = options.isVerbose, isVerbose = _b === void 0 ? IS_VERBOSE : _b;
6027
+ knowledgePreparedUnflatten = new Array(knowledgeSources.length);
6028
+ return [4 /*yield*/, forEachAsync(knowledgeSources, { maxParallelCount: maxParallelCount }, function (knowledgeSource, index) { return __awaiter(_this, void 0, void 0, function () {
6029
+ var partialPieces, sourceHandler, SCRAPERS_1, SCRAPERS_1_1, scraper, partialPiecesUnchecked, e_1_1, pieces;
6030
+ var e_1, _a;
6031
+ return __generator(this, function (_b) {
6032
+ switch (_b.label) {
6033
+ case 0:
6034
+ partialPieces = null;
6035
+ return [4 /*yield*/, makeKnowledgeSourceHandler(knowledgeSource, { rootDirname: rootDirname, isVerbose: isVerbose })];
4995
6036
  case 1:
4996
- partialPieces = _a.sent();
6037
+ sourceHandler = _b.sent();
6038
+ _b.label = 2;
6039
+ case 2:
6040
+ _b.trys.push([2, 7, 8, 9]);
6041
+ SCRAPERS_1 = __values(SCRAPERS), SCRAPERS_1_1 = SCRAPERS_1.next();
6042
+ _b.label = 3;
6043
+ case 3:
6044
+ if (!!SCRAPERS_1_1.done) return [3 /*break*/, 6];
6045
+ scraper = SCRAPERS_1_1.value;
6046
+ if (!scraper.mimeTypes.includes(sourceHandler.mimeType)
6047
+ // <- TODO: [🦔] Implement mime-type wildcards
6048
+ ) {
6049
+ return [3 /*break*/, 5];
6050
+ }
6051
+ return [4 /*yield*/, scraper.scrape(sourceHandler, options)];
6052
+ case 4:
6053
+ partialPiecesUnchecked = _b.sent();
6054
+ if (partialPiecesUnchecked !== null) {
6055
+ partialPieces = partialPiecesUnchecked;
6056
+ return [3 /*break*/, 6];
6057
+ }
6058
+ _b.label = 5;
6059
+ case 5:
6060
+ SCRAPERS_1_1 = SCRAPERS_1.next();
6061
+ return [3 /*break*/, 3];
6062
+ case 6: return [3 /*break*/, 9];
6063
+ case 7:
6064
+ e_1_1 = _b.sent();
6065
+ e_1 = { error: e_1_1 };
6066
+ return [3 /*break*/, 9];
6067
+ case 8:
6068
+ try {
6069
+ if (SCRAPERS_1_1 && !SCRAPERS_1_1.done && (_a = SCRAPERS_1.return)) _a.call(SCRAPERS_1);
6070
+ }
6071
+ finally { if (e_1) throw e_1.error; }
6072
+ return [7 /*endfinally*/];
6073
+ case 9:
6074
+ if (partialPieces === null) {
6075
+ throw new KnowledgeScrapeError(spaceTrim(function (block) { return "\n Cannot scrape knowledge from source: ".concat(knowledgeSource.sourceContent, "\n\n No scraper found for the mime type \"").concat(sourceHandler.mimeType, "\"\n\n Available scrapers:\n ").concat(block(SCRAPERS.flatMap(function (scraper) { return scraper.mimeTypes; })
6076
+ .map(function (mimeType) { return "- ".concat(mimeType); })
6077
+ .join('\n')), "\n\n\n "); }));
6078
+ }
4997
6079
  pieces = partialPieces.map(function (partialPiece) { return (__assign(__assign({}, partialPiece), { sources: [
4998
6080
  {
4999
6081
  name: knowledgeSource.name,
@@ -5001,13 +6083,14 @@ function prepareKnowledgePieces(knowledgeSources, options) {
5001
6083
  // <- TODO: [❎]
5002
6084
  },
5003
6085
  ] })); });
5004
- knowledgePrepared.push.apply(knowledgePrepared, __spreadArray([], __read(pieces), false));
6086
+ knowledgePreparedUnflatten[index] = pieces;
5005
6087
  return [2 /*return*/];
5006
6088
  }
5007
6089
  });
5008
6090
  }); })];
5009
6091
  case 1:
5010
- _b.sent();
6092
+ _c.sent();
6093
+ knowledgePrepared = knowledgePreparedUnflatten.flat();
5011
6094
  return [2 /*return*/, knowledgePrepared];
5012
6095
  }
5013
6096
  });
@@ -5024,7 +6107,7 @@ TODO: [🧊] This is how it can look in future
5024
6107
  >
5025
6108
  > export async function prepareKnowledgePieces(
5026
6109
  > knowledge: PrepareKnowledgeKnowledge,
5027
- > options: PrepareOptions,
6110
+ > options: PrepareAndScrapeOptions,
5028
6111
  > ):
5029
6112
  */
5030
6113
  /**
@@ -5137,6 +6220,9 @@ function preparePersona(personaDescription, options) {
5137
6220
  switch (_d.label) {
5138
6221
  case 0:
5139
6222
  llmTools = options.llmTools, _a = options.isVerbose, isVerbose = _a === void 0 ? IS_VERBOSE : _a;
6223
+ if (llmTools === undefined) {
6224
+ throw new MissingToolsError('LLM tools are required for preparing persona');
6225
+ }
5140
6226
  collection = createCollectionFromJson.apply(void 0, __spreadArray([], __read(PipelineCollection), false));
5141
6227
  _b = createPipelineExecutor;
5142
6228
  _c = {};
@@ -5280,7 +6366,7 @@ function prepareTemplates(pipeline, options) {
5280
6366
  */
5281
6367
  function preparePipeline(pipeline, options) {
5282
6368
  return __awaiter(this, void 0, void 0, function () {
5283
- var llmTools, _a, maxParallelCount, _b, isVerbose, parameters, templates,
6369
+ var llmTools, rootDirname, _a, maxParallelCount, _b, isVerbose, parameters, templates,
5284
6370
  /*
5285
6371
  <- TODO: [🧠][🪑] `promptbookVersion` */
5286
6372
  knowledgeSources /*
@@ -5293,8 +6379,11 @@ function preparePipeline(pipeline, options) {
5293
6379
  if (isPipelinePrepared(pipeline)) {
5294
6380
  return [2 /*return*/, pipeline];
5295
6381
  }
5296
- llmTools = options.llmTools, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a, _b = options.isVerbose, isVerbose = _b === void 0 ? IS_VERBOSE : _b;
6382
+ llmTools = options.llmTools, rootDirname = options.rootDirname, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a, _b = options.isVerbose, isVerbose = _b === void 0 ? IS_VERBOSE : _b;
5297
6383
  parameters = pipeline.parameters, templates = pipeline.templates, knowledgeSources = pipeline.knowledgeSources, personas = pipeline.personas;
6384
+ if (llmTools === undefined) {
6385
+ throw new MissingToolsError('LLM tools are required for preparing the pipeline');
6386
+ }
5298
6387
  llmToolsWithUsage = countTotalUsage(llmTools);
5299
6388
  currentPreparation = {
5300
6389
  id: 1,
@@ -5316,6 +6405,7 @@ function preparePipeline(pipeline, options) {
5316
6405
  switch (_a.label) {
5317
6406
  case 0: return [4 /*yield*/, preparePersona(persona.description, {
5318
6407
  llmTools: llmToolsWithUsage,
6408
+ rootDirname: rootDirname,
5319
6409
  maxParallelCount: maxParallelCount /* <- TODO: [🪂] */,
5320
6410
  isVerbose: isVerbose,
5321
6411
  })];
@@ -5330,11 +6420,7 @@ function preparePipeline(pipeline, options) {
5330
6420
  case 1:
5331
6421
  _c.sent();
5332
6422
  knowledgeSourcesPrepared = knowledgeSources.map(function (source) { return (__assign(__assign({}, source), { preparationIds: [/* TODO: [🧊] -> */ currentPreparation.id] })); });
5333
- return [4 /*yield*/, prepareKnowledgePieces(knowledgeSources /* <- TODO: [🧊] {knowledgeSources, knowledgePieces} */, {
5334
- llmTools: llmToolsWithUsage,
5335
- maxParallelCount: maxParallelCount /* <- TODO: [🪂] */,
5336
- isVerbose: isVerbose,
5337
- })];
6423
+ return [4 /*yield*/, prepareKnowledgePieces(knowledgeSources /* <- TODO: [🧊] {knowledgeSources, knowledgePieces} */, __assign(__assign({}, options), { llmTools: llmToolsWithUsage, rootDirname: rootDirname, maxParallelCount: maxParallelCount /* <- TODO: [🪂] */, isVerbose: isVerbose }))];
5338
6424
  case 2:
5339
6425
  partialknowledgePiecesPrepared = _c.sent();
5340
6426
  knowledgePiecesPrepared = partialknowledgePiecesPrepared.map(function (piece) { return (__assign(__assign({}, piece), { preparationIds: [/* TODO: [🧊] -> */ currentPreparation.id] })); });
@@ -5344,6 +6430,7 @@ function preparePipeline(pipeline, options) {
5344
6430
  knowledgePiecesCount: knowledgePiecesPrepared.length,
5345
6431
  }, {
5346
6432
  llmTools: llmToolsWithUsage,
6433
+ rootDirname: rootDirname,
5347
6434
  maxParallelCount: maxParallelCount /* <- TODO: [🪂] */,
5348
6435
  isVerbose: isVerbose,
5349
6436
  })];
@@ -5366,36 +6453,10 @@ function preparePipeline(pipeline, options) {
5366
6453
  * @see https://docs.anthropic.com/en/docs/test-and-evaluate/strengthen-guardrails/increase-consistency#specify-the-desired-output-format
5367
6454
  */
5368
6455
 
5369
- /**
5370
- * Tests if given string is valid URL.
5371
- *
5372
- * Note: This does not check if the file exists only if the path is valid
5373
- * @public exported from `@promptbook/utils`
5374
- */
5375
- function isValidFilePath(filePath) {
5376
- if (typeof filePath !== 'string') {
5377
- return false;
5378
- }
5379
- var filePathSlashes = filePath.split('\\').join('/');
5380
- // Absolute Unix path: /hello.txt
5381
- if (/^(\/)/i.test(filePathSlashes)) {
5382
- return true;
5383
- }
5384
- // Absolute Windows path: /hello.txt
5385
- if (/^([A-Z]{1,2}:\/?)\//i.test(filePathSlashes)) {
5386
- return true;
5387
- }
5388
- // Relative path: ./hello.txt
5389
- if (/^(\.\.?\/)+/i.test(filePathSlashes)) {
5390
- return true;
5391
- }
5392
- return false;
5393
- }
5394
-
5395
6456
  /**
5396
6457
  * Parses the knowledge command
5397
6458
  *
5398
- * @see ./KNOWLEDGE-README.md for more details
6459
+ * @see `documentationUrl` for more details
5399
6460
  * @private within the commands folder
5400
6461
  */
5401
6462
  var knowledgeCommandParser = {
@@ -5413,7 +6474,7 @@ var knowledgeCommandParser = {
5413
6474
  */
5414
6475
  description: "Tells promptbook which external knowledge to use",
5415
6476
  /**
5416
- * Link to discussion
6477
+ * Link to documentation
5417
6478
  */
5418
6479
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/41',
5419
6480
  /**
@@ -5457,11 +6518,8 @@ var knowledgeCommandParser = {
5457
6518
  */
5458
6519
  $applyToPipelineJson: function (command, $pipelineJson) {
5459
6520
  var sourceContent = command.sourceContent;
5460
- var name = 'source-' + sha256(hexEncoder.parse(JSON.stringify(sourceContent))).toString( /* hex */);
5461
- // <- TODO: [🥬] Encapsulate sha256 to some private utility function
5462
- // <- TODO: This should be replaced with a better name later in preparation (done with some propper LLM summarization)
5463
6521
  $pipelineJson.knowledgeSources.push({
5464
- name: name,
6522
+ name: sourceContentToName(sourceContent),
5465
6523
  sourceContent: sourceContent,
5466
6524
  });
5467
6525
  },
@@ -5491,7 +6549,7 @@ var knowledgeCommandParser = {
5491
6549
  /**
5492
6550
  * Parses the template command
5493
6551
  *
5494
- * @see ./TEMPLATE-README.md for more details
6552
+ * @see `documentationUrl` for more details
5495
6553
  * @private within the commands folder
5496
6554
  */
5497
6555
  var templateCommandParser = {
@@ -5527,7 +6585,7 @@ var templateCommandParser = {
5527
6585
  */
5528
6586
  description: "What should the code template template do",
5529
6587
  /**
5530
- * Link to discussion
6588
+ * Link to documentation
5531
6589
  */
5532
6590
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/64',
5533
6591
  /**
@@ -5752,7 +6810,7 @@ function parseNumber(value) {
5752
6810
  /**
5753
6811
  * Parses the expect command
5754
6812
  *
5755
- * @see ./EXPECT-README.md for more details
6813
+ * @see `documentationUrl` for more details
5756
6814
  * @private within the commands folder
5757
6815
  */
5758
6816
  var expectCommandParser = {
@@ -5770,7 +6828,7 @@ var expectCommandParser = {
5770
6828
  */
5771
6829
  description: spaceTrim("\n Expect command describes the desired output of the template (after post-processing)\n It can set limits for the maximum/minimum length of the output, measured in characters, words, sentences, paragraphs or some other shape of the output.\n "),
5772
6830
  /**
5773
- * Link to discussion
6831
+ * Link to documentation
5774
6832
  */
5775
6833
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/30',
5776
6834
  /**
@@ -5930,10 +6988,6 @@ function normalizeTo_SCREAMING_CASE(text) {
5930
6988
  charType = 'NUMBER';
5931
6989
  normalizedChar = char;
5932
6990
  }
5933
- else if (/^\/$/.test(char)) {
5934
- charType = 'SLASH';
5935
- normalizedChar = char;
5936
- }
5937
6991
  else {
5938
6992
  charType = 'OTHER';
5939
6993
  normalizedChar = '_';
@@ -6140,7 +7194,7 @@ function validateParameterName(parameterName) {
6140
7194
  *
6141
7195
  * Note: @@@ This command is used as foreach for new commands - it should NOT be used in any `.ptbk.md` file
6142
7196
  *
6143
- * @see ./FOREACH-README.md for more details <- TODO: @@@ Write theese README files OR remove this link + add annotation here (to all commands)
7197
+ * @see `documentationUrl` for more details
6144
7198
  * @private within the commands folder
6145
7199
  */
6146
7200
  var foreachCommandParser = {
@@ -6162,7 +7216,7 @@ var foreachCommandParser = {
6162
7216
  */
6163
7217
  description: "@@",
6164
7218
  /**
6165
- * Link to discussion
7219
+ * Link to documentation
6166
7220
  */
6167
7221
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/148',
6168
7222
  /**
@@ -6297,7 +7351,7 @@ var foreachCommandParser = {
6297
7351
  /**
6298
7352
  * Parses the format command
6299
7353
  *
6300
- * @see ./FORMAT-README.md for more details
7354
+ * @see `documentationUrl` for more details
6301
7355
  * @private within the commands folder
6302
7356
  */
6303
7357
  var formatCommandParser = {
@@ -6315,7 +7369,7 @@ var formatCommandParser = {
6315
7369
  */
6316
7370
  description: spaceTrim("\n Format command describes the desired output of the template (after post-processing)\n It can set limits for the maximum/minimum length of the output, measured in characters, words, sentences, paragraphs or some other shape of the output.\n "),
6317
7371
  /**
6318
- * Link to discussion
7372
+ * Link to documentation
6319
7373
  */
6320
7374
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/30',
6321
7375
  /**
@@ -6371,7 +7425,7 @@ var formatCommandParser = {
6371
7425
  /**
6372
7426
  * Parses the joker command
6373
7427
  *
6374
- * @see ./JOKER-README.md for more details
7428
+ * @see `documentationUrl` for more details
6375
7429
  * @private within the commands folder
6376
7430
  */
6377
7431
  var jokerCommandParser = {
@@ -6389,7 +7443,7 @@ var jokerCommandParser = {
6389
7443
  */
6390
7444
  description: "Joker parameter is used instead of executing the template result if jokers value meets the expectations requirements",
6391
7445
  /**
6392
- * Link to discussion
7446
+ * Link to documentation
6393
7447
  */
6394
7448
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/66',
6395
7449
  /**
@@ -6450,7 +7504,7 @@ var MODEL_VARIANTS = ['COMPLETION', 'CHAT', 'EMBEDDING' /* <- TODO [🏳] */ /*
6450
7504
  /**
6451
7505
  * Parses the model command
6452
7506
  *
6453
- * @see ./MODEL-README.md for more details
7507
+ * @see `documentationUrl` for more details
6454
7508
  * @private within the commands folder
6455
7509
  */
6456
7510
  var modelCommandParser = {
@@ -6468,7 +7522,7 @@ var modelCommandParser = {
6468
7522
  */
6469
7523
  description: "Tells which `modelRequirements` (for example which model) to use for the prompt template execution",
6470
7524
  /**
6471
- * Link to discussion
7525
+ * Link to documentation
6472
7526
  */
6473
7527
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/67',
6474
7528
  /**
@@ -6550,7 +7604,11 @@ var modelCommandParser = {
6550
7604
  // TODO: [🚜] DRY
6551
7605
  if ($templateJson.modelRequirements[command.key] !== undefined) {
6552
7606
  if ($templateJson.modelRequirements[command.key] === command.value) {
6553
- console.warn("Multiple commands `MODEL ".concat(command.key, " ").concat(command.value, "` in the template \"").concat($templateJson.title || $templateJson.name, "\""));
7607
+ console.warn("Multiple commands `MODEL ".concat({
7608
+ modelName: 'NAME',
7609
+ modelVariant: 'VARIANT',
7610
+ maxTokens: '???',
7611
+ }[command.key], " ").concat(command.value, "` in the template \"").concat($templateJson.title || $templateJson.name, "\""));
6554
7612
  }
6555
7613
  else {
6556
7614
  throw new ParseError(spaceTrim("\n Redefinition of MODEL `".concat(command.key, "` in the template \"").concat($templateJson.title || $templateJson.name, "\"\n\n You have used:\n - MODEL ").concat(command.key, " ").concat($templateJson.modelRequirements[command.key], "\n - MODEL ").concat(command.key, " ").concat(command.value, "\n ")));
@@ -6593,7 +7651,7 @@ var modelCommandParser = {
6593
7651
  /**
6594
7652
  * Parses the parameter command
6595
7653
  *
6596
- * @see ./PARAMETER-README.md for more details
7654
+ * @see `documentationUrl` for more details
6597
7655
  * @private within the commands folder
6598
7656
  */
6599
7657
  var parameterCommandParser = {
@@ -6618,7 +7676,7 @@ var parameterCommandParser = {
6618
7676
  */
6619
7677
  description: "Describes one parameter of the template",
6620
7678
  /**
6621
- * Link to discussion
7679
+ * Link to documentation
6622
7680
  */
6623
7681
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/68',
6624
7682
  /**
@@ -6703,7 +7761,7 @@ var parameterCommandParser = {
6703
7761
  /**
6704
7762
  * Parses the persona command
6705
7763
  *
6706
- * @see ./PERSONA-README.md for more details
7764
+ * @see `documentationUrl` for more details
6707
7765
  * @private within the commands folder
6708
7766
  */
6709
7767
  var personaCommandParser = {
@@ -6725,7 +7783,7 @@ var personaCommandParser = {
6725
7783
  */
6726
7784
  description: "Persona command is used to specify who the system is, it will be transformed into system message, top_t,...",
6727
7785
  /**
6728
- * Link to discussion
7786
+ * Link to documentation
6729
7787
  */
6730
7788
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/22',
6731
7789
  /**
@@ -6841,7 +7899,7 @@ function isValidJavascriptName(javascriptName) {
6841
7899
  /**
6842
7900
  * Parses the postprocess command
6843
7901
  *
6844
- * @see ./POSTPROCESS-README.md for more details
7902
+ * @see `documentationUrl` for more details
6845
7903
  * @private within the commands folder
6846
7904
  */
6847
7905
  var postprocessCommandParser = {
@@ -6860,7 +7918,7 @@ var postprocessCommandParser = {
6860
7918
  */
6861
7919
  description: "Defines the postprocess function to be used on the result from LLM and before the result is validated",
6862
7920
  /**
6863
- * Link to discussion
7921
+ * Link to documentation
6864
7922
  */
6865
7923
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/31',
6866
7924
  /**
@@ -6921,7 +7979,7 @@ var postprocessCommandParser = {
6921
7979
  /**
6922
7980
  * Parses the PROMPTBOOK_VERSION command
6923
7981
  *
6924
- * @see ./PROMPTBOOK_VERSION-README.md for more details
7982
+ * @see `documentationUrl` for more details
6925
7983
  * @private within the commands folder
6926
7984
  */
6927
7985
  var promptbookVersionCommandParser = {
@@ -6940,7 +7998,7 @@ var promptbookVersionCommandParser = {
6940
7998
  */
6941
7999
  description: "Which version of the promptbook is the .ptbk.md using",
6942
8000
  /**
6943
- * Link to discussion
8001
+ * Link to documentation
6944
8002
  */
6945
8003
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/69',
6946
8004
  /**
@@ -6999,7 +8057,7 @@ var promptbookVersionCommandParser = {
6999
8057
  /**
7000
8058
  * Parses the url command
7001
8059
  *
7002
- * @see ./URL-README.md for more details
8060
+ * @see `documentationUrl` for more details
7003
8061
  * @private within the commands folder
7004
8062
  */
7005
8063
  var urlCommandParser = {
@@ -7022,7 +8080,7 @@ var urlCommandParser = {
7022
8080
  */
7023
8081
  description: "Declares unique URL for the pipeline",
7024
8082
  /**
7025
- * Link to discussion
8083
+ * Link to documentation
7026
8084
  */
7027
8085
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/70',
7028
8086
  /**
@@ -7102,7 +8160,7 @@ var urlCommandParser = {
7102
8160
  /**
7103
8161
  * Parses the action command
7104
8162
  *
7105
- * @see ./ACTION-README.md for more details
8163
+ * @see `documentationUrl` for more details
7106
8164
  * @private within the commands folder
7107
8165
  */
7108
8166
  var actionCommandParser = {
@@ -7120,7 +8178,7 @@ var actionCommandParser = {
7120
8178
  */
7121
8179
  description: "Actions influences from the pipeline or template into external world. Like turning on a light, sending an email, etc.",
7122
8180
  /**
7123
- * Link to discussion
8181
+ * Link to documentation
7124
8182
  */
7125
8183
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/72',
7126
8184
  /**
@@ -7172,7 +8230,7 @@ var actionCommandParser = {
7172
8230
  /**
7173
8231
  * Parses the instrument command
7174
8232
  *
7175
- * @see ./INSTRUMENT-README.md for more details
8233
+ * @see `documentationUrl` for more details
7176
8234
  * @private within the commands folder
7177
8235
  */
7178
8236
  var instrumentCommandParser = {
@@ -7190,7 +8248,7 @@ var instrumentCommandParser = {
7190
8248
  */
7191
8249
  description: "Instrument command is used to specify the instrument to be used in the pipeline or template like search, calculate, etc.",
7192
8250
  /**
7193
- * Link to discussion
8251
+ * Link to documentation
7194
8252
  */
7195
8253
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/71',
7196
8254
  /**
@@ -7245,7 +8303,7 @@ var instrumentCommandParser = {
7245
8303
  *
7246
8304
  * Note: @@@ This command is used as boilerplate for new commands - it should NOT be used in any `.ptbk.md` file
7247
8305
  *
7248
- * @see ./BOILERPLATE-README.md for more details <- TODO: @@@ Write theese README files OR remove this link + add annotation here (to all commands)
8306
+ * @see `documentationUrl` for more details
7249
8307
  * @private within the commands folder
7250
8308
  */
7251
8309
  var boilerplateCommandParser = {
@@ -7267,7 +8325,7 @@ var boilerplateCommandParser = {
7267
8325
  */
7268
8326
  description: "@@",
7269
8327
  /**
7270
- * Link to discussion
8328
+ * Link to documentation
7271
8329
  */
7272
8330
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
7273
8331
  /**
@@ -8117,16 +9175,17 @@ function pipelineStringToJsonSync(pipelineString) {
8117
9175
  * @public exported from `@promptbook/core`
8118
9176
  */
8119
9177
  function pipelineStringToJson(pipelineString, options) {
8120
- if (options === void 0) { options = { llmTools: null }; }
8121
9178
  return __awaiter(this, void 0, void 0, function () {
8122
9179
  var llmTools, pipelineJson;
8123
9180
  return __generator(this, function (_a) {
8124
9181
  switch (_a.label) {
8125
9182
  case 0:
8126
- llmTools = options.llmTools;
9183
+ llmTools = (options || {}).llmTools;
8127
9184
  pipelineJson = pipelineStringToJsonSync(pipelineString);
8128
- if (!(llmTools !== null)) return [3 /*break*/, 2];
8129
- return [4 /*yield*/, preparePipeline(pipelineJson, { llmTools: llmTools })];
9185
+ if (!(llmTools !== undefined)) return [3 /*break*/, 2];
9186
+ return [4 /*yield*/, preparePipeline(pipelineJson, options || {
9187
+ rootDirname: null,
9188
+ })];
8130
9189
  case 1:
8131
9190
  pipelineJson = _a.sent();
8132
9191
  _a.label = 2;
@@ -8334,6 +9393,38 @@ function stringifyPipelineJson(pipeline) {
8334
9393
  * TODO: [🍙] Make some standard order of json properties
8335
9394
  */
8336
9395
 
9396
+ /**
9397
+ * Delegates the user interaction to an async callback function
9398
+ * You need to provide your own implementation of this callback function and bind it to the UI.
9399
+ *
9400
+ * @public exported from `@promptbook/core`
9401
+ */
9402
+ var CallbackInterfaceTools = /** @class */ (function () {
9403
+ function CallbackInterfaceTools(options) {
9404
+ this.options = options;
9405
+ }
9406
+ /**
9407
+ * Trigger the custom callback function
9408
+ */
9409
+ CallbackInterfaceTools.prototype.promptDialog = function (options) {
9410
+ return __awaiter(this, void 0, void 0, function () {
9411
+ var answer;
9412
+ return __generator(this, function (_a) {
9413
+ switch (_a.label) {
9414
+ case 0: return [4 /*yield*/, this.options.callback(options)];
9415
+ case 1:
9416
+ answer = _a.sent();
9417
+ if (this.options.isVerbose) {
9418
+ console.info(spaceTrim$1(function (block) { return "\n \uD83D\uDCD6 ".concat(block(options.promptTitle), "\n \uD83D\uDC64 ").concat(block(answer), "\n "); }));
9419
+ }
9420
+ return [2 /*return*/, answer];
9421
+ }
9422
+ });
9423
+ });
9424
+ };
9425
+ return CallbackInterfaceTools;
9426
+ }());
9427
+
8337
9428
  /**
8338
9429
  * Pretty print an embedding vector for logging
8339
9430
  *
@@ -8409,38 +9500,6 @@ function usageToHuman(usage) {
8409
9500
  * TODO: [🏛] Maybe make some markdown builder
8410
9501
  */
8411
9502
 
8412
- /**
8413
- * Delagates the user interaction to a async callback function
8414
- * You need to provide your own implementation of this callback function and its bind to UI.
8415
- *
8416
- * @public exported from `@promptbook/core`
8417
- */
8418
- var CallbackInterfaceTools = /** @class */ (function () {
8419
- function CallbackInterfaceTools(options) {
8420
- this.options = options;
8421
- }
8422
- /**
8423
- * Trigger the custom callback function
8424
- */
8425
- CallbackInterfaceTools.prototype.promptDialog = function (options) {
8426
- return __awaiter(this, void 0, void 0, function () {
8427
- var answer;
8428
- return __generator(this, function (_a) {
8429
- switch (_a.label) {
8430
- case 0: return [4 /*yield*/, this.options.callback(options)];
8431
- case 1:
8432
- answer = _a.sent();
8433
- if (this.options.isVerbose) {
8434
- console.info(spaceTrim$1(function (block) { return "\n \uD83D\uDCD6 ".concat(block(options.promptTitle), "\n \uD83D\uDC64 ").concat(block(answer), "\n "); }));
8435
- }
8436
- return [2 /*return*/, answer];
8437
- }
8438
- });
8439
- });
8440
- };
8441
- return CallbackInterfaceTools;
8442
- }());
8443
-
8444
9503
  /**
8445
9504
  * @@@
8446
9505
  *
@@ -9390,5 +10449,5 @@ function executionReportJsonToString(executionReportJson, options) {
9390
10449
  * TODO: [🧠] Should be in generated file GENERATOR_WARNING
9391
10450
  */
9392
10451
 
9393
- export { $llmToolsMetadataRegister, $llmToolsRegister, AbstractFormatError, CLAIM, CallbackInterfaceTools, CollectionError, CsvFormatDefinition, CsvFormatError, DEFAULT_CSV_SETTINGS, DEFAULT_REMOTE_URL, DEFAULT_REMOTE_URL_PATH, ERRORS, EXECUTIONS_CACHE_DIRNAME, EXPECTATION_UNITS, EnvironmentMismatchError, ExecutionReportStringOptionsDefaults, ExpectError, IS_VERBOSE, LimitReachedError, MANDATORY_CSV_SETTINGS, MAX_EXECUTION_ATTEMPTS, MAX_FILENAME_LENGTH, MAX_KNOWLEDGE_SOURCES_SCRAPING_DEPTH, MAX_KNOWLEDGE_SOURCES_SCRAPING_TOTAL, MAX_PARALLEL_COUNT, MODEL_VARIANTS, MemoryStorage, NotFoundError, NotYetImplementedError, PIPELINE_COLLECTION_BASE_FILENAME, PROMPTBOOK_VERSION, ParseError, PipelineExecutionError, PipelineLogicError, PipelineUrlError, PrefixStorage, RESERVED_PARAMETER_NAMES, TemplateTypes, TextFormatDefinition, UNCERTAIN_USAGE, UnexpectedError, ZERO_USAGE, _AnthropicClaudeMetadataRegistration, _AzureOpenAiMetadataRegistration, _OpenAiAssistantMetadataRegistration, _OpenAiMetadataRegistration, addUsage, assertsExecutionSuccessful, cacheLlmTools, collectionToJson, countTotalUsage, createCollectionFromJson, createCollectionFromPromise, createCollectionFromUrl, createLlmToolsFromConfiguration, createPipelineExecutor, createSubcollection, embeddingVectorToString, executionReportJsonToString, isPassingExpectations, isPipelinePrepared, joinLlmExecutionTools, limitTotalUsage, pipelineJsonToString, pipelineStringToJson, pipelineStringToJsonSync, prepareKnowledgeFromMarkdown, prepareKnowledgePieces, preparePersona, preparePipeline, prepareTemplates, prettifyPipelineString, stringifyPipelineJson, unpreparePipeline, usageToHuman, usageToWorktime, validatePipeline };
10452
+ export { $llmToolsMetadataRegister, $llmToolsRegister, AbstractFormatError, CLAIM, CallbackInterfaceTools, CollectionError, CsvFormatDefinition, CsvFormatError, DEFAULT_CSV_SETTINGS, DEFAULT_REMOTE_URL, DEFAULT_REMOTE_URL_PATH, ERRORS, EXECUTIONS_CACHE_DIRNAME, EXPECTATION_UNITS, EnvironmentMismatchError, ExecutionReportStringOptionsDefaults, ExpectError, IS_VERBOSE, KnowledgeScrapeError, LimitReachedError, MANDATORY_CSV_SETTINGS, MAX_EXECUTION_ATTEMPTS, MAX_FILENAME_LENGTH, MAX_KNOWLEDGE_SOURCES_SCRAPING_DEPTH, MAX_KNOWLEDGE_SOURCES_SCRAPING_TOTAL, MAX_PARALLEL_COUNT, MODEL_VARIANTS, MemoryStorage, MissingToolsError, NotFoundError, NotYetImplementedError, PIPELINE_COLLECTION_BASE_FILENAME, PROMPTBOOK_VERSION, ParseError, PipelineExecutionError, PipelineLogicError, PipelineUrlError, PrefixStorage, RESERVED_PARAMETER_NAMES, SCRAPERS, SCRAPE_CACHE_DIRNAME, TemplateTypes, TextFormatDefinition, UNCERTAIN_USAGE, UnexpectedError, ZERO_USAGE, _AnthropicClaudeMetadataRegistration, _AzureOpenAiMetadataRegistration, _OpenAiAssistantMetadataRegistration, _OpenAiMetadataRegistration, addUsage, assertsExecutionSuccessful, cacheLlmTools, collectionToJson, countTotalUsage, createCollectionFromJson, createCollectionFromPromise, createCollectionFromUrl, createLlmToolsFromConfiguration, createPipelineExecutor, createSubcollection, documentScraper, embeddingVectorToString, executionReportJsonToString, isPassingExpectations, isPipelinePrepared, joinLlmExecutionTools, legacyDocumentScraper, limitTotalUsage, markdownScraper, pdfScraper, pipelineJsonToString, pipelineStringToJson, pipelineStringToJsonSync, prepareKnowledgePieces, preparePersona, preparePipeline, prepareTemplates, prettifyPipelineString, stringifyPipelineJson, unpreparePipeline, usageToHuman, usageToWorktime, validatePipeline, websiteScraper };
9394
10453
  //# sourceMappingURL=index.es.js.map