@promptbook/core 0.72.0-6 → 0.72.0-8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. package/README.md +6 -0
  2. package/esm/index.es.js +1514 -454
  3. package/esm/index.es.js.map +1 -1
  4. package/esm/typings/src/_packages/browser.index.d.ts +1 -1
  5. package/esm/typings/src/_packages/core.index.d.ts +22 -10
  6. package/esm/typings/src/_packages/node.index.d.ts +6 -2
  7. package/esm/typings/src/_packages/types.index.d.ts +28 -20
  8. package/esm/typings/src/cli/cli-commands/about.d.ts +1 -1
  9. package/esm/typings/src/cli/cli-commands/hello.d.ts +2 -1
  10. package/esm/typings/src/cli/cli-commands/make.d.ts +1 -1
  11. package/esm/typings/src/cli/cli-commands/prettify.d.ts +2 -1
  12. package/esm/typings/src/cli/cli-commands/test-command.d.ts +13 -0
  13. package/esm/typings/src/cli/main.d.ts +1 -1
  14. package/esm/typings/src/cli/promptbookCli.d.ts +1 -1
  15. package/esm/typings/src/collection/constructors/createCollectionFromDirectory.d.ts +8 -5
  16. package/esm/typings/src/commands/EXPECT/expectCommandParser.d.ts +1 -1
  17. package/esm/typings/src/commands/FOREACH/foreachCommandParser.d.ts +1 -1
  18. package/esm/typings/src/commands/FORMAT/formatCommandParser.d.ts +1 -1
  19. package/esm/typings/src/commands/JOKER/jokerCommandParser.d.ts +1 -1
  20. package/esm/typings/src/commands/KNOWLEDGE/knowledgeCommandParser.d.ts +1 -1
  21. package/esm/typings/src/commands/KNOWLEDGE/utils/sourceContentToName.d.ts +11 -0
  22. package/esm/typings/src/commands/KNOWLEDGE/utils/sourceContentToName.test.d.ts +4 -0
  23. package/esm/typings/src/commands/MODEL/modelCommandParser.d.ts +1 -1
  24. package/esm/typings/src/commands/PARAMETER/parameterCommandParser.d.ts +1 -1
  25. package/esm/typings/src/commands/PERSONA/personaCommandParser.d.ts +1 -1
  26. package/esm/typings/src/commands/POSTPROCESS/postprocessCommandParser.d.ts +1 -1
  27. package/esm/typings/src/commands/PROMPTBOOK_VERSION/promptbookVersionCommandParser.d.ts +1 -1
  28. package/esm/typings/src/commands/TEMPLATE/templateCommandParser.d.ts +1 -1
  29. package/esm/typings/src/commands/URL/urlCommandParser.d.ts +1 -1
  30. package/esm/typings/src/commands/X_ACTION/actionCommandParser.d.ts +1 -1
  31. package/esm/typings/src/commands/X_INSTRUMENT/instrumentCommandParser.d.ts +1 -1
  32. package/esm/typings/src/commands/_BOILERPLATE/boilerplateCommandParser.d.ts +1 -1
  33. package/esm/typings/src/config.d.ts +10 -0
  34. package/esm/typings/src/conversion/pipelineStringToJson.d.ts +2 -15
  35. package/esm/typings/src/conversion/validation/_importPipeline.d.ts +1 -1
  36. package/esm/typings/src/conversion/validation/validatePipeline.d.ts +5 -5
  37. package/esm/typings/src/{knowledge/dialogs → dialogs}/callback/CallbackInterfaceTools.d.ts +2 -2
  38. package/esm/typings/src/{knowledge/dialogs → dialogs}/callback/CallbackInterfaceToolsOptions.d.ts +2 -2
  39. package/esm/typings/src/{knowledge/dialogs → dialogs}/simple-prompt/SimplePromptInterfaceTools.d.ts +4 -4
  40. package/esm/typings/src/errors/KnowledgeScrapeError.d.ts +9 -0
  41. package/esm/typings/src/errors/MissingToolsError.d.ts +9 -0
  42. package/esm/typings/src/execution/ExecutionTools.d.ts +3 -3
  43. package/esm/typings/src/execution/createPipelineExecutor/00-CreatePipelineExecutorOptions.d.ts +5 -2
  44. package/esm/typings/src/execution/createPipelineExecutor/00-CreatePipelineExecutorSettings.d.ts +2 -13
  45. package/esm/typings/src/execution/createPipelineExecutor/00-createPipelineExecutor.d.ts +1 -1
  46. package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +1 -1
  47. package/esm/typings/src/execution/translation/automatic-translate/translateMessages.d.ts +3 -0
  48. package/esm/typings/src/execution/utils/forEachAsync.d.ts +1 -0
  49. package/esm/typings/src/llm-providers/_common/createLlmToolsFromConfigurationFromEnv.d.ts +1 -1
  50. package/esm/typings/src/llm-providers/_common/createLlmToolsFromEnv.d.ts +1 -1
  51. package/esm/typings/src/llm-providers/_common/getLlmToolsForCli.d.ts +1 -1
  52. package/esm/typings/src/llm-providers/anthropic-claude/playground/playground.d.ts +1 -0
  53. package/esm/typings/src/llm-providers/azure-openai/AzureOpenAiExecutionTools.d.ts +6 -0
  54. package/esm/typings/src/llm-providers/azure-openai/playground/playground.d.ts +1 -0
  55. package/esm/typings/src/llm-providers/langtail/playground/playground.d.ts +3 -0
  56. package/esm/typings/src/llm-providers/multiple/playground/playground.d.ts +3 -0
  57. package/esm/typings/src/llm-providers/openai/playground/playground.d.ts +1 -0
  58. package/esm/typings/src/llm-providers/remote/playground/playground.d.ts +3 -0
  59. package/esm/typings/src/personas/preparePersona.d.ts +2 -2
  60. package/esm/typings/src/prepare/PrepareAndScrapeOptions.d.ts +67 -0
  61. package/esm/typings/src/prepare/preparePipeline.d.ts +2 -2
  62. package/esm/typings/src/prepare/prepareTemplates.d.ts +2 -2
  63. package/esm/typings/src/scrapers/_common/Converter.d.ts +28 -0
  64. package/esm/typings/src/scrapers/_common/Scraper.d.ts +71 -0
  65. package/esm/typings/src/scrapers/_common/ScraperIntermediateSource.d.ts +11 -0
  66. package/esm/typings/src/{knowledge/prepare-knowledge → scrapers}/_common/prepareKnowledgePieces.d.ts +4 -4
  67. package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +33 -0
  68. package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.test.d.ts +4 -0
  69. package/esm/typings/src/scrapers/_common/utils/makeKnowledgeSourceHandler.d.ts +10 -0
  70. package/esm/typings/src/scrapers/document/documentScraper.d.ts +37 -0
  71. package/esm/typings/src/scrapers/document/documentScraper.test.d.ts +4 -0
  72. package/esm/typings/src/scrapers/document/playground/document-scraper-playground.d.ts +5 -0
  73. package/esm/typings/src/scrapers/document-legacy/legacyDocumentScraper.d.ts +37 -0
  74. package/esm/typings/src/scrapers/document-legacy/legacyDocumentScraper.test.d.ts +4 -0
  75. package/esm/typings/src/scrapers/document-legacy/playground/legacy-document-scraper-playground.d.ts +5 -0
  76. package/esm/typings/src/scrapers/index.d.ts +8 -0
  77. package/esm/typings/src/scrapers/markdown/markdownScraper.d.ts +29 -0
  78. package/esm/typings/src/scrapers/markdown/playground/markdown-scraper-playground.d.ts +5 -0
  79. package/esm/typings/src/scrapers/pdf/pdfScraper.d.ts +35 -0
  80. package/esm/typings/src/scrapers/pdf/playground/pdf-scraper-playground.d.ts +5 -0
  81. package/esm/typings/src/scrapers/website/playground/website-scraper-playground.d.ts +5 -0
  82. package/esm/typings/src/scrapers/website/utils/markdownConverter.d.ts +12 -0
  83. package/esm/typings/src/scrapers/website/websiteScraper.d.ts +43 -0
  84. package/esm/typings/src/storage/{files-storage/FilesStorage.d.ts → file-cache-storage/FileCacheStorage.d.ts} +5 -5
  85. package/esm/typings/src/storage/file-cache-storage/FileCacheStorageOptions.d.ts +10 -0
  86. package/esm/typings/src/storage/{files-storage → file-cache-storage}/utils/nameToSubfolderPath.d.ts +1 -1
  87. package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.test.d.ts +1 -0
  88. package/esm/typings/src/storage/local-storage/getLocalStorage.d.ts +1 -1
  89. package/esm/typings/src/storage/local-storage/getSessionStorage.d.ts +1 -1
  90. package/esm/typings/src/types/PipelineJson/KnowledgeSourceJson.d.ts +9 -2
  91. package/esm/typings/src/types/PipelineJson/PipelineJson.d.ts +2 -2
  92. package/esm/typings/src/types/typeAliases.d.ts +8 -11
  93. package/esm/typings/src/utils/execCommand/$execCommand.d.ts +14 -0
  94. package/esm/typings/src/utils/execCommand/$execCommands.d.ts +17 -0
  95. package/esm/typings/src/utils/execCommand/IExecCommandOptions.d.ts +23 -0
  96. package/esm/typings/src/utils/execCommand/execCommand.test.d.ts +1 -0
  97. package/esm/typings/src/utils/execCommand/execCommandNormalizeOptions.d.ts +10 -0
  98. package/esm/typings/src/utils/execCommand/execCommandNormalizeOptions.test.d.ts +1 -0
  99. package/esm/typings/src/utils/files/$isDirectoryExisting.d.ts +3 -3
  100. package/esm/typings/src/utils/files/$isFileExisting.d.ts +3 -3
  101. package/esm/typings/src/utils/files/$listAllFiles.d.ts +5 -4
  102. package/esm/typings/src/utils/files/extensionToMimeType.d.ts +8 -0
  103. package/esm/typings/src/utils/files/extensionToMimeType.test.d.ts +1 -0
  104. package/esm/typings/src/utils/files/getFileExtension.d.ts +8 -0
  105. package/esm/typings/src/utils/files/getFileExtension.test.d.ts +1 -0
  106. package/esm/typings/src/utils/validators/filePath/isValidFilePath.d.ts +2 -2
  107. package/package.json +6 -1
  108. package/umd/index.umd.js +1518 -459
  109. package/umd/index.umd.js.map +1 -1
  110. package/esm/typings/src/knowledge/prepare-knowledge/_common/Scraper.d.ts +0 -37
  111. package/esm/typings/src/knowledge/prepare-knowledge/markdown/playground/markdown-knowledge-playground.d.ts +0 -2
  112. package/esm/typings/src/knowledge/prepare-knowledge/markdown/prepareKnowledgeFromMarkdown.d.ts +0 -14
  113. package/esm/typings/src/knowledge/prepare-knowledge/pdf/prepareKnowledgeFromPdf.d.ts +0 -15
  114. package/esm/typings/src/prepare/PrepareOptions.d.ts +0 -22
  115. package/esm/typings/src/storage/files-storage/FilesStorageOptions.d.ts +0 -10
  116. /package/esm/typings/src/{knowledge/dialogs → dialogs}/user-interface-execution-tools.test.d.ts +0 -0
  117. /package/esm/typings/src/{knowledge/prepare-knowledge → scrapers}/_common/prepareKnowledgePieces.test.d.ts +0 -0
  118. /package/esm/typings/src/{knowledge/prepare-knowledge/markdown/prepareKnowledgeFromMarkdown.test.d.ts → scrapers/markdown/markdownScraper.test.d.ts} +0 -0
  119. /package/esm/typings/src/{knowledge/prepare-knowledge/pdf/prepareKnowledgeFromPdf.test.d.ts → scrapers/website/utils/markdownConverter.test.d.ts} +0 -0
  120. /package/esm/typings/src/{storage/files-storage/utils/nameToSubfolderPath.test.d.ts → scrapers/website/websiteScraper.test.d.ts} +0 -0
package/esm/index.es.js CHANGED
@@ -1,9 +1,18 @@
1
1
  import spaceTrim, { spaceTrim as spaceTrim$1 } from 'spacetrim';
2
2
  import { format } from 'prettier';
3
3
  import parserHtml from 'prettier/parser-html';
4
+ import { stat, access, constants, mkdir, rm, readFile, rmdir, rename, readdir, writeFile } from 'fs/promises';
5
+ import { basename, join, dirname } from 'path';
6
+ import { spawn } from 'child_process';
7
+ import colors from 'colors';
4
8
  import { forTime } from 'waitasecond';
5
- import { unparse, parse } from 'papaparse';
9
+ import { SHA256 } from 'crypto-js';
6
10
  import hexEncoder from 'crypto-js/enc-hex';
11
+ import { unparse, parse } from 'papaparse';
12
+ import { Readability } from '@mozilla/readability';
13
+ import { JSDOM } from 'jsdom';
14
+ import { Converter } from 'showdown';
15
+ import { lookup } from 'mime-types';
7
16
  import sha256 from 'crypto-js/sha256';
8
17
  import moment from 'moment';
9
18
 
@@ -11,7 +20,7 @@ import moment from 'moment';
11
20
  /**
12
21
  * The version of the Promptbook library
13
22
  */
14
- var PROMPTBOOK_VERSION = '0.72.0-5';
23
+ var PROMPTBOOK_VERSION = '0.72.0-7';
15
24
  // TODO: [main] !!!! List here all the versions and annotate + put into script
16
25
 
17
26
  /*! *****************************************************************************
@@ -683,9 +692,19 @@ var MAX_KNOWLEDGE_SOURCES_SCRAPING_TOTAL = 200;
683
692
  /**
684
693
  * Where to store the cache of executions for promptbook CLI
685
694
  *
695
+ * Note: When the folder does not exist, it is created recursively
696
+ *
686
697
  * @public exported from `@promptbook/core`
687
698
  */
688
699
  var EXECUTIONS_CACHE_DIRNAME = '/.promptbook/executions-cache';
700
+ /**
701
+ * Where to store the scrape cache
702
+ *
703
+ * Note: When the folder does not exist, it is created recursively
704
+ *
705
+ * @public exported from `@promptbook/core`
706
+ */
707
+ var SCRAPE_CACHE_DIRNAME = '/.promptbook/scrape-cache';
689
708
  /**
690
709
  * The name of the built pipeline collection made by CLI `ptbk make` and for lookup in `createCollectionFromDirectory`
691
710
  *
@@ -1215,11 +1234,11 @@ function validatePipelineCore(pipeline) {
1215
1234
  * > ex port function validatePipeline(promptbook: really_unknown): asserts promptbook is PipelineJson {
1216
1235
  */
1217
1236
  /**
1218
- * TODO: [🐣][main] !!!! Validate that all samples match expectations
1219
- * TODO: [🐣][🐝][main] !!!! Validate that knowledge is valid (non-void)
1220
- * TODO: [🐣][main] !!!! Validate that persona can be used only with CHAT variant
1221
- * TODO: [🐣][main] !!!! Validate that parameter with reserved name not used RESERVED_PARAMETER_NAMES
1222
- * TODO: [🐣][main] !!!! Validate that reserved parameter is not used as joker
1237
+ * TODO: [🧳][main] !!!! Validate that all samples match expectations
1238
+ * TODO: [🧳][🐝][main] !!!! Validate that knowledge is valid (non-void)
1239
+ * TODO: [🧳][main] !!!! Validate that persona can be used only with CHAT variant
1240
+ * TODO: [🧳][main] !!!! Validate that parameter with reserved name not used RESERVED_PARAMETER_NAMES
1241
+ * TODO: [🧳][main] !!!! Validate that reserved parameter is not used as joker
1223
1242
  * TODO: [🧠] Validate not only the logic itself but also the imports around it - that files, websites and referenced pipelines exist
1224
1243
  * TODO: [🛠] Actions, instruments (and maybe knowledge) => Functions and tools
1225
1244
  */
@@ -1634,6 +1653,22 @@ var TemplateTypes = [
1634
1653
  // <- [🅱]
1635
1654
  ];
1636
1655
 
1656
+ /**
1657
+ * This error indicates that the promptbook can not retrieve knowledge from external sources
1658
+ *
1659
+ * @public exported from `@promptbook/core`
1660
+ */
1661
+ var KnowledgeScrapeError = /** @class */ (function (_super) {
1662
+ __extends(KnowledgeScrapeError, _super);
1663
+ function KnowledgeScrapeError(message) {
1664
+ var _this = _super.call(this, message) || this;
1665
+ _this.name = 'KnowledgeScrapeError';
1666
+ Object.setPrototypeOf(_this, KnowledgeScrapeError.prototype);
1667
+ return _this;
1668
+ }
1669
+ return KnowledgeScrapeError;
1670
+ }(Error));
1671
+
1637
1672
  /**
1638
1673
  * Async version of Array.forEach
1639
1674
  *
@@ -1641,6 +1676,7 @@ var TemplateTypes = [
1641
1676
  * @param options - Options for the function
1642
1677
  * @param callbackfunction - Function to call for each item
1643
1678
  * @public exported from `@promptbook/utils`
1679
+ * @deprecated [🪂] Use queues instead
1644
1680
  */
1645
1681
  function forEachAsync(array, options, callbackfunction) {
1646
1682
  return __awaiter(this, void 0, void 0, function () {
@@ -1710,59 +1746,246 @@ function forEachAsync(array, options, callbackfunction) {
1710
1746
  }
1711
1747
 
1712
1748
  /**
1713
- * Represents the usage with no resources consumed
1749
+ * This error type indicates that some tools are missing for pipeline execution or preparation
1714
1750
  *
1715
1751
  * @public exported from `@promptbook/core`
1716
1752
  */
1717
- var ZERO_USAGE = $deepFreeze({
1718
- price: { value: 0 },
1719
- input: {
1720
- tokensCount: { value: 0 },
1721
- charactersCount: { value: 0 },
1722
- wordsCount: { value: 0 },
1723
- sentencesCount: { value: 0 },
1724
- linesCount: { value: 0 },
1725
- paragraphsCount: { value: 0 },
1726
- pagesCount: { value: 0 },
1727
- },
1728
- output: {
1729
- tokensCount: { value: 0 },
1730
- charactersCount: { value: 0 },
1731
- wordsCount: { value: 0 },
1732
- sentencesCount: { value: 0 },
1733
- linesCount: { value: 0 },
1734
- paragraphsCount: { value: 0 },
1735
- pagesCount: { value: 0 },
1736
- },
1737
- });
1753
+ var MissingToolsError = /** @class */ (function (_super) {
1754
+ __extends(MissingToolsError, _super);
1755
+ function MissingToolsError(message) {
1756
+ var _this = _super.call(this, spaceTrim$1(function (block) { return "\n ".concat(block(message), "\n\n Note: You have probbably forgot to provide some tools for pipeline execution or preparation\n\n "); })) || this;
1757
+ _this.name = 'MissingToolsError';
1758
+ Object.setPrototypeOf(_this, MissingToolsError.prototype);
1759
+ return _this;
1760
+ }
1761
+ return MissingToolsError;
1762
+ }(Error));
1763
+
1738
1764
  /**
1739
- * Represents the usage with unknown resources consumed
1765
+ * Detects if the code is running in a Node.js environment
1766
+ *
1767
+ * Note: `$` is used to indicate that this function is not a pure function - it looks at the global object to determine the environment
1768
+ *
1769
+ * @public exported from `@promptbook/utils`
1770
+ */
1771
+ var $isRunningInNode = new Function("\n try {\n return this === global;\n } catch (e) {\n return false;\n }\n");
1772
+
1773
+ /**
1774
+ * This error type indicates that you try to use a feature that is not available in the current environment
1740
1775
  *
1741
1776
  * @public exported from `@promptbook/core`
1742
1777
  */
1743
- var UNCERTAIN_USAGE = $deepFreeze({
1744
- price: { value: 0, isUncertain: true },
1745
- input: {
1746
- tokensCount: { value: 0, isUncertain: true },
1747
- charactersCount: { value: 0, isUncertain: true },
1748
- wordsCount: { value: 0, isUncertain: true },
1749
- sentencesCount: { value: 0, isUncertain: true },
1750
- linesCount: { value: 0, isUncertain: true },
1751
- paragraphsCount: { value: 0, isUncertain: true },
1752
- pagesCount: { value: 0, isUncertain: true },
1753
- },
1754
- output: {
1755
- tokensCount: { value: 0, isUncertain: true },
1756
- charactersCount: { value: 0, isUncertain: true },
1757
- wordsCount: { value: 0, isUncertain: true },
1758
- sentencesCount: { value: 0, isUncertain: true },
1759
- linesCount: { value: 0, isUncertain: true },
1760
- paragraphsCount: { value: 0, isUncertain: true },
1761
- pagesCount: { value: 0, isUncertain: true },
1762
- },
1763
- });
1778
+ var EnvironmentMismatchError = /** @class */ (function (_super) {
1779
+ __extends(EnvironmentMismatchError, _super);
1780
+ function EnvironmentMismatchError(message) {
1781
+ var _this = _super.call(this, message) || this;
1782
+ _this.name = 'EnvironmentMismatchError';
1783
+ Object.setPrototypeOf(_this, EnvironmentMismatchError.prototype);
1784
+ return _this;
1785
+ }
1786
+ return EnvironmentMismatchError;
1787
+ }(Error));
1764
1788
 
1765
- var PipelineCollection = [{title:"Prepare Knowledge from Markdown",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.ptbk.md",parameters:[{name:"knowledgeContent",description:"Markdown document content",isInput:true,isOutput:false},{name:"knowledgePieces",description:"The knowledge JSON object",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}",resultingParameterName:"knowledgePieces",dependentParameterNames:["knowledgeContent"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-knowledge-from-markdown.ptbk.md"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-keywords.ptbk.md",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"keywords",description:"Keywords separated by comma",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> 
{knowledgePieceContent}",resultingParameterName:"keywords",dependentParameterNames:["knowledgePieceContent"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-knowledge-keywords.ptbk.md"},{title:"Prepare Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-title.ptbk.md",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"title",description:"The title of the document",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"knowledge",title:"Knowledge",content:"You are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Title should be concise and clear\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}",resultingParameterName:"title",expectations:{words:{min:1,max:8}},dependentParameterNames:["knowledgePieceContent"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-knowledge-title.ptbk.md"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-persona.ptbk.md",parameters:[{name:"availableModelNames",description:"List of available model names separated by comma (,)",isInput:true,isOutput:false},{name:"personaDescription",description:"Description of the persona",isInput:true,isOutput:false},{name:"modelRequirements",description:"Specific requirements for the model",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"make-model-requirements",title:"Make modelRequirements",content:"You are experienced AI engineer, you need to create virtual assistant.\nWrite\n\n## Sample\n\n```json\n{\n\"modelName\": \"gpt-4o\",\n\"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n\"temperature\": 0.7\n}\n```\n\n## Instructions\n\n- Your output format is JSON object\n- Write just the 
JSON object, no other text should be present\n- It contains the following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nPick from the following models:\n\n- {availableModelNames}\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}",resultingParameterName:"modelRequirements",format:"JSON",dependentParameterNames:["availableModelNames","personaDescription"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-persona.ptbk.md"}];
1789
+ /**
1790
+ * Normalize options for `execCommand` and `execCommands`
1791
+ *
1792
+ * @private internal utility of `execCommand` and `execCommands`
1793
+ */
1794
+ function execCommandNormalizeOptions(options) {
1795
+ var _a;
1796
+ var _b, _c, _d;
1797
+ var command;
1798
+ var cwd;
1799
+ var crashOnError;
1800
+ var args = [];
1801
+ var timeout;
1802
+ if (typeof options === 'string') {
1803
+ // TODO: [1] DRY default values
1804
+ command = options;
1805
+ cwd = process.cwd();
1806
+ crashOnError = true;
1807
+ timeout = Infinity;
1808
+ }
1809
+ else {
1810
+ /*
1811
+ TODO:
1812
+ if ((options as any).commands !== undefined) {
1813
+ commands = (options as any).commands;
1814
+ } else {
1815
+ commands = [(options as any).command];
1816
+ }
1817
+ */
1818
+ // TODO: [1] DRY default values
1819
+ command = options.command;
1820
+ cwd = (_b = options.cwd) !== null && _b !== void 0 ? _b : process.cwd();
1821
+ crashOnError = (_c = options.crashOnError) !== null && _c !== void 0 ? _c : true;
1822
+ timeout = (_d = options.timeout) !== null && _d !== void 0 ? _d : Infinity;
1823
+ }
1824
+ // TODO: /(-[a-zA-Z0-9-]+\s+[^\s]*)|[^\s]*/g
1825
+ var _ = Array.from(command.matchAll(/(".*")|([^\s]*)/g))
1826
+ .map(function (_a) {
1827
+ var _b = __read(_a, 1), match = _b[0];
1828
+ return match;
1829
+ })
1830
+ .filter(function (arg) { return arg !== ''; });
1831
+ if (_.length > 1) {
1832
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1833
+ _a = __read(_), command = _a[0], args = _a.slice(1);
1834
+ }
1835
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1836
+ if (options.args) {
1837
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1838
+ args = __spreadArray(__spreadArray([], __read(args), false), __read(options.args), false);
1839
+ }
1840
+ var humanReadableCommand = !['npx', 'npm'].includes(command) ? command : args[0];
1841
+ if (['ts-node'].includes(humanReadableCommand)) {
1842
+ humanReadableCommand += " ".concat(args[1]);
1843
+ }
1844
+ return { command: command, humanReadableCommand: humanReadableCommand, args: args, cwd: cwd, crashOnError: crashOnError, timeout: timeout };
1845
+ }
1846
+ // TODO: This should show type error> execCommandNormalizeOptions({ command: '', commands: [''] });
1847
+
1848
+ /**
1849
+ * Run one command in a shell
1850
+ *
1851
+ * Note: There are 2 similar functions in the codebase:
1852
+ * - `$execCommand` which runs a single command
1853
+ * - `$execCommands` which runs multiple commands
1854
+ *
1855
+ * @public exported from `@promptbook/node`
1856
+ */
1857
+ function $execCommand(options) {
1858
+ if (!$isRunningInNode()) {
1859
+ throw new EnvironmentMismatchError('Function `$execCommand` can run only in Node environment.js');
1860
+ }
1861
+ return new Promise(
1862
+ // <- TODO: [🧱] Implement in a functional (not new Class) way
1863
+ function (resolve, reject) {
1864
+ // eslint-disable-next-line prefer-const
1865
+ var _a = execCommandNormalizeOptions(options), command = _a.command, humanReadableCommand = _a.humanReadableCommand, args = _a.args, cwd = _a.cwd, crashOnError = _a.crashOnError, timeout = _a.timeout;
1866
+ if (timeout !== Infinity) {
1867
+ // TODO: In waitasecond forTime(Infinity) should be equivalent to forEver()
1868
+ forTime(timeout).then(function () {
1869
+ if (crashOnError) {
1870
+ reject(new Error("Command \"".concat(humanReadableCommand, "\" exceeded time limit of ").concat(timeout, "ms")));
1871
+ }
1872
+ else {
1873
+ console.warn("Command \"".concat(humanReadableCommand, "\" exceeded time limit of ").concat(timeout, "ms but continues running"));
1874
+ resolve('Command exceeded time limit');
1875
+ }
1876
+ });
1877
+ }
1878
+ if (/^win/.test(process.platform) && ['npm', 'npx'].includes(command)) {
1879
+ command = "".concat(command, ".cmd");
1880
+ }
1881
+ // !!!!!! Verbose mode - to all consoles
1882
+ console.info(colors.yellow(cwd) + ' ' + colors.green(command) + ' ' + colors.blue(args.join(' ')));
1883
+ try {
1884
+ var commandProcess = spawn(command, args, { cwd: cwd, shell: true });
1885
+ commandProcess.on('message', function (message) {
1886
+ console.info({ message: message });
1887
+ });
1888
+ var output_1 = [];
1889
+ commandProcess.stdout.on('data', function (stdout) {
1890
+ output_1.push(stdout.toString());
1891
+ console.info(stdout.toString());
1892
+ });
1893
+ commandProcess.stderr.on('data', function (stderr) {
1894
+ output_1.push(stderr.toString());
1895
+ if (stderr.toString().trim()) {
1896
+ console.warn(stderr.toString());
1897
+ }
1898
+ });
1899
+ var finishWithCode = function (code) {
1900
+ if (code !== 0) {
1901
+ if (crashOnError) {
1902
+ reject(new Error(output_1.join('\n').trim() ||
1903
+ "Command \"".concat(humanReadableCommand, "\" exited with code ").concat(code)));
1904
+ }
1905
+ else {
1906
+ console.warn("Command \"".concat(humanReadableCommand, "\" exited with code ").concat(code));
1907
+ resolve(spaceTrim$1(output_1.join('\n')));
1908
+ }
1909
+ }
1910
+ else {
1911
+ resolve(spaceTrim$1(output_1.join('\n')));
1912
+ }
1913
+ };
1914
+ commandProcess.on('close', finishWithCode);
1915
+ commandProcess.on('exit', finishWithCode);
1916
+ commandProcess.on('disconnect', function () {
1917
+ // Note: Unexpected disconnection should always result in rejection
1918
+ reject(new Error("Command \"".concat(humanReadableCommand, "\" disconnected")));
1919
+ });
1920
+ commandProcess.on('error', function (error) {
1921
+ if (crashOnError) {
1922
+ reject(new Error("Command \"".concat(humanReadableCommand, "\" failed: \n").concat(error.message)));
1923
+ }
1924
+ else {
1925
+ console.warn(error);
1926
+ resolve(spaceTrim$1(output_1.join('\n')));
1927
+ }
1928
+ });
1929
+ }
1930
+ catch (error) {
1931
+ // Note: Unexpected error in sync code should always result in rejection
1932
+ reject(error);
1933
+ }
1934
+ });
1935
+ }
1936
+ /**
1937
+ * Note: [🟢 <- TODO: [🦖] !!!!!! Split scrapers into packages and enable] Code in this file should never be published outside of `@promptbook/node` and `@promptbook/cli`
1938
+ */
1939
+
1940
+ /**
1941
+ * Checks if the file exists
1942
+ *
1943
+ * Note: `$` is used to indicate that this function is not a pure function - it looks at the filesystem
1944
+ *
1945
+ * @private within the repository
1946
+ */
1947
+ function $isFileExisting(filename) {
1948
+ return __awaiter(this, void 0, void 0, function () {
1949
+ var isReadAccessAllowed, isFile;
1950
+ return __generator(this, function (_a) {
1951
+ switch (_a.label) {
1952
+ case 0:
1953
+ if (!$isRunningInNode()) {
1954
+ throw new EnvironmentMismatchError('Function `$isFileExisting` works only in Node environment.js');
1955
+ }
1956
+ return [4 /*yield*/, access(filename, constants.R_OK)
1957
+ .then(function () { return true; })
1958
+ .catch(function () { return false; })];
1959
+ case 1:
1960
+ isReadAccessAllowed = _a.sent();
1961
+ if (!isReadAccessAllowed) {
1962
+ return [2 /*return*/, false];
1963
+ }
1964
+ return [4 /*yield*/, stat(filename)
1965
+ .then(function (fileStat) { return fileStat.isFile(); })
1966
+ .catch(function () { return false; })];
1967
+ case 2:
1968
+ isFile = _a.sent();
1969
+ return [2 /*return*/, isFile];
1970
+ }
1971
+ });
1972
+ });
1973
+ }
1974
+ /**
1975
+ * Note: [🟢 <- TODO: [🦖] !!!!!! Split scrapers into packages and enable] Code in this file should never be published outside of `@promptbook/node` and `@promptbook/cli`
1976
+ * TODO: [🐠] This can be a validator - with variants that return true/false and variants that throw errors with meaningful messages
1977
+ * TODO: [🖇] What about symlinks?
1978
+ */
1979
+
1980
+ /**
1981
+ * Get the file extension from a file name
1982
+ *
1983
+ * @private within the repository
1984
+ */
1985
+ function getFileExtension(value) {
1986
+ var match = value.match(/\.([0-9a-z]+)(?:[?#]|$)/i);
1987
+ return match ? match[1].toLowerCase() : null;
1988
+ }
1766
1989
 
1767
1990
  var defaultDiacriticsRemovalMap = [
1768
1991
  {
@@ -2056,10 +2279,6 @@ function normalizeToKebabCase(text) {
2056
2279
  charType = 'NUMBER';
2057
2280
  normalizedChar = char;
2058
2281
  }
2059
- else if (/^\/$/.test(char)) {
2060
- charType = 'SLASH';
2061
- normalizedChar = char;
2062
- }
2063
2282
  else {
2064
2283
  charType = 'OTHER';
2065
2284
  normalizedChar = '-';
@@ -2104,6 +2323,32 @@ function removeEmojis(text) {
2104
2323
  return text;
2105
2324
  }
2106
2325
 
2326
+ /**
2327
+ * Tests if given string looks like a file path (absolute Unix, absolute Windows or `./` / `../` relative).
2328
+ * Non-string values and bare names without such a prefix (for example `hello.txt`) return `false`
2329
+ * Note: This does not check if the file exists only if the path is valid
2330
+ * @public exported from `@promptbook/utils`
2331
+ */
2332
+ function isValidFilePath(filename) {
2333
+ if (typeof filename !== 'string') {
2334
+ return false;
2335
+ }
2336
+ var filenameSlashes = filename.split('\\').join('/'); // normalize backslashes so the checks below only deal with `/`
2337
+ // Absolute Unix path: /hello.txt
2338
+ if (/^(\/)/i.test(filenameSlashes)) {
2339
+ return true;
2340
+ }
2341
+ // Absolute Windows path: C:/hello.txt (one or two drive letters, a colon, then one or two slashes)
2342
+ if (/^([A-Z]{1,2}:\/?)\//i.test(filenameSlashes)) {
2343
+ return true;
2344
+ }
2345
+ // Relative path: ./hello.txt or ../hello.txt
2346
+ if (/^(\.\.?\/)+/i.test(filenameSlashes)) {
2347
+ return true;
2348
+ }
2349
+ return false;
2350
+ }
2351
+
2107
2352
  /**
2108
2353
  * @@@
2109
2354
  *
@@ -2113,20 +2358,121 @@ function removeEmojis(text) {
2113
2358
  * @public exported from `@promptbook/utils`
2114
2359
  */
2115
2360
  function titleToName(value) {
2116
- if (value.startsWith('http://') || value.startsWith('https://')) {
2117
- // TODO: Maybe check against some list unallowed characters
2118
- return value;
2361
+ if (isValidUrl(value)) {
2362
+ value = value.replace(/^https?:\/\//, '');
2363
+ value = value.replace(/\.html$/, '');
2119
2364
  }
2120
- if (value.startsWith('./') || value.startsWith('../')) {
2121
- // TODO: Maybe check against some list unallowed characters
2122
- return value;
2365
+ else if (isValidFilePath(value)) {
2366
+ value = basename(value);
2367
+ // Note: Keeping extension in the name
2123
2368
  }
2369
+ value = value.split('/').join('-');
2124
2370
  value = removeEmojis(value);
2125
2371
  value = normalizeToKebabCase(value);
2126
2372
  // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
2127
2373
  return value;
2128
2374
  }
2129
2375
 
2376
+ /**
2377
+ * Turns a name into two nested subfolder names: its first and its second character, both lower-cased
2378
+ * @returns array of two strings (an entry is empty when `name` is shorter than that position)
2379
+ * @private for `FileCacheStorage`
2380
+ */
2381
+ function nameToSubfolderPath(name) {
2382
+ return [name.substr(0, 1).toLowerCase(), name.substr(1, 1).toLowerCase()]; // NOTE(review): `substr` is a legacy method; `slice(0, 1)` / `slice(1, 2)` would be the modern spelling
2383
+ }
2384
+
2385
+ /**
2386
+ * Just marks a place where something should be implemented
2387
+ * No side effects.
2388
+ *
2389
+ * Note: It can be useful for suppressing eslint errors of unused variables
2390
+ *
2391
+ * @param value any values (they are collected and then ignored)
2392
+ * @returns void
2393
+ * @private within the repository
2394
+ */
2395
+ function TODO_USE() {
2396
+ var value = [];
2397
+ for (var _i = 0; _i < arguments.length; _i++) { // copies the arguments into `value`, which is never read afterwards
2398
+ value[_i] = arguments[_i];
2399
+ }
2400
+ }
2401
+
2402
+ /**
2403
+ * Create a filename for intermediate cache for scrapers
2404
+ *
2405
+ * Note: It also checks if directory exists and creates it if not
2406
+ *
2407
+ * @private as internal utility for scrapers
2408
+ */
2409
+ function getScraperIntermediateSource(source, options) {
2410
+ return __awaiter(this, void 0, void 0, function () {
2411
+ var sourceFilename, url, rootDirname, cacheDirname, isCacheCleaned, extension, isVerbose, hash, semanticName, pieces, name, cacheFilename, isDestroyed, fileHandler;
2412
+ return __generator(this, function (_a) {
2413
+ switch (_a.label) {
2414
+ case 0:
2415
+ sourceFilename = source.filename, url = source.url;
2416
+ rootDirname = options.rootDirname, cacheDirname = options.cacheDirname, isCacheCleaned = options.isCacheCleaned, extension = options.extension, isVerbose = options.isVerbose;
2417
+ hash = SHA256(
2418
+ // <- TODO: [🥬] Encapsulate sha256 to some private utility function
2419
+ hexEncoder.parse(sourceFilename || url || 'untitled'))
2420
+ .toString( /* hex */)
2421
+ .substring(0, 20);
2422
+ semanticName = normalizeToKebabCase(titleToName((sourceFilename || url || '').split('intermediate').join(''))).substring(0, 20);
2423
+ pieces = ['intermediate', semanticName, hash].filter(function (piece) { return piece !== ''; });
2424
+ name = pieces.join('-').split('--').join('-');
2425
+ // <- TODO: Use MAX_FILENAME_LENGTH
2426
+ TODO_USE(rootDirname); // <- TODO: !!!!!!
2427
+ cacheFilename = join.apply(void 0, __spreadArray(__spreadArray([process.cwd(),
2428
+ cacheDirname], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), [name], false)).split('\\')
2429
+ .join('/') +
2430
+ '.' +
2431
+ extension;
2432
+ return [4 /*yield*/, mkdir(dirname(cacheFilename), { recursive: true })];
2433
+ case 1:
2434
+ _a.sent();
2435
+ isDestroyed = true;
2436
+ fileHandler = {
2437
+ filename: cacheFilename,
2438
+ get isDestroyed() {
2439
+ return isDestroyed;
2440
+ },
2441
+ destroy: function () {
2442
+ return __awaiter(this, void 0, void 0, function () {
2443
+ return __generator(this, function (_a) {
2444
+ switch (_a.label) {
2445
+ case 0:
2446
+ if (!isCacheCleaned) return [3 /*break*/, 2];
2447
+ if (isVerbose) {
2448
+ console.info('legacyDocumentScraper: Clening cache');
2449
+ }
2450
+ return [4 /*yield*/, rm(cacheFilename)];
2451
+ case 1:
2452
+ _a.sent();
2453
+ _a.label = 2;
2454
+ case 2:
2455
+ isDestroyed = true;
2456
+ return [2 /*return*/];
2457
+ }
2458
+ });
2459
+ });
2460
+ },
2461
+ };
2462
+ return [2 /*return*/, fileHandler];
2463
+ }
2464
+ });
2465
+ });
2466
+ }
2467
+ /**
2468
+ * Note: Not using `FileCacheStorage` for two reasons:
2469
+ * 1) Need to store more than serialized JSONs
2470
+ * 2) Need to switch between a `rootDirname` and `cacheDirname` <- TODO: !!!!
2471
+ * TODO: [🐱‍🐉][🧠] Make some smart crop
2472
+ */
2473
+
2474
+ var PipelineCollection = [{title:"Prepare Knowledge from Markdown",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.ptbk.md",parameters:[{name:"knowledgeContent",description:"Markdown document content",isInput:true,isOutput:false},{name:"knowledgePieces",description:"The knowledge JSON object",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}",resultingParameterName:"knowledgePieces",dependentParameterNames:["knowledgeContent"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-knowledge-from-markdown.ptbk.md"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-keywords.ptbk.md",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"keywords",description:"Keywords separated by comma",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> 
{knowledgePieceContent}",resultingParameterName:"keywords",dependentParameterNames:["knowledgePieceContent"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-knowledge-keywords.ptbk.md"},{title:"Prepare Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-title.ptbk.md",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"title",description:"The title of the document",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"knowledge",title:"Knowledge",content:"You are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Title should be concise and clear\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}",resultingParameterName:"title",expectations:{words:{min:1,max:8}},dependentParameterNames:["knowledgePieceContent"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-knowledge-title.ptbk.md"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-persona.ptbk.md",parameters:[{name:"availableModelNames",description:"List of available model names separated by comma (,)",isInput:true,isOutput:false},{name:"personaDescription",description:"Description of the persona",isInput:true,isOutput:false},{name:"modelRequirements",description:"Specific requirements for the model",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"make-model-requirements",title:"Make modelRequirements",content:"You are experienced AI engineer, you need to create virtual assistant.\nWrite\n\n## Sample\n\n```json\n{\n\"modelName\": \"gpt-4o\",\n\"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n\"temperature\": 0.7\n}\n```\n\n## Instructions\n\n- Your output format is JSON object\n- Write just the 
JSON object, no other text should be present\n- It contains the following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nPick from the following models:\n\n- {availableModelNames}\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}",resultingParameterName:"modelRequirements",format:"JSON",dependentParameterNames:["availableModelNames","personaDescription"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-persona.ptbk.md"}];
2475
+
2130
2476
  /**
2131
2477
  * This error indicates errors during the execution of the pipeline
2132
2478
  *
@@ -2159,22 +2505,6 @@ var CollectionError = /** @class */ (function (_super) {
2159
2505
  return CollectionError;
2160
2506
  }(Error));
2161
2507
 
2162
- /**
2163
- * This error type indicates that you try to use a feature that is not available in the current environment
2164
- *
2165
- * @public exported from `@promptbook/core`
2166
- */
2167
- var EnvironmentMismatchError = /** @class */ (function (_super) {
2168
- __extends(EnvironmentMismatchError, _super);
2169
- function EnvironmentMismatchError(message) {
2170
- var _this = _super.call(this, message) || this;
2171
- _this.name = 'EnvironmentMismatchError';
2172
- Object.setPrototypeOf(_this, EnvironmentMismatchError.prototype);
2173
- return _this;
2174
- }
2175
- return EnvironmentMismatchError;
2176
- }(Error));
2177
-
2178
2508
  /**
2179
2509
  * This error occurs when some expectation is not met in the execution of the pipeline
2180
2510
  *
@@ -2674,17 +3004,70 @@ function deepClone(objectValue) {
2674
3004
  */
2675
3005
 
2676
3006
  /**
2677
- * Function `addUsage` will add multiple usages into one
2678
- *
2679
- * Note: If you provide 0 values, it returns ZERO_USAGE
3007
+ * Represents the usage with no resources consumed
2680
3008
  *
2681
3009
  * @public exported from `@promptbook/core`
2682
3010
  */
2683
- function addUsage() {
2684
- var usageItems = [];
2685
- for (var _i = 0; _i < arguments.length; _i++) {
2686
- usageItems[_i] = arguments[_i];
2687
- }
3011
+ var ZERO_USAGE = $deepFreeze({ // every entry below is `{ value: 0 }` without an `isUncertain` flag
3012
+ price: { value: 0 },
3013
+ input: { // same seven counters as in `output`
3014
+ tokensCount: { value: 0 },
3015
+ charactersCount: { value: 0 },
3016
+ wordsCount: { value: 0 },
3017
+ sentencesCount: { value: 0 },
3018
+ linesCount: { value: 0 },
3019
+ paragraphsCount: { value: 0 },
3020
+ pagesCount: { value: 0 },
3021
+ },
3022
+ output: {
3023
+ tokensCount: { value: 0 },
3024
+ charactersCount: { value: 0 },
3025
+ wordsCount: { value: 0 },
3026
+ sentencesCount: { value: 0 },
3027
+ linesCount: { value: 0 },
3028
+ paragraphsCount: { value: 0 },
3029
+ pagesCount: { value: 0 },
3030
+ },
3031
+ }); // NOTE(review): presumably `$deepFreeze` makes the whole structure read-only - confirm against its definition
3032
+ /**
3033
+ * Represents the usage with unknown resources consumed
3034
+ * Note: Same shape as `ZERO_USAGE`, but every `{ value: 0 }` entry additionally carries `isUncertain: true`
3035
+ * @public exported from `@promptbook/core`
3036
+ */
3037
+ var UNCERTAIN_USAGE = $deepFreeze({
3038
+ price: { value: 0, isUncertain: true },
3039
+ input: { // same seven counters as in `output`
3040
+ tokensCount: { value: 0, isUncertain: true },
3041
+ charactersCount: { value: 0, isUncertain: true },
3042
+ wordsCount: { value: 0, isUncertain: true },
3043
+ sentencesCount: { value: 0, isUncertain: true },
3044
+ linesCount: { value: 0, isUncertain: true },
3045
+ paragraphsCount: { value: 0, isUncertain: true },
3046
+ pagesCount: { value: 0, isUncertain: true },
3047
+ },
3048
+ output: {
3049
+ tokensCount: { value: 0, isUncertain: true },
3050
+ charactersCount: { value: 0, isUncertain: true },
3051
+ wordsCount: { value: 0, isUncertain: true },
3052
+ sentencesCount: { value: 0, isUncertain: true },
3053
+ linesCount: { value: 0, isUncertain: true },
3054
+ paragraphsCount: { value: 0, isUncertain: true },
3055
+ pagesCount: { value: 0, isUncertain: true },
3056
+ },
3057
+ }); // NOTE(review): presumably `$deepFreeze` makes the whole structure read-only - confirm against its definition
3058
+
3059
+ /**
3060
+ * Function `addUsage` will add multiple usages into one
3061
+ *
3062
+ * Note: If you provide 0 values, it returns ZERO_USAGE
3063
+ *
3064
+ * @public exported from `@promptbook/core`
3065
+ */
3066
+ function addUsage() {
3067
+ var usageItems = [];
3068
+ for (var _i = 0; _i < arguments.length; _i++) {
3069
+ usageItems[_i] = arguments[_i];
3070
+ }
2688
3071
  return usageItems.reduce(function (acc, item) {
2689
3072
  var e_1, _a, e_2, _b;
2690
3073
  var _c;
@@ -2950,23 +3333,6 @@ function union() {
2950
3333
  return union;
2951
3334
  }
2952
3335
 
2953
- /**
2954
- * Just marks a place of place where should be something implemented
2955
- * No side effects.
2956
- *
2957
- * Note: It can be usefull suppressing eslint errors of unused variables
2958
- *
2959
- * @param value any values
2960
- * @returns void
2961
- * @private within the repository
2962
- */
2963
- function TODO_USE() {
2964
- var value = [];
2965
- for (var _i = 0; _i < arguments.length; _i++) {
2966
- value[_i] = arguments[_i];
2967
- }
2968
- }
2969
-
2970
3336
  /**
2971
3337
  * This error indicates problems parsing the format value
2972
3338
  *
@@ -3010,7 +3376,7 @@ var CsvFormatError = /** @class */ (function (_super) {
3010
3376
  */
3011
3377
  var MANDATORY_CSV_SETTINGS = Object.freeze({
3012
3378
  header: true,
3013
- // encoding: 'utf8',
3379
+ // encoding: 'utf-8',
3014
3380
  });
3015
3381
 
3016
3382
  /**
@@ -4464,25 +4830,26 @@ function filterJustOutputParameters(options) {
4464
4830
  */
4465
4831
  function executePipeline(options) {
4466
4832
  return __awaiter(this, void 0, void 0, function () {
4467
- var inputParameters, tools, onProgress, pipeline, setPreparedPipeline, pipelineIdentification, settings, maxParallelCount, isVerbose, preparedPipeline, llmTools, errors, warnings, executionReport, isReturned, _a, _b, parameter, e_1_1, _loop_1, _c, _d, parameterName, state_1, e_2_1, parametersToPass, resovedParameterNames_1, unresovedTemplates_1, resolving_1, loopLimit, _loop_2, error_1, usage_1, outputParameters_1, usage, outputParameters;
4468
- var e_1, _e, e_2, _f;
4469
- return __generator(this, function (_g) {
4470
- switch (_g.label) {
4833
+ var inputParameters, tools, onProgress, pipeline, setPreparedPipeline, pipelineIdentification, settings, maxParallelCount, rootDirname, _a, isVerbose, preparedPipeline, llmTools, errors, warnings, executionReport, isReturned, _b, _c, parameter, e_1_1, _loop_1, _d, _e, parameterName, state_1, e_2_1, parametersToPass, resovedParameterNames_1, unresovedTemplates_1, resolving_1, loopLimit, _loop_2, error_1, usage_1, outputParameters_1, usage, outputParameters;
4834
+ var e_1, _f, e_2, _g;
4835
+ return __generator(this, function (_h) {
4836
+ switch (_h.label) {
4471
4837
  case 0:
4472
4838
  inputParameters = options.inputParameters, tools = options.tools, onProgress = options.onProgress, pipeline = options.pipeline, setPreparedPipeline = options.setPreparedPipeline, pipelineIdentification = options.pipelineIdentification, settings = options.settings;
4473
- maxParallelCount = settings.maxParallelCount, isVerbose = settings.isVerbose;
4839
+ maxParallelCount = settings.maxParallelCount, rootDirname = settings.rootDirname, _a = settings.isVerbose, isVerbose = _a === void 0 ? IS_VERBOSE : _a;
4474
4840
  preparedPipeline = options.preparedPipeline;
4475
4841
  llmTools = joinLlmExecutionTools.apply(void 0, __spreadArray([], __read(arrayableToArray(tools.llm)), false));
4476
4842
  if (!(preparedPipeline === undefined)) return [3 /*break*/, 2];
4477
4843
  return [4 /*yield*/, preparePipeline(pipeline, {
4478
4844
  llmTools: llmTools,
4845
+ rootDirname: rootDirname,
4479
4846
  isVerbose: isVerbose,
4480
4847
  maxParallelCount: maxParallelCount,
4481
4848
  })];
4482
4849
  case 1:
4483
- preparedPipeline = _g.sent();
4850
+ preparedPipeline = _h.sent();
4484
4851
  setPreparedPipeline(preparedPipeline);
4485
- _g.label = 2;
4852
+ _h.label = 2;
4486
4853
  case 2:
4487
4854
  errors = [];
4488
4855
  warnings = [];
@@ -4495,17 +4862,17 @@ function executePipeline(options) {
4495
4862
  promptExecutions: [],
4496
4863
  };
4497
4864
  isReturned = false;
4498
- _g.label = 3;
4865
+ _h.label = 3;
4499
4866
  case 3:
4500
- _g.trys.push([3, 9, 10, 11]);
4501
- _a = __values(preparedPipeline.parameters.filter(function (_a) {
4867
+ _h.trys.push([3, 9, 10, 11]);
4868
+ _b = __values(preparedPipeline.parameters.filter(function (_a) {
4502
4869
  var isInput = _a.isInput;
4503
4870
  return isInput;
4504
- })), _b = _a.next();
4505
- _g.label = 4;
4871
+ })), _c = _b.next();
4872
+ _h.label = 4;
4506
4873
  case 4:
4507
- if (!!_b.done) return [3 /*break*/, 8];
4508
- parameter = _b.value;
4874
+ if (!!_c.done) return [3 /*break*/, 8];
4875
+ parameter = _c.value;
4509
4876
  if (!(inputParameters[parameter.name] === undefined)) return [3 /*break*/, 7];
4510
4877
  isReturned = true;
4511
4878
  if (!(onProgress !== undefined)) return [3 /*break*/, 6];
@@ -4513,8 +4880,8 @@ function executePipeline(options) {
4513
4880
  return [4 /*yield*/, forTime(IMMEDIATE_TIME)];
4514
4881
  case 5:
4515
4882
  // Note: Wait a short time to prevent race conditions
4516
- _g.sent();
4517
- _g.label = 6;
4883
+ _h.sent();
4884
+ _h.label = 6;
4518
4885
  case 6: return [2 /*return*/, $asDeeplyFrozenSerializableJson("Unuccessful PipelineExecutorResult (with missing parameter {".concat(parameter.name, "}) PipelineExecutorResult"), {
4519
4886
  isSuccessful: false,
4520
4887
  errors: __spreadArray([
@@ -4527,24 +4894,24 @@ function executePipeline(options) {
4527
4894
  preparedPipeline: preparedPipeline,
4528
4895
  })];
4529
4896
  case 7:
4530
- _b = _a.next();
4897
+ _c = _b.next();
4531
4898
  return [3 /*break*/, 4];
4532
4899
  case 8: return [3 /*break*/, 11];
4533
4900
  case 9:
4534
- e_1_1 = _g.sent();
4901
+ e_1_1 = _h.sent();
4535
4902
  e_1 = { error: e_1_1 };
4536
4903
  return [3 /*break*/, 11];
4537
4904
  case 10:
4538
4905
  try {
4539
- if (_b && !_b.done && (_e = _a.return)) _e.call(_a);
4906
+ if (_c && !_c.done && (_f = _b.return)) _f.call(_b);
4540
4907
  }
4541
4908
  finally { if (e_1) throw e_1.error; }
4542
4909
  return [7 /*endfinally*/];
4543
4910
  case 11:
4544
4911
  _loop_1 = function (parameterName) {
4545
4912
  var parameter;
4546
- return __generator(this, function (_h) {
4547
- switch (_h.label) {
4913
+ return __generator(this, function (_j) {
4914
+ switch (_j.label) {
4548
4915
  case 0:
4549
4916
  parameter = preparedPipeline.parameters.find(function (_a) {
4550
4917
  var name = _a.name;
@@ -4561,8 +4928,8 @@ function executePipeline(options) {
4561
4928
  return [4 /*yield*/, forTime(IMMEDIATE_TIME)];
4562
4929
  case 2:
4563
4930
  // Note: Wait a short time to prevent race conditions
4564
- _h.sent();
4565
- _h.label = 3;
4931
+ _j.sent();
4932
+ _j.label = 3;
4566
4933
  case 3: return [2 /*return*/, { value: $asDeeplyFrozenSerializableJson(spaceTrim$1(function (block) { return "\n Unuccessful PipelineExecutorResult (with extra parameter {".concat(parameter.name, "}) PipelineExecutorResult\n\n ").concat(block(pipelineIdentification), "\n "); }), {
4567
4934
  isSuccessful: false,
4568
4935
  errors: __spreadArray([
@@ -4578,39 +4945,39 @@ function executePipeline(options) {
4578
4945
  }
4579
4946
  });
4580
4947
  };
4581
- _g.label = 12;
4948
+ _h.label = 12;
4582
4949
  case 12:
4583
- _g.trys.push([12, 17, 18, 19]);
4584
- _c = __values(Object.keys(inputParameters)), _d = _c.next();
4585
- _g.label = 13;
4950
+ _h.trys.push([12, 17, 18, 19]);
4951
+ _d = __values(Object.keys(inputParameters)), _e = _d.next();
4952
+ _h.label = 13;
4586
4953
  case 13:
4587
- if (!!_d.done) return [3 /*break*/, 16];
4588
- parameterName = _d.value;
4954
+ if (!!_e.done) return [3 /*break*/, 16];
4955
+ parameterName = _e.value;
4589
4956
  return [5 /*yield**/, _loop_1(parameterName)];
4590
4957
  case 14:
4591
- state_1 = _g.sent();
4958
+ state_1 = _h.sent();
4592
4959
  if (typeof state_1 === "object")
4593
4960
  return [2 /*return*/, state_1.value];
4594
- _g.label = 15;
4961
+ _h.label = 15;
4595
4962
  case 15:
4596
- _d = _c.next();
4963
+ _e = _d.next();
4597
4964
  return [3 /*break*/, 13];
4598
4965
  case 16: return [3 /*break*/, 19];
4599
4966
  case 17:
4600
- e_2_1 = _g.sent();
4967
+ e_2_1 = _h.sent();
4601
4968
  e_2 = { error: e_2_1 };
4602
4969
  return [3 /*break*/, 19];
4603
4970
  case 18:
4604
4971
  try {
4605
- if (_d && !_d.done && (_f = _c.return)) _f.call(_c);
4972
+ if (_e && !_e.done && (_g = _d.return)) _g.call(_d);
4606
4973
  }
4607
4974
  finally { if (e_2) throw e_2.error; }
4608
4975
  return [7 /*endfinally*/];
4609
4976
  case 19:
4610
4977
  parametersToPass = inputParameters;
4611
- _g.label = 20;
4978
+ _h.label = 20;
4612
4979
  case 20:
4613
- _g.trys.push([20, 25, , 28]);
4980
+ _h.trys.push([20, 25, , 28]);
4614
4981
  resovedParameterNames_1 = preparedPipeline.parameters
4615
4982
  .filter(function (_a) {
4616
4983
  var isInput = _a.isInput;
@@ -4625,8 +4992,8 @@ function executePipeline(options) {
4625
4992
  loopLimit = LOOP_LIMIT;
4626
4993
  _loop_2 = function () {
4627
4994
  var currentTemplate, work_1;
4628
- return __generator(this, function (_j) {
4629
- switch (_j.label) {
4995
+ return __generator(this, function (_k) {
4996
+ switch (_k.label) {
4630
4997
  case 0:
4631
4998
  if (loopLimit-- < 0) {
4632
4999
  // Note: Really UnexpectedError not LimitReachedError - this should be catched during validatePipeline
@@ -4652,7 +5019,7 @@ function executePipeline(options) {
4652
5019
  if (!!currentTemplate) return [3 /*break*/, 3];
4653
5020
  /* [🤹‍♂️] */ return [4 /*yield*/, Promise.race(resolving_1)];
4654
5021
  case 2:
4655
- /* [🤹‍♂️] */ _j.sent();
5022
+ /* [🤹‍♂️] */ _k.sent();
4656
5023
  return [3 /*break*/, 4];
4657
5024
  case 3:
4658
5025
  unresovedTemplates_1 = unresovedTemplates_1.filter(function (template) { return template !== currentTemplate; });
@@ -4687,24 +5054,24 @@ function executePipeline(options) {
4687
5054
  // <- Note: Errors are catched here [3]
4688
5055
  // TODO: BUT if in multiple templates are errors, only the first one is catched so maybe we should catch errors here and save them to errors array here
4689
5056
  resolving_1.push(work_1);
4690
- _j.label = 4;
5057
+ _k.label = 4;
4691
5058
  case 4: return [2 /*return*/];
4692
5059
  }
4693
5060
  });
4694
5061
  };
4695
- _g.label = 21;
5062
+ _h.label = 21;
4696
5063
  case 21:
4697
5064
  if (!(unresovedTemplates_1.length > 0)) return [3 /*break*/, 23];
4698
5065
  return [5 /*yield**/, _loop_2()];
4699
5066
  case 22:
4700
- _g.sent();
5067
+ _h.sent();
4701
5068
  return [3 /*break*/, 21];
4702
5069
  case 23: return [4 /*yield*/, Promise.all(resolving_1)];
4703
5070
  case 24:
4704
- _g.sent();
5071
+ _h.sent();
4705
5072
  return [3 /*break*/, 28];
4706
5073
  case 25:
4707
- error_1 = _g.sent();
5074
+ error_1 = _h.sent();
4708
5075
  if (!(error_1 instanceof Error)) {
4709
5076
  throw error_1;
4710
5077
  }
@@ -4724,8 +5091,8 @@ function executePipeline(options) {
4724
5091
  return [4 /*yield*/, forTime(IMMEDIATE_TIME)];
4725
5092
  case 26:
4726
5093
  // Note: Wait a short time to prevent race conditions
4727
- _g.sent();
4728
- _g.label = 27;
5094
+ _h.sent();
5095
+ _h.label = 27;
4729
5096
  case 27: return [2 /*return*/, $asDeeplyFrozenSerializableJson('Unuccessful PipelineExecutorResult (with misc errors) PipelineExecutorResult', {
4730
5097
  isSuccessful: false,
4731
5098
  errors: __spreadArray([error_1], __read(errors), false).map(serializeError),
@@ -4752,8 +5119,8 @@ function executePipeline(options) {
4752
5119
  return [4 /*yield*/, forTime(IMMEDIATE_TIME)];
4753
5120
  case 29:
4754
5121
  // Note: Wait a short time to prevent race conditions
4755
- _g.sent();
4756
- _g.label = 30;
5122
+ _h.sent();
5123
+ _h.label = 30;
4757
5124
  case 30: return [2 /*return*/, $asDeeplyFrozenSerializableJson('Successful PipelineExecutorResult', {
4758
5125
  isSuccessful: true,
4759
5126
  errors: errors.map(serializeError),
@@ -4765,211 +5132,884 @@ function executePipeline(options) {
4765
5132
  })];
4766
5133
  }
4767
5134
  });
4768
- });
4769
- }
5135
+ });
5136
+ }
5137
+ /**
5138
+ * TODO: [🐚] Change onProgress to object that represents the running execution, can be subscribed via RxJS to and also awaited
5139
+ */
5140
+
5141
+ /**
5142
+ * Creates executor function from pipeline and execution tools.
5143
+ *
5144
+ * @returns The executor function
5145
+ * @throws {PipelineLogicError} on logical error in the pipeline
5146
+ * @public exported from `@promptbook/core`
5147
+ */
5148
+ function createPipelineExecutor(options) {
5149
+ var _this = this;
5150
+ var pipeline = options.pipeline, tools = options.tools, _a = options.settings, settings = _a === void 0 ? {} : _a;
5151
+ var _b = settings.maxExecutionAttempts, maxExecutionAttempts = _b === void 0 ? MAX_EXECUTION_ATTEMPTS : _b, _c = settings.maxParallelCount, maxParallelCount = _c === void 0 ? MAX_PARALLEL_COUNT : _c, _d = settings.csvSettings, csvSettings = _d === void 0 ? DEFAULT_CSV_SETTINGS : _d, _e = settings.isVerbose, isVerbose = _e === void 0 ? IS_VERBOSE : _e, _f = settings.isNotPreparedWarningSupressed, isNotPreparedWarningSupressed = _f === void 0 ? false : _f, _g = settings.rootDirname, rootDirname = _g === void 0 ? null : _g;
5152
+ validatePipeline(pipeline);
5153
+ var pipelineIdentification = (function () {
5154
+ // Note: This is a 😐 implementation of [🚞]
5155
+ var _ = [];
5156
+ if (pipeline.sourceFile !== undefined) {
5157
+ _.push("File: ".concat(pipeline.sourceFile));
5158
+ }
5159
+ if (pipeline.pipelineUrl !== undefined) {
5160
+ _.push("Url: ".concat(pipeline.pipelineUrl));
5161
+ }
5162
+ return _.join('\n');
5163
+ })();
5164
+ var preparedPipeline;
5165
+ if (isPipelinePrepared(pipeline)) {
5166
+ preparedPipeline = pipeline;
5167
+ }
5168
+ else if (isNotPreparedWarningSupressed !== true) {
5169
+ console.warn(spaceTrim$1(function (block) { return "\n Pipeline is not prepared\n\n ".concat(block(pipelineIdentification), "\n\n It will be prepared ad-hoc before the first execution and **returned as `preparedPipeline` in `PipelineExecutorResult`**\n But it is recommended to prepare the pipeline during collection preparation\n\n @see more at https://ptbk.io/prepare-pipeline\n "); }));
5170
+ }
5171
+ var runCount = 0;
5172
+ var pipelineExecutor = function (inputParameters, onProgress) { return __awaiter(_this, void 0, void 0, function () {
5173
+ return __generator(this, function (_a) {
5174
+ runCount++;
5175
+ return [2 /*return*/, /* not await */ executePipeline({
5176
+ pipeline: pipeline,
5177
+ preparedPipeline: preparedPipeline,
5178
+ setPreparedPipeline: function (newPreparedPipeline) {
5179
+ preparedPipeline = newPreparedPipeline;
5180
+ },
5181
+ inputParameters: inputParameters,
5182
+ tools: tools,
5183
+ onProgress: onProgress,
5184
+ pipelineIdentification: spaceTrim$1(function (block) { return "\n ".concat(block(pipelineIdentification), "\n ").concat(runCount === 1 ? '' : "Run #".concat(runCount), "\n "); }),
5185
+ settings: {
5186
+ maxExecutionAttempts: maxExecutionAttempts,
5187
+ maxParallelCount: maxParallelCount,
5188
+ csvSettings: csvSettings,
5189
+ isVerbose: isVerbose,
5190
+ isNotPreparedWarningSupressed: isNotPreparedWarningSupressed,
5191
+ rootDirname: rootDirname,
5192
+ },
5193
+ })];
5194
+ });
5195
+ }); };
5196
+ return pipelineExecutor;
5197
+ }
5198
+ /**
5199
+ * TODO: [🐚] Change onProgress to object that represents the running execution, can be subscribed via RxJS to and also awaited
5200
+ */
5201
+
5202
+ /**
5203
+ * Scraper for markdown files
5204
+ *
5205
+ * @see `documentationUrl` for more details
5206
+ * @public exported from `@promptbook/core`
5207
+ */
5208
+ var markdownScraper = {
5209
+ /**
5210
+ * Mime types that this scraper can handle
5211
+ */
5212
+ mimeTypes: ['text/markdown', 'text/plain'],
5213
+ /**
5214
+ * Link to documentation
5215
+ */
5216
+ documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
5217
+ /**
5218
+ * Scrapes the markdown file and returns the knowledge pieces or `null` if it can't scrape it
5219
+ */
5220
+ scrape: function (source, options) {
5221
+ return __awaiter(this, void 0, void 0, function () {
5222
+ var llmTools, _a, maxParallelCount, _b, isVerbose, collection, prepareKnowledgeFromMarkdownExecutor, _c, prepareTitleExecutor, _d, prepareKeywordsExecutor, _e, knowledgeContent, result, outputParameters, knowledgePiecesRaw, knowledgeTextPieces, knowledge;
5223
+ var _f, _g, _h;
5224
+ var _this = this;
5225
+ return __generator(this, function (_j) {
5226
+ switch (_j.label) {
5227
+ case 0:
5228
+ llmTools = options.llmTools, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a, _b = options.isVerbose, isVerbose = _b === void 0 ? IS_VERBOSE : _b;
5229
+ if (llmTools === undefined) {
5230
+ throw new MissingToolsError('LLM tools are required for scraping external files');
5231
+ // <- Note: This scraper is used in all other scrapers, so saying "external files" not "markdown files"
5232
+ }
5233
+ TODO_USE(maxParallelCount); // <- [🪂]
5234
+ collection = createCollectionFromJson.apply(void 0, __spreadArray([], __read(PipelineCollection), false));
5235
+ _c = createPipelineExecutor;
5236
+ _f = {};
5237
+ return [4 /*yield*/, collection.getPipelineByUrl('https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.ptbk.md')];
5238
+ case 1:
5239
+ prepareKnowledgeFromMarkdownExecutor = _c.apply(void 0, [(_f.pipeline = _j.sent(),
5240
+ _f.tools = {
5241
+ llm: llmTools,
5242
+ },
5243
+ _f)]);
5244
+ _d = createPipelineExecutor;
5245
+ _g = {};
5246
+ return [4 /*yield*/, collection.getPipelineByUrl('https://promptbook.studio/promptbook/prepare-knowledge-title.ptbk.md')];
5247
+ case 2:
5248
+ prepareTitleExecutor = _d.apply(void 0, [(_g.pipeline = _j.sent(),
5249
+ _g.tools = {
5250
+ llm: llmTools,
5251
+ },
5252
+ _g)]);
5253
+ _e = createPipelineExecutor;
5254
+ _h = {};
5255
+ return [4 /*yield*/, collection.getPipelineByUrl('https://promptbook.studio/promptbook/prepare-knowledge-keywords.ptbk.md')];
5256
+ case 3:
5257
+ prepareKeywordsExecutor = _e.apply(void 0, [(_h.pipeline = _j.sent(),
5258
+ _h.tools = {
5259
+ llm: llmTools,
5260
+ },
5261
+ _h)]);
5262
+ return [4 /*yield*/, source.asText()];
5263
+ case 4:
5264
+ knowledgeContent = _j.sent();
5265
+ return [4 /*yield*/, prepareKnowledgeFromMarkdownExecutor({ knowledgeContent: knowledgeContent })];
5266
+ case 5:
5267
+ result = _j.sent();
5268
+ assertsExecutionSuccessful(result);
5269
+ outputParameters = result.outputParameters;
5270
+ knowledgePiecesRaw = outputParameters.knowledgePieces;
5271
+ knowledgeTextPieces = (knowledgePiecesRaw || '').split('\n---\n');
5272
+ // <- TODO: [main] !!!!! Smarter split and filter out empty pieces
5273
+ if (isVerbose) {
5274
+ console.info('knowledgeTextPieces:', knowledgeTextPieces);
5275
+ }
5276
+ return [4 /*yield*/, Promise.all(
5277
+ // TODO: [🪂] !! Do not send all at once but in chunks
5278
+ knowledgeTextPieces.map(function (knowledgeTextPiece, i) { return __awaiter(_this, void 0, void 0, function () {
5279
+ var name, title, knowledgePieceContent, keywords, index, titleResult, _a, titleRaw, keywordsResult, _b, keywordsRaw, embeddingResult, error_1;
5280
+ return __generator(this, function (_c) {
5281
+ switch (_c.label) {
5282
+ case 0:
5283
+ name = "piece-".concat(i);
5284
+ title = spaceTrim(knowledgeTextPiece.substring(0, 100));
5285
+ knowledgePieceContent = spaceTrim(knowledgeTextPiece);
5286
+ keywords = [];
5287
+ index = [];
5288
+ _c.label = 1;
5289
+ case 1:
5290
+ _c.trys.push([1, 7, , 8]);
5291
+ return [4 /*yield*/, prepareTitleExecutor({ knowledgePieceContent: knowledgePieceContent })];
5292
+ case 2:
5293
+ titleResult = _c.sent();
5294
+ _a = titleResult.outputParameters.title, titleRaw = _a === void 0 ? 'Untitled' : _a;
5295
+ title = spaceTrim(titleRaw) /* <- TODO: Maybe do in pipeline */;
5296
+ name = titleToName(title);
5297
+ return [4 /*yield*/, prepareKeywordsExecutor({ knowledgePieceContent: knowledgePieceContent })];
5298
+ case 3:
5299
+ keywordsResult = _c.sent();
5300
+ _b = keywordsResult.outputParameters.keywords, keywordsRaw = _b === void 0 ? '' : _b;
5301
+ keywords = (keywordsRaw || '')
5302
+ .split(',')
5303
+ .map(function (keyword) { return keyword.trim(); })
5304
+ .filter(function (keyword) { return keyword !== ''; });
5305
+ if (isVerbose) {
5306
+ console.info("Keywords for \"".concat(title, "\":"), keywords);
5307
+ }
5308
+ if (!!llmTools.callEmbeddingModel) return [3 /*break*/, 4];
5309
+ // TODO: [🟥] Detect browser / node and make it colorful
5310
+ console.error('No callEmbeddingModel function provided');
5311
+ return [3 /*break*/, 6];
5312
+ case 4: return [4 /*yield*/, llmTools.callEmbeddingModel({
5313
+ title: "Embedding for ".concat(title) /* <- Note: No impact on embedding result itself, just for logging */,
5314
+ parameters: {},
5315
+ content: knowledgePieceContent,
5316
+ modelRequirements: {
5317
+ modelVariant: 'EMBEDDING',
5318
+ },
5319
+ })];
5320
+ case 5:
5321
+ embeddingResult = _c.sent();
5322
+ index.push({
5323
+ modelName: embeddingResult.modelName,
5324
+ position: embeddingResult.content,
5325
+ });
5326
+ _c.label = 6;
5327
+ case 6: return [3 /*break*/, 8];
5328
+ case 7:
5329
+ error_1 = _c.sent();
5330
+ // Note: Here is expected error:
5331
+ // > PipelineExecutionError: You have not provided any `LlmExecutionTools` that support model variant "EMBEDDING"
5332
+ if (!(error_1 instanceof PipelineExecutionError)) {
5333
+ throw error_1;
5334
+ }
5335
+ // TODO: [🟥] Detect browser / node and make it colorful
5336
+ console.error(error_1, "<- Note: This error is not critical to prepare the pipeline, just knowledge pieces won't have embeddings");
5337
+ return [3 /*break*/, 8];
5338
+ case 8: return [2 /*return*/, {
5339
+ name: name,
5340
+ title: title,
5341
+ content: knowledgePieceContent,
5342
+ keywords: keywords,
5343
+ index: index,
5344
+ // <- TODO: [☀] sources,
5345
+ }];
5346
+ }
5347
+ });
5348
+ }); }))];
5349
+ case 6:
5350
+ knowledge = _j.sent();
5351
+ return [2 /*return*/, knowledge];
5352
+ }
5353
+ });
5354
+ });
5355
+ },
5356
+ } /* TODO: [🦷] as const */;
5357
+ /**
5358
+ * TODO: [🦖] Make some system for putting scrapers into separate packages
5359
+ * TODO: [🪂] Do it in parallel 11:11
5360
+ * TODO: [🦷] Ideally use `as const satisfies Scraper` BUT this combination throws errors
5361
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
5362
+ */
5363
+
5364
+ /**
5365
+ * Scraper of .docx and .odt files
5366
+ *
5367
+ * @see `documentationUrl` for more details
5368
+ * @public exported from `@promptbook/core`
5369
+ */
5370
+ var documentScraper = {
5371
+ /**
5372
+ * Mime types that this scraper can handle
5373
+ */
5374
+ mimeTypes: ['application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
5375
+ /**
5376
+ * Link to documentation
5377
+ */
5378
+ documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
5379
+ /**
5380
+ * Convert the `.docx` or `.odt` to `.md` file and returns intermediate source
5381
+ *
5382
+ * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
5383
+ */
5384
+ $convert: function (source, options) {
5385
+ return __awaiter(this, void 0, void 0, function () {
5386
+ var _a, externalProgramsPaths, rootDirname, _b, cacheDirname, _c, isCacheCleaned, _d, isVerbose, extension, cacheFilehandler, command_1;
5387
+ return __generator(this, function (_e) {
5388
+ switch (_e.label) {
5389
+ case 0:
5390
+ _a = options.externalProgramsPaths, externalProgramsPaths = _a === void 0 ? {} : _a, rootDirname = options.rootDirname, _b = options.cacheDirname, cacheDirname = _b === void 0 ? SCRAPE_CACHE_DIRNAME : _b, _c = options.isCacheCleaned, isCacheCleaned = _c === void 0 ? false : _c, _d = options.isVerbose, isVerbose = _d === void 0 ? IS_VERBOSE : _d;
5391
+ if (!$isRunningInNode()) {
5392
+ throw new KnowledgeScrapeError('Scraping .docx files is only supported in Node environment');
5393
+ }
5394
+ if (externalProgramsPaths.pandocPath === undefined) {
5395
+ throw new MissingToolsError('Pandoc is required for scraping .docx files');
5396
+ }
5397
+ if (source.filename === null) {
5398
+ // TODO: [🧠] Maybe save file as temporary
5399
+ throw new KnowledgeScrapeError('When parsing .docx file, it must be real file in the file system');
5400
+ }
5401
+ extension = getFileExtension(source.filename);
5402
+ return [4 /*yield*/, getScraperIntermediateSource(source, {
5403
+ rootDirname: rootDirname,
5404
+ cacheDirname: cacheDirname,
5405
+ isCacheCleaned: isCacheCleaned,
5406
+ extension: 'md',
5407
+ isVerbose: isVerbose,
5408
+ })];
5409
+ case 1:
5410
+ cacheFilehandler = _e.sent();
5411
+ return [4 /*yield*/, $isFileExisting(cacheFilehandler.filename)];
5412
+ case 2:
5413
+ if (!!(_e.sent())) return [3 /*break*/, 5];
5414
+ command_1 = "\"".concat(externalProgramsPaths.pandocPath, "\" -f ").concat(extension, " -t markdown \"").concat(source.filename, "\" -o \"").concat(cacheFilehandler.filename, "\"");
5415
+ // TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook
5416
+ return [4 /*yield*/, $execCommand(command_1)];
5417
+ case 3:
5418
+ // TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook
5419
+ _e.sent();
5420
+ return [4 /*yield*/, $isFileExisting(cacheFilehandler.filename)];
5421
+ case 4:
5422
+ // Note: [0]
5423
+ if (!(_e.sent())) {
5424
+ throw new UnexpectedError(spaceTrim(function (block) { return "\n File that was supposed to be created by Pandoc does not exist for unknown reason\n\n Expected file:\n ".concat(block(cacheFilehandler.filename), "\n\n Command:\n > ").concat(block(command_1), "\n\n "); }));
5425
+ }
5426
+ _e.label = 5;
5427
+ case 5: return [2 /*return*/, cacheFilehandler];
5428
+ }
5429
+ });
5430
+ });
5431
+ },
5432
+ /**
5433
+ * Scrapes the docx file and returns the knowledge pieces or `null` if it can't scrape it
5434
+ */
5435
+ scrape: function (source, options) {
5436
+ return __awaiter(this, void 0, void 0, function () {
5437
+ var cacheFilehandler, markdownSource, knowledge;
5438
+ return __generator(this, function (_a) {
5439
+ switch (_a.label) {
5440
+ case 0: return [4 /*yield*/, documentScraper.$convert(source, options)];
5441
+ case 1:
5442
+ cacheFilehandler = _a.sent();
5443
+ markdownSource = {
5444
+ source: source.source,
5445
+ filename: cacheFilehandler.filename,
5446
+ url: null,
5447
+ mimeType: 'text/markdown',
5448
+ asText: function () {
5449
+ return __awaiter(this, void 0, void 0, function () {
5450
+ return __generator(this, function (_a) {
5451
+ switch (_a.label) {
5452
+ case 0: return [4 /*yield*/, readFile(cacheFilehandler.filename, 'utf-8')];
5453
+ case 1:
5454
+ // Note: [0] In $convert we check that the file exists
5455
+ return [2 /*return*/, _a.sent()];
5456
+ }
5457
+ });
5458
+ });
5459
+ },
5460
+ asJson: function () {
5461
+ throw new UnexpectedError('Did not expect that `markdownScraper` would need to get the content `asJson`');
5462
+ },
5463
+ asBlob: function () {
5464
+ throw new UnexpectedError('Did not expect that `markdownScraper` would need to get the content `asBlob`');
5465
+ },
5466
+ };
5467
+ knowledge = markdownScraper.scrape(markdownSource, options);
5468
+ return [4 /*yield*/, cacheFilehandler.destroy()];
5469
+ case 2:
5470
+ _a.sent();
5471
+ return [2 /*return*/, knowledge];
5472
+ }
5473
+ });
5474
+ });
5475
+ },
5476
+ } /* TODO: [🦷] as const */;
5477
+ /**
5478
+ * TODO: [👣] Converted documents can act as cached items - there is no need to run conversion each time
5479
+ * TODO: [🦖] Make some system for putting scrapers into separate packages
5480
+ * TODO: [🪂] Do it in parallel 11:11
5481
+ * TODO: [🦷] Ideally use `as const satisfies Converter & Scraper` BUT this combination throws errors
5482
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
5483
+ */
5484
+
5485
+ /**
5486
+ * Scraper for legacy `.doc` and `.rtf` files
5487
+ *
5488
+ * @see `documentationUrl` for more details
5489
+ * @public exported from `@promptbook/core`
5490
+ */
5491
+ var legacyDocumentScraper = {
5492
+ /**
5493
+ * Mime types that this scraper can handle
5494
+ */
5495
+ mimeTypes: ['application/msword', 'text/rtf'],
5496
+ /**
5497
+ * Link to documentation
5498
+ */
5499
+ documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
5500
+ /**
5501
+ * Converts the `.doc` or `.rtf` file to a `.docx` file and returns intermediate source
5502
+ *
5503
+ * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
5504
+ */
5505
+ $convert: function (source, options) {
5506
+ return __awaiter(this, void 0, void 0, function () {
5507
+ var _a, externalProgramsPaths, rootDirname, _b, cacheDirname, _c, isCacheCleaned, _d, isVerbose, extension, cacheFilehandler, documentSourceOutdirPathForLibreOffice_1, command_1, files_1, file;
5508
+ return __generator(this, function (_e) {
5509
+ switch (_e.label) {
5510
+ case 0:
5511
+ _a = options.externalProgramsPaths, externalProgramsPaths = _a === void 0 ? {} : _a, rootDirname = options.rootDirname, _b = options.cacheDirname, cacheDirname = _b === void 0 ? SCRAPE_CACHE_DIRNAME : _b, _c = options.isCacheCleaned, isCacheCleaned = _c === void 0 ? false : _c, _d = options.isVerbose, isVerbose = _d === void 0 ? IS_VERBOSE : _d;
5512
+ if (!$isRunningInNode()) {
5513
+ throw new KnowledgeScrapeError('Scraping .doc files is only supported in Node environment');
5514
+ }
5515
+ if (externalProgramsPaths.libreOfficePath === undefined) {
5516
+ throw new MissingToolsError('LibreOffice is required for scraping .doc and .rtf files');
5517
+ }
5518
+ if (source.filename === null) {
5519
+ // TODO: [🧠] Maybe save file as temporary
5520
+ throw new KnowledgeScrapeError('When parsing .doc or .rtf file, it must be real file in the file system');
5521
+ }
5522
+ extension = getFileExtension(source.filename);
5523
+ return [4 /*yield*/, getScraperIntermediateSource(source, {
5524
+ rootDirname: rootDirname,
5525
+ cacheDirname: cacheDirname,
5526
+ isCacheCleaned: isCacheCleaned,
5527
+ extension: 'docx',
5528
+ isVerbose: isVerbose,
5529
+ })];
5530
+ case 1:
5531
+ cacheFilehandler = _e.sent();
5532
+ if (isVerbose) {
5533
+ console.info("documentScraper: Converting .".concat(extension, " -> .docx"));
5534
+ }
5535
+ return [4 /*yield*/, $isFileExisting(cacheFilehandler.filename)];
5536
+ case 2:
5537
+ if (!!(_e.sent())) return [3 /*break*/, 8];
5538
+ documentSourceOutdirPathForLibreOffice_1 = join(dirname(cacheFilehandler.filename), 'libreoffice')
5539
+ .split('\\')
5540
+ .join('/');
5541
+ command_1 = "\"".concat(externalProgramsPaths.libreOfficePath, "\" --headless --convert-to docx \"").concat(source.filename, "\" --outdir \"").concat(documentSourceOutdirPathForLibreOffice_1, "\"");
5542
+ // TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook - this should trigger build pollution error
5543
+ return [4 /*yield*/, $execCommand(command_1)];
5544
+ case 3:
5545
+ // TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook - this should trigger build pollution error
5546
+ _e.sent();
5547
+ return [4 /*yield*/, readdir(documentSourceOutdirPathForLibreOffice_1)];
5548
+ case 4:
5549
+ files_1 = _e.sent();
5550
+ if (files_1.length !== 1) {
5551
+ throw new UnexpectedError(spaceTrim(function (block) { return "\n Expected exactly 1 file in the LibreOffice output directory, got ".concat(files_1.length, "\n\n The temporary folder:\n ").concat(block(documentSourceOutdirPathForLibreOffice_1), "\n\n Command:\n > ").concat(block(command_1), "\n "); }));
5552
+ }
5553
+ file = files_1[0];
5554
+ return [4 /*yield*/, rename(join(documentSourceOutdirPathForLibreOffice_1, file), cacheFilehandler.filename)];
5555
+ case 5:
5556
+ _e.sent();
5557
+ return [4 /*yield*/, rmdir(documentSourceOutdirPathForLibreOffice_1)];
5558
+ case 6:
5559
+ _e.sent();
5560
+ return [4 /*yield*/, $isFileExisting(cacheFilehandler.filename)];
5561
+ case 7:
5562
+ if (!(_e.sent())) {
5563
+ throw new UnexpectedError(spaceTrim(function (block) { return "\n File that was supposed to be created by LibreOffice does not exist for unknown reason\n\n Expected file:\n ".concat(block(cacheFilehandler.filename), "\n\n The temporary folder:\n ").concat(block(documentSourceOutdirPathForLibreOffice_1), "\n\n Command:\n > ").concat(block(command_1), "\n\n "); }));
5564
+ }
5565
+ _e.label = 8;
5566
+ case 8: return [2 /*return*/, cacheFilehandler];
5567
+ }
5568
+ });
5569
+ });
5570
+ },
5571
+ /**
5572
+ * Scrapes the `.doc` or `.rtf` file and returns the knowledge pieces or `null` if it can't scrape it
5573
+ */
5574
+ scrape: function (source, options) {
5575
+ return __awaiter(this, void 0, void 0, function () {
5576
+ var cacheFilehandler, markdownSource, knowledge;
5577
+ return __generator(this, function (_a) {
5578
+ switch (_a.label) {
5579
+ case 0: return [4 /*yield*/, legacyDocumentScraper.$convert(source, options)];
5580
+ case 1:
5581
+ cacheFilehandler = _a.sent();
5582
+ markdownSource = {
5583
+ source: source.source,
5584
+ filename: cacheFilehandler.filename,
5585
+ url: null,
5586
+ mimeType: 'text/markdown',
5587
+ asText: function () {
5588
+ throw new UnexpectedError('Did not expect that `documentScraper` would need to get the content `asText`');
5589
+ },
5590
+ asJson: function () {
5591
+ throw new UnexpectedError('Did not expect that `documentScraper` would need to get the content `asJson`');
5592
+ },
5593
+ asBlob: function () {
5594
+ throw new UnexpectedError('Did not expect that `documentScraper` would need to get the content `asBlob`');
5595
+ },
5596
+ };
5597
+ knowledge = documentScraper.scrape(markdownSource, options);
5598
+ return [4 /*yield*/, cacheFilehandler.destroy()];
5599
+ case 2:
5600
+ _a.sent();
5601
+ return [2 /*return*/, knowledge];
5602
+ }
5603
+ });
5604
+ });
5605
+ },
5606
+ } /* TODO: [🦷] as const */;
5607
+ /**
5608
+ * TODO: [👣] Converted documents can act as cached items - there is no need to run conversion each time
5609
+ * TODO: [🦖] Make some system for putting scrapers into separate packages
5610
+ * TODO: [🪂] Do it in parallel 11:11
5611
+ * TODO: [🦷] Ideally use `as const satisfies Converter & Scraper` BUT this combination throws errors
5612
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
5613
+ */
5614
+
5615
+ /**
5616
+ * Scraper for `.pdf` files (not yet implemented - both `$convert` and `scrape` throw `NotYetImplementedError`)
5617
+ *
5618
+ * @see `documentationUrl` for more details
5619
+ * @public exported from `@promptbook/core`
5620
+ */
5621
+ var pdfScraper = {
5622
+ /**
5623
+ * Mime types that this scraper can handle
5624
+ */
5625
+ mimeTypes: ['application/pdf'],
5626
+ /**
5627
+ * Link to documentation
5628
+ */
5629
+ documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
5630
+ /**
5631
+ * Converts the `.pdf` file to `.md` file and returns intermediate source
5632
+ */
5633
+ $convert: function (source, options) {
5634
+ return __awaiter(this, void 0, void 0, function () {
5635
+ return __generator(this, function (_a) {
5636
+ TODO_USE(source);
5637
+ TODO_USE(options);
5638
+ throw new NotYetImplementedError('PDF conversion not yet implemented');
5639
+ });
5640
+ });
5641
+ },
5642
+ /**
5643
+ * Scrapes the `.pdf` file and returns the knowledge pieces or `null` if it can't scrape it
5644
+ */
5645
+ scrape: function (source, options) {
5646
+ return __awaiter(this, void 0, void 0, function () {
5647
+ return __generator(this, function (_a) {
5648
+ TODO_USE(source);
5649
+ TODO_USE(options);
5650
+ /*
5651
+ const {
5652
+ externalProgramsPaths = {},
5653
+ cacheDirname = SCRAPE_CACHE_DIRNAME,
5654
+ isCacheCleaned = false,
5655
+ isVerbose = IS_VERBOSE,
5656
+ } = options;
5657
+ */
5658
+ throw new NotYetImplementedError('PDF scraping not yet implemented');
5659
+ });
5660
+ });
5661
+ },
5662
+ } /* TODO: [🦷] as const */;
5663
+ /**
5664
+ * TODO: [👣] Converted documents can act as cached items - there is no need to run conversion each time
5665
+ * TODO: [🦖] Make some system for putting scrapers into separate packages
5666
+ * TODO: [🪂] Do it in parallel 11:11
5667
+ * TODO: [🦷] Ideally use `as const satisfies Converter & Scraper` BUT this combination throws errors
5668
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
5669
+ */
5670
+
5671
+ /**
5672
+ * A converter instance that uses showdown and highlight extensions
5673
+ *
5674
+ * @type {Converter}
5675
+ * @private for markdown and html knowledge scrapers
5676
+ */
5677
+ var markdownConverter = new Converter({
5678
+ flavor: 'github', // <- TODO: !!!!!! Explicitly specify the flavor of promptbook markdown
5679
+ /*
5680
+ > import showdownHighlight from 'showdown-highlight';
5681
+ > extensions: [
5682
+ > showdownHighlight({
5683
+ > // Whether to add the classes to the <pre> tag, default is false
5684
+ > pre: true,
5685
+ > // Whether to use hljs' auto language detection, default is true
5686
+ > auto_detection: true,
5687
+ > }),
5688
+ > ],
5689
+ */
5690
+ });
5691
+ /**
5692
+ * TODO: !!!!!! Figure out better name not to confuse with `Converter`
5693
+ * TODO: !!!!!! Lazy-make converter
5694
+ */
5695
+
5696
+ /**
5697
+ * Scraper for websites (`text/html` sources)
5698
+ *
5699
+ * @see `documentationUrl` for more details
5700
+ * @public exported from `@promptbook/core`
5701
+ */
5702
+ var websiteScraper = {
5703
+ /**
5704
+ * Mime types that this scraper can handle
5705
+ */
5706
+ mimeTypes: ['text/html'],
5707
+ /**
5708
+ * Link to documentation
5709
+ */
5710
+ documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
5711
+ /**
5712
+ * Converts the website to markdown and returns intermediate source (the cache file holds the extracted `.html`; the markdown is attached as the `markdown` property)
5713
+ *
5714
+ * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
5715
+ */
5716
+ $convert: function (source, options) {
5717
+ return __awaiter(this, void 0, void 0, function () {
5718
+ var
5719
+ // TODO: [🧠] Maybe in node use headless browser not just JSDOM
5720
+ // externalProgramsPaths = {},
5721
+ rootDirname, _a, cacheDirname, _b, isCacheCleaned, _c, isVerbose, jsdom, _d, reader, article, html, i, cacheFilehandler, markdown;
5722
+ return __generator(this, function (_e) {
5723
+ switch (_e.label) {
5724
+ case 0:
5725
+ rootDirname = options.rootDirname, _a = options.cacheDirname, cacheDirname = _a === void 0 ? SCRAPE_CACHE_DIRNAME : _a, _b = options.isCacheCleaned, isCacheCleaned = _b === void 0 ? false : _b, _c = options.isVerbose, isVerbose = _c === void 0 ? IS_VERBOSE : _c;
5726
+ // TODO: !!!!!! Does this work in browser? Make it work.
5727
+ if (source.url === null) {
5728
+ throw new KnowledgeScrapeError('Website scraper requires URL');
5729
+ }
5730
+ _d = JSDOM.bind;
5731
+ return [4 /*yield*/, source.asText()];
5732
+ case 1:
5733
+ jsdom = new (_d.apply(JSDOM, [void 0, _e.sent(), {
5734
+ url: source.url,
5735
+ }]))();
5736
+ reader = new Readability(jsdom.window.document);
5737
+ article = reader.parse();
5738
+ console.log(article);
5739
+ return [4 /*yield*/, forTime(10000)];
5740
+ case 2:
5741
+ _e.sent();
5742
+ html = (article === null || article === void 0 ? void 0 : article.content) || (article === null || article === void 0 ? void 0 : article.textContent) || jsdom.window.document.body.innerHTML;
5743
+ // Note: Unwrap html such as it is convertable by `markdownConverter`
5744
+ for (i = 0; i < 2; i++) {
5745
+ html = html.replace(/<div\s*(?:id="readability-page-\d+"\s+class="page")?>(.*)<\/div>/is, '$1');
5746
+ }
5747
+ if (html.includes('<div')) {
5748
+ html = (article === null || article === void 0 ? void 0 : article.textContent) || '';
5749
+ }
5750
+ return [4 /*yield*/, getScraperIntermediateSource(source, {
5751
+ rootDirname: rootDirname,
5752
+ cacheDirname: cacheDirname,
5753
+ isCacheCleaned: isCacheCleaned,
5754
+ extension: 'html',
5755
+ isVerbose: isVerbose,
5756
+ })];
5757
+ case 3:
5758
+ cacheFilehandler = _e.sent();
5759
+ return [4 /*yield*/, writeFile(cacheFilehandler.filename, html, 'utf-8')];
5760
+ case 4:
5761
+ _e.sent();
5762
+ markdown = markdownConverter.makeMarkdown(html, jsdom.window.document);
5763
+ return [2 /*return*/, __assign(__assign({}, cacheFilehandler), { markdown: markdown })];
5764
+ }
5765
+ });
5766
+ });
5767
+ },
5768
+ /**
5769
+ * Scrapes the website and returns the knowledge pieces or `null` if it can't scrape it
5770
+ */
5771
+ scrape: function (source, options) {
5772
+ return __awaiter(this, void 0, void 0, function () {
5773
+ var cacheFilehandler, markdownSource, knowledge;
5774
+ return __generator(this, function (_a) {
5775
+ switch (_a.label) {
5776
+ case 0: return [4 /*yield*/, websiteScraper.$convert(source, options)];
5777
+ case 1:
5778
+ cacheFilehandler = _a.sent();
5779
+ markdownSource = {
5780
+ source: source.source,
5781
+ filename: cacheFilehandler.filename,
5782
+ url: null,
5783
+ mimeType: 'text/markdown',
5784
+ asText: function () {
5785
+ return cacheFilehandler.markdown;
5786
+ },
5787
+ asJson: function () {
5788
+ throw new UnexpectedError('Did not expect that `markdownScraper` would need to get the content `asJson`');
5789
+ },
5790
+ asBlob: function () {
5791
+ throw new UnexpectedError('Did not expect that `markdownScraper` would need to get the content `asBlob`');
5792
+ },
5793
+ };
5794
+ knowledge = markdownScraper.scrape(markdownSource, options);
5795
+ return [4 /*yield*/, cacheFilehandler.destroy()];
5796
+ case 2:
5797
+ _a.sent();
5798
+ return [2 /*return*/, knowledge];
5799
+ }
5800
+ });
5801
+ });
5802
+ },
5803
+ } /* TODO: [🦷] as const */;
4770
5804
  /**
4771
- * TODO: [🐚] Change onProgress to object that represents the running execution, can be subscribed via RxJS to and also awaited
5805
+ * TODO: !!!!!! Put into separate package
5806
+ * TODO: [👣] Scraped website in .md can act as cache item - there is no need to run conversion each time
5807
+ * TODO: [🦖] Make some system for putting scrapers into separate packages
5808
+ * TODO: [🪂] Do it in parallel 11:11
5809
+ * TODO: [🦷] Ideally use `as const satisfies Converter & Scraper` BUT this combination throws errors
5810
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
4772
5811
  */
4773
5812
 
5813
+ // TODO: [🦖] !!!!!! Pass scrapers as dependency,
4774
5814
  /**
4775
- * Creates executor function from pipeline and execution tools.
5815
+ * @@@
4776
5816
  *
4777
- * @returns The executor function
4778
- * @throws {PipelineLogicError} on logical error in the pipeline
4779
- * @public exported from `@promptbook/core`
5817
+ * @private because this will be replaced by a system of one scraper per package [🦖]
5818
+ * TODO: [🦖] System for scrapers NOT public exported from `@promptbook/core`
4780
5819
  */
4781
- function createPipelineExecutor(options) {
4782
- var _this = this;
4783
- var pipeline = options.pipeline, tools = options.tools, _a = options.settings, settings = _a === void 0 ? {} : _a;
4784
- var _b = settings.maxExecutionAttempts, maxExecutionAttempts = _b === void 0 ? MAX_EXECUTION_ATTEMPTS : _b, _c = settings.maxParallelCount, maxParallelCount = _c === void 0 ? MAX_PARALLEL_COUNT : _c, _d = settings.csvSettings, csvSettings = _d === void 0 ? DEFAULT_CSV_SETTINGS : _d, _e = settings.isVerbose, isVerbose = _e === void 0 ? IS_VERBOSE : _e, _f = settings.isNotPreparedWarningSupressed, isNotPreparedWarningSupressed = _f === void 0 ? false : _f;
4785
- validatePipeline(pipeline);
4786
- var pipelineIdentification = (function () {
4787
- // Note: This is a 😐 implementation of [🚞]
4788
- var _ = [];
4789
- if (pipeline.sourceFile !== undefined) {
4790
- _.push("File: ".concat(pipeline.sourceFile));
4791
- }
4792
- if (pipeline.pipelineUrl !== undefined) {
4793
- _.push("Url: ".concat(pipeline.pipelineUrl));
4794
- }
4795
- return _.join('\n');
4796
- })();
4797
- var preparedPipeline;
4798
- if (isPipelinePrepared(pipeline)) {
4799
- preparedPipeline = pipeline;
4800
- }
4801
- else if (isNotPreparedWarningSupressed !== true) {
4802
- console.warn(spaceTrim$1(function (block) { return "\n Pipeline is not prepared\n\n ".concat(block(pipelineIdentification), "\n\n It will be prepared ad-hoc before the first execution and **returned as `preparedPipeline` in `PipelineExecutorResult`**\n But it is recommended to prepare the pipeline during collection preparation\n\n @see more at https://ptbk.io/prepare-pipeline\n "); }));
4803
- }
4804
- var runCount = 0;
4805
- var pipelineExecutor = function (inputParameters, onProgress) { return __awaiter(_this, void 0, void 0, function () {
4806
- return __generator(this, function (_a) {
4807
- runCount++;
4808
- return [2 /*return*/, /* not await */ executePipeline({
4809
- pipeline: pipeline,
4810
- preparedPipeline: preparedPipeline,
4811
- setPreparedPipeline: function (newPreparedPipeline) {
4812
- preparedPipeline = newPreparedPipeline;
4813
- },
4814
- inputParameters: inputParameters,
4815
- tools: tools,
4816
- onProgress: onProgress,
4817
- pipelineIdentification: spaceTrim$1(function (block) { return "\n ".concat(block(pipelineIdentification), "\n ").concat(runCount === 1 ? '' : "Run #".concat(runCount), "\n "); }),
4818
- settings: {
4819
- maxExecutionAttempts: maxExecutionAttempts,
4820
- maxParallelCount: maxParallelCount,
4821
- csvSettings: csvSettings,
4822
- isVerbose: isVerbose,
4823
- isNotPreparedWarningSupressed: isNotPreparedWarningSupressed,
4824
- },
4825
- })];
4826
- });
4827
- }); };
4828
- return pipelineExecutor;
5820
+ var SCRAPERS = [
5821
+ markdownScraper,
5822
+ documentScraper,
5823
+ legacyDocumentScraper,
5824
+ pdfScraper,
5825
+ websiteScraper,
5826
+ // <- Note: [♓️] This is the order of the scrapers for knowledge, BUT consider some better (more explicit) way to do this
5827
+ ];
5828
+
5829
+ /**
5830
+ * Creates unique name for the source
5831
+ *
5832
+ * @private within the repository
5833
+ */
5834
+ function sourceContentToName(sourceContent) {
5835
+ // TODO: !!!!!! Better name for source than gibberish hash
5836
+ var hash = SHA256(hexEncoder.parse(JSON.stringify(sourceContent)))
5837
+ // <- TODO: [🥬] Encapsulate sha256 to some private utility function
5838
+ .toString( /* hex */)
5839
+ .substring(0, 20);
5840
+ // <- TODO: [🥬] Make some system for hashes and ids of promptbook
5841
+ var semanticName = normalizeToKebabCase(sourceContent.substring(0, 20));
5842
+ var pieces = ['source', semanticName, hash].filter(function (piece) { return piece !== ''; });
5843
+ var name = pieces.join('-').split('--').join('-');
5844
+ // <- TODO: Use MAX_FILENAME_LENGTH
5845
+ return name;
4829
5846
  }
4830
5847
  /**
4831
- * TODO: [🐚] Change onProgress to object that represents the running execution, can be subscribed via RxJS to and also awaited
5848
+ * TODO: [🐱‍🐉][🧠] Make some smart crop NOT source-i-m-pavol-a-develop-... BUT source-i-m-pavol-a-developer-...
5849
+ */
5850
+
5851
+ /**
5852
+ * Convert file extension to mime type
5853
+ *
5854
+ * @private within the repository
4832
5855
  */
5856
+ function extensionToMimeType(value) {
5857
+ return lookup(value) || 'application/octet-stream';
5858
+ }
4833
5859
 
4834
5860
  /**
4835
5861
  * @@@
4836
5862
  *
4837
- * @public exported from `@promptbook/core`
5863
+ * @private for scraper utilities
4838
5864
  */
4839
- function prepareKnowledgeFromMarkdown(knowledgeContent /* <- TODO: [🖖] (?maybe not) Always the file */, options) {
5865
+ function makeKnowledgeSourceHandler(knowledgeSource, options) {
5866
+ var _a;
4840
5867
  return __awaiter(this, void 0, void 0, function () {
4841
- var llmTools, _a, maxParallelCount, _b, isVerbose, collection, prepareKnowledgeFromMarkdownExecutor, _c, prepareTitleExecutor, _d, prepareKeywordsExecutor, _e, result, outputParameters, knowledgePiecesRaw, knowledgeTextPieces, knowledge;
4842
- var _f, _g, _h;
4843
- var _this = this;
4844
- return __generator(this, function (_j) {
4845
- switch (_j.label) {
5868
+ var sourceContent, name, _b, _c, rootDirname, _d, isVerbose, url, response_1, mimeType, filename_1, fileExtension, mimeType_1;
5869
+ return __generator(this, function (_e) {
5870
+ switch (_e.label) {
4846
5871
  case 0:
4847
- llmTools = options.llmTools, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a, _b = options.isVerbose, isVerbose = _b === void 0 ? IS_VERBOSE : _b;
4848
- TODO_USE(maxParallelCount); // <- [🪂]
4849
- collection = createCollectionFromJson.apply(void 0, __spreadArray([], __read(PipelineCollection), false));
4850
- _c = createPipelineExecutor;
4851
- _f = {};
4852
- return [4 /*yield*/, collection.getPipelineByUrl('https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.ptbk.md')];
5872
+ sourceContent = knowledgeSource.sourceContent;
5873
+ name = knowledgeSource.name;
5874
+ _b = options || {}, _c = _b.rootDirname, rootDirname = _c === void 0 ? null : _c, _d = _b.isVerbose, isVerbose = _d === void 0 ? IS_VERBOSE : _d;
5875
+ TODO_USE(isVerbose);
5876
+ if (!name) {
5877
+ name = sourceContentToName(sourceContent);
5878
+ }
5879
+ if (!isValidUrl(sourceContent)) return [3 /*break*/, 2];
5880
+ url = sourceContent;
5881
+ return [4 /*yield*/, fetch(url)];
4853
5882
  case 1:
4854
- prepareKnowledgeFromMarkdownExecutor = _c.apply(void 0, [(_f.pipeline = _j.sent(),
4855
- _f.tools = {
4856
- llm: llmTools,
5883
+ response_1 = _e.sent();
5884
+ mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
5885
+ return [2 /*return*/, {
5886
+ source: name,
5887
+ filename: null,
5888
+ url: url,
5889
+ mimeType: mimeType,
5890
+ asBlob: function () {
5891
+ return __awaiter(this, void 0, void 0, function () {
5892
+ var content;
5893
+ return __generator(this, function (_a) {
5894
+ switch (_a.label) {
5895
+ case 0: return [4 /*yield*/, response_1.blob()];
5896
+ case 1:
5897
+ content = _a.sent();
5898
+ return [2 /*return*/, content];
5899
+ }
5900
+ });
5901
+ });
4857
5902
  },
4858
- _f)]);
4859
- _d = createPipelineExecutor;
4860
- _g = {};
4861
- return [4 /*yield*/, collection.getPipelineByUrl('https://promptbook.studio/promptbook/prepare-knowledge-title.ptbk.md')];
4862
- case 2:
4863
- prepareTitleExecutor = _d.apply(void 0, [(_g.pipeline = _j.sent(),
4864
- _g.tools = {
4865
- llm: llmTools,
5903
+ asJson: function () {
5904
+ return __awaiter(this, void 0, void 0, function () {
5905
+ var content;
5906
+ return __generator(this, function (_a) {
5907
+ switch (_a.label) {
5908
+ case 0: return [4 /*yield*/, response_1.json()];
5909
+ case 1:
5910
+ content = _a.sent();
5911
+ return [2 /*return*/, content];
5912
+ }
5913
+ });
5914
+ });
4866
5915
  },
4867
- _g)]);
4868
- _e = createPipelineExecutor;
4869
- _h = {};
4870
- return [4 /*yield*/, collection.getPipelineByUrl('https://promptbook.studio/promptbook/prepare-knowledge-keywords.ptbk.md')];
4871
- case 3:
4872
- prepareKeywordsExecutor = _e.apply(void 0, [(_h.pipeline = _j.sent(),
4873
- _h.tools = {
4874
- llm: llmTools,
5916
+ asText: function () {
5917
+ return __awaiter(this, void 0, void 0, function () {
5918
+ var content;
5919
+ return __generator(this, function (_a) {
5920
+ switch (_a.label) {
5921
+ case 0: return [4 /*yield*/, response_1.text()];
5922
+ case 1:
5923
+ content = _a.sent();
5924
+ return [2 /*return*/, content];
5925
+ }
5926
+ });
5927
+ });
4875
5928
  },
4876
- _h)]);
4877
- return [4 /*yield*/, prepareKnowledgeFromMarkdownExecutor({ knowledgeContent: knowledgeContent })];
4878
- case 4:
4879
- result = _j.sent();
4880
- assertsExecutionSuccessful(result);
4881
- outputParameters = result.outputParameters;
4882
- knowledgePiecesRaw = outputParameters.knowledgePieces;
4883
- knowledgeTextPieces = (knowledgePiecesRaw || '').split('\n---\n');
4884
- // <- TODO: [main] !!!!! Smarter split and filter out empty pieces
4885
- if (isVerbose) {
4886
- console.info('knowledgeTextPieces:', knowledgeTextPieces);
5929
+ }];
5930
+ case 2:
5931
+ if (!(isValidFilePath(sourceContent) || /\.[a-z]{1,10}$/i.exec(sourceContent))) return [3 /*break*/, 4];
5932
+ if (!$isRunningInNode()) {
5933
+ throw new EnvironmentMismatchError('Importing knowledge source file works only in Node.js environment');
4887
5934
  }
4888
- return [4 /*yield*/, Promise.all(
4889
- // TODO: [🪂] !! Do not send all at once but in chunks
4890
- knowledgeTextPieces.map(function (knowledgeTextPiece, i) { return __awaiter(_this, void 0, void 0, function () {
4891
- var name, title, knowledgePieceContent, keywords, index, titleResult, _a, titleRaw, keywordsResult, _b, keywordsRaw, embeddingResult, error_1;
4892
- return __generator(this, function (_c) {
4893
- switch (_c.label) {
4894
- case 0:
4895
- name = "piece-".concat(i);
4896
- title = spaceTrim(knowledgeTextPiece.substring(0, 100));
4897
- knowledgePieceContent = spaceTrim(knowledgeTextPiece);
4898
- keywords = [];
4899
- index = [];
4900
- _c.label = 1;
4901
- case 1:
4902
- _c.trys.push([1, 7, , 8]);
4903
- return [4 /*yield*/, prepareTitleExecutor({ knowledgePieceContent: knowledgePieceContent })];
4904
- case 2:
4905
- titleResult = _c.sent();
4906
- _a = titleResult.outputParameters.title, titleRaw = _a === void 0 ? 'Untitled' : _a;
4907
- title = spaceTrim(titleRaw) /* <- TODO: Maybe do in pipeline */;
4908
- name = titleToName(title);
4909
- return [4 /*yield*/, prepareKeywordsExecutor({ knowledgePieceContent: knowledgePieceContent })];
4910
- case 3:
4911
- keywordsResult = _c.sent();
4912
- _b = keywordsResult.outputParameters.keywords, keywordsRaw = _b === void 0 ? '' : _b;
4913
- keywords = (keywordsRaw || '')
4914
- .split(',')
4915
- .map(function (keyword) { return keyword.trim(); })
4916
- .filter(function (keyword) { return keyword !== ''; });
4917
- if (isVerbose) {
4918
- console.info("Keywords for \"".concat(title, "\":"), keywords);
5935
+ if (rootDirname === null) {
5936
+ throw new EnvironmentMismatchError('Can not import file knowledge in non-file pipeline');
5937
+ // <- TODO: [🧠] What is the best error type here`
5938
+ }
5939
+ filename_1 = join(rootDirname, sourceContent).split('\\').join('/');
5940
+ fileExtension = getFileExtension(filename_1);
5941
+ mimeType_1 = extensionToMimeType(fileExtension || '');
5942
+ return [4 /*yield*/, $isFileExisting(filename_1)];
5943
+ case 3:
5944
+ if (!(_e.sent())) {
5945
+ throw new NotFoundError(spaceTrim(function (block) { return "\n Can not make source handler for file which does not exist:\n\n File:\n ".concat(block(filename_1), "\n "); }));
5946
+ }
5947
+ // TODO: !!!!!! Test security file - file is scoped to the project (maybe do this in `filesystemTools`)
5948
+ return [2 /*return*/, {
5949
+ source: name,
5950
+ filename: filename_1,
5951
+ url: null,
5952
+ mimeType: mimeType_1,
5953
+ asBlob: function () {
5954
+ return __awaiter(this, void 0, void 0, function () {
5955
+ var content;
5956
+ return __generator(this, function (_a) {
5957
+ switch (_a.label) {
5958
+ case 0: return [4 /*yield*/, readFile(filename_1)];
5959
+ case 1:
5960
+ content = _a.sent();
5961
+ // <- Note: Its OK to use sync in tooling for tests
5962
+ return [2 /*return*/, new Blob([
5963
+ content,
5964
+ // <- TODO: !!!!!! Maybe not working
5965
+ ], { type: mimeType_1 })];
4919
5966
  }
4920
- if (!!llmTools.callEmbeddingModel) return [3 /*break*/, 4];
4921
- // TODO: [🟥] Detect browser / node and make it colorfull
4922
- console.error('No callEmbeddingModel function provided');
4923
- return [3 /*break*/, 6];
4924
- case 4: return [4 /*yield*/, llmTools.callEmbeddingModel({
4925
- title: "Embedding for ".concat(title) /* <- Note: No impact on embedding result itself, just for logging */,
4926
- parameters: {},
4927
- content: knowledgePieceContent,
4928
- modelRequirements: {
4929
- modelVariant: 'EMBEDDING',
4930
- },
4931
- })];
4932
- case 5:
4933
- embeddingResult = _c.sent();
4934
- index.push({
4935
- modelName: embeddingResult.modelName,
4936
- position: embeddingResult.content,
4937
- });
4938
- _c.label = 6;
4939
- case 6: return [3 /*break*/, 8];
4940
- case 7:
4941
- error_1 = _c.sent();
4942
- // Note: Here is expected error:
4943
- // > PipelineExecutionError: You have not provided any `LlmExecutionTools` that support model variant "EMBEDDING
4944
- if (!(error_1 instanceof PipelineExecutionError)) {
4945
- throw error_1;
5967
+ });
5968
+ });
5969
+ },
5970
+ asJson: function () {
5971
+ return __awaiter(this, void 0, void 0, function () {
5972
+ var _a, _b;
5973
+ return __generator(this, function (_c) {
5974
+ switch (_c.label) {
5975
+ case 0:
5976
+ _b = (_a = JSON).parse;
5977
+ return [4 /*yield*/, readFile(filename_1, 'utf-8')];
5978
+ case 1: return [2 /*return*/, _b.apply(_a, [_c.sent()])];
4946
5979
  }
4947
- // TODO: [🟥] Detect browser / node and make it colorfull
4948
- console.error(error_1, "<- Note: This error is not critical to prepare the pipeline, just knowledge pieces won't have embeddings");
4949
- return [3 /*break*/, 8];
4950
- case 8: return [2 /*return*/, {
4951
- name: name,
4952
- title: title,
4953
- content: knowledgePieceContent,
4954
- keywords: keywords,
4955
- index: index,
4956
- // <- TODO: [☀] sources,
4957
- }];
4958
- }
4959
- });
4960
- }); }))];
4961
- case 5:
4962
- knowledge = _j.sent();
4963
- return [2 /*return*/, knowledge];
5980
+ });
5981
+ });
5982
+ },
5983
+ asText: function () {
5984
+ return __awaiter(this, void 0, void 0, function () {
5985
+ return __generator(this, function (_a) {
5986
+ switch (_a.label) {
5987
+ case 0: return [4 /*yield*/, readFile(filename_1, 'utf-8')];
5988
+ case 1: return [2 /*return*/, _a.sent()];
5989
+ }
5990
+ });
5991
+ });
5992
+ },
5993
+ }];
5994
+ case 4: return [2 /*return*/, {
5995
+ source: name,
5996
+ filename: null,
5997
+ url: null,
5998
+ mimeType: 'text/markdown',
5999
+ asText: function () {
6000
+ return knowledgeSource.sourceContent;
6001
+ },
6002
+ asJson: function () {
6003
+ throw new UnexpectedError('Did not expect that `markdownScraper` would need to get the content `asJson`');
6004
+ },
6005
+ asBlob: function () {
6006
+ throw new UnexpectedError('Did not expect that `markdownScraper` would need to get the content `asBlob`');
6007
+ },
6008
+ }];
4964
6009
  }
4965
6010
  });
4966
6011
  });
4967
6012
  }
4968
- /**
4969
- * TODO: [🐝][🔼][main] !!! Export via `@promptbook/markdown`
4970
- * TODO: [🪂] Do it in parallel 11:11
4971
- * Note: No need to aggregate usage here, it is done by intercepting the llmTools
4972
- */
4973
6013
 
4974
6014
  /**
4975
6015
  * Prepares the knowle
@@ -4979,21 +6019,64 @@ function prepareKnowledgeFromMarkdown(knowledgeContent /* <- TODO: [🖖] (?mayb
4979
6019
  */
4980
6020
  function prepareKnowledgePieces(knowledgeSources, options) {
4981
6021
  return __awaiter(this, void 0, void 0, function () {
4982
- var _a, maxParallelCount, knowledgePrepared;
6022
+ var _a, maxParallelCount, rootDirname, _b, isVerbose, knowledgePreparedUnflatten, knowledgePrepared;
4983
6023
  var _this = this;
4984
- return __generator(this, function (_b) {
4985
- switch (_b.label) {
6024
+ return __generator(this, function (_c) {
6025
+ switch (_c.label) {
4986
6026
  case 0:
4987
- _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a;
4988
- knowledgePrepared = [];
4989
- return [4 /*yield*/, forEachAsync(knowledgeSources, { maxParallelCount: maxParallelCount }, function (knowledgeSource) { return __awaiter(_this, void 0, void 0, function () {
4990
- var partialPieces, pieces;
4991
- return __generator(this, function (_a) {
4992
- switch (_a.label) {
4993
- case 0: return [4 /*yield*/, prepareKnowledgeFromMarkdown(knowledgeSource.sourceContent, // <- TODO: [🐝][main] !!! Unhardcode markdown, detect which type it is - BE AWARE of big package size
4994
- options)];
6027
+ _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a, rootDirname = options.rootDirname, _b = options.isVerbose, isVerbose = _b === void 0 ? IS_VERBOSE : _b;
6028
+ knowledgePreparedUnflatten = new Array(knowledgeSources.length);
6029
+ return [4 /*yield*/, forEachAsync(knowledgeSources, { maxParallelCount: maxParallelCount }, function (knowledgeSource, index) { return __awaiter(_this, void 0, void 0, function () {
6030
+ var partialPieces, sourceHandler, SCRAPERS_1, SCRAPERS_1_1, scraper, partialPiecesUnchecked, e_1_1, pieces;
6031
+ var e_1, _a;
6032
+ return __generator(this, function (_b) {
6033
+ switch (_b.label) {
6034
+ case 0:
6035
+ partialPieces = null;
6036
+ return [4 /*yield*/, makeKnowledgeSourceHandler(knowledgeSource, { rootDirname: rootDirname, isVerbose: isVerbose })];
4995
6037
  case 1:
4996
- partialPieces = _a.sent();
6038
+ sourceHandler = _b.sent();
6039
+ _b.label = 2;
6040
+ case 2:
6041
+ _b.trys.push([2, 7, 8, 9]);
6042
+ SCRAPERS_1 = __values(SCRAPERS), SCRAPERS_1_1 = SCRAPERS_1.next();
6043
+ _b.label = 3;
6044
+ case 3:
6045
+ if (!!SCRAPERS_1_1.done) return [3 /*break*/, 6];
6046
+ scraper = SCRAPERS_1_1.value;
6047
+ if (!scraper.mimeTypes.includes(sourceHandler.mimeType)
6048
+ // <- TODO: [🦔] Implement mime-type wildcards
6049
+ ) {
6050
+ return [3 /*break*/, 5];
6051
+ }
6052
+ return [4 /*yield*/, scraper.scrape(sourceHandler, options)];
6053
+ case 4:
6054
+ partialPiecesUnchecked = _b.sent();
6055
+ if (partialPiecesUnchecked !== null) {
6056
+ partialPieces = partialPiecesUnchecked;
6057
+ return [3 /*break*/, 6];
6058
+ }
6059
+ _b.label = 5;
6060
+ case 5:
6061
+ SCRAPERS_1_1 = SCRAPERS_1.next();
6062
+ return [3 /*break*/, 3];
6063
+ case 6: return [3 /*break*/, 9];
6064
+ case 7:
6065
+ e_1_1 = _b.sent();
6066
+ e_1 = { error: e_1_1 };
6067
+ return [3 /*break*/, 9];
6068
+ case 8:
6069
+ try {
6070
+ if (SCRAPERS_1_1 && !SCRAPERS_1_1.done && (_a = SCRAPERS_1.return)) _a.call(SCRAPERS_1);
6071
+ }
6072
+ finally { if (e_1) throw e_1.error; }
6073
+ return [7 /*endfinally*/];
6074
+ case 9:
6075
+ if (partialPieces === null) {
6076
+ throw new KnowledgeScrapeError(spaceTrim(function (block) { return "\n Cannot scrape knowledge from source: ".concat(knowledgeSource.sourceContent, "\n\n No scraper found for the mime type \"").concat(sourceHandler.mimeType, "\"\n\n Available scrapers:\n ").concat(block(SCRAPERS.flatMap(function (scraper) { return scraper.mimeTypes; })
6077
+ .map(function (mimeType) { return "- ".concat(mimeType); })
6078
+ .join('\n')), "\n\n\n "); }));
6079
+ }
4997
6080
  pieces = partialPieces.map(function (partialPiece) { return (__assign(__assign({}, partialPiece), { sources: [
4998
6081
  {
4999
6082
  name: knowledgeSource.name,
@@ -5001,13 +6084,14 @@ function prepareKnowledgePieces(knowledgeSources, options) {
5001
6084
  // <- TODO: [❎]
5002
6085
  },
5003
6086
  ] })); });
5004
- knowledgePrepared.push.apply(knowledgePrepared, __spreadArray([], __read(pieces), false));
6087
+ knowledgePreparedUnflatten[index] = pieces;
5005
6088
  return [2 /*return*/];
5006
6089
  }
5007
6090
  });
5008
6091
  }); })];
5009
6092
  case 1:
5010
- _b.sent();
6093
+ _c.sent();
6094
+ knowledgePrepared = knowledgePreparedUnflatten.flat();
5011
6095
  return [2 /*return*/, knowledgePrepared];
5012
6096
  }
5013
6097
  });
@@ -5024,7 +6108,7 @@ TODO: [🧊] This is how it can look in future
5024
6108
  >
5025
6109
  > export async function prepareKnowledgePieces(
5026
6110
  > knowledge: PrepareKnowledgeKnowledge,
5027
- > options: PrepareOptions,
6111
+ > options: PrepareAndScrapeOptions,
5028
6112
  > ):
5029
6113
  */
5030
6114
  /**
@@ -5137,6 +6221,9 @@ function preparePersona(personaDescription, options) {
5137
6221
  switch (_d.label) {
5138
6222
  case 0:
5139
6223
  llmTools = options.llmTools, _a = options.isVerbose, isVerbose = _a === void 0 ? IS_VERBOSE : _a;
6224
+ if (llmTools === undefined) {
6225
+ throw new MissingToolsError('LLM tools are required for preparing persona');
6226
+ }
5140
6227
  collection = createCollectionFromJson.apply(void 0, __spreadArray([], __read(PipelineCollection), false));
5141
6228
  _b = createPipelineExecutor;
5142
6229
  _c = {};
@@ -5280,7 +6367,7 @@ function prepareTemplates(pipeline, options) {
5280
6367
  */
5281
6368
  function preparePipeline(pipeline, options) {
5282
6369
  return __awaiter(this, void 0, void 0, function () {
5283
- var llmTools, _a, maxParallelCount, _b, isVerbose, parameters, templates,
6370
+ var llmTools, rootDirname, _a, maxParallelCount, _b, isVerbose, parameters, templates,
5284
6371
  /*
5285
6372
  <- TODO: [🧠][🪑] `promptbookVersion` */
5286
6373
  knowledgeSources /*
@@ -5293,8 +6380,11 @@ function preparePipeline(pipeline, options) {
5293
6380
  if (isPipelinePrepared(pipeline)) {
5294
6381
  return [2 /*return*/, pipeline];
5295
6382
  }
5296
- llmTools = options.llmTools, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a, _b = options.isVerbose, isVerbose = _b === void 0 ? IS_VERBOSE : _b;
6383
+ llmTools = options.llmTools, rootDirname = options.rootDirname, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a, _b = options.isVerbose, isVerbose = _b === void 0 ? IS_VERBOSE : _b;
5297
6384
  parameters = pipeline.parameters, templates = pipeline.templates, knowledgeSources = pipeline.knowledgeSources, personas = pipeline.personas;
6385
+ if (llmTools === undefined) {
6386
+ throw new MissingToolsError('LLM tools are required for preparing the pipeline');
6387
+ }
5298
6388
  llmToolsWithUsage = countTotalUsage(llmTools);
5299
6389
  currentPreparation = {
5300
6390
  id: 1,
@@ -5316,6 +6406,7 @@ function preparePipeline(pipeline, options) {
5316
6406
  switch (_a.label) {
5317
6407
  case 0: return [4 /*yield*/, preparePersona(persona.description, {
5318
6408
  llmTools: llmToolsWithUsage,
6409
+ rootDirname: rootDirname,
5319
6410
  maxParallelCount: maxParallelCount /* <- TODO: [🪂] */,
5320
6411
  isVerbose: isVerbose,
5321
6412
  })];
@@ -5330,11 +6421,7 @@ function preparePipeline(pipeline, options) {
5330
6421
  case 1:
5331
6422
  _c.sent();
5332
6423
  knowledgeSourcesPrepared = knowledgeSources.map(function (source) { return (__assign(__assign({}, source), { preparationIds: [/* TODO: [🧊] -> */ currentPreparation.id] })); });
5333
- return [4 /*yield*/, prepareKnowledgePieces(knowledgeSources /* <- TODO: [🧊] {knowledgeSources, knowledgePieces} */, {
5334
- llmTools: llmToolsWithUsage,
5335
- maxParallelCount: maxParallelCount /* <- TODO: [🪂] */,
5336
- isVerbose: isVerbose,
5337
- })];
6424
+ return [4 /*yield*/, prepareKnowledgePieces(knowledgeSources /* <- TODO: [🧊] {knowledgeSources, knowledgePieces} */, __assign(__assign({}, options), { llmTools: llmToolsWithUsage, rootDirname: rootDirname, maxParallelCount: maxParallelCount /* <- TODO: [🪂] */, isVerbose: isVerbose }))];
5338
6425
  case 2:
5339
6426
  partialknowledgePiecesPrepared = _c.sent();
5340
6427
  knowledgePiecesPrepared = partialknowledgePiecesPrepared.map(function (piece) { return (__assign(__assign({}, piece), { preparationIds: [/* TODO: [🧊] -> */ currentPreparation.id] })); });
@@ -5344,6 +6431,7 @@ function preparePipeline(pipeline, options) {
5344
6431
  knowledgePiecesCount: knowledgePiecesPrepared.length,
5345
6432
  }, {
5346
6433
  llmTools: llmToolsWithUsage,
6434
+ rootDirname: rootDirname,
5347
6435
  maxParallelCount: maxParallelCount /* <- TODO: [🪂] */,
5348
6436
  isVerbose: isVerbose,
5349
6437
  })];
@@ -5366,36 +6454,10 @@ function preparePipeline(pipeline, options) {
5366
6454
  * @see https://docs.anthropic.com/en/docs/test-and-evaluate/strengthen-guardrails/increase-consistency#specify-the-desired-output-format
5367
6455
  */
5368
6456
 
5369
- /**
5370
- * Tests if given string is valid URL.
5371
- *
5372
- * Note: This does not check if the file exists only if the path is valid
5373
- * @public exported from `@promptbook/utils`
5374
- */
5375
- function isValidFilePath(filePath) {
5376
- if (typeof filePath !== 'string') {
5377
- return false;
5378
- }
5379
- var filePathSlashes = filePath.split('\\').join('/');
5380
- // Absolute Unix path: /hello.txt
5381
- if (/^(\/)/i.test(filePathSlashes)) {
5382
- return true;
5383
- }
5384
- // Absolute Windows path: /hello.txt
5385
- if (/^([A-Z]{1,2}:\/?)\//i.test(filePathSlashes)) {
5386
- return true;
5387
- }
5388
- // Relative path: ./hello.txt
5389
- if (/^(\.\.?\/)+/i.test(filePathSlashes)) {
5390
- return true;
5391
- }
5392
- return false;
5393
- }
5394
-
5395
6457
  /**
5396
6458
  * Parses the knowledge command
5397
6459
  *
5398
- * @see ./KNOWLEDGE-README.md for more details
6460
+ * @see `documentationUrl` for more details
5399
6461
  * @private within the commands folder
5400
6462
  */
5401
6463
  var knowledgeCommandParser = {
@@ -5413,7 +6475,7 @@ var knowledgeCommandParser = {
5413
6475
  */
5414
6476
  description: "Tells promptbook which external knowledge to use",
5415
6477
  /**
5416
- * Link to discussion
6478
+ * Link to documentation
5417
6479
  */
5418
6480
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/41',
5419
6481
  /**
@@ -5457,11 +6519,8 @@ var knowledgeCommandParser = {
5457
6519
  */
5458
6520
  $applyToPipelineJson: function (command, $pipelineJson) {
5459
6521
  var sourceContent = command.sourceContent;
5460
- var name = 'source-' + sha256(hexEncoder.parse(JSON.stringify(sourceContent))).toString( /* hex */);
5461
- // <- TODO: [🥬] Encapsulate sha256 to some private utility function
5462
- // <- TODO: This should be replaced with a better name later in preparation (done with some propper LLM summarization)
5463
6522
  $pipelineJson.knowledgeSources.push({
5464
- name: name,
6523
+ name: sourceContentToName(sourceContent),
5465
6524
  sourceContent: sourceContent,
5466
6525
  });
5467
6526
  },
@@ -5491,7 +6550,7 @@ var knowledgeCommandParser = {
5491
6550
  /**
5492
6551
  * Parses the template command
5493
6552
  *
5494
- * @see ./TEMPLATE-README.md for more details
6553
+ * @see `documentationUrl` for more details
5495
6554
  * @private within the commands folder
5496
6555
  */
5497
6556
  var templateCommandParser = {
@@ -5527,7 +6586,7 @@ var templateCommandParser = {
5527
6586
  */
5528
6587
  description: "What should the code template template do",
5529
6588
  /**
5530
- * Link to discussion
6589
+ * Link to documentation
5531
6590
  */
5532
6591
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/64',
5533
6592
  /**
@@ -5752,7 +6811,7 @@ function parseNumber(value) {
5752
6811
  /**
5753
6812
  * Parses the expect command
5754
6813
  *
5755
- * @see ./EXPECT-README.md for more details
6814
+ * @see `documentationUrl` for more details
5756
6815
  * @private within the commands folder
5757
6816
  */
5758
6817
  var expectCommandParser = {
@@ -5770,7 +6829,7 @@ var expectCommandParser = {
5770
6829
  */
5771
6830
  description: spaceTrim("\n Expect command describes the desired output of the template (after post-processing)\n It can set limits for the maximum/minimum length of the output, measured in characters, words, sentences, paragraphs or some other shape of the output.\n "),
5772
6831
  /**
5773
- * Link to discussion
6832
+ * Link to documentation
5774
6833
  */
5775
6834
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/30',
5776
6835
  /**
@@ -5930,10 +6989,6 @@ function normalizeTo_SCREAMING_CASE(text) {
5930
6989
  charType = 'NUMBER';
5931
6990
  normalizedChar = char;
5932
6991
  }
5933
- else if (/^\/$/.test(char)) {
5934
- charType = 'SLASH';
5935
- normalizedChar = char;
5936
- }
5937
6992
  else {
5938
6993
  charType = 'OTHER';
5939
6994
  normalizedChar = '_';
@@ -6140,7 +7195,7 @@ function validateParameterName(parameterName) {
6140
7195
  *
6141
7196
  * Note: @@@ This command is used as foreach for new commands - it should NOT be used in any `.ptbk.md` file
6142
7197
  *
6143
- * @see ./FOREACH-README.md for more details <- TODO: @@@ Write theese README files OR remove this link + add annotation here (to all commands)
7198
+ * @see `documentationUrl` for more details
6144
7199
  * @private within the commands folder
6145
7200
  */
6146
7201
  var foreachCommandParser = {
@@ -6162,7 +7217,7 @@ var foreachCommandParser = {
6162
7217
  */
6163
7218
  description: "@@",
6164
7219
  /**
6165
- * Link to discussion
7220
+ * Link to documentation
6166
7221
  */
6167
7222
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/148',
6168
7223
  /**
@@ -6297,7 +7352,7 @@ var foreachCommandParser = {
6297
7352
  /**
6298
7353
  * Parses the format command
6299
7354
  *
6300
- * @see ./FORMAT-README.md for more details
7355
+ * @see `documentationUrl` for more details
6301
7356
  * @private within the commands folder
6302
7357
  */
6303
7358
  var formatCommandParser = {
@@ -6315,7 +7370,7 @@ var formatCommandParser = {
6315
7370
  */
6316
7371
  description: spaceTrim("\n Format command describes the desired output of the template (after post-processing)\n It can set limits for the maximum/minimum length of the output, measured in characters, words, sentences, paragraphs or some other shape of the output.\n "),
6317
7372
  /**
6318
- * Link to discussion
7373
+ * Link to documentation
6319
7374
  */
6320
7375
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/30',
6321
7376
  /**
@@ -6371,7 +7426,7 @@ var formatCommandParser = {
6371
7426
  /**
6372
7427
  * Parses the joker command
6373
7428
  *
6374
- * @see ./JOKER-README.md for more details
7429
+ * @see `documentationUrl` for more details
6375
7430
  * @private within the commands folder
6376
7431
  */
6377
7432
  var jokerCommandParser = {
@@ -6389,7 +7444,7 @@ var jokerCommandParser = {
6389
7444
  */
6390
7445
  description: "Joker parameter is used instead of executing the template result if jokers value meets the expectations requirements",
6391
7446
  /**
6392
- * Link to discussion
7447
+ * Link to documentation
6393
7448
  */
6394
7449
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/66',
6395
7450
  /**
@@ -6450,7 +7505,7 @@ var MODEL_VARIANTS = ['COMPLETION', 'CHAT', 'EMBEDDING' /* <- TODO [🏳] */ /*
6450
7505
  /**
6451
7506
  * Parses the model command
6452
7507
  *
6453
- * @see ./MODEL-README.md for more details
7508
+ * @see `documentationUrl` for more details
6454
7509
  * @private within the commands folder
6455
7510
  */
6456
7511
  var modelCommandParser = {
@@ -6468,7 +7523,7 @@ var modelCommandParser = {
6468
7523
  */
6469
7524
  description: "Tells which `modelRequirements` (for example which model) to use for the prompt template execution",
6470
7525
  /**
6471
- * Link to discussion
7526
+ * Link to documentation
6472
7527
  */
6473
7528
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/67',
6474
7529
  /**
@@ -6550,7 +7605,11 @@ var modelCommandParser = {
6550
7605
  // TODO: [🚜] DRY
6551
7606
  if ($templateJson.modelRequirements[command.key] !== undefined) {
6552
7607
  if ($templateJson.modelRequirements[command.key] === command.value) {
6553
- console.warn("Multiple commands `MODEL ".concat(command.key, " ").concat(command.value, "` in the template \"").concat($templateJson.title || $templateJson.name, "\""));
7608
+ console.warn("Multiple commands `MODEL ".concat({
7609
+ modelName: 'NAME',
7610
+ modelVariant: 'VARIANT',
7611
+ maxTokens: '???',
7612
+ }[command.key], " ").concat(command.value, "` in the template \"").concat($templateJson.title || $templateJson.name, "\""));
6554
7613
  }
6555
7614
  else {
6556
7615
  throw new ParseError(spaceTrim("\n Redefinition of MODEL `".concat(command.key, "` in the template \"").concat($templateJson.title || $templateJson.name, "\"\n\n You have used:\n - MODEL ").concat(command.key, " ").concat($templateJson.modelRequirements[command.key], "\n - MODEL ").concat(command.key, " ").concat(command.value, "\n ")));
@@ -6593,7 +7652,7 @@ var modelCommandParser = {
6593
7652
  /**
6594
7653
  * Parses the parameter command
6595
7654
  *
6596
- * @see ./PARAMETER-README.md for more details
7655
+ * @see `documentationUrl` for more details
6597
7656
  * @private within the commands folder
6598
7657
  */
6599
7658
  var parameterCommandParser = {
@@ -6618,7 +7677,7 @@ var parameterCommandParser = {
6618
7677
  */
6619
7678
  description: "Describes one parameter of the template",
6620
7679
  /**
6621
- * Link to discussion
7680
+ * Link to documentation
6622
7681
  */
6623
7682
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/68',
6624
7683
  /**
@@ -6703,7 +7762,7 @@ var parameterCommandParser = {
6703
7762
  /**
6704
7763
  * Parses the persona command
6705
7764
  *
6706
- * @see ./PERSONA-README.md for more details
7765
+ * @see `documentationUrl` for more details
6707
7766
  * @private within the commands folder
6708
7767
  */
6709
7768
  var personaCommandParser = {
@@ -6725,7 +7784,7 @@ var personaCommandParser = {
6725
7784
  */
6726
7785
  description: "Persona command is used to specify who the system is, it will be transformed into system message, top_t,...",
6727
7786
  /**
6728
- * Link to discussion
7787
+ * Link to documentation
6729
7788
  */
6730
7789
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/22',
6731
7790
  /**
@@ -6841,7 +7900,7 @@ function isValidJavascriptName(javascriptName) {
6841
7900
  /**
6842
7901
  * Parses the postprocess command
6843
7902
  *
6844
- * @see ./POSTPROCESS-README.md for more details
7903
+ * @see `documentationUrl` for more details
6845
7904
  * @private within the commands folder
6846
7905
  */
6847
7906
  var postprocessCommandParser = {
@@ -6860,7 +7919,7 @@ var postprocessCommandParser = {
6860
7919
  */
6861
7920
  description: "Defines the postprocess function to be used on the result from LLM and before the result is validated",
6862
7921
  /**
6863
- * Link to discussion
7922
+ * Link to documentation
6864
7923
  */
6865
7924
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/31',
6866
7925
  /**
@@ -6921,7 +7980,7 @@ var postprocessCommandParser = {
6921
7980
  /**
6922
7981
  * Parses the PROMPTBOOK_VERSION command
6923
7982
  *
6924
- * @see ./PROMPTBOOK_VERSION-README.md for more details
7983
+ * @see `documentationUrl` for more details
6925
7984
  * @private within the commands folder
6926
7985
  */
6927
7986
  var promptbookVersionCommandParser = {
@@ -6940,7 +7999,7 @@ var promptbookVersionCommandParser = {
6940
7999
  */
6941
8000
  description: "Which version of the promptbook is the .ptbk.md using",
6942
8001
  /**
6943
- * Link to discussion
8002
+ * Link to documentation
6944
8003
  */
6945
8004
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/69',
6946
8005
  /**
@@ -6999,7 +8058,7 @@ var promptbookVersionCommandParser = {
6999
8058
  /**
7000
8059
  * Parses the url command
7001
8060
  *
7002
- * @see ./URL-README.md for more details
8061
+ * @see `documentationUrl` for more details
7003
8062
  * @private within the commands folder
7004
8063
  */
7005
8064
  var urlCommandParser = {
@@ -7022,7 +8081,7 @@ var urlCommandParser = {
7022
8081
  */
7023
8082
  description: "Declares unique URL for the pipeline",
7024
8083
  /**
7025
- * Link to discussion
8084
+ * Link to documentation
7026
8085
  */
7027
8086
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/70',
7028
8087
  /**
@@ -7102,7 +8161,7 @@ var urlCommandParser = {
7102
8161
  /**
7103
8162
  * Parses the action command
7104
8163
  *
7105
- * @see ./ACTION-README.md for more details
8164
+ * @see `documentationUrl` for more details
7106
8165
  * @private within the commands folder
7107
8166
  */
7108
8167
  var actionCommandParser = {
@@ -7120,7 +8179,7 @@ var actionCommandParser = {
7120
8179
  */
7121
8180
  description: "Actions influences from the pipeline or template into external world. Like turning on a light, sending an email, etc.",
7122
8181
  /**
7123
- * Link to discussion
8182
+ * Link to documentation
7124
8183
  */
7125
8184
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/72',
7126
8185
  /**
@@ -7172,7 +8231,7 @@ var actionCommandParser = {
7172
8231
  /**
7173
8232
  * Parses the instrument command
7174
8233
  *
7175
- * @see ./INSTRUMENT-README.md for more details
8234
+ * @see `documentationUrl` for more details
7176
8235
  * @private within the commands folder
7177
8236
  */
7178
8237
  var instrumentCommandParser = {
@@ -7190,7 +8249,7 @@ var instrumentCommandParser = {
7190
8249
  */
7191
8250
  description: "Instrument command is used to specify the instrument to be used in the pipeline or template like search, calculate, etc.",
7192
8251
  /**
7193
- * Link to discussion
8252
+ * Link to documentation
7194
8253
  */
7195
8254
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/71',
7196
8255
  /**
@@ -7245,7 +8304,7 @@ var instrumentCommandParser = {
7245
8304
  *
7246
8305
  * Note: @@@ This command is used as boilerplate for new commands - it should NOT be used in any `.ptbk.md` file
7247
8306
  *
7248
- * @see ./BOILERPLATE-README.md for more details <- TODO: @@@ Write theese README files OR remove this link + add annotation here (to all commands)
8307
+ * @see `documentationUrl` for more details
7249
8308
  * @private within the commands folder
7250
8309
  */
7251
8310
  var boilerplateCommandParser = {
@@ -7267,7 +8326,7 @@ var boilerplateCommandParser = {
7267
8326
  */
7268
8327
  description: "@@",
7269
8328
  /**
7270
- * Link to discussion
8329
+ * Link to documentation
7271
8330
  */
7272
8331
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
7273
8332
  /**
@@ -8117,16 +9176,17 @@ function pipelineStringToJsonSync(pipelineString) {
8117
9176
  * @public exported from `@promptbook/core`
8118
9177
  */
8119
9178
  function pipelineStringToJson(pipelineString, options) {
8120
- if (options === void 0) { options = { llmTools: null }; }
8121
9179
  return __awaiter(this, void 0, void 0, function () {
8122
9180
  var llmTools, pipelineJson;
8123
9181
  return __generator(this, function (_a) {
8124
9182
  switch (_a.label) {
8125
9183
  case 0:
8126
- llmTools = options.llmTools;
9184
+ llmTools = (options || {}).llmTools;
8127
9185
  pipelineJson = pipelineStringToJsonSync(pipelineString);
8128
- if (!(llmTools !== null)) return [3 /*break*/, 2];
8129
- return [4 /*yield*/, preparePipeline(pipelineJson, { llmTools: llmTools })];
9186
+ if (!(llmTools !== undefined)) return [3 /*break*/, 2];
9187
+ return [4 /*yield*/, preparePipeline(pipelineJson, options || {
9188
+ rootDirname: null,
9189
+ })];
8130
9190
  case 1:
8131
9191
  pipelineJson = _a.sent();
8132
9192
  _a.label = 2;
@@ -8334,6 +9394,38 @@ function stringifyPipelineJson(pipeline) {
8334
9394
  * TODO: [🍙] Make some standard order of json properties
8335
9395
  */
8336
9396
 
9397
+ /**
9398
+ * Delagates the user interaction to a async callback function
9399
+ * You need to provide your own implementation of this callback function and its bind to UI.
9400
+ *
9401
+ * @public exported from `@promptbook/core`
9402
+ */
9403
+ var CallbackInterfaceTools = /** @class */ (function () {
9404
+ function CallbackInterfaceTools(options) {
9405
+ this.options = options;
9406
+ }
9407
+ /**
9408
+ * Trigger the custom callback function
9409
+ */
9410
+ CallbackInterfaceTools.prototype.promptDialog = function (options) {
9411
+ return __awaiter(this, void 0, void 0, function () {
9412
+ var answer;
9413
+ return __generator(this, function (_a) {
9414
+ switch (_a.label) {
9415
+ case 0: return [4 /*yield*/, this.options.callback(options)];
9416
+ case 1:
9417
+ answer = _a.sent();
9418
+ if (this.options.isVerbose) {
9419
+ console.info(spaceTrim$1(function (block) { return "\n \uD83D\uDCD6 ".concat(block(options.promptTitle), "\n \uD83D\uDC64 ").concat(block(answer), "\n "); }));
9420
+ }
9421
+ return [2 /*return*/, answer];
9422
+ }
9423
+ });
9424
+ });
9425
+ };
9426
+ return CallbackInterfaceTools;
9427
+ }());
9428
+
8337
9429
  /**
8338
9430
  * Pretty print an embedding vector for logging
8339
9431
  *
@@ -8409,38 +9501,6 @@ function usageToHuman(usage) {
8409
9501
  * TODO: [🏛] Maybe make some markdown builder
8410
9502
  */
8411
9503
 
8412
- /**
8413
- * Delagates the user interaction to a async callback function
8414
- * You need to provide your own implementation of this callback function and its bind to UI.
8415
- *
8416
- * @public exported from `@promptbook/core`
8417
- */
8418
- var CallbackInterfaceTools = /** @class */ (function () {
8419
- function CallbackInterfaceTools(options) {
8420
- this.options = options;
8421
- }
8422
- /**
8423
- * Trigger the custom callback function
8424
- */
8425
- CallbackInterfaceTools.prototype.promptDialog = function (options) {
8426
- return __awaiter(this, void 0, void 0, function () {
8427
- var answer;
8428
- return __generator(this, function (_a) {
8429
- switch (_a.label) {
8430
- case 0: return [4 /*yield*/, this.options.callback(options)];
8431
- case 1:
8432
- answer = _a.sent();
8433
- if (this.options.isVerbose) {
8434
- console.info(spaceTrim$1(function (block) { return "\n \uD83D\uDCD6 ".concat(block(options.promptTitle), "\n \uD83D\uDC64 ").concat(block(answer), "\n "); }));
8435
- }
8436
- return [2 /*return*/, answer];
8437
- }
8438
- });
8439
- });
8440
- };
8441
- return CallbackInterfaceTools;
8442
- }());
8443
-
8444
9504
  /**
8445
9505
  * @@@
8446
9506
  *
@@ -9390,5 +10450,5 @@ function executionReportJsonToString(executionReportJson, options) {
9390
10450
  * TODO: [🧠] Should be in generated file GENERATOR_WARNING
9391
10451
  */
9392
10452
 
9393
- export { $llmToolsMetadataRegister, $llmToolsRegister, AbstractFormatError, CLAIM, CallbackInterfaceTools, CollectionError, CsvFormatDefinition, CsvFormatError, DEFAULT_CSV_SETTINGS, DEFAULT_REMOTE_URL, DEFAULT_REMOTE_URL_PATH, ERRORS, EXECUTIONS_CACHE_DIRNAME, EXPECTATION_UNITS, EnvironmentMismatchError, ExecutionReportStringOptionsDefaults, ExpectError, IS_VERBOSE, LimitReachedError, MANDATORY_CSV_SETTINGS, MAX_EXECUTION_ATTEMPTS, MAX_FILENAME_LENGTH, MAX_KNOWLEDGE_SOURCES_SCRAPING_DEPTH, MAX_KNOWLEDGE_SOURCES_SCRAPING_TOTAL, MAX_PARALLEL_COUNT, MODEL_VARIANTS, MemoryStorage, NotFoundError, NotYetImplementedError, PIPELINE_COLLECTION_BASE_FILENAME, PROMPTBOOK_VERSION, ParseError, PipelineExecutionError, PipelineLogicError, PipelineUrlError, PrefixStorage, RESERVED_PARAMETER_NAMES, TemplateTypes, TextFormatDefinition, UNCERTAIN_USAGE, UnexpectedError, ZERO_USAGE, _AnthropicClaudeMetadataRegistration, _AzureOpenAiMetadataRegistration, _OpenAiAssistantMetadataRegistration, _OpenAiMetadataRegistration, addUsage, assertsExecutionSuccessful, cacheLlmTools, collectionToJson, countTotalUsage, createCollectionFromJson, createCollectionFromPromise, createCollectionFromUrl, createLlmToolsFromConfiguration, createPipelineExecutor, createSubcollection, embeddingVectorToString, executionReportJsonToString, isPassingExpectations, isPipelinePrepared, joinLlmExecutionTools, limitTotalUsage, pipelineJsonToString, pipelineStringToJson, pipelineStringToJsonSync, prepareKnowledgeFromMarkdown, prepareKnowledgePieces, preparePersona, preparePipeline, prepareTemplates, prettifyPipelineString, stringifyPipelineJson, unpreparePipeline, usageToHuman, usageToWorktime, validatePipeline };
10453
+ export { $llmToolsMetadataRegister, $llmToolsRegister, AbstractFormatError, CLAIM, CallbackInterfaceTools, CollectionError, CsvFormatDefinition, CsvFormatError, DEFAULT_CSV_SETTINGS, DEFAULT_REMOTE_URL, DEFAULT_REMOTE_URL_PATH, ERRORS, EXECUTIONS_CACHE_DIRNAME, EXPECTATION_UNITS, EnvironmentMismatchError, ExecutionReportStringOptionsDefaults, ExpectError, IS_VERBOSE, KnowledgeScrapeError, LimitReachedError, MANDATORY_CSV_SETTINGS, MAX_EXECUTION_ATTEMPTS, MAX_FILENAME_LENGTH, MAX_KNOWLEDGE_SOURCES_SCRAPING_DEPTH, MAX_KNOWLEDGE_SOURCES_SCRAPING_TOTAL, MAX_PARALLEL_COUNT, MODEL_VARIANTS, MemoryStorage, MissingToolsError, NotFoundError, NotYetImplementedError, PIPELINE_COLLECTION_BASE_FILENAME, PROMPTBOOK_VERSION, ParseError, PipelineExecutionError, PipelineLogicError, PipelineUrlError, PrefixStorage, RESERVED_PARAMETER_NAMES, SCRAPE_CACHE_DIRNAME, TemplateTypes, TextFormatDefinition, UNCERTAIN_USAGE, UnexpectedError, ZERO_USAGE, _AnthropicClaudeMetadataRegistration, _AzureOpenAiMetadataRegistration, _OpenAiAssistantMetadataRegistration, _OpenAiMetadataRegistration, addUsage, assertsExecutionSuccessful, cacheLlmTools, collectionToJson, countTotalUsage, createCollectionFromJson, createCollectionFromPromise, createCollectionFromUrl, createLlmToolsFromConfiguration, createPipelineExecutor, createSubcollection, documentScraper, embeddingVectorToString, executionReportJsonToString, isPassingExpectations, isPipelinePrepared, joinLlmExecutionTools, legacyDocumentScraper, limitTotalUsage, markdownScraper, pdfScraper, pipelineJsonToString, pipelineStringToJson, pipelineStringToJsonSync, prepareKnowledgePieces, preparePersona, preparePipeline, prepareTemplates, prettifyPipelineString, stringifyPipelineJson, unpreparePipeline, usageToHuman, usageToWorktime, validatePipeline, websiteScraper };
9394
10454
  //# sourceMappingURL=index.es.js.map